/*
 *	Sherlock Indexer
 *
 *	(c) 2001--2003 Martin Mares <mj@ucw.cz>
 *	(c) 2002--2003 Robert Spalek <robert@ucw.cz>
 */

#include "sherlock/index.h"

/* iconfig.c */

/*
 * File names
 *
 * Each variable below is declared here and defined elsewhere (this section
 * of the header is labelled iconfig.c). NOTE(review): presumably these are
 * the values passed as the `file' argument of index_name() and of the
 * index_bopen() / index_maybe_bopen() macros -- confirm against the callers.
 */
extern byte *fn_directory;
extern byte *fn_source;
extern byte *fn_fingerprints, *fn_labels_by_id, *fn_attributes, *fn_checksums;
extern byte *fn_links, *fn_urls, *fn_link_graph, *fn_link_graph_index, *fn_sites, *fn_labels, *fn_merges, *fn_signatures, *fn_matches;
extern byte *fn_word_index, *fn_string_index, *fn_references, *fn_string_map, *fn_card_prints;
extern byte *fn_string_hash, *fn_cards, *fn_card_attrs, *fn_parameters, *fn_ref_texts;
extern byte *fn_lexicon, *fn_lex_raw, *fn_lex_ordered, *fn_lex_words, *fn_lex_by_freq;
extern byte *fn_stems, *fn_stems_ordered, *fn_lex_classes, *fn_notes, *fn_notes_new, *fn_keywords, *fn_feedback_gath;
/* NOTE(review): not a file name despite living in this group; its meaning is not visible from this header. */
extern uns default_weight;

/*
 * Helpers for opening index files.
 *
 * index_name() maps a file name to the string handed to bopen();
 * index_name_defined() returns an int used as a truth value by
 * index_maybe_bopen(). Both are only declared here.
 * NOTE(review): presumably index_name() builds a path relative to
 * fn_directory and index_name_defined() reports whether the name is
 * configured at all -- confirm in iconfig.c.
 */
byte *index_name(byte *file);
int index_name_defined(byte *file);
/* Open index file `file' via bopen(), passing indexer_fb_size as the third argument. */
#define index_bopen(file, flags) bopen(index_name(file), flags, indexer_fb_size)
/*
 * Like index_bopen(), but evaluates to NULL when index_name_defined(file)
 * is zero. Note that `file' is evaluated twice, so it must not have side
 * effects.
 */
#define index_maybe_bopen(file, flags) (index_name_defined(file) ? index_bopen(file, flags) : NULL)

/*
 * Miscellaneous
 *
 * Remaining settings shared through this header. Of these, the header
 * itself uses only indexer_fb_size (third argument of bopen() in
 * index_bopen()) and progress, progress_screen and progress_status_line
 * (all three read by the PROGRESS() macro). The meaning of the others is
 * not visible from this file.
 */
extern byte *label_attrs, *link_attrs, *ref_link_types;
extern uns string_avg_bucket, indexer_fb_size, sort_delete_src, max_degree;
extern uns progress, progress_screen, progress_status_line;
extern uns ref_max_length, ref_min_length, ref_max_count;
extern uns matcher_signatures, matcher_context, matcher_min_words, matcher_threshold, matcher_passes, matcher_block;
extern uns max_num_objects, min_summed_size, auto_lang, frameset_to_redir;

/* Filters */
/* NOTE(review): presumably the name of the indexer filter to load; not used within this header. */
extern byte *indexer_filter_name;

/*
 * Report progress of a long-running loop.
 *
 *   i    -- current iteration number; a report is issued only when it is
 *           a multiple of the configured `progress' step (progress == 0
 *           disables reporting altogether)
 *   msg  -- printf-style format; it must be a string literal, because "\r"
 *           is appended to it by literal concatenation
 *   args -- arguments for the format; they may be omitted entirely for
 *           a plain message (the `##' removes the otherwise dangling comma)
 *
 * The message is passed to setproctitle() when progress_status_line is set
 * and printed to stdout, followed by a carriage return and a flush, when
 * progress_screen is set. The arguments are expanded once per enabled
 * output and not evaluated at all when no report is due, so they must be
 * free of side effects.
 */
#define PROGRESS(i, msg, args...) do { if (progress && !((i) % progress)) { \
	if (progress_status_line) setproctitle(msg, ##args); \
	if (progress_screen) { printf(msg "\r", ##args); fflush(stdout); } } } while (0)

/*
 * Number of cards in the index (not a configuration parameter, but also stored in iconfig).
 * The value ~0U is treated as "not known yet" by set_card_count().
 */

extern uns card_count;

/*
 * Record the number of cards in the index.
 *
 * While card_count still holds ~0U, the value `cc' is stored in it.
 * Once a count has been stored, further calls only assert that `cc'
 * agrees with it.
 */
static inline void
set_card_count(uns cc)
{
  if (card_count != ~0U)
    {
      /* Already known: the caller's count must match the stored one. */
      ASSERT(card_count == cc);
      return;
    }
  card_count = cc;
}

/* getbuck.c */

/* Forward declarations: only pointers to these types are used below. */
struct mempool;
struct buck2obj_buf;
/*
 * A source of buckets, driven through the get_next and cleanup function
 * pointers; oid, type and o describe the current bucket.
 */
struct bucket_source {
  u32 oid;				/* oid of the current bucket */
  u32 type;				/* type of the current bucket */
  struct odes *o;			/* parsed content of the current bucket */
  /* NOTE(review): the meaning of get_next's int return value is not visible here -- check getbuck.c. */
  int (*get_next)(struct bucket_source *src, struct mempool *mp, u32 oid); /* Get bucket with the given oid or the next one if oid == ~0U */
  void (*cleanup)(struct bucket_source *src);	/* NOTE(review): presumably releases the source's resources -- confirm */
  uns progress_current, progress_max;	/* NOTE(review): presumably current position and total, for progress reporting -- confirm */
  struct buck2obj_buf *buck_buf;	/* opaque here; struct buck2obj_buf is only forward-declared */
  struct fastbuf *in_file;		/* NOTE(review): presumably the input the buckets are read from -- confirm */
};

/*
 * Returns a bucket source (see struct bucket_source).
 * NOTE(review): ownership of the result and how the source is selected
 * are not visible from this header -- check getbuck.c.
 */
struct bucket_source *get_buck_init(void);

/* Structure of files */

/*
 * A 16-byte MD5 field paired with a card ID.
 * NOTE(review): presumably the record format of the checksums file
 * (fn_checksums) -- confirm before changing the layout.
 */
struct csum {
  byte md5[16];
  u32 cardid;
};

/*
 * A fingerprint paired with a card ID.
 * NOTE(review): presumably the record format of the fingerprints file
 * (fn_fingerprints) -- confirm before changing the layout.
 */
struct fprint {
  struct fingerprint fp;
  u32 cardid;
};

#define	CARD_NOTE_GIANT		1	/* Belongs to a very large class, subject to penalties */
#define	CARD_NOTE_HAS_LINKS	2	/* even the unknown ones */
#define	CARD_NOTE_IS_LINKED	4

/*
 * Per-card note. bring_note() addresses these records as a flat array
 * indexed by card ID, so the size and layout of this structure determine
 * the record offsets.
 */
struct card_note {
  u32 useful_size;			/* Useful size (number of alnum characters) */
  area_t area;				/* We need the area for non-downloaded entries as well */
  /* These fields track how the card weight evolved */
  byte weight_scanner;			/* Weight assigned by the scanner */
  byte weight_merged;			/* Weight after card merging (includes merger penalties) */
  byte flags;				/* NOTE(review): presumably a combination of CARD_NOTE_* -- confirm */
  byte footprint[16];
};

/*
 * Map the note of the given card.
 *
 * The notes are addressed as an array of struct card_note indexed by
 * card ID; the record at that offset is mapped through partmap_map() and
 * the resulting pointer is returned.
 */
static inline struct card_note *
bring_note(struct partmap *p, oid_t card)
{
  /* Widen the ID first so that the offset is computed with sh_off_t in play. */
  const sh_off_t slot = card;
  struct card_note *note = partmap_map(p, sizeof(struct card_note) * slot, sizeof(struct card_note));

  return note;
}

/* fetch.c */

/*
 * Header handed to the fetch_cards() callback together with the card.
 * NOTE(review): next and redirects look like singly-linked list links
 * (further headers, and headers of cards redirecting here) -- confirm
 * in fetch.c.
 */
struct card_hdr {
  struct card_hdr *next;
  struct card_hdr *redirects;
  struct odes *odes;
};

/*
 * Takes a callback receiving a card's attributes, parsed object, header,
 * note and an integer bonus.
 * NOTE(review): presumably got_card is invoked once per fetched card; the
 * meaning of `bonus' is not visible from this header -- check fetch.c.
 */
void fetch_cards(void (*got_card)(struct card_attr *attr, struct odes *obj, struct card_hdr *hdr, struct card_note *note, int bonus));

/* resolve.c */

/*
 * A pair of 32-bit values named src and dest.
 * NOTE(review): presumably one record of the resolver's output, mapping
 * a source ID to the resolved destination ID -- confirm in resolve.c.
 */
struct resolve_output {
  u32 src;
  u32 dest;
};

/*
 * Distinct bits, so the two values can be OR-ed together.
 * NOTE(review): presumably meant for the `flags' argument of the resolve_*()
 * functions below -- confirm in resolve.c.
 */
#define RESOLVE_SKIP_UNKNOWN	1
#define RESOLVE_SKIP_NEW	2

/*
 * Resolver entry points; all of them read from the fastbuf `in'.
 * NOTE(review): the meaning of add_size and mask, and who owns the
 * returned fastbufs, is not visible from this header -- check resolve.c.
 */
sh_off_t resolve_optimize_run_length(struct fastbuf *in);
struct fastbuf *resolve_fastbuf(struct fastbuf *in, uns flags, uns add_size);
struct fastbuf *resolve_fingerprints(struct fastbuf *in, uns flags, uns mask);

/* The value has both ETYPE_MASK bits (0xc0000000) clear, hence no interference with the edge types. */
#define	FIRST_ID_NEW	0x20000000	/* Not downloaded documents are numbered from this ID (no interference with ETYPE_*) */

/* feedback-gath.c */

/*
 * Per-card feedback record; footprint and flags mirror the fields of the
 * same names in struct card_note.
 * NOTE(review): cardid is declared as `uns' here, whereas struct csum and
 * struct fprint use `u32' for their cardid -- confirm this is intentional
 * if the structure is written to a file.
 */
struct feedback_gatherer {
  byte footprint[16];
  uns cardid;
  byte flags;				/* the same as card_note.flags */
  byte weight;
};

/*
 * Graph edge types
 *
 * The type occupies the two highest bits of a 32-bit value; ETYPE_MASK
 * selects exactly those bits.
 * NOTE(review): presumably the remaining 30 bits carry a vertex ID --
 * confirm against the link graph code.
 */

#define ETYPE_NORMAL 0
#define ETYPE_REDIRECT 0x40000000
#define ETYPE_FRAME 0x80000000
#define ETYPE_IMAGE 0xc0000000
#define ETYPE_MASK 0xc0000000
