/*
 *	Sherlock Language Processing Library
 *
 *	(c) 2003--2004 Martin Mares <mj@ucw.cz>
 */

#ifndef _LANG_LANG_H
#define _LANG_LANG_H

#include "lib/lists.h"

/*
 *  Each language gets its own code (0..31). Codes are assigned in the
 *  configuration file, but code 0 is reserved for documents with
 *  unrecognized language and code 31 is used internally by the search
 *  server for texts not belonging to any language.
 */

/* Code reserved for documents whose language was not recognized */
#define LANG_UNKNOWN 0
/* Code used internally by the search server for texts not belonging to any language */
#define LANG_NONE 31

/*
 *  NOTE(review): numerically equal to LANG_NONE; presumably the upper limit
 *  on language codes that can be assigned in the configuration file --
 *  confirm against the users of this constant.
 */
#define MAX_LANGUAGES 31

extern uns lang_count;			/* Number of language codes used */

/*
 *  Conversions between language names and numeric language codes.
 *  NOTE(review): the functions returning int presumably report an unknown
 *  name by a negative value -- confirm in the implementation.
 */
int lang_name_to_code(byte *name);	/* Names as defined by RFC 1766 */
byte *lang_code_to_name(uns code);	/* Name for a given code (presumably the inverse of lang_name_to_code -- TODO confirm) */
int lang_list_to_code(byte *langs);	/* Extract the primary language from a list (as per RFC 2068) */

/*
 *  Stemmers (lemmatizers)
 */

/*
 *  Descriptor of a single stemmer.
 *  NOTE(review): instances are presumably linked into stemmer_list through
 *  the embedded node -- confirm in the implementation.
 */
struct stemmer {
  node n;				/* Embedded list node */
  u32 lang_mask;			/* Languages this stemmer applies to (presumably one bit per language code -- TODO confirm) */
  byte *name;				/* Name of the stemmer */
  uns id;				/* ... and its internal ID */
  byte *params;				/* Additional stemmer-specific parameters */
  void *priv;				/* Data private to the stemmer */
};

struct word_node {			/* Stemmers and expanders return a list of these nodes */
  node n;				/* Embedded list node */
  /*
   *  The word itself.
   *  NOTE(review): the one-element array looks like the pre-C99 trailing-array
   *  idiom, i.e. nodes are presumably allocated with extra room so that the
   *  whole string is stored inline -- confirm in word_list_add() before
   *  relying on sizeof(struct word_node).
   */
  byte w[1];
};

extern list stemmer_list;		/* List of stemmers (presumably of struct stemmer nodes -- TODO confirm) */
struct mempool;				/* Forward declaration; this header only uses pointers to it */

/*
 *  Apply stemmer `st' to the word `src'. Both functions return a list of
 *  struct word_node (see the comment on that structure).
 *  NOTE(review): the result is presumably allocated from the memory pool
 *  `mp' -- confirm in the implementation.
 */
list *lang_stem(struct stemmer *st, byte *src, struct mempool *mp);
list *lang_expand(struct stemmer *st, byte *src, struct mempool *mp);
void lang_init_stemmers(void);		/* Initialize the stemmers; takes no arguments and returns nothing */

/*
 *  Helpers operating on lists of struct word_node.
 *  NOTE(review): judging by the names, _find presumably looks up the word `w'
 *  in `l', _add inserts a new node for `w' allocated from `mp', and
 *  _add_unique inserts it only if it is not already present -- verify the
 *  exact return conventions (e.g. what is returned on a miss or a duplicate)
 *  in the implementation.
 */
struct word_node *word_list_find(list *l, byte *w);
struct word_node *word_list_add(struct mempool *mp, list *l, byte *w);
struct word_node *word_list_add_unique(struct mempool *mp, list *l, byte *w);

/*
 *  Synonymic dictionaries
 */

/*
 *  Descriptor of a single synonymic dictionary.
 *  NOTE(review): instances are presumably linked into syndict_list through
 *  the embedded node -- confirm in the implementation.
 */
struct syndict {
  node n;				/* Embedded list node */
  u32 lang_mask;			/* Language mask (presumably the languages this dictionary applies to, as in struct stemmer -- TODO confirm) */
  byte *name;				/* Name of the dictionary (NOTE(review): may double as its file name -- confirm) */
  struct fastbuf *fb;			/* Fastbuf stream (presumably valid only between syndict_open() and syndict_close() -- TODO confirm) */
};

extern list syndict_list;		/* List of synonymic dictionaries (presumably of struct syndict nodes -- TODO confirm) */

/*
 *  Sequential access to the contents of a dictionary.
 *  NOTE(review): the expected usage is presumably syndict_open(), repeated
 *  syndict_read_entry() calls and finally syndict_close(). The format of the
 *  returned byte ** (likely an array of words allocated from `mp') and the
 *  end-of-dictionary indication are not visible here -- confirm in the
 *  implementation.
 */
void syndict_open(struct syndict *sd);
byte **syndict_read_entry(struct syndict *sd, struct mempool *mp);
void syndict_close(struct syndict *sd);

#endif
