/*
 *	Sherlock Indexer -- Lexicon Functions
 *
 *	(c) 2001--2003 Martin Mares <mj@ucw.cz>
 *	(c) 2001 Robert Spalek <robert@ucw.cz>
 */

#include "sherlock/sherlock.h"
#include "lib/conf.h"
#include "lib/mempool.h"
#include "indexer/indexer.h"
#include "indexer/lexicon.h"

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/*
 *  Configurable parameters
 */

/*
 *  Built-in values of the lexicon parameters, used unless overridden.
 *  All three maximum lengths start at MAX_WORD_LEN.
 *  NOTE(review): the lex_* names referenced by the lex_config table
 *  presumably alias these fields via indexer/lexicon.h -- confirm there.
 */
struct lexicon_config lexicon_config = {
  .min_len_ign = 1,
  .min_len = 1,
  .max_len = MAX_WORD_LEN,
  .max_hex_len = MAX_WORD_LEN,
  .max_ctrl_len = MAX_WORD_LEN,
  .max_gap = 3,
  .context_slots = 256
};

/*
 *  List of struct exception records (a word and its class). It is
 *  initialized by lexconf_init() and appended to by lex_parse_words().
 */
clist lex_exceptions;

/* Capacity of the word array passed to wordsplit() for one configuration value. */
#define	MAX_WORDS_PER_LINE 32

/*
 *  Configuration callback shared by the WordIgnored, WordNormal,
 *  WordGarbage and WordContext items. The value is split into words by
 *  wordsplit() and one struct exception per word is appended to
 *  lex_exceptions. The class of the word is selected by c->name[7],
 *  a character which is distinct for each of the four item names.
 *
 *  The records keep the word pointers produced by wordsplit(); no copy
 *  of the text is made here.
 *
 *  Returns NULL on success, otherwise a static error message.
 */
static byte *
lex_parse_words(struct cfitem *c, byte *value)
{
  byte *words[MAX_WORDS_PER_LINE];
  byte **cur, **end;
  int count = wordsplit(value, words, MAX_WORDS_PER_LINE);

  /* A negative count is treated as an overflow of the words array. */
  if (count < 0)
    return "Too many words specified in a single line";

  end = words + count;
  for (cur = words; cur < end; cur++)
    {
      struct exception *exc = cfg_malloc(sizeof(*exc));

      exc->w = *cur;
      /* Dispatch on the item name: wordIgn[o]red, wordNor[m]al, wordGar[b]age, wordCon[t]ext. */
      switch (c->name[7])
	{
	case 'o':
	  exc->class = WC_IGNORED;
	  break;
	case 'm':
	  exc->class = WC_NORMAL;
	  break;
	case 'b':
	  exc->class = WC_GARBAGE;
	  break;
#ifdef CONFIG_CONTEXTS
	case 't':
	  exc->class = WC_CONTEXT;
	  break;
#else
	case 't':
	  return "Support for word contexts not compiled in";
#endif
	default:
	  /* Only the four items listed above are registered with this callback. */
	  ASSERT(0);
	}
      clist_add_tail(&lex_exceptions, &exc->n);
    }
  return NULL;
}

/*
 *  Configuration items of the "Lexicon" section, registered by
 *  lexconf_init(). The CT_INT items point at integer parameters; the four
 *  Word* items all share lex_parse_words(), which tells them apart by the
 *  character at index 7 of the item name. Renaming or adding a Word* item
 *  therefore requires updating the switch in lex_parse_words() as well.
 */
static struct cfitem lex_config[] = {
  { "Lexicon",		CT_SECTION,	NULL },
  { "MinWordLenIgnore",	CT_INT,		&lex_min_len_ign },
  { "MinWordLen",	CT_INT,		&lex_min_len },
  { "MaxWordLen",	CT_INT,		&lex_max_len },
  { "MaxHexWordLen",	CT_INT,		&lex_max_hex_len },
  { "MaxCtrlWordLen",	CT_INT,		&lex_max_ctrl_len },
  { "WordIgnored",	CT_FUNCTION,	lex_parse_words },
  { "WordNormal",	CT_FUNCTION,	lex_parse_words },
  { "WordGarbage",	CT_FUNCTION,	lex_parse_words },
  { "WordContext",	CT_FUNCTION,	lex_parse_words },
  { "MaxGap",		CT_INT,		&lex_max_gap },
  { "ContextSlots",	CT_INT,		&lex_context_slots },
  { NULL,		CT_STOP,	NULL }	/* End-of-table marker */
};

/*
 *  Module set-up: makes lex_exceptions an empty list and registers the
 *  lex_config table with the configuration subsystem.
 *  NOTE(review): CONSTRUCTOR presumably arranges for this to run
 *  automatically at program start-up -- confirm in the library headers.
 */
static void CONSTRUCTOR
lexconf_init(void)
{
  /* The list is set up first, as in the original statement order. */
  clist_init(&lex_exceptions);
  cf_register(lex_config);
}
