/*
 *	Sherlock Language Processing Library -- Test Utility for Stemming
 *
 *	(c) 2003--2004 Martin Mares <mj@ucw.cz>
 */

#include "sherlock/sherlock.h"
#include "lib/conf.h"
#include "sherlock/index.h"
#include "lib/mempool.h"
#include "lang/lang.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 *  Report the command-line synopsis through die(). The function is
 *  declared NONRET, so callers do not expect control to come back.
 */
static void NONRET
usage(void)
{
  die("Usage: stemtest [-l <language> ...] [-x]");
}

int
main(int argc, char **argv)
{
  int opt, l;
  u32 langs = 0;
  byte buf[256];
  int expand = 0;

  log_init(argv[0]);
  while ((opt = cf_getopt(argc, argv, CF_SHORT_OPTS "l:x", CF_NO_LONG_OPTS, NULL)) >= 0)
    switch (opt)
      {
      case 'l':
	l = lang_name_to_code(optarg);
	if (l < 0)
	  die("Unknown language %s", optarg);
	langs |= 1 << l;
	break;
      case 'x':
	expand = 1;
	break;
      default:
	usage();
      }
  if (optind < argc)
    usage();
  if (!langs)
    langs = ~0U;
  struct mempool *mp = mp_new(4096);
  lang_init_stemmers();

  while (fgets(buf, sizeof(buf)-1, stdin))
    {
      byte *nl = strchr(buf, '\n');
      if (nl)
	*nl = 0;
      if (strlen(buf) > MAX_WORD_LEN)
	{
	  log(L_ERROR, "Word too long");
	  continue;
	}
      struct stemmer *st;
      WALK_LIST(st, stemmer_list)
	if (st->lang_mask & langs)
	  {
	    printf("Stemmer %s(%s): ", st->name, st->params);
	    fflush(stdout);
	    mp_flush(mp);
	    list *x;
	    if (x = (expand ? lang_expand : lang_stem)(st, buf, mp))
		{
		  struct word_node *w;
		  WALK_LIST(w, *x)
		    puts(w->w);
		}
	    else
	      puts("---");
	  }
      puts(".");
    }
  return 0;
}
