/*
 *	Sherlock Search Engine -- Query Parser
 *
 *	(c) 1997--2005 Martin Mares <mj@ucw.cz>
 */

%{
#include "sherlock/sherlock.h"
#include "lib/mempool.h"
#include "search/sherlockd.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <setjmp.h>

#define yyerror(x) err(x)

static byte *error;
static int yyparse(void);
static void parse_ext(struct query *q, uns id, enum custom_op op, byte *str, uns num, struct val_set *set);

/* Allocate a zero-filled expression node of type `t' from the pool of the current query. */
struct expr *new_node(enum expr_type t)
{
  struct expr *node = mp_alloc_zero(current_query->pool, sizeof(*node));

  node->type = t;
  return node;
}

/*
 *  Allocate an operator node of type `t' with operands `l' and `r'
 *  (either may be NULL). The node comes zero-filled from new_node().
 */
struct expr *new_op(enum expr_type t, struct expr *l, struct expr *r)
{
  struct expr *node = new_node(t);

  node->u.op.r = r;
  node->u.op.l = l;
  return node;
}

/*
 *  Reset every option in `o' to its "not specified" marker, i.e. the
 *  values merge_options() treats as "take the older setting instead".
 *  Fields not listed here are left untouched.
 */
static inline void
init_options(struct options *o)
{
  /* An all-ones expansion bitmap stands for "no SYNEXP given". */
  o->syn_expand = ~0ULL;

  /* Per-word switches */
  o->synonyming = OPT_DEFAULT;
  o->spelling = OPT_DEFAULT;
  o->morphing = OPT_DEFAULT;
  o->accent_mode = OPT_DEFAULT;

  o->weight = WEIGHT_DEFAULT;
}

/*
 *  Allocate a zeroed option block from the current query's pool and set
 *  all options to their defaults via init_options().
 */
static struct options *
new_options(void)
{
  struct options *opts = mp_alloc_zero(current_query->pool, sizeof(*opts));

  init_options(opts);
  return opts;
}

%}

/* Semantic value carried by tokens and nonterminals; see the %token/%type lists below. */
%union {
  int i;			/* numbers, bit masks, flags, operator and attribute codes */
  struct query *q;		/* the query being filled in (global, querystart, selector) */
  struct expr *x;		/* node of the expression tree */
  struct val_set *t;		/* linked list of set elements */
  struct options *o;		/* word/expression options */
  byte *s;			/* string token */
}

/* Tokens */

/* Keywords and multi-character operators without a semantic value */
%token EOLN K_DEBUG CONTROL ACCENTS LIST SHOW CONTEXT TITLELEN INTERVALS DB SITE URLS SITEMAX
%token PARTIAL APPROX ANY DOTDOT STATS AUTONEAR LE GE NE SORTBY ONLY CARDID SPELL MORPH SYN SYNEXP
%token EXPLAIN FULL
/* Tokens carrying an integer (union member `i') */
%token <i> REF NUM SITEID CUSTOM
/* Tokens carrying a string (union member `s') */
%token <s> STRING

/* Operator precedence, lowest first: OR binds weaker than AND, which binds weaker than NOT/MAYBE */
%left OR
%left AND
%left NOT MAYBE

/* Semantic value types of the nonterminals */
%type <x> expr atom radical simple molecule neg_molecule
%type <i> refmask refmask_list maybemin maybemax db_set db_num custom_op
%type <i> maybe_minus maybe_only custom_attr
%type <q> global querystart selector
%type <t> doc_set maybe_set set setnode
%type <o> maybe_options options option

%%

/*
 *  Start symbol. After the global settings comes either a search
 *  expression terminated by EOLN, or a CONTROL command with a string
 *  argument. Both actions leave yyparse() immediately with result 0.
 */
main:
   global expr EOLN {
      $1->expr = $2;		/* attach the expression tree to the query */
      return 0;
   }
 | global CONTROL STRING {
      $1->cmd = $3;		/* remember the command string */
      return 0;
   }
 ;

/* Empty rule whose only job is to make current_query available as a semantic value. */
querystart:
   /* empty */ { $$ = current_query; }
 ;

/*
 *  Optional leading selector. LIST and SHOW store a document set in
 *  query->range; list_only is set to 1 for LIST, 0 for SHOW and 2 for
 *  STATS. Without a selector the query is passed through unchanged.
 */
selector:
   querystart
 | querystart LIST doc_set {
      $1->range = $3;
      $1->list_only = 1;
      $$ = $1;
   }
 | querystart SHOW doc_set {
      $1->range = $3;
      $1->list_only = 0;
      $$ = $1;
   }
 | querystart STATS {
      $1->list_only = 2;
      $$ = $1;
   }
 ;

/*
 *  Sequence of global settings (left-recursive, so they are applied in
 *  the order written). Every alternative stores one setting into the
 *  query carried in $1 and passes the same query on as $$. Range checks
 *  report failures through err(), which does not return.
 */
global:
   selector
 | global K_DEBUG NUM {
      $1->debug = $3;
      $$ = $1;
   }
 | global CONTEXT NUM {
      $1->context_chars = $3;
      $$ = $1;
   }
 | global CONTEXT FULL {
      $1->context_chars = CONTEXT_FULL;
      $$ = $1;
   }
 | global TITLELEN NUM {
      $1->title_chars = $3;
      $$ = $1;
   }
 | global INTERVALS NUM {
      if ($3 > HARD_MAX_NOTES)
        err("INTERVALS out of range");
      $1->intervals = $3;
      $$ = $1;
   }
 | global option {
      /* A global option overrides the matching field of the query-wide defaults. */
      merge_options(&$1->default_options, &$1->default_options, $2);
      $$ = $1;
   }
 | global DB db_set {
      $1->db_mask = $3;
      $$ = $1;
   }
 | global SITE SITEID {
      $1->site_only = $3;
      $$ = $1;
   }
 | global SITEMAX NUM {
      if ($3 > 2)
	err("SITEMAX out of range");
      $1->site_max = $3;
      $$ = $1;
   }
 | global URLS NUM {
      $1->url_max = $3;
      $$ = $1;
   }
 | global PARTIAL NUM {
      $1->partial_answers = $3;
      $$ = $1;
   }
 | global APPROX NUM {
      $1->allow_approx = $3;
      $$ = $1;
   }
 /* Extended attribute constraints: numeric, string and set operands are all handled by parse_ext(). */
 | global CUSTOM custom_op NUM {
      parse_ext($1, $2, $3, NULL, $4, NULL);
      $$ = $1;
   }
 | global CUSTOM custom_op STRING {
      parse_ext($1, $2, $3, $4, 0, NULL);
      $$ = $1;
   }
 | global CUSTOM custom_op '{' set '}' {
      parse_ext($1, $2, $3, NULL, 0, $5);
      $$ = $1;
   }
 | global SORTBY maybe_minus custom_attr maybe_only {
      $1->custom_sorting = $4;
      $1->custom_sort_reverse = ($3 ? ~0U : 0);	/* all-ones when a leading '-' was given */
      $1->custom_sort_only = $5;
      $$ = $1;
   }
 | global EXPLAIN SITEID {
      $1->explain_id = $3;
      $$ = $1;
   }
 ;

/* Comparison operator of an extended attribute, translated to its CUSTOM_OP_xx code. */
custom_op:
   '<' { $$ = CUSTOM_OP_LT; }
 | '>' { $$ = CUSTOM_OP_GT; }
 | LE  { $$ = CUSTOM_OP_LE; }
 | GE  { $$ = CUSTOM_OP_GE; }
 | '=' { $$ = CUSTOM_OP_EQ; }
 | NE  { $$ = CUSTOM_OP_NE; }
 ;

/* Attribute usable in SORTBY: a CUSTOM token (its value is passed through) or SITE / CARDID. */
custom_attr:
   CUSTOM			/* default action: $$ = $1 */
 | SITE { $$ = PARAM_SITE; }
 | CARDID { $$ = PARAM_CARDID; }
 ;

/* Optional '-' sign: 1 if present, 0 otherwise. */
maybe_minus:
   '-' { $$ = 1; }
 | /* empty */ { $$ = 0; }
 ;

/* Optional ONLY keyword: 1 if present, 0 otherwise. */
maybe_only:
   ONLY { $$ = 1; }
 | /* empty */ { $$ = 0; }
 ;

/*
 *  Boolean expression. AND and OR build binary EX_AND / EX_OR nodes; the
 *  ambiguity of `expr OP expr' is resolved by the %left declarations
 *  (both left-associative, AND binding tighter than OR).
 */
expr:
   molecule
 | expr AND expr {
     $$ = new_op(EX_AND, $1, $3);
   }
 | expr OR expr {
     $$ = new_op(EX_OR, $1, $3);
   }
 ;

/* Operand of AND/OR: either a bracketed/negated/ANY construct or a chain of word matches. */
molecule:
   neg_molecule
 | simple
 ;

/*
 *  Parenthesised sub-expression (optionally followed by options), the ANY
 *  keyword, or a negation of either.
 */
neg_molecule:
   '(' expr ')' maybe_options {
     if ($4)
       {
	 /* Wrap the sub-expression in an EX_OPTIONS node carrying a copy of the options. */
	 $$ = new_node(EX_OPTIONS);
	 memcpy(&$$->u.options, $4, sizeof(struct options));
	 $$->u.options.inside = $2;
       }
     else
       $$ = $2;			/* no options: the parentheses vanish */
   }
 | ANY { $$ = new_node(EX_ANY); }
 | NOT neg_molecule {
     $$ = new_op(EX_NOT, $2, NULL);	/* unary: operand on the left, no right child */
   }
 ;

/* A word match with its sense recorded in u.match.sense: 1 = plain, -1 = NOT, 0 = MAYBE. */
atom:
   radical       { $$=$1; $$->u.match.sense=1; }
 | NOT radical   { $$=$2; $$->u.match.sense=-1; }
 | MAYBE radical { $$=$2; $$->u.match.sense=0; }
 ;

/*
 *  A single word/string match: optional word-type mask, the text itself
 *  and optional options. Produces an EX_MATCH node.
 */
radical:
   refmask STRING maybe_options {
     $$ = new_node(EX_MATCH);
     $$->u.match.classmap = $1 & 0xffff00ff;	/* bits 8-15 of the mask are not part of the class map */
     $$->u.match.is_string = !!($1 & 0x8000);	/* bit 0x8000 of the mask sets is_string */
     $$->u.match.word = $2;
     if ($3)
       memcpy(&$$->u.match.o, $3, sizeof(struct options));
     else
       init_options(&$$->u.match.o);		/* no options given: all defaults */
     $$->u.match.next_simple = NULL;
   }
 ;

/* One atom or several atoms joined by '.'; the atoms are chained through u.match.next_simple. */
simple:
   atom
 | atom '.' simple { $$=$1; $1->u.match.next_simple = $3; }
 ;

/* Word-type mask of a match: an explicit list of REFs, or default_word_types when omitted. */
refmask:
   /* empty */ { $$ = default_word_types; }
 | refmask_list
 ;

/*
 *  Comma-separated list of REF masks, OR-ed together. All members must
 *  agree in bit 0x8000, otherwise the query is rejected.
 */
refmask_list:
   REF { $$ = $1; }
 | refmask_list ',' REF {
     if (($1 ^ $3) & 0x8000)
       err("Incompatible word types");
     $$ = $1 | $3;
   }
 ;

/* Optional option list; NULL when no option is present. */
maybe_options:
   /* empty */ { $$ = NULL; }
 | options { $$ = $1; }
 ;

/*
 *  One or more options folded into the option block of the first one.
 *  merge_options(dest, old, new) prefers `new' wherever it is set, and
 *  here `new' is the accumulated $1, so a field already set by an earlier
 *  option is kept and a later duplicate only fills in unset fields.
 *  NOTE(review): this is the opposite precedence to the `global option'
 *  rule, where the newly parsed option wins -- confirm it is intended.
 */
options:
   option { $$ = $1; }
 | options option { merge_options($1, $2, $1); $$ = $1; }
 ;

/*
 *  A single option. Every alternative allocates a fresh option block with
 *  all fields at their defaults and then sets exactly one of them.
 */
option:   
   '/' maybe_minus NUM {
     /* Word weight, optionally negative. */
     if ($3 > 30000)
       err("Word weight out of range");
     $$ = new_options();
     $$->weight = $2 ? -$3 : $3;
   }
 | ACCENTS NUM {
     if ($2 > 3)
       err("ACCENTS out of range");
     $$ = new_options();
     $$->accent_mode = $2;
   }
 | MORPH NUM {
     $$ = new_options();
     $$->morphing = $2;
   }
 | SPELL NUM {
     $$ = new_options();
     $$->spelling = $2;
   }
 | SYN NUM {
     $$ = new_options();
     $$->synonyming = $2;
   }
 | SYNEXP '{' maybe_set '}' {
     /*
      *  Build a bitmap with one bit per listed variant number. Only
      *  numeric elements up to 62 are accepted; an empty set yields 0.
      *  (The all-ones bitmap is what init_options() uses for "not given".)
      */
     $$ = new_options();
     $$->syn_expand = 0;
     for (struct val_set *t=$3; t; t=t->next)
       {
	 if (t->text)
	   err("SYNEXP: integer set expected");
	 if (t->max > 62)
	   err("SYNEXP: variant number out of range");
	 for (uns i=t->min; i<=t->max; i++)
	   $$->syn_expand |= 1ULL << i;
       }
   }
 ;

/* A set restricted to numeric elements: any element carrying text is rejected. */
doc_set:
   set {
     struct val_set *t;
     for (t=$1; t; t=t->next)
       if (t->text)
	 err("Integer set expected");
     $$ = $1;
   }
 ;

/* Possibly empty set; NULL stands for the empty one. */
maybe_set:
   /* empty */ { $$ = NULL; }
 | set
 ;

/*
 *  Comma-separated list of set elements. Each new element is put in front
 *  of the list built so far, so the resulting list is in reverse order of
 *  appearance.
 */
set:
   setnode { $1->next = NULL; $$ = $1; }
 | set ',' setnode { $3->next = $1; $$ = $3; }
 ;

/*
 *  One element of a set, allocated from the query's pool:
 *    - a single number n, stored as the interval n..n,
 *    - a string, stored in `text' with min = max = 0,
 *    - an interval min..max where either bound may be left out.
 *  The `next' link is not set here; the `set' rule fills it in.
 */
setnode:
   NUM {
     $$ = mp_alloc(current_query->pool, sizeof(struct val_set));
     $$->min = $$->max = $1;
     $$->text = NULL;
   }
 | STRING {
     $$ = mp_alloc(current_query->pool, sizeof(struct val_set));
     $$->min = $$->max = 0;
     $$->text = $1;
   }
 | maybemin DOTDOT maybemax {
     /*
      *  The bounds travel through the parser as int, and a missing upper
      *  bound is ~0, which reads as -1 there. The ordering check therefore
      *  has to be done on the unsigned values; a signed comparison would
      *  reject every open-ended interval such as `5..'.
      */
     if ((uns) $1 > (uns) $3)
       err("Invalid interval");
     $$ = mp_alloc(current_query->pool, sizeof(struct val_set));
     $$->min = $1;
     $$->max = $3;
     $$->text = NULL;
   }
 ;

/* Lower bound of an interval; 0 when omitted. */
maybemin:
   /* empty */ { $$ = 0; }
 | NUM { $$ = $1; }
 ;

/* Upper bound of an interval; all bits set (~0) when omitted. */
maybemax:
   /* empty */ { $$ = ~0; }
 | NUM { $$ = $1; }
 ;

/* Comma-separated list of database names, combined into a single bit mask. */
db_set:
   db_num { $$ = $1; }
 | db_set ',' db_num { $$ = $1 | $3; }
 ;

/*
 *  Translate a database name to a one-bit mask: bit i stands for the i-th
 *  entry (counting from 0) of the `databases' list. Unknown names are
 *  rejected.
 *  NOTE(review): the shift is done in int, so this presumably relies on
 *  the list having fewer than 31 entries -- confirm against the config code.
 */
db_num:
   STRING {
     struct database *db;
     int i;
     for (db=databases, i=0; db; db=db->next, i++)
       if (!strcmp(db->name, $1))
         break;
     if (!db)
       err("Unknown database");
     $$ = 1 << i;
   }
 ;

%%

/* Jump target armed by parse_query(); err() unwinds to it. */
static jmp_buf err_jmp;

/*
 *  Abort parsing with error message `x'. The message is stored in the
 *  file-level `error' variable and control is transferred back to the
 *  setjmp() in parse_query(), so this function never returns to its
 *  caller. It is also what yyerror() expands to.
 */
void
err(byte *x)
{
  error = x;			/* must be set before the jump: parse_query() returns it */
  longjmp(err_jmp, 1);
}

/*
 *  Parse the query text `b' into current_query.
 *
 *  Returns NULL on success, or the message passed to err() if the parser
 *  or one of the semantic actions rejected the query.
 */
byte *
parse_query(byte *b)
{
  error = NULL;
  lex_init(b);

  /* A non-zero result means we got here by err() calling longjmp(). */
  if (setjmp(err_jmp) != 0)
    return error;

  yyparse();
  return error;
}

/*
 *  Value parser for the AGE attribute, used by parse_ext().
 *  With CONFIG_LASTMOD only numeric operands are accepted and stored to
 *  *dest unchanged; without it every use of AGE is refused.
 *  Returns NULL on success or an error message.
 */
#ifdef CONFIG_LASTMOD
static byte *
parse_age(u32 *dest, byte *val, uns intval)
{
  if (!val)
    {
      *dest = intval;
      return NULL;
    }
  return "Incorrect AGE";
}
#else
static byte *
parse_age(u32 *dest UNUSED, byte *val UNUSED, uns intval UNUSED)
{
  return "Searching by document age not supported in this configuration";
}
#endif

/*
 *  Apply one extended-attribute constraint of the form `ATTR op operand'
 *  to the query.
 *
 *  q    -- query to modify
 *  id   -- value of the CUSTOM token; matched against PARAM_AGE and the
 *          OFFSETOF() of the attribute's field inside struct query
 *  op   -- comparison operator
 *  str  -- string operand, or NULL
 *  num  -- numeric operand (meaningful when str is NULL)
 *  set  -- list of set elements for the `{...}' form, or NULL
 *
 *  Three kinds of attributes are distinguished by the switch below:
 *    - range attributes (pmin/pmax): the constraint is intersected with
 *      the interval already stored in the query; an unsatisfiable result
 *      is stored as min = ~0U, max = 0,
 *    - small-set attributes (pset): a 32-bit membership mask. A `{...}'
 *      operand replaces the mask, a scalar operand narrows it,
 *    - custom-match attributes (cfunc): everything is delegated to the
 *      attribute's own callback.
 *
 *  All user errors are reported through err(), which does not return.
 *  An unknown `id' is an internal error and goes to die().
 */
static void
parse_ext(struct query *q, uns id, enum custom_op op, byte *str, uns num, struct val_set *set)
{
  byte *(*pfunc)(u32 *dest, byte *value, uns intval) = NULL;
  byte *(*cfunc)(struct query *q, enum custom_op op, byte *value, uns intval) = NULL;
  byte *msg;
  u32 val;
  u32 min = 0, max = ~0U, empty = 0;
  u32 *pmin = NULL, *pmax = NULL, *pset = NULL;

  switch (id)
    {
    case PARAM_AGE:
      pfunc = parse_age;
      pmin = &q->age_raw_min;
      pmax = &q->age_raw_max;
      break;
#define INT_ATTR(id,keywd,gf,pf)		\
    case OFFSETOF(struct query, id##_min):	\
      pfunc = pf;				\
      pmin = &q->id##_min;			\
      pmax = &q->id##_max;			\
      break;
#define SMALL_SET_ATTR(id,keywd,gf,pf) 		\
    case OFFSETOF(struct query, id##_set):	\
      pfunc = pf;				\
      pset = &q->id##_set;			\
      break;
#define LATE_INT_ATTR INT_ATTR
#define LATE_SMALL_SET_ATTR SMALL_SET_ATTR
    EXTENDED_ATTRS
#undef INT_ATTR
#undef SMALL_SET_ATTR
#undef LATE_INT_ATTR
#undef LATE_SMALL_SET_ATTR

#define CUSTOM_MATCH_KWD(id,keywd,cf)		\
    case OFFSETOF(struct query, id##_value):	\
      cfunc = cf;				\
      break;
    CUSTOM_MATCH_PARSE
#undef CUSTOM_MATCH_KWD

    default:
      die("parse_ext: unknown attribute");
    }

  /* ATTR = {...} or ATTR <> {...}: build the membership mask from scratch. */
  if (set)
    {
      if (!pset)
	err("Set matching not supported for this attribute");
      if (op != CUSTOM_OP_EQ && op != CUSTOM_OP_NE)
	err("Sets are not ordered");
      *pset = 0;
      for (; set; set = set->next)
	{
	  if (set->text)
	    {
	      if ((msg = pfunc(&val, set->text, 0)))
		err(msg);
	      /* Unsigned shift: bit 31 is a legal member and 1 << 31 is undefined for int. */
	      *pset |= 1U << val;
	    }
	  else
	    {
	      /* Both interval bounds go through the attribute's value parser. */
	      u32 lo, hi;
	      if ((msg = pfunc(&lo, NULL, set->min)) || (msg = pfunc(&hi, NULL, set->max)))
		err(msg);
	      while (lo <= hi)
		*pset |= 1U << lo++;
	    }
	}
      if (op == CUSTOM_OP_NE)
	*pset = ~*pset;
      return;
    }

  /* Custom-match attributes interpret operator and operand themselves. */
  if (cfunc)
    {
      if ((msg = cfunc(q, op, str, num)))
	err(msg);
      return;
    }

  if ((msg = pfunc(&val, str, num)))
    err(msg);

  /* Translate the comparison into an interval [min,max], or mark it unsatisfiable. */
  switch (op)
    {
    case CUSTOM_OP_LT:
      /* Unfortunately, we must handle <0 separately as we don't use signed integers */
      if (val)
	max = val - 1;
      else
	empty = 1;
      break;
    case CUSTOM_OP_GT:
      if (val == ~0U)
	empty = 1;
      else
	min = val + 1;
      break;
    case CUSTOM_OP_LE:
      max = val;
      break;
    case CUSTOM_OP_GE:
      min = val;
      break;
    case CUSTOM_OP_EQ:
      min = max = val;
      break;
    case CUSTOM_OP_NE:			/* <> supported only for set attributes */
      if (!pset)
	err("<> supported only for set-matched attributes");
      *pset &= ~(1U << val);
      return;
    default:
      ASSERT(0);
    }

  if (pset)
    {
      /*
       *  Keep only members inside [min,max]. An unsatisfiable comparison
       *  (e.g. `< 0') must leave no members at all, so the mask stays
       *  empty in that case instead of being built from the untouched
       *  default interval.
       */
      u32 mask = 0;
      if (!empty)
	while (min <= MIN(max, 31))
	  mask |= 1U << min++;
      *pset &= mask;
    }
  else
    {
      *pmin = MAX(*pmin, min);
      *pmax = MIN(*pmax, max);
      if (*pmin > *pmax || empty)
	{
	  /* Canonical representation of an empty range */
	  *pmin = ~0U;
	  *pmax = 0;
	}
    }
}

/* Extended attributes */

#ifdef CONFIG_LANG

#include "lang/lang.h"

/*
 *  Value parser for the LANG attribute: accepts either a language name
 *  (translated by lang_name_to_code()) or a number in the range 0..31.
 *  On success the code is stored to *dest and NULL is returned, otherwise
 *  an error message is returned.
 */
byte *
ext_lang_parse(u32 *dest, byte *value, uns intval)
{
  if (!value)
    {
      /* Numeric form */
      if (intval > 31)
	return "LANG: out of range";
      *dest = intval;
      return NULL;
    }

  /* Symbolic form */
  int code = lang_name_to_code(value);
  if (code < 0)
    return "LANG: language name not recognized";
  *dest = code;
  return NULL;
}

#endif

#ifdef CONFIG_FILETYPE

/*
 *  Value parser for the FILETYPE attribute: accepts either a type name
 *  listed in custom_file_type_names[] (the first matching index wins) or
 *  a number below MAX_FILE_TYPES. On success the type number is stored to
 *  *dest and NULL is returned, otherwise an error message is returned.
 */
byte *
ext_ft_parse(u32 *dest, byte *value, uns intval)
{
  if (!value)
    {
      /* Numeric form */
      if (intval >= MAX_FILE_TYPES)
	return "FILETYPE: out of range";
      *dest = intval;
      return NULL;
    }

  /* Symbolic form: linear search through the table of names */
  uns idx = 0;
  while (idx < MAX_FILE_TYPES && strcmp(value, custom_file_type_names[idx]))
    idx++;
  if (idx >= MAX_FILE_TYPES)
    return "FILETYPE: unknown type";
  *dest = idx;
  return NULL;
}

#endif

/*
 *  Combine two option blocks field by field: wherever `new' specifies a
 *  value it is used, otherwise the value from `old' is taken. A field is
 *  considered unspecified when it holds WEIGHT_DEFAULT (weight), a
 *  negative number (the mode switches) or ~0ULL (syn_expand).
 *  `dest' may be the same block as `old' or `new'; each field is read
 *  before it is written.
 */
void
merge_options(struct options *dest, struct options *old, struct options *new)
{
#define PICK(field, unset) dest->field = (unset) ? old->field : new->field
  PICK(weight, new->weight == WEIGHT_DEFAULT);
  PICK(accent_mode, new->accent_mode < 0);
  PICK(morphing, new->morphing < 0);
  PICK(spelling, new->spelling < 0);
  PICK(synonyming, new->synonyming < 0);
  PICK(syn_expand, new->syn_expand == ~0ULL);
#undef PICK
}
