/*
 *	Sherlock Search Engine -- Processing of References
 *
 *	(c) 1997--2005 Martin Mares <mj@ucw.cz>
 */

#undef LOCAL_DEBUG
#undef DEBUG_DUMP_HEAP

#include "sherlock/sherlock.h"
#include "lib/mempool.h"
#include "lib/unaligned.h"
#include "lib/heap.h"
#include "lib/prefetch.h"
#include "sherlock/index.h"
#include "indexer/params.h"
#include "search/sherlockd.h"

#ifdef CONFIG_LANG
#include "lang/lang.h"
#endif

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <alloca.h>

#define INFTY 1000000		/* All Q's and weights are between -INFTY and INFTY */
#define PEN_INFTY 1024		/* The same for word bonuses and penalties */

/*
 * The trail buffer contains a restriction of the current document to the
 * words present in the query, i.e., all reference chains of interest merged,
 * weighted and filtered. Behind-the-edge (4095) entries are not stored;
 * position 0xffff marks the end of the trail.
 */

/* A single occurrence of a query word in the document being processed */
struct trail_entry {
  u16 pos;		/* Position of the occurrence; 0xffff is the end-of-trail sentinel */
  sbyte weight;		/* Weight of the occurrence with the chain penalty already subtracted */
  byte word_index;	/* Index of the matched word in q->words[] */
};

/* The trail itself; the buffer is (re)allocated by map_refs() and shared by all documents */
static struct trail_entry *trail_buffer;
/* Number of entries in use, "already sorted" flag set by get_trail(), allocated capacity in entries */
static uns trail_size, trail_sorted, trail_buf_size;

/* RHEAP is used for global searching for matching objects */

/* One element of RHEAP: a reference chain keyed by the OID it currently points to */
struct ref_heap_entry {
  oid_t oid;			/* OID at the current read position of the chain */
  struct ref_chain *ref;	/* The chain this entry belongs to */
};

/* Comparison and swap primitives handed over to the HEAP_* macros (smallest OID on the top) */
#define RHEAP_LESS(a,b) (a.oid < b.oid)
#define RHEAP_SWAP(heap,a,b,t) (t=heap[a], heap[a]=heap[b], heap[b]=t)

/* THEAP (aka results->result_heap) holds best matching documents */

/*
 * Ordering of result notes in THEAP: a note is "less" (i.e., worse) when
 * its Q is lower; among notes of equal Q, the one with the greater
 * secondary sort key is the lesser one.
 */
static inline int
note_lt(struct result_note *a, struct result_note *b)
{
  if (a->q != b->q)
    return a->q < b->q;
  return a->sec_sort_key > b->sec_sort_key;
}
/* Primitives for the HEAP_* macros; the swap also keeps the back-reference note->heap up to date */
#define THEAP_LESS(a,b) note_lt(a,b)
#define THEAP_SWAP(h,a,b,t) (t=h[a], h[a]=h[b], h[b]=t, h[a]->heap=a, h[b]->heap=b)

/* Trail capacity reserved per unit of chain multiplicity: 4096 + 16*512 entries */
#define TRAIL_MAX_ENTRIES (4096+16*512)	/* This could be calculated by the indexer one day */

/*
 * Map all reference chains requested by the query to memory and make sure
 * that the trail buffer is large enough.
 *
 * For each chain between q->first_ref and q->last_ref, a mapping request
 * covering its part of the reference file (clipped to ref_file_size) is
 * built and all requests are passed to mmap_regions(). The addresses it
 * returns are stored to u.mem.pos of the corresponding chains (identified
 * by the userdata field of the request).
 *
 * If the mapping fails, error 117 is reported to the client and eval_err()
 * is called with the same code (eval_err() is presumably non-returning, as
 * the results of the mapping are used unconditionally afterwards).
 *
 * The trail buffer is sized as TRAIL_MAX_ENTRIES times the longest run of
 * consecutive requests which were mapped to the same address.
 */
static void
map_refs(struct query *q)
{
  uns n = q->last_ref - q->first_ref;
  struct mmap_request *mm = alloca(sizeof(struct mmap_request) * n);
  uns chlen = 1;			/* Length of the current run of identical mappings */
  uns chmax = 1;			/* The longest run seen so far */

  DBG("Reference chains requested:");
  for (uns i=0; i<n; i++)
    {
      struct ref_chain *ref = &q->first_ref[i];
      DBG("\t%d: @%Lx+%x word=%d nonacc=%d pen=%d lmask=%08x", i, (long long)ref->u.file.start,
	  ref->u.file.size, ref->word_index, ref->noaccent_only, ref->penalty, ref->lang_mask);
      mm[i].u.req.fd = q->dbase->fd_refs;
      mm[i].u.req.start = ref->u.file.start;
      mm[i].u.req.end = MIN(ref->u.file.start + (sh_off_t)ref->u.file.size, q->dbase->ref_file_size);
      mm[i].userdata = i;
    }
  if (mmap_regions(q, mm, n) < 0)
    {
      add_cerr("-117 Too many documents match");
      eval_err(117);			/* The code must agree with the message sent to the client */
    }
  for (uns i=0; i<n; i++)
    {
      /* The requests need not come back in the original order, userdata tells which chain it was */
      uns j = mm[i].userdata;
      q->first_ref[j].u.mem.pos = mm[i].u.map.start;
      if (i && mm[i-1].u.map.start == mm[i].u.map.start)
	chlen++;
      else
	chlen = 1;
      chmax = MAX(chmax, chlen);
    }

  uns tbs = chmax * TRAIL_MAX_ENTRIES;
  DBG("Need trail with %d entries (multiplicity %d)", tbs, chmax);
  if (tbs > trail_buf_size)
    {
      /* The old contents are not needed, so a plain free + malloc is enough */
      DBG("Reallocating trail.");
      if (trail_buffer)
	xfree(trail_buffer);
      trail_buf_size = tbs;
      trail_buffer = xmalloc(trail_buf_size * sizeof(struct trail_entry));
    }
}

/* Sorting of the trail */

/* Instantiate the array sorter as trail_sort(), ordering trail_buffer[] by position */
#define ASORT_PREFIX(x) trail_##x
#define ASORT_KEY_TYPE uns
#define ASORT_ELT(i) trail_buffer[i].pos
/* Whole entries are swapped, not only the keys */
#define ASORT_SWAP(i,j) do { struct trail_entry tmp=trail_buffer[i]; trail_buffer[i]=trail_buffer[j]; trail_buffer[j]=tmp; } while (0)
#include "lib/arraysort.h"

/*
 * Return the trail of the current document, sorted by position and
 * terminated by an entry with pos == 0xffff. The sorting is done lazily
 * on the first call after trail_sorted has been cleared.
 */
static inline struct trail_entry *
get_trail(void)
{
  if (trail_sorted)
    return trail_buffer;

  DBG("\tSorting trail:");
  trail_sorted = 1;
  trail_sort(trail_size);
  /* Append the sentinel just behind the last used entry */
  trail_buffer[trail_size].pos = 0xffff;
#ifdef LOCAL_DEBUG
  for (struct trail_entry *t = trail_buffer; t->pos != 0xffff; t++)
    DBG("\t\t@%03x $%d w%d", t->pos, t->weight, t->word_index);
#endif
  return trail_buffer;
}

/* Explanation mode */

#ifdef CONFIG_EXPLAIN
/* Non-zero when explanation lines (".E...") should be generated for the current document */
static int do_explain;
#define EXPLAIN(x...) do { if (unlikely(do_explain)) add_cr(".E" x); } while(0)
#define IF_EXPLAINING(x) x

/*
 * Format a human-readable name of the type of reference e to buf, which
 * must have room for at least 64 bytes. References with the top bit set
 * are printed as a meta type name followed by the lowest two bits in
 * brackets, the other ones as a name from st_names (if is_string) or
 * wt_names selected by the top four bits.
 */
static void
explain_lookup_type(byte *buf, uns e, uns is_string)
{
  int len;
  if (!(e & 0x8000))
    len = snprintf(buf, 64, "%s", (is_string ? st_names : wt_names)[e >> 12]);
  else
    len = snprintf(buf, 64, "%s[%d]", mt_names[(e>>11)&15], e&3);
  /* The name must be non-empty and must not have been truncated */
  ASSERT(len > 0 && len < 64);
}

#else
/* Explanations compiled out: both macros expand to an empty statement */
#define EXPLAIN(x...) do { } while(0)
#define IF_EXPLAINING(x) do { } while(0)
#endif

/* Recording of matches */

/*
 * Record a match of quality wq at position wpos to the array of the best
 * matches. The array holds HARD_MAX_NOTES keys sorted in decreasing order,
 * each of them being ((wq/2 + 32768) << 16) | (wpos ^ 0xffff). Every
 * position is kept at most once (with the highest key), position 0xfff
 * is never recorded.
 */
static void do_record_match(int wq, uns wpos, uns *matches)
{
  if (wpos == 0xfff)
    return;

  uns key = (wpos ^ 0xffff) | ((wq/2 + 32768) << 16);
  uns slot = 0;

  /* Skip all entries which are at least as good as the new one */
  while (slot < HARD_MAX_NOTES && matches[slot] >= key)
    {
      /* The same position is already present with a key not worse than ours */
      if (!((matches[slot] ^ key) & 0xffff))
	return;
      slot++;
    }

  /* Insert the key here and shift the rest until an older entry for the same position falls out */
  uns carry = key;
  while (slot < HARD_MAX_NOTES)
    {
      uns displaced = matches[slot];
      matches[slot++] = carry;
      if (!((displaced ^ key) & 0xffff))
	break;
      carry = displaced;
    }
}

/* Matching of phrases and proximity matching */

/*
 * Add the weight of the phrase to q, clamp the sum to -32767..32767,
 * record it as a match starting at position start and return the
 * clamped value.
 */
static inline int
do_record_phrase_match(struct phrase *p, int q, uns start, uns *matches)
{
  int total = q + p->weight;

  if (total > 32767)
    total = 32767;
  if (total < -32767)
    total = -32767;
  DBG("\t\tBest: q=%d, start=%d", total, start);
  do_record_match(total, start, matches);
  return total;
}

/*
 * Match phrase p against the sorted trail of the current document.
 *
 * For each slot idx of the phrase, wpos[idx] and wwt[idx] hold the position
 * and the accumulated weight of the currently preferred partial match ending
 * with that slot (-INFTY means no valid partial match). Whenever the last slot
 * has a valid weight, it competes for the best and the second best result;
 * only results better than prox_limit-1 are considered.
 *
 * Both results are recorded to matches[] by do_record_phrase_match() and
 * p->matches is incremented. Returns the Q of the best match as recorded
 * (i.e., including the phrase weight) or -1 if the phrase did not match.
 */
static int
do_phrase(struct phrase *p, uns *matches)
{
  int wpos[MAX_PHRASE_LEN], wwt[MAX_PHRASE_LEN];
  uns last_idx = p->length - 1;
  int bestq = prox_limit-1, bestq2 = bestq;
  uns beststart = ~0U, beststart2 = ~0U;	/* ~0U means "nothing found yet" */

  for (uns idx=0; idx<p->length; idx++)
    {
      wpos[idx] = 0;
      wwt[idx] = -INFTY;
    }

  for (struct trail_entry *trail = get_trail(); trail->pos != 0xffff; trail++)
    {
      /* word_to_idx[] and next_same_word[] are 1-based, zero terminates the list of slots of this word */
      uns idx = p->word_to_idx[trail->word_index];
      while (idx)
	{
	  idx--;
	  uns pos = trail->pos;
	  int wt = trail->weight * word_weight_scale;
	  DBG("\t\t@%03x idx=%d wt=%d", pos, idx, wt);

	  /* Try to extend the current part of the phrase */
	  int wrel = wwt[idx];		/* Value of the match already stored in this slot */
	  if (p->prox_map & (1 << idx))
	    {
	      /* Proximity slot: displacement is allowed, but penalized per position */
	      if (idx)
		wt += wwt[idx-1] - prox_penalty*(uns)(pos - (wpos[idx-1] + p->relpos[idx]));
	      wrel -= prox_penalty*(uns)(pos - (wpos[idx] + 1));
	    }
	  else
	    {
	      /* Strict slot: the word must be exactly relpos[idx] behind the previous slot */
	      wrel = -INFTY;
	      if (idx)
		{
		  if (pos == wpos[idx-1] + p->relpos[idx])
		    wt += wwt[idx-1];
		  else
		    wt = -INFTY;
		}
	    }
	  /* Replace the stored match unless it is still better than the new one */
	  if (wrel <= wt && wt >= -INFTY)
	    {
	      wpos[idx] = pos;
	      wwt[idx] = wt;
	    }
	  DBG("\t\t\t-> pos=%d wt=%d", wpos[idx], wwt[idx]);

	  /* If it was the last word of the phrase, record the weight */
	  if (idx == last_idx && wwt[idx] > -INFTY)
	    {
	      int qq = wwt[idx];
	      if (qq > bestq)
		{
		  /* New best one, the former best becomes the second best */
		  bestq2 = bestq;
		  beststart2 = beststart;
		  bestq = qq;
		  beststart = wpos[0];
		}
	      else if (qq > bestq2)
		{
		  bestq2 = qq;
		  beststart2 = wpos[0];
		}
	    }

	  /* Go to the next word */
	  idx = p->next_same_word[idx];
	}
    }

  if (beststart == ~0U)
    return -1;
  int qq = do_record_phrase_match(p, bestq, beststart, matches);
  if (beststart2 != ~0U)
    do_record_phrase_match(p, bestq2, beststart2, matches);
  p->matches++;
  return qq;
}

/* Near matching */

/*
 * Evaluate near-matcher p on the sorted trail of the current document.
 * The best and the second best clusters are recorded to matches[] and
 * p->matches is incremented. Returns the near points (bestw) of the best
 * cluster or 0 if no cluster has gained a positive number of them.
 */
static int
do_near(struct query *q, struct phrase *p, uns *matches)
{
  /* Currently examined word cluster: a circular buffer */
  uns cr = 0, cw = 0;			/* Read and write index */
  struct {
    uns idx;				/* Word index */
    int q;				/* Q of the word itself */
    int joiner;				/* Weight of space before the word */
    uns pos;				/* Position in the text */
  } cluster[MAX_PHRASE_LEN];
  uns cluster_word_mask = 0;		/* Which words are present in the cluster */
  int running_weight = 0;		/* Total weight of the cluster */
  int running_q = 0;			/* Total Q of the cluster */
  uns last_pos = -10000;		/* Position of previous word */
  uns last_idx = -10000;		/* Which word it was */
  int joiner;

  /* Current maxima */
  int bestw = -1;
  int bestq = -1, bestq2 = -1;
  uns beststart = ~0U;
  uns beststart2 = ~0U;

  /*
   *  How does the near matcher work:
   *
   *  (1) Assign each position in the document near points which are either:
   *	     near_bonus_word for matched words
   *	     + near_bonus_connect more if the previous word is also matched
   *	  or -near_penalty_gap for no match.
   *  (2) Among intervals containing each word at most once, find the one
   *      with maximum Q (that is, Q of words inside according to standard
   *      rules + the near points gathered in this interval) and record
   *      it as a normal match covering the whole interval.
   *  (3) Do the same for the second best interval.
   *
   *  The code below does all these steps in parallel and returns the maximum
   *  number of near points gained, which is then added to the overall Q of the
   *  page (Q's of word occurrences are already accounted for).
   */

  for (struct trail_entry *trail = get_trail(); trail->pos != 0xffff; trail++)
    {
      uns pos = trail->pos;
      /* word_to_idx[] and next_same_word[] are 1-based, zero terminates the list */
      uns idx = p->word_to_idx[trail->word_index];
      struct word *word = &q->words[trail->word_index];
      while (idx)
	{
	  idx--;
	  DBG("\t\t@%03x idx=%d", pos, idx);

	  /* Calculate gap weight and adjust the running sum */
	  if (pos <= last_pos)
	    joiner = 0;
	  else if (pos - last_pos > 100)
	    joiner = -1000000;		/* Too far away: this surely kills the cluster */
	  else
	    /*
	     * NOTE(review): last_pos already holds the previous position + 1, so
	     * this charges one gap more than the number of skipped positions --
	     * confirm that the extra -1 is intended.
	     */
	    joiner = (last_pos - pos - 1) * near_penalty_gap;
	  if (last_idx + 1 == idx)
	    joiner += near_bonus_connect;
	  running_weight += joiner;
	  if (running_weight <= 0)
	    {
	      /* The cluster is not worth continuing, start a new one */
	      cr = cw = 0;
	      running_weight = running_q = 0;
	      cluster_word_mask = 0;
	    }

	  /* Check for duplicate words */
	  if (cluster_word_mask & (1 << idx))
	    {
	      /* Remove all words until first occurrence of the offending one */
	      while (cluster[cr].idx != idx)
		cr = (cr+1) % MAX_PHRASE_LEN;
	      cr = (cr+1) % MAX_PHRASE_LEN;
	      /* Recalculate weight and word mask */
	      running_weight = running_q = 0;
	      cluster_word_mask = 0;
	      for (uns ci = cr; ci != cw; ci = (ci+1) % MAX_PHRASE_LEN)
		{
		  /* The joiner of the first remaining word lies outside the cluster */
		  if (ci != cr)
		    {
		      running_weight += cluster[ci].joiner;
		      if (running_weight < 0)
			{
			  cr = ci;
			  running_weight = running_q = 0;
			  cluster_word_mask = 0;
			}
		    }
		  running_weight += near_bonus_word;
		  running_q += cluster[ci].q;
		  cluster_word_mask |= (1 << cluster[ci].idx);
		}
	      DBG("\t\t## rw=%d rq=%d cr=%d cw=%d words=%x", running_weight, running_q, cr, cw, cluster_word_mask);
	    }

	  /* Add the new word */
	  cluster[cw].pos = pos;
	  cluster[cw].idx = idx;
	  cluster[cw].joiner = joiner;
	  cluster[cw].q = word->weight + trail->weight*word_weight_scale;
	  running_q += cluster[cw].q;
	  cluster_word_mask |= (1 << idx);
	  cw = (cw+1) % MAX_PHRASE_LEN;

	  /* Update current maxima */
	  int qq = running_q + running_weight;
	  if (qq > bestq2)
	    {
	      if (qq > bestq)
		{
		  /* Keep the old best as the second best only if it starts elsewhere */
		  if (beststart != cluster[cr].pos)
		    {
		      bestq2 = bestq;
		      beststart2 = beststart;
		    }
		  bestq = qq;
		  bestw = running_weight;
		  beststart = cluster[cr].pos;
		}
	      else
		{
		  bestq2 = qq;
		  beststart2 = cluster[cr].pos;
		}
	    }

	  /* And go further */
	  running_weight += near_bonus_word;	/* The bonus for this word counts only after the maxima are updated */
	  last_pos = pos + 1;
	  last_idx = idx;
	  DBG("\t\t-> rw=%d rq=%d cr=%d cw=%d words=%x (best: %d@%03x %d@%03x, bestw: %d)",
	      running_weight, running_q, cr, cw, cluster_word_mask,
	      bestq, beststart, bestq2, beststart2, bestw);

	  /* Go to the next word */
	  idx = p->next_same_word[idx];
	}
    }

  DBG("\t\tNear matcher score: %d", bestw);
  if (bestw <= 0)
    return 0;
  do_record_match(bestq, beststart, matches);
  if (beststart2 != ~0U)
    do_record_match(bestq2, beststart2, matches);
  p->matches++;
  return bestw;
}

/*
 * Building blocks of the attribute matchers. Both expect `q' and `attrs'
 * to be in scope and make the enclosing function return 0 when the
 * attribute fetched by gf(attrs) does not satisfy the query.
 */

/* Integer attribute: must lie within q->id_min .. q->id_max (inclusive) */
#define MATCH_INT_ATTR(id,keywd,gf,pf)				\
  {								\
    u32 a = gf(attrs);						\
    if (a < q->id##_min || a > q->id##_max)			\
      return 0;							\
  }
/* Small set attribute: the bit corresponding to its value must be set in q->id_set */
#define MATCH_SMALL_SET_ATTR(id,keywd,gf,pf)			\
  {								\
    uns a = gf(attrs);						\
    if (!(q->id##_set & (1 << a)))				\
      return 0;							\
  }

/*
 * Attribute checks performed before the references of the card are examined.
 * Returns 1 if the card passes all of them, 0 if it should be skipped.
 * With CONFIG_EXPLAIN, it also sets do_explain and rejects all cards except
 * for q->explain_id if the latter is non-zero.
 */
static inline int
match_early_card_attrs(struct query *q UNUSED, struct card_attr *attrs UNUSED, u32 oid UNUSED)
{
#ifdef CONFIG_EXPLAIN
  if (q->explain_id)
    {
      if (q->explain_id != oid)
	return 0;
      do_explain = 1;
    }
  else
    do_explain = 0;
#endif

  /* Expand only the non-LATE entries of EXTENDED_ATTRS to matchers */
#define INT_ATTR(id,keywd,gf,pf) MATCH_INT_ATTR(id,keywd,gf,pf)
#define SMALL_SET_ATTR(id,keywd,gf,pf) MATCH_SMALL_SET_ATTR(id,keywd,gf,pf)
#define LATE_INT_ATTR(id,keywd,gf,pf)
#define LATE_SMALL_SET_ATTR(id,keywd,gf,pf) 
  EXTENDED_ATTRS
#undef INT_ATTR
#undef SMALL_SET_ATTR
#undef LATE_INT_ATTR
#undef LATE_SMALL_SET_ATTR

#ifdef CONFIG_LASTMOD
  if (attrs->age < q->age_min || attrs->age > q->age_max)
    return 0;
#endif
  /* Cards flagged as overridden never match */
  if (attrs->flags & CARD_FLAG_OVERRIDEN)
    return 0;
  return 1;
}

/*
 * Attribute checks postponed until the card is known to match the words
 * of the query: only the LATE_* entries of EXTENDED_ATTRS are expanded
 * to matchers. Returns 1 if the card passes, 0 otherwise.
 */
static inline int
match_late_card_attrs(struct query *q UNUSED, struct card_attr *attrs UNUSED)
{
#define INT_ATTR(id,keywd,gf,pf)
#define SMALL_SET_ATTR(id,keywd,gf,pf) 
#define LATE_INT_ATTR(id,keywd,gf,pf) MATCH_INT_ATTR(id,keywd,gf,pf)
#define LATE_SMALL_SET_ATTR(id,keywd,gf,pf) MATCH_SMALL_SET_ATTR(id,keywd,gf,pf)
  EXTENDED_ATTRS
#undef INT_ATTR
#undef SMALL_SET_ATTR
#undef LATE_INT_ATTR
#undef LATE_SMALL_SET_ATTR
  return 1;
}

/*
 * Calculate the secondary sort key of a note according to q->custom_sorting
 * and store it to note->sec_sort_key, XOR'ed with q->custom_sort_reverse.
 * Extended attributes and custom matchers are identified by the offset of
 * their field in struct query. Sorting modes not listed here (and
 * PARAM_SITE) yield a zero key.
 */
static void
get_sec_sort_key(struct result_note *note, struct query *q, struct card_attr *attrs UNUSED, oid_t oid)
{
  u32 key = 0;
  switch (q->custom_sorting)
    {
    case PARAM_CARDID:
      key = oid;
      break;
    case PARAM_SITE:
      key = 0;
      break;
#ifdef CONFIG_LASTMOD
    case PARAM_AGE:
      key = attrs->age;
      break;
#endif
    /* One case per extended attribute, early and late ones are treated alike */
#define INT_ATTR(id,keywd,gf,pf)			\
    case OFFSETOF(struct query, id##_min):		\
      key = gf(attrs);					\
      break;
#define SMALL_SET_ATTR(id,keywd,gf,pf)			\
    case OFFSETOF(struct query, id##_set):		\
      key = gf(attrs);					\
      break;
#define LATE_INT_ATTR INT_ATTR
#define LATE_SMALL_SET_ATTR SMALL_SET_ATTR
  EXTENDED_ATTRS
#undef INT_ATTR
#undef SMALL_SET_ATTR
#undef LATE_INT_ATTR
#undef LATE_SMALL_SET_ATTR

    /* One case per custom matcher: the key is the value kept in the query itself */
#define CUSTOM_MATCH_KWD(id,kwd,pf)			\
    case OFFSETOF(struct query, id##_value):		\
      key = q->id##_value;				\
      break;
CUSTOM_MATCH_PARSE
#undef CUSTOM_MATCH_KWD
    }
  note->sec_sort_key = key ^ q->custom_sort_reverse;
}

/*
 * Evaluate a single card whose references have been already collected
 * by do_process_refs() (per-word results are in q->words[], occurrences
 * in the trail) and if it matches the query, insert it to the heap of
 * the best results.
 *
 *   q            the query being evaluated
 *   attr         attributes of the card
 *   oid          ID of the card
 *   words_found  bitmap of words for which a reference chain contained
 *                this card (which need not mean the word really matched)
 *
 * The steps: sum Q of matched outer words, check the optimistic boolean
 * map, match phrases, check the real boolean map, run custom and late
 * attribute matchers, add points from near-matchers and the static weight
 * of the card, and finally store the result to a note.
 */
static void
do_refs_card(struct query *q, struct card_attr *attr, oid_t oid, u32 words_found)
{
  int qq = 0;
  uns bool = 0;				/* Bitmap of boolean_id's of matched words and phrases */
  uns matches[HARD_MAX_NOTES];		/* ((q+OFFSET) << 16) | (pos ^ 0xffff) */

  /* Skim over all matched words and record their matches */
  bzero(matches, sizeof(matches));
  for (uns wid=0; words_found; wid++)
    if (words_found & (1 << wid))
      {
	words_found &= ~(1 << wid);
	struct word *word = &q->words[wid];
	if (word->q > -PEN_INFTY)	/* Really matched */
	  {
	    word->doc_count++;
	    if (word->is_outer)
	      {
		int wq1 = (uns)word->q*word_weight_scale;
		int wq2 = 0;
		int wq = word->weight + wq1;
		DBG("\tWord %d: best %d (@%03x), 2nd best %d (@%03x), strictly 2nd best %d",
		    wid, word->q, word->pos, word->q2, word->pos2, word->q2strict);
		do_record_match(wq, word->pos, matches);
		if (word->q2 >= 0)
		  {
		    /* The second best occurrence is rated by its own weight, the same way as the best one */
		    int wqsec = (uns)word->q2*word_weight_scale;
		    do_record_match(word->weight + wqsec, word->pos2, matches);
		  }
		if (word->q2strict >= 0)
		  {
		    /* The strictly second best match contributes a reduced share to the Q of the word */
		    wq2 = (uns)word->q2strict*word_weight_scale / second_best_reduce;
		    wq += wq2;
		  }
		DBG("\t\t=> Q=%d (%d+%d+%d)", wq, word->weight, wq1, wq2);
		IF_EXPLAINING(
		  if (unlikely(do_explain))
		    {
		      add_cr(".EWord <%s>: Q=%d", q->words[wid].word, wq);
		      add_cr(".E\tBase: %d", word->weight);
		      byte typebuf[64];
		      explain_lookup_type(typebuf, word->explain_ref, word->is_string);
		      add_cr(".E\t1st match: %s (%d) => %d", typebuf, word->q, wq1);
		      if (word->q2strict >= 0)
			{
			  explain_lookup_type(typebuf, word->explain_ref2s, word->is_string);
			  add_cr(".E\t2nd match: %s (%d) => %d", typebuf, word->q2strict, wq2);
			}
		    }
		  );
		qq += wq;
	      }
	    bool |= 1 << word->boolean_id;
	  }
      }

  /* Check optimistic boolean expression */
  if (!(q->optimistic_bool_map[bool >> 5] & (1 << (bool & 31))))
    {
      DBG("No match: !optimistic (%04x)", bool);
      return;
    }

#ifdef LOCAL_DEBUG
  get_trail();
#endif

  /* Process phrases */
  for (uns i=0; i<q->nphrases; i++)
    {
      struct phrase *p = &q->phrases[i];
      int pq;
      DBG("\tMatching phrase #%d", i);
      if ((p->word_mask & bool) != p->word_mask)
	DBG("\t\tMissing words (have %04x need %04x)", bool, p->word_mask);
      else if ((pq = do_phrase(p, matches)) >= 0)
	{
	  EXPLAIN("Phrase #%d: Q=%d", i, pq);
	  qq += pq;
	  bool |= 1 << p->boolean_id;
	}
    }

  /* Check boolean expression */
  if (!(q->bool_map[bool >> 5] & (1 << (bool & 31))))
    {
      DBG("No match: !bool (%04x)", bool);
      return;
    }

  /* Check custom matchers */
#ifdef CUSTOM_MATCH
  int custom_Q = 0;
  if (!CUSTOM_MATCH(q, attr, custom_Q))
    {
      DBG("No match: custom");
      return;
    }
  EXPLAIN("Custom matcher: Q=%d", custom_Q);
  qq += custom_Q;
#endif

  /* Update custom statistics and process late attribute matchers */
  EXTENDED_EARLY_STATS(q, attr);
  if (!match_late_card_attrs(q, attr))
    {
      DBG("No match: late attrs");
      return;
    }
  EXTENDED_LATE_STATS(q, attr);

  /* Process near-matchers to get the final weight */
  for (uns i=0; i<q->nnears; i++)
    {
      struct phrase *p = &q->nears[i];
      u32 have_mask = p->word_mask & bool;
      DBG("\tMatching near #%d", i);
      /* This test is tricky. You are not expected to understand it. */
      /* (It is true iff have_mask has at most one bit set, i.e., fewer than two words are present.) */
      if (!(have_mask & (have_mask-1)))
	{
	  DBG("\t\tToo few words (have %04x want %04x)", bool, p->word_mask);
	  EXPLAIN("Near #%d: Unmatched", i);
	}
      else
	{
	  int nq = do_near(q, p, matches);
	  EXPLAIN("Near #%d: Q=%d", i, nq);
	  qq += nq;
	}
    }

  /* Record the match */
  struct results *res = q->results;
  struct result_note *note = res->free_note;
  struct result_note **theap = res->result_heap;
  qq += attr->weight * doc_weight_scale;
  if (q->custom_sort_only)
    qq = 0;
  EXPLAIN("Static weight %d (Q=%d) => total Q=%d", attr->weight, attr->weight * doc_weight_scale, qq);
  DBG("Matched OID %08x with Q=%d", oid, qq);
  q->matching_docs++;
  note->attr = attr;
  note->q = qq;
  get_sec_sort_key(note, q, attr, oid);

    {
      if (res->nresults >= num_matches)
	{
	  /*
	   * The heap is full. It is ordered by note_lt(), so the same ordering
	   * (Q first, then the secondary sort key) has to decide whether the new
	   * note is worse than the worst one kept. Comparing Q's only would let
	   * a note with a worse secondary key evict a better one whenever the
	   * Q's are equal, which is always the case with custom_sort_only.
	   */
	  if (note_lt(note, theap[1]))	/* Worse than the worst result kept -> throw away */
	    return;
	  res->free_note = theap[1];
	  HEAP_DELMIN(struct result_note *, theap, res->nresults, THEAP_LESS, THEAP_SWAP);
	}
      else
	res->free_note = res->first_note++;
    }
  theap[++res->nresults] = note;
  note->heap = res->nresults;
  HEAP_INSERT(struct result_note *, theap, res->nresults, THEAP_LESS, THEAP_SWAP);

  /* Copy match array and restrict entries to lower 16 bits */
  for (uns i=0; i<HARD_MAX_NOTES; i++)
    note->best[i] = matches[i];
}

/*
 * The main loop of reference processing: merge all mapped reference chains
 * of the query by OID using RHEAP and evaluate every card they mention.
 *
 * As read by this function, a chain is an array of 16-bit words: for every
 * card two words of OID (upper half first), one word read as the number of
 * the references in the skipping branch, and the references themselves, all
 * of them having a non-zero top nibble. A zero OID terminates the chain.
 *
 * For each OID on the top of the heap: if the early attribute checks fail,
 * all chains pointing to it are just advanced to their next card. Otherwise,
 * references of all such chains are filtered by type, weighted, stored to the
 * trail and used to update the best matches of the respective word; then
 * do_refs_card() is called to evaluate the card.
 */
static void
do_process_refs(struct query *q)
{
  struct database *dbase = q->dbase;
  struct card_attr *attrs = dbase->card_attrs;
  uns rcnt = q->last_ref - q->first_ref;
  struct ref_heap_entry rheap[rcnt+1];	/* The heap is indexed from 1 */

  for (uns i=0; i<rcnt; i++)
    {
      struct ref_chain *ref = &q->first_ref[i];
      rheap[i+1].oid = GET_U32_BE16(ref->u.mem.pos);
      rheap[i+1].ref = ref;
    }
  HEAP_INIT(struct ref_heap_entry, rheap, rcnt, RHEAP_LESS, RHEAP_SWAP);

  while (rcnt > 0)
    {
      oid_t oid = rheap[1].oid;
      struct card_attr *attr = &attrs[oid];
      prefetch(attr);
      DBG("OID %x", oid);

      if (!match_early_card_attrs(q, attr, oid))
	{
	  DBG("\tAttributes don't match");
	  /* Skip this card in all chains containing it without looking at the references */
	  while (rcnt > 0 && rheap[1].oid == oid)
	    {
	      struct ref_chain *ref = rheap[1].ref;
	      u16 *x = ref->u.mem.pos + 2;	/* Behind the OID: the word used as the count */
	      DBG("\tSkipping ref chain %d size %d", ref-q->first_ref, *x);
	      x += *x + 1;			/* Jump over the count and all the references */
	      oid_t next_oid = GET_U32_BE16(x);
	      DBG("\t\t%08x", next_oid);
	      if (next_oid)
		{
		  /* NOTE(review): this prefetches the current card again, attrs[next_oid] was probably meant -- confirm */
		  prefetch(&attrs[oid]);
		  ref->u.mem.pos = x;
		  ASSERT(rheap[1].oid < next_oid);
		  rheap[1].oid = next_oid;
		  HEAP_INCREASE(struct ref_heap_entry, rheap, rcnt, RHEAP_LESS, RHEAP_SWAP);
		}
	      else
		HEAP_DELMIN(struct ref_heap_entry, rheap, rcnt, RHEAP_LESS, RHEAP_SWAP);
	    }
	  continue;
	}

      uns card_flags = attr->flags;
      struct trail_entry *trail = trail_buffer;	/* Start a new trail for this card */
      u32 words_found = 0;
#ifdef CONFIG_LANG
      u32 lang_mask = 1 << CA_GET_FILE_LANG(attr);
#endif

      while (rcnt > 0 && rheap[1].oid == oid)
	{
	  struct ref_chain *ref = rheap[1].ref;
	  u16 *x = ref->u.mem.pos + 3;		/* The first reference */
	  uns word_index = ref->word_index;
	  struct word *word = &q->words[word_index];
	  int *weight_array;
	  uns *meta_weight_array = dbase->meta_weights;
	  /* Bit t set: word type t is accepted, bit 16+t set: meta type t is accepted */
	  uns type_mask = word->type_mask;

	  if (word->is_string)
	    weight_array = dbase->string_weights;
	  else
	    {
	      weight_array = dbase->word_weights;
#ifdef CONFIG_LANG
	      /* The chain does not cover the language of the card: keep only the types listed below */
	      if (!(ref->lang_mask & lang_mask))
		{
		  uns lang_type_mask = WORD_TYPES_ALL_LANGS | (META_TYPES_ALL_LANGS << 16);
		  if (ref->lang_mask & (1 << LANG_NONE))
		    lang_type_mask |= WORD_TYPES_NO_LANG | (META_TYPES_NO_LANG << 16);
		  type_mask &= lang_type_mask;
		}
#endif
	      if (ref->noaccent_only)
		{
		  type_mask &= ~(WORD_TYPES_AUTO_ACCENT_ALWAYS_STRICT | (META_TYPES_AUTO_ACCENT_ALWAYS_STRICT << 16));
		  if (card_flags & CARD_FLAG_ACCENTED)
		    type_mask &= WORD_TYPES_AUTO_ACCENT_ALWAYS_STRIP | (META_TYPES_AUTO_ACCENT_ALWAYS_STRIP << 16);
		}
	    }

	  DBG("\tRef chain %d: word %d, type_mask %x", ref-q->first_ref, word_index, type_mask);

	  if (!(words_found & (1 << word_index)))
	    {
	      /* words_found doesn't necessarily mean the word's been really matched */
	      words_found |= 1 << word_index;
	      word->q = word->q2 = word->q2strict = -PEN_INFTY;
	    }

	  /* Read references until a word with zero top nibble (upper half of the next OID) is found */
	  uns y;
	  while ((y = *x++) & 0xf000)
	    {
	      uns p;
	      int w;
	      if (y & 0x8000)
		{
		  /* Meta reference: 4 bits of type, 9 bits of position, 2 bits selecting a byte of the weight */
		  uns type = (y & 0x7800) >> 11;
		  if (!(type_mask & (0x10000 << type)))
		    {
		      DBG("\t\t%04x m%x unmatched", y, type);
		      continue;
		    }
		  p = (y & 0xf800) | ((y & 0x7fc) >> 2);
		  w = (meta_weight_array[type] >> (8*(y & 3))) & 0xff;
		}
	      else
		{
		  /* Word reference: 4 bits of type, 12 bits of position */
		  uns type = (y & 0xf000) >> 12;
		  if (!(type_mask & (1 << type)))
		    {
		      DBG("\t\t%04x t%x unmatched", y, type);
		      continue;
		    }
		  p = y & 0xfff;
		  w = weight_array[type];
		}
	      DBG("\t\t%04x @%03x $%d-%d", y, p, w, ref->penalty);
	      w -= ref->penalty;
	      if (p == 0xfff)
		/* Position 0xfff is not stored to the trail, it is only penalized */
		w -= blind_match_penalty;
	      else
		{
		  trail->pos = p;
		  trail->weight = w;
		  trail->word_index = word_index;
		  trail++;
		}
	      /* Maintain the best, the second best and the strictly second best match of the word */
	      if (word->q < w)
		{
		  word->q2 = word->q2strict = word->q;
		  word->pos2 = word->pos;
		  word->q = w;
		  word->pos = p;
		  IF_EXPLAINING(word->explain_ref2s = word->explain_ref);
		  IF_EXPLAINING(word->explain_ref = y);
		}
	      else
		{
		  if (word->q2 < w)
		    {
		      word->q2 = w;
		      word->pos2 = p;
		    }
		  if (word->q != w && word->q2strict < w)
		    {
		      word->q2strict = w;
		      IF_EXPLAINING(word->explain_ref2s = y);
		    }
		}
	    }

	  /* y is the upper half of the next OID, *x the lower one; x is left pointing at the upper half */
	  oid_t next_oid = (y << 16) | *x--;
	  DBG("\t\t%08x", next_oid);
	  if (next_oid)
	    {
	      /* NOTE(review): this prefetches the current card again, attrs[next_oid] was probably meant -- confirm */
	      prefetch(&attrs[oid]);
	      ref->u.mem.pos = x;
	      ASSERT(rheap[1].oid < next_oid);
	      rheap[1].oid = next_oid;
	      HEAP_INCREASE(struct ref_heap_entry, rheap, rcnt, RHEAP_LESS, RHEAP_SWAP);
	    }
	  else
	    HEAP_DELMIN(struct ref_heap_entry, rheap, rcnt, RHEAP_LESS, RHEAP_SWAP);
	}

      /* The trail is complete, but not sorted yet; get_trail() sorts it on demand */
      trail_size = trail - trail_buffer;
      trail_sorted = 0;
      do_refs_card(q, attr, oid, words_found);
    }
}

/*
 * Process all reference chains of the query: map them to memory and then
 * evaluate them, each phase being accounted to its own profiler. The
 * profiler which was active on entry is restored at the end.
 */
void
process_refs(struct query *q)
{
  prof_t *saved_prof = profiler_switch(&prof_reff);

  map_refs(q);

  profiler_switch(&prof_refs);
  do_process_refs(q);

  profiler_switch(saved_prof);
}

/* Global initialization of this module: currently there is nothing to do */
void
refs_init(void)
{
}

/*
 * Prepare the result structures of a query: allocate the heap of the best
 * results (indexed from 1, hence num_matches+1 slots) and num_matches+1
 * notes from the pool of the results. The first note becomes the free
 * note which is used as a scratch pad for the next candidate.
 */
void
query_init_refs(struct query *q)
{
  struct results *res = q->results;

  res->result_heap = mp_alloc(res->pool, sizeof(struct result_note *) * (num_matches + 1));
  res->first_note = mp_alloc(res->pool, sizeof(struct result_note) * (num_matches + 1));
  res->free_note = res->first_note;
  res->first_note++;
}

void
query_finish_refs(struct query *q)
{
  struct results *r = q->results;
  uns num = r->nresults;

  /* Heap-sort the entries (we misuse HEAP_DELMIN and expect it moves the deleted item at the end) */
  while (num)
    {
      HEAP_DELMIN(struct result_note *, r->result_heap, num, THEAP_LESS, THEAP_SWAP);
      /* Remember to translate the notes */
      for (uns i=0; i<HARD_MAX_NOTES; i++)
	r->result_heap[num+1]->best[i] ^= 0xffff;
    }
  add_cr("N%d", r->nresults);

#ifdef DEBUG_DUMP_HEAP
  {
    uns i, j;
    byte *x, buf[256];
    log(L_DEBUG, "Sorted results:");
    for (i=1; i<=r->nresults; i++)
      {
	struct result_note *n = r->result_heap[i];
	x = buf + sprintf(buf, "%3d %08x %6d :", i, n->oid, n->q);
	for (j=0; j<HARD_MAX_NOTES && n->best[j] != 0xffff; j++)
	  x += sprintf(x, " %04x", n->best[j]);
	log(L_DEBUG, buf);
      }
  }
#endif
}
