/*
 *	Sherlock Search Engine -- Databases
 *
 *	(c) 1997--2006 Martin Mares <mj@ucw.cz>
 */

#include "sherlock/sherlock.h"
#include "lib/lfs.h"
#include "sherlock/index.h"
#include "lib/fastbuf.h"
#include "lib/mempool.h"
#include "lib/conf.h"
#include "indexer/lexicon.h"
#include "indexer/params.h"
#include "search/sherlockd.h"

#include <string.h>
#include <alloca.h>
#include <unistd.h>
#include <sys/mman.h>

/* Lexicon settings currently in effect; overwritten by db_switch_config(). */
struct lexicon_config lexicon_config;

/*
 * Make the lexicon settings stored in the parameters of `db' the active
 * ones by copying them over the global lexicon_config.
 */
void
db_switch_config(struct database *db)
{
  void *active = &lexicon_config;
  const void *stored = &db->params->lex_config;

  memcpy(active, stored, sizeof(lexicon_config));
}

/*
 * Return the path of file `fn' inside the directory of database `db',
 * i.e. db->directory, "/" and fn joined together by mp_multicat() on
 * db->pool (presumably the result lives in that pool -- confirm against
 * the mempool API).
 */
byte *
db_file_name(struct database *db, byte *fn)
{
  byte *path = mp_multicat(db->pool, db->directory, "/", fn, NULL);

  return path;
}

/* State of one card-print stream taking part in db_merge(). */
struct merge_status {
  struct merge_status *next;		/* Next stream in the singly linked list of streams still being merged */
  struct card_attr *attrs;		/* Card attribute array of the database the stream belongs to */
  struct fastbuf *fb;			/* The card-print stream itself */
  struct card_print next_fp;		/* Most recently read record, not yet consumed by the merge */
};

static void
db_reset_attrs(struct database *db)
{
  struct card_attr *attrs = db->card_attrs;
  for (uns i=0; i<db->num_ids; i++)
    attrs[i].flags &= ~CARD_FLAG_DUP;
}

static void
db_apply_blacklist(struct database *db)
{
  struct fastbuf *b = bopen_try(db_file_name(db, "blacklist"), O_RDONLY, 65536);
  if (!b)
    {
      log(L_INFO, "No blacklist found");
      return;
    }
  uns x;
  uns last = 0;
  uns count = 0;
  while ((x = bgetl(b)) != 0xffffffff)
    {
      while (last < x)
	db->card_attrs[last++].flags &= ~CARD_FLAG_DUP;
      ASSERT(last == x);
      db->card_attrs[last++].flags |= CARD_FLAG_DUP;
      count++;
    }
  while (last < db->num_ids)
    db->card_attrs[last++].flags &= ~CARD_FLAG_DUP;
  bclose(b);
  log(L_INFO, "Blacklisted %d cards", count);
}

/*
 * Detect cards present in more than one database and mark the redundant
 * copies with CARD_FLAG_DUP.
 *
 * Every database with an open fb_card_prints stream contributes a sequence
 * of struct card_print records.  The sequences are merged by fingerprint
 * (compared with memcmp()); whenever several streams currently offer the
 * same fingerprint, exactly one of them keeps its card and the cards of all
 * the others are flagged.  The survivor is the stream standing first in the
 * list, and since the list is built by prepending, that is the database
 * visited last by CLIST_FOR_EACH.
 *
 * NOTE(review): the algorithm only makes sense if each stream is sorted by
 * fingerprint in memcmp() order -- confirm against the indexer.  The cardid
 * taken from the stream is used as an index into the attribute array without
 * any range check.
 */
static void
db_merge(void)
{
  struct merge_status *m, *mm, **mp, *first_ms = NULL;
  uns index_cnt = 0;
  uns override_cnt = 0;

  /*
   * Build the list of streams.  The status records live on the stack of this
   * function (alloca).  A stream whose first read yields nothing is counted
   * in index_cnt, but never linked to the list.
   */
  CLIST_FOR_EACH(struct database *, db, databases)
    if (db->fb_card_prints)
      {
	m = alloca(sizeof(*m));
	m->attrs = db->card_attrs;
	m->fb = db->fb_card_prints;
	if (breadb(m->fb, &m->next_fp, sizeof(struct card_print)))
	  {
	    m->next = first_ms;
	    first_ms = m;
	  }
	index_cnt++;
      }

  for(;;)
    {
      /* eq = number of streams behind mm offering the same fingerprint as mm */
      int eq = 0;
      mm = first_ms;
      /* With fewer than two streams left, no more duplicates can be found */
      if (unlikely(!mm))
	break;
      m = mm->next;
      if (unlikely(!m))
	break;
      /*
       * Find the first stream with the smallest pending fingerprint (mm).
       * Moving mm forward resets eq: everything before the new mm compared
       * greater, so all equal entries are located behind it.
       */
      while (m)
	{
	  int cmp = memcmp(&m->next_fp.fp, &mm->next_fp.fp, sizeof(struct fingerprint));
	  if (cmp < 0)
	    {
	      mm = m;
	      eq = 0;
	    }
	  else if (!cmp)
	    eq++;
	  m = m->next;
	}
      if (eq)
	{
	  /*
	   * Walk the streams behind mm; mp always points to the link leading
	   * to the stream under examination, so that it can be unlinked.
	   */
	  mp = &mm->next;
	  while (m = *mp)
	    {
	      if (!memcmp(&m->next_fp.fp, &mm->next_fp.fp, sizeof(struct fingerprint)))
		{
		  /* Same fingerprint as the survivor: flag this copy and skip its record */
		  m->attrs[m->next_fp.cardid].flags |= CARD_FLAG_DUP;
		  override_cnt++;
		  if (unlikely(!breadb(m->fb, &m->next_fp, sizeof(struct card_print))))
		    {
		      /* Stream exhausted: unlink it; mp already leads to its successor */
		      *mp = m->next;
		      continue;
		    }
		}
	      mp = &m->next;
	    }
	}
      /* Consume the record of the survivor; drop its stream when nothing is left */
      if (unlikely(!breadb(mm->fb, &mm->next_fp, sizeof(struct card_print))))
	{
	  /* Search from the list head for the link pointing to mm */
	  for (mp=&first_ms; (m = *mp) != mm; mp = &m->next)
	    ;
	  *mp = mm->next;
	}
    }

  log(L_INFO, "Merged %d indices: %d cards overriden", index_cnt, override_cnt);
}

/*
 * Open all configured databases.
 *
 * For each entry of the `databases' list:
 *   - read and verify the "parameters" file (a missing file is tolerated
 *     only for databases marked is_optional, which are then skipped),
 *   - open "card-prints" if the database takes part in merging,
 *   - map "card-attrs" (writeable if duplicate flags are going to be
 *     recalculated) and open "cards" and "references",
 *   - recalculate the CARD_FLAG_DUP flags from the blacklist or reset them,
 *   - unless `merge_only' is set, initialize words and strings.
 *
 * Afterwards, databases with card prints are merged by db_merge(), the
 * print streams are closed and the modified attributes are synced and
 * switched back to read-only.
 *
 * Any failure except for a missing optional database is fatal (die()).
 */
void
db_init(int merge_only)
{
  uns seen_prints = 0;

  CLIST_FOR_EACH(struct database *, db, databases)
    {
      db->pool = cf_pool;
      byte *fn_params = db_file_name(db, "parameters");
      int fd = open(fn_params, O_RDONLY);
      if (fd < 0)
	{
	  if (db->is_optional)
	    {
	      log(L_INFO, "Database %s missing", db->name);
	      continue;
	    }
	  die("Cannot open %s: %m", fn_params);
	}
      /* Ask for one byte more than expected, so that an oversized file is detected as well */
      db->params = mp_alloc(db->pool, sizeof(struct index_params) + 1);
      int e = read(fd, db->params, sizeof(struct index_params) + 1);
      if (e < 0)
	die("Cannot read database parameters from %s: %m", fn_params);
      if (e != sizeof(struct index_params) || db->params->version != INDEX_VERSION)
	die("%s: Incompatible index", fn_params);
      close(fd);
      db_switch_config(db);
      if (db->parts & DB_PART_PRINTS)
	{
	  db->fb_card_prints = bopen(db_file_name(db, "card-prints"), O_RDONLY, 65536);
	  seen_prints++;
	}

      /* The attributes need to be writeable only when the DUP flags are recalculated */
      int rw = !!(db->parts & (DB_PART_PRINTS | DB_PART_BLACKLIST));
      uns size;
      db->card_attrs = mmap_file(db_file_name(db, "card-attrs"), &size, rw);
      /*
       * The last record of the file is not counted as a document
       * (presumably it is a terminating entry -- confirm against the indexer).
       */
      db->num_ids = size / sizeof(struct card_attr);
      if (db->num_ids)
	db->num_ids--;
      db->card_attrs_end = db->card_attrs + db->num_ids;

      byte *fn_cards = db_file_name(db, "cards");
      db->fd_cards = sh_open(fn_cards, O_RDONLY);
      if (db->fd_cards < 0)
	die("Unable to open %s: %m", fn_cards);
      db->card_file_size = sh_seek(db->fd_cards, 0, SEEK_END);

      byte *fn_refs = db_file_name(db, "references");
      db->fd_refs = sh_open(fn_refs, O_RDONLY);
      if (db->fd_refs < 0)
	die("Unable to open %s: %m", fn_refs);
      db->ref_file_size = sh_seek(db->fd_refs, 0, SEEK_END);

      log(L_INFO, "Loading database %s: %d documents", db->name, db->num_ids);
      if (db->parts & DB_PART_BLACKLIST)
	db_apply_blacklist(db);
      else if (db->parts & DB_PART_PRINTS)
	db_reset_attrs(db);
      if (!merge_only)
	{
	  words_init(db);
	  strings_init(db);
	}
    }

  if (seen_prints)
    db_merge();

  CLIST_FOR_EACH(struct database *, db, databases)
    {
      if (db->fb_card_prints)
	{
	  bclose(db->fb_card_prints);
	  db->fb_card_prints = NULL;
	}
      if (!(db->parts & (DB_PART_PRINTS | DB_PART_BLACKLIST)))
	continue;
      /*
       * A database skipped above as missing has never been mapped and a
       * database without documents has nothing to protect.  In both cases
       * the calls below would be zero-length operations on an address which
       * need not be a page-aligned mapping at all, so they could only fail.
       */
      if (!db->card_attrs || !db->num_ids)
	continue;
      size_t attr_bytes = db->num_ids * sizeof(struct card_attr);
      /* NOTE(review): the result of msync() is not checked; a failed flush goes unnoticed */
      msync(db->card_attrs, attr_bytes, MS_SYNC);
      if (mprotect(db->card_attrs, attr_bytes, PROT_READ) < 0)
	die("Cannot reprotect card attributes read-only: %m");
    }
}

/*
 * Find the database whose card attribute array [card_attrs, card_attrs_end)
 * contains `attr' and return it.  If `ooid' is non-NULL, the index of the
 * attribute within that array is stored to *ooid.  An attribute belonging
 * to no database is a fatal error.
 */
struct database *
attr_to_db(struct card_attr *attr, oid_t *ooid)
{
  CLIST_FOR_EACH(struct database *, db, databases)
    {
      /* Not inside the attribute array of this database => try the next one */
      if (attr < db->card_attrs || attr >= db->card_attrs_end)
	continue;
      if (ooid)
	*ooid = attr - db->card_attrs;
      return db;
    }
  die("attr_to_db: Orphan object with attribute %p", attr);
}
