/*
 *	Resolver for fingerprints
 *
 *	(c) 2003--2006, Robert Spalek <robert@ucw.cz>
 */

#undef LOCAL_DEBUG

#include "sherlock/sherlock.h"
#include "lib/conf.h"
#include "lib/fastbuf.h"
#include "lib/sorter-globals.h"
#include "indexer/indexer.h"

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>

/* Configuration */

#define PATH_LEN 20
#define PATH_FP "fingerprints-%d"

/* Hash table for fingerprints */

/*
 * Number of fingerprint nodes allowed per block.  It is computed in
 * resolve_split_fingerprints() from sorter_presort_bufsize, serves there as
 * the threshold for splitting the fingerprints file, and is also used as
 * HASH_DEFAULT_SIZE of the hash table instantiated below.
 */
static uns fp_per_block;

/*
 * Node of the fingerprint hash table.  The very same structure is read from
 * and written to the fingerprint files as a raw record (see the bread() and
 * bwrite() calls using sizeof(struct fpi_node)), so its layout is also the
 * on-disk record format.
 */
struct fpi_node {
  struct fingerprint fp;	/* Key: the fingerprint */
  u32 id;			/* ID assigned to the fingerprint; ~0U until filled in */
};

static void
fpi_init_key(struct fpi_node *n, struct fingerprint fp)
{
  n->fp = fp;
  n->id = ~0U;
}

/* Hash table callback: hash value of a fingerprint, delegated to fp_hash(). */
static inline uns
fpi_hash(struct fingerprint fp)
{
  uns h = fp_hash(&fp);
  return h;
}

/*
 * Hash table callback: two fingerprints are equal iff they are byte-wise
 * identical.  Returns non-zero on equality.
 */
static inline int
fpi_eq(struct fingerprint f1, struct fingerprint f2)
{
  return memcmp(&f1, &f2, sizeof f1) == 0;
}

/*
 * Instantiate the generic hash table template from lib/hashtable.h.
 * The exact meaning of every parameter is defined by that header; the notes
 * below only record how the parameters relate to this file.
 */

/* Nodes are struct fpi_node; generated identifiers get the fpi_ prefix
 * (this file calls fpi_init, fpi_lookup, fpi_find and fpi_cleanup). */
#define HASH_NODE		struct fpi_node
#define HASH_PREFIX(x)		fpi_##x
/* The key is a whole struct fingerprint, passed by value as `fp'. */
#define HASH_KEY_COMPLEX(x)	x fp
#define	HASH_KEY_DECL		struct fingerprint fp
/* Presumably these request the fpi_lookup(), fpi_find() and fpi_cleanup()
 * functions used further down -- see lib/hashtable.h. */
#define HASH_WANT_LOOKUP
#define HASH_WANT_FIND
#define HASH_WANT_CLEANUP
/* Presumably these announce that fpi_init_key(), fpi_hash() and fpi_eq()
 * defined above are supplied by this file -- see lib/hashtable.h. */
#define	HASH_GIVE_INIT_KEY
#define HASH_GIVE_HASHFN
#define HASH_GIVE_EQ
/* Memory-related tuning of the template; semantics per lib/hashtable.h. */
#define HASH_CONSERVE_SPACE
#define HASH_AUTO_POOL		16384
/* Not a constant: expands to the variable fp_per_block, which is assigned
 * in resolve_split_fingerprints().  NOTE(review): this seems to require that
 * function to run before the table is initialized -- confirm. */
#define HASH_DEFAULT_SIZE	fp_per_block
#include "lib/hashtable.h"

/*
 * Decide whether the fingerprints file fits into a single hash table and
 * if it does not, distribute its records into several smaller files.
 *
 * fp_per_block is derived from sorter_presort_bufsize divided by the size
 * of one node plus two pointers (presumably the per-node overhead of the
 * hash table).  If the number of records does not exceed it, nothing is
 * written and 1 is returned.  Otherwise the records are distributed by
 * resolve_hash() into index files named by PATH_FP ("fingerprints-0",
 * "fingerprints-1", ...) and the number of these files is returned.
 *
 * Dies if a block name does not fit into PATH_LEN bytes or if the
 * fingerprints file ends in the middle of a record.
 *
 * XXX: consider doing it only once during indexation instead of twice
 */
uns
resolve_split_fingerprints(void)
{
  fp_per_block = sorter_presort_bufsize / (sizeof(struct fpi_node) + 2*sizeof(void*));
  struct fastbuf *fps = index_bopen(fn_fingerprints, O_RDONLY);
  uns size = bfilesize(fps) / sizeof(struct fpi_node);
  if (size <= fp_per_block)
  {
    bclose(fps);
    return 1;
  }

  uns blocks = size / (fp_per_block * 0.9) + 0.999;		// 10% up for a possible inefficiency of the hash function
  struct fastbuf *split[blocks];
  uns count[blocks];
  uns i, max;

  for (i=0; i<blocks; i++)
  {
    byte name[PATH_LEN];
    /* Bounded formatting: a large block number must not overrun name[] */
    int len = snprintf(name, sizeof(name), PATH_FP, (int) i);
    if (len < 0 || len >= (int) sizeof(name))
      die("Name of fingerprint block %d does not fit into %d bytes", (int) i, PATH_LEN);
    split[i] = index_bopen(name, O_CREAT | O_TRUNC | O_WRONLY);
    count[i] = 0;
  }
  struct fpi_node n;
  uns got;
  /* bread() returns the number of bytes read, 0 at the end of the file */
  while ((got = bread(fps, &n, sizeof(n))))
  {
    /* A short read would leave stale bytes of the previous record in n */
    if (got != sizeof(n))
      die("Truncated record at the end of the fingerprints file");
    uns h = resolve_hash(&n.fp, blocks);
    bwrite(split[h], &n, sizeof(n));
    count[h]++;
  }
  bclose(fps);
  max = 0;
  for (i=0; i<blocks; i++)
  {
    bclose(split[i]);
    max = MAX(max, count[i]);
  }
  log(L_INFO, "Fingerprints split into %d blocks of maximal length %d", blocks, max);
  return blocks;
}

/*
 * Distribute the records of @in into @blocks temporary fastbufs stored in
 * out[0..blocks-1].  Every record is a fingerprint followed by @record_size
 * bytes of payload; the target is chosen by resolve_hash() on the
 * fingerprint.  All outputs are rewound before returning, @in is left open.
 */
static void
split_input(struct fastbuf *in, uns record_size, uns blocks, struct fastbuf **out)
{
  uns per_block[blocks];

  for (uns b=0; b<blocks; b++)
  {
    per_block[b] = 0;
    out[b] = bopen_tmp(indexer_fb_size);
  }

  struct fingerprint key;
  while (bread(in, &key, sizeof(key)))
  {
    uns b = resolve_hash(&key, blocks);
    per_block[b]++;
    /* Copy the fingerprint first, then the payload which follows it */
    bwrite(out[b], &key, sizeof(key));
    bbcopy(in, out[b], record_size);
  }

  uns longest = 0;
  for (uns b=0; b<blocks; b++)
  {
    if (per_block[b] > longest)
      longest = per_block[b];
    brewind(out[b]);
  }
  log(L_INFO, "Resolver input split into %d blocks of maximal length %d", blocks, longest);
}

static void
load_fingerprints(int i)
{
  byte name[PATH_LEN];
  sprintf(name, PATH_FP, i);
  struct fastbuf *fps = index_bopen(i<0 ? fn_fingerprints : name, O_RDONLY);
  struct fpi_node n;
  fpi_init();
  while (bread(fps, &n, sizeof(n)))
    {
      u32 *id = &fpi_lookup(n.fp)->id;
      if (*id >= FIRST_ID_SKEL)
	*id = n.id;
    }
  bclose(fps);
  if (i >= 0)
  {
    byte *path = index_name(name);
    if (unlink(path) < 0)
      die("unlink(%s): %m", path);
  }
}

/*
 * Translate fingerprints in @in to IDs using the currently loaded hash table.
 *
 * Each input record is a fingerprint followed by @record_size bytes; each
 * emitted record is a u32 ID followed by the same @record_size bytes.
 * counters[0] counts fingerprints found with an ID below FIRST_ID_SKEL,
 * counters[1] those found with an ID >= FIRST_ID_SKEL and counters[2] those
 * not found at all.  Records of the second kind are dropped when @flags
 * contain RESOLVE_SKIP_SKEL, records of the third kind are dropped when
 * @flags contain RESOLVE_SKIP_UNKNOWN and get the ID ~0U otherwise.
 *
 * The hash table is cleaned up at the end.
 */
static void
resolve_input(struct fastbuf *in, struct fastbuf *out, uns flags, uns record_size, u64 *counters)
{
  u32 unknown_id = ~0U;
  struct fingerprint key;
  uns processed = 0;

  for (;;)
  {
    if (!bread(in, &key, sizeof(key)))
      break;
    processed++;

    struct fpi_node *hit = fpi_find(key);
    u32 *id;				/* NULL means "drop this record" */
    if (hit && hit->id < FIRST_ID_SKEL)
    {
      counters[0]++;
      id = &hit->id;
    }
    else if (hit)
    {
      counters[1]++;
      id = (flags & RESOLVE_SKIP_SKEL) ? NULL : &hit->id;
    }
    else
    {
      counters[2]++;
      id = (flags & RESOLVE_SKIP_UNKNOWN) ? NULL : &unknown_id;
    }

    if (!id)
    {
      bskip(in, record_size);
      continue;
    }
    bwrite(out, id, sizeof(u32));
    bbcopy(in, out, record_size);
  }

  fpi_cleanup();
  log(L_INFO, "Resolved a block of %d records", processed);
}

/*
 * Resolve @blocks pre-split inputs in[0..blocks-1] into one output stream.
 *
 * For every block, the matching fingerprints are loaded (the i-th split
 * file when @blocks > 1, the whole fingerprints file otherwise), the input
 * is resolved by resolve_input() and then closed.  @flags and @record_size
 * are passed on to resolve_input().
 *
 * Returns a rewound temporary fastbuf containing the resolved records.
 */
struct fastbuf *
resolve_split_fastbuf(uns blocks, struct fastbuf **in, uns flags, uns record_size)
{
  u64 counters[3] = {0, 0, 0};
  struct fastbuf *out = bopen_tmp(indexer_fb_size);
  for (uns i=0; i<blocks; i++)
  {
    load_fingerprints(blocks > 1 ? (int) i : -1);
    resolve_input(in[i], out, flags, record_size, counters);
    bclose(in[i]);
  }
  /* %llu takes an unsigned long long, so cast the u64 counters accordingly */
  log(L_INFO, "Resolver statistics: %llu + %llu skeletons + %llu unknown",
      (unsigned long long) counters[0],
      (unsigned long long) counters[1],
      (unsigned long long) counters[2]);
  brewind(out);
  return out;
}

/*
 * Resolve the fingerprints in @in and return a fastbuf with the result.
 *
 * If the fingerprints had to be split into several blocks, the input is
 * split the same way first.  In both cases @in is closed before returning.
 */
struct fastbuf *
resolve_fastbuf(struct fastbuf *in, uns flags, uns record_size)
{
  uns blocks = resolve_split_fingerprints();

  /* Everything fits into a single table: resolve the input directly */
  if (blocks <= 1)
    return resolve_split_fastbuf(1, &in, flags, record_size);

  struct fastbuf *parts[blocks];
  split_input(in, record_size, blocks, parts);
  bclose(in);
  return resolve_split_fastbuf(blocks, parts, flags, record_size);
}
