/*
 *	Sherlock Indexer -- Graph Builder
 *
 *	(c) 2001 Martin Mares <mj@ucw.cz>
 *	(c) 2003--2007 Robert Spalek <robert@ucw.cz>
 */

#include "sherlock/sherlock.h"
#include "lib/lfs.h"
#include "lib/bitarray.h"
#include "lib/lists.h"
#include "lib/conf.h"
#include "lib/getopt.h"
#include "lib/fastbuf.h"
#include "lib/ff-binary.h"
#include "lib/stkstring.h"
#include "lib/sorter-globals.h"
#include "indexer/indexer.h"
#include "indexer/graph.h"

#include <stdio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>

/*
 * Debugging trace that is compiled but never executed: the arguments are
 * still seen by the compiler, yet nothing is printed.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a single
 * statement.  With a bare `if (0) printf(...)`, an `else` following
 * `TRACE(...);` would silently attach to the macro's hidden `if`.
 */
#define	TRACE(x...)	do { if (0) printf(x); } while (0)

/* Rearrange vertices by their site_id's */

/*
 * Vertex counts, all initialized in main():
 *   objects      -- size of the attributes file divided by sizeof(struct card_attr)
 *   all_objects  -- size of the fingerprints file divided by sizeof(struct card_print)
 *   skel_objects -- all_objects - objects
 */
static uns objects, skel_objects, all_objects;

static void
compute_translation(void)
{
  uns i;
  u32 *real_vertex = xmalloc(objects * sizeof(u32));
  for (i=0; i<objects; i++)
    real_vertex[i] = i;

  u32 *goes_to = xmalloc(objects * sizeof(u32));
  for (i=0; i<objects; i++)
    goes_to[real_vertex[i]] = i;
  write_free_ary(fn_graph_obj, FN_GRAPH_REAL, &real_vertex, objects, sizeof(u32));
  write_free_ary(fn_graph_obj, FN_GRAPH_GOES, &goes_to, objects, sizeof(u32));

  log(L_INFO, "Sorted objects by site_id");
}

/* Reading data for constructing the graph */

/*
 * One bit per object; allocated and filled by read_redirects() for cards
 * whose note has CARD_NOTE_REDIRECT set, released in merge_vertices().
 */
static bitarray_t is_redirect;

/*
 * Allocate the is_redirect bitmap and set the bit of every object whose
 * card note carries CARD_NOTE_REDIRECT.  Logs the number of redirects found.
 */
static void
read_redirects(void)
{
  uns found = 0;

  is_redirect = xmalloc(BIT_ARRAY_BYTES(objects));
  bit_array_zero(is_redirect, objects);

  notes_part_map(0);
  for (uns id = 0; id < objects; id++)
    {
      if (!(bring_note(id)->flags & CARD_NOTE_REDIRECT))
        continue;
      bit_array_set(is_redirect, id);
      found++;
    }
  notes_part_unmap();

  log(L_INFO, "Found %u redirects", found);
}

/*
 * Collapse one merge chain onto a single representative.
 *
 * merge -- array of links; a chain is followed via merge[v] until an
 *          element pointing to itself is reached
 * start -- first element of the chain
 *
 * The representative is the smallest vertex of the chain that is not a
 * redirect.  Every element of the chain is rewritten to point directly
 * to it.
 */
static void
renumber_chain(uns *merge, uns start)
{
  uns target = ~0U;

  /* Pass 1: find the smallest non-redirect vertex on the chain.  */
  for (uns probe = start; ; probe = merge[probe])
    {
      if (probe < target && !bit_array_isset(is_redirect, probe))
        target = probe;
      if (merge[probe] == probe)
        break;
    }

  /* A chain consisting purely of redirects may use any member; the only
   * requirement is that normal vertices never get merged into redirects.  */
  if (target == ~0U)
    target = start;

  /* Pass 2: point every member of the chain at the representative.
   * The old link is remembered before it gets overwritten.  */
  uns walk = start;
  for (;;)
    {
      uns old = merge[walk];
      TRACE("%x -> %x (was %x)%s\n", walk, target, old,
        bit_array_isset(is_redirect, walk) ? " REDIRECT" : "");
      merge[walk] = target;
      if (old == walk)
        break;
      walk = old;
    }
  TRACE("\n");
}

static uns *
read_merges(void)
{
  uns *merge = xmalloc(objects * sizeof(uns));
  for (uns i=0; i<objects; i++)
    merge[i] = i;
  // here you can exit if you want to skip the merging phase

  struct fastbuf *fprints = index_bopen(fn_fingerprints, O_RDONLY);
  struct card_print p, last_p;
  last_p.cardid = 0xffffffff;
  uns classes = 0;
  while (bread(fprints, &p, sizeof(p)))
  {
    if (p.cardid >= FIRST_ID_SKEL)
      continue;
    if (last_p.cardid == 0xffffffff)
    {
      last_p = p;
      continue;
    }
    if (!memcmp(&last_p.fp, &p.fp, sizeof(p.fp)))
      merge[p.cardid] = last_p.cardid;
    else
      classes++, renumber_chain(merge, last_p.cardid);
    last_p = p;
  }
  if (last_p.cardid != 0xffffffff)
    classes++, renumber_chain(merge, last_p.cardid);
  bclose(fprints);
  for (uns i=0; i<objects; i++)
    TRACE("%x: %x%s\n", i, merge[i], bit_array_isset(is_redirect, i) ? " REDIRECT" : "");
  log(L_INFO, "Found %u equivalence classes", classes);
  return merge;
}

/* Merging vertices */

/*
 * One resolved link as read from the resolver output in merge_vertices().
 *   dest -- target vertex; values at or above FIRST_ID_SKEL denote skeletons
 *   src  -- source vertex combined with edge-type bits (ETYPE_MASK)
 */
struct resolve_output {
  u32 dest, src;
};

/*
 * Store the "has an incoming link" information into the card notes.
 *
 * is_linked -- bitmap indexed by vertex: positions below `objects' are
 *              ordinary objects, positions from `objects' up to
 *              `all_objects' are skeletons
 *
 * Sets CARD_NOTE_IS_LINKED in the note of every marked vertex.
 */
static void
record_is_linked(bitarray_t is_linked)
{
  uns v;

  /* Ordinary objects */
  notes_part_map(1);
  for (v = 0; v < objects; v++)
    {
      if (!bit_array_isset(is_linked, v))
        continue;
      bring_note(v)->flags |= CARD_NOTE_IS_LINKED;
    }
  notes_part_unmap();

  /* Skeletons; their notes are indexed from zero */
  notes_skel_part_map(1);
  for (v = objects; v < all_objects; v++)
    {
      if (!bit_array_isset(is_linked, v))
        continue;
      bring_skel_note(v - objects)->flags |= CARD_NOTE_IS_LINKED;
    }
  notes_skel_part_unmap();
}

static void
merge_vertices(struct fastbuf *in, struct fastbuf **out_obj, struct fastbuf **out_skel)
{
  read_redirects();
  uns *merge = read_merges();
  bitarray_t is_linked = xmalloc(BIT_ARRAY_BYTES(all_objects));
  bit_array_zero(is_linked, all_objects);

  *out_obj = bopen_tmp(indexer_fb_size);
  *out_skel = bopen_tmp(indexer_fb_size);
  struct resolve_output link;
  while (bread(in, &link, sizeof(link)))
    {
      uns src = link.src & ~ETYPE_MASK;
      uns dest = link.dest;
      struct fastbuf *out;

      if (dest < FIRST_ID_SKEL)
      {
	out = *out_obj;
	if (!bit_array_isset(is_redirect, dest)		// let links point to the redirects
	    || bit_array_isset(is_redirect, src))	// unless the source vertex is a redirect
	{
	  dest = merge[dest];
	  link.dest = dest;
	}
	bit_array_set(is_linked, dest);
	if (!bit_array_isset(is_redirect, src))		// never merge redirects
	  src = merge[src];
      }
      else						// dest >= FIRST_ID_SKEL
      {
	out = *out_skel;
	ASSERT(dest != 0xffffffff);
	link.dest = dest & ~FIRST_ID_SKEL;
	dest += objects - FIRST_ID_SKEL;
	bit_array_set(is_linked, dest);
	if (!bit_array_isset(is_redirect, src))		// never merge redirects
	  src = merge[src];
      }
      link.src = (link.src & ETYPE_MASK) | src;
      bwrite(out, &link, sizeof(link));
    }
  bclose(in);
  xfree(is_redirect);
  xfree(merge);
  log(L_INFO, "Merged vertices");
  record_is_linked(is_linked);
  xfree(is_linked);
  brewind(*out_obj);
  brewind(*out_skel);
}

/* Presorting */

/*
 * Instantiate the array sorter as dest_sort(n, array): orders
 * resolve_output records by dest and, among equal dests, by src
 * (the raw value, edge-type bits included).
 */
#define ASORT_PREFIX(x) dest_##x
#define ASORT_KEY_TYPE struct resolve_output
#define ASORT_ELT(i) array[i]
#define ASORT_LT(x,y) (x.dest < y.dest || x.dest == y.dest && x.src < y.src)
#define ASORT_EXTRA_ARGS , struct resolve_output *array
#include "lib/arraysort.h"

/*
 * Vertex translation table.  It is NULL while the skeleton graph is being
 * sorted, gets loaded in main() (FN_GRAPH_GOES) before the object graph is
 * processed, and is freed and reset to NULL inside construct_index().
 * When non-NULL, destinations are ordered by goes_to[dest] instead of dest.
 */
static u32 *goes_to;

/*
 * Instantiate the array sorter as goes_dest_sort(n, array): orders
 * resolve_output records by goes_to[dest] and, among records with the
 * same dest, by src.  Must only be called while goes_to is non-NULL.
 */
#define ASORT_PREFIX(x) goes_dest_##x
#define ASORT_KEY_TYPE struct resolve_output
#define ASORT_ELT(i) array[i]
#define ASORT_LT(x,y) (goes_to[x.dest] < goes_to[y.dest] || x.dest == y.dest && x.src < y.src)
#define ASORT_EXTRA_ARGS , struct resolve_output *array
#include "lib/arraysort.h"

/*
 * Presort a stream of links in memory-sized chunks and convert it to the
 * graph format: for every destination a header (bput_graph_hdr) followed
 * by the list of its distinct sources.
 *
 * in   -- stream of struct resolve_output records; closed here
 * name -- name used only in the log message
 * out  -- receives two rewound temporary streams; sorted chunks are
 *         written to them alternately
 *
 * Each chunk is sorted by destination (translated via goes_to when it is
 * loaded) and by source, so duplicate links end up adjacent and are
 * dropped while writing.
 */
static void
presort_links(struct fastbuf *in, byte *name, struct fastbuf *out[2])
{
  uns record = sizeof(struct resolve_output);
  uns buf_size = sorter_presort_bufsize / record * record;	// whole records only
  struct resolve_output *buf = xmalloc(buf_size);
  uns len, curr = 0;
  out[0] = bopen_tmp(indexer_fb_size);
  out[1] = bopen_tmp(indexer_fb_size);
  while ((len = bread(in, buf, buf_size)))
  {
    uns nr = len / record;
    if (goes_to)
      goes_dest_sort(nr, buf);
    else
      dest_sort(nr, buf);
    for (uns i=0; i<nr; )
    {
      uns start = i++, count = 1;
      for (; i<nr && buf[i].dest == buf[start].dest; i++)	// find and count the neighbors
	if (buf[i].src != buf[i-1].src)
	  count++;
      bput_graph_hdr(out[curr], buf[start].dest, count);
      bputl(out[curr], buf[start++].src);
      for (; start < i; start++)				// and prune them
	if (buf[start].src != buf[start-1].src)
	  bputl(out[curr], buf[start].src);
    }
    curr = !curr;						// alternate the output streams
  }
  /* An empty input (e.g., no links of this kind at all) must not lead to
   * a division by zero; nothing was reduced then, so report 100%.  */
  sh_off_t in_size = btell(in);
  float reduction = 1.;
  if (in_size)
    reduction = (btell(out[0]) + btell(out[1]) + 0.) / in_size;
  log(L_INFO, "Presorting reduced %s to %.1f%%", name, reduction * 100.);
  xfree(buf);
  bclose(in);
  brewind(out[0]);
  brewind(out[1]);
}

/* Sorting neighbors with unifying */

/*
 * Instantiate the array sorter as neighbors_sort(n, array) working on an
 * array of uns.  Elements are compared as whole stored values: the masking
 * with ETYPE_MASK is deliberately commented out, so entries differing only
 * in the edge type stay distinct.
 */
#define ASORT_PREFIX(x) neighbors_##x
#define ASORT_KEY_TYPE uns
#define ASORT_ELT(i) (array[i] /*& ~ETYPE_MASK*/)		// we don't want to unify [redir], [frame], and [img]
#define ASORT_SWAP(i,j) do { uns e=array[j]; array[j]=array[i]; array[i]=e; } while(0)
#define ASORT_EXTRA_ARGS , uns *array
#include "lib/arraysort.h"

/*
 * Sort a list of neighbors in place and squeeze out adjacent duplicates.
 *
 * buf -- the neighbors
 * len -- number of entries in buf
 *
 * Returns the number of entries left at the beginning of buf.
 */
static inline uns
sort_neighbors(u32 *buf, uns len)
{
  if (len <= 1)
    return len;

  neighbors_sort(len, buf);

  /* buf[0 .. kept-1] holds the entries kept so far; the last kept entry
   * always equals the entry preceding the one being examined.  */
  uns kept = 1;
  for (uns i = 1; i < len; i++)
    {
      u32 v = buf[i];
      if (v == buf[kept-1])
        continue;
      buf[kept++] = v;
    }
  return kept;
}

/* Sorting by the source vertex after resolving */

/*
 * Sorter key: the header of one record of a graph file.
 *   dest -- destination vertex the record belongs to
 *   deg  -- number of u32 source entries following the header
 */
struct link_merge {
  u32 dest, deg;
};

/*
 * Parameters of the lib/sorter.h template included further below; the
 * callbacks link_compare(), link_fetch_key(), link_copy_data() and
 * link_merge_data() are defined in between.  The generated entry point is
 * called in main() as link_sort(fb0, fb1, output_name).
 * NOTE(review): presumably SORT_UNIFY makes the sorter merge records with
 * equal keys via link_merge_data() -- confirm against lib/sorter.h.
 */
#define SORT_KEY	struct link_merge
#define SORT_PREFIX(x)	link_##x
#define SORT_UNIFY
#define SORT_INPUT_FBPAIR
#define SORT_OUTPUT_FILE

/*
 * Sorter callback: compare two keys by destination.  While goes_to is
 * loaded, the translated positions goes_to[dest] are compared instead of
 * the raw destinations.  Equal keys yield 0; the result for unequal keys
 * comes from the COMPARE macro.
 */
static inline int
link_compare(struct link_merge *a, struct link_merge *b)
{
  // FIXME: speedup since the goes_to test is constant in each of the two instances?
  uns key_a = goes_to ? goes_to[a->dest] : a->dest;
  uns key_b = goes_to ? goes_to[b->dest] : b->dest;

  COMPARE(key_a, key_b);
  return 0;
}

static int
link_fetch_key(struct fastbuf *fb, struct link_merge *k)
{
  return bget_graph_hdr(fb, &k->dest, &k->deg);
}

static void
link_copy_data(struct fastbuf *in, struct fastbuf *out, struct link_merge *k)
{
  bput_graph_hdr(out, k->dest, k->deg);
  bbcopy(in, out, k->deg * sizeof(u32));
}

/*
 * Instantiate the growing buffer template for u32 elements; it provides
 * the u32b_t type and u32b_grow() used by link_merge_data().
 */
#define	GBUF_TYPE	u32
#define	GBUF_PREFIX(x)	u32b_##x
#include "lib/gbuf.h"

/*
 * Sorter callback: merge two records with equal keys.  The source lists
 * of both records are read into one buffer, sorted and deduplicated by
 * sort_neighbors(), and written to out under the destination of k1 with
 * the resulting degree.
 *
 * The buffer is static, so it is kept and reused across calls.
 */
static void
link_merge_data(struct fastbuf *in1, struct fastbuf *in2, struct fastbuf *out, struct link_merge *k1, struct link_merge *k2)
{
  static u32b_t nbuf;
  uns deg1 = k1->deg;
  uns deg2 = k2->deg;
  uns both = deg1 + deg2;

  u32b_grow(&nbuf, both);
  bread(in1, nbuf.ptr, deg1 * sizeof(u32));
  bread(in2, nbuf.ptr + deg1, deg2 * sizeof(u32));

  uns unique = sort_neighbors(nbuf.ptr, both);
  bput_graph_hdr(out, k1->dest, unique);
  bwrite(out, nbuf.ptr, unique * sizeof(u32));
}

#include "lib/sorter.h"

/* Construction of the graph index */

/*
 * Number of entries written to the graph index so far, i.e., the slot the
 * next entry would get; reset by construct_index(), advanced by
 * append_graph_idx().
 */
static uns last_index;

/*
 * Append the file position of a vertex to the graph index.
 *
 * graph_idx -- index being written
 * node      -- vertex number; translated through goes_to when it is loaded
 * pos       -- offset of the vertex's record in the graph file
 *
 * Slots of vertices skipped since the previous call are filled with -1.
 */
static void
append_graph_idx(struct fastbuf *graph_idx, uns node, sh_off_t pos)
{
  uns slot = goes_to ? goes_to[node] : node;

  for (; last_index < slot; last_index++)
    bputo(graph_idx, -1);
  last_index++;                         /* account for the entry written below */
  bputo(graph_idx, pos);
}

/*
 * Statistics gathered by construct_index() for the graph being indexed:
 *   total_v -- number of records (vertices with a header) in the graph file
 *   max_id  -- maximum degree found in a header (logged as in-degree)
 *   total_e -- sum of all degrees
 */
static uns total_v, max_id;
static u64 total_e;

/*
 * Build the index of a graph file and gather statistics about it.
 *
 * file       -- name of the graph file; the index goes to the file with
 *               FN_GRAPH_INDEX appended to the name
 * total_num  -- number of vertices of the graph; one extra terminating
 *               entry is written for this vertex number
 * out_degree -- optional array of counters indexed by source vertex; it
 *               is only incremented, never cleared, so counts accumulate
 *               over successive calls
 *
 * The index contains the position of the record of every vertex, with -1
 * for vertices that have no record.  As a side effect, goes_to is freed
 * and reset once the graph has been scanned.
 */
static void
construct_index(byte *file, uns total_num, u32 *out_degree)
{
  struct fastbuf *graph = index_bopen(file, O_RDONLY);
  struct fastbuf *graph_idx = index_bopen(stk_strcat(file, FN_GRAPH_INDEX), O_WRONLY | O_CREAT | O_TRUNC);
  last_index = 0;
  total_v = max_id = 0;
  total_e = 0;
  //don't clear out_degree
  u32 dest, deg;
  sh_off_t pos = 0;
  while (bget_graph_hdr(graph, &dest, &deg))
  {
    append_graph_idx(graph_idx, dest, pos);
    max_id = MAX(max_id, deg);
    total_v++;
    total_e += deg;
    while (deg--)
    {
      u32 src = bgetl(graph);
      if (out_degree)
	out_degree[src & ~ETYPE_MASK]++;		// strip the edge-type bits
    }
    pos = btell(graph);					// start of the next record
  }
  if (goes_to)						// for the sake of append_graph_idx()
  {
    /* The terminating entry below must be placed by the untranslated
     * vertex number total_num, which has no entry in goes_to.  */
    xfree(goes_to);
    goes_to = NULL;
  }
  append_graph_idx(graph_idx, total_num, pos);
  bclose(graph);
  bclose(graph_idx);

  uns max_od = 0;
  if (out_degree)
    for (uns i=0; i<objects; i++)
      max_od = MAX(max_od, out_degree[i]);
  /* All the numbers are unsigned, so they are printed with unsigned
   * conversions and a matching cast for the 64-bit edge count.  */
  log(L_INFO, "Built index of %s, %u vertices, %llu edges, in-deg %u, out-deg %u",
      file, total_v, (unsigned long long) total_e, max_id, max_od);
}

int
main(int argc, char **argv)
{
  log_init(argv[0]);
  if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0 ||
      optind < argc)
  {
    fputs("This program supports only the following command-line arguments:\n" CF_USAGE, stderr);
    exit(1);
  }

  byte *fp_path = index_name(fn_fingerprints);
  all_objects = sh_file_size(fp_path) / sizeof(struct card_print);
  objects = sh_file_size(index_name(fn_attributes)) / sizeof(struct card_attr);
  skel_objects = all_objects - objects;
  set_card_count(objects);
  log(L_INFO, "Processing %u objects + %u skeletons", objects, skel_objects);

  compute_translation();
  struct fastbuf *orig_links = index_bopen(fn_links, O_RDONLY);
  struct fastbuf *resolved_links = resolve_fastbuf(orig_links, RESOLVE_SKIP_UNKNOWN, sizeof(struct link) - sizeof(struct fingerprint));
  struct fastbuf *links_obj, *links_skel;
  merge_vertices(resolved_links, &links_obj, &links_skel);

  struct fastbuf *fb[2];
  presort_links(links_skel, fn_graph_skel, fb);
  link_sort(fb[0], fb[1], index_name(fn_graph_skel));
  log(L_INFO, "Sorted %s", fn_graph_skel);
  alloc_read_ary(fn_graph_obj, FN_GRAPH_GOES, &goes_to, objects, sizeof(u32));
  presort_links(links_obj, fn_graph_obj, fb);
  link_sort(fb[0], fb[1], index_name(fn_graph_obj));
  log(L_INFO, "Sorted %s", fn_graph_obj);

  u32 *out_degree = xmalloc_zero(sizeof(u32) * objects);
  construct_index(fn_graph_obj, objects, out_degree);		// frees goes_to
  struct fastbuf *fb_outdeg = index_bopen(stk_strcat(fn_graph_obj, FN_GRAPH_DEG), O_WRONLY | O_CREAT | O_TRUNC);
  bwrite(fb_outdeg, out_degree, objects * sizeof(u32));
  bclose(fb_outdeg);
  construct_index(fn_graph_skel, skel_objects, out_degree);
  xfree(out_degree);

  return 0;
}
