/*
 *	Sherlock Indexer -- Graph Builder
 *
 *	(c) 2001 Martin Mares <mj@ucw.cz>
 *	(c) 2003 Robert Spalek <robert@ucw.cz>
 */

#include "sherlock/sherlock.h"
#include "lib/lfs.h"
#include "lib/bitarray.h"
#include "lib/lists.h"
#include "lib/getopt.h"
#include "lib/fastbuf.h"
#include "indexer/indexer.h"

#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>

/*
 * Debugging trace, disabled by the constant-false condition (change the 0
 * to 1 to get the output on stdout).  The arguments are still seen by the
 * compiler, so the format strings keep being type-checked.  The do/while
 * wrapper makes the macro a single statement, so that it can be used
 * safely as the body of an `if' that is followed by an `else'.
 */
#define	TRACE(x...)	do { if (0) printf(x); } while (0)

/*
 * Collapse one equivalence class stored as a chain in merge[].
 *
 * The chain begins at `start' and follows merge[] until it reaches a vertex
 * that points to itself.  Every vertex on the chain is re-pointed to a single
 * representative: the lowest-numbered vertex of the chain that is not marked
 * in `is_redirect'.
 */
static void
renumber_chain(uns *merge, bitarray_t is_redirect, uns start)
{
  /* First pass: look for the smallest non-redirect vertex on the chain.  */
  uns target = ~0U;
  for (uns probe = start; ; probe = merge[probe])
  {
    if (probe < target && !bit_array_isset(is_redirect, probe))
      target = probe;
    if (merge[probe] == probe)		/* reached the end of the chain */
      break;
  }

  /* The class consists of redirects only.  Any representative is fine then,
   * the only thing that matters is that regular vertices never get merged
   * into a redirect.  */
  if (target == ~0U)
    target = start;

  /* Second pass: point every vertex of the chain to the representative.
   * The successor must be fetched before the entry is overwritten.  */
  uns node = start;
  for (;;)
  {
    uns succ = merge[node];
    TRACE("%x -> %x (was %x)%s\n", node, target, merge[node],
	bit_array_isset(is_redirect, node) ? " REDIRECT" : "");
    merge[node] = target;
    if (succ == node)
      break;
    node = succ;
  }
  TRACE("\n");
}

/*
 * Instantiate the array sorter template from lib/arraysort.h.  The prefix
 * below makes it generate neighbors_sort(), which sort_buffer() calls as
 * neighbors_sort(len, array).  Whole array elements are used as the sort
 * key; the masking of the edge type bits is commented out, so elements
 * differing only in those bits are ordered (and later deduplicated) as
 * distinct values.
 */
#define ASORT_PREFIX(x) neighbors_##x
#define ASORT_KEY_TYPE uns
#define ASORT_ELT(i) (array[i] /*& ~ETYPE_MASK*/)
#define ASORT_SWAP(i,j) do { uns e=array[j]; array[j]=array[i]; array[i]=e; } while(0)
#define ASORT_EXTRA_ARGS , uns *array
#include "lib/arraysort.h"

/*
 * Statistics reported by main(): number of vertex records written, total
 * number of edges in them, and the largest degree seen.  They are updated
 * by output_vertex() and recomputed by main() for the final graph.
 */
uns total_v, total_e, max_d;

/*
 * Sort an array of neighbors in place, squeeze out repeated values and
 * return how many elements are to be kept.  The returned count never
 * exceeds max_degree, except that arrays of at most one element are
 * returned untouched.
 */
static inline uns
sort_buffer(u32 *buf, uns len)
{
  /* Nothing to sort or deduplicate.  */
  if (len < 2)
    return len;

  neighbors_sort(len, buf);

  /* After sorting, equal values are adjacent: keep the first of each run.  */
  uns kept = 1;
  uns pos = 1;
  while (pos < len)
  {
    if (buf[pos-1] != buf[pos])
      buf[kept++] = buf[pos];
    pos++;
  }

  if (kept > max_degree)
    return max_degree;
  return kept;
}

/*
 * Append one vertex record to `graph': the source vertex number, the number
 * of neighbors and the neighbors themselves.  The neighbor array is sorted
 * and deduplicated in place first.  Vertices without any outgoing edge are
 * not written at all.  The global statistics are updated on the way.
 */
static void
output_vertex(struct fastbuf *graph, uns src, uns deg, uns *neighbors)
{
  if (deg)
  {
    uns out_deg = sort_buffer(neighbors, deg);

    bputl(graph, src);
    bputw(graph, out_deg);
    bwrite(graph, neighbors, out_deg * sizeof(uns));

    total_v++;
    total_e += out_deg;
    if (max_d < out_deg)
      max_d = out_deg;
  }
}

/*
 * In-memory representation of the records handled by the sorter.
 *
 * neigh_t is one chunk of neighbors: a list node, the number of neighbors
 * and the neighbors themselves stored right behind the header.
 * graph_fetch_item() places the first chunk immediately after its rec_t.
 */
typedef struct {
  struct node n;
  uns deg;
  u32 neighbors[0];
} neigh_t;
/*
 * rec_t is the sort key: the source vertex, the list of neighbor chunks
 * belonging to it and the total number of neighbors in all of them
 * (graph_merge_items() joins the lists and sums the degrees).
 */
typedef struct {
  struct list n;	// of neigh_t
  uns src;
  uns deg;
} rec_t;

/*
 * Configuration of the sorter template included from lib/sorter.h below.
 * The prefix makes it generate graph_sort(), which main() calls with a
 * fastbuf as input and a file name as output, and it makes the template
 * look for the graph_*() callbacks defined in this file.
 * NOTE(review): presumably SORT_UNIFY is what enables merging of records
 * with equal keys (graph_merge_data, graph_merge_items) and SORT_PRESORT
 * the in-memory presorting pass (graph_fetch_item, graph_store_item) --
 * confirm against lib/sorter.h.
 */
#define	SORT_KEY	rec_t
#define	SORT_PREFIX(x)	graph_##x
#define	SORT_PRESORT
#define	SORT_UNIFY
#define	SORT_INPUT_FB
#define	SORT_OUTPUT_FILE

/*
 * Sorter callback: order records by their source vertex number.
 * Returns -1, 0 or 1 when `a' sorts before, equal to or after `b'.
 */
static inline int
graph_compare(rec_t *a, rec_t *b)
{
  uns left = a->src;
  uns right = b->src;

  if (left == right)
    return 0;
  return (left < right) ? -1 : 1;
}

/*
 * Sorter callback: read the header of the next record (source vertex and
 * degree) from `f' into `r'.  Returns 1 on success and 0 when the source
 * read is ~0U, which is how the end of the input shows up here.
 * The list head r->n is deliberately left alone; graph_fetch_item()
 * initializes it when the record is loaded into memory.
 */
static inline int
graph_fetch_key(struct fastbuf *f, rec_t *r)
{
  uns id = bgetl(f);

  r->src = id;
  if (id != ~0U)
  {
    r->deg = bgetw(f);
    return 1;
  }
  return 0;
}

/*
 * Sorter callback: copy one whole record to `dest'.  The header has been
 * already read into `r', so it is written from there; the list of
 * neighbors is copied straight from `src'.
 */
static inline void
graph_copy_data(struct fastbuf *src, struct fastbuf *dest, rec_t *r)
{
  const uns degree = r->deg;

  bputl(dest, r->src);
  bputw(dest, degree);
  bbcopy(src, dest, sizeof(u32) * degree);
}

/*
 * Sorter callback: merge two records with the same source vertex.  The
 * neighbors of both are read from the respective inputs into one buffer,
 * sorted and deduplicated, and a single record is written to `dest'.
 * r1->deg is updated to the degree of the merged record.
 */
static void
graph_merge_data(struct fastbuf *src1, struct fastbuf *src2, struct fastbuf *dest, rec_t *r1, rec_t *r2)
{
  ASSERT(r1->src == r2->src);

  const uns first = r1->deg;
  const uns second = r2->deg;
  const uns total = first + second;

  /* Both neighbor lists go to the stack, one after another.  */
  u32 merged[total];
  bread(src1, merged, first * sizeof(u32));
  bread(src2, &merged[first], second * sizeof(u32));

  r1->deg = sort_buffer(merged, total);

  bputl(dest, r1->src);
  bputw(dest, r1->deg);
  bwrite(dest, merged, r1->deg * sizeof(u32));
}

/*
 * Sorter callback: load the neighbors of record `r' from `f' into the
 * memory right behind `r', as a single neigh_t chunk linked to r->n.
 * Returns a pointer just past the last neighbor stored, or NULL when the
 * chunk would not fit below `limit' (nothing is read in that case).
 */
static inline byte *
graph_fetch_item(struct fastbuf *f, rec_t *r, byte *limit)
{
  neigh_t *chunk = (void*) (r+1);
  byte *needed_end = (byte*) chunk + sizeof(neigh_t) + r->deg * sizeof(u32);

  if (needed_end > limit)
    return NULL;

  /* The record starts with a list containing just this one chunk.  */
  init_list(&r->n);
  add_tail(&r->n, &chunk->n);
  chunk->deg = r->deg;
  bread(f, chunk->neighbors, r->deg * sizeof(u32));

  /* The end is computed from the array itself, since sizeof(neigh_t) may
   * include padding and thus differ from the offset of the array.  */
  return (byte*) (chunk->neighbors + r->deg);
}

/*
 * Sorter callback: write an in-memory record to `f'.  All neighbor chunks
 * hanging on r->n are gathered into one array, which is then sorted and
 * deduplicated, and the resulting record is written out.
 */
static void
graph_store_item(struct fastbuf *f, rec_t *r)
{
  u32 gathered[r->deg];
  uns filled = 0;
  neigh_t *chunk;

  TRACE("Merging %x: ", r->src);
  WALK_LIST(chunk, r->n)
  {
    memcpy(&gathered[filled], chunk->neighbors, chunk->deg * sizeof(u32));
    filled += chunk->deg;
    TRACE("%d ", chunk->deg);
  }
  /* r->deg must be the sum of the degrees of all chunks.  */
  ASSERT(filled == r->deg);

  uns final_deg = sort_buffer(gathered, r->deg);
  TRACE("= %d -> %d\n", r->deg, final_deg);

  bputl(f, r->src);
  bputw(f, final_deg);
  bwrite(f, gathered, final_deg * sizeof(u32));
}

/*
 * Sorter callback: merge two in-memory records with the same source
 * vertex.  The chunks of `r2' are appended to the list of `r1' and the
 * degrees are summed; deduplication is left to graph_store_item().
 * Returns the surviving record, which is always `r1'.
 */
static inline rec_t *
graph_merge_items(rec_t *r1, rec_t *r2)
{
  ASSERT(r1->src == r2->src);

  rec_t *survivor = r1;
  survivor->deg = survivor->deg + r2->deg;
  add_tail_list(&survivor->n, &r2->n);
  return survivor;
}

#include "lib/sorter.h"

/*
 * Append entries to the graph index so that it has one offset for every
 * vertex number up to and including `new'.  A static counter remembers how
 * many entries have been written by the previous calls; all vertices
 * skipped since then get the same offset `pos' as `new' itself.
 */
static void UNUSED
append_graph_idx(struct fastbuf *fo, uns new, sh_off_t pos)
{
  static uns next_slot = 0;

  /* Fill the gap of vertices which have no record of their own.  */
  for (; next_slot < new; next_slot++)
    bputo(fo, pos);

  /* And the entry of the vertex itself.  */
  next_slot++;
  bputo(fo, pos);
}

/*
 * Build the link graph of the index.
 *
 * The program takes no arguments except for the configuration options.
 * It works in the following steps:
 *
 *   1. Scan the links and remember which source vertices are redirects.
 *   2. Scan the fingerprints and chain the cards with identical
 *      fingerprints into equivalence classes (merge[]), choosing
 *      a representative of each class by renumber_chain().
 *   3. Resolve the links to vertex numbers, translate them through
 *      merge[] and write the vertex records to a temporary file, marking
 *      the vertices with incoming links in is_linked on the way.
 *   4. Store the CARD_NOTE_IS_LINKED flag to the notes.
 *   5. Sort the temporary graph and merge records of the same vertex.
 *   6. Read the sorted graph once more and build its index of offsets.
 *
 * Returns 0 on success, exits with 1 on bad arguments.
 */
int
main(int argc, char **argv)
{
  log_init(argv[0]);
  if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0 ||
      optind < argc)
  {
    fputs("This program supports only the following command-line arguments:\n" CF_USAGE, stderr);
    exit(1);
  }

  /* The number of objects is derived from the sizes of the index files.
   * NOTE(review): fp_path is kept across further index_name() calls --
   * confirm that index_name() does not recycle the returned buffer.  */
  byte *fp_path = index_name(fn_fingerprints);
  uns all_objects = sh_file_size(fp_path) / sizeof(struct card_print);
  uns objects = sh_file_size(index_name(fn_attributes)) / sizeof(struct card_attr);
  set_card_count(objects);
  uns new_objects = all_objects - objects;
  log(L_INFO, "Processing %u objects + %u not downloaded objects", objects, new_objects);

  /* Initially, no vertex is a redirect and every vertex forms its own class.  */
  bitarray_t is_redirect = xmalloc(BIT_ARRAY_BYTES(objects));
  bit_array_zero(is_redirect, objects);
  uns *merge = xmalloc(objects * sizeof(uns));
  for (uns i=0; i<objects; i++)
    merge[i] = i;

  struct fastbuf *links = index_bopen(fn_links, O_RDONLY);
  if (1)	// here you can comment out the merging phase
  {
    /* Step 1: mark the sources of all redirect links.  */
    struct card_print link;
    uns redirects = 0;
    while (bread(links, &link, sizeof(link)))
      if ((link.cardid & ETYPE_MASK) == ETYPE_REDIRECT)
      {
	bit_array_set(is_redirect, link.cardid & ~ETYPE_MASK);
	redirects++;
      }
    log(L_INFO, "Found %u redirects", redirects);

    /* Step 2: consecutive records with the same fingerprint are chained
     * together in merge[]; whenever the fingerprint changes, the chain
     * ending at the previous record is complete and gets renumbered.
     * Records of not downloaded objects (>= FIRST_ID_NEW) are skipped.  */
    struct fastbuf *fprints = bopen(fp_path, O_RDONLY, indexer_fb_size);
    struct card_print p, last_p;
    last_p.cardid = ~0U;			// no previous record yet
    uns classes = 0;
    while (bread(fprints, &p, sizeof(p)))
    {
      if (p.cardid >= FIRST_ID_NEW)
	continue;
      if (last_p.cardid == ~0U)
      {
	last_p = p;
	continue;
      }
      if (!memcmp(&last_p.fp, &p.fp, sizeof(p.fp)))
	merge[p.cardid] = last_p.cardid;
      else
	classes++, renumber_chain(merge, is_redirect, last_p.cardid);
      last_p = p;
    }
    if (last_p.cardid != ~0U)			// the last chain is still pending
      classes++, renumber_chain(merge, is_redirect, last_p.cardid);
    bclose(fprints);
    for (uns i=0; i<objects; i++)
      TRACE("%x: %x%s\n", i, merge[i], bit_array_isset(is_redirect, i) ? " REDIRECT" : "");
    log(L_INFO, "Found %u equivalence classes", classes);
  }

  /* Step 3: translate the links.  Links of the same source are expected to
   * come consecutively; they are collected in neighbors[] (at most
   * max_degree of them) and flushed whenever the source changes.  */
  struct fastbuf *graph = bopen_tmp(indexer_fb_size);
  total_v = total_e = max_d = 0;
  uns *neighbors = xmalloc(sizeof(uns) * max_degree);
  bitarray_t is_linked = xmalloc(BIT_ARRAY_BYTES(all_objects));
  bit_array_zero(is_linked, all_objects);
  uns last_src = ~0U;
  uns deg = 0;
  bsetpos(links, 0);
  struct fastbuf *resolved_links = resolve_fingerprints(links, RESOLVE_SKIP_UNKNOWN, ~ETYPE_MASK);
  struct resolve_output link;
  while (bread(resolved_links, &link, sizeof(link)))
    {
      uns src = link.src & ~ETYPE_MASK;
      uns type = link.src & ETYPE_MASK;
      ASSERT(src < FIRST_ID_NEW);
      ASSERT(link.dest != ~0U);
      if (last_src == ~0U)
	last_src = src;
      else if (last_src != src)
      {
	if (!bit_array_isset(is_redirect, last_src))	// never merge redirects
	  last_src = merge[last_src];
	output_vertex(graph, last_src, deg, neighbors);
	last_src = src;
	deg = 0;
      }
      if (link.dest < FIRST_ID_NEW)
      {
	if (!bit_array_isset(is_redirect, link.dest)	// let links point to the redirects
	|| bit_array_isset(is_redirect, src))		// unless the source vertex is a redirect
	  link.dest = merge[link.dest];
	bit_array_set(is_linked, link.dest);
      }
      else
	/* Not downloaded objects occupy the bits following the regular ones.  */
	bit_array_set(is_linked, link.dest - FIRST_ID_NEW + objects);
      if (deg < max_degree)				// superfluous links are dropped
	neighbors[deg++] = link.dest | type;
    }
  if (last_src != ~0U)					// flush the last vertex
  {
    if (!bit_array_isset(is_redirect, last_src))
      last_src = merge[last_src];
    output_vertex(graph, last_src, deg, neighbors);
  }
  bclose(resolved_links);
  xfree(neighbors);
  xfree(merge);
  xfree(is_redirect);					// not needed after the translation
  brewind(graph);
  log(L_INFO, "Translated link graph with %u (%u) vertices, %u edges, and maxdegree %u",
      total_v, objects, total_e, max_d);

  /* Step 4: store the flags to the notes of regular and new objects.  */
  notes_part_map(1);
  for (uns i=0; i<objects; i++)				// mark which vertices have incoming links
    if (bit_array_isset(is_linked, i))
      bring_note(i)->flags |= CARD_NOTE_IS_LINKED;
  notes_part_unmap();
  notes_new_part_map(1);
  for (uns i=objects; i<all_objects; i++)
    if (bit_array_isset(is_linked, i))
      bring_new_note(i-objects)->flags |= CARD_NOTE_IS_LINKED;
  notes_new_part_unmap();
  xfree(is_linked);					// all flags have been stored

  /* Step 5: sort by the source vertex, merging records of equal sources.  */
  graph_sort(graph, index_name(fn_link_graph));	// calls bclose(graph)
  log(L_INFO, "Sorted and merged link graph");

  /* Step 6: walk through the final graph, recompute the statistics and
   * write the offset of every record to the index.  */
  graph = index_bopen(fn_link_graph, O_RDONLY);
  struct fastbuf *graph_idx = index_bopen(fn_link_graph_index, O_WRONLY | O_CREAT | O_TRUNC);
  total_v = total_e = max_d = 0;
  uns src;
  while ((src = bgetl(graph)) != ~0U)
  {
    u16 deg = bgetw(graph);
    total_v++;
    total_e += deg;
    if (deg > max_d)
      max_d = deg;
    /* The header has been read already, so step back to the record start.  */
    append_graph_idx(graph_idx, src, btell(graph) - sizeof(u32) - sizeof(u16));
    bskip(graph, deg * sizeof(u32));
  }
  /* Terminating entry: vertex number `objects' gets the end of the file.  */
  append_graph_idx(graph_idx, objects, btell(graph));
  bclose(graph_idx);
  bclose(graph);
  log(L_INFO, "Built link graph with %u (%u) vertices, %u edges, and maxdegree %u",
      total_v, objects, total_e, max_d);

  return 0;
}
