/*
 *	Sherlock Indexer -- Processing of Labels
 *
 *	(c) 2001--2006 Martin Mares <mj@ucw.cz>
 *	(c) 2003--2004 Robert Spalek <robert@ucw.cz>
 */

#include "sherlock/sherlock.h"
#include "lib/getopt.h"
#include "lib/fastbuf.h"
#include "sherlock/object.h"
#include "indexer/indexer.h"
#include "indexer/merges.h"

#include <fcntl.h>
#include <stdlib.h>

/*
 * Per-card attribute arrays.  Both are filled in main() by READ_ATTR, indexed
 * by card ID and released with xfree() before the program exits.
 */
static byte *card_weights, *card_flags;

/*
 * Configuration of the generic sorter pulled in by "lib/sorter.h" further down.
 * The sorted record is a struct lab and all generated/hooked functions carry
 * the lab_ prefix (lab_compare, lab_fetch_key, ..., lab_sort).
 * NOTE(review): judging by the lab_sort(out, file_name) call in main(), the
 * sorter takes its input as a fastbuf and writes to a named file; the exact
 * meaning of SORT_DELETE_INPUT and SORT_PRESORT is defined in lib/sorter.h --
 * confirm there.
 */
#define SORT_KEY struct lab
#define SORT_PREFIX(x) lab_##x
#define SORT_DELETE_INPUT 1
#define SORT_PRESORT
#define SORT_INPUT_FB
#define SORT_OUTPUT_FILE

/*
 * Compute the ordering weight of a label.
 *
 * With of_redir == 0 the URL card (a->url_id) is rated against the merged
 * card (a->merged_id); with of_redir != 0 the redirect card (a->redir_id) is
 * rated against the URL card.  The result is the rated card's entry in
 * card_weights[], plus 0x10000000 when the rated card is identical to the
 * card it is rated against, so that such labels always outweigh the others.
 */
static inline uns
lab_weight(struct lab *a, uns of_redir)
{
  uns rated = of_redir ? a->redir_id : a->url_id;
  uns reference = of_redir ? a->url_id : a->merged_id;
  uns bonus = 0;

  if (rated == reference)
    bonus = 0x10000000;
  return bonus + card_weights[rated];
}

/*
 * Sorter comparison hook for two labels.  The keys are applied in this order:
 *   1. merged (primary) card ID, ascending
 *   2. weight of the URL card, descending
 *   3. URL card ID, ascending
 *   4. weight of the redirect card, descending
 *   5. redirect card ID, ascending
 * Each COMPARE() step leaves the function as soon as its two operands differ;
 * labels equal in all five keys compare as 0.
 */
static inline int
lab_compare(struct lab *a, struct lab *b)
{
  /* Key 1: group labels by the card they finally belong to */
  COMPARE(a->merged_id, b->merged_id);

  /* Key 2: inside a group, heavier URL cards go first (operands swapped) */
  int url_weight_a = lab_weight(a, 0);
  int url_weight_b = lab_weight(b, 0);
  COMPARE(url_weight_b, url_weight_a);

  /* Key 3: equal weights are resolved by the URL card ID */
  COMPARE(a->url_id, b->url_id);

  /* Key 4: for one URL card, its own label leads and the redirects follow,
   * heavier ones first */
  int redir_weight_a = lab_weight(a, 1);
  int redir_weight_b = lab_weight(b, 1);
  COMPARE(redir_weight_b, redir_weight_a);

  /* Key 5: the redirect ID is the last tie-breaker */
  COMPARE(a->redir_id, b->redir_id);

  /* Indistinguishable by any key */
  return 0;
}

/*
 * Sorter hook: read one fixed-size struct lab header from f into *k and
 * hand back whatever breadb() reported for the read.
 */
static inline int
lab_fetch_key(struct fastbuf *f, struct lab *k)
{
  int got = breadb(f, k, sizeof(struct lab));

  return got;
}

/*
 * Sorter hook: emit a complete record to dest.  The header *k (already read
 * by the caller) is written first, then k->count bytes following it are
 * copied straight from src.
 */
static inline void
lab_copy_data(struct fastbuf *src, struct fastbuf *dest, struct lab *k)
{
  /* Header comes from memory ... */
  bwrite(dest, k, sizeof(struct lab));
  /* ... the variable-length body is still waiting in the source stream */
  bbcopy(src, dest, k->count);
}

/*
 * Sorter hook: load the k->count data bytes of a record from f into the
 * memory directly behind the header *k.
 *
 * Returns a pointer just past the loaded data, or NULL (without reading
 * anything) when the data would extend beyond limit.
 */
static inline byte *
lab_fetch_item(struct fastbuf *f, struct lab *k, byte *limit)
{
  byte *data = (byte *) &k[1];
  byte *data_end = data + k->count;

  if (data_end > limit)
    return NULL;
  breadb(f, data, k->count);
  return data_end;
}

/*
 * Sorter hook: write an in-memory record to f in a single call -- the header
 * *k together with the k->count data bytes stored right behind it.
 */
static inline void
lab_store_item(struct fastbuf *f, struct lab *k)
{
  /* Header and data are contiguous in memory, so one write covers both */
  bwrite(f, k, k->count + sizeof(struct lab));
}

#include "lib/sorter.h"

int
main(int argc, char **argv)
{
  struct fastbuf *in, *out;
  uns redir_id;
  uns count = 0;

  log_init(argv[0]);
  if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0 ||
      optind < argc)
  {
    fputs("This program supports only the following command-line arguments:\n" CF_USAGE, stderr);
    exit(1);
  }

  out = index_bopen(fn_labels, O_RDWR | O_CREAT | O_TRUNC);

  /* Look at original by-ID labels through our merging lens */
  log(L_INFO, "Searching for labels");
  attrs_part_map(0);
  READ_ATTR(card_flags, flags);
  READ_ATTR(card_weights, weight);
  merges_map(0);
  in = index_bopen(fn_labels_by_id, O_RDONLY);
  get_attr_set_type(BUCKET_TYPE_V33);
  while ((int) (redir_id = bgetl(in)) >= 0)
    {
      uns flags = bgetc(in);
      uns url_id = (card_flags[redir_id] & CARD_FLAG_EMPTY) ? merges[redir_id] : redir_id;
      uns merged_id = (url_id != ~0U) ? merges[url_id] : url_id;
      if (merged_id != ~0U && merged_id != url_id)
	{
	  ASSERT(card_flags[url_id] & CARD_FLAG_DUP);
	  ASSERT(card_flags[merged_id] & CARD_FLAG_MERGED);
	  ASSERT(merges[merged_id] == merged_id);
	}

      sh_off_t start = btell(in);
      struct parsed_attr attr;
      while (bget_attr(in, &attr) > 0)
	;

      struct lab l = {
	.merged_id = merged_id,
	.url_id = url_id,
	.redir_id = redir_id,
	.count = btell(in) - start - 1,
	.flags = flags
      };
      if (url_id == ~0U || (card_flags[url_id] & CARD_FLAG_EMPTY))
	{
	  /* Empty cards will be skipped by fetch.c anyway, so don't bother with storing labels for them */
	  ASSERT(merged_id == ~0U);
	  continue;
	}
      else if (flags & LABEL_FLAG_MERGED_ONLY)
	{
	  /* Otherwise, the label is inherited from the original object in fetch.c, saving space in the label file */
	  if (!(card_flags[redir_id] & (CARD_FLAG_DUP | CARD_FLAG_MERGED | CARD_FLAG_EMPTY)))
	    continue;
	}
      else if (!(flags & LABEL_TYPE_URL))
	{
	  /* Non-URL labels should be attached only to the primary card */
	  if (merged_id != redir_id)
	    continue;
	}
      bwrite(out, &l, sizeof(l));
      bsetpos(in, start);
      bbcopy(in, out, l.count);
      bgetc(in);
      count++;
    }
  bclose(in);

  log(L_INFO, "Extracted %u labels", count);

  /* Sort the resulting labels */
  brewind(out);
  lab_sort(out, index_name(fn_labels));

  merges_unmap();
  xfree(card_weights);
  xfree(card_flags);
  attrs_part_unmap();
  return 0;
}
