/*
 *	Sherlock Indexer -- Fetching of Cards for Second Indexer Pass
 *
 *	(c) 2001--2005 Martin Mares <mj@ucw.cz>
 *	(c) 2003--2005 Robert Spalek <robert@ucw.cz>
 */

#include "sherlock/sherlock.h"
#include "lib/fastbuf.h"
#include "lib/mempool.h"
#include "lib/url.h"
#include "sherlock/object.h"
#include "sherlock/attrset.h"
#include "sherlock/objread.h"
#include "indexer/indexer.h"

#include <string.h>
#include <fcntl.h>

/*
 * Zero-based index of the card record currently being fetched.  The fetching
 * routines in this file set it to -1 before reading the first attribute
 * record and increment it once for every attribute record read, so after the
 * run fetch_id+1 is the number of records seen.  It has external linkage,
 * presumably so that the got_card callbacks can consult it -- verify against
 * the callers.
 */
uns fetch_id;

/*
 * Raw variant of fetch_cards(): walk the attribute and note files in parallel,
 * fetch one object from the bucket source for every attribute record and pass
 * it to got_card() untouched (no labels are read or merged here).
 *
 * got_card is called once per attribute record with the record itself, the
 * object delivered by the bucket source and the note record read at the same
 * position of the notes file.  The memory pool handed to the bucket source is
 * flushed before every card, so the callback presumably must not keep
 * pointers into the object after it returns.
 *
 * Dies when the bucket source cannot deliver a card or when the notes file
 * ends before the attributes file.
 */
static void
fetch_cards_raw(void (*got_card)(struct card_attr *attr, struct odes *obj, struct card_note *note))
{
  struct mempool *pool;
  struct fastbuf *fb_attrs, *fb_notes;
  struct card_attr attr;
  struct card_note note;
  struct bucket_source *src;

  pool = mp_new(16384);
  src = get_buck_init();
  fb_attrs = index_bopen(fn_attributes, O_RDONLY);
  fb_notes = index_bopen(fn_notes, O_RDONLY);
  fetch_id = -1;				// the first increment below wraps it to 0
  while (breadb(fb_attrs, &attr, sizeof(attr)))
    {
      fetch_id++;
      /* Every attribute record must have a note; never hand a stale note to the callback */
      if (!breadb(fb_notes, &note, sizeof(note)))
	die("Notes file is shorter than the attributes file");
      mp_flush(pool);
      if (!src->get_next(src, pool, attr.card))
	die("Ran out of buckets");
      got_card(&attr, src->o, &note);
    }
  src->cleanup(src);
  bclose(fb_attrs);
  bclose(fb_notes);
  mp_delete(pool);
  log(L_INFO, "Processed %u objects", fetch_id+1);
}

void
fetch_cards(void (*got_card)(struct card_attr *attr, struct odes *obj, struct card_note *note))
{
  struct mempool *pool;
  struct fastbuf *fb_labels, *fb_attrs, *fb_notes;
  struct odes *o;
  u32 next_label_id;
  struct card_attr attr;
  struct card_note note;
  struct odes *main_hdr, *url_hdr, *current_hdr;
  struct obj_read_state read_state;
  struct bucket_source *src;

  if (raw_stage2_input)
    return fetch_cards_raw(got_card);

  pool = mp_new(16384);
  src = get_buck_init();
  fb_labels = index_bopen(fn_labels, O_RDONLY);
  fb_attrs = index_bopen(fn_attributes, O_RDONLY);
  fb_notes = index_bopen(fn_notes, O_RDONLY);
  fetch_id = -1;
  next_label_id = bgetl(fb_labels);
  while (breadb(fb_attrs, &attr, sizeof(attr)))
    {
      fetch_id++;
      breadb(fb_notes, &note, sizeof(note));
      if (attr.flags & (CARD_FLAG_EMPTY | CARD_FLAG_DUP))
	{
	  while (next_label_id == fetch_id)
	    {
	      bgetl(fb_labels);
	      bgetl(fb_labels);
	      uns cnt = bgetl(fb_labels);
	      /* This really can happen, for example when we attach frame backlinks to empty frames */
	      DBG("Ignoring label for skipped object %08x", fetch_id);
	      bskip(fb_labels, cnt+1);
	      next_label_id = bgetl(fb_labels);
	    }
	  continue;
	}
      mp_flush(pool);
      if (!src->get_next(src, pool, attr.card))
	die("Object pool changed unexpectedly");
      o = src->o;

      /* Pick all per-URL attributes and move them to the main header */
      main_hdr = obj_new(pool);
      struct oattr **Oa = &o->attrs, *oa;
      while (oa = *Oa)
	if (oa->attr == 'U' || attr_set_match(&label_attr_set, oa))
	  {
	    *Oa = oa->next;				// delete from this chain
	    oa->next = main_hdr->attrs;			// prepend to main_hdr, no need to check the existence
	    main_hdr->attrs = oa;
	  }
	else
	  Oa = &oa->next;
      if (attr.flags & CARD_FLAG_MERGED)		// merged cards get all per-URL attributes from labels
	main_hdr = obj_new(pool);
      obj_add_son_ref(o, 'U' + OBJ_ATTR_SON, main_hdr);
      current_hdr = url_hdr = main_hdr;

      /* Read all labels and create the corresponding headers */
      u32 src_card = 0;
      u32 src_redir = 0;
      while (next_label_id == fetch_id)
	{
	  u32 scard = bgetl(fb_labels);
	  u32 sredir = bgetl(fb_labels);
	  uns cnt = bgetl(fb_labels);
	  uns flags = bgetc(fb_labels);
	  if (flags & LABEL_TYPE_URL)
	    {
	      if (scard != src_card)
		{
		  if (src_card)
		    current_hdr = url_hdr = obj_add_son(o, 'U' + OBJ_ATTR_SON);
		  src_card = scard;
		  src_redir = scard;
		}
	      if (sredir != src_redir)
		{
		  current_hdr = obj_add_son(url_hdr, 'y' + OBJ_ATTR_SON);
		  src_redir = sredir;
		}
	      obj_read_start(&read_state, current_hdr);
	    }
	  else
	    obj_read_start(&read_state, o);
	  get_attr_set_type(BUCKET_TYPE_V33);
	  sh_off_t stop = btell(fb_labels) + cnt;
	  int last_attr = 0;
	  while (btell(fb_labels) < stop)
	    {
	      struct parsed_attr pa;
	      if (bget_attr(fb_labels, &pa) <= 0)
		die("Unexpected inconsistency of label file");
	      copy_parsed_attr(o->pool, &pa);
	      if ((flags & LABEL_FLAG_OVERRIDE) && pa.attr != last_attr)
		obj_set_attr(read_state.obj, pa.attr, NULL);
	      obj_read_attr_ref(&read_state, pa.attr, pa.val);
	      last_attr = pa.attr;
	    }
	  obj_read_end(&read_state);
	  next_label_id = bgetl(fb_labels);
	}
      if (note.useful_size)
	obj_set_attr_num(o, 'u', note.useful_size);
      if (attr.flags & CARD_FLAG_FRAMESET)
	{
	  /* Frameset to be deleted, but CARD_FLAG_EMPTY is no longer set,
	   * which means that some labels have been attached, so we index it,
	   * but without the X attribute.
	   */
	  obj_set_attr(o, 'X', NULL);
	}
      got_card(&attr, o, &note);
    }
  ASSERT(next_label_id == ~0U);
  src->cleanup(src);
  bclose(fb_attrs);
  bclose(fb_notes);
  bclose(fb_labels);
  mp_delete(pool);
  log(L_INFO, "Processed %d objects", fetch_id+1);
}
