/*
 *	Sherlock Indexer -- Fetching of Cards for Second Indexer Pass
 *
 *	(c) 2001--2003 Martin Mares <mj@ucw.cz>
 *	(c) 2003--2004 Robert Spalek <robert@ucw.cz>
 */

#include "sherlock/sherlock.h"
#include "lib/fastbuf.h"
#include "lib/mempool.h"
#include "lib/url.h"
#include "sherlock/object.h"
#include "indexer/indexer.h"
#include "filter/filter.h"

#include <string.h>
#include <fcntl.h>

/* Per-card data made visible to the filter through fetch_bindings */
struct fetch_filter_data {
  int bonus;			/* Bound as "card_bonus"; zeroed before each filter run and handed back to the caller afterwards */
  byte *url;			/* Value of the 'U' attribute found in the card header object */
  struct url url_s;		/* Components of url as filled in by url_canon_split() */
};
/* Filter interpreter state set up by fetch_filter_init(); remains NULL when no indexer filter name is configured */
static struct filter_args *fetch_filter_args;

/*
 *  Filter variable names and the offsets of the corresponding fields
 *  in struct fetch_filter_data. The table is handed to filter_load()
 *  and is terminated by an entry with a NULL name.
 */
struct filter_binding fetch_bindings[] = {
  /* URL and its parts */
  { "url",		OFFSETOF(struct fetch_filter_data, url) },
  { "protocol",		OFFSETOF(struct fetch_filter_data, url_s.protocol) },
  { "host",		OFFSETOF(struct fetch_filter_data, url_s.host) },
  { "port",		OFFSETOF(struct fetch_filter_data, url_s.port) },
  { "path",		OFFSETOF(struct fetch_filter_data, url_s.rest) },
  { "username",		OFFSETOF(struct fetch_filter_data, url_s.user) },
  { "password",		OFFSETOF(struct fetch_filter_data, url_s.pass) },
  /* Attributes */
  { "card_bonus",	OFFSETOF(struct fetch_filter_data, bonus) },
  { NULL,		0 }
};

/*
 *  Load the indexer filter (if one is configured) and prepare its
 *  interpreter, including the undo machinery used between cards.
 *  With no filter name set, fetch_filter_args is left untouched.
 */
static void
fetch_filter_init(void)
{
  if (indexer_filter_name && *indexer_filter_name)
    {
      fetch_filter_args = filter_intr_new(filter_load(indexer_filter_name, filter_builtin_vars, fetch_bindings, NULL));
      filter_intr_undo_init(fetch_filter_args);
    }
}

static int
fetch_filter(struct odes *obj, struct mempool *pool, struct card_hdr *hdr, int *Bonus)
{
  struct filter_args *a = fetch_filter_args;
  struct fetch_filter_data d;
  byte buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE];
  int res;

  if (!a)
    {
      *Bonus = 0;
      return 1;
    }
  a->attr = obj;
  d.bonus = 0;
  d.url = obj_find_aval(hdr->odes, 'U');
  ASSERT(d.url);
  if (url_canon_split(d.url, buf1, buf2, &d.url_s))
    die("fetch_filter: error parsing URL");
  a->raw = &d;
  a->pool = pool;
  res = filter_intr_run(a);
  *Bonus = d.bonus;
  return res;
}

/* Undo the effects of the last filter run; a no-op when no filter is loaded */
static inline void
fetch_filter_undo(void)
{
  struct filter_args *args = fetch_filter_args;

  if (!args)
    return;
  filter_intr_undo(args);
}

/*
 *  Walk all cards of the index and call got_card() for each one which
 *  is neither empty nor a duplicate.
 *
 *  The attributes and notes files are read in parallel, one record per
 *  card. The labels file is a sequence of records
 *
 *	u32 card id, u32 source card, u32 redirect source, u32 byte count,
 *	<byte count> bytes of zero-terminated strings
 *
 *  where the first byte of each string is the attribute name. The records
 *  are consumed in card id order and the sequence is expected to end
 *  with an id of ~0U.
 *
 *  For every card passed on, got_card() receives:
 *	attr  -- the attribute record
 *	obj   -- the card body fetched from the bucket source
 *	hdr   -- the first of a list of headers (linked by `next');
 *		 each header may carry a list of redirect headers
 *		 hanging off `redirects'
 *	note  -- the note record
 *	bonus -- the bonus computed by the filter
 *
 *  Everything passed to got_card() except attr and note is allocated
 *  from a pool which is flushed before the next card is processed.
 */
void
fetch_cards(void (*got_card)(struct card_attr *attr, struct odes *obj, struct card_hdr *hdr, struct card_note *note, int bonus))
{
  struct mempool *pool;
  struct fastbuf *labels, *attrs, *notes;
  struct odes *o;
  u32 next_label_id;
  uns id;
  struct card_attr attr;
  struct card_note note;
  byte buf[MAX_ATTR_SIZE];
  struct card_hdr main_hdr, *last_hdr, *add_hdr;
  struct bucket_source *src;

  fetch_filter_init();
  pool = mp_new(16384);
  src = get_buck_init();
  labels = index_bopen(fn_labels, O_RDONLY);
  attrs = index_bopen(fn_attributes, O_RDONLY);
  notes = index_bopen(fn_notes, O_RDONLY);
  id = -1;				/* Unsigned, so the first increment wraps it to 0 */
  next_label_id = bgetl(labels);
  while (breadb(attrs, &attr, sizeof(attr)))
    {
      id++;
      /* Every attribute record must have its note; never go on with a stale one */
      if (!breadb(notes, &note, sizeof(note)))
	die("Notes file is shorter than the attributes file");
      if (attr.flags & (CARD_FLAG_EMPTY | CARD_FLAG_DUP))
	{
	  /* Skipped card: step over all label records belonging to it */
	  while (next_label_id == id)
	    {
	      bgetl(labels);
	      bgetl(labels);
	      uns cnt = bgetl(labels);
	      /* This really can happen, for example when we attach frame backlinks to empty frames */
	      DBG("Ignoring label for skipped object %08x", id);
	      bskip(labels, cnt);
	      next_label_id = bgetl(labels);
	    }
	  continue;
	}
      mp_flush(pool);
      if (!src->get_next(src, pool, attr.card))
	die("Object pool changed unexpectedly");
      o = src->o;
      main_hdr.redirects = NULL;
      main_hdr.odes = obj_new(pool);
      add_hdr = last_hdr = &main_hdr;
      /* Move the URL and all label attributes from the card body to the main header */
      struct oattr **Oa = &o->attrs, *oa;
      while ((oa = *Oa))
	if (oa->attr == 'U' || strchr(label_attrs, oa->attr))
	  {
	    *Oa = oa->next;			// delete from this chain
	    oa->next = main_hdr.odes->attrs;	// prepend to main_hdr.odes, no need to check the existence
	    main_hdr.odes->attrs = oa;
	  }
	else
	  Oa = &oa->next;
      if (attr.flags & CARD_FLAG_MERGED)
	{
	  /*
	   * For merged cards, we attach header attributes as labels,
	   * so the attributes moved above are dropped and the main
	   * header starts empty.
	   */
	  main_hdr.odes = obj_new(pool);
	}
      u32 src_card = 0;
      u32 src_redir = 0;
      struct card_hdr **New_redir = &last_hdr->redirects;
      while (next_label_id == id)
	{
	  u32 scard = bgetl(labels);
	  u32 sredir = bgetl(labels);
	  if (src_card && src_card != scard)
	    {
	      /* Labels of another source card: append a new header to the `next' chain */
	      struct card_hdr *h = mp_alloc(pool, sizeof(*h));
	      last_hdr->next = h;
	      last_hdr = h;
	      h->redirects = NULL;
	      h->odes = obj_new(pool);
	      add_hdr = last_hdr;
	    }
	  if (src_card != scard)
	    {
	      /* Start tracking redirects of the new source card */
	      src_card = scard;
	      src_redir = scard;
	      New_redir = &last_hdr->redirects;
	    }
	  if (sredir != src_redir)
	    {
	      /* Labels of another redirect: append a new header to the redirect list */
	      struct card_hdr *h = mp_alloc(pool, sizeof(*h));
	      h->redirects = h->next = NULL;
	      h->odes = obj_new(pool);
	      *New_redir = h;
	      New_redir = &h->next;
	      src_redir = sredir;
	      add_hdr = h;
	    }
	  uns cnt = bgetl(labels);
	  sh_off_t stop = btell(labels) + cnt;
	  while (btell(labels) < stop)
	    {
	      if (!bgets0(labels, buf, sizeof(buf)))
		die("Unexpected inconsistency of label file");
	      /* 'x' labels are attached to the card body, all others to the current header */
	      if (buf[0] == 'x')
		obj_add_attr(o, buf[0], buf+1);
	      else
		obj_add_attr(add_hdr->odes, buf[0], buf+1);
	    }
	  next_label_id = bgetl(labels);
	}
      /* Terminate the header chain; `next' of the last header has not been set yet */
      last_hdr->next = NULL;
      if (note.useful_size)
	obj_set_attr_num(o, 'k', note.useful_size);
      if (attr.flags & CARD_FLAG_FRAMESET)
	{
	  /* Frameset to be deleted, but CARD_FLAG_EMPTY is no longer set,
	   * which means that some labels have been attached, so we index it,
	   * but without the X attribute.
	   */
	  obj_set_attr(o, 'X', NULL);
	}
      int bonus;
      if (!fetch_filter(o, pool, &main_hdr, &bonus))
	die("Filtering rules have changed since scanner for %s", obj_find_aval(main_hdr.odes, 'U'));
      got_card(&attr, o, &main_hdr, &note, bonus);
      fetch_filter_undo();
    }
  /* All label records must have been consumed */
  ASSERT(next_label_id == ~0U);
  src->cleanup(src);
  bclose(attrs);
  bclose(labels);
  bclose(notes);
  mp_delete(pool);
  log(L_INFO, "Processed %d objects", id+1);
}
