/*
 *	Knuth-Morris-Pratt's search automaton for N given strings
 * 
 *	(c) 1999, 2001, Robert Spalek <robert@ucw.cz>
 */

#include "lib/lists.h"

/*
 * Character-modification flags: they say how characters of a string are
 * modified.  The values are distinct bits (1, 2, 4).
 * NOTE(review): presumably these are OR-ed together and passed as the
 * modify_flags argument of kmp_new() -- confirm against the implementation.
 */
#define	MF_TOLOWER	1
#define	MF_UNACCENT	2
#define	MF_ONLYALPHA	4
	/* how to modify characters in the string */
#define	CONTROL_CHAR	':'
	/*
	 * All non-alphabetic characters are treated as CONTROL_CHAR.
	 * NOTE(review): presumably this applies only when MF_ONLYALPHA is
	 * set -- confirm against the implementation.
	 */

/*
 * kmp_state_t is the type of automaton states (used for the from/to
 * fields of a transition and for the back-transition table),
 * kmp_char_t is the type of the character labelling a transition.
 * Both `uns' and `word' are project types declared outside this file.
 */
typedef uns kmp_state_t;
typedef word kmp_char_t;

/*
 * A single forward transition of the automaton.  Each transition is
 * linked both into a list of sons and into a hash-table collision chain
 * (see struct kmp_transitions).
 */
struct kmp_transition {
	struct node n;			/* link list of sons for a given node */
	struct kmp_transition *next;	/* collision in the hash-table of all transitions */
	kmp_state_t from, to;		/* source and destination state of the transition */
	kmp_char_t c;			/* character labelling the transition */
};
/*
 * Container for all forward transitions: a per-node list of sons plus
 * a hash table mapping [node, char] to the son.
 */
struct kmp_transitions {
	/* NOTE(review): presumably the number of used / allocated states -- confirm against the implementation */
	int count, size;
	struct list *sons;		/* link-list of all sons for each given node */
	uns hash_size;			/* NOTE(review): presumably the number of entries in chain[] -- confirm */
	struct kmp_transition **chain;	/* hash-table of [node, char]->son */
};

/* One element of the singly linked list of words found in a state. */
struct kmp_output {
	struct kmp_output *next;	/* output link list for every node */
	uns id;				/* NOTE(review): presumably the id given to kmp_enter_string() -- confirm */
};

/* Only pointers to struct mempool are used here, so a forward declaration suffices. */
struct mempool;

/*
 * The search automaton itself: forward transitions (g), back
 * transitions (f) and the per-state lists of found words (out).
 */
struct kmp {
	struct mempool *mp;		/* NOTE(review): presumably the pool all automaton data is allocated from -- confirm */
	/*
	 * NOTE(review): declared int here, while kmp_new() takes the flags
	 * as uns; consider unifying the two types.
	 */
	int modify_flags;		/* which nocase/noaccent mode is this kmp for */
	int words_len;			/* total length of searched words */
	struct kmp_transitions g;	/* hash table of forward transitions of the automaton */
	kmp_state_t *f;			/* back transitions of the automaton */
	struct kmp_output **out;	/* found words for every state */
};

/*
 * Construction interface.  The implementation is not part of this header,
 * so the notes below are assumptions to be confirmed against it:
 *
 * kmp_new()		-- presumably creates an empty automaton using memory
 *			   pool mp; words_len and modify_flags correspond to
 *			   the fields of the same names in struct kmp
 *			   (modify_flags: the MF_* values).
 * kmp_enter_string()	-- presumably adds string str to the automaton and
 *			   associates it with identifier id.
 * kmp_build()		-- presumably finishes the automaton (back transitions)
 *			   once all strings have been entered.
 */
struct kmp *kmp_new(struct mempool *mp, int words_len, uns modify_flags);
void kmp_enter_string(struct kmp *kmp, const byte *str, uns id);
void kmp_build(struct kmp *kmp);

/* Per-string search result; kmp_search() takes an array of these as freq. */
struct kmp_result {
	struct node n;			/* strings with non-zero frequency are put into a link-list */
	uns occur;			/* NOTE(review): presumably the number of occurrences found -- confirm */
};

/*
 * Searches str using the automaton kmp.
 * For every found string with id ID, it increments freq[ID]
 * (NOTE(review): presumably the freq[ID].occur counter -- confirm).
 * Strings with non-zero frequency are put into a link-list
 * (NOTE(review): presumably the nonzeroes list, through the n member of
 * struct kmp_result -- confirm against the implementation).
 */
void kmp_search(struct kmp *kmp, const byte *str, struct list *nonzeroes, struct kmp_result *freq);
