Can anyone give me an advice, how to build aho-corasick algorithm, that's using a compressed suffix tree. I wrote one, but it uses uncompressed tree, which results in MLE on test 7. I understood, how to build the compressed tree, but have no idea, how to cooperate it with aho-corasick.
P.S. I bind my compressed suffix tree realisation, if someone will be interesed.
#include <cstdio>
using namespace std;
typedef unsigned short id;
typedef unsigned char uc;
typedef unsigned int ui;
const id DICT_SIZE = 10005, NONE = DICT_SIZE + 1;
ui N, M;
uc wbuf[100001];
ui wbs;
uc buf [900001];
ui bfs;
void get_line() {
bfs = 0;
uc ch = getchar();
while (ch != '\n') {
buf[bfs] = ch;
ch = getchar();
bfs++;
}
}
void get_word() {
uc ch = getchar();
while (ch != '\n') {
if (ch != 0 && ch != '\r') {
wbuf[wbs] = ch;
wbs++;
}
ch = getchar();
}
}
struct pstr {
id start, end;
pstr() {
start = end = 0;
}
pstr(ui s, ui e) {
start = s;
end = e;
}
int len() {
return end - start;
}
void cut_begin(ui s) {
start = start + s;
}
void cut_end(ui e) {
end = start + e;
}
void print() {
for (int i = 0; i < len(); i++)putchar((*this)[i]);
putchar('\n');
}
uc & operator[](ui);
pstr & operator=(const pstr);
};
uc & pstr::operator[](ui i) {
return wbuf[start + i];
}
pstr& pstr::operator =(const pstr p) {
this->end = p.end;
this->start = p.start;
return *this;
}
struct vert {
pstr p;
id length, parent, index, g [256];
uc ac;
bool leaf;
id & operator[](uc);
void addstr(pstr & b, id & wlen);
vert & c(uc);
void init(id ind) {
for (id i = 0; i < 256; i++)
g[i] = NONE;
index = ind;
}
void print() {
printf("index=%d ac='%c' parent=%d leaf=%d p: ", index, ac, parent, leaf);
p.print();
}
bool search(uc * str, ui &st, ui & size);
};
vert dict [DICT_SIZE + 2]; //0 - root
id dinc = 1;
id & vert::operator[](uc ch) {
return g[ch];
}
id create_vect() {
dict[dinc].init(dinc);
dinc++;
return dinc - 1;
}
vert & vert::c(uc ch) {
return dict[g[ch]];
}
void vert::addstr(pstr & b, id & wlen) {
ui i;
for (i = 0; b[i] == p[i] && i < p.len() && i < b.len(); i++);
if (i == p.len()) {//p - полностью подстрока b
if (g[b[i]] != NONE) {
if (b.len() == i) {
leaf = true;
length = wlen;
} else {
id ind = g[b[i]];
b.cut_begin(i + 1);
dict[ind].addstr(b, wlen);
}
} else {
if (b.len() == i) {
leaf = true;
length = wlen;
} else {
id n = create_vect();
g[b[i]] = n;
vert & v = dict[n];
v.ac = b[i];
v.p = b;
v.p.cut_begin(i + 1);
v.parent = index;
v.leaf = true;
v.length = wlen;
}
}
} else {
id n = create_vect();
dict[parent][ac] = n;
vert & nv = dict[n];
nv.p = p;
nv.p.cut_end(i);
nv.parent = parent;
nv.ac = ac;
nv.g[p[i]] = index;
ac = p[i];
parent = n;
p.cut_begin(i + 1);
if (i == b.len()) {
nv.leaf = true;
nv.length = wlen;
} else {
id w = create_vect();
vert & wv = dict[w];
wv.p = b;
wv.p.cut_begin(i + 1);
wv.parent = n;
wv.ac = b[i];
wv.leaf = true;
wv.length = wlen;
nv.g[b[i]] = w;
}
}
}
int main(int argc, char** argv) {
#ifndef ONLINE_JUDGE
freopen("input.txt", "r", stdin);
freopen("output.txt", "w", stdout);
#endif
scanf("%u", &N);
getchar();
wbs = 0;
dict[0].init(0);
dict[NONE].init(NONE);
for (ui i = 0; i < N; i++) {
ui _wbs = wbs;
get_word();
pstr ps(_wbs, wbs);
if (ps.len() == 0)continue;
id wlen = ps.len();
dict[0].addstr(ps, wlen);
}
for (ui i = 0; i < dinc; i++)dict[i].print();
//previous line prints the result of suffix tree building
scanf("%u", &M);
getchar();
for (ui k = 0; k < M; k++) {
get_line();
if (bfs == 0)continue;
for (ui i = 0; i < bfs; i++) {
}
}
printf("Passed");
return 0;
}
Better way to view:
http://pastebin.com/M6TUztjc Edited by author 03.11.2010 23:30