ENG  RUSTimus Online Judge
Online Judge
Problems
Authors
Online contests
About Online Judge
Frequently asked questions
Site news
Webboard
Links
Problem set
Submit solution
Judge status
Guide
Register
Update your info
Authors ranklist
Current contest
Scheduled contests
Past contests
Rules
back to board

Discussion of Problem 1269. Obscene Words Filter

Aho-corasick whith compressed suffix tree
Posted by George Agapov 3 Nov 2010 23:26
Can anyone give me an advice, how to build aho-corasick algorithm, that's using a compressed suffix tree. I wrote one, but it uses uncompressed tree, which results in MLE on test 7. I understood, how to build the compressed tree, but have no idea, how to cooperate it with aho-corasick.

P.S. I bind my compressed suffix tree realisation, if someone will be interesed.


#include <cstdio>

using namespace std;

typedef unsigned short id;
typedef unsigned char uc;
typedef unsigned int ui;

const id DICT_SIZE = 10005, NONE = DICT_SIZE + 1;

ui N, M;

uc wbuf[100001];
ui wbs;
uc buf [900001];
ui bfs;

void get_line() {
    bfs = 0;
    uc ch = getchar();
    while (ch != '\n') {
        buf[bfs] = ch;
        ch = getchar();
        bfs++;
    }
}

void get_word() {
    uc ch = getchar();
    while (ch != '\n') {
        if (ch != 0 && ch != '\r') {
            wbuf[wbs] = ch;
            wbs++;
        }
        ch = getchar();
    }
}

struct pstr {
    id start, end;

    pstr() {
        start = end = 0;
    }

    pstr(ui s, ui e) {
        start = s;
        end = e;
    }

    int len() {
        return end - start;
    }

    void cut_begin(ui s) {
        start = start + s;
    }

    void cut_end(ui e) {
        end = start + e;
    }

    void print() {
        for (int i = 0; i < len(); i++)putchar((*this)[i]);
        putchar('\n');
    }
    uc & operator[](ui);
    pstr & operator=(const pstr);
};

uc & pstr::operator[](ui i) {
    return wbuf[start + i];
}

pstr& pstr::operator =(const pstr p) {
    this->end = p.end;
    this->start = p.start;
    return *this;
}

struct vert {
    pstr p;
    id length, parent, index, g [256];
    uc ac;
    bool leaf;
    id & operator[](uc);
    void addstr(pstr & b, id & wlen);
    vert & c(uc);

    void init(id ind) {
        for (id i = 0; i < 256; i++)
            g[i] = NONE;
        index = ind;
    }

    void print() {
        printf("index=%d ac='%c' parent=%d leaf=%d p: ", index, ac, parent, leaf);
        p.print();
    }
    bool search(uc * str, ui &st, ui & size);
};
vert dict [DICT_SIZE + 2]; //0 - root
id dinc = 1;

id & vert::operator[](uc ch) {
    return g[ch];
}

id create_vect() {
    dict[dinc].init(dinc);
    dinc++;
    return dinc - 1;
}

vert & vert::c(uc ch) {
    return dict[g[ch]];
}

void vert::addstr(pstr & b, id & wlen) {
    ui i;
    for (i = 0; b[i] == p[i] && i < p.len() && i < b.len(); i++);
    if (i == p.len()) {//p - полностью подстрока b
        if (g[b[i]] != NONE) {
            if (b.len() == i) {
                leaf = true;
                length = wlen;
            } else {
                id ind = g[b[i]];
                b.cut_begin(i + 1);
                dict[ind].addstr(b, wlen);
            }
        } else {
            if (b.len() == i) {
                leaf = true;
                length = wlen;
            } else {
                id n = create_vect();
                g[b[i]] = n;
                vert & v = dict[n];
                v.ac = b[i];
                v.p = b;
                v.p.cut_begin(i + 1);
                v.parent = index;
                v.leaf = true;
                v.length = wlen;
            }
        }
    } else {
        id n = create_vect();
        dict[parent][ac] = n;
        vert & nv = dict[n];
        nv.p = p;
        nv.p.cut_end(i);
        nv.parent = parent;
        nv.ac = ac;
        nv.g[p[i]] = index;
        ac = p[i];
        parent = n;
        p.cut_begin(i + 1);
        if (i == b.len()) {
            nv.leaf = true;
            nv.length = wlen;
        } else {
            id w = create_vect();
            vert & wv = dict[w];
            wv.p = b;
            wv.p.cut_begin(i + 1);
            wv.parent = n;
            wv.ac = b[i];
            wv.leaf = true;
            wv.length = wlen;
            nv.g[b[i]] = w;
        }
    }
}

int main(int argc, char** argv) {
#ifndef ONLINE_JUDGE
    freopen("input.txt", "r", stdin);
    freopen("output.txt", "w", stdout);
#endif
    scanf("%u", &N);
    getchar();
    wbs = 0;
    dict[0].init(0);
    dict[NONE].init(NONE);
    for (ui i = 0; i < N; i++) {
        ui _wbs = wbs;
        get_word();
        pstr ps(_wbs, wbs);
        if (ps.len() == 0)continue;
        id wlen = ps.len();
        dict[0].addstr(ps, wlen);
    }
    for (ui i = 0; i < dinc; i++)dict[i].print();
    //previous line prints the result of suffix tree building
    scanf("%u", &M);
    getchar();
    for (ui k = 0; k < M; k++) {
        get_line();
        if (bfs == 0)continue;
        for (ui i = 0; i < bfs; i++) {

        }
    }
    printf("Passed");
    return 0;
}

Better way to view: http://pastebin.com/M6TUztjc

Edited by author 03.11.2010 23:30