Добавил:
itan_hunt
Опубликованный материал нарушает ваши авторские права? Сообщите нам.
Вуз:
Предмет:
Файл:lab6 / Aho-Corasick
.cpp#include "Aho-Corasick.h"
Trie::Trie(set<string> patterns): root(new Node(make_pair(-1, nullptr))) {
for(auto &pattern: patterns) {
Node* state = root;
for(auto &letter: pattern) {
auto outEdge = state->outEdges.find(letter);
if (outEdge == state->outEdges.end()) {
Node* node = new Node(make_pair(letter, state));
state->outEdges.emplace(letter, node);
}
state = state->outEdges[letter];
}
state->pattern = pattern;
}
makeLinks(root);
queue<Node*> myQueue;
myQueue.push(root);
while(!myQueue.empty()) {
Node* parent = myQueue.front();
myQueue.pop();
for(auto &outEdge: parent->outEdges) {
makeLinks(outEdge.second);
myQueue.push(outEdge.second);
}
}
}
void Trie::makeLinks(Node* &node) {
if (node == root || node->inEdge.second == root) {
node->failure = root;
return;
}
Node* failure = node->inEdge.second->failure, *output = nullptr;
char edgeToFind = node->inEdge.first;
while(true) {
auto outEdge = failure->outEdges.find(edgeToFind);
if (outEdge != failure->outEdges.end()) {
failure = outEdge->second;
if (!failure->pattern.empty()) {
output = failure;
} else if (failure->output) {
output = failure->output;
}
break;
} else if (failure == root) {
break;
} else
failure = failure->failure;
}
node->failure = failure;
node->output = output;
}
map<string, set<lli>> AhoCorasick(string &text, Trie &trie) {
map<string, set<lli>> solutions;
Node* state = trie.root;
lli i = 0, size = text.size();
while (i < size) {
auto outEdge = state->outEdges.find(text[i]);
if (outEdge != state->outEdges.end()) {
state = outEdge->second;
Node* output = state->output;
while(output) {
solutions[output->pattern].emplace(i - output->pattern.size() + 2);
output = output->output;
}
if (!state->pattern.empty()) {
solutions[state->pattern].emplace(i - state->pattern.size() + 2);
if (state->outEdges.empty()) {
state = state->failure;
}
}
i++;
} else if (state == trie.root){
i++;
} else {
state = state->failure;
}
}
return solutions;
}
set<string> split(string &myTemplate, char delim){
string buffer{""};
set<string> patterns;
int i = 0;
for(auto letter:myTemplate) {
if(letter != delim) {
buffer += letter;
} else if(letter == delim && buffer != "") {
patterns.emplace(buffer);
buffer = "";
}
i++;
}
if(buffer != "") {
patterns.emplace(buffer);
}
return patterns;
}
set<lli> AhoCorasickWithJoker(string &text, string &myTemplate, char joker) {
set<string> patterns = split(myTemplate, joker);
Trie trie(patterns);
map<string, set<lli>> myTemplateSolutions = AhoCorasick(myTemplate, trie),
textSolutions = AhoCorasick(text, trie);
lli textSize = text.size(), countOfPatterns = 0;
vector<lli> entries(textSize, 0);
for(auto &pattern: patterns) {
set<lli> templateSolution = myTemplateSolutions[pattern],
textSolution = textSolutions[pattern];
countOfPatterns += templateSolution.size();
for(auto &textPosition: textSolution) {
for(auto &templatePosition: templateSolution) {
lli index = textPosition - templatePosition;
if (index > -1) {
entries[index]++;
}
}
}
}
set<lli> templateEntries;
for(int p = 0; p < textSize; p++) {
if (entries[p] == countOfPatterns) {
templateEntries.emplace(p + 1);
}
}
return templateEntries;
}
Соседние файлы в папке lab6