diff --git a/ZACLib/ZACLib.cpp b/ZACLib/ZACLib.cpp index ae457c4..4b2156a 100644 --- a/ZACLib/ZACLib.cpp +++ b/ZACLib/ZACLib.cpp @@ -6,6 +6,7 @@ #include "ZACLib.hpp" #include #include +#include namespace ZACLib { Replace::Replace() { @@ -167,6 +168,8 @@ namespace ZACLib { void Search::AddRule(const ZAC_SV& from) { if (from.empty()) return; + built = false; + if (from.size() > max_rule_len) max_rule_len = from.size(); int node = 0; for (const char i : from) { const auto c = static_cast(i); @@ -214,23 +217,38 @@ namespace ZACLib { } } } + built = true; } std::vector Search::Do(const ZAC_SV& input) const { std::vector result; if (trie.empty()) return result; + if (!built) { + for (size_t i = 0; i < input.size(); ++i) { + size_t best_len = 0; + size_t best_rule = Node::kInvalidOutput; + for (size_t rule_id = 0; rule_id < outputs.size(); ++rule_id) { + const auto& rule = outputs[rule_id]; + const size_t len = rule.size(); + if (len == 0 || len > i + 1) continue; + if (std::memcmp(input.data() + i + 1 - len, rule.data(), len) == 0 && len > best_len) { + best_len = len; + best_rule = rule_id; + } + } + + if (best_rule != Node::kInvalidOutput) { + result.push_back(Match{i + 1 - best_len, best_len, best_rule}); + } + } + return result; + } + int state = 0; for (size_t i = 0; i < input.size(); ++i) { const auto c = static_cast(input[i]); - while (state != 0 && trie[state].next[c] == -1) { - state = trie[state].fail; - } - if (trie[state].next[c] != -1) { - state = trie[state].next[c]; - } else { - state = 0; - } + state = trie[state].next[c]; if (trie[state].output_id != Node::kInvalidOutput) { result.push_back( @@ -250,6 +268,7 @@ namespace ZACLib { void Has::AddRule(const ZAC_SV& from) { if (from.empty()) return; + built = false; int node = 0; for (const unsigned char c : from) { if (trie[node].next[c] == -1) { @@ -259,6 +278,7 @@ namespace ZACLib { node = trie[node].next[c]; } trie[node].output_id = 0; + outputs.emplace_back(from.data(), from.size()); } void Has::Build() { @@ -285,9 +305,21 @@ namespace ZACLib { } } } + built = true; } bool Has::Do(const ZAC_SV& input) const { + if (!built) { + for (size_t i = 0; i < input.size(); ++i) { + for (const auto& rule : outputs) { + const size_t len = rule.size(); + if (len == 0 || len > i + 1) continue; + if (std::memcmp(input.data() + i + 1 - len, rule.data(), len) == 0) return true; + } + } + return false; + } + int state = 0; for (const unsigned char c : input) { state = trie[state].next[c]; @@ -300,4 +332,5 @@ namespace ZACLib { } return false; } + } // namespace ZACLIB diff --git a/ZACLib/ZACLib.hpp b/ZACLib/ZACLib.hpp index af98317..bedd888 100644 --- a/ZACLib/ZACLib.hpp +++ b/ZACLib/ZACLib.hpp @@ -44,6 +44,7 @@ namespace ZACLib { private: std::vector trie; std::vector outputs; + bool built = false; size_t max_rule_len = 0; }; @@ -56,8 +57,10 @@ namespace ZACLib { private: std::vector trie; + std::vector outputs; + bool built = false; }; } // namespace ZACLIB -#endif // ZACLIB_HPP \ No newline at end of file +#endif // ZACLIB_HPP diff --git a/ZACLib/ZACLib_single.hpp b/ZACLib/ZACLib_single.hpp index 2592784..faba2f2 100644 --- a/ZACLib/ZACLib_single.hpp +++ b/ZACLib/ZACLib_single.hpp @@ -30,6 +30,7 @@ #include #include #include +#include namespace ZACLib { @@ -237,7 +238,7 @@ namespace ZACLib { } private: - std::vector trie{Node{}}; + std::vector trie; std::vector outputs; size_t max_rule_len = 0; }; @@ -254,6 +255,7 @@ namespace ZACLib { void AddRule(const ZAC_SV& from) { if (from.empty()) return; + built = false; int node = 0; for (const char i : from) { const auto c = static_cast(i); @@ -297,23 +299,38 @@ namespace ZACLib { } } } + built = true; } std::vector Do(const ZAC_SV& input) const { std::vector result; if (trie.empty()) return result; + if (!built) { + for (size_t i = 0; i < input.size(); ++i) { + size_t best_len = 0; + size_t best_rule = Node::kInvalidOutput; + for (size_t rule_id = 0; rule_id < outputs.size(); ++rule_id) { + const auto& rule = outputs[rule_id]; + const size_t len = rule.size(); + if (len == 0 || len > i + 1) continue; + if (std::memcmp(input.data() + i + 1 - len, rule.data(), len) == 0 && len > best_len) { + best_len = len; + best_rule = rule_id; + } + } + + if (best_rule != Node::kInvalidOutput) { + result.push_back(Match{i + 1 - best_len, best_len, best_rule}); + } + } + return result; + } + int state = 0; for (size_t i = 0; i < input.size(); ++i) { const auto c = static_cast(input[i]); - while (state != 0 && trie[state].next[c] == -1) { - state = trie[state].fail; - } - if (trie[state].next[c] != -1) { - state = trie[state].next[c]; - } else { - state = 0; - } + state = trie[state].next[c]; if (trie[state].output_id != Node::kInvalidOutput) { result.push_back( Match{ @@ -328,8 +345,9 @@ namespace ZACLib { } private: - std::vector trie{Node{}}; + std::vector trie; std::vector outputs; + bool built = false; }; @@ -339,6 +357,7 @@ namespace ZACLib { void AddRule(const ZAC_SV& from) { if (from.empty()) return; + built = false; int node = 0; for (const unsigned char c : from) { if (trie[node].next[c] == -1) { @@ -348,6 +367,7 @@ namespace ZACLib { node = trie[node].next[c]; } trie[node].output_id = 0; + outputs.emplace_back(from.data(), from.size()); } void Build() { @@ -375,9 +395,21 @@ namespace ZACLib { } } } + built = true; } bool Do(const ZAC_SV& input) const { + if (!built) { + for (size_t i = 0; i < input.size(); ++i) { + for (const auto& rule : outputs) { + const size_t len = rule.size(); + if (len == 0 || len > i + 1) continue; + if (std::memcmp(input.data() + i + 1 - len, rule.data(), len) == 0) return true; + } + } + return false; + } + int state = 0; for (const unsigned char c : input) { state = trie[state].next[c]; @@ -392,7 +424,9 @@ namespace ZACLib { } private: - std::vector trie{Node{}}; + std::vector trie; + std::vector outputs; + bool built = false; }; } // namespace ZACLIB