From 488225867778216c6cdeb94fc0b57324fc970a1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=9F=E8=8A=B7=E9=85=B1=E7=B4=AB?= Date: Tue, 3 Mar 2026 18:55:55 +0800 Subject: [PATCH 1/2] Sync single-header Search/Has safety and fix header state fields --- ZACLib/ZACLib.cpp | 33 +++++++++++++++++++++++++------- ZACLib/ZACLib.hpp | 4 +++- ZACLib/ZACLib_single.hpp | 41 ++++++++++++++++++++++++++++++---------- 3 files changed, 60 insertions(+), 18 deletions(-) diff --git a/ZACLib/ZACLib.cpp b/ZACLib/ZACLib.cpp index ae457c4..32051dd 100644 --- a/ZACLib/ZACLib.cpp +++ b/ZACLib/ZACLib.cpp @@ -167,6 +167,7 @@ namespace ZACLib { void Search::AddRule(const ZAC_SV& from) { if (from.empty()) return; + built = false; int node = 0; for (const char i : from) { const auto c = static_cast(i); @@ -185,6 +186,7 @@ namespace ZACLib { } void Search::Build() { + built = true; std::queue q; for (int c = 0; c < 256; ++c) { int nxt = trie[0].next[c]; @@ -223,13 +225,17 @@ namespace ZACLib { int state = 0; for (size_t i = 0; i < input.size(); ++i) { const auto c = static_cast(input[i]); - while (state != 0 && trie[state].next[c] == -1) { - state = trie[state].fail; - } - if (trie[state].next[c] != -1) { - state = trie[state].next[c]; + if (!built) { + while (state != 0 && trie[state].next[c] == -1) { + state = trie[state].fail; + } + if (trie[state].next[c] != -1) { + state = trie[state].next[c]; + } else { + state = 0; + } } else { - state = 0; + state = trie[state].next[c]; } if (trie[state].output_id != Node::kInvalidOutput) { @@ -250,6 +256,7 @@ namespace ZACLib { void Has::AddRule(const ZAC_SV& from) { if (from.empty()) return; + built = false; int node = 0; for (const unsigned char c : from) { if (trie[node].next[c] == -1) { @@ -262,6 +269,7 @@ namespace ZACLib { } void Has::Build() { + built = true; std::queue q; for (int c = 0; c < 256; ++c) { int nxt = trie[0].next[c]; @@ -290,7 +298,18 @@ namespace ZACLib { bool Has::Do(const ZAC_SV& input) const { int state = 0; for (const unsigned char c : input) { - state = trie[state].next[c]; + if (!built) { + while (state != 0 && trie[state].next[c] == -1) { + state = trie[state].fail; + } + if (trie[state].next[c] != -1) { + state = trie[state].next[c]; + } else { + state = 0; + } + } else { + state = trie[state].next[c]; + } int s = state; while (s != 0) { diff --git a/ZACLib/ZACLib.hpp b/ZACLib/ZACLib.hpp index af98317..1cae00d 100644 --- a/ZACLib/ZACLib.hpp +++ b/ZACLib/ZACLib.hpp @@ -44,6 +44,7 @@ namespace ZACLib { private: std::vector trie; std::vector outputs; + bool built = false; size_t max_rule_len = 0; }; @@ -56,8 +57,9 @@ namespace ZACLib { private: std::vector trie; + bool built = false; }; } // namespace ZACLIB -#endif // ZACLIB_HPP \ No newline at end of file +#endif // ZACLIB_HPP diff --git a/ZACLib/ZACLib_single.hpp b/ZACLib/ZACLib_single.hpp index 2592784..d91691e 100644 --- a/ZACLib/ZACLib_single.hpp +++ b/ZACLib/ZACLib_single.hpp @@ -237,7 +237,7 @@ namespace ZACLib { } private: - std::vector trie{Node{}}; + std::vector trie; std::vector outputs; size_t max_rule_len = 0; }; @@ -254,6 +254,7 @@ namespace ZACLib { void AddRule(const ZAC_SV& from) { if (from.empty()) return; + built = false; int node = 0; for (const char i : from) { const auto c = static_cast(i); @@ -271,6 +272,7 @@ namespace ZACLib { } void Build() { + built = true; std::queue q; for (int c = 0; c < 256; ++c) { int nxt = trie[0].next[c]; @@ -306,13 +308,17 @@ namespace ZACLib { int state = 0; for (size_t i = 0; i < input.size(); ++i) { const auto c = static_cast(input[i]); - while (state != 0 && trie[state].next[c] == -1) { - state = trie[state].fail; - } - if (trie[state].next[c] != -1) { - state = trie[state].next[c]; + if (!built) { + while (state != 0 && trie[state].next[c] == -1) { + state = trie[state].fail; + } + if (trie[state].next[c] != -1) { + state = trie[state].next[c]; + } else { + state = 0; + } } else { - state = 0; + state = trie[state].next[c]; } if (trie[state].output_id != Node::kInvalidOutput) { result.push_back( @@ -328,8 +334,9 @@ namespace ZACLib { } private: - std::vector trie{Node{}}; + std::vector trie; std::vector outputs; + bool built = false; }; @@ -339,6 +346,7 @@ namespace ZACLib { void AddRule(const ZAC_SV& from) { if (from.empty()) return; + built = false; int node = 0; for (const unsigned char c : from) { if (trie[node].next[c] == -1) { @@ -351,6 +359,7 @@ namespace ZACLib { } void Build() { + built = true; std::queue q; for (int c = 0; c < 256; ++c) { int nxt = trie[0].next[c]; @@ -380,7 +389,18 @@ namespace ZACLib { bool Do(const ZAC_SV& input) const { int state = 0; for (const unsigned char c : input) { - state = trie[state].next[c]; + if (!built) { + while (state != 0 && trie[state].next[c] == -1) { + state = trie[state].fail; + } + if (trie[state].next[c] != -1) { + state = trie[state].next[c]; + } else { + state = 0; + } + } else { + state = trie[state].next[c]; + } int s = state; while (s != 0) { @@ -392,7 +412,8 @@ namespace ZACLib { } private: - std::vector trie{Node{}}; + std::vector trie; + bool built = false; }; } // namespace ZACLIB From 7dde29c9a1f6b2fe5116b11d89021613c17f67e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=9F=E8=8A=B7=E9=85=B1=E7=B4=AB?= Date: Tue, 3 Mar 2026 19:12:51 +0800 Subject: [PATCH 2/2] Fix Search/Has unbuilt correctness and Build state safety --- ZACLib/ZACLib.cpp | 66 ++++++++++++++++++++++++---------------- ZACLib/ZACLib.hpp | 1 + ZACLib/ZACLib_single.hpp | 65 +++++++++++++++++++++++---------------- 3 files changed, 80 insertions(+), 52 deletions(-) diff --git a/ZACLib/ZACLib.cpp b/ZACLib/ZACLib.cpp index 32051dd..4b2156a 100644 --- a/ZACLib/ZACLib.cpp +++ b/ZACLib/ZACLib.cpp @@ -6,6 +6,7 @@ #include "ZACLib.hpp" #include #include +#include namespace ZACLib { Replace::Replace() { @@ -168,6 +169,7 @@ namespace ZACLib { void Search::AddRule(const ZAC_SV& from) { if (from.empty()) return; built = false; + if (from.size() > max_rule_len) max_rule_len = from.size(); int node = 0; for (const char i : from) { const auto c = static_cast(i); @@ -186,7 +188,6 @@ namespace ZACLib { } void Search::Build() { - built = true; std::queue q; for (int c = 0; c < 256; ++c) { int nxt = trie[0].next[c]; @@ -216,27 +217,38 @@ namespace ZACLib { } } } + built = true; } std::vector Search::Do(const ZAC_SV& input) const { std::vector result; if (trie.empty()) return result; - int state = 0; - for (size_t i = 0; i < input.size(); ++i) { - const auto c = static_cast(input[i]); - if (!built) { - while (state != 0 && trie[state].next[c] == -1) { - state = trie[state].fail; + if (!built) { + for (size_t i = 0; i < input.size(); ++i) { + size_t best_len = 0; + size_t best_rule = Node::kInvalidOutput; + for (size_t rule_id = 0; rule_id < outputs.size(); ++rule_id) { + const auto& rule = outputs[rule_id]; + const size_t len = rule.size(); + if (len == 0 || len > i + 1) continue; + if (std::memcmp(input.data() + i + 1 - len, rule.data(), len) == 0 && len > best_len) { + best_len = len; + best_rule = rule_id; + } } - if (trie[state].next[c] != -1) { - state = trie[state].next[c]; - } else { - state = 0; + + if (best_rule != Node::kInvalidOutput) { + result.push_back(Match{i + 1 - best_len, best_len, best_rule}); } - } else { - state = trie[state].next[c]; } + return result; + } + + int state = 0; + for (size_t i = 0; i < input.size(); ++i) { + const auto c = static_cast(input[i]); + state = trie[state].next[c]; if (trie[state].output_id != Node::kInvalidOutput) { result.push_back( @@ -266,10 +278,10 @@ namespace ZACLib { node = trie[node].next[c]; } trie[node].output_id = 0; + outputs.emplace_back(from.data(), from.size()); } void Has::Build() { - built = true; std::queue q; for (int c = 0; c < 256; ++c) { int nxt = trie[0].next[c]; @@ -293,23 +305,24 @@ namespace ZACLib { } } } + built = true; } bool Has::Do(const ZAC_SV& input) const { - int state = 0; - for (const unsigned char c : input) { - if (!built) { - while (state != 0 && trie[state].next[c] == -1) { - state = trie[state].fail; - } - if (trie[state].next[c] != -1) { - state = trie[state].next[c]; - } else { - state = 0; + if (!built) { + for (size_t i = 0; i < input.size(); ++i) { + for (const auto& rule : outputs) { + const size_t len = rule.size(); + if (len == 0 || len > i + 1) continue; + if (std::memcmp(input.data() + i + 1 - len, rule.data(), len) == 0) return true; } - } else { - state = trie[state].next[c]; } + return false; + } + + int state = 0; + for (const unsigned char c : input) { + state = trie[state].next[c]; int s = state; while (s != 0) { @@ -319,4 +332,5 @@ namespace ZACLib { } return false; } + } // namespace ZACLIB diff --git a/ZACLib/ZACLib.hpp b/ZACLib/ZACLib.hpp index 1cae00d..bedd888 100644 --- a/ZACLib/ZACLib.hpp +++ b/ZACLib/ZACLib.hpp @@ -57,6 +57,7 @@ namespace ZACLib { private: std::vector trie; + std::vector outputs; bool built = false; }; diff --git a/ZACLib/ZACLib_single.hpp b/ZACLib/ZACLib_single.hpp index d91691e..faba2f2 100644 --- a/ZACLib/ZACLib_single.hpp +++ b/ZACLib/ZACLib_single.hpp @@ -30,6 +30,7 @@ #include #include #include +#include namespace ZACLib { @@ -272,7 +273,6 @@ namespace ZACLib { } void Build() { - built = true; std::queue q; for (int c = 0; c < 256; ++c) { int nxt = trie[0].next[c]; @@ -299,27 +299,38 @@ namespace ZACLib { } } } + built = true; } std::vector Do(const ZAC_SV& input) const { std::vector result; if (trie.empty()) return result; - int state = 0; - for (size_t i = 0; i < input.size(); ++i) { - const auto c = static_cast(input[i]); - if (!built) { - while (state != 0 && trie[state].next[c] == -1) { - state = trie[state].fail; + if (!built) { + for (size_t i = 0; i < input.size(); ++i) { + size_t best_len = 0; + size_t best_rule = Node::kInvalidOutput; + for (size_t rule_id = 0; rule_id < outputs.size(); ++rule_id) { + const auto& rule = outputs[rule_id]; + const size_t len = rule.size(); + if (len == 0 || len > i + 1) continue; + if (std::memcmp(input.data() + i + 1 - len, rule.data(), len) == 0 && len > best_len) { + best_len = len; + best_rule = rule_id; + } } - if (trie[state].next[c] != -1) { - state = trie[state].next[c]; - } else { - state = 0; + + if (best_rule != Node::kInvalidOutput) { + result.push_back(Match{i + 1 - best_len, best_len, best_rule}); } - } else { - state = trie[state].next[c]; } + return result; + } + + int state = 0; + for (size_t i = 0; i < input.size(); ++i) { + const auto c = static_cast(input[i]); + state = trie[state].next[c]; if (trie[state].output_id != Node::kInvalidOutput) { result.push_back( Match{ @@ -356,10 +367,10 @@ namespace ZACLib { node = trie[node].next[c]; } trie[node].output_id = 0; + outputs.emplace_back(from.data(), from.size()); } void Build() { - built = true; std::queue q; for (int c = 0; c < 256; ++c) { int nxt = trie[0].next[c]; @@ -384,23 +395,24 @@ namespace ZACLib { } } } + built = true; } bool Do(const ZAC_SV& input) const { - int state = 0; - for (const unsigned char c : input) { - if (!built) { - while (state != 0 && trie[state].next[c] == -1) { - state = trie[state].fail; - } - if (trie[state].next[c] != -1) { - state = trie[state].next[c]; - } else { - state = 0; + if (!built) { + for (size_t i = 0; i < input.size(); ++i) { + for (const auto& rule : outputs) { + const size_t len = rule.size(); + if (len == 0 || len > i + 1) continue; + if (std::memcmp(input.data() + i + 1 - len, rule.data(), len) == 0) return true; } - } else { - state = trie[state].next[c]; } + return false; + } + + int state = 0; + for (const unsigned char c : input) { + state = trie[state].next[c]; int s = state; while (s != 0) { @@ -413,6 +425,7 @@ namespace ZACLib { private: std::vector trie; + std::vector outputs; bool built = false; }; } // namespace ZACLIB