Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
cmake-build*/
**/cmake-build*/
/.idea
!/.idea/dictionaries
!/.idea/dictionaries

**/localtest*/
2 changes: 1 addition & 1 deletion .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions ZACLib/Android.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# ZACLib/Android.mk
# ndk-build module description that compiles ZACLib.cpp into a static library.

# my-dir yields the directory containing this Android.mk; the paths below are relative to it.
LOCAL_PATH := $(call my-dir)

# Reset the LOCAL_* variables left over from any previously parsed module.
include $(CLEAR_VARS)

LOCAL_MODULE := ZACLib
LOCAL_SRC_FILES := ZACLib.cpp
# The module directory itself is added to the header search path.
LOCAL_C_INCLUDES := $(LOCAL_PATH)

LOCAL_CPPFLAGS := -std=c++11 # or higher

include $(BUILD_STATIC_LIBRARY)
6 changes: 2 additions & 4 deletions ZACLib/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@

set(CMAKE_CXX_STANDARD 11) # or higher

# ZACLib/CMakeLists.txt
add_library(ZACLib STATIC "ZACLib.cpp") # if use source
target_include_directories(ZACLib PUBLIC ".")
set_target_properties(ZACLib PROPERTIES
CXX_STANDARD 11
CXX_STANDARD 11 # or higner
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

这里有一个拼写错误,“higner” 应该是 “higher”。

    CXX_STANDARD 11 # or higher

CXX_STANDARD_REQUIRED YES
CXX_EXTENSIONS NO
)
81 changes: 57 additions & 24 deletions ZACLib/ZACLib_single.hpp → ZACLib/Deprecated/ZACLib_single.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,38 +22,72 @@
#define ZACLIB_TYPES_HPP

#if __cplusplus >= 201703L
#include <string_view>
#include <string_view>
#else
#include <cstring>
#include <cstring>
#endif

#include <string>
#include <array>
#include <limits>

#if defined(__ANDROID__) || defined(__arm__) || defined(__aarch64__)
using ZAC_CHAR = unsigned char;
#else
using ZAC_CHAR = char;
#endif


namespace ZACLib {
#if __cplusplus >= 201703L
using ZAC_SV = std::string_view;
class ZAC_SV : public std::basic_string_view<ZAC_CHAR> {
public:
using std::basic_string_view<ZAC_CHAR>::basic_string_view;
ZAC_SV() = default;

// ReSharper disable once CppRedundantCastExpression
ZAC_SV(const std::string& s) noexcept
: std::basic_string_view<ZAC_CHAR>(reinterpret_cast<const ZAC_CHAR*>(s.data()), s.size()) {}

#if defined(__ANDROID__) || defined(__arm__) || defined(__aarch64__)
ZAC_SV(const char* s, const size_t n)
: std::basic_string_view<ZAC_CHAR>(reinterpret_cast<const ZAC_CHAR*>(s), n) {}

ZAC_SV(const char* s)
: std::basic_string_view<ZAC_CHAR>(
reinterpret_cast<const ZAC_CHAR*>(s),
s ? std::char_traits<char>::length(s) : 0) {}
#endif
};
#else
class ZAC_SV {
const char* m_data;
const ZAC_CHAR* m_data;
const size_t m_size;

public:
ZAC_SV() : m_data(nullptr),
m_size(0) {}

ZAC_SV(const char* d, const size_t s) : m_data(d),
ZAC_SV(const ZAC_CHAR* d, const size_t s) : m_data(d),
m_size(s) {}

#if defined(__ANDROID__) || defined(__arm__) || defined(__aarch64__)
ZAC_SV(const char* d, const size_t s) : m_data(reinterpret_cast<const ZAC_CHAR*>(d)),
m_size(s) {}
#endif

ZAC_SV(const std::string& s) : m_data(s.c_str()),
// ReSharper disable once CppRedundantCastExpression
ZAC_SV(const std::string& s) noexcept : m_data(reinterpret_cast<const ZAC_CHAR*>(s.data())),
m_size(s.size()) {} // 模仿std::string_view,不禁止隐式构造

ZAC_SV(const char* d) : m_data(d),
m_size(d ? std::strlen(d) : 0) {} // 模仿std::string_view,不禁止隐式构造
ZAC_SV(const ZAC_CHAR* d) : m_data(d),
m_size(d ? std::strlen(reinterpret_cast<const char*>(d)) : 0) {} // 模仿std::string_view,不禁止隐式构造

const char* data() const noexcept {
#if defined(__ANDROID__) || defined(__arm__) || defined(__aarch64__)
ZAC_SV(const char* d) : m_data(reinterpret_cast<const ZAC_CHAR*>(d)),
m_size(d ? std::strlen(d) : 0) {}
#endif

const ZAC_CHAR* data() const noexcept {
return m_data;
}

Expand All @@ -65,12 +99,12 @@ namespace ZACLib {
return m_size == 0;
}

const char* begin() const { return m_data; }
const char* end() const { return m_data + m_size; }
const char* cbegin() const { return m_data; }
const char* cend() const { return m_data + m_size; }
const ZAC_CHAR* begin() const { return m_data; }
const ZAC_CHAR* end() const { return m_data + m_size; }
const ZAC_CHAR* cbegin() const { return m_data; }
const ZAC_CHAR* cend() const { return m_data + m_size; }

const char& operator[](const size_t i) const { return m_data[i]; }
const ZAC_CHAR& operator[](const size_t i) const { return m_data[i]; }
};
#endif

Expand All @@ -89,7 +123,6 @@ namespace ZACLib {
}

#endif //ZACLIB_TYPES_HPP

// ZACLib_Types.hpp end

namespace ZACLib {
Expand All @@ -105,7 +138,7 @@ namespace ZACLib {
if (from.empty()) return;
if (from.size() > max_rule_len) max_rule_len = from.size();
int node = 0;
for (const char i : from) {
for (const ZAC_CHAR i : from) {
const auto c = static_cast<unsigned char>(i);
if (trie[node].next[c] == -1) {
trie[node].next[c] = trie.size(); // NOLINT(*-narrowing-conversions)
Expand All @@ -116,7 +149,7 @@ namespace ZACLib {
if (from.size() > trie[node].match_len) {
trie[node].output_id = outputs.size(); // NOLINT(*-narrowing-conversions)
trie[node].match_len = from.size();
outputs.emplace_back(to.data(), to.size());
outputs.emplace_back(reinterpret_cast<const char*>(to.data()), to.size());
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

当 ZAC_CHAR 不是 unsigned char 时,to.data() 返回 const char*,此时 reinterpret_cast<const char*> 是不必要的,但更重要的是,这与 ZACLib_Types.hpp 中的 ArmCastChar 宏不一致,后者在需要时进行转换。为了保持一致性和清晰性,建议使用在 ZACLib_Types.hpp 中定义的 ArmCastChar 宏。

                outputs.emplace_back(ArmCastChar(to.data()), to.size());

}
}

Expand Down Expand Up @@ -158,7 +191,7 @@ namespace ZACLib {

if (input.empty()) return result;
if (max_rule_len == 0) {
result.append(input.data(), input.size());
result.append(reinterpret_cast<const char*>(input.data()), input.size());
return result;
}

Expand Down Expand Up @@ -206,15 +239,15 @@ namespace ZACLib {
continue;
}

result.append(input.data() + last_pos, cursor - last_pos);
result.append(reinterpret_cast<const char*>(input.data() + last_pos), cursor - last_pos);
result.append(outputs[best_output]);
cursor += best_len;
last_pos = cursor;
}
};

for (size_t i = 0; i < input.size(); ++i) {
const unsigned char c = input[i];
const auto c = static_cast<unsigned char>(input[i]);
state = trie[state].next[c];

if (trie[state].output_id != invalid_output) {
Expand All @@ -230,7 +263,7 @@ namespace ZACLib {
emit_until(input.size());

if (last_pos < input.size()) {
result.append(input.data() + last_pos, input.size() - last_pos);
result.append(reinterpret_cast<const char*>(input.data() + last_pos), input.size() - last_pos);
}

return result;
Expand All @@ -255,7 +288,7 @@ namespace ZACLib {
void AddRule(const ZAC_SV& from) {
if (from.empty()) return;
int node = 0;
for (const char i : from) {
for (const ZAC_CHAR i : from) {
const auto c = static_cast<unsigned char>(i);
if (trie[node].next[c] == -1) {
trie[node].next[c] = trie.size(); // NOLINT(*-narrowing-conversions)
Expand All @@ -266,7 +299,7 @@ namespace ZACLib {
if (from.size() > trie[node].match_len) {
trie[node].output_id = outputs.size(); // NOLINT(*-narrowing-conversions)
trie[node].match_len = from.size();
outputs.emplace_back(from.data(), from.size());
outputs.emplace_back(reinterpret_cast<const char*>(from.data()), from.size());
}
}

Expand Down
19 changes: 19 additions & 0 deletions ZACLib/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# ZACLib/Makefile
# Compiles ZACLib.cpp with g++ and links the resulting object into $(TARGET).
CXX := g++
# -I./ adds this directory to the header search path; C++11 is the minimum standard.
CXXFLAGS := -I./ \
-std=c++11 # or higher

SRC := ZACLib.cpp
# Substitution reference: every .cpp name in SRC becomes the matching .o name.
OBJ := $(SRC:.cpp=.o)
TARGET := ZACLib

# Default goal: build the target.
all: $(TARGET)

# NOTE(review): this rule links the objects with the compiler driver into a file
# named ZACLib; it does not call `ar` to create an archive. Confirm whether a
# static library (as in the CMake and Android.mk builds) was intended.
$(TARGET): $(OBJ)
$(CXX) $(OBJ) -o $(TARGET)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

此命令 $(CXX) $(OBJ) -o $(TARGET) 会创建一个可执行文件,而不是静态库。根据项目的其他构建文件(CMake, Android.mk),目标应该是生成一个静态库。要创建静态库(通常是 .a 文件),应该使用 ar 工具。例如:ar rcs libZACLib.a ZACLib.o。当前的 Makefile 无法正确生成库文件。


# Pattern rule: compile each .cpp into its .o ($< is the source, $@ is the object).
%.o: %.cpp
$(CXX) $(CXXFLAGS) -c $< -o $@

# Remove the object files and the linked output.
clean:
rm -f $(OBJ) $(TARGET)
37 changes: 18 additions & 19 deletions ZACLib/ZACLib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
// Created by wanjiangzhi on 2026/2/24.
//

// ReSharper disable CppClassNeverUsed
#include "ZACLib.hpp"
#include <array>
#include <queue>
Expand All @@ -13,26 +12,26 @@ namespace ZACLib {
}

void Replace::AddRule(const ZAC_SV& from) {
AddRule(from, ZAC_SV("", 0));
AddRule(from, ZAC_SV(""));
}

void Replace::AddRule(const ZAC_SV& from, const ZAC_SV& to) {
if (from.empty()) return;
if (from.size() > max_rule_len) max_rule_len = from.size();
int node = 0;
for (const char i : from) {
for (const ZAC_CHAR i : from) {
const auto c = static_cast<unsigned char>(i);
if (trie[node].next[c] == -1) {
trie[node].next[c] = trie.size(); // NOLINT(*-narrowing-conversions)
trie[node].next[c] = static_cast<Node::value_type>(trie.size());
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-medium medium

A narrowing conversion occurs here when casting trie.size() (which is size_t) to Node::value_type (which is int). On 64-bit systems, the number of nodes in the Trie can exceed the maximum value of a 32-bit signed integer (INT_MAX, approximately 2.1 billion). If this happens, the value will wrap around to a negative integer. Since this value is later used as an index into the trie vector (e.g., on line 28: node = trie[node].next[c]), a negative value will result in an out-of-bounds memory access, leading to a crash (Denial of Service) or potentially more severe memory corruption.

if (trie.size() >= static_cast<size_t>(std::numeric_limits<Node::value_type>::max())) {
    // Handle error: Trie too large
    return;
}
trie[node].next[c] = static_cast<Node::value_type>(trie.size());

trie.emplace_back();
}
node = trie[node].next[c];
}

if (from.size() > trie[node].match_len) {
trie[node].output_id = outputs.size(); // NOLINT(*-narrowing-conversions)
trie[node].output_id = static_cast<Node::value_type>(outputs.size());
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-medium medium

This line involves a critical narrowing conversion. Casting outputs.size() (a size_t) to Node::value_type (an int) can lead to a negative output_id if outputs.size() exceeds INT_MAX. This negative output_id, when later used as an index for the outputs vector in the Do method (line 132), will result in a massive out-of-bounds memory access on 64-bit systems, causing an immediate crash (Denial of Service). It's essential to prevent this overflow and handle cases where the number of patterns exceeds the capacity of Node::value_type.

if (outputs.size() >= static_cast<size_t>(std::numeric_limits<Node::value_type>::max())) {
    // Handle error: Too many patterns
    return;
}
trie[node].output_id = static_cast<Node::value_type>(outputs.size());

trie[node].match_len = from.size();
outputs.emplace_back(to.data(), to.size());
outputs.emplace_back(ArmCastChar(to.data()), to.size());
}
}

Expand Down Expand Up @@ -62,7 +61,7 @@ namespace ZACLib {
if (v != -1) {
trie[v].fail = trie[trie[u].fail].next[c];

int f = trie[v].fail;
const int f = trie[v].fail;
if (trie[f].match_len > trie[v].match_len) {
trie[v].match_len = trie[f].match_len;
trie[v].output_id = trie[f].output_id;
Expand All @@ -82,11 +81,11 @@ namespace ZACLib {

if (input.empty()) return result;
if (max_rule_len == 0) {
result.append(input.data(), input.size());
result.append(ArmCastChar(input.data()), input.size());
return result;
}

const auto invalid_output = Node::kInvalidOutput;
constexpr auto invalid_output = Node::kInvalidOutput;
const size_t ring_size = max_rule_len + 1;
std::vector<size_t> pending_start(ring_size, invalid_output);
std::vector<size_t> pending_len(ring_size, 0);
Expand Down Expand Up @@ -121,24 +120,23 @@ namespace ZACLib {
size_t cursor = 0;

auto emit_until = [&](const size_t upper_bound) {
while (cursor < upper_bound) {
while (cursor < upper_bound) { // NOLINT
size_t best_len = 0;
size_t best_output = invalid_output;
get_best(cursor, best_len, best_output);
if (best_len == 0) {
++cursor;
continue;
}

result.append(input.data() + last_pos, cursor - last_pos);
result.append(ArmCastChar(input.data() + last_pos), cursor - last_pos);
result.append(outputs[best_output]);
cursor += best_len;
last_pos = cursor;
}
};

for (size_t i = 0; i < input.size(); ++i) {
const unsigned char c = input[i];
const auto c = static_cast<unsigned char>(input[i]);
state = trie[state].next[c];

if (trie[state].output_id != invalid_output) {
Expand All @@ -154,7 +152,7 @@ namespace ZACLib {
emit_until(input.size());

if (last_pos < input.size()) {
result.append(input.data() + last_pos, input.size() - last_pos);
result.append(ArmCastChar(input.data() + last_pos), input.size() - last_pos);
}

return result;
Expand All @@ -168,19 +166,20 @@ namespace ZACLib {
void Search::AddRule(const ZAC_SV& from) {
if (from.empty()) return;
int node = 0;
for (const char i : from) {
for (const ZAC_CHAR i : from) {
const auto c = static_cast<unsigned char>(i);
if (trie[node].next[c] == -1) {
trie[node].next[c] = trie.size(); // NOLINT(*-narrowing-conversions)
trie[node].next[c] = static_cast<Node::value_type>(trie.size());
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-medium medium

Similar to the issue in Replace::AddRule, this narrowing conversion from size_t to int can lead to out-of-bounds memory access if the number of nodes in the Trie exceeds INT_MAX. This would cause a crash when the negative index is used in the Do method.

if (trie.size() >= static_cast<size_t>(std::numeric_limits<Node::value_type>::max())) {
    // Handle error
    return;
}
trie[node].next[c] = static_cast<Node::value_type>(trie.size());

trie.emplace_back();
}
node = trie[node].next[c];
}

if (from.size() > trie[node].match_len) {
trie[node].output_id = outputs.size(); // NOLINT(*-narrowing-conversions)

trie[node].output_id = outputs.size();
trie[node].match_len = from.size();
outputs.emplace_back(from.data(), from.size());
outputs.emplace_back(ArmCastChar(from.data()), from.size());
}
}

Expand Down Expand Up @@ -253,7 +252,7 @@ namespace ZACLib {
int node = 0;
for (const unsigned char c : from) {
if (trie[node].next[c] == -1) {
trie[node].next[c] = trie.size(); // NOLINT(*-narrowing-conversions)
trie[node].next[c] = static_cast<Node::value_type>(trie.size());
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-medium medium

This narrowing conversion from size_t to int can lead to out-of-bounds memory access if the number of nodes in the Trie exceeds INT_MAX, causing a crash in the Do method.

if (trie.size() >= static_cast<size_t>(std::numeric_limits<Node::value_type>::max())) {
    // Handle error
    return;
}
trie[node].next[c] = static_cast<Node::value_type>(trie.size());

trie.emplace_back();
}
node = trie[node].next[c];
Expand Down
1 change: 0 additions & 1 deletion ZACLib/ZACLib.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
// Created by wanjiangzhi on 2026/2/24.
//

// ReSharper disable CppNonExplicitConvertingConstructor
#ifndef ZACLIB_HPP
#define ZACLIB_HPP

Expand Down
Loading