Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
cmake-build*/
**/cmake-build*/
/.idea
!/.idea/dictionaries
/.idea/
!/.idea/dictionaries

**/localtest*/
2 changes: 1 addition & 1 deletion .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions ZACLib/Android.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# ZACLib/Android.mk
# ndk-build module description: compiles ZACLib.cpp into the static library
# module "ZACLib".
LOCAL_PATH := $(call my-dir)

include $(CLEAR_VARS)

LOCAL_MODULE := ZACLib
LOCAL_SRC_FILES := ZACLib.cpp
LOCAL_C_INCLUDES := $(LOCAL_PATH)
# Export the header directory so that modules listing ZACLib in
# LOCAL_STATIC_LIBRARIES can include its headers without repeating the path
# (mirrors the PUBLIC include directory declared in ZACLib/CMakeLists.txt).
LOCAL_EXPORT_C_INCLUDES := $(LOCAL_PATH)

LOCAL_CPPFLAGS := -std=c++11 # or higher

include $(BUILD_STATIC_LIBRARY)
8 changes: 3 additions & 5 deletions ZACLib/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@

set(CMAKE_CXX_STANDARD 11) # or higher

add_library(ZACLib STATIC "ZACLib.cpp") # if use source
# ZACLib/CMakeLists.txt
add_library(ZACLib STATIC "ZACLib.cpp")
target_include_directories(ZACLib PUBLIC ".")
set_target_properties(ZACLib PROPERTIES
CXX_STANDARD 11
CXX_STANDARD 11 # or higher
CXX_STANDARD_REQUIRED YES
CXX_EXTENSIONS NO
)
19 changes: 19 additions & 0 deletions ZACLib/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# ZACLib/Makefile
# Builds ZACLib.cpp into the static archive libZACLib.a.
CXX := g++
CXXFLAGS := -I./ \
-std=c++11 # or higher

SRC := ZACLib.cpp
OBJ := $(SRC:.cpp=.o)
TARGET := libZACLib.a

# `all` and `clean` name actions, not files. Declaring them phony keeps them
# working even if a file called "all" or "clean" appears in this directory.
.PHONY: all clean

all: $(TARGET)

# Pack the object files into the static archive. $(AR) defaults to `ar` and
# can be overridden on the command line for cross toolchains.
$(TARGET): $(OBJ)
	$(AR) rcs $@ $^

# Compile each translation unit to an object file.
%.o: %.cpp
	$(CXX) $(CXXFLAGS) -c $< -o $@

# $(RM) is make's built-in `rm -f`.
clean:
	$(RM) $(OBJ) $(TARGET)
37 changes: 18 additions & 19 deletions ZACLib/ZACLib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
// Created by wanjiangzhi on 2026/2/24.
//

// ReSharper disable CppClassNeverUsed
#include "ZACLib.hpp"
#include <array>
#include <queue>
Expand All @@ -13,26 +12,26 @@ namespace ZACLib {
}

void Replace::AddRule(const ZAC_SV& from) {
AddRule(from, ZAC_SV("", 0));
AddRule(from, ZAC_SV(""));
}

void Replace::AddRule(const ZAC_SV& from, const ZAC_SV& to) {
if (from.empty()) return;
if (from.size() > max_rule_len) max_rule_len = from.size();
int node = 0;
for (const char i : from) {
for (const ZAC_CHAR i : from) {
const auto c = static_cast<unsigned char>(i);
if (trie[node].next[c] == -1) {
trie[node].next[c] = trie.size(); // NOLINT(*-narrowing-conversions)
trie[node].next[c] = Node::ToIndexOrThrow(trie.size());
trie.emplace_back();
}
node = trie[node].next[c];
}

if (from.size() > trie[node].match_len) {
trie[node].output_id = outputs.size(); // NOLINT(*-narrowing-conversions)
trie[node].output_id = outputs.size();
trie[node].match_len = from.size();
outputs.emplace_back(to.data(), to.size());
outputs.emplace_back(reinterpret_cast<const char*>(to.data()), to.size());
}
}

Expand Down Expand Up @@ -62,7 +61,7 @@ namespace ZACLib {
if (v != -1) {
trie[v].fail = trie[trie[u].fail].next[c];

int f = trie[v].fail;
const int f = trie[v].fail;
if (trie[f].match_len > trie[v].match_len) {
trie[v].match_len = trie[f].match_len;
trie[v].output_id = trie[f].output_id;
Expand All @@ -82,11 +81,11 @@ namespace ZACLib {

if (input.empty()) return result;
if (max_rule_len == 0) {
result.append(input.data(), input.size());
result.append(reinterpret_cast<const char*>(input.data()), input.size());
return result;
}

const auto invalid_output = Node::kInvalidOutput;
constexpr auto invalid_output = Node::kInvalidOutput;
const size_t ring_size = max_rule_len + 1;
std::vector<size_t> pending_start(ring_size, invalid_output);
std::vector<size_t> pending_len(ring_size, 0);
Expand Down Expand Up @@ -121,24 +120,23 @@ namespace ZACLib {
size_t cursor = 0;

auto emit_until = [&](const size_t upper_bound) {
while (cursor < upper_bound) {
while (cursor < upper_bound) { // NOLINT
size_t best_len = 0;
size_t best_output = invalid_output;
get_best(cursor, best_len, best_output);
if (best_len == 0) {
++cursor;
continue;
}

result.append(input.data() + last_pos, cursor - last_pos);
result.append(reinterpret_cast<const char*>(input.data() + last_pos), cursor - last_pos);
result.append(outputs[best_output]);
cursor += best_len;
last_pos = cursor;
}
};

for (size_t i = 0; i < input.size(); ++i) {
const unsigned char c = input[i];
const auto c = static_cast<unsigned char>(input[i]);
state = trie[state].next[c];

if (trie[state].output_id != invalid_output) {
Expand All @@ -154,7 +152,7 @@ namespace ZACLib {
emit_until(input.size());

if (last_pos < input.size()) {
result.append(input.data() + last_pos, input.size() - last_pos);
result.append(reinterpret_cast<const char*>(input.data() + last_pos), input.size() - last_pos);
}

return result;
Expand All @@ -168,19 +166,20 @@ namespace ZACLib {
void Search::AddRule(const ZAC_SV& from) {
if (from.empty()) return;
int node = 0;
for (const char i : from) {
for (const ZAC_CHAR i : from) {
const auto c = static_cast<unsigned char>(i);
if (trie[node].next[c] == -1) {
trie[node].next[c] = trie.size(); // NOLINT(*-narrowing-conversions)
trie[node].next[c] = Node::ToIndexOrThrow(trie.size());
trie.emplace_back();
}
node = trie[node].next[c];
}

if (from.size() > trie[node].match_len) {
trie[node].output_id = outputs.size(); // NOLINT(*-narrowing-conversions)

trie[node].output_id = outputs.size();
trie[node].match_len = from.size();
outputs.emplace_back(from.data(), from.size());
outputs.emplace_back(reinterpret_cast<const char*>(from.data()), from.size());
}
}

Expand Down Expand Up @@ -253,7 +252,7 @@ namespace ZACLib {
int node = 0;
for (const unsigned char c : from) {
if (trie[node].next[c] == -1) {
trie[node].next[c] = trie.size(); // NOLINT(*-narrowing-conversions)
trie[node].next[c] = Node::ToIndexOrThrow(trie.size());
trie.emplace_back();
}
node = trie[node].next[c];
Expand Down
1 change: 0 additions & 1 deletion ZACLib/ZACLib.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
// Created by wanjiangzhi on 2026/2/24.
//

// ReSharper disable CppNonExplicitConvertingConstructor
#ifndef ZACLIB_HPP
#define ZACLIB_HPP

Expand Down
17 changes: 17 additions & 0 deletions ZACLib/ZACLib.vcxproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- ZACLib/ZACLib.vcxproj (the XML declaration above must remain the first line of the file) -->
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<ConfigurationType>StaticLibrary</ConfigurationType>
<CharacterSet>Unicode</CharacterSet>
<CppStandard>c++11</CppStandard> <!-- or higher -->
</PropertyGroup>

<ItemGroup>
<ClCompile Include="ZACLib.cpp" />
</ItemGroup>

<ItemGroup>
<!-- Headers are listed explicitly: a recursive wildcard can pick up unrelated or temporary headers and is not reliably handled by the Visual Studio project system. -->
<ClInclude Include="ZACLib.hpp" />
<ClInclude Include="ZACLib_Types.hpp" />
</ItemGroup>
Comment on lines +14 to +16
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Using a broad wildcard .\**\*.h* can be fragile and might unintentionally include temporary or unrelated header files. It's generally safer and more explicit to list the header files directly.

  <ItemGroup>
    <ClInclude Include="ZACLib.hpp" />
    <ClInclude Include="ZACLib_Types.hpp" />
  </ItemGroup>

</Project>
101 changes: 78 additions & 23 deletions ZACLib/ZACLib_Types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,37 +7,82 @@
#define ZACLIB_TYPES_HPP

#if __cplusplus >= 201703L
#include <string_view>
#define HAS_STRING_VIEW
#endif

#ifdef HAS_STRING_VIEW
#include <string_view>
#else
#include <cstring>
#include <cstring>
#endif

#include <string>
#include <array>
#include <limits>
#include <stdexcept>

#if defined(__CHAR_UNSIGNED__) || (defined(_CHAR_UNSIGNED) && _CHAR_UNSIGNED)
#define CHAR_UNSIGNED
#endif

namespace ZACLib {
#ifdef CHAR_UNSIGNED
#define ArmCastChar(ptr) reinterpret_cast<const ZAC_CHAR*>(ptr)
using ZAC_CHAR = unsigned char;
#else
using ZAC_CHAR = char;
#define ArmCastChar(ptr) (ptr)
#endif

#if __cplusplus >= 201703L
using ZAC_SV = std::string_view;
class ZAC_SV : public std::basic_string_view<ZAC_CHAR> {
public:
using std::basic_string_view<ZAC_CHAR>::basic_string_view;
ZAC_SV() = default;

ZAC_SV(const std::string& s) noexcept
: std::basic_string_view<ZAC_CHAR>(reinterpret_cast<const ZAC_CHAR*>(s.data()), s.size()) {}

#ifdef CHAR_UNSIGNED
ZAC_SV(const char* s, const size_t n)
: std::basic_string_view<ZAC_CHAR>(reinterpret_cast<const ZAC_CHAR*>(s), n) {}

ZAC_SV(const char* s)
: std::basic_string_view<ZAC_CHAR>(
reinterpret_cast<const ZAC_CHAR*>(s),
s ? std::char_traits<char>::length(s) : 0
) {}
#endif
};
#else
class ZAC_SV {
const char* m_data;
const size_t m_size;
const ZAC_CHAR* m_data;
const std::size_t m_size;

public:
ZAC_SV() : m_data(nullptr),
m_size(0) {}

ZAC_SV(const char* d, const size_t s) : m_data(d),
m_size(s) {}
ZAC_SV(const ZAC_CHAR* d, const std::size_t s) : m_data(d),
m_size(s) {}

#ifdef CHAR_UNSIGNED
ZAC_SV(const char* d, const size_t s) : m_data(reinterpret_cast<const ZAC_CHAR*>(d)),
m_size(s) {}
#endif

ZAC_SV(const std::string& s) : m_data(s.c_str()),
m_size(s.size()) {} // 模仿std::string_view,不禁止隐式构造
ZAC_SV(const std::string& s) noexcept : m_data(ArmCastChar(s.data())),
m_size(s.size()) {}

ZAC_SV(const char* d) : m_data(d),
m_size(d ? std::strlen(d) : 0) {} // 模仿std::string_view,不禁止隐式构造
ZAC_SV(const ZAC_CHAR* d) : m_data(d),
m_size(d ? std::strlen(reinterpret_cast<const char*>(d)) : 0) {}

const char* data() const noexcept {
#ifdef CHAR_UNSIGNED
ZAC_SV(const char* d) : m_data(reinterpret_cast<const ZAC_CHAR*>(d)),
m_size(d ? std::strlen(d) : 0) {}
#endif

const ZAC_CHAR* data() const noexcept {
return m_data;
}

Expand All @@ -49,22 +94,31 @@ namespace ZACLib {
return m_size == 0;
}

const char* begin() const { return m_data; }
const char* end() const { return m_data + m_size; }
const char* cbegin() const { return m_data; }
const char* cend() const { return m_data + m_size; }
const ZAC_CHAR* begin() const { return m_data; }
const ZAC_CHAR* end() const { return m_data + m_size; }
const ZAC_CHAR* cbegin() const { return m_data; }
const ZAC_CHAR* cend() const { return m_data + m_size; }

const char& operator[](const size_t i) const { return m_data[i]; }
const ZAC_CHAR& operator[](const size_t i) const { return m_data[i]; }
};
#endif

struct Node {
std::array<int, 256> next{};
int fail;
// ReSharper disable once CppVariableCanBeMadeConstexpr
static const auto kInvalidOutput = std::numeric_limits<size_t>::max();
size_t output_id;
size_t match_len;
using value_type = int;
using next_type = std::array<value_type, 256>;
using size_type = std::size_t;
next_type next{};
value_type fail;
size_type output_id;
size_type match_len;
static constexpr auto kInvalidOutput = std::numeric_limits<size_type>::max();

static value_type ToIndexOrThrow(const size_type value) {
if (value > static_cast<size_type>(std::numeric_limits<value_type>::max())) {
throw std::overflow_error("Trie node count exceeds Node::value_type range");
}
return static_cast<value_type>(value);
}

Node() : fail(0),
output_id(kInvalidOutput),
Expand All @@ -73,3 +127,4 @@ namespace ZACLib {
}

#endif //ZACLIB_TYPES_HPP
// ReSharper enable CppNonExplicitConvertingConstructor
Loading