From de85c6f7c69e232360e36b003f388bc78dc83351 Mon Sep 17 00:00:00 2001 From: Leonid Meleshin Date: Thu, 20 Jun 2024 00:13:14 +0400 Subject: [PATCH 1/5] refactor(C++): store files in src dir --- c-plus-plus/.gitignore | 10 ++++++++-- c-plus-plus/Makefile | 13 +++++++------ c-plus-plus/benchmark.yml | 19 +++++++++++++------ c-plus-plus/{ => src}/collatz/MaxSequence.cpp | 0 c-plus-plus/{ => src}/mandelbrot/Simple.cpp | 0 c-plus-plus/{ => src}/primes/Atkin.cpp | 0 c-plus-plus/{ => src}/primes/Simple.cpp | 0 c-plus-plus/{ => src}/recursion/Tak.cpp | 0 c-plus-plus/{ => src}/treap/Naive.cpp | 0 9 files changed, 28 insertions(+), 14 deletions(-) rename c-plus-plus/{ => src}/collatz/MaxSequence.cpp (100%) rename c-plus-plus/{ => src}/mandelbrot/Simple.cpp (100%) rename c-plus-plus/{ => src}/primes/Atkin.cpp (100%) rename c-plus-plus/{ => src}/primes/Simple.cpp (100%) rename c-plus-plus/{ => src}/recursion/Tak.cpp (100%) rename c-plus-plus/{ => src}/treap/Naive.cpp (100%) diff --git a/c-plus-plus/.gitignore b/c-plus-plus/.gitignore index 50df97b..eb9e144 100644 --- a/c-plus-plus/.gitignore +++ b/c-plus-plus/.gitignore @@ -1,2 +1,8 @@ -*/* -!**.cpp \ No newline at end of file +# Ignore everything in any subdirectory +**/* + +# Un-ignore all directories +!*/ + +# Un-ignore .cpp files +!**/*.cpp \ No newline at end of file diff --git a/c-plus-plus/Makefile b/c-plus-plus/Makefile index 408f3a7..6055859 100644 --- a/c-plus-plus/Makefile +++ b/c-plus-plus/Makefile @@ -1,14 +1,11 @@ CXXFLAGS = --std=c++17 -flto -s -SCRIPTS = $(wildcard **/*.cpp) +SCRIPTS = $(wildcard ./source/*/*.cpp) BINARIES = $(SCRIPTS:%.cpp=%-clang) \ $(SCRIPTS:%.cpp=%-clang-static) \ $(SCRIPTS:%.cpp=%-gcc) \ $(SCRIPTS:%.cpp=%-gcc-static) \ - # $(SCRIPTS:%.cpp=%-graalvm) \ - # $(SCRIPTS:%.cpp=%-graalvm-static) \ - %-clang: %.cpp clang++ -O3 $(CXXFLAGS) -o $@ $< @@ -30,6 +27,10 @@ BINARIES = $(SCRIPTS:%.cpp=%-clang) \ all: $(BINARIES) -.PHONY: clean clean: - rm -f $(BINARIES) \ No newline at end of file + rm -f $(BINARIES) + +list: + @echo $(BINARIES) + +.PHONY: clean list \ No newline at end of file diff --git a/c-plus-plus/benchmark.yml b/c-plus-plus/benchmark.yml index 30aec2d..2d41e57 100644 --- a/c-plus-plus/benchmark.yml +++ b/c-plus-plus/benchmark.yml @@ -15,16 +15,23 @@ strategy: # command: 'graalvm.lli %s-graalvm' files: - - primes/Simple - - primes/Atkin + - title: 'primes/Simple' + file: 'src/primes/Simple' - - collatz/MaxSequence + - title: 'primes/Atkin' + file: 'src/primes/Atkin' - - mandelbrot/Simple + - title: 'collatz/MaxSequence' + file: 'src/collatz/MaxSequence' - - treap/Naive + - title: 'mandelbrot/Simple' + file: 'src/mandelbrot/Simple' - - recursion/Tak + - title: 'treap/Naive' + file: 'src/treap/Naive' + + - title: 'recursion/Tak' + file: 'src/recursion/Tak' exclude: - command: 'gcc' diff --git a/c-plus-plus/collatz/MaxSequence.cpp b/c-plus-plus/src/collatz/MaxSequence.cpp similarity index 100% rename from c-plus-plus/collatz/MaxSequence.cpp rename to c-plus-plus/src/collatz/MaxSequence.cpp diff --git a/c-plus-plus/mandelbrot/Simple.cpp b/c-plus-plus/src/mandelbrot/Simple.cpp similarity index 100% rename from c-plus-plus/mandelbrot/Simple.cpp rename to c-plus-plus/src/mandelbrot/Simple.cpp diff --git a/c-plus-plus/primes/Atkin.cpp b/c-plus-plus/src/primes/Atkin.cpp similarity index 100% rename from c-plus-plus/primes/Atkin.cpp rename to c-plus-plus/src/primes/Atkin.cpp diff --git a/c-plus-plus/primes/Simple.cpp b/c-plus-plus/src/primes/Simple.cpp similarity index 100% rename from c-plus-plus/primes/Simple.cpp rename to c-plus-plus/src/primes/Simple.cpp diff --git a/c-plus-plus/recursion/Tak.cpp b/c-plus-plus/src/recursion/Tak.cpp similarity index 100% rename from c-plus-plus/recursion/Tak.cpp rename to c-plus-plus/src/recursion/Tak.cpp diff --git a/c-plus-plus/treap/Naive.cpp b/c-plus-plus/src/treap/Naive.cpp similarity index 100% rename from c-plus-plus/treap/Naive.cpp rename to c-plus-plus/src/treap/Naive.cpp From 20da98828c9bd32989b92c35db7f98ab9be86a5f Mon Sep 17 00:00:00 2001 From: Leonid Meleshin Date: Thu, 20 Jun 2024 00:22:49 +0400 Subject: [PATCH 2/5] fix(CI): run bench on alternative filename changes --- benchmark.py | 2 +- run_on_changes.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmark.py b/benchmark.py index a984e93..7b3e06b 100755 --- a/benchmark.py +++ b/benchmark.py @@ -203,7 +203,7 @@ def writeResultsMarkdown(results): conf = loadConfiguration(configurationFilename) for run in conf['runs']: - if run['script']['title'] not in scripts and '*' not in scripts: + if run['script']['title'] not in scripts and run['script']['file'] not in scripts and '*' not in scripts: continue split = run['command']['command'].split() diff --git a/run_on_changes.py b/run_on_changes.py index 4084a18..b6990df 100755 --- a/run_on_changes.py +++ b/run_on_changes.py @@ -8,13 +8,13 @@ # Parsing changed files to find unique language-script pairs benchmark_commands = set() for file_path in changed_files: - parts = file_path.split('/') + parts = file_path.split('/', 2) if len(parts) < 3: continue # Skip invalid paths language, category, script = parts[0], parts[1], parts[2] script_name = script.split('.')[0] # Extract script name without extension # Assuming the benchmark script supports language and script parameters - command = f"python3 ./benchmark.py --languages {language} --scripts {category}/{script_name} --times 1" + command = f"python3 ./benchmark.py run --languages {language} --scripts {category}/{script_name} --times 1" benchmark_commands.add(command) # Executing benchmark commands From 6b39d49323e402a1c12782e0c6654f4c6a0b040c Mon Sep 17 00:00:00 2001 From: Leonid Meleshin Date: Thu, 20 Jun 2024 11:54:47 +0400 Subject: [PATCH 3/5] feat(C++/regexp): add stdlib bench --- c-plus-plus/Makefile | 19 ++++++---- c-plus-plus/src/regexp/stdlib.cpp | 63 +++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 8 deletions(-) create mode 100644 c-plus-plus/src/regexp/stdlib.cpp diff --git a/c-plus-plus/Makefile b/c-plus-plus/Makefile index 6055859..723aa6b 100644 --- a/c-plus-plus/Makefile +++ b/c-plus-plus/Makefile @@ -1,6 +1,9 @@ -CXXFLAGS = --std=c++17 -flto -s +CXXFLAGS = -O3 --std=c++20 -flto -s +INCLUDES = +OBJECTS = +LDFLAGS = -SCRIPTS = $(wildcard ./source/*/*.cpp) +SCRIPTS = $(wildcard ./src/*/*.cpp) BINARIES = $(SCRIPTS:%.cpp=%-clang) \ $(SCRIPTS:%.cpp=%-clang-static) \ @@ -8,22 +11,22 @@ BINARIES = $(SCRIPTS:%.cpp=%-clang) \ $(SCRIPTS:%.cpp=%-gcc-static) \ %-clang: %.cpp - clang++ -O3 $(CXXFLAGS) -o $@ $< + clang++ $(CXXFLAGS) -o $@ $< %-clang-static: %.cpp - clang++ -O3 $(CXXFLAGS) -static -o $@ $< + clang++ $(CXXFLAGS) -static -o $@ $< %-gcc : %.cpp - g++ -O3 $(CXXFLAGS) -o $@ $< + g++ $(CXXFLAGS) -o $@ $< %-gcc-static : %.cpp - g++ -O3 $(CXXFLAGS) -static -o $@ $< + g++ $(CXXFLAGS) -static -o $@ $< %-graalvm : %.cpp - $${GRAALVM_LLVM_TOOLCHAIN}/clang++ -O3 $(CXXFLAGS) -o $@ $< + $${GRAALVM_LLVM_TOOLCHAIN}/clang++ $(CXXFLAGS) -o $@ $< %-graalvm-static : %.cpp - $${GRAALVM_LLVM_TOOLCHAIN}/clang++ -O3 $(CXXFLAGS) -static -o $@ $< + $${GRAALVM_LLVM_TOOLCHAIN}/clang++ $(CXXFLAGS) -static -o $@ $< all: $(BINARIES) diff --git a/c-plus-plus/src/regexp/stdlib.cpp b/c-plus-plus/src/regexp/stdlib.cpp new file mode 100644 index 0000000..35dc573 --- /dev/null +++ b/c-plus-plus/src/regexp/stdlib.cpp @@ -0,0 +1,63 @@ +#include +#include +#include +#include +#include +#include +#include + +const size_t N_REGEX_ITERATIONS = 1000; +const size_t N_CHUNKS_ITERATIONS = 10; + +// array of regexs +const std::vector regexps = { + std::regex("[Hh]ello [Ww]orld[!]?"), + std::regex("[Hh]ello [Ww]orld[!]?$"), + std::regex("[0-9][0-9][0-9][0-9][0-9]"), + std::regex("[0-9][0-9][0-9][0-9][0-9]$"), + std::regex("[CV]V[a-z]askjdvc[a-z0-9A-Z]"), + std::regex("^[0-9][0-9]\\.[0-9][0-9]\\.[0-9][0-9]\\.[0-9][0-9]\\s[a-zA-Z]"), + std::regex("^z.*$"), + std::regex("^z.z.z.z.z.z.z.z.z.z.z.z.z.z*$"), + std::regex("^[z][z][z][z][z][z][z][z][z][z][z][z][z][z][z]\\S*$"), + std::regex("^[az]\\S\\D*$"), +}; + +const std::vector chunks = { + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "10.10.10.10 cjdhjhsdclkjhasl dflaskijd flkzsjd vlkszJ cvlsJKHD CVlaskjdvcl sdjvl ksDJv lkj lkj lzkjsdf lkj lzskdjv lkzj 66678", + "hello world !", + "bar!234ahem.. 'hello world !' ..c !hello worldzzz !zzzzzzzzzzzzzzzzzzz ksajf 874r hbsdfk i7r kjasdhf ikasuwhfia7234 kwaejhfkawehf7234h zzzzzzzzzzzzzzzzzzzzzzzzz dsssssssssssssssssssssssssssssssssssssssss aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ssssssssssssssssssssssssssssssssssssssss ddddddddddddddddddddddddddddddddddddddddddddddddd ffffffffffffffffffffffffffffffffffffffffffff ggggggggggggggggggggggggggggggggggggggggggggggggggg hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk llllllllllllllllllllllllllllllllllllllllllllllllllll qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr ttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttt yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo pppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppiauwdhfl,asdnfliehjrsgoirtjoersahjrouh waei7yi7q34f kwejfg hello world", + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", +}; + +int main(int argc, char *argv[]) +{ + const auto start_time = std::chrono::high_resolution_clock::now(); + + for (size_t i = 0; i < N_REGEX_ITERATIONS; i++) + { + for (const auto &re : regexps) + { + for (size_t j = 0; j < N_CHUNKS_ITERATIONS; j++) + { + for (auto chunk : chunks) + { + for (std::smatch sm; std::regex_search(chunk, sm, re);) + { + // std::cout << sm.str() << std::endl; + chunk = sm.suffix().str(); + } + } + } + } + } + + + const auto end_time = std::chrono::high_resolution_clock::now(); + const auto duration = std::chrono::duration_cast(end_time - start_time).count(); + + std::cout << "Execution time: " << duration << "ms" << std::endl; + + return 0; +} \ No newline at end of file From b1a81bd67a7b8887a1275a494c0e89aa261a5c5d Mon Sep 17 00:00:00 2001 From: Leonid Meleshin Date: Thu, 20 Jun 2024 12:43:40 +0400 Subject: [PATCH 4/5] feat(C++/regexp): add boost bench --- .gitmodules | 3 ++ c-plus-plus/Makefile | 28 +++++++-------- c-plus-plus/lib/regex-boost | 1 + c-plus-plus/src/regexp/boost.cpp | 62 ++++++++++++++++++++++++++++++++ 4 files changed, 79 insertions(+), 15 deletions(-) create mode 100644 .gitmodules create mode 160000 c-plus-plus/lib/regex-boost create mode 100644 c-plus-plus/src/regexp/boost.cpp diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..b23d6ab --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "c-plus-plus/lib/regex-boost"] + path = c-plus-plus/lib/regex-boost + url = https://github.com/boostorg/regex.git diff --git a/c-plus-plus/Makefile b/c-plus-plus/Makefile index 723aa6b..7d1b3a8 100644 --- a/c-plus-plus/Makefile +++ b/c-plus-plus/Makefile @@ -1,7 +1,7 @@ -CXXFLAGS = -O3 --std=c++20 -flto -s -INCLUDES = -OBJECTS = -LDFLAGS = +CXXFLAGS += -Wall -fPIC -std=c++17 -O3 +INCLUDES += -Ilib/regex-boost/include +OBJECTS += +LDFLAGS += -lstdc++ -lpthread SCRIPTS = $(wildcard ./src/*/*.cpp) @@ -11,29 +11,27 @@ BINARIES = $(SCRIPTS:%.cpp=%-clang) \ $(SCRIPTS:%.cpp=%-gcc-static) \ %-clang: %.cpp - clang++ $(CXXFLAGS) -o $@ $< + clang++ $(CXXFLAGS) $(INCLUDES) -o $@ $< $(LDFLAGS) %-clang-static: %.cpp - clang++ $(CXXFLAGS) -static -o $@ $< + clang++ $(CXXFLAGS) $(INCLUDES) -static -o $@ $< $(LDFLAGS) %-gcc : %.cpp - g++ $(CXXFLAGS) -o $@ $< + g++ $(CXXFLAGS) $(INCLUDES) -o $@ $< $(LDFLAGS) %-gcc-static : %.cpp - g++ $(CXXFLAGS) -static -o $@ $< + g++ $(CXXFLAGS) $(INCLUDES) -static -o $@ $< $(LDFLAGS) -%-graalvm : %.cpp - $${GRAALVM_LLVM_TOOLCHAIN}/clang++ $(CXXFLAGS) -o $@ $< +all: libraries $(BINARIES) -%-graalvm-static : %.cpp - $${GRAALVM_LLVM_TOOLCHAIN}/clang++ $(CXXFLAGS) -static -o $@ $< - -all: $(BINARIES) +libraries: + cd ./lib/regex-boost && mkdir -p bin && cd bin && cmake -DCMAKE_POSITION_INDEPENDENT_CODE=ON .. && cmake --build .; clean: rm -f $(BINARIES) + cd ./lib/regex-boost && rm -rf bin; list: @echo $(BINARIES) -.PHONY: clean list \ No newline at end of file +.PHONY: clean list all libraries \ No newline at end of file diff --git a/c-plus-plus/lib/regex-boost b/c-plus-plus/lib/regex-boost new file mode 160000 index 0000000..cb55913 --- /dev/null +++ b/c-plus-plus/lib/regex-boost @@ -0,0 +1 @@ +Subproject commit cb559132939670aa45eb25fd867fce0be8b08837 diff --git a/c-plus-plus/src/regexp/boost.cpp b/c-plus-plus/src/regexp/boost.cpp new file mode 100644 index 0000000..9a0ff05 --- /dev/null +++ b/c-plus-plus/src/regexp/boost.cpp @@ -0,0 +1,62 @@ +#include +#include +#include +#include +#include +#include +#include + +const size_t N_REGEX_ITERATIONS = 1000; +const size_t N_CHUNKS_ITERATIONS = 10; + +const std::vector regexps = { + boost::regex("[Hh]ello [Ww]orld[!]?"), + boost::regex("[Hh]ello [Ww]orld[!]?$"), + boost::regex("[0-9][0-9][0-9][0-9][0-9]"), + boost::regex("[0-9][0-9][0-9][0-9][0-9]$"), + boost::regex("[CV]V[a-z]askjdvc[a-z0-9A-Z]"), + boost::regex("^[0-9][0-9]\\.[0-9][0-9]\\.[0-9][0-9]\\.[0-9][0-9]\\s[a-zA-Z]"), + boost::regex("^z.*$"), + boost::regex("^z.z.z.z.z.z.z.z.z.z.z.z.z.z*$"), + boost::regex("^[z][z][z][z][z][z][z][z][z][z][z][z][z][z][z]\\S*$"), + boost::regex("^[az]\\S\\D*$"), +}; + +const std::vector chunks = { + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "10.10.10.10 cjdhjhsdclkjhasl dflaskijd flkzsjd vlkszJ cvlsJKHD CVlaskjdvcl sdjvl ksDJv lkj lkj lzkjsdf lkj lzskdjv lkzj 66678", + "hello world !", + "bar!234ahem.. 'hello world !' ..c !hello worldzzz !zzzzzzzzzzzzzzzzzzz ksajf 874r hbsdfk i7r kjasdhf ikasuwhfia7234 kwaejhfkawehf7234h zzzzzzzzzzzzzzzzzzzzzzzzz dsssssssssssssssssssssssssssssssssssssssss aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ssssssssssssssssssssssssssssssssssssssss ddddddddddddddddddddddddddddddddddddddddddddddddd ffffffffffffffffffffffffffffffffffffffffffff ggggggggggggggggggggggggggggggggggggggggggggggggggg hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk llllllllllllllllllllllllllllllllllllllllllllllllllll qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr ttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttt yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo pppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppiauwdhfl,asdnfliehjrsgoirtjoersahjrouh waei7yi7q34f kwejfg hello world", + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", +}; + +int main(int argc, char *argv[]) +{ + const auto start_time = std::chrono::high_resolution_clock::now(); + + for (size_t i = 0; i < N_REGEX_ITERATIONS; i++) + { + for (const auto &re : regexps) + { + for (size_t j = 0; j < N_CHUNKS_ITERATIONS; j++) + { + for (auto chunk : chunks) + { + for (boost::smatch sm; boost::regex_search(chunk, sm, re);) + { + // std::cout << sm.str() << std::endl; + chunk = sm.suffix().str(); + } + } + } + } + } + + + const auto end_time = std::chrono::high_resolution_clock::now(); + const auto duration = std::chrono::duration_cast(end_time - start_time).count(); + + std::cout << "Execution time: " << duration << "ms" << std::endl; + + return 0; +} \ No newline at end of file From 652baf86405e667a52e271f8db94b300407bc3a4 Mon Sep 17 00:00:00 2001 From: Leonid Meleshin Date: Thu, 20 Jun 2024 13:54:37 +0400 Subject: [PATCH 5/5] feat(C++/regexp): add hyperscan bench --- .github/workflows/benchmark_on_change.yml | 1 + .gitmodules | 3 + Dockerfile | 2 + c-plus-plus/Makefile | 4 +- c-plus-plus/lib/regex-hyperscan | 1 + c-plus-plus/src/regexp/hyperscan.cpp | 95 +++++++++++++++++++++++ 6 files changed, 105 insertions(+), 1 deletion(-) create mode 160000 c-plus-plus/lib/regex-hyperscan create mode 100644 c-plus-plus/src/regexp/hyperscan.cpp diff --git a/.github/workflows/benchmark_on_change.yml b/.github/workflows/benchmark_on_change.yml index ba84424..5081c51 100644 --- a/.github/workflows/benchmark_on_change.yml +++ b/.github/workflows/benchmark_on_change.yml @@ -17,6 +17,7 @@ jobs: - name: Checkout uses: actions/checkout@v4 with: + submodules: 'recursive' fetch-depth: 2 - name: Pull Docker image diff --git a/.gitmodules b/.gitmodules index b23d6ab..585f6e8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "c-plus-plus/lib/regex-boost"] path = c-plus-plus/lib/regex-boost url = https://github.com/boostorg/regex.git +[submodule "c-plus-plus/lib/regex-hyperscan"] + path = c-plus-plus/lib/regex-hyperscan + url = https://github.com/intel/hyperscan diff --git a/Dockerfile b/Dockerfile index b76b7ce..b0e5415 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,6 +10,7 @@ RUN apt update \ curl \ wget \ libc6 \ + ragel \ unzip \ gnulib \ gnupg2 \ @@ -24,6 +25,7 @@ RUN apt update \ build-essential \ ca-certificates \ libreadline-dev \ + libboost-all-dev \ apt-transport-https \ software-properties-common diff --git a/c-plus-plus/Makefile b/c-plus-plus/Makefile index 7d1b3a8..4924029 100644 --- a/c-plus-plus/Makefile +++ b/c-plus-plus/Makefile @@ -1,5 +1,5 @@ CXXFLAGS += -Wall -fPIC -std=c++17 -O3 -INCLUDES += -Ilib/regex-boost/include +INCLUDES += -Ilib/regex-boost/include -Ilib/regex-hyperscan/src -Ilib/regex-hyperscan/bin -Ilib/regex-hyperscan/bin/lib/libhs.a OBJECTS += LDFLAGS += -lstdc++ -lpthread @@ -26,10 +26,12 @@ all: libraries $(BINARIES) libraries: cd ./lib/regex-boost && mkdir -p bin && cd bin && cmake -DCMAKE_POSITION_INDEPENDENT_CODE=ON .. && cmake --build .; + cd ./lib/regex-hyperscan && mkdir -p bin && cd bin && cmake -DCMAKE_POSITION_INDEPENDENT_CODE=ON .. && cmake --build .; clean: rm -f $(BINARIES) cd ./lib/regex-boost && rm -rf bin; + cd ./lib/regex-hyperscan && rm -rf bin; list: @echo $(BINARIES) diff --git a/c-plus-plus/lib/regex-hyperscan b/c-plus-plus/lib/regex-hyperscan new file mode 160000 index 0000000..bc3b191 --- /dev/null +++ b/c-plus-plus/lib/regex-hyperscan @@ -0,0 +1 @@ +Subproject commit bc3b191ab56055e8560c7cdc161c289c4d76e3d2 diff --git a/c-plus-plus/src/regexp/hyperscan.cpp b/c-plus-plus/src/regexp/hyperscan.cpp new file mode 100644 index 0000000..684eb2a --- /dev/null +++ b/c-plus-plus/src/regexp/hyperscan.cpp @@ -0,0 +1,95 @@ +#include +#include +#include +#include +#include +#include +#include "hs.h" +#include "database.h" +#include "scratch.h" + +const size_t N_REGEX_ITERATIONS = 1000; +const size_t N_CHUNKS_ITERATIONS = 10; + +const std::vector rules = { + std::string("[Hh]ello [Ww]orld[!]?"), + std::string("[Hh]ello [Ww]orld[!]?$"), + std::string("[0-9][0-9][0-9][0-9][0-9]"), + std::string("[0-9][0-9][0-9][0-9][0-9]$"), + std::string("[CV]V[a-z]askjdvc[a-z0-9A-Z]"), + std::string("^[0-9][0-9]\\.[0-9][0-9]\\.[0-9][0-9]\\.[0-9][0-9]\\s[a-zA-Z]"), + std::string("^z.*$"), + std::string("^z.z.z.z.z.z.z.z.z.z.z.z.z.z*$"), + std::string("^[z][z][z][z][z][z][z][z][z][z][z][z][z][z][z]\\S*$"), + std::string("^[az]\\S\\D*$"), +}; + +std::vector> regexps; + +const std::vector chunks = { + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "10.10.10.10 cjdhjhsdclkjhasl dflaskijd flkzsjd vlkszJ cvlsJKHD CVlaskjdvcl sdjvl ksDJv lkj lkj lzkjsdf lkj lzskdjv lkzj 66678", + "hello world !", + "bar!234ahem.. 'hello world !' ..c !hello worldzzz !zzzzzzzzzzzzzzzzzzz ksajf 874r hbsdfk i7r kjasdhf ikasuwhfia7234 kwaejhfkawehf7234h zzzzzzzzzzzzzzzzzzzzzzzzz dsssssssssssssssssssssssssssssssssssssssss aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ssssssssssssssssssssssssssssssssssssssss ddddddddddddddddddddddddddddddddddddddddddddddddd ffffffffffffffffffffffffffffffffffffffffffff ggggggggggggggggggggggggggggggggggggggggggggggggggg hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk llllllllllllllllllllllllllllllllllllllllllllllllllll qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr ttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttt yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo pppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppiauwdhfl,asdnfliehjrsgoirtjoersahjrouh waei7yi7q34f kwejfg hello world", + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", +}; + +int main(int argc, char *argv[]) +{ + regexps.resize(rules.size()); + + for (size_t i = 0; i < rules.size(); i++) + { + hs_compile_error_t *compile_err; + hs_database_t *db; + hs_scratch_t *scratch; + + hs_error_t err = hs_compile(rules[i].c_str(), HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, HS_MODE_BLOCK, NULL, &db, &compile_err); + if (err != HS_SUCCESS) + { + std::cerr << "ERROR: Unable to compile pattern \"" << rules[i] << "\": " << compile_err->message << std::endl; + hs_free_compile_error(compile_err); + return -1; + } + + err = hs_alloc_scratch(db, &scratch); + if (err != HS_SUCCESS) + { + std::cerr << "ERROR: Unable to allocate scratch space. Exiting." << std::endl; + hs_free_database(db); + return -1; + } + + regexps[i] = std::make_tuple(db, scratch); + } + + const auto start_time = std::chrono::high_resolution_clock::now(); + + for (size_t i = 0; i < N_REGEX_ITERATIONS; i++) + { + for (const auto &re : regexps) + { + for (size_t j = 0; j < N_CHUNKS_ITERATIONS; j++) + { + for (auto chunk : chunks) + { + hs_error_t err = hs_scan(static_cast(std::get<0>(re)), chunk.c_str(), chunk.size(), 0, static_cast(std::get<1>(re)), [](unsigned int id, unsigned long long from, unsigned long long to, unsigned int flags, void *ctx) -> int { + return 0; + }, nullptr); + if (err != HS_SUCCESS) + { + std::cerr << "ERROR: Unable to scan chunk. Exiting." << std::endl; + return -1; + } + } + } + } + } + + const auto end_time = std::chrono::high_resolution_clock::now(); + const auto duration = std::chrono::duration_cast(end_time - start_time).count(); + + std::cout << "Execution time: " << duration << "ms" << std::endl; + + return 0; +} \ No newline at end of file