Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 34 additions & 60 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,6 @@ endif()

option(EMEL_ENABLE_TESTS "Build tests" ON)
option(EMEL_ENABLE_FUZZ "Build fuzz targets" OFF)
option(EMEL_ENABLE_TENSOR_PARSER_TEXT_MACHINES
"Build tensor/parser/text machine surfaces"
OFF
)

include(FetchContent)
include(cmake/sml_version.cmake)
Expand Down Expand Up @@ -50,24 +46,6 @@ target_link_libraries(emel
)


if(EMEL_ENABLE_TENSOR_PARSER_TEXT_MACHINES)
add_executable(mock_model_load
tools/mock_main.cpp
)

target_link_libraries(mock_model_load
PRIVATE
emel_core
)

target_include_directories(mock_model_load
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/src
${CMAKE_CURRENT_SOURCE_DIR}/include
${boost_sml_SOURCE_DIR}/include
)
endif()

if(EMEL_ENABLE_TESTS)
include(CTest)
enable_testing()
Expand Down Expand Up @@ -120,38 +98,36 @@ if(EMEL_ENABLE_TESTS)
tests/tensor/view/lifecycle_tests.cpp
)

if(EMEL_ENABLE_TENSOR_PARSER_TEXT_MACHINES)
list(APPEND EMEL_TEST_SOURCES
tests/gguf/loader/lifecycle_tests.cpp
tests/text/jinja/parser_tests.cpp
tests/text/jinja/lexer_tests.cpp
tests/text/jinja/formatter_tests.cpp
tests/text/formatter/formatter_tests.cpp
tests/text/encoders/common_tests.cpp
tests/text/encoders/bpe_tests.cpp
tests/text/encoders/spm_tests.cpp
tests/text/encoders/wpm_tests.cpp
tests/text/encoders/ugm_tests.cpp
tests/text/encoders/rwkv_tests.cpp
tests/text/encoders/plamo2_tests.cpp
tests/text/encoders/fallback_tests.cpp
tests/text/conditioner/text_conditioner_tests.cpp
tests/text/detokenizer/detokenizer_tests.cpp
tests/text/renderer/renderer_tests.cpp
tests/text/unicode/unicode_tests.cpp
tests/text/tokenizer/preprocessor_tests.cpp
tests/text/tokenizer/preprocessor_spm_tests.cpp
tests/text/tokenizer/preprocessor_wpm_tests.cpp
tests/text/tokenizer/preprocessor_rwkv_tests.cpp
tests/text/tokenizer/preprocessor_plamo2_tests.cpp
tests/text/tokenizer/preprocessor_fallback_tests.cpp
tests/text/tokenizer/bpe_regex_tests.cpp
tests/text/tokenizer/bpe_split_tests.cpp
tests/text/tokenizer/tokenizer_tests.cpp
tests/text/tokenizer/tokenizer_parity_tests.cpp
tests/text/tokenizer/tokenizer_action_guard_tests.cpp
)
endif()
list(APPEND EMEL_TEST_SOURCES
tests/gguf/loader/lifecycle_tests.cpp
tests/text/jinja/parser_tests.cpp
tests/text/jinja/lexer_tests.cpp
tests/text/jinja/formatter_tests.cpp
tests/text/formatter/formatter_tests.cpp
tests/text/encoders/common_tests.cpp
tests/text/encoders/bpe_tests.cpp
tests/text/encoders/spm_tests.cpp
tests/text/encoders/wpm_tests.cpp
tests/text/encoders/ugm_tests.cpp
tests/text/encoders/rwkv_tests.cpp
tests/text/encoders/plamo2_tests.cpp
tests/text/encoders/fallback_tests.cpp
tests/text/conditioner/text_conditioner_tests.cpp
tests/text/detokenizer/detokenizer_tests.cpp
tests/text/renderer/renderer_tests.cpp
tests/text/unicode/unicode_tests.cpp
tests/text/tokenizer/preprocessor_tests.cpp
tests/text/tokenizer/preprocessor_spm_tests.cpp
tests/text/tokenizer/preprocessor_wpm_tests.cpp
tests/text/tokenizer/preprocessor_rwkv_tests.cpp
tests/text/tokenizer/preprocessor_plamo2_tests.cpp
tests/text/tokenizer/preprocessor_fallback_tests.cpp
tests/text/tokenizer/bpe_regex_tests.cpp
tests/text/tokenizer/bpe_split_tests.cpp
tests/text/tokenizer/tokenizer_tests.cpp
tests/text/tokenizer/tokenizer_parity_tests.cpp
tests/text/tokenizer/tokenizer_action_guard_tests.cpp
)

add_executable(emel_tests_bin
${EMEL_TEST_SOURCES}
Expand Down Expand Up @@ -244,12 +220,10 @@ if(EMEL_ENABLE_FUZZ)
)
endfunction()

if(EMEL_ENABLE_TENSOR_PARSER_TEXT_MACHINES)
add_executable(emel_fuzz_gguf_parser
tests/fuzz/gguf_parser_fuzz.cpp
)
emel_configure_fuzzer(emel_fuzz_gguf_parser)
endif()
add_executable(emel_fuzz_gguf_parser
tests/fuzz/gguf_parser_fuzz.cpp
)
emel_configure_fuzzer(emel_fuzz_gguf_parser)

add_executable(emel_fuzz_gbnf_parser
tests/fuzz/gbnf_parser_fuzz.cpp
Expand Down
129 changes: 92 additions & 37 deletions docs/benchmarks.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,43 +5,98 @@ Source: `snapshots/bench/benchmarks_compare.txt`
Note: While EMEL is modular and easy to bench in isolation, llama.cpp code is very
entangled. These microbenches aim for apples-to-apples comparisons but likely
are not. True benchmarks will be end-to-end once the system is complete.
Legacy benchmark IDs like `batch/splitter_*` and `jinja/renderer_*` are intentionally
retained for snapshot/report continuity and should be renamed after consumers migrate.

| Benchmark | emel.cpp ns/op | llama.cpp ns/op | ratio |
| --- | ---: | ---: | ---: |
| `batch/splitter_equal` | 1626.933 | 6278.408 | 0.259x |
| `batch/splitter_seq` | 1319.379 | 2638.238 | 0.500x |
| `batch/splitter_simple` | 738.408 | 2273.875 | 0.325x |
| `buffer/allocator_alloc_graph` | 16.671 | 55.083 | 0.303x |
| `buffer/allocator_full` | 37.625 | 252.400 | 0.149x |
| `buffer/allocator_reserve_n` | 19.971 | 442.804 | 0.045x |
| `jinja/parser_long` | 30502.542 | 49796.596 | 0.613x |
| `jinja/parser_short` | 388.525 | 491.550 | 0.790x |
| `jinja/renderer_long` | 89658.308 | 227931.921 | 0.393x |
| `jinja/renderer_short` | 1427.583 | 3803.167 | 0.375x |
| `memory/coordinator_recurrent_full` | 3895.246 | 5590.212 | 0.697x |
| `tokenizer/full_bpe_long` | 6621.133 | 7004.667 | 0.945x |
| `tokenizer/full_bpe_short` | 163.496 | 157.471 | 1.038x |
| `tokenizer/full_plamo2_long` | 10211.054 | 10239.642 | 0.997x |
| `tokenizer/full_plamo2_short` | 2205.075 | 1822.450 | 1.210x |
| `tokenizer/full_rwkv_long` | 2418.412 | 2436.733 | 0.992x |
| `tokenizer/full_rwkv_short` | 1854.350 | 2193.179 | 0.846x |
| `tokenizer/full_spm_long` | 9995.317 | 10792.767 | 0.926x |
| `tokenizer/full_spm_short` | 187.167 | 191.354 | 0.978x |
| `tokenizer/full_ugm_long` | 8868.146 | 8974.592 | 0.988x |
| `tokenizer/full_ugm_short` | 1738.117 | 2098.412 | 0.828x |
| `tokenizer/full_wpm_long` | 25314.525 | 25538.029 | 0.991x |
| `tokenizer/full_wpm_short` | 2077.092 | 2376.600 | 0.874x |
| `tokenizer/preprocessor_bpe_long` | 2776.758 | 5373.312 | 0.517x |
| `tokenizer/preprocessor_bpe_short` | 78.850 | 1747.050 | 0.045x |
| `tokenizer/preprocessor_plamo2_long` | 3082.279 | 4788.679 | 0.644x |
| `tokenizer/preprocessor_plamo2_short` | 2386.262 | 3548.504 | 0.672x |
| `tokenizer/preprocessor_rwkv_long` | 2972.246 | 4580.996 | 0.649x |
| `tokenizer/preprocessor_rwkv_short` | 2305.317 | 3535.229 | 0.652x |
| `tokenizer/preprocessor_spm_long` | 3046.325 | 4598.229 | 0.662x |
| `tokenizer/preprocessor_spm_short` | 2361.629 | 3762.438 | 0.628x |
| `tokenizer/preprocessor_ugm_long` | 3027.463 | 4692.613 | 0.645x |
| `tokenizer/preprocessor_ugm_short` | 2348.642 | 3552.613 | 0.661x |
| `tokenizer/preprocessor_wpm_long` | 2952.042 | 4562.908 | 0.647x |
| `tokenizer/preprocessor_wpm_short` | 2307.729 | 3534.338 | 0.653x |
| `batch/planner_equal` | 1846.750 | 8689.946 | 0.213x |
| `batch/planner_seq` | 1781.388 | 3996.500 | 0.446x |
| `batch/planner_simple` | 1348.817 | 3498.363 | 0.386x |
| `gbnf/rule_parser_basic` | 247.521 | 471.233 | 0.525x |
| `gbnf/rule_parser_complex` | 1933.033 | 2515.221 | 0.769x |
| `kernel/aarch64/op_add` | 88.783 | 5061.321 | 0.018x |
| `kernel/aarch64/op_cos` | 1668.921 | 6025.850 | 0.277x |
| `kernel/aarch64/op_div` | 88.600 | 4142.504 | 0.021x |
| `kernel/aarch64/op_dup` | 85.975 | 4095.954 | 0.021x |
| `kernel/aarch64/op_log` | 1843.883 | 6106.117 | 0.302x |
| `kernel/aarch64/op_mul` | 91.025 | 5091.896 | 0.018x |
| `kernel/aarch64/op_mul_mat` | 4540.008 | 10639.004 | 0.427x |
| `kernel/aarch64/op_sin` | 1447.079 | 5599.971 | 0.258x |
| `kernel/aarch64/op_soft_max` | 2066.808 | 4972.771 | 0.416x |
| `kernel/aarch64/op_sqr` | 86.779 | 4090.646 | 0.021x |
| `kernel/aarch64/op_sqrt` | 137.033 | 4436.392 | 0.031x |
| `kernel/aarch64/op_sub` | 91.279 | 5088.383 | 0.018x |
| `kernel/aarch64/op_unary_exp` | 1297.300 | 5642.096 | 0.230x |
| `kernel/aarch64/op_unary_neg` | 89.208 | 4536.625 | 0.020x |
| `kernel/aarch64/op_unary_relu` | 85.879 | 4413.375 | 0.019x |
| `kernel/x86_64/op_add` | 60.092 | 5068.100 | 0.012x |
| `kernel/x86_64/op_cos` | 1969.629 | 5873.692 | 0.335x |
| `kernel/x86_64/op_div` | 74.679 | 4153.717 | 0.018x |
| `kernel/x86_64/op_dup` | 47.033 | 4013.613 | 0.012x |
| `kernel/x86_64/op_log` | 1820.858 | 6532.413 | 0.279x |
| `kernel/x86_64/op_mul` | 60.196 | 5235.196 | 0.011x |
| `kernel/x86_64/op_mul_mat` | 44244.079 | 10511.242 | 4.209x |
| `kernel/x86_64/op_sin` | 1296.000 | 5583.742 | 0.232x |
| `kernel/x86_64/op_soft_max` | 2062.137 | 5244.917 | 0.393x |
| `kernel/x86_64/op_sqr` | 49.138 | 4063.596 | 0.012x |
| `kernel/x86_64/op_sqrt` | 143.012 | 4265.863 | 0.034x |
| `kernel/x86_64/op_sub` | 60.096 | 5310.508 | 0.011x |
| `kernel/x86_64/op_unary_exp` | 1284.658 | 5399.771 | 0.238x |
| `kernel/x86_64/op_unary_neg` | 51.946 | 4309.450 | 0.012x |
| `kernel/x86_64/op_unary_relu` | 52.304 | 4238.471 | 0.012x |
| `logits/sampler_raw/vocab_128000` | 19259.958 | 18468.492 | 1.043x |
| `logits/sampler_raw/vocab_256000` | 38539.842 | 36725.137 | 1.049x |
| `logits/sampler_raw/vocab_32000` | 5214.146 | 4826.229 | 1.080x |
| `logits/sampler_sml/vocab_128000` | 15429.442 | 14757.788 | 1.046x |
| `logits/sampler_sml/vocab_256000` | 34200.133 | 30380.342 | 1.126x |
| `logits/sampler_sml/vocab_32000` | 4436.292 | 4330.962 | 1.024x |
| `logits/validator_raw/vocab_128000` | 90205.633 | 90458.808 | 0.997x |
| `logits/validator_raw/vocab_256000` | 181372.546 | 179498.462 | 1.010x |
| `logits/validator_raw/vocab_32000` | 23735.550 | 23904.125 | 0.993x |
| `logits/validator_sml/vocab_128000` | 99648.387 | 99266.212 | 1.004x |
| `logits/validator_sml/vocab_256000` | 197266.092 | 199430.296 | 0.989x |
| `logits/validator_sml/vocab_32000` | 24528.092 | 24126.225 | 1.017x |
| `memory/hybrid_full` | 408.700 | 36677.713 | 0.011x |
| `memory/kv_full` | 103.067 | 36946.496 | 0.003x |
| `memory/recurrent_full` | 113.079 | 5595.042 | 0.020x |
| `text/encoders/bpe_long` | 10221.996 | 10221.204 | 1.000x |
| `text/encoders/bpe_short` | 159.125 | 153.158 | 1.039x |
| `text/encoders/fallback_long` | 2470.238 | 2485.546 | 0.994x |
| `text/encoders/fallback_short` | 50.267 | 47.825 | 1.051x |
| `text/encoders/plamo2_long` | 4848.942 | 4878.158 | 0.994x |
| `text/encoders/plamo2_short` | 107.117 | 104.096 | 1.029x |
| `text/encoders/rwkv_long` | 4557.729 | 4543.887 | 1.003x |
| `text/encoders/rwkv_short` | 2697.533 | 2658.883 | 1.015x |
| `text/encoders/spm_long` | 12589.987 | 12349.475 | 1.019x |
| `text/encoders/spm_short` | 213.188 | 205.325 | 1.038x |
| `text/encoders/ugm_long` | 8308.617 | 8295.337 | 1.002x |
| `text/encoders/ugm_short` | 137.250 | 137.008 | 1.002x |
| `text/encoders/wpm_long` | 26858.621 | 26355.825 | 1.019x |
| `text/encoders/wpm_short` | 531.438 | 540.237 | 0.984x |
| `text/jinja/formatter_long` | 87073.829 | 400326.883 | 0.218x |
| `text/jinja/formatter_short` | 1144.017 | 6368.133 | 0.180x |
| `text/jinja/parser_long` | 35030.512 | 52803.367 | 0.663x |
| `text/jinja/parser_short` | 547.888 | 632.633 | 0.866x |
| `tokenizer/full_bpe_long` | 9967.413 | 9607.096 | 1.038x |
| `tokenizer/full_bpe_short` | 220.113 | 218.846 | 1.006x |
| `tokenizer/full_plamo2_long` | 9890.796 | 9985.525 | 0.991x |
| `tokenizer/full_plamo2_short` | 1799.446 | 1769.058 | 1.017x |
| `tokenizer/full_rwkv_long` | 3566.475 | 3551.117 | 1.004x |
| `tokenizer/full_rwkv_short` | 2373.500 | 2159.892 | 1.099x |
| `tokenizer/full_spm_long` | 13766.279 | 13689.263 | 1.006x |
| `tokenizer/full_spm_short` | 296.825 | 285.354 | 1.040x |
| `tokenizer/full_ugm_long` | 10042.667 | 9989.429 | 1.005x |
| `tokenizer/full_ugm_short` | 1817.804 | 1818.546 | 1.000x |
| `tokenizer/full_wpm_long` | 28866.112 | 34007.938 | 0.849x |
| `tokenizer/full_wpm_short` | 2204.133 | 2210.221 | 0.997x |
| `tokenizer/preprocessor_bpe_long` | 2775.246 | 5265.688 | 0.527x |
| `tokenizer/preprocessor_bpe_short` | 82.854 | 1747.217 | 0.047x |
| `tokenizer/preprocessor_plamo2_long` | 3052.371 | 4619.908 | 0.661x |
| `tokenizer/preprocessor_plamo2_short` | 2367.925 | 3575.713 | 0.662x |
| `tokenizer/preprocessor_rwkv_long` | 3077.379 | 4554.646 | 0.676x |
| `tokenizer/preprocessor_rwkv_short` | 2356.238 | 3536.963 | 0.666x |
| `tokenizer/preprocessor_spm_long` | 3092.796 | 4569.296 | 0.677x |
| `tokenizer/preprocessor_spm_short` | 2361.154 | 3586.446 | 0.658x |
| `tokenizer/preprocessor_ugm_long` | 3139.088 | 4625.679 | 0.679x |
| `tokenizer/preprocessor_ugm_short` | 2375.508 | 3560.692 | 0.667x |
| `tokenizer/preprocessor_wpm_long` | 3043.238 | 4503.621 | 0.676x |
| `tokenizer/preprocessor_wpm_short` | 2599.613 | 3530.233 | 0.736x |
2 changes: 0 additions & 2 deletions docs/templates/benchmarks.md.j2
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,5 @@ Source: `snapshots/bench/benchmarks_compare.txt`
Note: While EMEL is modular and easy to bench in isolation, llama.cpp code is very
entangled. These microbenches aim for apples-to-apples comparisons but likely
are not. True benchmarks will be end-to-end once the system is complete.
Legacy benchmark IDs like `batch/splitter_*` and `jinja/renderer_*` are intentionally
retained for snapshot/report continuity and should be renamed after consumers migrate.

{{ benchmarks_table }}
1 change: 0 additions & 1 deletion scripts/fuzz_smoke.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ cmake -S "$ROOT_DIR" -B "$BUILD_DIR" -G Ninja \
-DCMAKE_CXX_FLAGS="$fuzz_cxx_flags" \
-DCMAKE_EXE_LINKER_FLAGS="$fuzz_link_flags" \
-DEMEL_ENABLE_FUZZ=ON \
-DEMEL_ENABLE_TENSOR_PARSER_TEXT_MACHINES=OFF \
-DEMEL_ENABLE_TESTS=OFF

cmake --build "$BUILD_DIR" --parallel
Expand Down
Loading