stateforward · gabewillen · Mar 2, 2026 · Mar 2, 2026 · Mar 2, 2026 · Mar 2, 2026
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -17,10 +17,6 @@ endif()
 
 option(EMEL_ENABLE_TESTS "Build tests" ON)
 option(EMEL_ENABLE_FUZZ "Build fuzz targets" OFF)
-option(EMEL_ENABLE_TENSOR_PARSER_TEXT_MACHINES
-  "Build tensor/parser/text machine surfaces"
-  OFF
-)
 
 include(FetchContent)
 include(cmake/sml_version.cmake)
@@ -50,24 +46,6 @@ target_link_libraries(emel
 )
 
 
-if(EMEL_ENABLE_TENSOR_PARSER_TEXT_MACHINES)
-  add_executable(mock_model_load
-    tools/mock_main.cpp
-  )
-
-  target_link_libraries(mock_model_load
-    PRIVATE
-      emel_core
-  )
-
-  target_include_directories(mock_model_load
-    PRIVATE
-      ${CMAKE_CURRENT_SOURCE_DIR}/src
-      ${CMAKE_CURRENT_SOURCE_DIR}/include
-      ${boost_sml_SOURCE_DIR}/include
-  )
-endif()
-
 if(EMEL_ENABLE_TESTS)
   include(CTest)
   enable_testing()
@@ -120,38 +98,36 @@ if(EMEL_ENABLE_TESTS)
     tests/tensor/view/lifecycle_tests.cpp
   )
 
-  if(EMEL_ENABLE_TENSOR_PARSER_TEXT_MACHINES)
-    list(APPEND EMEL_TEST_SOURCES
-      tests/gguf/loader/lifecycle_tests.cpp
-      tests/text/jinja/parser_tests.cpp
-      tests/text/jinja/lexer_tests.cpp
-      tests/text/jinja/formatter_tests.cpp
-      tests/text/formatter/formatter_tests.cpp
-      tests/text/encoders/common_tests.cpp
-      tests/text/encoders/bpe_tests.cpp
-      tests/text/encoders/spm_tests.cpp
-      tests/text/encoders/wpm_tests.cpp
-      tests/text/encoders/ugm_tests.cpp
-      tests/text/encoders/rwkv_tests.cpp
-      tests/text/encoders/plamo2_tests.cpp
-      tests/text/encoders/fallback_tests.cpp
-      tests/text/conditioner/text_conditioner_tests.cpp
-      tests/text/detokenizer/detokenizer_tests.cpp
-      tests/text/renderer/renderer_tests.cpp
-      tests/text/unicode/unicode_tests.cpp
-      tests/text/tokenizer/preprocessor_tests.cpp
-      tests/text/tokenizer/preprocessor_spm_tests.cpp
-      tests/text/tokenizer/preprocessor_wpm_tests.cpp
-      tests/text/tokenizer/preprocessor_rwkv_tests.cpp
-      tests/text/tokenizer/preprocessor_plamo2_tests.cpp
-      tests/text/tokenizer/preprocessor_fallback_tests.cpp
-      tests/text/tokenizer/bpe_regex_tests.cpp
-      tests/text/tokenizer/bpe_split_tests.cpp
-      tests/text/tokenizer/tokenizer_tests.cpp
-      tests/text/tokenizer/tokenizer_parity_tests.cpp
-      tests/text/tokenizer/tokenizer_action_guard_tests.cpp
-    )
-  endif()
+  list(APPEND EMEL_TEST_SOURCES  # GGUF loader and text-subsystem tests; compiled into emel_tests_bin
+    tests/gguf/loader/lifecycle_tests.cpp
+    tests/text/jinja/parser_tests.cpp
+    tests/text/jinja/lexer_tests.cpp
+    tests/text/jinja/formatter_tests.cpp
+    tests/text/formatter/formatter_tests.cpp
+    tests/text/encoders/common_tests.cpp
+    tests/text/encoders/bpe_tests.cpp
+    tests/text/encoders/spm_tests.cpp
+    tests/text/encoders/wpm_tests.cpp
+    tests/text/encoders/ugm_tests.cpp
+    tests/text/encoders/rwkv_tests.cpp
+    tests/text/encoders/plamo2_tests.cpp
+    tests/text/encoders/fallback_tests.cpp
+    tests/text/conditioner/text_conditioner_tests.cpp
+    tests/text/detokenizer/detokenizer_tests.cpp
+    tests/text/renderer/renderer_tests.cpp
+    tests/text/unicode/unicode_tests.cpp
+    tests/text/tokenizer/preprocessor_tests.cpp
+    tests/text/tokenizer/preprocessor_spm_tests.cpp
+    tests/text/tokenizer/preprocessor_wpm_tests.cpp
+    tests/text/tokenizer/preprocessor_rwkv_tests.cpp
+    tests/text/tokenizer/preprocessor_plamo2_tests.cpp
+    tests/text/tokenizer/preprocessor_fallback_tests.cpp
+    tests/text/tokenizer/bpe_regex_tests.cpp
+    tests/text/tokenizer/bpe_split_tests.cpp
+    tests/text/tokenizer/tokenizer_tests.cpp
+    tests/text/tokenizer/tokenizer_parity_tests.cpp
+    tests/text/tokenizer/tokenizer_action_guard_tests.cpp
+  )
 
   add_executable(emel_tests_bin
     ${EMEL_TEST_SOURCES}
@@ -244,12 +220,10 @@ if(EMEL_ENABLE_FUZZ)
     )
   endfunction()
 
-  if(EMEL_ENABLE_TENSOR_PARSER_TEXT_MACHINES)
-    add_executable(emel_fuzz_gguf_parser
-      tests/fuzz/gguf_parser_fuzz.cpp
-    )
-    emel_configure_fuzzer(emel_fuzz_gguf_parser)
-  endif()
+  add_executable(emel_fuzz_gguf_parser  # GGUF parser fuzz target, built alongside the other fuzz targets
+    tests/fuzz/gguf_parser_fuzz.cpp
+  )
+  emel_configure_fuzzer(emel_fuzz_gguf_parser)  # same per-target setup call as this target had before the change
 
   add_executable(emel_fuzz_gbnf_parser
     tests/fuzz/gbnf_parser_fuzz.cpp

diff --git a/docs/benchmarks.md b/docs/benchmarks.md
@@ -5,43 +5,98 @@ Source: `snapshots/bench/benchmarks_compare.txt`
 Note: While EMEL is modular and easy to bench in isolation, llama.cpp code is very
 entangled. These microbenches aim for apples-to-apples comparisons but likely
 are not. True benchmarks will be end-to-end once the system is complete.
-Legacy benchmark IDs like `batch/splitter_*` and `jinja/renderer_*` are intentionally
-retained for snapshot/report continuity and should be renamed after consumers migrate.
 
 | Benchmark | emel.cpp ns/op | llama.cpp ns/op | ratio |
 | --- | ---: | ---: | ---: |
-| `batch/splitter_equal` | 1626.933 | 6278.408 | 0.259x |
-| `batch/splitter_seq` | 1319.379 | 2638.238 | 0.500x |
-| `batch/splitter_simple` | 738.408 | 2273.875 | 0.325x |
-| `buffer/allocator_alloc_graph` | 16.671 | 55.083 | 0.303x |
-| `buffer/allocator_full` | 37.625 | 252.400 | 0.149x |
-| `buffer/allocator_reserve_n` | 19.971 | 442.804 | 0.045x |
-| `jinja/parser_long` | 30502.542 | 49796.596 | 0.613x |
-| `jinja/parser_short` | 388.525 | 491.550 | 0.790x |
-| `jinja/renderer_long` | 89658.308 | 227931.921 | 0.393x |
-| `jinja/renderer_short` | 1427.583 | 3803.167 | 0.375x |
-| `memory/coordinator_recurrent_full` | 3895.246 | 5590.212 | 0.697x |
-| `tokenizer/full_bpe_long` | 6621.133 | 7004.667 | 0.945x |
-| `tokenizer/full_bpe_short` | 163.496 | 157.471 | 1.038x |
-| `tokenizer/full_plamo2_long` | 10211.054 | 10239.642 | 0.997x |
-| `tokenizer/full_plamo2_short` | 2205.075 | 1822.450 | 1.210x |
-| `tokenizer/full_rwkv_long` | 2418.412 | 2436.733 | 0.992x |
-| `tokenizer/full_rwkv_short` | 1854.350 | 2193.179 | 0.846x |
-| `tokenizer/full_spm_long` | 9995.317 | 10792.767 | 0.926x |
-| `tokenizer/full_spm_short` | 187.167 | 191.354 | 0.978x |
-| `tokenizer/full_ugm_long` | 8868.146 | 8974.592 | 0.988x |
-| `tokenizer/full_ugm_short` | 1738.117 | 2098.412 | 0.828x |
-| `tokenizer/full_wpm_long` | 25314.525 | 25538.029 | 0.991x |
-| `tokenizer/full_wpm_short` | 2077.092 | 2376.600 | 0.874x |
-| `tokenizer/preprocessor_bpe_long` | 2776.758 | 5373.312 | 0.517x |
-| `tokenizer/preprocessor_bpe_short` | 78.850 | 1747.050 | 0.045x |
-| `tokenizer/preprocessor_plamo2_long` | 3082.279 | 4788.679 | 0.644x |
-| `tokenizer/preprocessor_plamo2_short` | 2386.262 | 3548.504 | 0.672x |
-| `tokenizer/preprocessor_rwkv_long` | 2972.246 | 4580.996 | 0.649x |
-| `tokenizer/preprocessor_rwkv_short` | 2305.317 | 3535.229 | 0.652x |
-| `tokenizer/preprocessor_spm_long` | 3046.325 | 4598.229 | 0.662x |
-| `tokenizer/preprocessor_spm_short` | 2361.629 | 3762.438 | 0.628x |
-| `tokenizer/preprocessor_ugm_long` | 3027.463 | 4692.613 | 0.645x |
-| `tokenizer/preprocessor_ugm_short` | 2348.642 | 3552.613 | 0.661x |
-| `tokenizer/preprocessor_wpm_long` | 2952.042 | 4562.908 | 0.647x |
-| `tokenizer/preprocessor_wpm_short` | 2307.729 | 3534.338 | 0.653x |
+| `batch/planner_equal` | 1846.750 | 8689.946 | 0.213x |
+| `batch/planner_seq` | 1781.388 | 3996.500 | 0.446x |
+| `batch/planner_simple` | 1348.817 | 3498.363 | 0.386x |
+| `gbnf/rule_parser_basic` | 247.521 | 471.233 | 0.525x |
+| `gbnf/rule_parser_complex` | 1933.033 | 2515.221 | 0.769x |
+| `kernel/aarch64/op_add` | 88.783 | 5061.321 | 0.018x |
+| `kernel/aarch64/op_cos` | 1668.921 | 6025.850 | 0.277x |
+| `kernel/aarch64/op_div` | 88.600 | 4142.504 | 0.021x |
+| `kernel/aarch64/op_dup` | 85.975 | 4095.954 | 0.021x |
+| `kernel/aarch64/op_log` | 1843.883 | 6106.117 | 0.302x |
+| `kernel/aarch64/op_mul` | 91.025 | 5091.896 | 0.018x |
+| `kernel/aarch64/op_mul_mat` | 4540.008 | 10639.004 | 0.427x |
+| `kernel/aarch64/op_sin` | 1447.079 | 5599.971 | 0.258x |
+| `kernel/aarch64/op_soft_max` | 2066.808 | 4972.771 | 0.416x |
+| `kernel/aarch64/op_sqr` | 86.779 | 4090.646 | 0.021x |
+| `kernel/aarch64/op_sqrt` | 137.033 | 4436.392 | 0.031x |
+| `kernel/aarch64/op_sub` | 91.279 | 5088.383 | 0.018x |
+| `kernel/aarch64/op_unary_exp` | 1297.300 | 5642.096 | 0.230x |
+| `kernel/aarch64/op_unary_neg` | 89.208 | 4536.625 | 0.020x |
+| `kernel/aarch64/op_unary_relu` | 85.879 | 4413.375 | 0.019x |
+| `kernel/x86_64/op_add` | 60.092 | 5068.100 | 0.012x |
+| `kernel/x86_64/op_cos` | 1969.629 | 5873.692 | 0.335x |
+| `kernel/x86_64/op_div` | 74.679 | 4153.717 | 0.018x |
+| `kernel/x86_64/op_dup` | 47.033 | 4013.613 | 0.012x |
+| `kernel/x86_64/op_log` | 1820.858 | 6532.413 | 0.279x |
+| `kernel/x86_64/op_mul` | 60.196 | 5235.196 | 0.011x |
+| `kernel/x86_64/op_mul_mat` | 44244.079 | 10511.242 | 4.209x |
+| `kernel/x86_64/op_sin` | 1296.000 | 5583.742 | 0.232x |
+| `kernel/x86_64/op_soft_max` | 2062.137 | 5244.917 | 0.393x |
+| `kernel/x86_64/op_sqr` | 49.138 | 4063.596 | 0.012x |
+| `kernel/x86_64/op_sqrt` | 143.012 | 4265.863 | 0.034x |
+| `kernel/x86_64/op_sub` | 60.096 | 5310.508 | 0.011x |
+| `kernel/x86_64/op_unary_exp` | 1284.658 | 5399.771 | 0.238x |
+| `kernel/x86_64/op_unary_neg` | 51.946 | 4309.450 | 0.012x |
+| `kernel/x86_64/op_unary_relu` | 52.304 | 4238.471 | 0.012x |
+| `logits/sampler_raw/vocab_128000` | 19259.958 | 18468.492 | 1.043x |
+| `logits/sampler_raw/vocab_256000` | 38539.842 | 36725.137 | 1.049x |
+| `logits/sampler_raw/vocab_32000` | 5214.146 | 4826.229 | 1.080x |
+| `logits/sampler_sml/vocab_128000` | 15429.442 | 14757.788 | 1.046x |
+| `logits/sampler_sml/vocab_256000` | 34200.133 | 30380.342 | 1.126x |
+| `logits/sampler_sml/vocab_32000` | 4436.292 | 4330.962 | 1.024x |
+| `logits/validator_raw/vocab_128000` | 90205.633 | 90458.808 | 0.997x |
+| `logits/validator_raw/vocab_256000` | 181372.546 | 179498.462 | 1.010x |
+| `logits/validator_raw/vocab_32000` | 23735.550 | 23904.125 | 0.993x |
+| `logits/validator_sml/vocab_128000` | 99648.387 | 99266.212 | 1.004x |
+| `logits/validator_sml/vocab_256000` | 197266.092 | 199430.296 | 0.989x |
+| `logits/validator_sml/vocab_32000` | 24528.092 | 24126.225 | 1.017x |
+| `memory/hybrid_full` | 408.700 | 36677.713 | 0.011x |
+| `memory/kv_full` | 103.067 | 36946.496 | 0.003x |
+| `memory/recurrent_full` | 113.079 | 5595.042 | 0.020x |
+| `text/encoders/bpe_long` | 10221.996 | 10221.204 | 1.000x |
+| `text/encoders/bpe_short` | 159.125 | 153.158 | 1.039x |
+| `text/encoders/fallback_long` | 2470.238 | 2485.546 | 0.994x |
+| `text/encoders/fallback_short` | 50.267 | 47.825 | 1.051x |
+| `text/encoders/plamo2_long` | 4848.942 | 4878.158 | 0.994x |
+| `text/encoders/plamo2_short` | 107.117 | 104.096 | 1.029x |
+| `text/encoders/rwkv_long` | 4557.729 | 4543.887 | 1.003x |
+| `text/encoders/rwkv_short` | 2697.533 | 2658.883 | 1.015x |
+| `text/encoders/spm_long` | 12589.987 | 12349.475 | 1.019x |
+| `text/encoders/spm_short` | 213.188 | 205.325 | 1.038x |
+| `text/encoders/ugm_long` | 8308.617 | 8295.337 | 1.002x |
+| `text/encoders/ugm_short` | 137.250 | 137.008 | 1.002x |
+| `text/encoders/wpm_long` | 26858.621 | 26355.825 | 1.019x |
+| `text/encoders/wpm_short` | 531.438 | 540.237 | 0.984x |
+| `text/jinja/formatter_long` | 87073.829 | 400326.883 | 0.218x |
+| `text/jinja/formatter_short` | 1144.017 | 6368.133 | 0.180x |
+| `text/jinja/parser_long` | 35030.512 | 52803.367 | 0.663x |
+| `text/jinja/parser_short` | 547.888 | 632.633 | 0.866x |
+| `tokenizer/full_bpe_long` | 9967.413 | 9607.096 | 1.038x |
+| `tokenizer/full_bpe_short` | 220.113 | 218.846 | 1.006x |
+| `tokenizer/full_plamo2_long` | 9890.796 | 9985.525 | 0.991x |
+| `tokenizer/full_plamo2_short` | 1799.446 | 1769.058 | 1.017x |
+| `tokenizer/full_rwkv_long` | 3566.475 | 3551.117 | 1.004x |
+| `tokenizer/full_rwkv_short` | 2373.500 | 2159.892 | 1.099x |
+| `tokenizer/full_spm_long` | 13766.279 | 13689.263 | 1.006x |
+| `tokenizer/full_spm_short` | 296.825 | 285.354 | 1.040x |
+| `tokenizer/full_ugm_long` | 10042.667 | 9989.429 | 1.005x |
+| `tokenizer/full_ugm_short` | 1817.804 | 1818.546 | 1.000x |
+| `tokenizer/full_wpm_long` | 28866.112 | 34007.938 | 0.849x |
+| `tokenizer/full_wpm_short` | 2204.133 | 2210.221 | 0.997x |
+| `tokenizer/preprocessor_bpe_long` | 2775.246 | 5265.688 | 0.527x |
+| `tokenizer/preprocessor_bpe_short` | 82.854 | 1747.217 | 0.047x |
+| `tokenizer/preprocessor_plamo2_long` | 3052.371 | 4619.908 | 0.661x |
+| `tokenizer/preprocessor_plamo2_short` | 2367.925 | 3575.713 | 0.662x |
+| `tokenizer/preprocessor_rwkv_long` | 3077.379 | 4554.646 | 0.676x |
+| `tokenizer/preprocessor_rwkv_short` | 2356.238 | 3536.963 | 0.666x |
+| `tokenizer/preprocessor_spm_long` | 3092.796 | 4569.296 | 0.677x |
+| `tokenizer/preprocessor_spm_short` | 2361.154 | 3586.446 | 0.658x |
+| `tokenizer/preprocessor_ugm_long` | 3139.088 | 4625.679 | 0.679x |
+| `tokenizer/preprocessor_ugm_short` | 2375.508 | 3560.692 | 0.667x |
+| `tokenizer/preprocessor_wpm_long` | 3043.238 | 4503.621 | 0.676x |
+| `tokenizer/preprocessor_wpm_short` | 2599.613 | 3530.233 | 0.736x |
diff --git a/docs/templates/benchmarks.md.j2 b/docs/templates/benchmarks.md.j2
@@ -5,7 +5,5 @@ Source: `snapshots/bench/benchmarks_compare.txt`
 Note: While EMEL is modular and easy to bench in isolation, llama.cpp code is very
 entangled. These microbenches aim for apples-to-apples comparisons but likely
 are not. True benchmarks will be end-to-end once the system is complete.
-Legacy benchmark IDs like `batch/splitter_*` and `jinja/renderer_*` are intentionally
-retained for snapshot/report continuity and should be renamed after consumers migrate.
 
 {{ benchmarks_table }}
diff --git a/scripts/fuzz_smoke.sh b/scripts/fuzz_smoke.sh
@@ -59,7 +59,6 @@ cmake -S "$ROOT_DIR" -B "$BUILD_DIR" -G Ninja \
   -DCMAKE_CXX_FLAGS="$fuzz_cxx_flags" \
   -DCMAKE_EXE_LINKER_FLAGS="$fuzz_link_flags" \
   -DEMEL_ENABLE_FUZZ=ON \
-  -DEMEL_ENABLE_TENSOR_PARSER_TEXT_MACHINES=OFF \
   -DEMEL_ENABLE_TESTS=OFF
 
 cmake --build "$BUILD_DIR" --parallel