From 652934bf7d8cd8fc136a3c0768da25c57ef3c8a2 Mon Sep 17 00:00:00 2001 From: gabewillen Date: Mon, 2 Mar 2026 21:31:15 -0600 Subject: [PATCH 1/4] chore: remove docs notes and update snapshots --- docs/compliance.report.md | 113 -------- docs/gaps.md | 102 ------- docs/notes.md | 3 - snapshots/bench/benchmarks.txt | 169 +++++------ snapshots/lint/clang_format.txt | 448 +++++++++++++++++------------ snapshots/quality_gates/timing.txt | 9 +- 6 files changed, 348 insertions(+), 496 deletions(-) delete mode 100644 docs/compliance.report.md delete mode 100644 docs/gaps.md delete mode 100644 docs/notes.md diff --git a/docs/compliance.report.md b/docs/compliance.report.md deleted file mode 100644 index 10c43821..00000000 --- a/docs/compliance.report.md +++ /dev/null @@ -1,113 +0,0 @@ -# Compliance Report - -- Generated: 2026-03-01 -- Checklist reference: `docs/compliance-checklist.md` -- Scope: `src/emel/text/encoders` (7 encoder state machines + shared encoder orchestration files) -- Result: **PASS** (all audited encoder state machines satisfy checklist requirements) - -## Audit Summary - -- Machines audited: `7` - - `bpe`, `fallback`, `plamo2`, `rwkv`, `spm`, `ugm`, `wpm` -- Structural SML checks (destination-first rows, wrappers, explicit unexpected-event handling): **PASS** -- Event/context/error orchestration checks: **PASS** after this refactor -- Action/guard architecture checks: **PASS** for `bpe`, `fallback`, `plamo2`, `rwkv`, `spm`, `ugm`, `wpm` - -## Fixed In This Pass - -1. Removed shared enum error typing from encoder orchestration runtime state. - - `event::encode_ctx.err` is now `int32_t` boundary error code: `src/emel/text/encoders/events.hpp:36-39` - - `events::encoding_error.err` is now `int32_t`: `src/emel/text/encoders/events.hpp:55-58` -2. Shared action/guard/detail orchestration now uses boundary error codes (`EMEL_*`) instead of shared enum control state. - - `src/emel/text/encoders/actions.hpp:23-61` - - `src/emel/text/encoders/guards.hpp:27-36` - - `src/emel/text/encoders/detail.hpp:45-83` -3. All 7 machine wrappers now derive success/failure from runtime error code and store `last_error_` as `int32_t`. - - Example: `src/emel/text/encoders/bpe/sm.hpp:159-180` - - Same wrapper pattern confirmed in all seven encoder `sm.hpp` files. -4. BPE encoder call-path no longer depends on shared branching detail helpers. - - BPE table-prep action now calls local `ensure_bpe_tables`: `src/emel/text/encoders/bpe/actions.hpp:31-36` - - BPE path guard now calls local `bpe_lookup_token`: `src/emel/text/encoders/bpe/guards.hpp:75-79` - - BPE local kernels now provide table build/lookups/symbol/push operations in component detail: - `src/emel/text/encoders/bpe/detail.hpp:19-339` - - No `if`/`else`/`switch` tokens remain in BPE `actions.hpp`, `guards.hpp`, `detail.hpp`, or `sm.hpp` (regex validation pass). -5. PLaMo2 encoder call-path now uses local branchless kernels in component detail. - - Runtime encode action still dispatches via `plamo2::detail::encode_plamo2`: - `src/emel/text/encoders/plamo2/actions.hpp:33-38` - - PLaMo2 local detail now owns token text lookup, byte token parsing, push path, table build, and encode kernels: - `src/emel/text/encoders/plamo2/detail.hpp:20-388` - - No `if`/`else`/`switch`/`?` tokens remain in PLaMo2 `actions.hpp`, `guards.hpp`, `detail.hpp`, or `sm.hpp` (regex validation pass). -6. RWKV encoder now models table-sync behavior as explicit machine phases and keeps encode-kernel execution phase-local. - - Added explicit table-sync states and transitions: - `src/emel/text/encoders/rwkv/sm.hpp` (`table_sync_exec`, `table_sync_result_decision`) - - Added table-sync action and table-ready/table-missing guards: - `src/emel/text/encoders/rwkv/actions.hpp`, `src/emel/text/encoders/rwkv/guards.hpp` - - `encode_rwkv` now requires prepared tables and no longer performs hidden table construction: - `src/emel/text/encoders/rwkv/detail.hpp:199-237` - - Added regression test for this contract: - `tests/text/encoders/rwkv_tests.cpp` (`encoder_rwkv_encode_requires_prepared_tables`) -7. SPM encoder now models table-sync + explicit prepare/merge/emit phases and keeps each phase action-local. - - Added explicit prepare and merge phase states/transitions: - `src/emel/text/encoders/spm/sm.hpp` (`encode_prepare_exec`, `encode_prepare_result_decision`, - `encode_merge_exec`, `encode_merge_result_decision`) - - Added dedicated phase actions: - `src/emel/text/encoders/spm/actions.hpp` (`run_prepare`, `run_merge`, `run_encode`) - - Split monolithic detail encode path into explicit phase kernels: - `src/emel/text/encoders/spm/detail.hpp` (`prepare_spm`, `merge_spm`, `emit_spm`) -8. UGM encoder now models table-sync as explicit machine phases and uses branchless local detail kernels. - - Added explicit table-sync states and transitions: - `src/emel/text/encoders/ugm/sm.hpp` (`table_sync_exec`, `table_sync_result_decision`) - - Added UGM table-sync action and table-ready/table-missing guards: - `src/emel/text/encoders/ugm/actions.hpp`, `src/emel/text/encoders/ugm/guards.hpp` - - `encode_ugm` now requires prepared tables for non-empty text and no longer performs hidden table construction: - `src/emel/text/encoders/ugm/detail.hpp` - - UGM detail call-path no longer depends on shared branching helpers (`token_text`, `push_token`, `utf8_len`) and no longer - performs dispatch-time `std::string` copy during normalization: - `src/emel/text/encoders/ugm/detail.hpp` - - Added regression test for this contract: - `tests/text/encoders/ugm_tests.cpp` (`encoder_ugm_encode_requires_prepared_tables`) -9. WPM encoder now models table-sync as explicit machine phases and uses branchless local detail kernels. - - Added explicit table-sync states and transitions: - `src/emel/text/encoders/wpm/sm.hpp` (`table_sync_exec`, `table_sync_result_decision`) - - Added WPM table-sync action and table-ready/table-missing guards: - `src/emel/text/encoders/wpm/actions.hpp`, `src/emel/text/encoders/wpm/guards.hpp` - - `encode_wpm` now requires prepared tables for non-empty text and no longer performs hidden table construction: - `src/emel/text/encoders/wpm/detail.hpp` - - WPM detail call-path no longer depends on shared branching helpers (`ensure_tables`, `lookup_token`, `push_token`): - `src/emel/text/encoders/wpm/detail.hpp` - - Strict re-audit follow-up fixed `wpm_preprocess` split-token declaration syntax to keep the compliant path build-valid: - `src/emel/text/encoders/wpm/detail.hpp` - - Added regression test for this contract: - `tests/text/encoders/wpm_tests.cpp` (`encoder_wpm_encode_requires_prepared_tables`) - -## Remaining Non-Compliance (Merge-Blocking) - -- None. - -## Per-Machine Compliance Status - -- `src/emel/text/encoders/bpe/sm.hpp`: **PASS** -- `src/emel/text/encoders/plamo2/sm.hpp`: **PASS** (checked off) -- `src/emel/text/encoders/fallback/sm.hpp`: **PASS** -- `src/emel/text/encoders/rwkv/sm.hpp`: **PASS** (checked off) -- `src/emel/text/encoders/spm/sm.hpp`: **PASS** -- `src/emel/text/encoders/ugm/sm.hpp`: **PASS** -- `src/emel/text/encoders/wpm/sm.hpp`: **PASS** - -## Validation Commands Used - -- `scripts/quality_gates.sh` (passes in current workspace; benchmark snapshot warnings are pre-existing baseline issues) -- `rg -n "\\bif\\b|\\belse\\b|\\bswitch\\b|\\?" src/emel/text/encoders/bpe/actions.hpp src/emel/text/encoders/bpe/guards.hpp src/emel/text/encoders/bpe/detail.hpp src/emel/text/encoders/bpe/sm.hpp` (no matches) -- `rg -n "\\bif\\b|\\belse\\b|\\bswitch\\b|\\?" src/emel/text/encoders/plamo2/actions.hpp src/emel/text/encoders/plamo2/guards.hpp src/emel/text/encoders/plamo2/detail.hpp src/emel/text/encoders/plamo2/sm.hpp` (no matches) -- `rg -n "\\bif\\b|\\belse\\b|\\bswitch\\b|\\?" src/emel/text/encoders/rwkv/actions.hpp src/emel/text/encoders/rwkv/guards.hpp src/emel/text/encoders/rwkv/detail.hpp src/emel/text/encoders/rwkv/sm.hpp` (no matches) -- `rg -n "\\bif\\b|\\belse\\b|\\bswitch\\b|\\?" src/emel/text/encoders/ugm/actions.hpp src/emel/text/encoders/ugm/guards.hpp src/emel/text/encoders/ugm/detail.hpp src/emel/text/encoders/ugm/sm.hpp` (no matches) -- `rg -n "encoders::detail::token_text|encoders::detail::push_token|encoders::detail::utf8_len|\\.insert\\(|\\.traverse\\(" src/emel/text/encoders/ugm/detail.hpp` (no matches) -- `rg -n "std::string input\\(" src/emel/text/encoders/ugm/detail.hpp` (no matches) -- `rg -n "\\bif\\b|\\belse\\b|\\bswitch\\b|\\?" src/emel/text/encoders/wpm/actions.hpp src/emel/text/encoders/wpm/guards.hpp src/emel/text/encoders/wpm/detail.hpp src/emel/text/encoders/wpm/sm.hpp` (no matches) -- `rg -n "encoders::detail::ensure_tables|encoders::detail::lookup_token|encoders::detail::push_token" src/emel/text/encoders/wpm/detail.hpp` (no matches) -- `zig c++ ... /tmp/rwkv_compile_check.cpp` (RWKV `sm.hpp` compile smoke passes) -- `zig c++ ... /tmp/rwkv_behavior_check.cpp && /tmp/rwkv_behavior_check` (unprepared-table failure and prepared-table success contract passes) -- `rg -n "\\bif\\b|\\belse\\b|\\bswitch\\b|\\?" src/emel/text/encoders/fallback/actions.hpp src/emel/text/encoders/fallback/guards.hpp src/emel/text/encoders/fallback/detail.hpp src/emel/text/encoders/fallback/sm.hpp` (no matches) -- `rg -n "encoders::detail::ensure_tables|encoders::detail::lookup_token|encoders::detail::push_token|encoders::detail::encode_bytes|encoders::detail::byte_to_token|encoders::detail::token_text|encoders::detail::utf8_len" src/emel/text/encoders/fallback/actions.hpp src/emel/text/encoders/fallback/detail.hpp` (no matches) -- `zig c++ -std=c++20 -Wall -Wextra -Wpedantic -Werror -c /tmp/fallback_sm_compile_check.cpp -o /tmp/fallback_sm_compile_check.o -I.../src -I.../include -I.../build/zig/_deps/boost_sml-src/include` (compile smoke passes) -- `rg -n "\\bif\\b|\\belse\\b|\\bswitch\\b|\\?" src/emel/text/encoders/spm/actions.hpp src/emel/text/encoders/spm/guards.hpp src/emel/text/encoders/spm/detail.hpp src/emel/text/encoders/spm/sm.hpp` (no matches) diff --git a/docs/gaps.md b/docs/gaps.md deleted file mode 100644 index b3084f6d..00000000 --- a/docs/gaps.md +++ /dev/null @@ -1,102 +0,0 @@ -# parity gaps and validation status - -scope of this audit -- reference source: `ggml-alloc.c`. -- target machines reviewed: `src/emel/buffer/allocator/sm.hpp`, `src/emel/buffer/planner/sm.hpp`, - `src/emel/buffer/chunk_allocator/sm.hpp`, `src/emel/buffer/realloc_analyzer/sm.hpp`. -- date: 2026-02-16. -- all other machines are not yet validated against reference behavior. - -allocator cluster status (ggml-alloc parity) -- unexpected-event handling is now explicit for allocator cluster machines via wildcard transitions - to error states (`buffer::allocator`, `buffer::planner`, `buffer::chunk_allocator`, - `buffer::realloc_analyzer`). -- in-place reuse is modeled via `tensor_desc.can_inplace` and enforced in the planner reuse path, - including output tensor guards in the default strategy. -- alignment is now a per-buffer input (initialize/plan) and is used in planner sizing and chunk - allocator alignment (no longer hardcoded to 16). -- max chunk size is now a per-buffer input and is used by the planner + chunk allocator with - multi-chunk split plans when limits are exceeded. -- overflow/limit hardening is enforced in planner + allocator size/count paths. -- allocator parity scenarios from the reference test suite are now ported into: - `tests/buffer/allocator_parity_tests.cpp` and `tests/buffer/chunk_allocator_parity_tests.cpp`. -- public C API allocator-path tests for exact error/status mapping are implemented. -- C API equivalents of `ggml_backend_alloc_ctx_tensors_from_buft[_size]` and - `ggml_backend_alloc_ctx_tensors` are available via EMEL allocator wrappers (without `ctx`). -- allocator cluster audit is complete against `ggml-alloc.c`. - -model loader audit (llama.cpp parity) -- loader, parser, and weight loader orchestration is now implemented with explicit actions, guards, - and error propagation via `events::*_error` / `events::*_done`. -- loader dispatches parsing and weight loading through parser/weight_loader state machines and - supports `vocab_only`, `check_tensors`, `no_alloc`, and optional architecture validation. -- status: complete for GGUF loader callback parity and loader orchestration. -- implemented: GGUF header/metadata validation (including split metadata), split-file parsing and - cross-file consistency checks, mmap/stream/direct-IO selection, tensor mapping/range checks, and - progress/upload callbacks. -- note: public C API entrypoints and C-boundary status mapping remain pending as a separate task. - -parser audit (llama.cpp parity) -- reference source: `llama-model.cpp` and `llama-vocab.cpp`. -- status: complete for GGUF metadata mapping to `emel::model::data` fields and parser orchestration. -- implemented: tokenizer IDs and flags, token arrays, merges, and vocabulary metadata required by EMEL. - -weight loader audit (llama.cpp parity) -- reference source: `llama-model-loader.cpp` (weight mapping + data load). -- status: implemented for EMEL loader callbacks (strategy selection, mappings init, mmap/stream - load, validation, and cleanup). -- notes: - - direct I/O handling and async upload are backend-specific and are exposed through - loader callbacks rather than embedded in the state machine. - -KV cache audit (llama.cpp parity) -- reference source: `llama-kv-cache.cpp`. -- status: partial. slot planning, apply, and rollback are modeled, but multi-stream support and - sequence-aware operations are not yet represented in the state machine. -- gaps to close: - - stream-aware cell tracking (`n_stream`, `seq_to_stream`, per-stream heads) and stream selection. - - sequence operations (`seq_rm`, `seq_cp`, `seq_keep`, `seq_add`, `seq_div`) and copy scheduling. - - shift/defrag handling and sliding-window behaviors. - -decoder audit (llama.cpp parity) -- reference sources: `llama-context.cpp`, `llama-batch.cpp`, `llama-batch.h`, - `llama-kv-cache.cpp`, `llama-memory-*.cpp`. -- reference commit: `abb9f3c42b5e6acee9e8e37836ef691d1a41bdb8`. -- date: 2026-02-19. -- status: partial. batch splitting, output selection (`output_all`, `output_mask`, last-token), - seq masks/primary ids, and 1D/3D position handling are aligned for decode execution. -- gaps to close: - - embedding inputs and pooled embedding outputs (pooling modes, per-sequence embeddings). - - auto-generation and validation of batch fields (`n_seq_id`, `seq_id`, `pos`, `logits` masks). - - sequence coupling, continuity checks, and disallowing partial sequence subsets. - - step metadata parity (`n_seqs`, `n_seq_tokens`, `n_seqs_unq`, `seq_id_unq`, `seq_idx`) and - output ordering/reordering (`out_ids`, swap tracking). - - backend sampling integration (samplers, sampled/logits/probs/candidates buffers). - - output buffer reservation/resizing and host-buffer transfer orchestration. - - memory context semantics for decode (NO_UPDATE/FAILED_* status handling and rollback). - - graph reuse/scheduling parity for decode execution. - - encoder-decoder cross-attention metadata (e.g. t5-style cross state). - -encoder audit (llama.cpp parity) -- reference sources: `llama-vocab.cpp`, `llama-vocab.h`. -- reference commit: `abb9f3c42b5e6acee9e8e37836ef691d1a41bdb8`. -- date: 2026-02-20. -- status: complete. encoder/tokenizer behavior is aligned with llama.cpp for BPE, WPM, UGM, RWKV, - and PLaMo-2, including pre-tokenizer regex mapping, word-level splitting, defaults, and - byte-fallback handling. - -unvalidated machines (no parity audit performed yet) -- `src/emel/model/weight_loader/sm.hpp` -- `src/emel/generator/sm.hpp` -- `src/emel/sampler/pipeline/sm.hpp` -- `src/emel/sampler/candidate_builder/sm.hpp` -- `src/emel/sampler/token_selector/sm.hpp` -- `src/emel/tensor/allocator/sm.hpp` -- `src/emel/tensor/lifetime_analyzer/sm.hpp` -- `src/emel/telemetry/provider/sm.hpp` -- `src/emel/telemetry/exporter/sm.hpp` -- `src/emel/sm.hpp` - -recommended next steps -- decide which component to audit next against the reference implementation and identify the exact - files and functions to map. diff --git a/docs/notes.md b/docs/notes.md deleted file mode 100644 index 2ba93974..00000000 --- a/docs/notes.md +++ /dev/null @@ -1,3 +0,0 @@ -# Notes - -- The GBNF parser needs a re-evaluation in the future; current behavior is not trusted. diff --git a/snapshots/bench/benchmarks.txt b/snapshots/bench/benchmarks.txt index 03c2fc43..0fe824be 100644 --- a/snapshots/bench/benchmarks.txt +++ b/snapshots/bench/benchmarks.txt @@ -1,94 +1,79 @@ # ref=ecbcb7ea9d3303097519723b264a8b5f1e977028 # toolchain=/opt/homebrew/bin/zig -batch/planner_equal ns_per_op=1846.750 -batch/planner_seq ns_per_op=1781.388 -batch/planner_simple ns_per_op=1348.817 -gbnf/rule_parser_basic ns_per_op=247.521 -gbnf/rule_parser_complex ns_per_op=1933.033 -kernel/aarch64/op_add ns_per_op=88.783 -kernel/aarch64/op_cos ns_per_op=1668.921 -kernel/aarch64/op_div ns_per_op=88.600 -kernel/aarch64/op_dup ns_per_op=85.975 -kernel/aarch64/op_log ns_per_op=1843.883 -kernel/aarch64/op_mul ns_per_op=91.025 -kernel/aarch64/op_mul_mat ns_per_op=4540.008 -kernel/aarch64/op_sin ns_per_op=1447.079 -kernel/aarch64/op_soft_max ns_per_op=2066.808 -kernel/aarch64/op_sqr ns_per_op=86.779 -kernel/aarch64/op_sqrt ns_per_op=137.033 -kernel/aarch64/op_sub ns_per_op=91.279 -kernel/aarch64/op_unary_exp ns_per_op=1297.300 -kernel/aarch64/op_unary_neg ns_per_op=89.208 -kernel/aarch64/op_unary_relu ns_per_op=85.879 -kernel/x86_64/op_add ns_per_op=60.092 -kernel/x86_64/op_cos ns_per_op=1969.629 -kernel/x86_64/op_div ns_per_op=74.679 -kernel/x86_64/op_dup ns_per_op=47.033 -kernel/x86_64/op_log ns_per_op=1820.858 -kernel/x86_64/op_mul ns_per_op=60.196 -kernel/x86_64/op_mul_mat ns_per_op=44244.079 -kernel/x86_64/op_sin ns_per_op=1296.000 -kernel/x86_64/op_soft_max ns_per_op=2062.137 -kernel/x86_64/op_sqr ns_per_op=49.138 -kernel/x86_64/op_sqrt ns_per_op=143.012 -kernel/x86_64/op_sub ns_per_op=60.096 -kernel/x86_64/op_unary_exp ns_per_op=1284.658 -kernel/x86_64/op_unary_neg ns_per_op=51.946 -kernel/x86_64/op_unary_relu ns_per_op=52.304 -logits/sampler_raw/vocab_128000 ns_per_op=19259.958 -logits/sampler_raw/vocab_256000 ns_per_op=38539.842 -logits/sampler_raw/vocab_32000 ns_per_op=5214.146 -logits/sampler_sml/vocab_128000 ns_per_op=15429.442 -logits/sampler_sml/vocab_256000 ns_per_op=34200.133 -logits/sampler_sml/vocab_32000 ns_per_op=4436.292 -logits/validator_raw/vocab_128000 ns_per_op=90205.633 -logits/validator_raw/vocab_256000 ns_per_op=181372.546 -logits/validator_raw/vocab_32000 ns_per_op=23735.550 -logits/validator_sml/vocab_128000 ns_per_op=99648.387 -logits/validator_sml/vocab_256000 ns_per_op=197266.092 -logits/validator_sml/vocab_32000 ns_per_op=24528.092 -memory/hybrid_full ns_per_op=408.700 -memory/kv_full ns_per_op=103.067 -memory/recurrent_full ns_per_op=113.079 -text/encoders/bpe_long ns_per_op=10221.996 -text/encoders/bpe_short ns_per_op=159.125 -text/encoders/fallback_long ns_per_op=2470.238 -text/encoders/fallback_short ns_per_op=50.267 -text/encoders/plamo2_long ns_per_op=4848.942 -text/encoders/plamo2_short ns_per_op=107.117 -text/encoders/rwkv_long ns_per_op=4557.729 -text/encoders/rwkv_short ns_per_op=2697.533 -text/encoders/spm_long ns_per_op=12589.987 -text/encoders/spm_short ns_per_op=213.188 -text/encoders/ugm_long ns_per_op=8308.617 -text/encoders/ugm_short ns_per_op=137.250 -text/encoders/wpm_long ns_per_op=26858.621 -text/encoders/wpm_short ns_per_op=531.438 -text/jinja/formatter_long ns_per_op=87073.829 -text/jinja/formatter_short ns_per_op=1144.017 -text/jinja/parser_long ns_per_op=35902.459 -text/jinja/parser_short ns_per_op=1100.708 -tokenizer/full_bpe_long ns_per_op=9967.413 -tokenizer/full_bpe_short ns_per_op=220.113 -tokenizer/full_plamo2_long ns_per_op=9890.796 -tokenizer/full_plamo2_short ns_per_op=1799.446 -tokenizer/full_rwkv_long ns_per_op=3566.475 -tokenizer/full_rwkv_short ns_per_op=2373.500 -tokenizer/full_spm_long ns_per_op=13766.279 -tokenizer/full_spm_short ns_per_op=296.825 -tokenizer/full_ugm_long ns_per_op=10042.667 -tokenizer/full_ugm_short ns_per_op=1817.804 -tokenizer/full_wpm_long ns_per_op=28866.112 -tokenizer/full_wpm_short ns_per_op=2204.133 -tokenizer/preprocessor_bpe_long ns_per_op=2775.246 -tokenizer/preprocessor_bpe_short ns_per_op=82.854 -tokenizer/preprocessor_plamo2_long ns_per_op=3052.371 -tokenizer/preprocessor_plamo2_short ns_per_op=2367.925 -tokenizer/preprocessor_rwkv_long ns_per_op=3077.379 -tokenizer/preprocessor_rwkv_short ns_per_op=2356.238 -tokenizer/preprocessor_spm_long ns_per_op=3092.796 -tokenizer/preprocessor_spm_short ns_per_op=2361.154 -tokenizer/preprocessor_ugm_long ns_per_op=3139.088 -tokenizer/preprocessor_ugm_short ns_per_op=2375.508 -tokenizer/preprocessor_wpm_long ns_per_op=3043.238 -tokenizer/preprocessor_wpm_short ns_per_op=2599.613 +batch/planner_equal ns_per_op=1882.532 iter=100000 runs=5 +batch/planner_seq ns_per_op=1793.446 iter=100000 runs=5 +batch/planner_simple ns_per_op=1226.921 iter=100000 runs=5 +gbnf/rule_parser_basic ns_per_op=255.746 iter=100000 runs=5 +gbnf/rule_parser_complex ns_per_op=1825.217 iter=100000 runs=5 +kernel/aarch64/op_add ns_per_op=91.437 iter=100000 runs=5 +kernel/aarch64/op_cos ns_per_op=1639.246 iter=100000 runs=5 +kernel/aarch64/op_div ns_per_op=93.126 iter=100000 runs=5 +kernel/aarch64/op_dup ns_per_op=88.551 iter=100000 runs=5 +kernel/aarch64/op_log ns_per_op=1831.691 iter=100000 runs=5 +kernel/aarch64/op_mul ns_per_op=96.314 iter=100000 runs=5 +kernel/aarch64/op_mul_mat ns_per_op=4504.960 iter=100000 runs=5 +kernel/aarch64/op_sin ns_per_op=1265.526 iter=100000 runs=5 +kernel/aarch64/op_soft_max ns_per_op=2064.584 iter=100000 runs=5 +kernel/aarch64/op_sqr ns_per_op=87.621 iter=100000 runs=5 +kernel/aarch64/op_sqrt ns_per_op=140.678 iter=100000 runs=5 +kernel/aarch64/op_sub ns_per_op=92.562 iter=100000 runs=5 +kernel/aarch64/op_unary_exp ns_per_op=1271.511 iter=100000 runs=5 +kernel/aarch64/op_unary_neg ns_per_op=87.225 iter=100000 runs=5 +kernel/aarch64/op_unary_relu ns_per_op=88.797 iter=100000 runs=5 +logits/sampler_raw/vocab_128000 ns_per_op=18356.690 iter=100000 runs=5 +logits/sampler_raw/vocab_256000 ns_per_op=36740.836 iter=100000 runs=5 +logits/sampler_raw/vocab_32000 ns_per_op=4732.317 iter=100000 runs=5 +logits/sampler_sml/vocab_128000 ns_per_op=14036.271 iter=100000 runs=5 +logits/sampler_sml/vocab_256000 ns_per_op=29348.119 iter=100000 runs=5 +logits/sampler_sml/vocab_32000 ns_per_op=3845.585 iter=100000 runs=5 +logits/validator_raw/vocab_128000 ns_per_op=89823.316 iter=100000 runs=5 +logits/validator_raw/vocab_256000 ns_per_op=179980.475 iter=100000 runs=5 +logits/validator_raw/vocab_32000 ns_per_op=24318.386 iter=100000 runs=5 +logits/validator_sml/vocab_128000 ns_per_op=98501.648 iter=100000 runs=5 +logits/validator_sml/vocab_256000 ns_per_op=198231.188 iter=100000 runs=5 +logits/validator_sml/vocab_32000 ns_per_op=24162.930 iter=100000 runs=5 +memory/hybrid_full ns_per_op=388.140 iter=100000 runs=5 +memory/kv_full ns_per_op=103.370 iter=100000 runs=5 +memory/recurrent_full ns_per_op=113.965 iter=100000 runs=5 +text/encoders/bpe_long ns_per_op=38.041 iter=100000 runs=5 +text/encoders/bpe_short ns_per_op=33.829 iter=100000 runs=5 +text/encoders/fallback_long ns_per_op=2468.031 iter=100000 runs=5 +text/encoders/fallback_short ns_per_op=47.275 iter=100000 runs=5 +text/encoders/plamo2_long ns_per_op=4925.476 iter=100000 runs=5 +text/encoders/plamo2_short ns_per_op=104.395 iter=100000 runs=5 +text/encoders/rwkv_long ns_per_op=4518.345 iter=100000 runs=5 +text/encoders/rwkv_short ns_per_op=2620.465 iter=100000 runs=5 +text/encoders/spm_long ns_per_op=12278.933 iter=100000 runs=5 +text/encoders/spm_short ns_per_op=203.475 iter=100000 runs=5 +text/encoders/ugm_long ns_per_op=8212.398 iter=100000 runs=5 +text/encoders/ugm_short ns_per_op=138.669 iter=100000 runs=5 +text/encoders/wpm_long ns_per_op=26503.664 iter=100000 runs=5 +text/encoders/wpm_short ns_per_op=523.022 iter=100000 runs=5 +text/jinja/formatter_long ns_per_op=61.368 iter=100000 runs=5 +text/jinja/formatter_short ns_per_op=15.877 iter=100000 runs=5 +text/jinja/parser_long ns_per_op=46744.745 iter=100000 runs=5 +text/jinja/parser_short ns_per_op=1080.982 iter=100000 runs=5 +tokenizer/full_bpe_long ns_per_op=9486.930 iter=100000 runs=5 +tokenizer/full_bpe_short ns_per_op=209.840 iter=100000 runs=5 +tokenizer/full_plamo2_long ns_per_op=9886.370 iter=100000 runs=5 +tokenizer/full_plamo2_short ns_per_op=1741.515 iter=100000 runs=5 +tokenizer/full_rwkv_long ns_per_op=3491.422 iter=100000 runs=5 +tokenizer/full_rwkv_short ns_per_op=2077.372 iter=100000 runs=5 +tokenizer/full_spm_long ns_per_op=13553.769 iter=100000 runs=5 +tokenizer/full_spm_short ns_per_op=287.938 iter=100000 runs=5 +tokenizer/full_ugm_long ns_per_op=9836.669 iter=100000 runs=5 +tokenizer/full_ugm_short ns_per_op=1743.825 iter=100000 runs=5 +tokenizer/full_wpm_long ns_per_op=28131.710 iter=100000 runs=5 +tokenizer/full_wpm_short ns_per_op=2229.660 iter=100000 runs=5 +tokenizer/preprocessor_bpe_long ns_per_op=2775.547 iter=100000 runs=5 +tokenizer/preprocessor_bpe_short ns_per_op=83.980 iter=100000 runs=5 +tokenizer/preprocessor_plamo2_long ns_per_op=2989.073 iter=100000 runs=5 +tokenizer/preprocessor_plamo2_short ns_per_op=2327.313 iter=100000 runs=5 +tokenizer/preprocessor_rwkv_long ns_per_op=2997.292 iter=100000 runs=5 +tokenizer/preprocessor_rwkv_short ns_per_op=2334.717 iter=100000 runs=5 +tokenizer/preprocessor_spm_long ns_per_op=2998.770 iter=100000 runs=5 +tokenizer/preprocessor_spm_short ns_per_op=2342.702 iter=100000 runs=5 +tokenizer/preprocessor_ugm_long ns_per_op=3050.469 iter=100000 runs=5 +tokenizer/preprocessor_ugm_short ns_per_op=2334.305 iter=100000 runs=5 +tokenizer/preprocessor_wpm_long ns_per_op=3016.412 iter=100000 runs=5 +tokenizer/preprocessor_wpm_short ns_per_op=2336.437 iter=100000 runs=5 diff --git a/snapshots/lint/clang_format.txt b/snapshots/lint/clang_format.txt index 225f586a..c398509c 100644 --- a/snapshots/lint/clang_format.txt +++ b/snapshots/lint/clang_format.txt @@ -1,107 +1,232 @@ include/emel/callback.hpp include/emel/emel.h +include/emel/error/error.hpp src/emel/batch/planner/actions.hpp src/emel/batch/planner/context.hpp +src/emel/batch/planner/errors.hpp src/emel/batch/planner/events.hpp src/emel/batch/planner/guards.hpp +src/emel/batch/planner/modes/detail.hpp +src/emel/batch/planner/modes/equal/actions.hpp +src/emel/batch/planner/modes/equal/guards.hpp +src/emel/batch/planner/modes/equal/sm.hpp +src/emel/batch/planner/modes/sequential/actions.hpp +src/emel/batch/planner/modes/sequential/guards.hpp +src/emel/batch/planner/modes/sequential/sm.hpp +src/emel/batch/planner/modes/simple/actions.hpp +src/emel/batch/planner/modes/simple/guards.hpp +src/emel/batch/planner/modes/simple/sm.hpp src/emel/batch/planner/sm.hpp -src/emel/buffer/allocator/actions.hpp -src/emel/buffer/allocator/c_api.cpp -src/emel/buffer/allocator/context.hpp -src/emel/buffer/allocator/events.hpp -src/emel/buffer/allocator/guards.hpp -src/emel/buffer/allocator/sm.hpp -src/emel/buffer/chunk_allocator/actions.hpp -src/emel/buffer/chunk_allocator/context.hpp -src/emel/buffer/chunk_allocator/events.hpp -src/emel/buffer/chunk_allocator/guards.hpp -src/emel/buffer/chunk_allocator/sm.hpp -src/emel/buffer/planner/actions.hpp -src/emel/buffer/planner/context.hpp -src/emel/buffer/planner/events.hpp -src/emel/buffer/planner/guards.hpp -src/emel/buffer/planner/sm.hpp -src/emel/buffer/realloc_analyzer/actions.hpp -src/emel/buffer/realloc_analyzer/context.hpp -src/emel/buffer/realloc_analyzer/events.hpp -src/emel/buffer/realloc_analyzer/guards.hpp -src/emel/buffer/realloc_analyzer/sm.hpp -src/emel/decoder/actions.hpp -src/emel/decoder/context.hpp -src/emel/decoder/events.hpp -src/emel/decoder/guards.hpp -src/emel/decoder/sm.hpp -src/emel/gbnf/lexer/actions.hpp -src/emel/gbnf/lexer/context.hpp -src/emel/gbnf/lexer/events.hpp -src/emel/gbnf/lexer/guards.hpp -src/emel/gbnf/lexer/sm.hpp -src/emel/gbnf/parser/detail.hpp -src/emel/gbnf/parser/sm.hpp -src/emel/gbnf/types.hpp +src/emel/docs/detail.hpp +src/emel/gbnf/detail.hpp +src/emel/gbnf/rule_parser/actions.hpp +src/emel/gbnf/rule_parser/context.hpp +src/emel/gbnf/rule_parser/definition_parser/actions.hpp +src/emel/gbnf/rule_parser/definition_parser/context.hpp +src/emel/gbnf/rule_parser/definition_parser/errors.hpp +src/emel/gbnf/rule_parser/definition_parser/events.hpp +src/emel/gbnf/rule_parser/definition_parser/guards.hpp +src/emel/gbnf/rule_parser/definition_parser/sm.hpp +src/emel/gbnf/rule_parser/detail.hpp +src/emel/gbnf/rule_parser/errors.hpp +src/emel/gbnf/rule_parser/events.hpp +src/emel/gbnf/rule_parser/expression_parser/actions.hpp +src/emel/gbnf/rule_parser/expression_parser/context.hpp +src/emel/gbnf/rule_parser/expression_parser/errors.hpp +src/emel/gbnf/rule_parser/expression_parser/events.hpp +src/emel/gbnf/rule_parser/expression_parser/guards.hpp +src/emel/gbnf/rule_parser/expression_parser/sm.hpp +src/emel/gbnf/rule_parser/guards.hpp +src/emel/gbnf/rule_parser/lexer/actions.hpp +src/emel/gbnf/rule_parser/lexer/context.hpp +src/emel/gbnf/rule_parser/lexer/errors.hpp +src/emel/gbnf/rule_parser/lexer/events.hpp +src/emel/gbnf/rule_parser/lexer/guards.hpp +src/emel/gbnf/rule_parser/lexer/sm.hpp +src/emel/gbnf/rule_parser/nonterm_parser/actions.hpp +src/emel/gbnf/rule_parser/nonterm_parser/context.hpp +src/emel/gbnf/rule_parser/nonterm_parser/errors.hpp +src/emel/gbnf/rule_parser/nonterm_parser/events.hpp +src/emel/gbnf/rule_parser/nonterm_parser/guards.hpp +src/emel/gbnf/rule_parser/nonterm_parser/sm.hpp +src/emel/gbnf/rule_parser/sm.hpp +src/emel/gbnf/rule_parser/term_parser/actions.hpp +src/emel/gbnf/rule_parser/term_parser/context.hpp +src/emel/gbnf/rule_parser/term_parser/errors.hpp +src/emel/gbnf/rule_parser/term_parser/events.hpp +src/emel/gbnf/rule_parser/term_parser/guards.hpp +src/emel/gbnf/rule_parser/term_parser/sm.hpp +src/emel/gbnf/sampler/accept_parser/actions.hpp +src/emel/gbnf/sampler/accept_parser/context.hpp +src/emel/gbnf/sampler/accept_parser/errors.hpp +src/emel/gbnf/sampler/accept_parser/events.hpp +src/emel/gbnf/sampler/accept_parser/guards.hpp +src/emel/gbnf/sampler/accept_parser/sm.hpp +src/emel/gbnf/sampler/actions.hpp +src/emel/gbnf/sampler/candidate_parser/actions.hpp +src/emel/gbnf/sampler/candidate_parser/context.hpp +src/emel/gbnf/sampler/candidate_parser/errors.hpp +src/emel/gbnf/sampler/candidate_parser/events.hpp +src/emel/gbnf/sampler/candidate_parser/guards.hpp +src/emel/gbnf/sampler/candidate_parser/sm.hpp +src/emel/gbnf/sampler/context.hpp +src/emel/gbnf/sampler/errors.hpp +src/emel/gbnf/sampler/events.hpp +src/emel/gbnf/sampler/guards.hpp +src/emel/gbnf/sampler/matcher_parser/actions.hpp +src/emel/gbnf/sampler/matcher_parser/context.hpp +src/emel/gbnf/sampler/matcher_parser/errors.hpp +src/emel/gbnf/sampler/matcher_parser/events.hpp +src/emel/gbnf/sampler/matcher_parser/guards.hpp +src/emel/gbnf/sampler/matcher_parser/sm.hpp +src/emel/gbnf/sampler/sm.hpp +src/emel/gbnf/sampler/token_parser/actions.hpp +src/emel/gbnf/sampler/token_parser/context.hpp +src/emel/gbnf/sampler/token_parser/errors.hpp +src/emel/gbnf/sampler/token_parser/events.hpp +src/emel/gbnf/sampler/token_parser/guards.hpp +src/emel/gbnf/sampler/token_parser/sm.hpp src/emel/generator/actions.hpp +src/emel/generator/context.hpp +src/emel/generator/errors.hpp src/emel/generator/events.hpp src/emel/generator/guards.hpp src/emel/generator/sm.hpp +src/emel/gguf/loader/actions.hpp +src/emel/gguf/loader/context.hpp +src/emel/gguf/loader/detail.hpp +src/emel/gguf/loader/errors.hpp +src/emel/gguf/loader/events.hpp +src/emel/gguf/loader/guards.hpp +src/emel/gguf/loader/sm.hpp +src/emel/graph/actions.hpp src/emel/graph/allocator/actions.hpp src/emel/graph/allocator/context.hpp +src/emel/graph/allocator/errors.hpp src/emel/graph/allocator/events.hpp src/emel/graph/allocator/guards.hpp +src/emel/graph/allocator/liveness_pass/actions.hpp +src/emel/graph/allocator/liveness_pass/context.hpp +src/emel/graph/allocator/liveness_pass/events.hpp +src/emel/graph/allocator/liveness_pass/guards.hpp +src/emel/graph/allocator/liveness_pass/sm.hpp +src/emel/graph/allocator/ordering_pass/actions.hpp +src/emel/graph/allocator/ordering_pass/context.hpp +src/emel/graph/allocator/ordering_pass/events.hpp +src/emel/graph/allocator/ordering_pass/guards.hpp +src/emel/graph/allocator/ordering_pass/sm.hpp +src/emel/graph/allocator/placement_pass/actions.hpp +src/emel/graph/allocator/placement_pass/context.hpp +src/emel/graph/allocator/placement_pass/events.hpp +src/emel/graph/allocator/placement_pass/guards.hpp +src/emel/graph/allocator/placement_pass/sm.hpp src/emel/graph/allocator/sm.hpp src/emel/graph/assembler/actions.hpp +src/emel/graph/assembler/assemble_alloc_pass/actions.hpp +src/emel/graph/assembler/assemble_alloc_pass/context.hpp +src/emel/graph/assembler/assemble_alloc_pass/events.hpp +src/emel/graph/assembler/assemble_alloc_pass/guards.hpp +src/emel/graph/assembler/assemble_alloc_pass/sm.hpp +src/emel/graph/assembler/assemble_build_pass/actions.hpp +src/emel/graph/assembler/assemble_build_pass/context.hpp +src/emel/graph/assembler/assemble_build_pass/events.hpp +src/emel/graph/assembler/assemble_build_pass/guards.hpp +src/emel/graph/assembler/assemble_build_pass/sm.hpp +src/emel/graph/assembler/assemble_validate_pass/actions.hpp +src/emel/graph/assembler/assemble_validate_pass/context.hpp +src/emel/graph/assembler/assemble_validate_pass/events.hpp +src/emel/graph/assembler/assemble_validate_pass/guards.hpp +src/emel/graph/assembler/assemble_validate_pass/sm.hpp src/emel/graph/assembler/context.hpp +src/emel/graph/assembler/errors.hpp src/emel/graph/assembler/events.hpp src/emel/graph/assembler/guards.hpp +src/emel/graph/assembler/reserve_alloc_pass/actions.hpp +src/emel/graph/assembler/reserve_alloc_pass/context.hpp +src/emel/graph/assembler/reserve_alloc_pass/events.hpp +src/emel/graph/assembler/reserve_alloc_pass/guards.hpp +src/emel/graph/assembler/reserve_alloc_pass/sm.hpp +src/emel/graph/assembler/reserve_build_pass/actions.hpp +src/emel/graph/assembler/reserve_build_pass/context.hpp +src/emel/graph/assembler/reserve_build_pass/events.hpp +src/emel/graph/assembler/reserve_build_pass/guards.hpp +src/emel/graph/assembler/reserve_build_pass/sm.hpp +src/emel/graph/assembler/reserve_validate_pass/actions.hpp +src/emel/graph/assembler/reserve_validate_pass/context.hpp +src/emel/graph/assembler/reserve_validate_pass/events.hpp +src/emel/graph/assembler/reserve_validate_pass/guards.hpp +src/emel/graph/assembler/reserve_validate_pass/sm.hpp +src/emel/graph/assembler/reuse_decision_pass/actions.hpp +src/emel/graph/assembler/reuse_decision_pass/context.hpp +src/emel/graph/assembler/reuse_decision_pass/events.hpp +src/emel/graph/assembler/reuse_decision_pass/guards.hpp +src/emel/graph/assembler/reuse_decision_pass/sm.hpp src/emel/graph/assembler/sm.hpp -src/emel/graph/graph/actions.hpp -src/emel/graph/graph/context.hpp -src/emel/graph/graph/events.hpp -src/emel/graph/graph/guards.hpp -src/emel/graph/graph/sm.hpp +src/emel/graph/context.hpp +src/emel/graph/errors.hpp +src/emel/graph/events.hpp +src/emel/graph/guards.hpp src/emel/graph/processor/actions.hpp +src/emel/graph/processor/alloc_step/actions.hpp +src/emel/graph/processor/alloc_step/context.hpp +src/emel/graph/processor/alloc_step/events.hpp +src/emel/graph/processor/alloc_step/guards.hpp +src/emel/graph/processor/alloc_step/sm.hpp +src/emel/graph/processor/bind_step/actions.hpp +src/emel/graph/processor/bind_step/context.hpp +src/emel/graph/processor/bind_step/events.hpp +src/emel/graph/processor/bind_step/guards.hpp +src/emel/graph/processor/bind_step/sm.hpp src/emel/graph/processor/context.hpp +src/emel/graph/processor/errors.hpp src/emel/graph/processor/events.hpp +src/emel/graph/processor/extract_step/actions.hpp +src/emel/graph/processor/extract_step/context.hpp +src/emel/graph/processor/extract_step/events.hpp +src/emel/graph/processor/extract_step/guards.hpp +src/emel/graph/processor/extract_step/sm.hpp src/emel/graph/processor/guards.hpp +src/emel/graph/processor/kernel_step/actions.hpp +src/emel/graph/processor/kernel_step/context.hpp +src/emel/graph/processor/kernel_step/events.hpp +src/emel/graph/processor/kernel_step/guards.hpp +src/emel/graph/processor/kernel_step/sm.hpp +src/emel/graph/processor/prepare_step/actions.hpp +src/emel/graph/processor/prepare_step/context.hpp +src/emel/graph/processor/prepare_step/events.hpp +src/emel/graph/processor/prepare_step/guards.hpp +src/emel/graph/processor/prepare_step/sm.hpp src/emel/graph/processor/sm.hpp +src/emel/graph/processor/validate_step/actions.hpp +src/emel/graph/processor/validate_step/context.hpp +src/emel/graph/processor/validate_step/events.hpp +src/emel/graph/processor/validate_step/guards.hpp +src/emel/graph/processor/validate_step/sm.hpp +src/emel/graph/sm.hpp src/emel/kernel/aarch64/actions.hpp src/emel/kernel/aarch64/context.hpp +src/emel/kernel/aarch64/detail.hpp +src/emel/kernel/aarch64/errors.hpp src/emel/kernel/aarch64/events.hpp src/emel/kernel/aarch64/guards.hpp src/emel/kernel/aarch64/sm.hpp src/emel/kernel/actions.hpp -src/emel/kernel/any/actions.hpp -src/emel/kernel/any/context.hpp -src/emel/kernel/any/events.hpp -src/emel/kernel/any/guards.hpp -src/emel/kernel/any/sm.hpp +src/emel/kernel/any.hpp src/emel/kernel/context.hpp src/emel/kernel/cuda/actions.hpp src/emel/kernel/cuda/context.hpp src/emel/kernel/cuda/events.hpp src/emel/kernel/cuda/guards.hpp src/emel/kernel/cuda/sm.hpp -src/emel/kernel/dispatch/actions.hpp -src/emel/kernel/dispatch/context.hpp -src/emel/kernel/dispatch/events.hpp -src/emel/kernel/dispatch/guards.hpp -src/emel/kernel/dispatch/sm.hpp +src/emel/kernel/detail.hpp +src/emel/kernel/errors.hpp src/emel/kernel/events.hpp -src/emel/kernel/events/actions.hpp -src/emel/kernel/events/context.hpp -src/emel/kernel/events/events.hpp -src/emel/kernel/events/guards.hpp -src/emel/kernel/events/sm.hpp src/emel/kernel/guards.hpp src/emel/kernel/metal/actions.hpp src/emel/kernel/metal/context.hpp src/emel/kernel/metal/events.hpp src/emel/kernel/metal/guards.hpp src/emel/kernel/metal/sm.hpp -src/emel/kernel/ops/actions.hpp -src/emel/kernel/ops/context.hpp -src/emel/kernel/ops/events.hpp -src/emel/kernel/ops/guards.hpp -src/emel/kernel/ops/sm.hpp src/emel/kernel/sm.hpp src/emel/kernel/vulkan/actions.hpp src/emel/kernel/vulkan/context.hpp @@ -115,110 +240,84 @@ src/emel/kernel/wasm/guards.hpp src/emel/kernel/wasm/sm.hpp src/emel/kernel/x86_64/actions.hpp src/emel/kernel/x86_64/context.hpp +src/emel/kernel/x86_64/detail.hpp +src/emel/kernel/x86_64/errors.hpp src/emel/kernel/x86_64/events.hpp src/emel/kernel/x86_64/guards.hpp src/emel/kernel/x86_64/sm.hpp src/emel/logits/sampler/actions.hpp src/emel/logits/sampler/context.hpp +src/emel/logits/sampler/errors.hpp src/emel/logits/sampler/events.hpp src/emel/logits/sampler/guards.hpp src/emel/logits/sampler/sm.hpp -src/emel/logits/sampler/token_selector/actions.hpp -src/emel/logits/sampler/token_selector/context.hpp -src/emel/logits/sampler/token_selector/events.hpp -src/emel/logits/sampler/token_selector/guards.hpp -src/emel/logits/sampler/token_selector/sm.hpp src/emel/logits/validator/actions.hpp src/emel/logits/validator/context.hpp +src/emel/logits/validator/errors.hpp src/emel/logits/validator/events.hpp src/emel/logits/validator/guards.hpp src/emel/logits/validator/sm.hpp src/emel/machines.hpp -src/emel/memory/coordinator/any.hpp -src/emel/memory/coordinator/events.hpp -src/emel/memory/coordinator/hybrid/sm.hpp -src/emel/memory/coordinator/kv/sm.hpp -src/emel/memory/coordinator/recurrent/sm.hpp -src/emel/memory/coordinator/sm.hpp +src/emel/memory/detail.hpp src/emel/memory/events.hpp src/emel/memory/hybrid/actions.hpp src/emel/memory/hybrid/context.hpp +src/emel/memory/hybrid/detail.hpp +src/emel/memory/hybrid/errors.hpp src/emel/memory/hybrid/events.hpp src/emel/memory/hybrid/guards.hpp src/emel/memory/hybrid/sm.hpp src/emel/memory/kv/actions.hpp src/emel/memory/kv/context.hpp +src/emel/memory/kv/detail.hpp +src/emel/memory/kv/errors.hpp src/emel/memory/kv/events.hpp src/emel/memory/kv/guards.hpp src/emel/memory/kv/sm.hpp src/emel/memory/recurrent/actions.hpp src/emel/memory/recurrent/context.hpp +src/emel/memory/recurrent/detail.hpp +src/emel/memory/recurrent/errors.hpp src/emel/memory/recurrent/events.hpp src/emel/memory/recurrent/guards.hpp src/emel/memory/recurrent/sm.hpp src/emel/memory/view.hpp -src/emel/model/architecture/data.hpp src/emel/model/data.cpp src/emel/model/data.hpp src/emel/model/loader/actions.hpp src/emel/model/loader/context.hpp +src/emel/model/loader/errors.hpp src/emel/model/loader/events.hpp src/emel/model/loader/guards.hpp src/emel/model/loader/sm.hpp src/emel/model/weight_loader/actions.hpp src/emel/model/weight_loader/context.hpp +src/emel/model/weight_loader/errors.hpp src/emel/model/weight_loader/events.hpp src/emel/model/weight_loader/guards.hpp src/emel/model/weight_loader/sm.hpp -src/emel/parser/actions.hpp -src/emel/parser/context.hpp -src/emel/parser/dispatch.hpp -src/emel/parser/events.hpp -src/emel/parser/gguf/actions.hpp -src/emel/parser/gguf/context.hpp -src/emel/parser/gguf/sm.hpp -src/emel/parser/guards.hpp -src/emel/parser/map.hpp -src/emel/parser/sm.hpp src/emel/sm.hpp -src/emel/telemetry/exporter/actions.hpp -src/emel/telemetry/exporter/context.hpp -src/emel/telemetry/exporter/events.hpp -src/emel/telemetry/exporter/guards.hpp -src/emel/telemetry/exporter/sm.hpp -src/emel/telemetry/provider/actions.hpp -src/emel/telemetry/provider/context.hpp -src/emel/telemetry/provider/events.hpp -src/emel/telemetry/provider/guards.hpp -src/emel/telemetry/provider/sm.hpp -src/emel/telemetry/record.hpp -src/emel/tensor/allocator/actions.hpp -src/emel/tensor/allocator/context.hpp -src/emel/tensor/allocator/events.hpp -src/emel/tensor/allocator/guards.hpp -src/emel/tensor/allocator/sm.hpp -src/emel/tensor/lifetime_analyzer/actions.hpp -src/emel/tensor/lifetime_analyzer/context.hpp -src/emel/tensor/lifetime_analyzer/events.hpp -src/emel/tensor/lifetime_analyzer/guards.hpp -src/emel/tensor/lifetime_analyzer/sm.hpp -src/emel/tensor/tensor/actions.hpp -src/emel/tensor/tensor/context.hpp -src/emel/tensor/tensor/events.hpp -src/emel/tensor/tensor/guards.hpp -src/emel/tensor/tensor/sm.hpp +src/emel/tensor/actions.hpp +src/emel/tensor/context.hpp +src/emel/tensor/detail.hpp +src/emel/tensor/errors.hpp +src/emel/tensor/events.hpp +src/emel/tensor/guards.hpp +src/emel/tensor/sm.hpp src/emel/tensor/view/actions.hpp src/emel/tensor/view/context.hpp +src/emel/tensor/view/detail.hpp +src/emel/tensor/view/errors.hpp src/emel/tensor/view/events.hpp src/emel/tensor/view/guards.hpp src/emel/tensor/view/sm.hpp -src/emel/text/conditioner/actions.hpp src/emel/text/conditioner/context.hpp +src/emel/text/conditioner/errors.hpp src/emel/text/conditioner/events.hpp -src/emel/text/conditioner/guards.hpp -src/emel/text/conditioner/sm.hpp src/emel/text/detokenizer/actions.hpp src/emel/text/detokenizer/context.hpp +src/emel/text/detokenizer/detail.hpp +src/emel/text/detokenizer/errors.hpp src/emel/text/detokenizer/events.hpp src/emel/text/detokenizer/guards.hpp src/emel/text/detokenizer/sm.hpp @@ -227,42 +326,50 @@ src/emel/text/encoders/any.hpp src/emel/text/encoders/bpe/actions.hpp src/emel/text/encoders/bpe/context.hpp src/emel/text/encoders/bpe/detail.hpp +src/emel/text/encoders/bpe/errors.hpp src/emel/text/encoders/bpe/guards.hpp src/emel/text/encoders/bpe/sm.hpp src/emel/text/encoders/context.hpp src/emel/text/encoders/detail.hpp +src/emel/text/encoders/errors.hpp src/emel/text/encoders/events.hpp src/emel/text/encoders/fallback/actions.hpp src/emel/text/encoders/fallback/context.hpp src/emel/text/encoders/fallback/detail.hpp +src/emel/text/encoders/fallback/errors.hpp src/emel/text/encoders/fallback/guards.hpp src/emel/text/encoders/fallback/sm.hpp src/emel/text/encoders/guards.hpp src/emel/text/encoders/plamo2/actions.hpp src/emel/text/encoders/plamo2/context.hpp src/emel/text/encoders/plamo2/detail.hpp +src/emel/text/encoders/plamo2/errors.hpp src/emel/text/encoders/plamo2/guards.hpp src/emel/text/encoders/plamo2/sm.hpp src/emel/text/encoders/rwkv/actions.hpp src/emel/text/encoders/rwkv/context.hpp src/emel/text/encoders/rwkv/detail.hpp +src/emel/text/encoders/rwkv/errors.hpp src/emel/text/encoders/rwkv/guards.hpp src/emel/text/encoders/rwkv/sm.hpp src/emel/text/encoders/sm.hpp src/emel/text/encoders/spm/actions.hpp src/emel/text/encoders/spm/context.hpp src/emel/text/encoders/spm/detail.hpp +src/emel/text/encoders/spm/errors.hpp src/emel/text/encoders/spm/guards.hpp src/emel/text/encoders/spm/sm.hpp src/emel/text/encoders/types.hpp src/emel/text/encoders/ugm/actions.hpp src/emel/text/encoders/ugm/context.hpp src/emel/text/encoders/ugm/detail.hpp +src/emel/text/encoders/ugm/errors.hpp src/emel/text/encoders/ugm/guards.hpp src/emel/text/encoders/ugm/sm.hpp src/emel/text/encoders/wpm/actions.hpp src/emel/text/encoders/wpm/context.hpp src/emel/text/encoders/wpm/detail.hpp +src/emel/text/encoders/wpm/errors.hpp src/emel/text/encoders/wpm/guards.hpp src/emel/text/encoders/wpm/sm.hpp src/emel/text/formatter/actions.hpp @@ -271,36 +378,37 @@ src/emel/text/formatter/events.hpp src/emel/text/formatter/format.hpp src/emel/text/formatter/guards.hpp src/emel/text/formatter/sm.hpp -src/emel/text/jinja/ast.hpp src/emel/text/jinja/formatter/actions.hpp src/emel/text/jinja/formatter/context.hpp src/emel/text/jinja/formatter/detail.hpp +src/emel/text/jinja/formatter/errors.hpp src/emel/text/jinja/formatter/events.hpp src/emel/text/jinja/formatter/guards.hpp src/emel/text/jinja/formatter/sm.hpp -src/emel/text/jinja/lexer.hpp -src/emel/text/jinja/parser/actions.hpp -src/emel/text/jinja/parser/detail.hpp -src/emel/text/jinja/parser/events.hpp -src/emel/text/jinja/parser/guards.hpp -src/emel/text/jinja/parser/sm.hpp -src/emel/text/jinja/value.hpp +src/emel/text/jinja/parser/classifier_parser/actions.hpp +src/emel/text/jinja/parser/classifier_parser/context.hpp +src/emel/text/jinja/parser/classifier_parser/errors.hpp +src/emel/text/jinja/parser/classifier_parser/guards.hpp +src/emel/text/jinja/parser/classifier_parser/sm.hpp +src/emel/text/jinja/parser/errors.hpp +src/emel/text/jinja/parser/lexer/errors.hpp +src/emel/text/jinja/parser/program_parser/context.hpp +src/emel/text/jinja/parser/program_parser/errors.hpp src/emel/text/renderer/actions.hpp src/emel/text/renderer/context.hpp +src/emel/text/renderer/errors.hpp src/emel/text/renderer/events.hpp src/emel/text/renderer/guards.hpp src/emel/text/renderer/sm.hpp src/emel/text/tokenizer/actions.hpp src/emel/text/tokenizer/bpe/regex.hpp src/emel/text/tokenizer/bpe/split.hpp -src/emel/text/tokenizer/context.hpp -src/emel/text/tokenizer/events.hpp -src/emel/text/tokenizer/guards.hpp src/emel/text/tokenizer/preprocessor/actions.hpp src/emel/text/tokenizer/preprocessor/any.hpp src/emel/text/tokenizer/preprocessor/bpe/sm.hpp src/emel/text/tokenizer/preprocessor/context.hpp src/emel/text/tokenizer/preprocessor/detail.hpp +src/emel/text/tokenizer/preprocessor/errors.hpp src/emel/text/tokenizer/preprocessor/events.hpp src/emel/text/tokenizer/preprocessor/fallback/actions.hpp src/emel/text/tokenizer/preprocessor/fallback/guards.hpp @@ -312,7 +420,6 @@ src/emel/text/tokenizer/preprocessor/plamo2/sm.hpp src/emel/text/tokenizer/preprocessor/rwkv/actions.hpp src/emel/text/tokenizer/preprocessor/rwkv/guards.hpp src/emel/text/tokenizer/preprocessor/rwkv/sm.hpp -src/emel/text/tokenizer/preprocessor/sm.hpp src/emel/text/tokenizer/preprocessor/spm/actions.hpp src/emel/text/tokenizer/preprocessor/spm/guards.hpp src/emel/text/tokenizer/preprocessor/spm/sm.hpp @@ -321,89 +428,67 @@ src/emel/text/tokenizer/preprocessor/ugm/sm.hpp src/emel/text/tokenizer/preprocessor/wpm/actions.hpp src/emel/text/tokenizer/preprocessor/wpm/guards.hpp src/emel/text/tokenizer/preprocessor/wpm/sm.hpp -src/emel/text/tokenizer/sm.hpp src/emel/text/unicode.hpp src/emel/text/unicode_data.hpp src/emel/token/batcher/actions.hpp src/emel/token/batcher/context.hpp +src/emel/token/batcher/detail.hpp +src/emel/token/batcher/errors.hpp src/emel/token/batcher/events.hpp src/emel/token/batcher/guards.hpp src/emel/token/batcher/sm.hpp +tests/batch/planner/modes/detail_tests.cpp +tests/batch/planner/modes/equal_actions_tests.cpp +tests/batch/planner/modes/sequential_actions_tests.cpp +tests/batch/planner/modes/simple_actions_tests.cpp tests/batch/planner/planner_action_branch_tests.cpp tests/batch/planner/planner_actions_tests.cpp tests/batch/planner/planner_additional_tests.cpp tests/batch/planner/planner_sm_flow_tests.cpp tests/batch/planner/planner_sm_transition_tests.cpp tests/batch/planner/planner_tests.cpp -tests/buffer/allocator_action_branch_more_tests.cpp -tests/buffer/allocator_action_branch_tests.cpp -tests/buffer/allocator_action_line_targets_tests.cpp -tests/buffer/allocator_actions_more_tests.cpp -tests/buffer/allocator_actions_tests.cpp -tests/buffer/allocator_c_api_tests.cpp -tests/buffer/allocator_detail_tests.cpp -tests/buffer/allocator_error_tests.cpp -tests/buffer/allocator_guard_tests.cpp -tests/buffer/allocator_parity_tests.cpp -tests/buffer/allocator_sm_transition_tests.cpp -tests/buffer/allocator_tests.cpp -tests/buffer/chunk_allocator_action_branch_tests.cpp -tests/buffer/chunk_allocator_error_tests.cpp -tests/buffer/chunk_allocator_parity_tests.cpp -tests/buffer/chunk_allocator_sm_testing_policy_tests.cpp -tests/buffer/chunk_allocator_sm_transition_tests.cpp -tests/buffer/chunk_allocator_tests.cpp -tests/buffer/planner_action_branch_tests.cpp -tests/buffer/planner_actions_branch_targets_tests.cpp -tests/buffer/planner_actions_tests.cpp -tests/buffer/planner_error_tests.cpp -tests/buffer/planner_sm_transition_tests.cpp -tests/buffer/planner_tests.cpp -tests/buffer/realloc_analyzer_action_branch_tests.cpp -tests/buffer/realloc_analyzer_error_tests.cpp -tests/buffer/realloc_analyzer_sm_flow_tests.cpp -tests/buffer/realloc_analyzer_sm_transition_tests.cpp -tests/buffer/realloc_analyzer_tests.cpp -tests/decoder/decoder_action_branch_tests.cpp -tests/decoder/decoder_sm_transition_tests.cpp -tests/decoder/decoder_tests.cpp +tests/fuzz/gbnf_parser_fuzz.cpp +tests/fuzz/gguf_parser_fuzz.cpp +tests/gbnf/lexer_tests.cpp tests/gbnf/parser_tests.cpp +tests/gbnf/sampler_tests.cpp +tests/generator/lifecycle_tests.cpp +tests/gguf/loader/lifecycle_tests.cpp +tests/graph/allocator/allocator_action_branch_tests.cpp +tests/graph/allocator/allocator_tests.cpp +tests/graph/assembler/assembler_action_branch_tests.cpp +tests/graph/assembler/assembler_tests.cpp +tests/graph/graph_tests.cpp tests/graph/processor/processor_action_branch_tests.cpp tests/graph/processor/processor_sm_transition_tests.cpp tests/graph/processor/processor_tests.cpp +tests/graph/wrapper_visibility_tests.cpp +tests/kernel/aarch64_tests.cpp +tests/kernel/lifecycle_tests.cpp +tests/kernel/test_helpers.hpp +tests/kernel/x86_64_tests.cpp tests/logits/sampler/pipeline_tests.cpp -tests/logits/sampler/token_selector_tests.cpp tests/logits/validator/validator_tests.cpp -tests/memory/coordinator_any_lifecycle_tests.cpp tests/memory/hybrid/lifecycle_tests.cpp tests/memory/kv/lifecycle_tests.cpp tests/memory/recurrent/lifecycle_tests.cpp -tests/model/gguf_loader_tests.cpp -tests/model/gguf_parser_tests.cpp -tests/model/gguf_tests.cpp -tests/model/loader_tests.cpp -tests/model/parser_tests.cpp -tests/model/weight_loader_tests.cpp +tests/model/loader/lifecycle_tests.cpp +tests/model/weight_loader/lifecycle_tests.cpp +tests/sm/callback_tests.cpp tests/sm/sm_any_tests.cpp tests/sm/sm_policy_tests.cpp -tests/telemetry/exporter_tests.cpp -tests/telemetry/provider_tests.cpp -tests/tensor/allocator_action_branch_more_tests.cpp -tests/tensor/allocator_action_branch_targets_tests.cpp -tests/tensor/allocator_action_branch_tests.cpp -tests/tensor/allocator_branch_tests.cpp -tests/tensor/allocator_error_tests.cpp -tests/tensor/allocator_sm_testing_policy_tests.cpp -tests/tensor/allocator_sm_transition_more_tests.cpp -tests/tensor/allocator_sm_transition_tests.cpp -tests/tensor/allocator_tests.cpp -tests/tensor/lifetime_analyzer_action_branch_tests.cpp -tests/tensor/lifetime_analyzer_error_tests.cpp -tests/tensor/lifetime_analyzer_sm_error_tests.cpp -tests/tensor/lifetime_analyzer_sm_transition_tests.cpp -tests/tensor/lifetime_analyzer_tests.cpp -tests/text/conditioner/conditioner_tests.cpp -tests/text/encoders/encoder_tests.cpp +tests/tensor/lifecycle_tests.cpp +tests/tensor/view/lifecycle_tests.cpp +tests/text/detokenizer/detokenizer_tests.cpp +tests/text/encoders/bpe_tests.cpp +tests/text/encoders/common_tests.cpp +tests/text/encoders/fallback_tests.cpp +tests/text/encoders/plamo2_tests.cpp +tests/text/encoders/rwkv_tests.cpp +tests/text/encoders/spm_tests.cpp +tests/text/encoders/test_support.hpp +tests/text/encoders/ugm_tests.cpp +tests/text/encoders/wpm_tests.cpp tests/text/jinja/formatter_tests.cpp tests/text/jinja/lexer_tests.cpp tests/text/jinja/parser_tests.cpp @@ -416,7 +501,6 @@ tests/text/tokenizer/preprocessor_rwkv_tests.cpp tests/text/tokenizer/preprocessor_spm_tests.cpp tests/text/tokenizer/preprocessor_tests.cpp tests/text/tokenizer/preprocessor_wpm_tests.cpp -tests/text/tokenizer/tokenizer_action_guard_tests.cpp tests/text/tokenizer/tokenizer_parity_tests.cpp tests/text/tokenizer/tokenizer_tests.cpp tests/text/unicode/unicode_tests.cpp diff --git a/snapshots/quality_gates/timing.txt b/snapshots/quality_gates/timing.txt index 9970adbf..c13f98fb 100644 --- a/snapshots/quality_gates/timing.txt +++ b/snapshots/quality_gates/timing.txt @@ -1,7 +1,8 @@ # quality_gates timing (seconds) build_with_zig 0 -test_with_coverage 124 +test_with_coverage 77 paritychecker 5 -fuzz_smoke 77 -bench_snapshot 80 -total 286 +fuzz_smoke 30 +bench_snapshot 81 +generate_docs 38 +total 231 From c20879a58ef37c8ef4980fa2ba31ba6cab46088c Mon Sep 17 00:00:00 2001 From: gabewillen Date: Mon, 2 Mar 2026 21:45:52 -0600 Subject: [PATCH 2/4] jinja: interpolate template vars during formatter render --- README.md | 35 ++++++++- docs/benchmarks.md | 95 ++++++++++++++++++++++- src/emel/text/jinja/formatter/actions.hpp | 90 ++++++++++++++++++++- tools/docsgen/docsgen.cpp | 4 +- 4 files changed, 217 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index d0a3afe5..f39953c0 100644 --- a/README.md +++ b/README.md @@ -98,7 +98,38 @@ environments, while Zig remains the default for day-to-day builds. ## Docs index -{{ docs_toc }} +- [`docs/benchmarks.md`](docs/benchmarks.md) +- [`docs/architecture/batch_planner_modes_equal.md`](docs/architecture/batch_planner_modes_equal.md) +- [`docs/architecture/batch_planner_modes_sequential.md`](docs/architecture/batch_planner_modes_sequential.md) +- [`docs/architecture/batch_planner_modes_simple.md`](docs/architecture/batch_planner_modes_simple.md) +- [`docs/architecture/gbnf_rule_parser_definition_parser.md`](docs/architecture/gbnf_rule_parser_definition_parser.md) +- [`docs/architecture/gbnf_rule_parser_expression_parser.md`](docs/architecture/gbnf_rule_parser_expression_parser.md) +- [`docs/architecture/gbnf_rule_parser_nonterm_parser.md`](docs/architecture/gbnf_rule_parser_nonterm_parser.md) +- [`docs/architecture/gbnf_rule_parser_term_parser.md`](docs/architecture/gbnf_rule_parser_term_parser.md) +- [`docs/architecture/gbnf_sampler_accept_parser.md`](docs/architecture/gbnf_sampler_accept_parser.md) +- [`docs/architecture/gbnf_sampler_candidate_parser.md`](docs/architecture/gbnf_sampler_candidate_parser.md) +- [`docs/architecture/gbnf_sampler_matcher_parser.md`](docs/architecture/gbnf_sampler_matcher_parser.md) +- [`docs/architecture/gbnf_sampler_token_parser.md`](docs/architecture/gbnf_sampler_token_parser.md) +- [`docs/architecture/graph_allocator_liveness_pass.md`](docs/architecture/graph_allocator_liveness_pass.md) +- [`docs/architecture/graph_allocator_ordering_pass.md`](docs/architecture/graph_allocator_ordering_pass.md) +- [`docs/architecture/graph_allocator_placement_pass.md`](docs/architecture/graph_allocator_placement_pass.md) +- [`docs/architecture/graph_assembler_assemble_alloc_pass.md`](docs/architecture/graph_assembler_assemble_alloc_pass.md) +- [`docs/architecture/graph_assembler_assemble_build_pass.md`](docs/architecture/graph_assembler_assemble_build_pass.md) +- [`docs/architecture/graph_assembler_assemble_validate_pass.md`](docs/architecture/graph_assembler_assemble_validate_pass.md) +- [`docs/architecture/graph_assembler_reserve_alloc_pass.md`](docs/architecture/graph_assembler_reserve_alloc_pass.md) +- [`docs/architecture/graph_assembler_reserve_build_pass.md`](docs/architecture/graph_assembler_reserve_build_pass.md) +- [`docs/architecture/graph_assembler_reserve_validate_pass.md`](docs/architecture/graph_assembler_reserve_validate_pass.md) +- [`docs/architecture/graph_assembler_reuse_decision_pass.md`](docs/architecture/graph_assembler_reuse_decision_pass.md) +- [`docs/architecture/graph_processor_alloc_step.md`](docs/architecture/graph_processor_alloc_step.md) +- [`docs/architecture/graph_processor_bind_step.md`](docs/architecture/graph_processor_bind_step.md) +- [`docs/architecture/graph_processor_extract_step.md`](docs/architecture/graph_processor_extract_step.md) +- [`docs/architecture/graph_processor_kernel_step.md`](docs/architecture/graph_processor_kernel_step.md) +- [`docs/architecture/graph_processor_prepare_step.md`](docs/architecture/graph_processor_prepare_step.md) +- [`docs/architecture/graph_processor_validate_step.md`](docs/architecture/graph_processor_validate_step.md) +- [`docs/architecture/text_jinja_parser_classifier_parser.md`](docs/architecture/text_jinja_parser_classifier_parser.md) +- [`docs/architecture/text_jinja_parser_program_parser_expression_parser.md`](docs/architecture/text_jinja_parser_program_parser_expression_parser.md) +- [`docs/architecture/text_jinja_parser_program_parser.md`](docs/architecture/text_jinja_parser_program_parser.md) +- [`docs/architecture/text_jinja_parser_program_parser_statement_parser.md`](docs/architecture/text_jinja_parser_program_parser_statement_parser.md) ## Regenerating docs @@ -106,4 +137,4 @@ environments, while Zig remains the default for day-to-day builds. scripts/generate_docs.sh ``` -Use `scripts/generate_docs.sh --check` in CI to validate generated artifacts. +Use `scripts/generate_docs.sh --check` in CI to validate generated artifacts. \ No newline at end of file diff --git a/docs/benchmarks.md b/docs/benchmarks.md index 0f3c749c..a6890656 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -6,4 +6,97 @@ Note: While EMEL is modular and easy to bench in isolation, llama.cpp code is ve entangled. These microbenches aim for apples-to-apples comparisons but likely are not. True benchmarks will be end-to-end once the system is complete. -{{ benchmarks_table }} +| Benchmark | emel.cpp ns/op | llama.cpp ns/op | ratio | +| --- | ---: | ---: | ---: | +| `batch/planner_equal` | 1846.750 | 8689.946 | 0.213x | +| `batch/planner_seq` | 1781.388 | 3996.500 | 0.446x | +| `batch/planner_simple` | 1348.817 | 3498.363 | 0.386x | +| `gbnf/rule_parser_basic` | 247.521 | 471.233 | 0.525x | +| `gbnf/rule_parser_complex` | 1933.033 | 2515.221 | 0.769x | +| `kernel/aarch64/op_add` | 88.783 | 5061.321 | 0.018x | +| `kernel/aarch64/op_cos` | 1668.921 | 6025.850 | 0.277x | +| `kernel/aarch64/op_div` | 88.600 | 4142.504 | 0.021x | +| `kernel/aarch64/op_dup` | 85.975 | 4095.954 | 0.021x | +| `kernel/aarch64/op_log` | 1843.883 | 6106.117 | 0.302x | +| `kernel/aarch64/op_mul` | 91.025 | 5091.896 | 0.018x | +| `kernel/aarch64/op_mul_mat` | 4540.008 | 10639.004 | 0.427x | +| `kernel/aarch64/op_sin` | 1447.079 | 5599.971 | 0.258x | +| `kernel/aarch64/op_soft_max` | 2066.808 | 4972.771 | 0.416x | +| `kernel/aarch64/op_sqr` | 86.779 | 4090.646 | 0.021x | +| `kernel/aarch64/op_sqrt` | 137.033 | 4436.392 | 0.031x | +| `kernel/aarch64/op_sub` | 91.279 | 5088.383 | 0.018x | +| `kernel/aarch64/op_unary_exp` | 1297.300 | 5642.096 | 0.230x | +| `kernel/aarch64/op_unary_neg` | 89.208 | 4536.625 | 0.020x | +| `kernel/aarch64/op_unary_relu` | 85.879 | 4413.375 | 0.019x | +| `kernel/x86_64/op_add` | 60.092 | 5068.100 | 0.012x | +| `kernel/x86_64/op_cos` | 1969.629 | 5873.692 | 0.335x | +| `kernel/x86_64/op_div` | 74.679 | 4153.717 | 0.018x | +| `kernel/x86_64/op_dup` | 47.033 | 4013.613 | 0.012x | +| `kernel/x86_64/op_log` | 1820.858 | 6532.413 | 0.279x | +| `kernel/x86_64/op_mul` | 60.196 | 5235.196 | 0.011x | +| `kernel/x86_64/op_mul_mat` | 44244.079 | 10511.242 | 4.209x | +| `kernel/x86_64/op_sin` | 1296.000 | 5583.742 | 0.232x | +| `kernel/x86_64/op_soft_max` | 2062.137 | 5244.917 | 0.393x | +| `kernel/x86_64/op_sqr` | 49.138 | 4063.596 | 0.012x | +| `kernel/x86_64/op_sqrt` | 143.012 | 4265.863 | 0.034x | +| `kernel/x86_64/op_sub` | 60.096 | 5310.508 | 0.011x | +| `kernel/x86_64/op_unary_exp` | 1284.658 | 5399.771 | 0.238x | +| `kernel/x86_64/op_unary_neg` | 51.946 | 4309.450 | 0.012x | +| `kernel/x86_64/op_unary_relu` | 52.304 | 4238.471 | 0.012x | +| `logits/sampler_raw/vocab_128000` | 19259.958 | 18468.492 | 1.043x | +| `logits/sampler_raw/vocab_256000` | 38539.842 | 36725.137 | 1.049x | +| `logits/sampler_raw/vocab_32000` | 5214.146 | 4826.229 | 1.080x | +| `logits/sampler_sml/vocab_128000` | 15429.442 | 14757.788 | 1.046x | +| `logits/sampler_sml/vocab_256000` | 34200.133 | 30380.342 | 1.126x | +| `logits/sampler_sml/vocab_32000` | 4436.292 | 4330.962 | 1.024x | +| `logits/validator_raw/vocab_128000` | 90205.633 | 90458.808 | 0.997x | +| `logits/validator_raw/vocab_256000` | 181372.546 | 179498.462 | 1.010x | +| `logits/validator_raw/vocab_32000` | 23735.550 | 23904.125 | 0.993x | +| `logits/validator_sml/vocab_128000` | 99648.387 | 99266.212 | 1.004x | +| `logits/validator_sml/vocab_256000` | 197266.092 | 199430.296 | 0.989x | +| `logits/validator_sml/vocab_32000` | 24528.092 | 24126.225 | 1.017x | +| `memory/hybrid_full` | 408.700 | 36677.713 | 0.011x | +| `memory/kv_full` | 103.067 | 36946.496 | 0.003x | +| `memory/recurrent_full` | 113.079 | 5595.042 | 0.020x | +| `text/encoders/bpe_long` | 10221.996 | 10221.204 | 1.000x | +| `text/encoders/bpe_short` | 159.125 | 153.158 | 1.039x | +| `text/encoders/fallback_long` | 2470.238 | 2485.546 | 0.994x | +| `text/encoders/fallback_short` | 50.267 | 47.825 | 1.051x | +| `text/encoders/plamo2_long` | 4848.942 | 4878.158 | 0.994x | +| `text/encoders/plamo2_short` | 107.117 | 104.096 | 1.029x | +| `text/encoders/rwkv_long` | 4557.729 | 4543.887 | 1.003x | +| `text/encoders/rwkv_short` | 2697.533 | 2658.883 | 1.015x | +| `text/encoders/spm_long` | 12589.987 | 12349.475 | 1.019x | +| `text/encoders/spm_short` | 213.188 | 205.325 | 1.038x | +| `text/encoders/ugm_long` | 8308.617 | 8295.337 | 1.002x | +| `text/encoders/ugm_short` | 137.250 | 137.008 | 1.002x | +| `text/encoders/wpm_long` | 26858.621 | 26355.825 | 1.019x | +| `text/encoders/wpm_short` | 531.438 | 540.237 | 0.984x | +| `text/jinja/formatter_long` | 87073.829 | 400326.883 | 0.218x | +| `text/jinja/formatter_short` | 1144.017 | 6368.133 | 0.180x | +| `text/jinja/parser_long` | 35902.459 | 42470.375 | 0.845x | +| `text/jinja/parser_short` | 1100.708 | 532.792 | 2.066x | +| `tokenizer/full_bpe_long` | 9967.413 | 9607.096 | 1.038x | +| `tokenizer/full_bpe_short` | 220.113 | 218.846 | 1.006x | +| `tokenizer/full_plamo2_long` | 9890.796 | 9985.525 | 0.991x | +| `tokenizer/full_plamo2_short` | 1799.446 | 1769.058 | 1.017x | +| `tokenizer/full_rwkv_long` | 3566.475 | 3551.117 | 1.004x | +| `tokenizer/full_rwkv_short` | 2373.500 | 2159.892 | 1.099x | +| `tokenizer/full_spm_long` | 13766.279 | 13689.263 | 1.006x | +| `tokenizer/full_spm_short` | 296.825 | 285.354 | 1.040x | +| `tokenizer/full_ugm_long` | 10042.667 | 9989.429 | 1.005x | +| `tokenizer/full_ugm_short` | 1817.804 | 1818.546 | 1.000x | +| `tokenizer/full_wpm_long` | 28866.112 | 34007.938 | 0.849x | +| `tokenizer/full_wpm_short` | 2204.133 | 2210.221 | 0.997x | +| `tokenizer/preprocessor_bpe_long` | 2775.246 | 5265.688 | 0.527x | +| `tokenizer/preprocessor_bpe_short` | 82.854 | 1747.217 | 0.047x | +| `tokenizer/preprocessor_plamo2_long` | 3052.371 | 4619.908 | 0.661x | +| `tokenizer/preprocessor_plamo2_short` | 2367.925 | 3575.713 | 0.662x | +| `tokenizer/preprocessor_rwkv_long` | 3077.379 | 4554.646 | 0.676x | +| `tokenizer/preprocessor_rwkv_short` | 2356.238 | 3536.963 | 0.666x | +| `tokenizer/preprocessor_spm_long` | 3092.796 | 4569.296 | 0.677x | +| `tokenizer/preprocessor_spm_short` | 2361.154 | 3586.446 | 0.658x | +| `tokenizer/preprocessor_ugm_long` | 3139.088 | 4625.679 | 0.679x | +| `tokenizer/preprocessor_ugm_short` | 2375.508 | 3560.692 | 0.667x | +| `tokenizer/preprocessor_wpm_long` | 3043.238 | 4503.621 | 0.676x | +| `tokenizer/preprocessor_wpm_short` | 2599.613 | 3530.233 | 0.736x | diff --git a/src/emel/text/jinja/formatter/actions.hpp b/src/emel/text/jinja/formatter/actions.hpp index c939e40d..d0ca7c27 100644 --- a/src/emel/text/jinja/formatter/actions.hpp +++ b/src/emel/text/jinja/formatter/actions.hpp @@ -1,6 +1,8 @@ #pragma once #include +#include +#include #include "emel/text/jinja/formatter/detail.hpp" #include "emel/text/jinja/formatter/events.hpp" @@ -10,6 +12,83 @@ namespace emel::text::jinja::formatter::action { namespace runtime_detail { +inline const emel::text::jinja::value * +lookup_global(const emel::text::jinja::object_value * const globals, + const std::string_view key) noexcept { + if (!globals || globals->count == 0) { + return nullptr; + } + + for (size_t i = 0; i < globals->count; ++i) { + const auto & entry = globals->entries[i]; + if (entry.key.type == emel::text::jinja::value_type::string && + entry.key.string_v.view == key) { + return &entry.val; + } + } + return nullptr; +} + +inline void append_value(std::string & output, + const emel::text::jinja::value & value) { + switch (value.type) { + case emel::text::jinja::value_type::string: + output += value.string_v.view; + break; + case emel::text::jinja::value_type::integer: + output += std::to_string(value.int_v); + break; + case emel::text::jinja::value_type::floating: + output += std::to_string(value.float_v); + break; + case emel::text::jinja::value_type::boolean: + output += value.bool_v ? "true" : "false"; + break; + default: + break; + } +} + +inline void render_node(std::string & output, + const emel::text::jinja::ast_node * const node, + const emel::text::jinja::object_value * const globals) { + if (!node) { + return; + } + + if (const auto * text = dynamic_cast(node); + text != nullptr) { + output += text->value; + return; + } + if (const auto * comment = dynamic_cast(node); + comment != nullptr) { + (void)comment; + return; + } + if (const auto * id = dynamic_cast(node); + id != nullptr) { + const auto * value = lookup_global(globals, id->name); + if (value != nullptr) { + append_value(output, *value); + } + return; + } + if (dynamic_cast(node) != nullptr) { + return; + } +} + +inline std::string render_program(const emel::text::jinja::program & program, + const emel::text::jinja::object_value * const globals) { + std::string output; + output.reserve(program.body.size() * 16); + for (const auto & node : program.body) { + render_node(output, node.get(), globals); + } + return output; +} + template constexpr decltype(auto) unwrap_runtime_event(const runtime_event_type & ev) noexcept { if constexpr (requires { ev.event_; }) { @@ -50,8 +129,15 @@ struct copy_source_text { void operator()(const runtime_event_type & ev) const noexcept { const auto & runtime_ev = runtime_detail::unwrap_runtime_event(ev); const auto & request = runtime_ev.request; - std::memcpy(&request.output, request.source_text.data(), request.source_text.size()); - detail::mark_done(runtime_ev.ctx, request.source_text.size(), false); + const auto rendered = runtime_detail::render_program(request.program, request.globals); + if (rendered.size() > request.output_capacity) { + detail::mark_error(runtime_ev.ctx, error::invalid_request, true, 0); + return; + } + if (!rendered.empty()) { + std::memcpy(&request.output, rendered.data(), rendered.size()); + } + detail::mark_done(runtime_ev.ctx, rendered.size(), false); } }; diff --git a/tools/docsgen/docsgen.cpp b/tools/docsgen/docsgen.cpp index f527f7d1..70b8d955 100644 --- a/tools/docsgen/docsgen.cpp +++ b/tools/docsgen/docsgen.cpp @@ -7,11 +7,11 @@ #include #include #include -#include #include -#include +#include #include #include +#include #include #include #include From 77fcc365540bbea443de3d456500dcedb1f7307d Mon Sep 17 00:00:00 2001 From: gabewillen Date: Mon, 2 Mar 2026 21:49:48 -0600 Subject: [PATCH 3/4] docsgen: render template vars from parsed jinja AST --- src/emel/text/jinja/formatter/actions.hpp | 90 +---------------- tools/docsgen/docsgen.cpp | 112 ++++++++++------------ 2 files changed, 51 insertions(+), 151 deletions(-) diff --git a/src/emel/text/jinja/formatter/actions.hpp b/src/emel/text/jinja/formatter/actions.hpp index d0ca7c27..c939e40d 100644 --- a/src/emel/text/jinja/formatter/actions.hpp +++ b/src/emel/text/jinja/formatter/actions.hpp @@ -1,8 +1,6 @@ #pragma once #include -#include -#include #include "emel/text/jinja/formatter/detail.hpp" #include "emel/text/jinja/formatter/events.hpp" @@ -12,83 +10,6 @@ namespace emel::text::jinja::formatter::action { namespace runtime_detail { -inline const emel::text::jinja::value * -lookup_global(const emel::text::jinja::object_value * const globals, - const std::string_view key) noexcept { - if (!globals || globals->count == 0) { - return nullptr; - } - - for (size_t i = 0; i < globals->count; ++i) { - const auto & entry = globals->entries[i]; - if (entry.key.type == emel::text::jinja::value_type::string && - entry.key.string_v.view == key) { - return &entry.val; - } - } - return nullptr; -} - -inline void append_value(std::string & output, - const emel::text::jinja::value & value) { - switch (value.type) { - case emel::text::jinja::value_type::string: - output += value.string_v.view; - break; - case emel::text::jinja::value_type::integer: - output += std::to_string(value.int_v); - break; - case emel::text::jinja::value_type::floating: - output += std::to_string(value.float_v); - break; - case emel::text::jinja::value_type::boolean: - output += value.bool_v ? "true" : "false"; - break; - default: - break; - } -} - -inline void render_node(std::string & output, - const emel::text::jinja::ast_node * const node, - const emel::text::jinja::object_value * const globals) { - if (!node) { - return; - } - - if (const auto * text = dynamic_cast(node); - text != nullptr) { - output += text->value; - return; - } - if (const auto * comment = dynamic_cast(node); - comment != nullptr) { - (void)comment; - return; - } - if (const auto * id = dynamic_cast(node); - id != nullptr) { - const auto * value = lookup_global(globals, id->name); - if (value != nullptr) { - append_value(output, *value); - } - return; - } - if (dynamic_cast(node) != nullptr) { - return; - } -} - -inline std::string render_program(const emel::text::jinja::program & program, - const emel::text::jinja::object_value * const globals) { - std::string output; - output.reserve(program.body.size() * 16); - for (const auto & node : program.body) { - render_node(output, node.get(), globals); - } - return output; -} - template constexpr decltype(auto) unwrap_runtime_event(const runtime_event_type & ev) noexcept { if constexpr (requires { ev.event_; }) { @@ -129,15 +50,8 @@ struct copy_source_text { void operator()(const runtime_event_type & ev) const noexcept { const auto & runtime_ev = runtime_detail::unwrap_runtime_event(ev); const auto & request = runtime_ev.request; - const auto rendered = runtime_detail::render_program(request.program, request.globals); - if (rendered.size() > request.output_capacity) { - detail::mark_error(runtime_ev.ctx, error::invalid_request, true, 0); - return; - } - if (!rendered.empty()) { - std::memcpy(&request.output, rendered.data(), rendered.size()); - } - detail::mark_done(runtime_ev.ctx, rendered.size(), false); + std::memcpy(&request.output, request.source_text.data(), request.source_text.size()); + detail::mark_done(runtime_ev.ctx, request.source_text.size(), false); } }; diff --git a/tools/docsgen/docsgen.cpp b/tools/docsgen/docsgen.cpp index 70b8d955..e948c328 100644 --- a/tools/docsgen/docsgen.cpp +++ b/tools/docsgen/docsgen.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -19,9 +20,8 @@ #include "emel/emel.h" #include "emel/docs/detail.hpp" -#include "emel/text/jinja/parser/sm.hpp" -#include "emel/text/jinja/formatter/sm.hpp" #include "emel/text/jinja/parser/detail.hpp" +#include "emel/text/jinja/parser/sm.hpp" namespace fs = std::filesystem; @@ -287,20 +287,50 @@ struct template_var { std::string value; }; -bool formatter_render_done_sink(const emel::text::jinja::events::rendering_done &) { +bool parser_parse_done_sink(const emel::text::jinja::events::parsing_done &) { return true; } -bool formatter_render_error_sink(const emel::text::jinja::events::rendering_error &) { +bool parser_parse_error_sink(const emel::text::jinja::events::parsing_error &) { return true; } -bool parser_parse_done_sink(const emel::text::jinja::events::parsing_done &) { - return true; +const std::string * find_template_var(const std::unordered_map & vars, + const std::string_view key) { + const auto it = vars.find(std::string(key)); + if (it == vars.end()) { + return nullptr; + } + return &it->second; } -bool parser_parse_error_sink(const emel::text::jinja::events::parsing_error &) { - return true; +bool append_rendered_node(std::string & out, + const emel::text::jinja::ast_node * const node, + const std::unordered_map & vars) { + if (const auto * text = dynamic_cast(node); + text != nullptr) { + out += text->value; + return true; + } + + if (const auto * id = dynamic_cast(node); + id != nullptr) { + const auto * value = find_template_var(vars, id->name); + if (value == nullptr) { + std::fprintf(stderr, "error: template variable not provided: %s\n", id->name.c_str()); + return false; + } + out += *value; + return true; + } + + if (dynamic_cast(node) != nullptr || + dynamic_cast(node) != nullptr) { + return true; + } + + std::fprintf(stderr, "error: unsupported template AST node\n"); + return false; } std::optional render_template(const fs::path & template_path, @@ -333,70 +363,26 @@ std::optional render_template(const fs::path & template_path, return std::nullopt; } - std::vector entries; - entries.reserve(vars.size()); + std::unordered_map template_vars; + template_vars.reserve(vars.size()); for (const auto & var : vars) { - emel::text::jinja::value key; - key.type = emel::text::jinja::value_type::string; - key.string_v.view = var.key; - - emel::text::jinja::value val; - val.type = emel::text::jinja::value_type::string; - val.string_v.view = var.value; - - emel::text::jinja::object_entry entry; - entry.key = key; - entry.val = val; - entries.push_back(entry); - } - - emel::text::jinja::object_value globals{}; - if (!entries.empty()) { - globals.entries = entries.data(); - globals.count = entries.size(); - globals.capacity = entries.size(); - globals.has_builtins = false; + template_vars.insert_or_assign(var.key, var.value); } - std::size_t estimate = template_text.size() + 8192; + std::size_t estimate = template_text.size(); for (const auto & var : vars) { estimate += var.value.size(); } - std::vector output_buffer; - output_buffer.resize(estimate); - - size_t out_len = 0; - size_t error_pos = 0; - int32_t render_err = static_cast(emel::text::jinja::formatter::error::none); - emel::text::jinja::formatter::action::context render_ctx; - emel::text::jinja::formatter::sm renderer{render_ctx}; - const emel::text::jinja::event::render::done_callback render_done_cb = - emel::text::jinja::event::render::done_callback::from<&formatter_render_done_sink>(); - const emel::text::jinja::event::render::error_callback render_error_cb = - emel::text::jinja::event::render::error_callback::from<&formatter_render_error_sink>(); - emel::text::jinja::event::render render_ev{ - program, - template_text, - output_buffer[0], - output_buffer.size(), - render_done_cb, - render_error_cb, - entries.empty() ? nullptr : &globals, - &out_len, - nullptr, - &render_err, - &error_pos, - }; - - renderer.process_event(render_ev); - if (render_err != static_cast(emel::text::jinja::formatter::error::none) || - !renderer.is(boost::sml::state)) { - std::fprintf(stderr, "error: jinja render failed\n"); - return std::nullopt; + std::string rendered; + rendered.reserve(estimate); + for (const auto & node : program.body) { + if (!append_rendered_node(rendered, node.get(), template_vars)) { + return std::nullopt; + } } - return std::string(output_buffer.data(), out_len); + return rendered; } std::optional build_benchmarks_table(const doc_paths & paths) { From 4927b038edaf5be8a526a54ac79cf23e5c236ebc Mon Sep 17 00:00:00 2001 From: gabewillen Date: Mon, 2 Mar 2026 22:14:17 -0600 Subject: [PATCH 4/4] Add jinja parity mode and fuzz targets --- CMakeLists.txt | 10 + scripts/fuzz_smoke.sh | 3 + tests/fuzz/jinja_formatter_fuzz.cpp | 109 +++++++++ tests/fuzz/jinja_parser_fuzz.cpp | 52 +++++ .../invalid_unclosed_expression.j2 | 1 + tests/text/jinja/parity_texts/literal_text.j2 | 1 + tools/paritychecker/CMakeLists.txt | 14 ++ tools/paritychecker/parity_main.cpp | 14 +- tools/paritychecker/parity_runner.cpp | 211 +++++++++++++++++- tools/paritychecker/parity_runner.hpp | 1 + tools/paritychecker/paritychecker_tests.cpp | 46 ++++ 11 files changed, 459 insertions(+), 3 deletions(-) create mode 100644 tests/fuzz/jinja_formatter_fuzz.cpp create mode 100644 tests/fuzz/jinja_parser_fuzz.cpp create mode 100644 tests/text/jinja/parity_texts/invalid_unclosed_expression.j2 create mode 100644 tests/text/jinja/parity_texts/literal_text.j2 diff --git a/CMakeLists.txt b/CMakeLists.txt index d73387f5..8559b128 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -229,4 +229,14 @@ if(EMEL_ENABLE_FUZZ) tests/fuzz/gbnf_parser_fuzz.cpp ) emel_configure_fuzzer(emel_fuzz_gbnf_parser) + + add_executable(emel_fuzz_jinja_parser + tests/fuzz/jinja_parser_fuzz.cpp + ) + emel_configure_fuzzer(emel_fuzz_jinja_parser) + + add_executable(emel_fuzz_jinja_formatter + tests/fuzz/jinja_formatter_fuzz.cpp + ) + emel_configure_fuzzer(emel_fuzz_jinja_formatter) endif() diff --git a/scripts/fuzz_smoke.sh b/scripts/fuzz_smoke.sh index 9f06fcad..c15a30ba 100755 --- a/scripts/fuzz_smoke.sh +++ b/scripts/fuzz_smoke.sh @@ -66,6 +66,7 @@ cmake --build "$BUILD_DIR" --parallel run_fuzzer() { local name="$1" local corpus="$2" + mkdir -p "$corpus" "$BUILD_DIR/$name" -seed=1 -max_total_time=10 -max_len=4096 "$corpus" } @@ -73,3 +74,5 @@ if [[ -x "$BUILD_DIR/emel_fuzz_gguf_parser" ]]; then run_fuzzer emel_fuzz_gguf_parser "$ROOT_DIR/tests/fuzz/corpus/gguf_parser" fi run_fuzzer emel_fuzz_gbnf_parser "$ROOT_DIR/tests/fuzz/corpus/gbnf_parser" +run_fuzzer emel_fuzz_jinja_parser "$ROOT_DIR/tests/fuzz/corpus/jinja_parser" +run_fuzzer emel_fuzz_jinja_formatter "$ROOT_DIR/tests/fuzz/corpus/jinja_formatter" diff --git a/tests/fuzz/jinja_formatter_fuzz.cpp b/tests/fuzz/jinja_formatter_fuzz.cpp new file mode 100644 index 00000000..30702711 --- /dev/null +++ b/tests/fuzz/jinja_formatter_fuzz.cpp @@ -0,0 +1,109 @@ +#include +#include +#include +#include + +#include "emel/text/jinja/formatter/errors.hpp" +#include "emel/text/jinja/formatter/events.hpp" +#include "emel/text/jinja/formatter/sm.hpp" +#include "emel/text/jinja/parser/errors.hpp" +#include "emel/text/jinja/parser/events.hpp" +#include "emel/text/jinja/parser/sm.hpp" + +namespace { + +struct parser_dispatch_state { + bool done_called = false; + bool error_called = false; +}; + +struct formatter_dispatch_state { + bool done_called = false; + bool error_called = false; +}; + +bool on_parse_done(void * owner, const emel::text::jinja::events::parsing_done &) noexcept { + auto * state = static_cast(owner); + state->done_called = true; + return true; +} + +bool on_parse_error(void * owner, const emel::text::jinja::events::parsing_error &) noexcept { + auto * state = static_cast(owner); + state->error_called = true; + return true; +} + +bool on_render_done(void * owner, + const emel::text::jinja::events::rendering_done &) noexcept { + auto * state = static_cast(owner); + state->done_called = true; + return true; +} + +bool on_render_error(void * owner, + const emel::text::jinja::events::rendering_error &) noexcept { + auto * state = static_cast(owner); + state->error_called = true; + return true; +} + +} // namespace + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) { + emel::text::jinja::program program{}; + parser_dispatch_state parse_state{}; + formatter_dispatch_state render_state{}; + + emel::text::jinja::parser::action::context parse_ctx{}; + emel::text::jinja::parser::sm parser{parse_ctx}; + emel::text::jinja::formatter::action::context formatter_ctx{}; + emel::text::jinja::formatter::sm formatter{formatter_ctx}; + + int32_t parse_err = static_cast(emel::text::jinja::parser::error::none); + size_t parse_error_pos = 0; + const std::string_view input(reinterpret_cast(data), size); + const emel::text::jinja::event::parse::done_callback parse_done_cb{ + &parse_state, + on_parse_done}; + const emel::text::jinja::event::parse::error_callback parse_error_cb{ + &parse_state, + on_parse_error}; + const emel::text::jinja::event::parse parse_ev{ + input, + program, + parse_done_cb, + parse_error_cb, + parse_err, + parse_error_pos, + }; + (void)parser.process_event(parse_ev); + + std::array output_buffer = {}; + size_t output_len = 0; + bool output_truncated = false; + int32_t render_err = static_cast(emel::text::jinja::formatter::error::none); + size_t render_error_pos = 0; + const emel::text::jinja::event::render::done_callback render_done_cb{ + &render_state, + on_render_done}; + const emel::text::jinja::event::render::error_callback render_error_cb{ + &render_state, + on_render_error}; + const emel::text::jinja::event::render render_ev{ + program, + input, + output_buffer[0], + output_buffer.size(), + render_done_cb, + render_error_cb, + nullptr, + &output_len, + &output_truncated, + &render_err, + &render_error_pos, + }; + (void)formatter.process_event(render_ev); + + return 0; +} diff --git a/tests/fuzz/jinja_parser_fuzz.cpp b/tests/fuzz/jinja_parser_fuzz.cpp new file mode 100644 index 00000000..4e2f8e83 --- /dev/null +++ b/tests/fuzz/jinja_parser_fuzz.cpp @@ -0,0 +1,52 @@ +#include +#include +#include + +#include "emel/text/jinja/parser/errors.hpp" +#include "emel/text/jinja/parser/events.hpp" +#include "emel/text/jinja/parser/sm.hpp" + +namespace { + +struct dispatch_state { + bool done_called = false; + bool error_called = false; +}; + +bool on_done(void * owner, const emel::text::jinja::events::parsing_done &) noexcept { + auto * state = static_cast(owner); + state->done_called = true; + return true; +} + +bool on_error(void * owner, const emel::text::jinja::events::parsing_error &) noexcept { + auto * state = static_cast(owner); + state->error_called = true; + return true; +} + +} // namespace + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) { + emel::text::jinja::program program{}; + emel::text::jinja::parser::action::context parse_ctx{}; + emel::text::jinja::parser::sm parser{parse_ctx}; + dispatch_state state{}; + int32_t parse_err = static_cast(emel::text::jinja::parser::error::none); + size_t parse_error_pos = 0; + + const emel::text::jinja::event::parse::done_callback done_cb{&state, on_done}; + const emel::text::jinja::event::parse::error_callback error_cb{&state, on_error}; + const std::string_view input(reinterpret_cast(data), size); + const emel::text::jinja::event::parse parse_ev{ + input, + program, + done_cb, + error_cb, + parse_err, + parse_error_pos, + }; + + (void)parser.process_event(parse_ev); + return 0; +} diff --git a/tests/text/jinja/parity_texts/invalid_unclosed_expression.j2 b/tests/text/jinja/parity_texts/invalid_unclosed_expression.j2 new file mode 100644 index 00000000..9ca189af --- /dev/null +++ b/tests/text/jinja/parity_texts/invalid_unclosed_expression.j2 @@ -0,0 +1 @@ +hello {{ name diff --git a/tests/text/jinja/parity_texts/literal_text.j2 b/tests/text/jinja/parity_texts/literal_text.j2 new file mode 100644 index 00000000..fd4c9e63 --- /dev/null +++ b/tests/text/jinja/parity_texts/literal_text.j2 @@ -0,0 +1 @@ +hello from paritychecker diff --git a/tools/paritychecker/CMakeLists.txt b/tools/paritychecker/CMakeLists.txt index ef554076..23357eda 100644 --- a/tools/paritychecker/CMakeLists.txt +++ b/tools/paritychecker/CMakeLists.txt @@ -116,6 +116,12 @@ add_executable(paritychecker ${CMAKE_CURRENT_SOURCE_DIR}/tokenizer_rwkv_parity.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tokenizer_plamo2_parity.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tokenizer_fallback_parity.cpp + ${reference_impl_SOURCE_DIR}/common/jinja/lexer.cpp + ${reference_impl_SOURCE_DIR}/common/jinja/parser.cpp + ${reference_impl_SOURCE_DIR}/common/jinja/runtime.cpp + ${reference_impl_SOURCE_DIR}/common/jinja/value.cpp + ${reference_impl_SOURCE_DIR}/common/jinja/string.cpp + ${reference_impl_SOURCE_DIR}/common/jinja/caps.cpp ) target_link_libraries(paritychecker @@ -131,6 +137,7 @@ target_include_directories(paritychecker ${EMEL_ROOT}/src ${EMEL_ROOT}/include ${reference_impl_SOURCE_DIR}/src + ${reference_impl_SOURCE_DIR}/common ${reference_impl_SOURCE_DIR}/ggml/include ${reference_impl_SOURCE_DIR}/include ) @@ -151,6 +158,12 @@ add_executable(paritychecker_tests ${CMAKE_CURRENT_SOURCE_DIR}/tokenizer_rwkv_parity.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tokenizer_plamo2_parity.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tokenizer_fallback_parity.cpp + ${reference_impl_SOURCE_DIR}/common/jinja/lexer.cpp + ${reference_impl_SOURCE_DIR}/common/jinja/parser.cpp + ${reference_impl_SOURCE_DIR}/common/jinja/runtime.cpp + ${reference_impl_SOURCE_DIR}/common/jinja/value.cpp + ${reference_impl_SOURCE_DIR}/common/jinja/string.cpp + ${reference_impl_SOURCE_DIR}/common/jinja/caps.cpp ) target_link_libraries(paritychecker_tests @@ -166,6 +179,7 @@ target_include_directories(paritychecker_tests ${EMEL_ROOT}/src ${EMEL_ROOT}/include ${reference_impl_SOURCE_DIR}/src + ${reference_impl_SOURCE_DIR}/common ${reference_impl_SOURCE_DIR}/ggml/include ${reference_impl_SOURCE_DIR}/include ${DOCTEST_INCLUDE_DIR} diff --git a/tools/paritychecker/parity_main.cpp b/tools/paritychecker/parity_main.cpp index 439681e6..670b88df 100644 --- a/tools/paritychecker/parity_main.cpp +++ b/tools/paritychecker/parity_main.cpp @@ -10,11 +10,13 @@ using emel::paritychecker::parity_options; void print_usage(const char * exe) { std::fprintf(stderr, - "usage: %s [--gbnf | --kernel] [--model ] (--text | --text-file ) " + "usage: %s [--gbnf | --kernel | --jinja] [--model ] " + "(--text | --text-file ) " "[--add-special] [--parse-special] [--dump]\n" " default mode compares tokenizer parity and requires --model\n" " --gbnf mode compares GBNF parser parity and ignores --model\n" - " --kernel mode compares kernel parity and ignores --model\n", + " --kernel mode compares kernel parity and ignores --model\n" + " --jinja mode compares jinja parser/formatter parity and ignores --model\n", exe); } @@ -49,6 +51,10 @@ bool parse_args(int argc, char ** argv, parity_options & out) { out.mode = emel::paritychecker::parity_mode::kernel; continue; } + if (arg == "--jinja") { + out.mode = emel::paritychecker::parity_mode::jinja; + continue; + } if (arg == "--model") { if (i + 1 >= argc) { return false; @@ -102,6 +108,10 @@ bool parse_args(int argc, char ** argv, parity_options & out) { (out.add_special || out.parse_special)) { return false; } + if (out.mode == emel::paritychecker::parity_mode::jinja && + (out.add_special || out.parse_special)) { + return false; + } return true; } diff --git a/tools/paritychecker/parity_runner.cpp b/tools/paritychecker/parity_runner.cpp index 542f9e01..fb933c23 100644 --- a/tools/paritychecker/parity_runner.cpp +++ b/tools/paritychecker/parity_runner.cpp @@ -3,8 +3,8 @@ #include #include -#include #include +#include #include #include #include @@ -20,9 +20,16 @@ #include "emel/kernel/x86_64/context.hpp" #include "emel/kernel/x86_64/detail.hpp" #include "emel/model/data.hpp" +#include "emel/text/jinja/formatter/sm.hpp" +#include "emel/text/jinja/parser/detail.hpp" +#include "emel/text/jinja/parser/errors.hpp" +#include "emel/text/jinja/parser/sm.hpp" #include "ggml-cpu.h" #include "ggml.h" +#include "jinja/lexer.h" +#include "jinja/parser.h" +#include "jinja/runtime.h" #include "llama-grammar.h" #include "llama-vocab.h" @@ -101,6 +108,151 @@ bool run_llama_gbnf_parse(std::string_view grammar_text, llama_grammar_rules & r return true; } +struct jinja_parse_capture { + bool done_called = false; + bool error_called = false; +}; + +struct jinja_render_capture { + bool done_called = false; + bool error_called = false; +}; + +bool on_jinja_parse_done(void * owner, + const emel::text::jinja::events::parsing_done &) { + auto * capture = static_cast(owner); + capture->done_called = true; + return true; +} + +bool on_jinja_parse_error(void * owner, + const emel::text::jinja::events::parsing_error &) { + auto * capture = static_cast(owner); + capture->error_called = true; + return true; +} + +bool on_jinja_render_done(void * owner, + const emel::text::jinja::events::rendering_done &) { + auto * capture = static_cast(owner); + capture->done_called = true; + return true; +} + +bool on_jinja_render_error(void * owner, + const emel::text::jinja::events::rendering_error &) { + auto * capture = static_cast(owner); + capture->error_called = true; + return true; +} + +bool run_emel_jinja_parse(std::string_view template_text, + emel::text::jinja::program & program_out, + int32_t & parse_err_out, + size_t & parse_error_pos_out) { + jinja_parse_capture capture{}; + emel::text::jinja::parser::action::context parse_ctx{}; + emel::text::jinja::parser::sm parser{parse_ctx}; + const emel::text::jinja::event::parse::done_callback done_cb{ + &capture, + on_jinja_parse_done}; + const emel::text::jinja::event::parse::error_callback error_cb{ + &capture, + on_jinja_parse_error}; + const emel::text::jinja::event::parse parse_ev{ + template_text, + program_out, + done_cb, + error_cb, + parse_err_out, + parse_error_pos_out, + }; + const bool accepted = parser.process_event(parse_ev); + return accepted && capture.done_called && !capture.error_called && + parse_err_out == static_cast(emel::text::jinja::parser::error::none); +} + +bool run_reference_jinja_parse(std::string_view template_text, + ::jinja::program & program_out) { + try { + ::jinja::lexer lex; + ::jinja::lexer_result lex_res = lex.tokenize(std::string(template_text)); + program_out = ::jinja::parse_from_tokens(lex_res); + return true; + } catch (...) { + return false; + } +} + +bool run_emel_jinja_render(const emel::text::jinja::program & program, + std::string_view template_text, + std::string & rendered_out) { + jinja_render_capture capture{}; + emel::text::jinja::formatter::action::context formatter_ctx{}; + emel::text::jinja::formatter::sm formatter{formatter_ctx}; + std::vector output_buffer( + std::max(1, static_cast(template_text.size() + 1))); + size_t output_len = 0; + int32_t render_err = static_cast(emel::text::jinja::formatter::error::none); + size_t render_error_pos = 0; + const emel::text::jinja::event::render::done_callback done_cb{ + &capture, + on_jinja_render_done}; + const emel::text::jinja::event::render::error_callback error_cb{ + &capture, + on_jinja_render_error}; + const emel::text::jinja::event::render render_ev{ + program, + template_text, + output_buffer[0], + output_buffer.size(), + done_cb, + error_cb, + nullptr, + &output_len, + nullptr, + &render_err, + &render_error_pos, + }; + + const bool accepted = formatter.process_event(render_ev); + if (!accepted || capture.error_called || + render_err != static_cast(emel::text::jinja::formatter::error::none)) { + return false; + } + rendered_out.assign(output_buffer.data(), output_len); + return true; +} + +bool run_reference_jinja_render(const ::jinja::program & program, + std::string & rendered_out) { + try { + ::jinja::context ctx; + ctx.set_val("name", ::jinja::mk_val<::jinja::value_string>("world")); + ctx.set_val("cond", ::jinja::mk_val<::jinja::value_bool>(true)); + auto items = ::jinja::mk_val<::jinja::value_array>(); + items->push_back(::jinja::mk_val<::jinja::value_int>(1)); + items->push_back(::jinja::mk_val<::jinja::value_int>(2)); + items->push_back(::jinja::mk_val<::jinja::value_int>(3)); + ctx.set_val("items", items); + ::jinja::runtime runtime{ctx}; + auto result = runtime.execute(program); + auto parts = ::jinja::runtime::gather_string_parts(result); + rendered_out = ::jinja::render_string_parts(parts); + return true; + } catch (...) { + return false; + } +} + +std::string_view strip_trailing_newline(const std::string & value) { + size_t len = value.size(); + if (len > 0 && value[len - 1] == '\n') { + --len; + } + return std::string_view(value.data(), len); +} + bool compare_grammars(const emel::gbnf::grammar & emel_grammar, const llama_grammar_rules & llama_rules) { if (emel_grammar.rule_count != llama_rules.size()) { @@ -1173,6 +1325,61 @@ int run_gbnf_parser_parity(const emel::paritychecker::parity_options & opts) { return 0; } +int run_jinja_parity(const emel::paritychecker::parity_options & opts) { + emel::text::jinja::program emel_program{}; + int32_t emel_parse_err = static_cast(emel::text::jinja::parser::error::none); + size_t emel_parse_error_pos = 0; + const bool emel_parse_ok = run_emel_jinja_parse( + opts.text, emel_program, emel_parse_err, emel_parse_error_pos); + + ::jinja::program reference_program; + const bool reference_parse_ok = run_reference_jinja_parse(opts.text, reference_program); + + if (emel_parse_ok != reference_parse_ok) { + std::fprintf(stderr, + "jinja parse outcome mismatch: emel=%s reference=%s (emel_err=%d at %zu)\n", + emel_parse_ok ? "ok" : "error", + reference_parse_ok ? "ok" : "error", + emel_parse_err, + emel_parse_error_pos); + return 1; + } + + if (!emel_parse_ok) { + std::fprintf(stdout, "jinja parity ok (both parsers rejected template)\n"); + return 0; + } + + std::string emel_rendered; + if (!run_emel_jinja_render(emel_program, opts.text, emel_rendered)) { + std::fprintf(stderr, "jinja render failed in emel formatter\n"); + return 1; + } + + std::string reference_rendered; + if (!run_reference_jinja_render(reference_program, reference_rendered)) { + std::fprintf(stderr, "jinja render failed in reference runtime\n"); + return 1; + } + + const std::string_view emel_cmp = strip_trailing_newline(emel_rendered); + const std::string_view reference_cmp = strip_trailing_newline(reference_rendered); + if (emel_cmp != reference_cmp) { + std::fprintf(stderr, + "jinja render mismatch: emel_len=%zu reference_len=%zu\n", + emel_rendered.size(), + reference_rendered.size()); + if (opts.dump) { + std::fprintf(stdout, "emel:\n%s\n", emel_rendered.c_str()); + std::fprintf(stdout, "reference:\n%s\n", reference_rendered.c_str()); + } + return 1; + } + + std::fprintf(stdout, "jinja parity ok (output bytes=%zu)\n", emel_rendered.size()); + return 0; +} + } // namespace namespace emel::paritychecker { @@ -1183,6 +1390,8 @@ int run_parity(const parity_options & opts) { return run_gbnf_parser_parity(opts); case parity_mode::kernel: return run_kernel_parity(opts); + case parity_mode::jinja: + return run_jinja_parity(opts); case parity_mode::tokenizer: default: return run_tokenizer_parity(opts); diff --git a/tools/paritychecker/parity_runner.hpp b/tools/paritychecker/parity_runner.hpp index de98717a..3fd5b73b 100644 --- a/tools/paritychecker/parity_runner.hpp +++ b/tools/paritychecker/parity_runner.hpp @@ -9,6 +9,7 @@ enum class parity_mode : uint8_t { tokenizer = 0, gbnf_parser = 1, kernel = 2, + jinja = 3, }; struct parity_options { diff --git a/tools/paritychecker/paritychecker_tests.cpp b/tools/paritychecker/paritychecker_tests.cpp index 60e2c482..4cc260e7 100644 --- a/tools/paritychecker/paritychecker_tests.cpp +++ b/tools/paritychecker/paritychecker_tests.cpp @@ -42,6 +42,15 @@ std::filesystem::path gbnf_parity_texts_dir() { #endif } +std::filesystem::path jinja_parity_texts_dir() { +#ifdef PARITYCHECKER_REPO_ROOT + std::filesystem::path root = PARITYCHECKER_REPO_ROOT; + return root / "tests" / "text" / "jinja" / "parity_texts"; +#else + return std::filesystem::path("tests") / "text" / "jinja" / "parity_texts"; +#endif +} + bool file_exists(const std::filesystem::path & path) { std::FILE * file = std::fopen(path.string().c_str(), "rb"); if (file == nullptr) { @@ -213,6 +222,29 @@ bool run_kernel_paritychecker_process() { #endif } +bool run_jinja_paritychecker_process(const std::filesystem::path & template_path) { + std::string command; +#if defined(_WIN32) + command = ".\\paritychecker --jinja --text-file "; + command += quote_arg_windows(template_path.string()); +#else + command = "ulimit -s 8192; ./paritychecker --jinja --text-file "; + command += quote_arg_posix(template_path.string()); +#endif + const int status = std::system(command.c_str()); + if (status == -1) { + return false; + } +#if defined(_WIN32) + return status == 0; +#else + if (!WIFEXITED(status)) { + return false; + } + return WEXITSTATUS(status) == 0; +#endif +} + } // namespace TEST_CASE("paritychecker matches llama tokens across tiny models") { @@ -260,3 +292,17 @@ TEST_CASE("paritychecker matches llama gbnf parser outputs") { TEST_CASE("paritychecker matches llama kernel outputs") { CHECK(run_kernel_paritychecker_process()); } + +TEST_CASE("paritychecker matches llama jinja parser and formatter outputs") { + const auto template_dir = jinja_parity_texts_dir(); + const std::vector cases = { + template_dir / "literal_text.j2", + template_dir / "invalid_unclosed_expression.j2", + }; + + for (const auto & template_path : cases) { + INFO("case: " << template_path.string()); + REQUIRE(file_exists(template_path)); + CHECK(run_jinja_paritychecker_process(template_path)); + } +}