From 36787bb84315324d1c681cb8edbda4bd5990989a Mon Sep 17 00:00:00 2001 From: gabewillen Date: Mon, 2 Mar 2026 13:24:28 -0600 Subject: [PATCH 1/2] text/renderer: remove state-checking wrappers and explicit error paths --- src/emel/text/renderer/actions.hpp | 277 ++++++++--- src/emel/text/renderer/context.hpp | 11 +- src/emel/text/renderer/events.hpp | 40 +- src/emel/text/renderer/guards.hpp | 67 +-- src/emel/text/renderer/sm.hpp | 616 ++++++++----------------- tests/text/renderer/renderer_tests.cpp | 442 ++++++++---------- 6 files changed, 612 insertions(+), 841 deletions(-) diff --git a/src/emel/text/renderer/actions.hpp b/src/emel/text/renderer/actions.hpp index 94f69a95..6d453984 100644 --- a/src/emel/text/renderer/actions.hpp +++ b/src/emel/text/renderer/actions.hpp @@ -25,14 +25,6 @@ constexpr decltype(auto) unwrap_runtime_event(const runtime_event_type & ev) noe } } -inline bool dispatch_bind_fallback( - void *, - const emel::text::detokenizer::event::bind &) noexcept { - return false; -} - -inline const emel::model::data::vocab k_fallback_vocab = {}; - } // namespace detail inline constexpr emel::error::type k_error_none = emel::error::cast(error::none); @@ -44,26 +36,39 @@ inline constexpr int32_t to_detokenizer_error_code( inline constexpr int32_t k_detokenizer_ok = to_detokenizer_error_code(emel::text::detokenizer::error::none); +inline constexpr int32_t k_detokenizer_backend_error = + to_detokenizer_error_code(emel::text::detokenizer::error::backend_error); inline constexpr int32_t to_error_out(const emel::error::type err) noexcept { return static_cast(err); } +struct detokenizer_error_map_entry { + int32_t detokenizer_code = k_detokenizer_ok; + emel::error::type renderer_code = emel::error::cast(error::none); +}; + inline emel::error::type from_detokenizer_error(const int32_t err) noexcept { - switch (err) { - case to_detokenizer_error_code(emel::text::detokenizer::error::none): - return emel::error::cast(error::none); - case to_detokenizer_error_code(emel::text::detokenizer::error::invalid_request): - return emel::error::cast(error::invalid_request); - case to_detokenizer_error_code(emel::text::detokenizer::error::model_invalid): - return emel::error::cast(error::model_invalid); - case to_detokenizer_error_code(emel::text::detokenizer::error::backend_error): - return emel::error::cast(error::backend_error); - case to_detokenizer_error_code(emel::text::detokenizer::error::internal_error): - return emel::error::cast(error::internal_error); - default: - return emel::error::cast(error::untracked); - } + constexpr std::array k_error_map = {{ + {to_detokenizer_error_code(emel::text::detokenizer::error::none), + emel::error::cast(error::none)}, + {to_detokenizer_error_code(emel::text::detokenizer::error::invalid_request), + emel::error::cast(error::invalid_request)}, + {to_detokenizer_error_code(emel::text::detokenizer::error::model_invalid), + emel::error::cast(error::model_invalid)}, + {to_detokenizer_error_code(emel::text::detokenizer::error::backend_error), + emel::error::cast(error::backend_error)}, + {to_detokenizer_error_code(emel::text::detokenizer::error::internal_error), + emel::error::cast(error::internal_error)}, + }}; + + emel::error::type mapped = emel::error::cast(error::untracked); + for (const auto & entry : k_error_map) { + const size_t matched = static_cast(entry.detokenizer_code == err); + const emel::error::type candidates[2] = {mapped, entry.renderer_code}; + mapped = candidates[matched]; + } + return mapped; } template @@ -93,9 +98,6 @@ inline void reset_outcome(runtime_ctx_type & runtime_ctx) noexcept { if constexpr (requires { runtime_ctx.detokenizer_err; }) { runtime_ctx.detokenizer_err = k_detokenizer_ok; } - if constexpr (requires { runtime_ctx.detokenizer_accepted; }) { - runtime_ctx.detokenizer_accepted = false; - } if constexpr (requires { runtime_ctx.detokenizer_output_length; }) { runtime_ctx.detokenizer_output_length = 0; } @@ -136,7 +138,7 @@ inline char concat_char(const sequence_state & sequence, return sources[from_new][adjusted_indices[from_new]]; } -inline bool copy_stop_sequences(const event::bind & ev, +inline bool copy_stop_sequences(const event::initialize & ev, context & ctx) noexcept { ctx.stop_sequence_count = ev.stop_sequence_count; ctx.stop_storage_used = 0; @@ -319,63 +321,35 @@ inline bool apply_stop_matching(sequence_state & sequence, return compose_ok != 0; } -struct begin_bind { - void operator()(const event::bind_runtime & ev, context & ctx) const noexcept { +struct begin_initialize { + void operator()(const event::initialize_runtime & ev, context &) const noexcept { reset_outcome(ev.ctx); int32_t error_sink = to_error_out(k_error_none); write_optional(ev.request.error_out, error_sink, to_error_out(k_error_none)); - - ctx.vocab = ev.request.vocab; - ctx.detokenizer_sm = ev.request.detokenizer_sm; - ctx.dispatch_detokenizer_bind = ev.request.dispatch_detokenizer_bind; - ctx.dispatch_detokenizer_detokenize = ev.request.dispatch_detokenizer_detokenize; - ctx.strip_leading_space_default = ev.request.strip_leading_space; - ctx.is_bound = false; - - copy_stop_sequences(ev.request, ctx); - reset_sequences(ctx); } }; -struct reject_bind { - void operator()(const event::bind_runtime & ev, context & ctx) const noexcept { - ctx.is_bound = false; +struct reject_initialize { + void operator()(const event::initialize_runtime & ev, context &) const noexcept { set_error(ev.ctx, error::invalid_request); } }; -struct bind_detokenizer { +struct dispatch_initialize_detokenizer { template void operator()(const runtime_event_type & ev, context & ctx) const noexcept { auto & runtime_ev = detail::unwrap_runtime_event(ev); - ctx.is_bound = false; - - const size_t has_vocab = static_cast(ctx.vocab != nullptr); - const size_t has_sm = static_cast(ctx.detokenizer_sm != nullptr); - const size_t has_bind_dispatch = static_cast(ctx.dispatch_detokenizer_bind != nullptr); - const size_t has_detokenize_dispatch = - static_cast(ctx.dispatch_detokenizer_detokenize != nullptr); - const size_t has_dependencies = - has_vocab & has_sm & has_bind_dispatch & has_detokenize_dispatch; - - const emel::model::data::vocab * vocabs[2] = {&detail::k_fallback_vocab, ctx.vocab}; - void * dispatch_sms[2] = {nullptr, ctx.detokenizer_sm}; - bool (*dispatchers[2])(void *, const emel::text::detokenizer::event::bind &) = { - detail::dispatch_bind_fallback, - ctx.dispatch_detokenizer_bind}; - int32_t err = k_detokenizer_ok; const emel::text::detokenizer::event::bind bind_ev{ - *vocabs[has_vocab], + runtime_ev.request.vocab, err}; - runtime_ev.ctx.detokenizer_accepted = - dispatchers[has_bind_dispatch](dispatch_sms[has_sm], bind_ev) && (has_dependencies != 0); - const int32_t dependency_error = to_detokenizer_error_code( - emel::text::detokenizer::error::invalid_request); - const int32_t errors[2] = {dependency_error, err}; - runtime_ev.ctx.detokenizer_err = errors[has_dependencies]; + const bool accepted = ctx.detokenizer.process_event(bind_ev); + if (!accepted && err == k_detokenizer_ok) { + err = k_detokenizer_backend_error; + } + runtime_ev.ctx.detokenizer_err = err; } }; @@ -406,16 +380,54 @@ struct set_error_from_detokenizer { } }; -struct commit_bind_success { +struct commit_initialize_success { template void operator()(const runtime_event_type & ev, context & ctx) const noexcept { auto & runtime_ev = detail::unwrap_runtime_event(ev); - ctx.is_bound = true; + ctx.vocab = &runtime_ev.request.vocab; + ctx.strip_leading_space_default = runtime_ev.request.strip_leading_space; + copy_stop_sequences(runtime_ev.request, ctx); + reset_sequences(ctx); set_error(runtime_ev.ctx, error::none); } }; +struct publish_initialize_done { + template + void operator()(const runtime_event_type & runtime_ev, + context &) const noexcept { + auto & ev = detail::unwrap_runtime_event(runtime_ev); + int32_t error_sink = to_error_out(k_error_none); + write_optional(ev.request.error_out, + error_sink, + to_error_out(ev.ctx.err)); + if (ev.request.owner_sm != nullptr && + ev.request.dispatch_done != nullptr) { + ev.request.dispatch_done(ev.request.owner_sm, + events::initialize_done{&ev.request}); + } + } +}; + +struct publish_initialize_error { + template + void operator()(const runtime_event_type & runtime_ev, + context &) const noexcept { + auto & ev = detail::unwrap_runtime_event(runtime_ev); + int32_t error_sink = to_error_out(k_error_none); + write_optional(ev.request.error_out, + error_sink, + to_error_out(ev.ctx.err)); + if (ev.request.owner_sm != nullptr && + ev.request.dispatch_error != nullptr) { + ev.request.dispatch_error(ev.request.owner_sm, + events::initialize_error{&ev.request, + to_error_out(ev.ctx.err)}); + } + } +}; + struct begin_render { void operator()(const event::render_runtime & ev, context &) const noexcept { @@ -473,8 +485,10 @@ struct dispatch_render_detokenizer { detok_pending_length, err}; - runtime_ev.ctx.detokenizer_accepted = - ctx.dispatch_detokenizer_detokenize(ctx.detokenizer_sm, detok_ev); + const bool accepted = ctx.detokenizer.process_event(detok_ev); + if (!accepted && err == k_detokenizer_ok) { + err = k_detokenizer_backend_error; + } runtime_ev.ctx.detokenizer_err = err; runtime_ev.ctx.detokenizer_output_length = detok_output_length; runtime_ev.ctx.detokenizer_pending_length = detok_pending_length; @@ -539,6 +553,27 @@ struct apply_render_stop_matching { } }; +struct commit_render_output { + template + void operator()(const runtime_event_type & ev, + context & ctx) const noexcept { + commit_render_detokenizer_output{}(ev, ctx); + update_render_strip_state{}(ev, ctx); + apply_render_stop_matching{}(ev, ctx); + } +}; + +struct commit_and_strip_render_output { + template + void operator()(const runtime_event_type & ev, + context & ctx) const noexcept { + commit_render_detokenizer_output{}(ev, ctx); + strip_render_leading_space{}(ev, ctx); + update_render_strip_state{}(ev, ctx); + apply_render_stop_matching{}(ev, ctx); + } +}; + struct begin_flush { void operator()(const event::flush_runtime & ev, context &) const noexcept { @@ -594,6 +629,94 @@ struct flush_copy_sequence_buffers { } }; +struct publish_render_done { + template + void operator()(const runtime_event_type & runtime_ev, + context &) const noexcept { + auto & ev = detail::unwrap_runtime_event(runtime_ev); + int32_t error_sink = to_error_out(k_error_none); + size_t output_length_sink = 0; + sequence_status status_sink = sequence_status::running; + + write_optional(ev.request.output_length_out, + output_length_sink, + ev.ctx.output_length); + write_optional(ev.request.status_out, status_sink, ev.ctx.status); + write_optional(ev.request.error_out, + error_sink, + to_error_out(ev.ctx.err)); + if (ev.request.owner_sm != nullptr && ev.request.dispatch_done != nullptr) { + ev.request.dispatch_done( + ev.request.owner_sm, + events::rendering_done{&ev.request, + ev.ctx.output_length, + ev.ctx.status}); + } + } +}; + +struct publish_render_error { + template + void operator()(const runtime_event_type & runtime_ev, + context &) const noexcept { + auto & ev = detail::unwrap_runtime_event(runtime_ev); + int32_t error_sink = to_error_out(k_error_none); + write_optional(ev.request.error_out, + error_sink, + to_error_out(ev.ctx.err)); + if (ev.request.owner_sm != nullptr && + ev.request.dispatch_error != nullptr) { + ev.request.dispatch_error( + ev.request.owner_sm, + events::rendering_error{&ev.request, to_error_out(ev.ctx.err)}); + } + } +}; + +struct publish_flush_done { + template + void operator()(const runtime_event_type & runtime_ev, + context &) const noexcept { + auto & ev = detail::unwrap_runtime_event(runtime_ev); + int32_t error_sink = to_error_out(k_error_none); + size_t output_length_sink = 0; + sequence_status status_sink = sequence_status::running; + + write_optional(ev.request.output_length_out, + output_length_sink, + ev.ctx.output_length); + write_optional(ev.request.status_out, status_sink, ev.ctx.status); + write_optional(ev.request.error_out, + error_sink, + to_error_out(ev.ctx.err)); + if (ev.request.owner_sm != nullptr && ev.request.dispatch_done != nullptr) { + ev.request.dispatch_done( + ev.request.owner_sm, + events::flush_done{&ev.request, + ev.ctx.output_length, + ev.ctx.status}); + } + } +}; + +struct publish_flush_error { + template + void operator()(const runtime_event_type & runtime_ev, + context &) const noexcept { + auto & ev = detail::unwrap_runtime_event(runtime_ev); + int32_t error_sink = to_error_out(k_error_none); + write_optional(ev.request.error_out, + error_sink, + to_error_out(ev.ctx.err)); + if (ev.request.owner_sm != nullptr && + ev.request.dispatch_error != nullptr) { + ev.request.dispatch_error( + ev.request.owner_sm, + events::flush_error{&ev.request, to_error_out(ev.ctx.err)}); + } + } +}; + struct mark_done { template void operator()(const runtime_event_type & ev, @@ -632,24 +755,32 @@ struct on_unexpected { } }; -inline constexpr begin_bind begin_bind{}; -inline constexpr reject_bind reject_bind{}; -inline constexpr bind_detokenizer bind_detokenizer{}; +inline constexpr begin_initialize begin_initialize{}; +inline constexpr reject_initialize reject_initialize{}; +inline constexpr dispatch_initialize_detokenizer dispatch_initialize_detokenizer{}; inline constexpr set_backend_error set_backend_error{}; inline constexpr set_invalid_request set_invalid_request{}; inline constexpr set_error_from_detokenizer set_error_from_detokenizer{}; -inline constexpr commit_bind_success commit_bind_success{}; +inline constexpr commit_initialize_success commit_initialize_success{}; +inline constexpr publish_initialize_done publish_initialize_done{}; +inline constexpr publish_initialize_error publish_initialize_error{}; inline constexpr begin_render begin_render{}; inline constexpr reject_render reject_render{}; inline constexpr render_sequence_already_stopped render_sequence_already_stopped{}; inline constexpr dispatch_render_detokenizer dispatch_render_detokenizer{}; inline constexpr commit_render_detokenizer_output commit_render_detokenizer_output{}; +inline constexpr commit_render_output commit_render_output{}; +inline constexpr commit_and_strip_render_output commit_and_strip_render_output{}; inline constexpr strip_render_leading_space strip_render_leading_space{}; inline constexpr update_render_strip_state update_render_strip_state{}; inline constexpr apply_render_stop_matching apply_render_stop_matching{}; inline constexpr begin_flush begin_flush{}; inline constexpr reject_flush reject_flush{}; inline constexpr flush_copy_sequence_buffers flush_copy_sequence_buffers{}; +inline constexpr publish_render_done publish_render_done{}; +inline constexpr publish_render_error publish_render_error{}; +inline constexpr publish_flush_done publish_flush_done{}; +inline constexpr publish_flush_error publish_flush_error{}; inline constexpr mark_done mark_done{}; inline constexpr ensure_last_error ensure_last_error{}; inline constexpr on_unexpected on_unexpected{}; diff --git a/src/emel/text/renderer/context.hpp b/src/emel/text/renderer/context.hpp index 470bea7e..2c5ef8d7 100644 --- a/src/emel/text/renderer/context.hpp +++ b/src/emel/text/renderer/context.hpp @@ -5,7 +5,7 @@ #include #include "emel/model/data.hpp" -#include "emel/text/detokenizer/events.hpp" +#include "emel/text/detokenizer/sm.hpp" namespace emel::text::renderer::action { @@ -34,13 +34,7 @@ struct sequence_state { struct context { const emel::model::data::vocab * vocab = nullptr; - void * detokenizer_sm = nullptr; - bool (*dispatch_detokenizer_bind)( - void * detokenizer_sm, - const emel::text::detokenizer::event::bind &) = nullptr; - bool (*dispatch_detokenizer_detokenize)( - void * detokenizer_sm, - const emel::text::detokenizer::event::detokenize &) = nullptr; + emel::text::detokenizer::sm detokenizer = {}; bool strip_leading_space_default = false; std::array stop_sequences = {}; @@ -50,7 +44,6 @@ struct context { size_t stop_max_length = 0; std::array sequences = {}; - bool is_bound = false; }; } // namespace emel::text::renderer::action diff --git a/src/emel/text/renderer/events.hpp b/src/emel/text/renderer/events.hpp index 21c7103a..63e57019 100644 --- a/src/emel/text/renderer/events.hpp +++ b/src/emel/text/renderer/events.hpp @@ -20,8 +20,8 @@ enum class sequence_status : uint8_t { namespace emel::text::renderer::events { -struct binding_done; -struct binding_error; +struct initialize_done; +struct initialize_error; struct rendering_done; struct rendering_error; struct flush_done; @@ -31,24 +31,20 @@ struct flush_error; namespace emel::text::renderer::event { -struct bind { - const emel::model::data::vocab * vocab = nullptr; - void * detokenizer_sm = nullptr; - bool (*dispatch_detokenizer_bind)( - void * detokenizer_sm, - const emel::text::detokenizer::event::bind &) = nullptr; - bool (*dispatch_detokenizer_detokenize)( - void * detokenizer_sm, - const emel::text::detokenizer::event::detokenize &) = nullptr; +struct initialize { + explicit initialize(const emel::model::data::vocab & vocab_ref) noexcept + : vocab(vocab_ref) {} + + const emel::model::data::vocab & vocab; bool strip_leading_space = false; const std::string_view * stop_sequences = nullptr; size_t stop_sequence_count = 0; int32_t * error_out = nullptr; void * owner_sm = nullptr; bool (*dispatch_done)(void * owner_sm, - const events::binding_done &) = nullptr; + const events::initialize_done &) = nullptr; bool (*dispatch_error)(void * owner_sm, - const events::binding_error &) = nullptr; + const events::initialize_error &) = nullptr; }; struct render { @@ -81,10 +77,9 @@ struct flush { const events::flush_error &) = nullptr; }; -struct bind_ctx { +struct initialize_ctx { emel::error::type err = emel::error::cast(error::none); int32_t detokenizer_err = 0; - bool detokenizer_accepted = false; }; struct render_ctx { @@ -93,7 +88,6 @@ struct render_ctx { sequence_status status = sequence_status::running; size_t sequence_index = 0; int32_t detokenizer_err = 0; - bool detokenizer_accepted = false; size_t detokenizer_output_length = 0; size_t detokenizer_pending_length = 0; size_t produced_length = 0; @@ -106,9 +100,9 @@ struct flush_ctx { size_t sequence_index = 0; }; -struct bind_runtime { - const bind & request; - bind_ctx & ctx; +struct initialize_runtime { + const initialize & request; + initialize_ctx & ctx; }; struct render_runtime { @@ -125,12 +119,12 @@ struct flush_runtime { namespace emel::text::renderer::events { -struct binding_done { - const event::bind * request = nullptr; +struct initialize_done { + const event::initialize * request = nullptr; }; -struct binding_error { - const event::bind * request = nullptr; +struct initialize_error { + const event::initialize * request = nullptr; int32_t err = 0; }; diff --git a/src/emel/text/renderer/guards.hpp b/src/emel/text/renderer/guards.hpp index fec113ba..7e0c038d 100644 --- a/src/emel/text/renderer/guards.hpp +++ b/src/emel/text/renderer/guards.hpp @@ -1,6 +1,5 @@ #pragma once -#include #include #include "emel/error/error.hpp" @@ -24,6 +23,8 @@ constexpr decltype(auto) unwrap_runtime_event(const runtime_event_type & ev) noe inline constexpr int32_t k_detokenizer_ok = static_cast( emel::error::cast(emel::text::detokenizer::error::none)); +inline constexpr int32_t k_detokenizer_backend_error = static_cast( + emel::error::cast(emel::text::detokenizer::error::backend_error)); inline bool is_leading_space(const char value) noexcept { return value == ' ' || value == '\t' || value == '\n' || value == '\r'; @@ -31,7 +32,7 @@ inline bool is_leading_space(const char value) noexcept { } // namespace detail -struct valid_bind { +struct valid_initialize { template bool operator()(const runtime_event_type & ev) const noexcept { if constexpr (requires { ev.event_; }) { @@ -62,10 +63,10 @@ struct valid_bind { } }; -struct invalid_bind { +struct invalid_initialize { template bool operator()(const runtime_event_type & ev) const noexcept { - return !valid_bind{}(ev); + return !valid_initialize{}(ev); } }; @@ -77,7 +78,10 @@ struct valid_render { return false; } const auto & runtime_ev = detail::unwrap_runtime_event(ev); - if (!ctx.is_bound || ctx.vocab == nullptr) { + if (ctx.vocab == nullptr) { + return false; + } + if (runtime_ev.request.token_id < 0) { return false; } if (runtime_ev.request.sequence_id < 0 || @@ -108,7 +112,7 @@ struct valid_flush { return false; } const auto & runtime_ev = detail::unwrap_runtime_event(ev); - if (!ctx.is_bound || ctx.vocab == nullptr) { + if (ctx.vocab == nullptr) { return false; } if (runtime_ev.request.sequence_id < 0 || @@ -147,50 +151,28 @@ struct request_failed { } }; -struct bind_context_ready { - template - bool operator()(const runtime_event_type & ev, - const action::context & ctx) const noexcept { - const auto & runtime_ev = detail::unwrap_runtime_event(ev); - return runtime_ev.ctx.err == emel::error::cast(error::none) && - ctx.vocab != nullptr && - ctx.detokenizer_sm != nullptr && - ctx.dispatch_detokenizer_bind != nullptr && - ctx.dispatch_detokenizer_detokenize != nullptr; - } -}; - -struct bind_context_invalid { - template - bool operator()(const runtime_event_type & ev, - const action::context & ctx) const noexcept { - return !bind_context_ready{}(ev, ctx); - } -}; - -struct bind_dispatch_backend_failure { +struct initialize_dispatch_backend_failure { template bool operator()(const runtime_event_type & ev) const noexcept { const auto & runtime_ev = detail::unwrap_runtime_event(ev); - return !runtime_ev.ctx.detokenizer_accepted && - runtime_ev.ctx.detokenizer_err == detail::k_detokenizer_ok; + return runtime_ev.ctx.detokenizer_err == detail::k_detokenizer_backend_error; } }; -struct bind_dispatch_reported_error { +struct initialize_dispatch_reported_error { template bool operator()(const runtime_event_type & ev) const noexcept { const auto & runtime_ev = detail::unwrap_runtime_event(ev); - return runtime_ev.ctx.detokenizer_err != detail::k_detokenizer_ok; + return runtime_ev.ctx.detokenizer_err != detail::k_detokenizer_ok && + runtime_ev.ctx.detokenizer_err != detail::k_detokenizer_backend_error; } }; -struct bind_dispatch_ok { +struct initialize_dispatch_ok { template bool operator()(const runtime_event_type & ev) const noexcept { const auto & runtime_ev = detail::unwrap_runtime_event(ev); - return runtime_ev.ctx.detokenizer_accepted && - runtime_ev.ctx.detokenizer_err == detail::k_detokenizer_ok; + return runtime_ev.ctx.detokenizer_err == detail::k_detokenizer_ok; } }; @@ -215,8 +197,7 @@ struct render_dispatch_backend_failure { template bool operator()(const runtime_event_type & ev) const noexcept { const auto & runtime_ev = detail::unwrap_runtime_event(ev); - return !runtime_ev.ctx.detokenizer_accepted && - runtime_ev.ctx.detokenizer_err == detail::k_detokenizer_ok; + return runtime_ev.ctx.detokenizer_err == detail::k_detokenizer_backend_error; } }; @@ -224,7 +205,8 @@ struct render_dispatch_reported_error { template bool operator()(const runtime_event_type & ev) const noexcept { const auto & runtime_ev = detail::unwrap_runtime_event(ev); - return runtime_ev.ctx.detokenizer_err != detail::k_detokenizer_ok; + return runtime_ev.ctx.detokenizer_err != detail::k_detokenizer_ok && + runtime_ev.ctx.detokenizer_err != detail::k_detokenizer_backend_error; } }; @@ -252,8 +234,7 @@ struct render_dispatch_ok { bool operator()(const runtime_event_type & ev, const action::context & ctx) const noexcept { const auto & runtime_ev = detail::unwrap_runtime_event(ev); - return runtime_ev.ctx.detokenizer_accepted && - runtime_ev.ctx.detokenizer_err == detail::k_detokenizer_ok && + return runtime_ev.ctx.detokenizer_err == detail::k_detokenizer_ok && render_dispatch_lengths_valid{}(ev, ctx); } }; @@ -264,7 +245,9 @@ struct strip_needed { const action::context & ctx) const noexcept { const auto & runtime_ev = detail::unwrap_runtime_event(ev); const auto & sequence = ctx.sequences[runtime_ev.ctx.sequence_index]; - return sequence.strip_leading_space && runtime_ev.ctx.produced_length > 0 && + return render_dispatch_ok{}(ev, ctx) && + sequence.strip_leading_space && + runtime_ev.ctx.detokenizer_output_length > 0 && detail::is_leading_space(runtime_ev.request.output[0]); } }; @@ -273,7 +256,7 @@ struct strip_not_needed { template bool operator()(const runtime_event_type & ev, const action::context & ctx) const noexcept { - return !strip_needed{}(ev, ctx); + return render_dispatch_ok{}(ev, ctx) && !strip_needed{}(ev, ctx); } }; diff --git a/src/emel/text/renderer/sm.hpp b/src/emel/text/renderer/sm.hpp index 87b732db..db66cbaa 100644 --- a/src/emel/text/renderer/sm.hpp +++ b/src/emel/text/renderer/sm.hpp @@ -1,86 +1,7 @@ #pragma once -/* -design doc: docs/designs/text/renderer.design.md - --- - title: text/renderer architecture design - status: draft - --- - - # text/renderer architecture design - - this document defines the text/renderer actor. it acts as a domain-specific output consumer for the - modality-agnostic `generator`. it receives raw token IDs, translates them to text, and handles - string-based stopping criteria and streaming formats. - - ## role - - act as an injected dependency (or external consumer) for the `generator`. - - receive raw `token_id` streams from the generator's sampling phase. - - own the `text/detokenizer` codec to translate tokens into utf-8 bytes. - - handle text-domain complexities: utf-8 boundary buffering, whitespace stripping, and string-based - stop sequence matching. - - ## architecture shift: omnimodal decoupling - to support omnimodal generation (text, audio, vision), the core `generator` must remain completely - ignorant of what a `token_id` represents. it cannot own text-specific formatting or stop sequences. - - the `generator` is passed a `renderer` (e.g., a `text/renderer` or an `audio/renderer`). when the - generator samples a new token, it passes it to the renderer. if the renderer detects that the newly - emitted text matches a user-defined string stop sequence (e.g., `"\nUser:"`), the renderer signals - back to the generator to halt that sequence. - - ## events - - `event::bind` - - inputs: `vocab`, renderer options (e.g., stop sequences, strip whitespace flags), and optional callbacks. - - outputs: invokes callback upon successfully binding state ready to render. - - `event::render` (called by the generator post-sampling) - - inputs: `token_id`, sequence ID, output buffers (for streaming to the user), and optional callbacks (`dispatch_done`, `dispatch_error`). - - outputs: populates caller-provided buffers with translated utf-8 bytes (if any) and a `sequence_status` flag, - invoking the appropriate callback before returning to prevent context reading. - - `event::flush` - - inputs: sequence ID, output buffers, and optional callbacks. - - outputs: forces the emission of any pending bytes into the output buffers and invokes the callback. - - ## state model - - ```text - uninitialized ──► binding ──► idle - │ - idle ──► rendering ──► render_decision ──► (idle | errored) - ▲ │ - └───────────────────────────────────────────┘ - ``` - - - `uninitialized` — awaiting initial setup. - - `binding` — storing vocab references and compiling stop sequence patterns. - - `idle` — waiting for a `token_id` from the generator. - - `rendering` — passing the token to the `text/detokenizer` and buffering the result. - - `render_decision` — evaluating the newly translated text against the stop sequence list. - - unexpected events route to `unexpected`. - - ## responsibilities - 1. **utf-8 stream management:** modern models use byte-fallback tokens. a single `token_id` might be - a partial utf-8 character (e.g., `0xE2`). the renderer maintains a tiny pending byte buffer per - sequence and only flushes valid, complete utf-8 strings to the user's output buffer. - 2. **stop sequence matching:** evaluate the rolling text window against user-provided stop strings. - if a match occurs, truncate the output and return a `stop_sequence_matched` status to the generator. - 3. **formatting:** handle optional policies like stripping leading spaces from the very first token - generated. - - ## error codes - - this actor can produce the following local error flags: - - - `error::invalid_request` — invalid bind/render/flush request payload. - - `error::backend_error` — downstream detokenizer dispatch failed without an explicit code. - - `error::model_invalid` — downstream detokenizer reported model/token validity failure. - - `error::internal_error` — internal unexpected path. - - `error::untracked` — downstream returned an unmapped legacy code. -*/ - #include -#include #include "emel/error/error.hpp" #include "emel/sm.hpp" @@ -92,18 +13,19 @@ design doc: docs/designs/text/renderer.design.md namespace emel::text::renderer { struct uninitialized {}; -struct binding {}; -struct binding_decision {}; -struct idle {}; +struct initializing {}; +struct initialization_decision {}; +struct initialize_publish_success {}; +struct initialize_publish_error {}; +struct initialized {}; struct rendering {}; -struct render_sequence_decision {}; struct render_dispatch_decision {}; -struct render_strip_decision {}; -struct render_apply_decision {}; -struct render_stop_decision {}; -struct render_decision {}; +struct render_result_decision {}; +struct render_publish_success {}; +struct render_publish_error {}; struct flushing {}; -struct flush_decision {}; +struct flush_publish_success {}; +struct flush_publish_error {}; struct done {}; struct errored {}; struct unexpected {}; @@ -112,31 +34,25 @@ struct unexpected {}; renderer architecture notes (single source of truth) state purpose -- uninitialized: awaiting dependency and vocab binding. -- binding/binding_decision: bind request acceptance and detokenizer bind outcome. -- idle: ready for render and flush requests. -- rendering/render_*: render request setup, per-sequence routing, detokenizer dispatch, strip/stop phases. -- flushing/flush_decision: emits buffered bytes (utf-8 pending + stop holdback). +- uninitialized: awaiting dependency and vocab initializing. +- initializing/initialization_decision: initialization request acceptance and detokenizer attach outcome. +- initialize_publish_*: explicit success/error publication for initialization. +- initialized: ready for render and flush requests. +- rendering/render_*: render request setup, detokenizer dispatch, strip/stop phases. +- render_publish_*: explicit success/error publication for render. +- flushing: emits buffered bytes (utf-8 pending + stop holdback). +- flush_publish_*: explicit success/error publication for flush. - done/errored: terminal outcomes for the latest request. - unexpected: sequencing contract violation. key invariants -- per-sequence utf-8 pending bytes and stop holdback are stored in renderer context. +- per-sequence utf-8 pending bytes and holdback are stored in renderer context. - detokenizer stays stateless and receives all pending state via event payloads. - output bytes are written only to caller-provided buffers. -guard semantics -- valid_bind: dependency pointers and stop sequence constraints are valid. -- valid_render/valid_flush: output pointers and sequence id are valid. -- request_ok/request_failed: branch on runtime action outcomes. -- bind/render/flush decision guards model all runtime routing branches. - -action side effects -- begin_bind/bind_detokenizer: configure dependencies and dispatch child bind. -- begin_render/dispatch_render_detokenizer: initialize render runtime ctx and dispatch child detokenize. -- strip/apply actions: apply leading-strip policy and stop sequence matching. -- begin_flush/flush_copy_sequence_buffers: emit pending and holdback bytes. -- mark_done/ensure_last_error: finalize terminal request result. +control invariants +- input validation and all branch outcomes are explicit guard predicates. +- publication is split into explicit success/error branches per request kind. */ struct model { auto operator()() const { @@ -145,411 +61,243 @@ struct model { // clang-format off return sml::make_transition_table( //------------------------------------------------------------------------------// - sml::state <= *sml::state - + sml::event[ guard::valid_bind{} ] - / action::begin_bind - , sml::state <= sml::state - + sml::event[ guard::invalid_bind{} ] - / action::reject_bind - , sml::state <= sml::state + sml::event + sml::state <= *sml::state + + sml::event[ guard::valid_initialize{} ] + / action::begin_initialize + , sml::state <= sml::state + + sml::event[ guard::invalid_initialize{} ] + / action::reject_initialize + , sml::state <= sml::state + + sml::event / action::reject_render - , sml::state <= sml::state + sml::event + , sml::state <= sml::state + + sml::event / action::reject_flush - , sml::state <= sml::state - + sml::event[ guard::valid_bind{} ] - / action::begin_bind - , sml::state <= sml::state - + sml::event[ guard::invalid_bind{} ] - / action::reject_bind - , sml::state <= sml::state - + sml::event[ guard::valid_render{} ] + , sml::state <= sml::state + + sml::event[ guard::valid_initialize{} ] + / action::begin_initialize + , sml::state <= sml::state + + sml::event[ guard::invalid_initialize{} ] + / action::reject_initialize + , sml::state <= sml::state + + sml::event[ guard::valid_render{} ] / action::begin_render - , sml::state <= sml::state - + sml::event[ guard::invalid_render{} ] + , sml::state <= sml::state + + sml::event[ guard::invalid_render{} ] / action::reject_render - , sml::state <= sml::state - + sml::event[ guard::valid_flush{} ] + , sml::state <= sml::state + + sml::event[ guard::valid_flush{} ] / action::begin_flush - , sml::state <= sml::state - + sml::event[ guard::invalid_flush{} ] + , sml::state <= sml::state + + sml::event[ guard::invalid_flush{} ] / action::reject_flush - , sml::state <= sml::state - + sml::event[ guard::valid_bind{} ] - / action::begin_bind - , sml::state <= sml::state - + sml::event[ guard::invalid_bind{} ] - / action::reject_bind + , sml::state <= sml::state + + sml::event[ guard::valid_initialize{} ] + / action::begin_initialize + , sml::state <= sml::state + + sml::event[ guard::invalid_initialize{} ] + / action::reject_initialize , sml::state <= sml::state - + sml::event[ guard::valid_render{} ] + + sml::event[ guard::valid_render{} ] / action::begin_render - , sml::state <= sml::state - + sml::event[ guard::invalid_render{} ] + , sml::state <= sml::state + + sml::event[ guard::invalid_render{} ] / action::reject_render , sml::state <= sml::state - + sml::event[ guard::valid_flush{} ] + + sml::event[ guard::valid_flush{} ] / action::begin_flush - , sml::state <= sml::state - + sml::event[ guard::invalid_flush{} ] + , sml::state <= sml::state + + sml::event[ guard::invalid_flush{} ] / action::reject_flush - , sml::state <= sml::state - + sml::event[ guard::valid_bind{} ] - / action::begin_bind - , sml::state <= sml::state - + sml::event[ guard::invalid_bind{} ] - / action::reject_bind + , sml::state <= sml::state + + sml::event[ guard::valid_initialize{} ] + / action::begin_initialize + , sml::state <= sml::state + + sml::event[ guard::invalid_initialize{} ] + / action::reject_initialize , sml::state <= sml::state - + sml::event[ guard::valid_render{} ] + + sml::event[ guard::valid_render{} ] / action::begin_render - , sml::state <= sml::state - + sml::event[ guard::invalid_render{} ] + , sml::state <= sml::state + + sml::event[ guard::invalid_render{} ] / action::reject_render , sml::state <= sml::state - + sml::event[ guard::valid_flush{} ] + + sml::event[ guard::valid_flush{} ] / action::begin_flush - , sml::state <= sml::state - + sml::event[ guard::invalid_flush{} ] + , sml::state <= sml::state + + sml::event[ guard::invalid_flush{} ] / action::reject_flush - , sml::state <= sml::state - + sml::event[ guard::valid_bind{} ] - / action::begin_bind + , sml::state <= sml::state + + sml::event[ guard::valid_initialize{} ] + / action::begin_initialize , sml::state <= sml::state - + sml::event[ guard::invalid_bind{} ] - / action::reject_bind + + sml::event[ guard::invalid_initialize{} ] + / action::reject_initialize , sml::state <= sml::state - + sml::event[ guard::valid_render{} ] + + sml::event[ guard::valid_render{} ] / action::begin_render , sml::state <= sml::state - + sml::event[ guard::invalid_render{} ] + + sml::event[ guard::invalid_render{} ] / action::reject_render , sml::state <= sml::state - + sml::event[ guard::valid_flush{} ] + + sml::event[ guard::valid_flush{} ] / action::begin_flush , sml::state <= sml::state - + sml::event[ guard::invalid_flush{} ] + + sml::event[ guard::invalid_flush{} ] / action::reject_flush //------------------------------------------------------------------------------// - , sml::state <= sml::state - + sml::completion / action::bind_detokenizer - , sml::state <= sml::state - + sml::completion [ guard::bind_dispatch_ok{} ] - / action::commit_bind_success - , sml::state <= sml::state - + sml::completion [ guard::bind_dispatch_backend_failure{} ] + , sml::state <= sml::state + + sml::completion [ guard::initialize_dispatch_ok{} ] + / action::commit_initialize_success + , sml::state <= sml::state + + sml::completion [ guard::initialize_dispatch_backend_failure{} ] / action::set_backend_error - , sml::state <= sml::state - + sml::completion [ guard::bind_dispatch_reported_error{} ] + , sml::state <= sml::state + + sml::completion [ guard::initialize_dispatch_reported_error{} ] / action::set_error_from_detokenizer - , sml::state <= sml::state - + sml::completion - / action::ensure_last_error + , sml::state <= sml::state + + sml::completion + / action::set_error_from_detokenizer + , sml::state <= sml::state + + sml::completion + / action::publish_initialize_done + , sml::state <= sml::state + + sml::completion + / action::publish_initialize_error - , sml::state <= sml::state - + sml::completion - , sml::state <= sml::state - + sml::completion [ guard::sequence_stop_matched{} ] + , sml::state <= sml::state + + sml::completion + / action::dispatch_initialize_detokenizer + + //------------------------------------------------------------------------------// + , sml::state <= sml::state + + sml::completion [ guard::sequence_stop_matched{} ] / action::render_sequence_already_stopped - , sml::state <= sml::state - + sml::completion [ guard::sequence_running{} ] + , sml::state <= sml::state + + sml::completion [ guard::sequence_running{} ] / action::dispatch_render_detokenizer - , sml::state <= sml::state - + sml::completion [ guard::render_dispatch_backend_failure{} ] + , sml::state <= sml::state + + sml::completion [ guard::render_dispatch_ok{} ] + , sml::state <= sml::state + + sml::completion [ guard::render_dispatch_backend_failure{} ] / action::set_backend_error - , sml::state <= sml::state - + sml::completion [ guard::render_dispatch_reported_error{} ] + , sml::state <= sml::state + + sml::completion [ guard::render_dispatch_reported_error{} ] / action::set_error_from_detokenizer - , sml::state <= sml::state - + sml::completion [ guard::render_dispatch_lengths_invalid{} ] + , sml::state <= sml::state + + sml::completion [ guard::render_dispatch_lengths_invalid{} ] / action::set_invalid_request - , sml::state <= sml::state - + sml::completion [ guard::render_dispatch_ok{} ] - / action::commit_render_detokenizer_output - , sml::state <= sml::state - + sml::completion [ guard::strip_needed{} ] - / action::strip_render_leading_space - , sml::state <= sml::state - + sml::completion [ guard::strip_not_needed{} ] - , sml::state <= sml::state - + sml::completion / action::update_render_strip_state - , sml::state <= sml::state - + sml::completion / action::apply_render_stop_matching - , sml::state <= sml::state - + sml::completion [ guard::request_ok{} ] + , sml::state <= sml::state + + sml::completion + / action::ensure_last_error + , sml::state <= sml::state + + sml::completion [ guard::strip_needed{} ] + / action::commit_and_strip_render_output + , sml::state <= sml::state + + sml::completion [ guard::strip_not_needed{} ] + / action::commit_render_output + , sml::state <= sml::state + + sml::completion [ guard::request_ok{} ] / action::mark_done - , sml::state <= sml::state - + sml::completion [ guard::request_failed{} ] + , sml::state <= sml::state + + sml::completion [ guard::request_failed{} ] / action::ensure_last_error + , sml::state <= sml::state + + sml::completion + / action::publish_render_done + , sml::state <= sml::state + + sml::completion + / action::publish_render_error - , sml::state <= sml::state - + sml::completion - , sml::state <= sml::state - + sml::completion [ guard::flush_output_fits{} ] + //------------------------------------------------------------------------------// + , sml::state <= sml::state + + sml::completion [ guard::flush_output_fits{} ] / action::flush_copy_sequence_buffers - , sml::state <= sml::state - + sml::completion [ guard::flush_output_too_large{} ] + , sml::state <= sml::state + + sml::completion [ guard::flush_output_too_large{} ] / action::set_invalid_request + , sml::state <= sml::state + + sml::completion + / action::publish_flush_done + , sml::state <= sml::state + + sml::completion + / action::publish_flush_error //------------------------------------------------------------------------------// , sml::state <= sml::state + sml::unexpected_event - / action::on_unexpected - , sml::state <= sml::state + sml::unexpected_event - / action::on_unexpected - , sml::state <= sml::state + sml::unexpected_event - / action::on_unexpected - , sml::state <= sml::state + sml::unexpected_event - / action::on_unexpected + / action::on_unexpected + , sml::state <= sml::state + sml::unexpected_event + / action::on_unexpected + , sml::state <= sml::state + sml::unexpected_event + / action::on_unexpected + , sml::state <= sml::state + sml::unexpected_event + / action::on_unexpected + , sml::state <= sml::state + sml::unexpected_event + / action::on_unexpected + , sml::state <= sml::state + sml::unexpected_event + / action::on_unexpected , sml::state <= sml::state + sml::unexpected_event - / action::on_unexpected - , sml::state <= sml::state + sml::unexpected_event - / action::on_unexpected + / action::on_unexpected , sml::state <= sml::state + sml::unexpected_event - / action::on_unexpected - , sml::state <= sml::state + sml::unexpected_event - / action::on_unexpected - , sml::state <= sml::state + sml::unexpected_event - / action::on_unexpected - , sml::state <= sml::state + sml::unexpected_event - / action::on_unexpected - , sml::state <= sml::state + sml::unexpected_event - / action::on_unexpected + / action::on_unexpected + , sml::state <= sml::state + sml::unexpected_event + / action::on_unexpected + , sml::state <= sml::state + sml::unexpected_event + / action::on_unexpected + , sml::state <= sml::state + sml::unexpected_event + / action::on_unexpected , sml::state <= sml::state + sml::unexpected_event - / action::on_unexpected - , sml::state <= sml::state + sml::unexpected_event - / action::on_unexpected + / action::on_unexpected + , sml::state <= sml::state + sml::unexpected_event + / action::on_unexpected + , sml::state <= sml::state + sml::unexpected_event + / action::on_unexpected , sml::state <= sml::state + sml::unexpected_event - / action::on_unexpected + / action::on_unexpected , sml::state <= sml::state + sml::unexpected_event - / action::on_unexpected + / action::on_unexpected , sml::state <= sml::state + sml::unexpected_event - / action::on_unexpected + / action::on_unexpected ); // clang-format on } }; -namespace detail { - -template -inline void write_optional(value_type * destination, - value_type & sink, - const value_type value) noexcept { - value_type * destinations[2] = {&sink, destination}; - value_type * const target = - destinations[static_cast(destination != nullptr)]; - *target = value; -} - -template -inline bool ignore_callback(void *, const event_type &) noexcept { - return true; -} - -template -inline void dispatch_optional_callback( - void * owner, - bool (*callback)(void * owner, const event_type &), - const event_type & payload) noexcept { - const size_t callback_ready = static_cast(callback != nullptr); - const size_t owner_ready = static_cast(owner != nullptr); - const size_t valid = callback_ready & owner_ready; - bool (*callbacks[2])(void *, const event_type &) = { - ignore_callback, - callback}; - void * owners[2] = {nullptr, owner}; - callbacks[valid](owners[valid], payload); -} - -inline emel::error::type select_error_code( - const bool ok, - const emel::error::type runtime_error) noexcept { - const std::array fallback_errors = { - emel::error::cast(error::backend_error), - runtime_error}; - const emel::error::type failure_error = - fallback_errors[static_cast(runtime_error != emel::error::cast(error::none))]; - const std::array final_errors = { - failure_error, - emel::error::cast(error::none)}; - return final_errors[static_cast(ok)]; -} - -inline void dispatch_bind_done(const event::bind & ev, - const int32_t, - const events::binding_done & done_ev, - const events::binding_error &) noexcept { - dispatch_optional_callback(ev.owner_sm, ev.dispatch_done, done_ev); -} - -inline void dispatch_bind_error(const event::bind & ev, - const int32_t, - const events::binding_done &, - const events::binding_error & error_ev) noexcept { - dispatch_optional_callback(ev.owner_sm, ev.dispatch_error, error_ev); -} - -inline void dispatch_render_done(const event::render & ev, - const int32_t, - const events::rendering_done & done_ev, - const events::rendering_error &) noexcept { - dispatch_optional_callback(ev.owner_sm, ev.dispatch_done, done_ev); -} - -inline void dispatch_render_error(const event::render & ev, - const int32_t, - const events::rendering_done &, - const events::rendering_error & error_ev) noexcept { - dispatch_optional_callback(ev.owner_sm, ev.dispatch_error, error_ev); -} - -inline void dispatch_flush_done(const event::flush & ev, - const int32_t, - const events::flush_done & done_ev, - const events::flush_error &) noexcept { - dispatch_optional_callback(ev.owner_sm, ev.dispatch_done, done_ev); -} - -inline void dispatch_flush_error(const event::flush & ev, - const int32_t, - const events::flush_done &, - const events::flush_error & error_ev) noexcept { - dispatch_optional_callback(ev.owner_sm, ev.dispatch_error, error_ev); -} - -template -inline void dispatch_result_callback( - const bool ok, - const request_type & request, - const int32_t err, - const done_event_type & done_ev, - const error_event_type & error_ev, - void (*on_done)( - const request_type &, - const int32_t, - const done_event_type &, - const error_event_type &) noexcept, - void (*on_error)( - const request_type &, - const int32_t, - const done_event_type &, - const error_event_type &) noexcept) noexcept { - using dispatch_fn_type = void (*)(const request_type &, - const int32_t, - const done_event_type &, - const error_event_type &) noexcept; - const std::array dispatchers = {on_error, on_done}; - dispatchers[static_cast(ok)](request, err, done_ev, error_ev); -} - -} // namespace detail - struct sm : public emel::sm { using base_type = emel::sm; sm() : base_type() {} - bool process_event(const event::bind & ev) { - namespace sml = boost::sml; - - event::bind_ctx runtime_ctx{}; - event::bind_runtime runtime_ev{ev, runtime_ctx}; - const bool accepted = base_type::process_event(runtime_ev); - const bool ok = this->is(sml::state); - const emel::error::type err_code = detail::select_error_code(ok, runtime_ctx.err); - this->last_error_ = err_code; - const int32_t err = static_cast(err_code); - - int32_t error_sink = 0; - detail::write_optional(ev.error_out, error_sink, err); - - const events::binding_done done_ev{&ev}; - const events::binding_error error_ev{&ev, err}; - detail::dispatch_result_callback( - ok, - ev, - err, - done_ev, - error_ev, - detail::dispatch_bind_done, - detail::dispatch_bind_error); - - return accepted && ok; + bool process_event(const event::initialize & ev) { + event::initialize_ctx runtime_ctx{}; + const bool accepted = base_type::process_event(event::initialize_runtime{ev, + runtime_ctx}); + return accepted && runtime_ctx.err == emel::error::cast(error::none); } bool process_event(const event::render & ev) { - namespace sml = boost::sml; - event::render_ctx runtime_ctx{}; - event::render_runtime runtime_ev{ev, runtime_ctx}; - const bool accepted = base_type::process_event(runtime_ev); - const bool ok = this->is(sml::state); - const emel::error::type err_code = detail::select_error_code(ok, runtime_ctx.err); - this->last_error_ = err_code; - const int32_t err = static_cast(err_code); - - size_t output_length_sink = 0; - detail::write_optional(ev.output_length_out, output_length_sink, runtime_ctx.output_length); - sequence_status status_sink = sequence_status::running; - detail::write_optional(ev.status_out, status_sink, runtime_ctx.status); - int32_t error_sink = 0; - detail::write_optional(ev.error_out, error_sink, err); - - const events::rendering_done done_ev{&ev, runtime_ctx.output_length, runtime_ctx.status}; - const events::rendering_error error_ev{&ev, err}; - detail::dispatch_result_callback( - ok, - ev, - err, - done_ev, - error_ev, - detail::dispatch_render_done, - detail::dispatch_render_error); - - return accepted && ok; + const bool accepted = base_type::process_event(event::render_runtime{ev, + runtime_ctx}); + return accepted && runtime_ctx.err == emel::error::cast(error::none); } bool process_event(const event::flush & ev) { - namespace sml = boost::sml; - event::flush_ctx runtime_ctx{}; - event::flush_runtime runtime_ev{ev, runtime_ctx}; - const bool accepted = base_type::process_event(runtime_ev); - const bool ok = this->is(sml::state); - const emel::error::type err_code = detail::select_error_code(ok, runtime_ctx.err); - this->last_error_ = err_code; - const int32_t err = static_cast(err_code); - - size_t output_length_sink = 0; - detail::write_optional(ev.output_length_out, output_length_sink, runtime_ctx.output_length); - sequence_status status_sink = sequence_status::running; - detail::write_optional(ev.status_out, status_sink, runtime_ctx.status); - int32_t error_sink = 0; - detail::write_optional(ev.error_out, error_sink, err); - - const events::flush_done done_ev{&ev, runtime_ctx.output_length, runtime_ctx.status}; - const events::flush_error error_ev{&ev, err}; - detail::dispatch_result_callback( - ok, - ev, - err, - done_ev, - error_ev, - detail::dispatch_flush_done, - detail::dispatch_flush_error); - - return accepted && ok; + const bool accepted = base_type::process_event(event::flush_runtime{ev, + runtime_ctx}); + return accepted && runtime_ctx.err == emel::error::cast(error::none); } using base_type::process_event; using base_type::visit_current_states; - - int32_t last_error() const noexcept { return static_cast(this->last_error_); } - - private: - emel::error::type last_error_ = emel::error::cast(error::none); }; +using Renderer = sm; + } // namespace emel::text::renderer diff --git a/tests/text/renderer/renderer_tests.cpp b/tests/text/renderer/renderer_tests.cpp index 766190cb..6716bf8f 100644 --- a/tests/text/renderer/renderer_tests.cpp +++ b/tests/text/renderer/renderer_tests.cpp @@ -2,6 +2,8 @@ #include #include #include +#include +#include #include @@ -57,74 +59,21 @@ constexpr int32_t k_renderer_model_invalid = renderer_error_code(emel::text::renderer::error::model_invalid); constexpr int32_t k_detok_ok = static_cast( emel::error::cast(emel::text::detokenizer::error::none)); +constexpr int32_t k_detok_backend_error = static_cast( + emel::error::cast(emel::text::detokenizer::error::backend_error)); constexpr int32_t k_detok_model_invalid = static_cast( emel::error::cast(emel::text::detokenizer::error::model_invalid)); -bool detokenizer_bind_dispatch( - void * detokenizer_sm, - const emel::text::detokenizer::event::bind & ev) { - if (detokenizer_sm == nullptr) { - return false; - } - return static_cast(detokenizer_sm)->process_event(ev); -} - -bool detokenizer_detokenize_dispatch( - void * detokenizer_sm, - const emel::text::detokenizer::event::detokenize & ev) { - if (detokenizer_sm == nullptr) { - return false; - } - return static_cast(detokenizer_sm)->process_event(ev); -} - -bool detokenizer_bind_fail_no_error( - void *, - const emel::text::detokenizer::event::bind &) { - return false; -} - -bool detokenizer_bind_fail_with_error( - void *, - const emel::text::detokenizer::event::bind & ev) { - ev.error_out = k_detok_model_invalid; - return true; -} - -bool detokenizer_detokenize_fail_no_error( - void *, - const emel::text::detokenizer::event::detokenize &) { - return false; -} - -bool detokenizer_detokenize_fail_with_error( - void *, - const emel::text::detokenizer::event::detokenize & ev) { - ev.error_out = k_detok_model_invalid; - return true; -} - -bool detokenizer_detokenize_bad_output_length( - void *, - const emel::text::detokenizer::event::detokenize & ev) { - ev.error_out = k_detok_ok; - ev.output_length_out = ev.output_capacity + 1; - ev.pending_length_out = ev.pending_length; - return true; -} - -bool detokenizer_detokenize_bad_pending_length( - void *, - const emel::text::detokenizer::event::detokenize & ev) { - ev.error_out = k_detok_ok; - ev.output_length_out = 0; - ev.pending_length_out = ev.pending_capacity + 1; - return true; -} +static_assert( + std::is_reference_v().vocab)>); +static_assert(std::is_same_v< + std::remove_reference_t< + decltype(std::declval().detokenizer)>, + emel::text::detokenizer::sm>); struct callback_recorder { - int bind_done = 0; - int bind_error = 0; + int initialize_done_count = 0; + int initialize_error_count = 0; int render_done = 0; int render_error = 0; int flush_done = 0; @@ -135,22 +84,22 @@ struct callback_recorder { int32_t last_error = k_renderer_ok; }; -bool on_bind_done(void * owner, - const emel::text::renderer::events::binding_done &) { +bool on_initialize_done(void * owner, + const emel::text::renderer::events::initialize_done &) { if (owner == nullptr) { return false; } - static_cast(owner)->bind_done += 1; + static_cast(owner)->initialize_done_count += 1; return true; } -bool on_bind_error(void * owner, - const emel::text::renderer::events::binding_error & ev) { +bool on_initialize_error(void * owner, + const emel::text::renderer::events::initialize_error & ev) { if (owner == nullptr) { return false; } auto * recorder = static_cast(owner); - recorder->bind_error += 1; + recorder->initialize_error_count += 1; recorder->last_error = ev.err; return true; } @@ -201,38 +150,32 @@ bool on_flush_error(void * owner, return true; } -bool bind_renderer(emel::text::renderer::sm & renderer, - emel::text::detokenizer::sm & detokenizer, +bool initialize_renderer(emel::text::renderer::sm & renderer, const emel::model::data::vocab & vocab, const bool strip_leading_space, const std::string_view * stop_sequences, const size_t stop_count, int32_t & err_out) { - emel::text::renderer::event::bind bind_ev = {}; - bind_ev.vocab = &vocab; - bind_ev.detokenizer_sm = &detokenizer; - bind_ev.dispatch_detokenizer_bind = detokenizer_bind_dispatch; - bind_ev.dispatch_detokenizer_detokenize = detokenizer_detokenize_dispatch; - bind_ev.strip_leading_space = strip_leading_space; - bind_ev.stop_sequences = stop_sequences; - bind_ev.stop_sequence_count = stop_count; - bind_ev.error_out = &err_out; - return renderer.process_event(bind_ev); + emel::text::renderer::event::initialize initialize_ev{vocab}; + initialize_ev.strip_leading_space = strip_leading_space; + initialize_ev.stop_sequences = stop_sequences; + initialize_ev.stop_sequence_count = stop_count; + initialize_ev.error_out = &err_out; + return renderer.process_event(initialize_ev); } } // namespace -TEST_CASE("renderer_bind_render_and_flush_without_stop_sequences") { +TEST_CASE("renderer_initialize_render_and_flush_without_stop_sequences") { auto & vocab = make_vocab(); const int32_t hi_id = add_token(vocab, "hi"); CHECK(hi_id == 0); - emel::text::detokenizer::sm detokenizer{}; emel::text::renderer::sm renderer{}; - int32_t bind_err = k_renderer_ok; - CHECK(bind_renderer(renderer, detokenizer, vocab, false, nullptr, 0, bind_err)); - CHECK(bind_err == k_renderer_ok); + int32_t initialize_err = k_renderer_ok; + CHECK(initialize_renderer(renderer, vocab, false, nullptr, 0, initialize_err)); + CHECK(initialize_err == k_renderer_ok); std::array output = {}; size_t output_length = 0; @@ -273,26 +216,33 @@ TEST_CASE("renderer_bind_render_and_flush_without_stop_sequences") { CHECK(status == emel::text::renderer::sequence_status::running); } -TEST_CASE("renderer_bind_rejects_invalid_stop_sequences") { +TEST_CASE("renderer_initialize_succeeds_without_external_detokenizer_injection") { + auto & vocab = make_vocab(); + emel::text::renderer::sm renderer{}; + + int32_t initialize_err = k_renderer_ok; + emel::text::renderer::event::initialize initialize_ev{vocab}; + initialize_ev.error_out = &initialize_err; + + CHECK(renderer.process_event(initialize_ev)); + CHECK(initialize_err == k_renderer_ok); +} + +TEST_CASE("renderer_initialize_rejects_invalid_stop_sequences") { auto & vocab = make_vocab(); - emel::text::detokenizer::sm detokenizer{}; emel::text::renderer::sm renderer{}; const std::array invalid_stops = { std::string_view("0123456789012345678901234567890123456789")}; - int32_t bind_err = k_renderer_ok; - emel::text::renderer::event::bind bind_ev = {}; - bind_ev.vocab = &vocab; - bind_ev.detokenizer_sm = &detokenizer; - bind_ev.dispatch_detokenizer_bind = detokenizer_bind_dispatch; - bind_ev.dispatch_detokenizer_detokenize = detokenizer_detokenize_dispatch; - bind_ev.stop_sequences = invalid_stops.data(); - bind_ev.stop_sequence_count = invalid_stops.size(); - bind_ev.error_out = &bind_err; - - CHECK_FALSE(renderer.process_event(bind_ev)); - CHECK(bind_err == k_renderer_invalid_request); + int32_t initialize_err = k_renderer_ok; + emel::text::renderer::event::initialize initialize_ev{vocab}; + initialize_ev.stop_sequences = invalid_stops.data(); + initialize_ev.stop_sequence_count = invalid_stops.size(); + initialize_ev.error_out = &initialize_err; + + CHECK_FALSE(renderer.process_event(initialize_ev)); + CHECK(initialize_err == k_renderer_invalid_request); } TEST_CASE("renderer_handles_plamo2_byte_fallback_utf8") { @@ -301,12 +251,11 @@ TEST_CASE("renderer_handles_plamo2_byte_fallback_utf8") { const int32_t b1 = add_token(vocab, "<0x82>"); const int32_t b2 = add_token(vocab, "<0xAC>"); - emel::text::detokenizer::sm detokenizer{}; emel::text::renderer::sm renderer{}; - int32_t bind_err = k_renderer_ok; - CHECK(bind_renderer(renderer, detokenizer, vocab, false, nullptr, 0, bind_err)); - CHECK(bind_err == k_renderer_ok); + int32_t initialize_err = k_renderer_ok; + CHECK(initialize_renderer(renderer, vocab, false, nullptr, 0, initialize_err)); + CHECK(initialize_err == k_renderer_ok); std::array output = {}; size_t output_length = 0; @@ -348,18 +297,16 @@ TEST_CASE("renderer_stop_sequence_matches_across_token_boundary") { const int32_t cd_id = add_token(vocab, "cd"); const std::array stops = {"bc"}; - emel::text::detokenizer::sm detokenizer{}; emel::text::renderer::sm renderer{}; - int32_t bind_err = k_renderer_ok; - CHECK(bind_renderer(renderer, - detokenizer, + int32_t initialize_err = k_renderer_ok; + CHECK(initialize_renderer(renderer, vocab, false, stops.data(), stops.size(), - bind_err)); - CHECK(bind_err == k_renderer_ok); + initialize_err)); + CHECK(initialize_err == k_renderer_ok); std::array output = {}; size_t output_length = 0; @@ -411,18 +358,16 @@ TEST_CASE("renderer_flush_emits_holdback_when_no_stop_match") { const int32_t ab_id = add_token(vocab, "ab"); const std::array stops = {"xyz"}; - emel::text::detokenizer::sm detokenizer{}; emel::text::renderer::sm renderer{}; - int32_t bind_err = k_renderer_ok; - CHECK(bind_renderer(renderer, - detokenizer, + int32_t initialize_err = k_renderer_ok; + CHECK(initialize_renderer(renderer, vocab, false, stops.data(), stops.size(), - bind_err)); - CHECK(bind_err == k_renderer_ok); + initialize_err)); + CHECK(initialize_err == k_renderer_ok); std::array output = {}; size_t output_length = 0; @@ -465,12 +410,11 @@ TEST_CASE("renderer_strips_leading_whitespace_when_enabled") { auto & vocab = make_vocab(); const int32_t spaced = add_token(vocab, " hi"); - emel::text::detokenizer::sm detokenizer{}; emel::text::renderer::sm renderer{}; - int32_t bind_err = k_renderer_ok; - CHECK(bind_renderer(renderer, detokenizer, vocab, true, nullptr, 0, bind_err)); - CHECK(bind_err == k_renderer_ok); + int32_t initialize_err = k_renderer_ok; + CHECK(initialize_renderer(renderer, vocab, true, nullptr, 0, initialize_err)); + CHECK(initialize_err == k_renderer_ok); std::array output = {}; size_t output_length = 0; @@ -497,25 +441,20 @@ TEST_CASE("renderer_dispatches_done_and_error_callbacks") { auto & vocab = make_vocab(); const int32_t hi_id = add_token(vocab, "hi"); - emel::text::detokenizer::sm detokenizer{}; emel::text::renderer::sm renderer{}; callback_recorder recorder{}; - int32_t bind_err = k_renderer_ok; - emel::text::renderer::event::bind bind_ev = {}; - bind_ev.vocab = &vocab; - bind_ev.detokenizer_sm = &detokenizer; - bind_ev.dispatch_detokenizer_bind = detokenizer_bind_dispatch; - bind_ev.dispatch_detokenizer_detokenize = detokenizer_detokenize_dispatch; - bind_ev.error_out = &bind_err; - bind_ev.owner_sm = &recorder; - bind_ev.dispatch_done = on_bind_done; - bind_ev.dispatch_error = on_bind_error; - - CHECK(renderer.process_event(bind_ev)); - CHECK(bind_err == k_renderer_ok); - CHECK(recorder.bind_done == 1); - CHECK(recorder.bind_error == 0); + int32_t initialize_err = k_renderer_ok; + emel::text::renderer::event::initialize initialize_ev{vocab}; + initialize_ev.error_out = &initialize_err; + initialize_ev.owner_sm = &recorder; + initialize_ev.dispatch_done = on_initialize_done; + initialize_ev.dispatch_error = on_initialize_error; + + CHECK(renderer.process_event(initialize_ev)); + CHECK(initialize_err == k_renderer_ok); + CHECK(recorder.initialize_done_count == 1); + CHECK(recorder.initialize_error_count == 0); std::array output = {}; size_t output_length = 0; @@ -577,21 +516,32 @@ TEST_CASE("renderer_dispatches_done_and_error_callbacks") { CHECK(recorder.flush_error == 0); } -TEST_CASE("renderer_surfaces_local_model_invalid_error_on_bind_failure") { +TEST_CASE("renderer_surfaces_local_model_invalid_error_on_render_failure") { auto & vocab = make_vocab(); - emel::text::detokenizer::sm detokenizer{}; + add_token(vocab, "ok"); emel::text::renderer::sm renderer{}; - int32_t bind_err = 0; - emel::text::renderer::event::bind bind_ev = {}; - bind_ev.vocab = &vocab; - bind_ev.detokenizer_sm = &detokenizer; - bind_ev.dispatch_detokenizer_bind = detokenizer_bind_fail_with_error; - bind_ev.dispatch_detokenizer_detokenize = detokenizer_detokenize_dispatch; - bind_ev.error_out = &bind_err; + int32_t initialize_err = k_renderer_ok; + CHECK(initialize_renderer(renderer, vocab, false, nullptr, 0, initialize_err)); + CHECK(initialize_err == k_renderer_ok); + + std::array output = {}; + size_t output_length = 0; + emel::text::renderer::sequence_status status = + emel::text::renderer::sequence_status::running; + int32_t render_err = k_renderer_ok; + + emel::text::renderer::event::render render_ev = {}; + render_ev.token_id = 99999; + render_ev.sequence_id = 0; + render_ev.output = output.data(); + render_ev.output_capacity = output.size(); + render_ev.output_length_out = &output_length; + render_ev.status_out = &status; + render_ev.error_out = &render_err; - CHECK_FALSE(renderer.process_event(bind_ev)); - CHECK(bind_err == k_renderer_model_invalid); + CHECK_FALSE(renderer.process_event(render_ev)); + CHECK(render_err == k_renderer_model_invalid); } TEST_CASE("renderer_action_and_guard_paths") { @@ -599,40 +549,32 @@ TEST_CASE("renderer_action_and_guard_paths") { const int32_t token_id = add_token(vocab, "ab"); const int32_t special_id = add_token(vocab, "", 3); (void)token_id; - int dummy = 0; - void * dummy_ptr = &dummy; emel::text::renderer::action::context ctx = {}; std::array output = {}; - emel::text::renderer::event::bind bind_ev = {}; - emel::text::renderer::event::bind_ctx bind_runtime_ctx = {}; - emel::text::renderer::event::bind_runtime bind_runtime_ev{bind_ev, bind_runtime_ctx}; - CHECK(emel::text::renderer::guard::valid_bind{}(bind_runtime_ev)); - CHECK_FALSE(emel::text::renderer::guard::invalid_bind{}(bind_runtime_ev)); - - bind_ev.vocab = &vocab; - bind_ev.detokenizer_sm = dummy_ptr; - bind_ev.dispatch_detokenizer_bind = detokenizer_bind_dispatch; - bind_ev.dispatch_detokenizer_detokenize = detokenizer_detokenize_dispatch; - CHECK(emel::text::renderer::guard::valid_bind{}(bind_runtime_ev)); + emel::text::renderer::event::initialize initialize_ev{vocab}; + emel::text::renderer::event::initialize_ctx initialize_runtime_ctx = {}; + emel::text::renderer::event::initialize_runtime initialize_runtime_ev{initialize_ev, initialize_runtime_ctx}; + CHECK(emel::text::renderer::guard::valid_initialize{}(initialize_runtime_ev)); + CHECK_FALSE(emel::text::renderer::guard::invalid_initialize{}(initialize_runtime_ev)); std::array long_stop = { std::string_view("0123456789012345678901234567890123456789")}; - bind_ev.stop_sequences = long_stop.data(); - bind_ev.stop_sequence_count = long_stop.size(); - CHECK_FALSE(emel::text::renderer::guard::valid_bind{}(bind_runtime_ev)); + initialize_ev.stop_sequences = long_stop.data(); + initialize_ev.stop_sequence_count = long_stop.size(); + CHECK_FALSE(emel::text::renderer::guard::valid_initialize{}(initialize_runtime_ev)); emel::text::renderer::action::set_error( - bind_runtime_ctx, + initialize_runtime_ctx, emel::text::renderer::error::invalid_request); - CHECK(bind_runtime_ctx.err == + CHECK(initialize_runtime_ctx.err == renderer_error_type(emel::text::renderer::error::invalid_request)); - bind_ev.stop_sequences = nullptr; - bind_ev.stop_sequence_count = 0; - bind_runtime_ctx = {}; - emel::text::renderer::action::begin_bind(bind_runtime_ev, ctx); - CHECK(ctx.vocab == &vocab); + initialize_ev.stop_sequences = nullptr; + initialize_ev.stop_sequence_count = 0; + initialize_runtime_ctx = {}; + emel::text::renderer::action::begin_initialize(initialize_runtime_ev, ctx); + CHECK(ctx.vocab == nullptr); ctx.sequences[0].holdback_length = 1; ctx.sequences[0].holdback[0] = 'x'; @@ -642,23 +584,23 @@ TEST_CASE("renderer_action_and_guard_paths") { ctx.sequences[0], output.data(), 0, 1, 0, 0, output_length, compose_ctx)); CHECK(compose_ctx.err == renderer_error_type(emel::text::renderer::error::invalid_request)); - ctx = emel::text::renderer::action::context{}; - ctx.stop_sequence_count = 1; - ctx.stop_sequences[0].offset = 0; - ctx.stop_sequences[0].length = 2; - ctx.stop_storage[0] = 'b'; - ctx.stop_storage[1] = 'c'; - ctx.stop_max_length = 2; - ctx.sequences[0].holdback_length = 1; - ctx.sequences[0].holdback[0] = 'b'; + emel::text::renderer::action::context match_context = {}; + match_context.stop_sequence_count = 1; + match_context.stop_sequences[0].offset = 0; + match_context.stop_sequences[0].length = 2; + match_context.stop_storage[0] = 'b'; + match_context.stop_storage[1] = 'c'; + match_context.stop_max_length = 2; + match_context.sequences[0].holdback_length = 1; + match_context.sequences[0].holdback[0] = 'b'; output[0] = 'c'; emel::text::renderer::event::render_ctx match_ctx = {}; output_length = 0; emel::text::renderer::sequence_status status = emel::text::renderer::sequence_status::running; CHECK(emel::text::renderer::action::apply_stop_matching( - ctx.sequences[0], - ctx, + match_context.sequences[0], + match_context, output.data(), output.size(), 1, @@ -667,22 +609,22 @@ TEST_CASE("renderer_action_and_guard_paths") { match_ctx)); CHECK(status == emel::text::renderer::sequence_status::stop_sequence_matched); - ctx = emel::text::renderer::action::context{}; - ctx.stop_sequence_count = 1; - ctx.stop_sequences[0].offset = 0; - ctx.stop_sequences[0].length = 2; - ctx.stop_storage[0] = 'z'; - ctx.stop_storage[1] = 'z'; - ctx.stop_max_length = 2; - ctx.sequences[0].holdback_length = 1; - ctx.sequences[0].holdback[0] = 'a'; + emel::text::renderer::action::context no_match_context = {}; + no_match_context.stop_sequence_count = 1; + no_match_context.stop_sequences[0].offset = 0; + no_match_context.stop_sequences[0].length = 2; + no_match_context.stop_storage[0] = 'z'; + no_match_context.stop_storage[1] = 'z'; + no_match_context.stop_max_length = 2; + no_match_context.sequences[0].holdback_length = 1; + no_match_context.sequences[0].holdback[0] = 'a'; output[0] = 'b'; emel::text::renderer::event::render_ctx no_match_ctx = {}; output_length = 0; status = emel::text::renderer::sequence_status::running; CHECK(emel::text::renderer::action::apply_stop_matching( - ctx.sequences[0], - ctx, + no_match_context.sequences[0], + no_match_context, output.data(), output.size(), 1, @@ -691,40 +633,26 @@ TEST_CASE("renderer_action_and_guard_paths") { no_match_ctx)); CHECK(output_length == 1); - CHECK_FALSE(emel::text::renderer::guard::bind_context_ready{}(bind_runtime_ev, ctx)); - emel::text::renderer::action::set_invalid_request(bind_runtime_ev, ctx); - CHECK(bind_runtime_ctx.err == - renderer_error_type(emel::text::renderer::error::invalid_request)); - - ctx.vocab = &vocab; - ctx.detokenizer_sm = dummy_ptr; - ctx.dispatch_detokenizer_bind = detokenizer_bind_fail_no_error; - ctx.dispatch_detokenizer_detokenize = detokenizer_detokenize_dispatch; - bind_runtime_ctx = {}; - emel::text::renderer::action::bind_detokenizer(bind_runtime_ev, ctx); - CHECK(emel::text::renderer::guard::bind_dispatch_backend_failure{}(bind_runtime_ev)); - emel::text::renderer::action::set_backend_error(bind_runtime_ev, ctx); - CHECK(bind_runtime_ctx.err == + initialize_runtime_ctx.detokenizer_err = k_detok_backend_error; + CHECK(emel::text::renderer::guard::initialize_dispatch_backend_failure{}(initialize_runtime_ev)); + emel::text::renderer::action::set_backend_error(initialize_runtime_ev, ctx); + CHECK(initialize_runtime_ctx.err == renderer_error_type(emel::text::renderer::error::backend_error)); - ctx.dispatch_detokenizer_bind = detokenizer_bind_fail_with_error; - bind_runtime_ctx = {}; - emel::text::renderer::action::bind_detokenizer(bind_runtime_ev, ctx); - CHECK(emel::text::renderer::guard::bind_dispatch_reported_error{}(bind_runtime_ev)); - emel::text::renderer::action::set_error_from_detokenizer(bind_runtime_ev, ctx); - CHECK(bind_runtime_ctx.err == + initialize_runtime_ctx.err = renderer_error_type(emel::text::renderer::error::none); + initialize_runtime_ctx.detokenizer_err = k_detok_model_invalid; + CHECK(emel::text::renderer::guard::initialize_dispatch_reported_error{}(initialize_runtime_ev)); + emel::text::renderer::action::set_error_from_detokenizer(initialize_runtime_ev, ctx); + CHECK(initialize_runtime_ctx.err == renderer_error_type(emel::text::renderer::error::model_invalid)); - emel::text::detokenizer::sm detokenizer{}; - ctx.detokenizer_sm = &detokenizer; - ctx.dispatch_detokenizer_bind = detokenizer_bind_dispatch; - ctx.dispatch_detokenizer_detokenize = detokenizer_detokenize_dispatch; - bind_runtime_ctx = {}; - emel::text::renderer::action::bind_detokenizer(bind_runtime_ev, ctx); - CHECK(emel::text::renderer::guard::bind_dispatch_ok{}(bind_runtime_ev)); - emel::text::renderer::action::commit_bind_success(bind_runtime_ev, ctx); - CHECK(bind_runtime_ctx.err == renderer_error_type(emel::text::renderer::error::none)); - CHECK(ctx.is_bound); + initialize_runtime_ctx = {}; + emel::text::renderer::action::begin_initialize(initialize_runtime_ev, ctx); + emel::text::renderer::action::dispatch_initialize_detokenizer(initialize_runtime_ev, ctx); + CHECK(emel::text::renderer::guard::initialize_dispatch_ok{}(initialize_runtime_ev)); + emel::text::renderer::action::commit_initialize_success(initialize_runtime_ev, ctx); + CHECK(initialize_runtime_ctx.err == renderer_error_type(emel::text::renderer::error::none)); + CHECK(ctx.vocab == &vocab); emel::text::renderer::event::render render_ev = {}; size_t out_len = 0; @@ -742,39 +670,26 @@ TEST_CASE("renderer_action_and_guard_paths") { emel::text::renderer::action::begin_render(render_runtime_ev, ctx); CHECK(render_runtime_ctx.output_length == 0); - ctx.vocab = &vocab; - ctx.detokenizer_sm = dummy_ptr; - ctx.dispatch_detokenizer_detokenize = detokenizer_detokenize_fail_no_error; - render_ev.output = output.data(); - render_ev.output_capacity = output.size(); - render_ev.sequence_id = 0; - render_ev.token_id = 0; - render_runtime_ctx = {}; - emel::text::renderer::action::begin_render(render_runtime_ev, ctx); - emel::text::renderer::action::dispatch_render_detokenizer(render_runtime_ev, ctx); + render_runtime_ctx.detokenizer_err = k_detok_backend_error; CHECK(emel::text::renderer::guard::render_dispatch_backend_failure{}(render_runtime_ev)); emel::text::renderer::action::set_backend_error(render_runtime_ev, ctx); CHECK(render_runtime_ctx.err == renderer_error_type(emel::text::renderer::error::backend_error)); - ctx.dispatch_detokenizer_detokenize = detokenizer_detokenize_fail_with_error; - render_runtime_ctx = {}; - emel::text::renderer::action::begin_render(render_runtime_ev, ctx); - emel::text::renderer::action::dispatch_render_detokenizer(render_runtime_ev, ctx); + render_runtime_ctx.err = renderer_error_type(emel::text::renderer::error::none); + render_runtime_ctx.detokenizer_err = k_detok_model_invalid; CHECK(emel::text::renderer::guard::render_dispatch_reported_error{}(render_runtime_ev)); emel::text::renderer::action::set_error_from_detokenizer(render_runtime_ev, ctx); CHECK(render_runtime_ctx.err == renderer_error_type(emel::text::renderer::error::model_invalid)); - ctx.dispatch_detokenizer_detokenize = detokenizer_detokenize_dispatch; - ctx.detokenizer_sm = &detokenizer; + render_runtime_ctx = {}; render_ev.output = nullptr; render_ev.output_capacity = 0; render_ev.token_id = special_id; render_ev.emit_special = false; ctx.sequences[0].pending_length = 0; ctx.sequences[0].holdback_length = 0; - render_runtime_ctx = {}; emel::text::renderer::action::begin_render(render_runtime_ev, ctx); CHECK(emel::text::renderer::guard::sequence_running{}(render_runtime_ev, ctx)); emel::text::renderer::action::dispatch_render_detokenizer(render_runtime_ev, ctx); @@ -790,25 +705,27 @@ TEST_CASE("renderer_action_and_guard_paths") { render_ev.output = output.data(); render_ev.output_capacity = output.size(); render_ev.token_id = token_id; - ctx.dispatch_detokenizer_detokenize = detokenizer_detokenize_bad_output_length; render_runtime_ctx = {}; emel::text::renderer::action::begin_render(render_runtime_ev, ctx); - emel::text::renderer::action::dispatch_render_detokenizer(render_runtime_ev, ctx); + render_runtime_ctx.detokenizer_err = k_detok_ok; + render_runtime_ctx.detokenizer_output_length = render_ev.output_capacity + 1; + render_runtime_ctx.detokenizer_pending_length = 0; CHECK(emel::text::renderer::guard::render_dispatch_lengths_invalid{}(render_runtime_ev, ctx)); emel::text::renderer::action::set_invalid_request(render_runtime_ev, ctx); CHECK(render_runtime_ctx.err == renderer_error_type(emel::text::renderer::error::invalid_request)); - ctx.dispatch_detokenizer_detokenize = detokenizer_detokenize_bad_pending_length; render_runtime_ctx = {}; emel::text::renderer::action::begin_render(render_runtime_ev, ctx); - emel::text::renderer::action::dispatch_render_detokenizer(render_runtime_ev, ctx); + render_runtime_ctx.detokenizer_err = k_detok_ok; + render_runtime_ctx.detokenizer_output_length = 0; + render_runtime_ctx.detokenizer_pending_length = + ctx.sequences[0].pending_bytes.size() + 1; CHECK(emel::text::renderer::guard::render_dispatch_lengths_invalid{}(render_runtime_ev, ctx)); emel::text::renderer::action::set_invalid_request(render_runtime_ev, ctx); CHECK(render_runtime_ctx.err == renderer_error_type(emel::text::renderer::error::invalid_request)); - ctx.dispatch_detokenizer_detokenize = detokenizer_detokenize_dispatch; ctx.sequences[0].stop_matched = true; render_runtime_ctx = {}; emel::text::renderer::action::begin_render(render_runtime_ev, ctx); @@ -861,39 +778,44 @@ TEST_CASE("renderer_action_and_guard_paths") { emel::text::renderer::action::reject_flush(flush_runtime_ev, ctx); CHECK(flush_runtime_ctx.err == renderer_error_type(emel::text::renderer::error::invalid_request)); - bind_runtime_ctx = {}; - emel::text::renderer::action::reject_bind(bind_runtime_ev, ctx); - CHECK(bind_runtime_ctx.err == + initialize_runtime_ctx = {}; + emel::text::renderer::action::reject_initialize(initialize_runtime_ev, ctx); + CHECK(initialize_runtime_ctx.err == renderer_error_type(emel::text::renderer::error::invalid_request)); - bind_runtime_ctx.err = renderer_error_type(emel::text::renderer::error::none); - emel::text::renderer::action::ensure_last_error(bind_runtime_ev, ctx); - CHECK(bind_runtime_ctx.err == + initialize_runtime_ctx.err = renderer_error_type(emel::text::renderer::error::none); + emel::text::renderer::action::ensure_last_error(initialize_runtime_ev, ctx); + CHECK(initialize_runtime_ctx.err == renderer_error_type(emel::text::renderer::error::backend_error)); - emel::text::renderer::action::mark_done(bind_runtime_ev, ctx); - CHECK(bind_runtime_ctx.err == renderer_error_type(emel::text::renderer::error::none)); - emel::text::renderer::action::on_unexpected(bind_runtime_ev, ctx); - CHECK(bind_runtime_ctx.err == + emel::text::renderer::action::mark_done(initialize_runtime_ev, ctx); + CHECK(initialize_runtime_ctx.err == renderer_error_type(emel::text::renderer::error::none)); + emel::text::renderer::action::on_unexpected(initialize_runtime_ev, ctx); + CHECK(initialize_runtime_ctx.err == renderer_error_type(emel::text::renderer::error::invalid_request)); emel::text::renderer::event::render bad_render = {}; emel::text::renderer::event::render_ctx bad_render_ctx = {}; - emel::text::renderer::event::render_runtime bad_render_runtime{bad_render, bad_render_ctx}; - CHECK_FALSE(emel::text::renderer::guard::valid_render{}(bad_render_runtime, ctx)); - CHECK(emel::text::renderer::guard::invalid_render{}(bad_render_runtime, ctx)); - ctx.is_bound = true; + bad_render.token_id = -1; + CHECK_FALSE(emel::text::renderer::guard::valid_render{}( + emel::text::renderer::event::render_runtime{bad_render, bad_render_ctx}, ctx)); + CHECK(emel::text::renderer::guard::invalid_render{}( + emel::text::renderer::event::render_runtime{bad_render, bad_render_ctx}, ctx)); ctx.vocab = &vocab; + bad_render_ctx = {}; + bad_render.token_id = 0; bad_render.output = output.data(); bad_render.output_capacity = output.size(); bad_render.output_length_out = &out_len; bad_render.status_out = &status; bad_render.error_out = &err; bad_render.sequence_id = 0; - CHECK(emel::text::renderer::guard::valid_render{}(bad_render_runtime, ctx)); + CHECK(emel::text::renderer::guard::valid_render{}( + emel::text::renderer::event::render_runtime{bad_render, bad_render_ctx}, ctx)); bad_render.output = nullptr; bad_render.output_capacity = 0; - CHECK(emel::text::renderer::guard::valid_render{}(bad_render_runtime, ctx)); + CHECK(emel::text::renderer::guard::valid_render{}( + emel::text::renderer::event::render_runtime{bad_render, bad_render_ctx}, ctx)); emel::text::renderer::event::flush bad_flush = {}; emel::text::renderer::event::flush_ctx bad_flush_ctx = {}; From 5c6830e8c3c97df00b7ef6c70a868699799ab1a4 Mon Sep 17 00:00:00 2001 From: gabewillen Date: Mon, 2 Mar 2026 16:06:46 -0600 Subject: [PATCH 2/2] fix(renderer): resolve inline review feedback --- src/emel/text/renderer/actions.hpp | 13 +++ src/emel/text/renderer/sm.hpp | 4 +- tests/text/renderer/renderer_tests.cpp | 105 +++++++++++++++++++++++++ 3 files changed, 120 insertions(+), 2 deletions(-) diff --git a/src/emel/text/renderer/actions.hpp b/src/emel/text/renderer/actions.hpp index 6d453984..36ca8006 100644 --- a/src/emel/text/renderer/actions.hpp +++ b/src/emel/text/renderer/actions.hpp @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -661,6 +662,12 @@ struct publish_render_error { context &) const noexcept { auto & ev = detail::unwrap_runtime_event(runtime_ev); int32_t error_sink = to_error_out(k_error_none); + size_t output_length_sink = 0; + sequence_status status_sink = sequence_status::running; + write_optional(ev.request.output_length_out, + output_length_sink, + ev.ctx.output_length); + write_optional(ev.request.status_out, status_sink, ev.ctx.status); write_optional(ev.request.error_out, error_sink, to_error_out(ev.ctx.err)); @@ -705,6 +712,12 @@ struct publish_flush_error { context &) const noexcept { auto & ev = detail::unwrap_runtime_event(runtime_ev); int32_t error_sink = to_error_out(k_error_none); + size_t output_length_sink = 0; + sequence_status status_sink = sequence_status::running; + write_optional(ev.request.output_length_out, + output_length_sink, + ev.ctx.output_length); + write_optional(ev.request.status_out, status_sink, ev.ctx.status); write_optional(ev.request.error_out, error_sink, to_error_out(ev.ctx.err)); diff --git a/src/emel/text/renderer/sm.hpp b/src/emel/text/renderer/sm.hpp index db66cbaa..deafae33 100644 --- a/src/emel/text/renderer/sm.hpp +++ b/src/emel/text/renderer/sm.hpp @@ -163,7 +163,7 @@ struct model { , sml::state <= sml::state + sml::completion / action::set_error_from_detokenizer - , sml::state <= sml::state + , sml::state <= sml::state + sml::completion / action::publish_initialize_done , sml::state <= sml::state @@ -175,7 +175,7 @@ struct model { / action::dispatch_initialize_detokenizer //------------------------------------------------------------------------------// - , sml::state <= sml::state + , sml::state <= sml::state + sml::completion [ guard::sequence_stop_matched{} ] / action::render_sequence_already_stopped , sml::state <= sml::state diff --git a/tests/text/renderer/renderer_tests.cpp b/tests/text/renderer/renderer_tests.cpp index 6716bf8f..f0042a92 100644 --- a/tests/text/renderer/renderer_tests.cpp +++ b/tests/text/renderer/renderer_tests.cpp @@ -353,6 +353,62 @@ TEST_CASE("renderer_stop_sequence_matches_across_token_boundary") { CHECK(status == emel::text::renderer::sequence_status::stop_sequence_matched); } +TEST_CASE("renderer_stop_sequence_state_latches_across_calls") { + auto & vocab = make_vocab(); + const int32_t ab_id = add_token(vocab, "ab"); + const int32_t cd_id = add_token(vocab, "cd"); + const std::array stops = {"bc"}; + + emel::text::renderer::sm renderer{}; + + int32_t initialize_err = k_renderer_ok; + CHECK(initialize_renderer(renderer, + vocab, + false, + stops.data(), + stops.size(), + initialize_err)); + CHECK(initialize_err == k_renderer_ok); + + std::array output = {}; + size_t output_length = 0; + emel::text::renderer::sequence_status status = + emel::text::renderer::sequence_status::running; + int32_t err = k_renderer_ok; + + emel::text::renderer::event::render render_ev = {}; + render_ev.sequence_id = 0; + render_ev.output = output.data(); + render_ev.output_capacity = output.size(); + render_ev.output_length_out = &output_length; + render_ev.status_out = &status; + render_ev.error_out = &err; + + render_ev.token_id = ab_id; + CHECK(renderer.process_event(render_ev)); + CHECK(err == k_renderer_ok); + CHECK(output_length == 1); + CHECK(std::string_view(output.data(), output_length) == "a"); + CHECK(status == emel::text::renderer::sequence_status::running); + + output.fill('\0'); + render_ev.token_id = cd_id; + CHECK(renderer.process_event(render_ev)); + CHECK(err == k_renderer_ok); + CHECK(output_length == 0); + CHECK(status == emel::text::renderer::sequence_status::stop_sequence_matched); + + output.fill('\0'); + output_length = 99; + status = emel::text::renderer::sequence_status::running; + err = k_renderer_ok; + render_ev.token_id = ab_id; + CHECK(renderer.process_event(render_ev)); + CHECK(err == k_renderer_ok); + CHECK(output_length == 0); + CHECK(status == emel::text::renderer::sequence_status::stop_sequence_matched); +} + TEST_CASE("renderer_flush_emits_holdback_when_no_stop_match") { auto & vocab = make_vocab(); const int32_t ab_id = add_token(vocab, "ab"); @@ -516,6 +572,55 @@ TEST_CASE("renderer_dispatches_done_and_error_callbacks") { CHECK(recorder.flush_error == 0); } +TEST_CASE("renderer_invalid_render_and_flush_set_output_status_defaults") { + auto & vocab = make_vocab(); + add_token(vocab, "hi"); + + emel::text::renderer::sm renderer{}; + + int32_t initialize_err = k_renderer_ok; + CHECK(initialize_renderer(renderer, vocab, false, nullptr, 0, initialize_err)); + CHECK(initialize_err == k_renderer_ok); + + std::array output = {}; + size_t output_length = 123; + emel::text::renderer::sequence_status status = + emel::text::renderer::sequence_status::stop_sequence_matched; + int32_t render_err = k_renderer_ok; + + emel::text::renderer::event::render render_ev = {}; + render_ev.token_id = 0; + render_ev.sequence_id = + static_cast(emel::text::renderer::action::k_max_sequences); + render_ev.output = output.data(); + render_ev.output_capacity = output.size(); + render_ev.output_length_out = &output_length; + render_ev.status_out = &status; + render_ev.error_out = &render_err; + + CHECK_FALSE(renderer.process_event(render_ev)); + CHECK(render_err == k_renderer_invalid_request); + CHECK(output_length == 0); + CHECK(status == emel::text::renderer::sequence_status::running); + + size_t flush_output_length = 123; + status = emel::text::renderer::sequence_status::stop_sequence_matched; + int32_t flush_err = k_renderer_ok; + emel::text::renderer::event::flush flush_ev = {}; + flush_ev.sequence_id = + static_cast(emel::text::renderer::action::k_max_sequences); + flush_ev.output = output.data(); + flush_ev.output_capacity = output.size(); + flush_ev.output_length_out = &flush_output_length; + flush_ev.status_out = &status; + flush_ev.error_out = &flush_err; + + CHECK_FALSE(renderer.process_event(flush_ev)); + CHECK(flush_err == k_renderer_invalid_request); + CHECK(flush_output_length == 0); + CHECK(status == emel::text::renderer::sequence_status::running); +} + TEST_CASE("renderer_surfaces_local_model_invalid_error_on_render_failure") { auto & vocab = make_vocab(); add_token(vocab, "ok");