From f2983c8d031aeea30c72a7089991d4bb5741d254 Mon Sep 17 00:00:00 2001 From: Brett Kinny Date: Fri, 5 Jun 2026 19:57:51 +1000 Subject: [PATCH 1/2] fix(openai_compat): apply turn-suffix without a user turn + no whitespace before emoji MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two audit findings in the OpenAICompat provider (the alternate voice backend). Turn-suffix placement: _build_messages appended _TURN_SUFFIX (emoji-prefix rule + kid-mode filter + length limits) only at the last *user* turn. A request with no user turn — a greeter/system-only injection ending in a system/assistant message — left last_user_idx None, so the suffix was never applied and the model ran completely unconstrained (no emoji, no kid-mode filter). Now fall back to the last message regardless of role, and synthesize a trailing system message when the dialogue is empty. Whitespace-before-emoji: the stream yielded a leading whitespace-only delta to TTS before the emoji-prefix check had non-whitespace to inspect, so the first thing the firmware saw was whitespace, not the emoji it parses into a face animation. Buffer until there's non-whitespace, then emit the (optional) fallback emoji followed by the accumulated leading text. Tests: new tests/test_openai_compat.py (8 cases) — suffix on last user turn, fallback-to-last-message, empty-dialogue synthesis, exactly-once; and stream ordering for whitespace-first, existing-emoji, no-emoji, and all-whitespace. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../openai_compat/openai_compat.py | 36 +++-- tests/test_openai_compat.py | 147 ++++++++++++++++++ 2 files changed, 174 insertions(+), 9 deletions(-) create mode 100644 tests/test_openai_compat.py diff --git a/custom-providers/openai_compat/openai_compat.py b/custom-providers/openai_compat/openai_compat.py index d2ea625..0803294 100644 --- a/custom-providers/openai_compat/openai_compat.py +++ b/custom-providers/openai_compat/openai_compat.py @@ -98,19 +98,29 @@ def _build_messages(self, dialogue): if self._persona: messages.append({"role": "system", "content": self._persona}) - # Copy the dialogue, appending the safety suffix to the last user turn. + # Append the safety/format suffix (emoji-prefix rule + kid-mode filter + + # length limits) to the final user turn. If the dialogue has no user + # turn — e.g. a greeter/system-only injection where the last message is + # system or assistant — fall back to the last message regardless of role + # so the constraints still reach the model; if the dialogue is empty, + # carry them in a standalone system message. Without this, a no-user-turn + # request runs completely unconstrained (no emoji, no kid-mode filter). last_user_idx = None for i, msg in enumerate(dialogue): if msg.get("role") == "user": last_user_idx = i + suffix_idx = last_user_idx if last_user_idx is not None else len(dialogue) - 1 for i, msg in enumerate(dialogue): role = msg.get("role", "user") content = msg.get("content", "") - if i == last_user_idx: + if i == suffix_idx: content = content + _TURN_SUFFIX messages.append({"role": role, "content": content}) + if suffix_idx < 0: # empty dialogue — nothing to append to + messages.append({"role": "system", "content": _TURN_SUFFIX}) + return messages def _headers(self): @@ -216,15 +226,23 @@ def _response_stream(self, messages): full_text.append(content) # Emoji prefix enforcement on the first non-whitespace content. + # Until we've seen non-whitespace, buffer rather than yield — a + # leading whitespace-only delta must never reach TTS/firmware + # before the emoji (the firmware parses the leading glyph into a + # face animation, so whitespace-first breaks the contract). if not emoji_checked: so_far = "".join(full_text).lstrip() - if so_far: - emoji_checked = True - if not any(so_far.startswith(e) for e in ALLOWED_EMOJIS): - # Prepend fallback emoji before yielding the first - # chunk. We yield the emoji + space as a separate - # chunk so the face animation fires immediately. - yield f"{FALLBACK_EMOJI} " + if not so_far: + continue # still all whitespace — keep buffering + emoji_checked = True + if not any(so_far.startswith(e) for e in ALLOWED_EMOJIS): + # Prepend fallback emoji + space as a separate chunk so + # the face animation fires immediately. + yield f"{FALLBACK_EMOJI} " + # Flush the accumulated leading text (minus the meaningless + # leading whitespace) as the first real chunk. + yield so_far + continue yield content diff --git a/tests/test_openai_compat.py b/tests/test_openai_compat.py new file mode 100644 index 0000000..137c66b --- /dev/null +++ b/tests/test_openai_compat.py @@ -0,0 +1,147 @@ +"""Unit tests for the OpenAICompat LLM provider (openai_compat.py). + +Two audit findings: + - the kid-mode/emoji turn-suffix was only appended to the last *user* turn, + so a no-user-turn request (greeter/system injection) ran unconstrained; + - a leading whitespace-only stream delta was yielded to TTS before the + emoji-prefix check, breaking the firmware's leading-glyph face contract. + +Container-only imports (config.logger, core.providers.llm.base, +core.utils.textUtils) are stubbed with controlled values so the logic is +exercised deterministically. `requests` is real and patched per-test. +""" +import importlib.util as _ilu +import pathlib +import re +import sys +import types +import unittest +from unittest.mock import MagicMock, patch + +# ── Stub container-only imports BEFORE loading the module ───────────────────── +sys.modules.setdefault("config", MagicMock()) +_logger_mod = types.ModuleType("config.logger") +_logger_mod.setup_logging = lambda: MagicMock() # type: ignore[attr-defined] +sys.modules["config.logger"] = _logger_mod + +for _n in ("core", "core.providers", "core.providers.llm", "core.utils"): + sys.modules.setdefault(_n, MagicMock()) + +_base_mod = types.ModuleType("core.providers.llm.base") + + +class _StubBase: + pass + + +_base_mod.LLMProviderBase = _StubBase # type: ignore[attr-defined] +sys.modules["core.providers.llm.base"] = _base_mod + +# Controlled textUtils stubs (a fixed emoji set + a sentinel suffix). +_FALLBACK = "😐" +_ALLOWED = ("😊", "😆", "😢", "😮", "🤔", "😐", "😍", "😴", "😠") +_SUFFIX = " [[SUFFIX]]" +_tu = types.ModuleType("core.utils.textUtils") +_tu.ALLOWED_EMOJIS = _ALLOWED # type: ignore[attr-defined] +_tu.FALLBACK_EMOJI = _FALLBACK # type: ignore[attr-defined] +_tu._SENTENCE_BOUNDARY = re.compile(r"(?<=[.!?])\s+") # type: ignore[attr-defined] +_tu.build_turn_suffix = lambda kid_mode: _SUFFIX # type: ignore[attr-defined] +sys.modules["core.utils.textUtils"] = _tu + +# ── Load the module under test by path ─────────────────────────────────────── +_OC_PY = ( + pathlib.Path(__file__).parent.parent + / "custom-providers" + / "openai_compat" + / "openai_compat.py" +) +_spec = _ilu.spec_from_file_location("openai_compat_under_test", _OC_PY) +_mod = _ilu.module_from_spec(_spec) # type: ignore[arg-type] +_spec.loader.exec_module(_mod) # type: ignore[union-attr] + + +def _provider(): + return _mod.LLMProvider({"url": "http://x/v1", "model": "m"}) + + +def _sse(*contents): + """Build SSE 'data:' lines for a sequence of delta content strings.""" + import json + lines = [] + for c in contents: + lines.append("data: " + json.dumps({"choices": [{"delta": {"content": c}}]})) + lines.append("data: [DONE]") + return lines + + +def _patched_stream(prov, lines): + resp = MagicMock() + resp.raise_for_status = MagicMock() + resp.iter_lines = lambda decode_unicode=True: iter(lines) + with patch.object(_mod.requests, "post", return_value=resp): + return list(prov._response_stream([{"role": "user", "content": "hi"}])) + + +class TestTurnSuffixPlacement(unittest.TestCase): + + def test_suffix_on_last_user_turn(self): + msgs = _provider()._build_messages( + [{"role": "system", "content": "S"}, {"role": "user", "content": "hi"}] + ) + self.assertTrue(msgs[-1]["content"].endswith(_SUFFIX)) + self.assertEqual(msgs[-1]["role"], "user") + + def test_suffix_falls_to_last_message_when_no_user_turn(self): + # Greeter-style: dialogue ends with a non-user message. + msgs = _provider()._build_messages( + [{"role": "system", "content": "sys"}, {"role": "assistant", "content": "greet"}] + ) + joined = "".join(m["content"] for m in msgs) + self.assertIn(_SUFFIX, joined) + self.assertTrue(msgs[-1]["content"].endswith(_SUFFIX)) + + def test_suffix_synthesized_when_dialogue_empty(self): + msgs = _provider()._build_messages([]) + self.assertEqual(len(msgs), 1) + self.assertEqual(msgs[0]["role"], "system") + self.assertEqual(msgs[0]["content"], _SUFFIX) + + def test_suffix_appears_exactly_once(self): + msgs = _provider()._build_messages( + [{"role": "user", "content": "a"}, {"role": "assistant", "content": "b"}, + {"role": "user", "content": "c"}] + ) + self.assertEqual("".join(m["content"] for m in msgs).count(_SUFFIX), 1) + # ...and it lands on the *last* user turn, not the first. + self.assertTrue(msgs[2]["content"].endswith(_SUFFIX)) + + +class TestStreamEmojiOrdering(unittest.TestCase): + + def test_leading_whitespace_never_precedes_emoji(self): + out = _patched_stream(_provider(), _sse(" ", "Hi there.")) + # Nothing whitespace-only may be surfaced before the emoji. + self.assertTrue(out[0].strip(), f"first chunk was whitespace: {out!r}") + self.assertTrue(out[0].startswith(_FALLBACK)) + self.assertEqual("".join(out), f"{_FALLBACK} Hi there.") + + def test_existing_emoji_not_double_prefixed(self): + out = _patched_stream(_provider(), _sse("😊 Hello")) + self.assertTrue(out[0].startswith("😊")) + self.assertNotIn(_FALLBACK, "".join(out)) + + def test_emoji_prepended_when_first_chunk_has_no_emoji(self): + out = _patched_stream(_provider(), _sse("Hello")) + self.assertEqual(out[0], f"{_FALLBACK} ") + self.assertEqual("".join(out), f"{_FALLBACK} Hello") + + def test_all_whitespace_stream_emits_fallback(self): + out = _patched_stream(_provider(), _sse(" ", "\n")) + # No real content ever arrived → the no-response fallback fires, and + # nothing whitespace-only leaked out ahead of it. + self.assertTrue(all(c.strip() for c in out), f"whitespace leaked: {out!r}") + self.assertIn(_FALLBACK, "".join(out)) + + +if __name__ == "__main__": + unittest.main() From 70f42ce58f3ab8ed079071fd064960ce34d9b120 Mon Sep 17 00:00:00 2001 From: Brett Kinny Date: Sat, 6 Jun 2026 21:35:08 +1000 Subject: [PATCH 2/2] test(openai_compat): tear down sys.modules stubs after import to stop cross-test leak test_openai_compat installed a core.utils.textUtils stub (build_turn_suffix -> '[[SUFFIX]]') into sys.modules at collection time and never removed it. pi_voice.py probes `from core.utils.textUtils import build_turn_suffix` and depends on that raising ImportError to reach its real by-path fallback; the leaked stub made the probe succeed, so test_pi_voice's TestSandwichInjection saw '[[SUFFIX]]' instead of the real suffix and failed (3 failures). Snapshot the touched sys.modules keys before stubbing and restore them after exec_module. openai_compat.py binds its names at import time, so the module under test keeps working while the global registry is left clean. Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/test_openai_compat.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/test_openai_compat.py b/tests/test_openai_compat.py index 137c66b..ec45a4a 100644 --- a/tests/test_openai_compat.py +++ b/tests/test_openai_compat.py @@ -19,6 +19,25 @@ from unittest.mock import MagicMock, patch # ── Stub container-only imports BEFORE loading the module ───────────────────── +# These stubs are installed only long enough to exec the module under test; +# they are torn down again after the import (see below) so they don't leak into +# sibling test modules' sys.modules. In particular test_pi_voice.py imports +# pi_voice.py, which probes `from core.utils.textUtils import build_turn_suffix` +# and *depends on that raising ImportError* to reach its real by-path fallback — +# a leaked stub here makes that probe succeed and binds the wrong suffix. +_MISSING = object() +_STUBBED_MODULES = ( + "config", + "config.logger", + "core", + "core.providers", + "core.providers.llm", + "core.providers.llm.base", + "core.utils", + "core.utils.textUtils", +) +_saved_modules = {k: sys.modules.get(k, _MISSING) for k in _STUBBED_MODULES} + sys.modules.setdefault("config", MagicMock()) _logger_mod = types.ModuleType("config.logger") _logger_mod.setup_logging = lambda: MagicMock() # type: ignore[attr-defined] @@ -59,6 +78,16 @@ class _StubBase: _mod = _ilu.module_from_spec(_spec) # type: ignore[arg-type] _spec.loader.exec_module(_mod) # type: ignore[union-attr] +# Tear the stubs back out. openai_compat.py bound the names it needs into `_mod` +# at exec time (`from core.utils.textUtils import ...`), so it keeps working, +# while sys.modules is returned to its pre-stub state — no leakage into sibling +# test modules (e.g. test_pi_voice.py's ImportError-driven fallback). +for _k, _v in _saved_modules.items(): + if _v is _MISSING: + sys.modules.pop(_k, None) + else: + sys.modules[_k] = _v # type: ignore[assignment] + def _provider(): return _mod.LLMProvider({"url": "http://x/v1", "model": "m"})