From e6de4ad327dafe727e139835bd05535d81249c65 Mon Sep 17 00:00:00 2001
From: Alexandre Balmes <alexandre@vanoix.com>
Date: Thu, 4 Jun 2026 00:51:33 +0200
Subject: [PATCH] feat(codex): implement F103 NDJSON output parity for Codex
 provider

- `.zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/journal.wal`: Add ZPM PR tracking journal
- `.zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/knowledge.pl`: Add ZPM PR knowledge base schema
- `CHANGELOG.md`: Document F103 presence-aware output extraction and Response population
- `README.md`: Update output formatting feature description to include Response auto-population
- `docs/reference/interpolation.md`: Expand Response section with agent step examples and JSON/Response comparison
- `docs/user-guide/agent-steps.md`: Update Output/Response field descriptions and output_format table for F103
- `internal/infrastructure/agents/codex_provider.go`: Add extractCodexAssistantText/extractCodexTextContent helpers; wire extractTextContent hook; implement presence-aware Output overwrite and Response population in Execute and ExecuteConversation
- `internal/infrastructure/agents/codex_provider_delegation_test.go`: Remove stub-era conditionals; add OutputExtraction, HookWired, PlainTextFallback, NDJSONOutput delegation tests
- `internal/infrastructure/agents/codex_provider_test_helpers_test.go`: Add ndjsonLine test helper
- `internal/infrastructure/agents/codex_provider_unit_test.go`: Expand unit test coverage for F103 extraction paths; remove duplicated ProviderName/TimestampOrdering tests
- `internal/infrastructure/agents/helpers.go`: Remove stale comment from appendCodexOptions

Closes #363
---
 .../journal.wal                               | 166 +++++
 .../knowledge.pl                              |  62 ++
 CHANGELOG.md                                  |   4 +
 README.md                                     |   2 +-
 docs/reference/interpolation.md               |  77 ++-
 docs/user-guide/agent-steps.md                |  32 +-
 .../infrastructure/agents/codex_provider.go   | 145 ++++-
 .../agents/codex_provider_delegation_test.go  | 177 ++++--
 .../agents/codex_provider_exec_test.go        |   7 +-
 ...odex_provider_parse_display_events_test.go |  44 ++
 .../agents/codex_provider_resume_test.go      |  37 +-
 .../agents/codex_provider_session_test.go     |  12 +-
 .../codex_provider_test_helpers_test.go       |  15 +
 .../agents/codex_provider_unit_test.go        | 583 +++++++++++++++---
 internal/infrastructure/agents/helpers.go     |   4 +-
 15 files changed, 1142 insertions(+), 225 deletions(-)
 create mode 100644 .zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/journal.wal
 create mode 100644 .zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/knowledge.pl
 create mode 100644 internal/infrastructure/agents/codex_provider_test_helpers_test.go

diff --git a/.zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/journal.wal b/.zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/journal.wal
new file mode 100644
index 00000000..9be492db
--- /dev/null
+++ b/.zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/journal.wal
@@ -0,0 +1,166 @@
+{"ts":1780517469,"op":"retractall","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:todo(_, _, _, _)"}
+{"ts":1780517470,"op":"retractall","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:stub(_, _, _)"}
+{"ts":1780517470,"op":"retractall","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock(_, _, _)"}
+{"ts":1780517470,"op":"retractall","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:not_impl(_, _, _)"}
+{"ts":1780517470,"op":"retractall","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:pr_file(_, _)"}
+{"ts":1780517470,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:pr_file('.zpm/mounts.json', changed)"}
+{"ts":1780519586,"op":"retractall","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:todo(_, _, _, _)"}
+{"ts":1780519586,"op":"retractall","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:stub(_, _, _)"}
+{"ts":1780519586,"op":"retractall","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock(_, _, _)"}
+{"ts":1780519586,"op":"retractall","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:not_impl(_, _, _)"}
+{"ts":1780519586,"op":"retractall","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:pr_file(_, _)"}
+{"ts":1780519586,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:pr_file('.zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/journal.wal', changed)"}
+{"ts":1780519587,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:stub('issue_1_2', '.zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/journal.wal', 'unknown')"}
+{"ts":1780519587,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_1_3', '.zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/journal.wal', 'unknown')"}
+{"ts":1780519587,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:stub('issue_1_8', '.zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/journal.wal', 'unknown')"}
+{"ts":1780519587,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_1_9', '.zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/journal.wal', 'unknown')"}
+{"ts":1780519587,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:pr_file('.zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/knowledge.pl', changed)"}
+{"ts":1780519588,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:todo('issue_2_8', '.zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/knowledge.pl', 8, 'found in changed code')"}
+{"ts":1780519588,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:stub('issue_2_9', '.zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/knowledge.pl', 'unknown')"}
+{"ts":1780519588,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_2_10', '.zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/knowledge.pl', 'unknown')"}
+{"ts":1780519588,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:stub('issue_2_17', '.zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/knowledge.pl', 'unknown')"}
+{"ts":1780519588,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_2_18', '.zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/knowledge.pl', 'unknown')"}
+{"ts":1780519588,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:stub('issue_2_26', '.zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/knowledge.pl', 'unknown')"}
+{"ts":1780519588,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:stub('issue_2_27', '.zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/knowledge.pl', 'unknown')"}
+{"ts":1780519589,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_2_28', '.zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/knowledge.pl', 'unknown')"}
+{"ts":1780519589,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_2_29', '.zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/knowledge.pl', 'unknown')"}
+{"ts":1780519589,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:stub('issue_2_36', '.zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/knowledge.pl', 'unknown')"}
+{"ts":1780519589,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:stub('issue_2_37', '.zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/knowledge.pl', 'unknown')"}
+{"ts":1780519589,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_2_38', '.zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/knowledge.pl', 'unknown')"}
+{"ts":1780519589,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_2_39', '.zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/knowledge.pl', 'unknown')"}
+{"ts":1780519589,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:pr_file('.zpm/mounts.json', changed)"}
+{"ts":1780519590,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:pr_file('internal/infrastructure/agents/codex_provider.go', changed)"}
+{"ts":1780519590,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:pr_file('internal/infrastructure/agents/codex_provider_unit_test.go', test)"}
+{"ts":1780519590,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_10', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519590,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_16', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519590,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_23', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519590,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_29', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'main')"}
+{"ts":1780519590,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_36', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519591,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_43', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519591,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_49', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'complex')"}
+{"ts":1780519591,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_55', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519591,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_62', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519591,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_63', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519591,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_64', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519592,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_87', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519592,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_94', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519592,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_101', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519592,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_108', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519592,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_115', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519592,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_122', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519592,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_129', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519593,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_130', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519593,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_131', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519593,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_136', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519593,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_169', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519593,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_170', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519593,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_171', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519593,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_179', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519594,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_295', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519594,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_296', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519594,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_297', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519594,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_342', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519594,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_343', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519594,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_344', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519594,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_359', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519595,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_364', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519595,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_369', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519595,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_374', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519595,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_379', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519595,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_386', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519595,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_387', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519595,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_388', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519596,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_446', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519596,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_447', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519596,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_448', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519596,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_449', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519596,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_463', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519596,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_468', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519597,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_473', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519597,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_478', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519597,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_483', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519597,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_490', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519597,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_491', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519597,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_492', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519597,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_504', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519598,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_505', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519598,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_506', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519598,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_518', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519598,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_519', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519598,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_520', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519598,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_535', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519598,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_542', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'main')"}
+{"ts":1780519599,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_550', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519599,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_558', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'func')"}
+{"ts":1780519599,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_565', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519599,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_566', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519599,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_567', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519599,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_584', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519600,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_585', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519600,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_586', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519600,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_615', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519600,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_616', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519600,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_617', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519600,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_630', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519600,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_631', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519601,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_632', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519601,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_653', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519601,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_654', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519601,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_655', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519601,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_674', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519601,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_675', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519602,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_676', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519602,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_758', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519602,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_759', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519602,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_760', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519602,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_806', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519602,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_807', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519602,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_808', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519603,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_824', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519603,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_829', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519603,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_834', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519603,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_841', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519603,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_842', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519603,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_843', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519603,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_890', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519604,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_891', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519604,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_892', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519604,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_898', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519604,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_939', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519604,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_946', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519604,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_953', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519605,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_960', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519605,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_967', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519605,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_974', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519605,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_975', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519605,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_976', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519605,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_981', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519605,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_991', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519606,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_992', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519606,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_993', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519606,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_998', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519606,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_1010', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519606,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_1011', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519606,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_1012', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519606,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_1018', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519607,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_1163', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519607,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_1164', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519607,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_1165', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519607,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_1179', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519607,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_1180', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519607,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_1181', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519608,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_1193', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519608,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_1194', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519608,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_1195', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519608,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_1206', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519608,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_1207', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519608,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_1208', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519608,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_1209', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519609,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_1216', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519609,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_1223', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519609,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_1224', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519609,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_1225', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519609,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_1237', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519609,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_1238', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519610,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_1239', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
+{"ts":1780519610,"op":"assert","clause":"pr_feature_f103_codex_provider_jsonl_output_parity:mock('issue_5_1246', 'internal/infrastructure/agents/codex_provider_unit_test.go', 'unknown')"}
diff --git a/.zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/knowledge.pl b/.zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/knowledge.pl
new file mode 100644
index 00000000..40504f08
--- /dev/null
+++ b/.zpm/kb/pr_feature_f103_codex_provider_jsonl_output_parity/knowledge.pl
@@ -0,0 +1,62 @@
+:- module(pr_feature_f103_codex_provider_jsonl_output_parity, []).
+% ─── PR Tracking Schema ──────────────────────────────────────────────────────
+% Memory segment: pr_<branch>
+% Lifecycle: created at implement start, gated before commit, archived on merge.
+%
+% Facts (asserted by scan scripts and LLM):
+%   pr_file(Path, ChangeType)        — file in PR scope (changed | added | test)
+%   todo(Id, File, Line, Desc)       — TODO/FIXME found in changed code
+%   stub(Id, File, Symbol)           — stub/placeholder implementation
+%   mock(Id, File, Symbol)           — mock that should be replaced with real impl
+%   not_impl(Id, File, Desc)         — "not yet implemented" marker
+%   resolved(Type, Id)               — marks a tracked issue as resolved
+%
+% Dynamic declarations (required by Trealla Prolog for runtime assertion).
+:- dynamic(pr_file/2).
+:- dynamic(todo/4).
+:- dynamic(stub/3).
+:- dynamic(mock/3).
+:- dynamic(not_impl/3).
+:- dynamic(resolved/2).
+
+% ─── Unresolved queries ─────────────────────────────────────────────────────
+% Convenience predicates for querying unresolved issues by type.
+unresolved_todo(Id, File, Line, Desc) :-
+    todo(Id, File, Line, Desc), \+ resolved(todo, Id).
+unresolved_stub(Id, File, Symbol) :-
+    stub(Id, File, Symbol), \+ resolved(stub, Id).
+unresolved_mock(Id, File, Symbol) :-
+    mock(Id, File, Symbol), \+ resolved(mock, Id).
+unresolved_not_impl(Id, File, Desc) :-
+    not_impl(Id, File, Desc), \+ resolved(not_impl, Id).
+
+% A blocking issue is any tracked issue that has not been resolved.
+blocking_issue(Id, todo, File, Desc) :-
+    todo(Id, File, _, Desc), \+ resolved(todo, Id).
+blocking_issue(Id, stub, File, Symbol) :-
+    stub(Id, File, Symbol), \+ resolved(stub, Id).
+blocking_issue(Id, mock, File, Symbol) :-
+    mock(Id, File, Symbol), \+ resolved(mock, Id).
+blocking_issue(Id, not_impl, File, Desc) :-
+    not_impl(Id, File, Desc), \+ resolved(not_impl, Id).
+
+% PR is ready ONLY when zero blocking issues remain.
+pr_ready :- \+ blocking_issue(_, _, _, _).
+
+% Health summary — counts by category.
+pr_health(blocking, N) :-
+    findall(I, blocking_issue(I, _, _, _), L), length(L, N).
+pr_health(resolved, N) :-
+    findall(I, resolved(_, I), L), length(L, N).
+pr_health(files, N) :-
+    findall(F, pr_file(F, _), L), length(L, N).
+
+% Coverage gap: source file changed without corresponding test file.
+coverage_gap(File) :-
+    pr_file(File, changed),
+    \+ pr_file(File, test),
+    \+ test_file(File, _).
+
+% List all blocking issues as Id-Type-File-Desc tuples.
+all_blockers(Blockers) :-
+    findall(blocker(Id, Type, File, Desc), blocking_issue(Id, Type, File, Desc), Blockers).
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 21c032cd..87ec8903 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Fixed
+
+- **F103**: Codex provider output parity — `state.Output` and `ConversationResult.Output` for the `codex` provider now contain clean aggregated assistant text instead of raw NDJSON, regardless of `output_format` value (`json`, `stream-json`, `text`, or absent), closing the gap left by B015 in 0.7.1 which only covered Claude, Gemini, and OpenCode. Aggregation is presence-aware: when the NDJSON stream contains ≥1 `assistant_message` event, the extracted text overwrites `Output` (empty `assistant_message` yields `Output == ""`); when no events are parsed (plain-text mocks, pre-result lifecycle events only), the base output is preserved. `state.Response` and `ConversationResult.Response` are now populated via `tryParseJSONResponse` on the aggregated assistant text when it is a valid JSON object — matching Gemini/OpenCode semantics. Estimated token recount runs on the extracted text rather than the raw NDJSON length. Conversation parity achieved through a Codex-targeted override in `ExecuteConversation` plus an `extractTextContent` hook wired into `cliProviderHooks`; base provider and other CLI providers (Claude, Gemini, OpenCode, GitHub Copilot) are bit-identical. NUL bytes, multi-event streams, empty assistant messages, and non-JSON plain text are all handled without panic or truncation. Workflow authors can now reference `{{.states.codex_step.Output}}` and `{{.states.codex_step.Response}}` with the same semantics as other providers, unblocking multi-step workflows that pipe Codex output downstream.
+
 ## [0.10.0] - 2026-06-01
 
 ### Changed
diff --git a/README.md b/README.md
index e9fbedca..c87b77a7 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@ A Go CLI tool for orchestrating AI agents (Claude, Gemini, Codex, GitHub Copilot
 - **State Machine Execution** - Define workflows as state machines with conditional transitions based on exit codes, command output, or custom expressions
 - **Inline Error Handling** - Specify error messages and exit codes directly on steps without creating separate terminal states
 - **Agent Steps** - Invoke AI agents via CLI tools (Claude, Codex, Gemini, GitHub Copilot) or direct HTTP (OpenAI, Ollama, vLLM, Groq) with prompt templates, response parsing, and accurate token tracking
-- **Output Formatting for Agent Steps** - Automatically strip markdown code fences and validate JSON output; human-readable streaming display controlled by `output_format` field (text vs raw NDJSON); unified display-event abstraction across all 6 providers with optional verbose mode showing tool-use markers (`[tool: Name(Arg)]`)
+- **Output Formatting for Agent Steps** - Automatically strip markdown code fences and validate JSON output; human-readable streaming display controlled by `output_format` field (text vs raw NDJSON); unified display-event abstraction across all 6 providers with optional verbose mode showing tool-use markers (`[tool: Name(Arg)]`); automatic `state.Response` population for structured JSON output (heuristic across all providers)
 - **Agent Skills** - Inject deterministic domain knowledge into agent steps via `skills:` declarations in workflow YAML; filesystem-based multi-directory discovery (project `.awf/skills/`, `.agents/skills/`, `.claude/skills/`, XDG global) with priority ordering; SKILL.md frontmatter stripping and agentskills.io-compliant `<skill_content>` structured wrapping with bundled resource enumeration; validated by `awf validate`
 - **Agent Roles** - Inject reusable personas into agent steps via `role:` field referencing AGENTS.md files; filesystem-based multi-directory discovery in a dedicated `roles/` namespace (project `.awf/roles/`, `.agents/roles/`, `$XDG_CONFIG_HOME/awf/roles/`, `~/.agents/roles/`) with priority ordering and `AWF_ROLES_PATH` exclusive override; AGENTS.md frontmatter stripping and system prompt injection with optional composition via inline `system_prompt` field; validated by `awf validate`
 - **External Prompt Files** - Load agent prompts from `.md` files with full template interpolation, helper functions, and local override support
diff --git a/docs/reference/interpolation.md b/docs/reference/interpolation.md
index a0d9b366..7c424366 100644
--- a/docs/reference/interpolation.md
+++ b/docs/reference/interpolation.md
@@ -159,9 +159,18 @@ transitions:
 
 **Note**: `TokensUsed` replaced deprecated `states.step_name.Tokens` field. If migrating from earlier versions, update workflow YAML expressions from `{{.states.step_name.Tokens}}` to `{{.states.step_name.TokensUsed}}`.
 
-#### Response (Operation Outputs)
+#### Response (Structured Outputs)
 
-Operation steps (e.g., `github.get_issue`, `http.request`) return structured data accessible via `Response`:
+Both agent steps and operation steps return structured data accessible via `Response` when the output is valid JSON.
+
+**Agent steps** automatically populate `Response` when the agent's output contains valid JSON (regardless of `output_format` setting):
+
+```yaml
+{{.states.step_name.Response.field}}        # Parsed field from agent JSON output
+{{.states.step_name.Response}}              # Full parsed JSON object
+```
+
+**Operation steps** (e.g., `github.get_issue`, `http.request`) always return structured data via `Response`:
 
 ```yaml
 {{.states.step_name.Response.title}}       # Parsed field from operation result
@@ -169,25 +178,25 @@ Operation steps (e.g., `github.get_issue`, `http.request`) return structured dat
 {{.states.step_name.Response.labels}}      # Array field
 ```
 
-Use `Output` for raw JSON, `Response.field` for parsed fields:
+Example with agent step returning JSON:
 
 ```yaml
 states:
-  initial: get_issue
-
-  get_issue:
-    type: operation
-    operation: github.get_issue
-    inputs:
-      number: "{{.inputs.issue_number}}"
-    on_success: show_title
+  initial: analyze
+
+  analyze:
+    type: agent
+    provider: claude
+    prompt: "Return JSON analysis with 'issues' and 'severity' fields"
+    on_success: process
     on_failure: error
 
-  show_title:
+  process:
     type: step
-    command: echo "Issue: {{.states.get_issue.Response.title}}"
+    command: |
+      echo "Severity: {{.states.analyze.Response.severity}}"
+      echo "Issues: {{.states.analyze.Response.issues}}"
     on_success: done
-    on_failure: error
 
   done:
     type: terminal
@@ -196,6 +205,15 @@ states:
     status: failure
 ```
 
+If agent returns:
+```json
+{"issues": ["buffer overflow", "memory leak"], "severity": "high"}
+```
+
+Then:
+- `{{.states.analyze.Response.severity}}` = `"high"`
+- `{{.states.analyze.Response.issues}}` = `["buffer overflow", "memory leak"]`
+
 **HTTP operation outputs** follow the same pattern:
 
 ```yaml
@@ -205,7 +223,12 @@ states:
 {{.states.step_name.Response.body_truncated}}    # true if body was truncated
 ```
 
-See [Workflow Syntax - Operation State](../user-guide/workflow-syntax.md#operation-state) for the full list of available operations and their output fields.
+**Difference from `JSON`:**
+- `Response` is populated automatically for all agent outputs if valid JSON is detected (heuristic)
+- `JSON` is only populated when `output_format: json` is explicitly set on the agent step
+- Use `Response.field` for automatic best-effort parsing; use `JSON.field` for explicit, validated JSON output
+
+See [Agent Steps - Output Formatting](../user-guide/agent-steps.md#output-formatting) for examples and [Workflow Syntax - Operation State](../user-guide/workflow-syntax.md#operation-state) for available operations.
 
 #### JSON (Explicit Output Formatting)
 
@@ -217,9 +240,13 @@ When an agent step uses `output_format: json`, the parsed JSON is accessible via
 ```
 
 **Key differences from `Response`:**
-- `JSON` is only populated when `output_format: json` is explicitly set on the agent step
-- `Response` is populated automatically for all agent outputs if valid JSON is detected (heuristic)
-- `JSON` represents explicitly formatted output; `Response` is automatic best-effort parsing
+- `JSON` is only populated when `output_format: json` is explicitly set on the agent step (and validation passes)
+- `Response` is populated automatically for all agent outputs if valid JSON is detected (heuristic, regardless of `output_format`)
+- Use `JSON.field` for strict, validated output; use `Response.field` for lenient, automatic parsing
+
+**When to use each:**
+- **`JSON.field`**: You explicitly requested `output_format: json` and want strict validation
+- **`Response.field`**: You want to access JSON from any agent output without requiring `output_format: json`
 
 Example with `output_format: json`:
 
@@ -238,8 +265,9 @@ states:
   process:
     type: step
     command: |
-      echo "Severity: {{.states.analyze.JSON.severity}}"
-      echo "Issues: {{.states.analyze.JSON.issues}}"
+      # Both work — JSON is strict, Response is lenient
+      echo "Severity (JSON): {{.states.analyze.JSON.severity}}"
+      echo "Severity (Response): {{.states.analyze.Response.severity}}"
     on_success: done
 
   done:
@@ -254,9 +282,14 @@ If agent returns:
 {"issues": ["buffer overflow", "memory leak"], "severity": "high"}
 ```
 
-Then:
+Then both work identically:
 - `{{.states.analyze.JSON.severity}}` = `"high"`
-- `{{.states.analyze.JSON.issues}}` = `["buffer overflow", "memory leak"]`
+- `{{.states.analyze.Response.severity}}` = `"high"`
+
+**Difference becomes apparent when `output_format: json` is omitted:**
+- Without `output_format: json`, JSON validation is skipped
+- `JSON` remains empty (never populated)
+- `Response` still populates if valid JSON is detected
 
 See [Agent Steps - Output Formatting](../user-guide/agent-steps.md#output-formatting) for detailed examples and best practices.
 
diff --git a/docs/user-guide/agent-steps.md b/docs/user-guide/agent-steps.md
index ccd68cad..1d08d3be 100644
--- a/docs/user-guide/agent-steps.md
+++ b/docs/user-guide/agent-steps.md
@@ -1132,8 +1132,8 @@ Agent responses are automatically captured in the execution state:
 
 | Field | Type | Description |
 |-------|------|-------------|
-| `{{.states.step_name.Output}}` | string | Raw response text (or cleaned text if `output_format` is set) |
-| `{{.states.step_name.Response}}` | object | Parsed JSON response (automatic heuristic) |
+| `{{.states.step_name.Output}}` | string | Aggregated assistant text. For CLI providers emitting NDJSON (Claude, Codex, Gemini, OpenCode, GitHub Copilot), the raw stream is extracted to clean text regardless of `output_format`; for HTTP providers, the response body text. `output_format: json` additionally strips markdown code fences. |
+| `{{.states.step_name.Response}}` | object | Parsed JSON response (automatic heuristic — populated when the assistant text is a valid JSON object) |
 | `{{.states.step_name.JSON}}` | object | Parsed JSON from `output_format: json` (explicit, see [Output Formatting](#output-formatting)) |
 | `{{.states.step_name.TokensUsed}}` | int | Total tokens consumed by this step |
 | `{{.states.step_name.TokensInput}}` | int | Input tokens (prompt + context). `0` in single-turn mode. |
@@ -1165,10 +1165,11 @@ process_response:
 
 ## Output Formatting
 
-The `output_format` field serves two purposes:
+The `output_format` field serves three purposes:
 
-1. **Post-processing**: Strips markdown code fences and optionally validates JSON (F065)
-2. **Display filtering**: Controls how agent responses appear on terminal during streaming and buffered execution, with optional verbose tool-use markers (F082, F085)
+1. **Text Extraction**: For CLI providers, extracts clean assistant text from streaming event logs (F103, F082)
+2. **Post-processing**: Strips markdown code fences and optionally validates JSON (F065)
+3. **Display filtering**: Controls how agent responses appear on terminal during streaming and buffered execution, with optional verbose tool-use markers (F082, F085)
 
 When an agent wraps its output in markdown code fences (common with many LLMs), use `output_format` to automatically strip the fences and optionally validate the content:
 
@@ -1210,6 +1211,7 @@ process_results:
 - Strips outermost markdown code fences (e.g., ````json ... ``` ``)
 - Validates stripped content as valid JSON
 - Stores parsed JSON in `{{.states.step_name.JSON}}`
+- Automatically populates `{{.states.step_name.Response}}` with parsed JSON (available across all providers)
 - If validation fails, step fails with a descriptive error
 - Works with both objects and arrays
 
@@ -1225,6 +1227,8 @@ The analysis shows the following:
 - `{{.states.analyze.Output}}` = `{"issues": ["buffer overflow", "memory leak"], "severity": "high"}`
 - `{{.states.analyze.JSON.issues}}` = `["buffer overflow", "memory leak"]`
 - `{{.states.analyze.JSON.severity}}` = `"high"`
+- `{{.states.analyze.Response.issues}}` = `["buffer overflow", "memory leak"]` (same as JSON)
+- `{{.states.analyze.Response.severity}}` = `"high"` (same as JSON)
 
 #### `text` Format
 
@@ -1248,6 +1252,7 @@ save_code:
 - Strips outermost markdown code fences (e.g., ````python ... ``` ``)
 - Returns clean text in `{{.states.step_name.Output}}`
 - Does not populate `{{.states.step_name.JSON}}`
+- Automatically populates `{{.states.step_name.Response}}` if the output happens to be valid JSON (heuristic)
 
 **Example agent output:**
 ```
@@ -1262,6 +1267,8 @@ def fibonacci(n):
 
 **After processing:**
 - `{{.states.generate_code.Output}}` = `def fibonacci(n):\n    if n <= 1:\n        return n\n    return fibonacci(n-1) + fibonacci(n-2)`
+- `{{.states.generate_code.JSON}}` = empty/not populated (no `output_format: json`)
+- `{{.states.generate_code.Response}}` = populated only if output happens to be valid JSON
 
 #### No Format (Default)
 
@@ -1279,10 +1286,15 @@ analyze:
 
 The `output_format` field controls how agent responses appear on the terminal (F082). Additionally, the `--verbose` flag displays tool-use activity markers (F085) — showing which tools the agent invoked — alongside agent output when running with `awf run --output streaming` or `--output buffered`:
 
-| `output_format` | Streaming Display | Buffered Display | Raw Storage |
+| `output_format` | Streaming Display | Buffered Display | `state.Output` Storage |
 |---|---|---|---|
-| `text` (or omitted) | Human-readable filtered text | Filtered text in summary | Raw NDJSON |
-| `json` | Raw NDJSON (unfiltered) | Raw NDJSON (unfiltered) | Raw NDJSON |
+| `text` (or omitted) | Human-readable filtered text | Filtered text in summary | Extracted assistant text |
+| `json` | Raw NDJSON (unfiltered) | Raw NDJSON (unfiltered) | Extracted assistant text (markdown code fences stripped) |
+
+**Output Parity Across All Providers (F103):** For CLI-based providers (Claude, Codex, Gemini, OpenCode), the system automatically extracts clean assistant text from NDJSON events and stores it in `state.Output`. Additionally:
+- `state.Response` is automatically populated when the output is valid JSON (heuristic, regardless of `output_format`)
+- Both `output_format: json` and omitted formats produce semantically equivalent output shapes across all providers
+- Codex now has feature parity with Claude, Gemini, and OpenCode for output handling
 
 #### Streaming Mode (`--output streaming`)
 
@@ -1347,10 +1359,10 @@ Silent mode suppresses all display regardless of `output_format`:
 ```bash
 awf run code-review --output silent
 # No output displayed (silent mode is absolute)
-# state.Output still contains raw NDJSON for template interpolation
+# state.Output still contains the aggregated assistant text for template interpolation
 ```
 
-**Note:** `state.Output` always contains the raw NDJSON regardless of display filtering. Filtering only affects terminal display, not data storage.
+**Note:** `state.Output` is populated independently of display filtering. For CLI providers emitting NDJSON (Claude, Codex, Gemini, OpenCode, GitHub Copilot), the assistant text is extracted from the raw stream and stored — so `{{.states.step.Output}}` resolves to clean text regardless of `output_format`. Filtering only affects terminal display, not data storage.
 
 #### Provider Event Cadence
 
diff --git a/internal/infrastructure/agents/codex_provider.go b/internal/infrastructure/agents/codex_provider.go
index 2db39aa9..71512cb0 100644
--- a/internal/infrastructure/agents/codex_provider.go
+++ b/internal/infrastructure/agents/codex_provider.go
@@ -27,12 +27,7 @@ type CodexProvider struct {
 }
 
 func NewCodexProvider() *CodexProvider {
-	p := &CodexProvider{
-		logger:   logger.NopLogger{},
-		executor: NewExecCLIExecutor(),
-	}
-	p.base = p.newBase()
-	return p
+	return NewCodexProviderWithOptions()
 }
 
 func NewCodexProviderWithOptions(opts ...CodexProviderOption) *CodexProvider {
@@ -52,6 +47,7 @@ func (p *CodexProvider) newBase() *baseCLIProvider {
 		buildExecuteArgs:      p.buildExecuteArgs,
 		buildConversationArgs: p.buildConversationArgs,
 		extractSessionID:      p.extractSessionID,
+		extractTextContent:    p.extractCodexTextContent,
 		validateOptions:       validateCodexOptions,
 		parseDisplayEvents:    p.parseCodexDisplayEvents,
 		extractTokenUsage:     p.extractCodexTokenUsage,
@@ -69,28 +65,63 @@ func (p *CodexProvider) Execute(ctx context.Context, prompt string, options map[
 		return nil, err
 	}
 
-	// Codex CLI is always invoked with `exec --json` (NDJSON). For text intent,
-	// aggregate assistant message content for state.Output so downstream
-	// interpolation ({{states.step.Output}}) is human-readable (F082).
-	userFormat, _ := getStringOption(options, "output_format")
-	if userFormat != "json" && userFormat != "stream-json" {
-		if extracted := extractDisplayTextFromEvents(rawOutput, p.parseCodexDisplayEvents); extracted != "" {
-			result.Output = extracted
-			if result.TokensEstimated {
-				tokens, _ := p.base.tokenizer.CountTokens(extracted) //nolint:errcheck // ApproximationTokenizer never errors with a valid ratio
-				result.Tokens = tokens
+	// F103: presence-aware extraction — when the NDJSON carried an assistant_message event (even an
+	// empty one), overwrite Output with the aggregated assistant text and re-derive every dependent
+	// field (Response, token estimate) from it, regardless of output_format. Gating all of them on
+	// hadText mirrors the result.Output != "" guard in ExecuteConversation: an empty assistant_message
+	// must not leave a JSON Response or a token count estimated from the raw NDJSON envelope.
+	if extracted, hadText := p.extractCodexAssistantText(rawOutput); hadText {
+		result.Output = extracted
+		if extracted != "" {
+			// Array payloads ([…]) leave Response nil — tryParseJSONResponse matches HasPrefix("{") only.
+			if jsonResp := tryParseJSONResponse(extracted); jsonResp != nil {
+				result.Response = jsonResp
 			}
 		}
+		if result.TokensEstimated {
+			tokens, _ := p.base.tokenizer.CountTokens(extracted) //nolint:errcheck // ApproximationTokenizer never errors with a valid ratio
+			result.Tokens = tokens
+		}
 	}
 
 	return result, nil
 }
 
 func (p *CodexProvider) ExecuteConversation(ctx context.Context, state *workflow.ConversationState, prompt string, options map[string]any, stdout, stderr io.Writer) (*workflow.ConversationResult, error) {
-	result, _, err := p.base.executeConversation(ctx, state, prompt, options, stdout, stderr)
+	result, rawOutput, err := p.base.executeConversation(ctx, state, prompt, options, stdout, stderr)
 	if err != nil {
 		return nil, err
 	}
+	if text, hadText := p.extractCodexAssistantText(rawOutput); hadText {
+		// F103: presence-aware overwrite. The base derived Output, the trailing assistant
+		// turn's Content, and the estimated token counts from the raw NDJSON envelope; once
+		// Output is replaced with the aggregated assistant text we must re-derive all three so
+		// they stay consistent (mirrors the token recount performed in Execute). Without this,
+		// an empty assistant_message leaves inflated TokensOutput/TokensTotal and a turn whose
+		// Content still holds the raw NDJSON, violating the turn.Content == Output invariant.
+		result.Output = text
+		if result.TokensEstimated {
+			tokens, _ := p.base.tokenizer.CountTokens(text) //nolint:errcheck // ApproximationTokenizer never errors with a valid ratio
+			result.TokensOutput = tokens
+			result.TokensTotal = result.TokensInput + tokens
+		}
+		if result.State != nil {
+			if n := len(result.State.Turns); n > 0 {
+				if last := &result.State.Turns[n-1]; last.Role == workflow.TurnRoleAssistant {
+					last.Content = text
+					if result.TokensEstimated {
+						last.Tokens = result.TokensOutput
+					}
+				}
+			}
+		}
+	}
+	if result.Output != "" {
+		// F103: array payloads ([…]) leave Response nil — tryParseJSONResponse matches HasPrefix("{") only. F107 will revisit.
+		if jsonResp := tryParseJSONResponse(result.Output); jsonResp != nil {
+			result.Response = jsonResp
+		}
+	}
 	return result, nil
 }
 
@@ -115,8 +146,11 @@ func (p *CodexProvider) buildExecuteArgs(prompt string, options map[string]any)
 func (p *CodexProvider) buildConversationArgs(state *workflow.ConversationState, prompt string, options map[string]any) ([]string, error) {
 	var args []string
 	if state.SessionID != "" {
-		// Resume an existing thread using the native resume subcommand.
-		args = []string{"resume", state.SessionID, "--json", prompt}
+		// Resume an existing thread headlessly. The top-level `codex resume` subcommand is the
+		// interactive TUI (session picker, no --json); headless JSONL resume lives under
+		// `codex exec resume <SESSION_ID> [PROMPT] --json` (codex-cli >= 0.x). Using the TUI
+		// resume here makes the CLI reject --json with "unexpected argument '--json'".
+		args = []string{"exec", "resume", state.SessionID, "--json", prompt}
 	} else {
 		// Codex CLI has no --system-prompt flag; inline the system prompt into
 		// the first-turn message only when a session is not yet established.
@@ -127,7 +161,6 @@ func (p *CodexProvider) buildConversationArgs(state *workflow.ConversationState,
 	return args, nil
 }
 
-// appendCodexOptions appends Codex CLI flags from options; unknown keys are silently ignored.
 func appendCodexOptions(args []string, options map[string]any) []string {
 	if model, ok := getStringOption(options, "model"); ok && model != "" {
 		args = append(args, "--model", model)
@@ -182,6 +215,48 @@ func (p *CodexProvider) extractCodexTokenUsage(rawOutput string) *tokenUsage {
 	}
 }
 
+// extractCodexTextContent adapts extractCodexAssistantText to the extractTextContent hook
+// signature (func(string) string) required by cliProviderHooks. The hadText bool is deliberately
+// discarded: the base provider only needs the aggregated text to build outputStr, and it already
+// treats "" as "fall back to TrimSpace(rawOutput)".
+//
+// CONTRACT (do not "simplify" away the ExecuteConversation override): because this hook cannot
+// surface hadText, an empty assistant_message (hadText=true, text="") is indistinguishable from
+// non-NDJSON plain text (hadText=false) at the base layer — both yield "". CodexProvider.Execute
+// and CodexProvider.ExecuteConversation therefore re-run extractCodexAssistantText to recover
+// hadText and apply the presence-aware overwrite that corrects Output, the token counts, and the
+// trailing turn Content for the empty-message case. This intentional re-walk of the NDJSON is the
+// accepted cost of keeping the base hook signature uniform across all providers.
+func (p *CodexProvider) extractCodexTextContent(output string) string {
+	text, _ := p.extractCodexAssistantText(output)
+	return text
+}
+
+// extractCodexAssistantText aggregates assistant text from NDJSON output via
+// p.parseCodexDisplayEvents, mirroring the join semantics of extractDisplayTextFromEvents.
+// hadText is true iff at least one EventText was produced (even when its .Text is empty),
+// enabling callers to distinguish "empty assistant_message" from "non-NDJSON plain text".
+func (p *CodexProvider) extractCodexAssistantText(output string) (string, bool) {
+	var result strings.Builder
+	hadText := false
+	for line := range strings.SplitSeq(output, "\n") {
+		if line == "" {
+			continue
+		}
+		for _, evt := range p.parseCodexDisplayEvents([]byte(line)) {
+			if evt.Kind != EventText {
+				continue
+			}
+			hadText = true
+			if result.Len() > 0 {
+				result.WriteRune('\n')
+			}
+			result.WriteString(evt.Text)
+		}
+	}
+	return result.String(), hadText
+}
+
 func validateCodexOptions(options map[string]any) error {
 	if options == nil {
 		return nil
@@ -247,17 +322,30 @@ func (p *CodexProvider) codexMCPInjector(_ context.Context, args []string, cfg *
 }
 
 // parseCodexDisplayEvents parses a single NDJSON line from Codex CLI output into
-// DisplayEvents. It emits EventText for assistant_message items and EventToolUse
-// for function_call items. All other event types return nil (skip signal).
+// DisplayEvents. It emits EventText for assistant-message items and EventToolUse
+// for function-call items. All other event types return nil (skip signal).
+//
+// Codex CLI versions disagree on the item discriminator and its values:
+//   - legacy schema:        item.item_type == "assistant_message" / "function_call"
+//   - codex-cli >= 0.133.0: item.type      == "agent_message"     / "command_execution"
+//
+// We read BOTH discriminator keys (item_type wins when present) and accept both naming
+// families, so the same parser handles old fixtures and the current CLI. When the schema
+// is not recognized, extraction yields no text and the base provider falls back to the raw
+// NDJSON — which is exactly the leak this dual-schema handling prevents.
 func (p *CodexProvider) parseCodexDisplayEvents(line []byte) []DisplayEvent {
 	// Replace NUL bytes (0x00) with the 6-byte JSON unicode escape sequence
 	// {0x5c,0x75,0x30,0x30,0x30,0x30} = backslash + u + 0 + 0 + 0 + 0.
 	// json.Unmarshal decodes this escape back to NUL, preserving string content.
+	// This diverges intentionally from the Claude provider, which replaces NUL with a space
+	// (discarding the byte): Codex tool output may embed NUL as a meaningful sentinel, so we
+	// preserve it rather than mangle it. Do not align the two without re-checking that contract.
 	sanitized := bytes.ReplaceAll(line, []byte{0x00}, []byte{0x5c, 0x75, 0x30, 0x30, 0x30, 0x30})
 	var evt struct {
 		Type string `json:"type"`
 		Item *struct {
-			ItemType  string `json:"item_type"`
+			ItemType  string `json:"item_type"` // legacy schema discriminator
+			Kind      string `json:"type"`      // codex-cli >= 0.133.0 discriminator
 			Text      string `json:"text"`
 			Name      string `json:"name"`
 			Arguments string `json:"arguments"`
@@ -270,10 +358,15 @@ func (p *CodexProvider) parseCodexDisplayEvents(line []byte) []DisplayEvent {
 		return nil
 	}
 	if evt.Type == "item.completed" && evt.Item != nil {
-		switch evt.Item.ItemType {
-		case "assistant_message":
+		// item_type (legacy) takes precedence; fall back to type (current CLI).
+		itemKind := evt.Item.ItemType
+		if itemKind == "" {
+			itemKind = evt.Item.Kind
+		}
+		switch itemKind {
+		case "assistant_message", "agent_message":
 			return []DisplayEvent{{Type: evt.Type, Kind: EventText, Text: evt.Item.Text}}
-		case "function_call":
+		case "function_call", "command_execution":
 			// Codex does not emit tool-call IDs; ID is always empty.
 			preview := extractArgPreview(evt.Item.Arguments)
 			return []DisplayEvent{{Type: evt.Type, Kind: EventToolUse, Name: evt.Item.Name, Arg: preview, ID: ""}}
diff --git a/internal/infrastructure/agents/codex_provider_delegation_test.go b/internal/infrastructure/agents/codex_provider_delegation_test.go
index c8d81a32..19400f44 100644
--- a/internal/infrastructure/agents/codex_provider_delegation_test.go
+++ b/internal/infrastructure/agents/codex_provider_delegation_test.go
@@ -2,6 +2,7 @@ package agents
 
 import (
 	"context"
+	"errors"
 	"strings"
 	"testing"
 
@@ -11,11 +12,6 @@ import (
 	"github.com/stretchr/testify/require"
 )
 
-// T004: Codex provider delegation tests verify that CodexProvider correctly
-// delegates Execute and ExecuteConversation to baseCLIProvider through hooks.
-// Tests fail against stub (buildExecuteArgs/buildConversationArgs return nil)
-// and pass after implementation provides proper CLI arguments.
-
 func TestCodexProvider_Execute_DelegationToBase(t *testing.T) {
 	tests := []struct {
 		name       string
@@ -23,7 +19,6 @@ func TestCodexProvider_Execute_DelegationToBase(t *testing.T) {
 		options    map[string]any
 		mockStdout []byte
 		wantOutput string
-		// Note: stub returns nil args, real impl will return ["exec", "--json", ...]
 	}{
 		{
 			name:       "simple prompt delegation",
@@ -55,18 +50,11 @@ func TestCodexProvider_Execute_DelegationToBase(t *testing.T) {
 			provider := NewCodexProviderWithOptions(WithCodexExecutor(mockExec))
 
 			result, err := provider.Execute(context.Background(), tt.prompt, tt.options, nil, nil)
-			if err != nil {
-				// Stub returns nil args, causing executor to be called with zero args
-				// Real implementation will return proper args ["exec", "--json", ...]
-				t.Logf("implementation incomplete: %v", err)
-				assert.Nil(t, result)
-				return
-			}
-
+			require.NoError(t, err)
 			require.NotNil(t, result)
 			assert.Equal(t, "codex", result.Provider)
 			assert.Equal(t, tt.wantOutput, result.Output)
-			assert.True(t, result.TokensEstimated, "Execute must set TokensEstimated=true (FR-007)")
+			assert.True(t, result.TokensEstimated, "all CLI providers estimate tokens (no real-time token API)")
 			assert.NotZero(t, result.Tokens)
 		})
 	}
@@ -78,10 +66,10 @@ func TestCodexProvider_Execute_EmptyPrompt_ValidationByBase(t *testing.T) {
 
 	result, err := provider.Execute(context.Background(), "", nil, nil, nil)
 
-	// Base validates empty prompt before calling hooks
 	assert.Error(t, err)
 	assert.Nil(t, result)
-	assert.Contains(t, err.Error(), "prompt")
+	assert.Contains(t, err.Error(), "prompt cannot be empty")
+	assert.Empty(t, mockExec.GetCalls())
 }
 
 func TestCodexProvider_Execute_ContextCancellation(t *testing.T) {
@@ -93,9 +81,10 @@ func TestCodexProvider_Execute_ContextCancellation(t *testing.T) {
 
 	result, err := provider.Execute(ctx, "test", nil, nil, nil)
 
-	// Base checks context error before hook execution
 	assert.Error(t, err)
 	assert.Nil(t, result)
+	assert.True(t, errors.Is(err, context.Canceled), "error must wrap context.Canceled")
+	assert.Empty(t, mockExec.GetCalls())
 }
 
 func TestCodexProvider_ExecuteConversation_DelegationToBase(t *testing.T) {
@@ -142,18 +131,12 @@ func TestCodexProvider_ExecuteConversation_DelegationToBase(t *testing.T) {
 
 			state := workflow.NewConversationState(tt.systemPrompt)
 			result, err := provider.ExecuteConversation(context.Background(), state, tt.userPrompt, tt.options, nil, nil)
-			if err != nil {
-				// Stub returns nil args → executor error (expected against stub)
-				t.Logf("stub error (expected): %v", err)
-				assert.Nil(t, result)
-				return
-			}
-
+			require.NoError(t, err)
 			require.NotNil(t, result)
 			assert.Equal(t, "codex", result.Provider)
 			assert.NotNil(t, result.State)
 
-			// Verify empty output gets " " fallback (FR-008)
+			// empty output gets " " fallback
 			if tt.mockStdout == "" {
 				assert.Equal(t, " ", result.Output)
 			} else {
@@ -182,6 +165,8 @@ func TestCodexProvider_ExecuteConversation_ContextCancellation(t *testing.T) {
 
 	assert.Error(t, err)
 	assert.Nil(t, result)
+	assert.True(t, errors.Is(err, context.Canceled), "error must wrap context.Canceled")
+	assert.Empty(t, mockExec.GetCalls())
 }
 
 func TestCodexProvider_ExecuteConversation_TurnManagement(t *testing.T) {
@@ -199,12 +184,7 @@ func TestCodexProvider_ExecuteConversation_TurnManagement(t *testing.T) {
 	state.TotalTokens = 100
 
 	result, err := provider.ExecuteConversation(context.Background(), state, "Second question", nil, nil, nil)
-	if err != nil {
-		// Stub behavior - expected against incomplete implementation
-		t.Logf("stub error: %v", err)
-		return
-	}
-
+	require.NoError(t, err)
 	require.NotNil(t, result)
 	require.NotNil(t, result.State)
 
@@ -248,10 +228,8 @@ func TestCodexProvider_ExecuteConversation_SessionIDExtraction(t *testing.T) {
 
 			state := workflow.NewConversationState("System")
 			result, err := provider.ExecuteConversation(context.Background(), state, "test", nil, nil, nil)
-			if err != nil {
-				// Stub case
-				return
-			}
+			require.NoError(t, err)
+			require.NotNil(t, result)
 
 			if tt.wantThreadID != "" {
 				assert.Equal(t, tt.wantThreadID, result.State.SessionID)
@@ -260,15 +238,6 @@ func TestCodexProvider_ExecuteConversation_SessionIDExtraction(t *testing.T) {
 	}
 }
 
-func TestCodexProvider_Validate_BinaryNotFound(t *testing.T) {
-	provider := NewCodexProvider()
-	// This will fail on systems without Codex CLI, which is expected
-	err := provider.Validate()
-	// Error is acceptable (Codex not installed)
-	// Success is acceptable (Codex installed)
-	_ = err
-}
-
 func TestCodexProvider_ExecuteConversation_MultipleEvents(t *testing.T) {
 	output := strings.Join([]string{
 		`{"type":"message","content":"thinking..."}`,
@@ -282,13 +251,11 @@ func TestCodexProvider_ExecuteConversation_MultipleEvents(t *testing.T) {
 
 	state := workflow.NewConversationState("Assistant")
 	result, err := provider.ExecuteConversation(context.Background(), state, "multi-event test", nil, nil, nil)
-	if err != nil {
-		// Stub case
-		return
-	}
+	require.NoError(t, err)
+	require.NotNil(t, result)
 
 	// Even with multiple events, session ID should be extracted
-	if result != nil && result.State != nil && result.State.SessionID != "" {
+	if result.State != nil && result.State.SessionID != "" {
 		assert.Equal(t, "thread-multi", result.State.SessionID)
 	}
 }
@@ -299,13 +266,8 @@ func TestCodexProvider_Execute_AllTokensEstimatedTrue(t *testing.T) {
 	provider := NewCodexProviderWithOptions(WithCodexExecutor(mockExec))
 
 	result, err := provider.Execute(context.Background(), "test prompt", nil, nil, nil)
-	if err != nil {
-		// Stub case
-		return
-	}
-
+	require.NoError(t, err)
 	require.NotNil(t, result)
-	// FR-007: TokensEstimated must be true for all CLI providers
 	assert.True(t, result.TokensEstimated)
 }
 
@@ -316,12 +278,105 @@ func TestCodexProvider_ExecuteConversation_AllTokensEstimatedTrue(t *testing.T)
 
 	state := workflow.NewConversationState("System")
 	result, err := provider.ExecuteConversation(context.Background(), state, "test", nil, nil, nil)
-	if err != nil {
-		// Stub case
-		return
+	require.NoError(t, err)
+	require.NotNil(t, result)
+	assert.True(t, result.TokensEstimated)
+}
+
+func TestCodexProvider_ExecuteConversation_OutputExtraction(t *testing.T) {
+	ndjson := ndjsonLine("extracted assistant text")
+
+	tests := []struct {
+		name         string
+		outputFormat string
+	}{
+		{name: "default (no output_format)", outputFormat: ""},
+		{name: "json output_format", outputFormat: "json"},
+		{name: "text output_format", outputFormat: "text"},
+		{name: "stream-json output_format", outputFormat: "stream-json"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mockExec := mocks.NewMockCLIExecutor()
+			mockExec.SetOutput([]byte(ndjson), nil)
+			provider := NewCodexProviderWithOptions(WithCodexExecutor(mockExec))
+
+			opts := map[string]any{}
+			if tt.outputFormat != "" {
+				opts["output_format"] = tt.outputFormat
+			}
+
+			state := workflow.NewConversationState("System")
+			result, err := provider.ExecuteConversation(context.Background(), state, "test prompt", opts, nil, nil)
+			require.NoError(t, err)
+			require.NotNil(t, result)
+			assert.Equal(t, "extracted assistant text", result.Output)
+			assert.NotContains(t, result.Output, "item.completed", "raw NDJSON envelope must not appear in Output")
+			// Verify the executor was actually invoked (proves mock isolation)
+			assert.Len(t, mockExec.GetCalls(), 1, "executor must be called exactly once per conversation turn")
+		})
 	}
+}
+
+func TestCodexProvider_ExecuteConversation_HookWired(t *testing.T) {
+	ndjson := `{"type":"item.completed","item":{"item_type":"assistant_message","text":"hook extracted text"}}`
+
+	mockExec := mocks.NewMockCLIExecutor()
+	mockExec.SetOutput([]byte(ndjson), nil)
+	provider := NewCodexProviderWithOptions(WithCodexExecutor(mockExec))
 
+	state := workflow.NewConversationState("System")
+	result, err := provider.ExecuteConversation(context.Background(), state, "test", nil, nil, nil)
+	require.NoError(t, err)
 	require.NotNil(t, result)
-	// FR-007: TokensEstimated must be true for all CLI providers
-	assert.True(t, result.TokensEstimated)
+
+	assert.Equal(t, "hook extracted text", result.Output)
+	assert.NotContains(t, result.Output, "item.completed", "raw NDJSON envelope must not appear in Output")
+	// Verify the executor was invoked with the NDJSON input (proves hook ran on real output)
+	assert.Len(t, mockExec.GetCalls(), 1, "executor must be called exactly once")
+}
+
+// TestCodexProvider_ExecuteConversation_PlainTextFallback proves the hook runs at runtime:
+// when extractCodexTextContent returns "" (non-NDJSON input), base_cli_provider.go:332-334
+// falls through to strings.TrimSpace(rawOutput). Without the hook wired, the base provider
+// would use the raw bytes directly (no trim). With the hook wired, "" triggers the fallback.
+func TestCodexProvider_ExecuteConversation_PlainTextFallback(t *testing.T) {
+	mockExec := mocks.NewMockCLIExecutor()
+	mockExec.SetOutput([]byte("  plain text response  "), nil)
+	provider := NewCodexProviderWithOptions(WithCodexExecutor(mockExec))
+
+	state := workflow.NewConversationState("System")
+	result, err := provider.ExecuteConversation(context.Background(), state, "test", nil, nil, nil)
+	require.NoError(t, err)
+	require.NotNil(t, result)
+
+	// Hook returns "" for non-NDJSON; base falls back to strings.TrimSpace(rawOutput).
+	assert.Equal(t, "plain text response", result.Output, "plain text must be trimmed via base fallback")
+	assert.NotContains(t, result.Output, "item.completed")
+	assert.Len(t, mockExec.GetCalls(), 1)
+}
+
+func TestCodexProvider_ExecuteConversation_NDJSONOutput(t *testing.T) {
+	multiEvent := strings.Join([]string{
+		`{"type":"thread.started","thread_id":"thread-ndjson"}`,
+		ndjsonLine("first part"),
+		ndjsonLine("second part"),
+		`{"type":"item.completed","item":{"item_type":"function_call","name":"bash","arguments":"{}"}}`,
+		ndjsonLine("third part"),
+	}, "\n")
+
+	mockExec := mocks.NewMockCLIExecutor()
+	mockExec.SetOutput([]byte(multiEvent), nil)
+	provider := NewCodexProviderWithOptions(WithCodexExecutor(mockExec))
+
+	state := workflow.NewConversationState("System")
+	result, err := provider.ExecuteConversation(context.Background(), state, "test", nil, nil, nil)
+	require.NoError(t, err)
+	require.NotNil(t, result)
+
+	assert.Equal(t, "first part\nsecond part\nthird part", result.Output)
+	assert.NotContains(t, result.Output, "item.completed", "raw NDJSON must not appear in Output")
+	// Verify executor was invoked: proves the mock was the actual dependency under test
+	assert.Len(t, mockExec.GetCalls(), 1, "executor must be called exactly once")
 }
diff --git a/internal/infrastructure/agents/codex_provider_exec_test.go b/internal/infrastructure/agents/codex_provider_exec_test.go
index 72f912b0..93ce9832 100644
--- a/internal/infrastructure/agents/codex_provider_exec_test.go
+++ b/internal/infrastructure/agents/codex_provider_exec_test.go
@@ -214,9 +214,10 @@ func TestCodexProvider_ExecuteConversation_ResumeTurnResumeCommand(t *testing.T)
 	calls := mockExec.GetCalls()
 	require.Len(t, calls, 1)
 	args := calls[0].Args
-	assert.Equal(t, "resume", args[0], "resume turn should use 'resume' subcommand")
-	assert.Equal(t, "xyz789", args[1], "should have session ID as arg[1]")
-	assert.Equal(t, "--json", args[2], "should have --json flag after session ID")
+	assert.Equal(t, "exec", args[0], "resume turn should use exec subcommand")
+	assert.Equal(t, "resume", args[1], "should use the exec resume subcommand")
+	assert.Equal(t, "xyz789", args[2], "should have session ID as arg[2]")
+	assert.Equal(t, "--json", args[3], "should have --json flag after session ID")
 	assert.Contains(t, strings.Join(args, " "), "continue", "should contain prompt")
 }
 
diff --git a/internal/infrastructure/agents/codex_provider_parse_display_events_test.go b/internal/infrastructure/agents/codex_provider_parse_display_events_test.go
index 6778114d..6a90823f 100644
--- a/internal/infrastructure/agents/codex_provider_parse_display_events_test.go
+++ b/internal/infrastructure/agents/codex_provider_parse_display_events_test.go
@@ -135,6 +135,50 @@ func TestCodexProvider_parseCodexDisplayEvents_FunctionCall(t *testing.T) {
 	}
 }
 
+// TestCodexProvider_parseCodexDisplayEvents_CurrentSchema covers the codex-cli >= 0.133.0
+// event shape, where the item discriminator moved to item.type with new values
+// (agent_message / command_execution) instead of the legacy item.item_type
+// (assistant_message / function_call). Regression guard for the format drift that left raw
+// NDJSON leaking into state.Output.
+func TestCodexProvider_parseCodexDisplayEvents_CurrentSchema(t *testing.T) {
+	provider := NewCodexProvider()
+
+	t.Run("agent_message surfaces text (real codex-cli 0.133.0 line)", func(t *testing.T) {
+		line := []byte(`{"type":"item.completed","item":{"id":"item_0","type":"agent_message","text":"PONG"}}`)
+		got := provider.parseCodexDisplayEvents(line)
+		require.Len(t, got, 1)
+		assert.Equal(t, "item.completed", got[0].Type)
+		assert.Equal(t, EventText, got[0].Kind)
+		assert.Equal(t, "PONG", got[0].Text)
+	})
+
+	t.Run("agent_message with empty text still produces an EventText", func(t *testing.T) {
+		line := []byte(`{"type":"item.completed","item":{"id":"i1","type":"agent_message","text":""}}`)
+		got := provider.parseCodexDisplayEvents(line)
+		require.Len(t, got, 1)
+		assert.Equal(t, EventText, got[0].Kind)
+		assert.Equal(t, "", got[0].Text)
+	})
+
+	t.Run("command_execution maps to EventToolUse", func(t *testing.T) {
+		line := []byte(`{"type":"item.completed","item":{"id":"i2","type":"command_execution","name":"shell","arguments":"{\"command\":\"ls -la\"}"}}`)
+		got := provider.parseCodexDisplayEvents(line)
+		require.Len(t, got, 1)
+		assert.Equal(t, EventToolUse, got[0].Kind)
+		assert.Equal(t, "shell", got[0].Name)
+		assert.Equal(t, "ls -la", got[0].Arg)
+	})
+
+	t.Run("legacy item_type wins when both keys present", func(t *testing.T) {
+		// item_type takes precedence over type, so a legacy assistant_message is honored.
+		line := []byte(`{"type":"item.completed","item":{"item_type":"assistant_message","type":"agent_message","text":"hi"}}`)
+		got := provider.parseCodexDisplayEvents(line)
+		require.Len(t, got, 1)
+		assert.Equal(t, EventText, got[0].Kind)
+		assert.Equal(t, "hi", got[0].Text)
+	})
+}
+
 func TestCodexProvider_parseCodexDisplayEvents_NonAssistantItems(t *testing.T) {
 	provider := NewCodexProvider()
 
diff --git a/internal/infrastructure/agents/codex_provider_resume_test.go b/internal/infrastructure/agents/codex_provider_resume_test.go
index 66f50a05..0e5b2db6 100644
--- a/internal/infrastructure/agents/codex_provider_resume_test.go
+++ b/internal/infrastructure/agents/codex_provider_resume_test.go
@@ -53,11 +53,12 @@ func TestCodexProvider_ExecuteConversation_T005_ResumeTurnResumeSubcommand(t *te
 	call := calls[0]
 
 	assert.Equal(t, "codex", call.Name)
-	assert.GreaterOrEqual(t, len(call.Args), 4)
-	assert.Equal(t, "resume", call.Args[0], "resume turn should use resume subcommand")
-	assert.Equal(t, "codex-sess-123", call.Args[1], "session ID should be args[1]")
-	assert.Equal(t, "--json", call.Args[2], "resume should have --json flag after session ID")
-	assert.Equal(t, "add error handling", call.Args[3], "prompt should follow --json")
+	assert.GreaterOrEqual(t, len(call.Args), 5)
+	assert.Equal(t, "exec", call.Args[0], "resume turn should use exec subcommand")
+	assert.Equal(t, "resume", call.Args[1], "exec resume subcommand")
+	assert.Equal(t, "codex-sess-123", call.Args[2], "session ID should be args[2]")
+	assert.Equal(t, "--json", call.Args[3], "resume should have --json flag after session ID")
+	assert.Equal(t, "add error handling", call.Args[4], "prompt should follow --json")
 }
 
 // T005: ExecuteConversation uses resume for any non-empty session ID (no prefix required)
@@ -77,10 +78,11 @@ func TestCodexProvider_ExecuteConversation_T005_NonCodexPrefixedSessionIDFallbac
 	require.Len(t, calls, 1)
 	call := calls[0]
 
-	assert.Equal(t, "resume", call.Args[0], "any non-empty session ID triggers resume subcommand")
-	assert.Equal(t, "non-codex-prefixed-id", call.Args[1], "session ID passed after resume")
-	assert.Equal(t, "--json", call.Args[2])
-	assert.Equal(t, "continue", call.Args[3])
+	assert.Equal(t, "exec", call.Args[0], "any non-empty session ID triggers exec resume")
+	assert.Equal(t, "resume", call.Args[1], "exec resume subcommand")
+	assert.Equal(t, "non-codex-prefixed-id", call.Args[2], "session ID passed after resume")
+	assert.Equal(t, "--json", call.Args[3])
+	assert.Equal(t, "continue", call.Args[4])
 }
 
 // T005: Quiet flag is NOT passed (replaced by --json on exec/resume)
@@ -204,7 +206,7 @@ func TestCodexProvider_ExecuteConversation_T005_OptionsWithSubcommands(t *testin
 			name:           "resume turn with model",
 			sessionID:      "codex-sess-abc",
 			options:        map[string]any{"model": "gpt-4o"},
-			expectedSubcmd: "resume",
+			expectedSubcmd: "exec",
 			expectedFlag:   "--model",
 		},
 		{
@@ -218,7 +220,7 @@ func TestCodexProvider_ExecuteConversation_T005_OptionsWithSubcommands(t *testin
 			name:           "resume turn with dangerously_skip_permissions",
 			sessionID:      "codex-sess-xyz",
 			options:        map[string]any{"dangerously_skip_permissions": true},
-			expectedSubcmd: "resume",
+			expectedSubcmd: "exec",
 			expectedFlag:   "--dangerously-bypass-approvals-and-sandbox",
 		},
 	}
@@ -239,6 +241,9 @@ func TestCodexProvider_ExecuteConversation_T005_OptionsWithSubcommands(t *testin
 			call := calls[0]
 
 			assert.Equal(t, tt.expectedSubcmd, call.Args[0])
+			if tt.sessionID != "" {
+				assert.Equal(t, "resume", call.Args[1], "resume turn nests the resume subcommand under exec")
+			}
 			assert.Contains(t, call.Args, tt.expectedFlag, "expected flag should be in args")
 			assert.NotContains(t, call.Args, "--language", "--language is not supported")
 			assert.NotContains(t, call.Args, "--yolo", "--yolo is not supported")
@@ -316,7 +321,7 @@ func TestCodexProvider_ExecuteConversation_T005_SystemPromptHandling(t *testing.
 			name:            "resume turn with system prompt (should ignore)",
 			sessionID:       "codex-sess-abc",
 			hasSystemPrompt: true,
-			expectedSubcmd:  "resume",
+			expectedSubcmd:  "exec",
 			shouldInline:    false,
 		},
 	}
@@ -345,11 +350,11 @@ func TestCodexProvider_ExecuteConversation_T005_SystemPromptHandling(t *testing.
 			assert.NotContains(t, args, "--system-prompt", "codex has no --system-prompt flag")
 
 			var promptArg string
-			switch args[0] {
-			case "exec":
+			switch {
+			case args[0] == "exec" && len(args) > 1 && args[1] == "resume":
+				promptArg = args[4]
+			case args[0] == "exec":
 				promptArg = args[2]
-			case "resume":
-				promptArg = args[3]
 			}
 
 			if tt.shouldInline {
diff --git a/internal/infrastructure/agents/codex_provider_session_test.go b/internal/infrastructure/agents/codex_provider_session_test.go
index a5b671f3..a3b43820 100644
--- a/internal/infrastructure/agents/codex_provider_session_test.go
+++ b/internal/infrastructure/agents/codex_provider_session_test.go
@@ -240,14 +240,14 @@ func TestCodexProvider_ExecuteConversation_SystemPromptFirstTurnOnly(t *testing.
 			assert.NotContains(t, args, "--system-prompt", "codex has no --system-prompt flag")
 
 			// The prompt arg sits immediately after the subcommand positional args.
-			// exec: args = ["exec", "--json", <prompt>, ...]
-			// resume: args = ["resume", <thread_id>, "--json", <prompt>, ...]
+			// first turn: args = ["exec", "--json", <prompt>, ...]
+			// resume:     args = ["exec", "resume", <thread_id>, "--json", <prompt>, ...]
 			var promptArg string
-			switch args[0] {
-			case "exec":
+			switch {
+			case args[0] == "exec" && len(args) > 1 && args[1] == "resume":
+				promptArg = args[4]
+			case args[0] == "exec":
 				promptArg = args[2]
-			case "resume":
-				promptArg = args[3]
 			default:
 				t.Fatalf("unexpected subcommand %q", args[0])
 			}
diff --git a/internal/infrastructure/agents/codex_provider_test_helpers_test.go b/internal/infrastructure/agents/codex_provider_test_helpers_test.go
new file mode 100644
index 00000000..23122498
--- /dev/null
+++ b/internal/infrastructure/agents/codex_provider_test_helpers_test.go
@@ -0,0 +1,15 @@
+package agents
+
+import "encoding/json"
+
+// ndjsonLine builds a single Codex NDJSON `item.completed` envelope wrapping an
+// assistant_message with the given text. Shared across the codex provider test
+// files (unit + delegation) so the envelope shape lives in one place.
+//
+// The text is JSON-escaped via json.Marshal so callers may pass quotes,
+// backslashes, or control characters without producing a malformed envelope —
+// matching the project convention of testing handlers with special characters.
+func ndjsonLine(text string) string {
+	escaped, _ := json.Marshal(text) //nolint:errcheck // json.Marshal never fails for a Go string value
+	return `{"type":"item.completed","item":{"item_type":"assistant_message","text":` + string(escaped) + `}}`
+}
diff --git a/internal/infrastructure/agents/codex_provider_unit_test.go b/internal/infrastructure/agents/codex_provider_unit_test.go
index bdbb862c..15d58423 100644
--- a/internal/infrastructure/agents/codex_provider_unit_test.go
+++ b/internal/infrastructure/agents/codex_provider_unit_test.go
@@ -3,6 +3,7 @@ package agents
 import (
 	"context"
 	"errors"
+	"strings"
 	"testing"
 	"time"
 
@@ -12,9 +13,6 @@ import (
 	"github.com/stretchr/testify/require"
 )
 
-// Component: C025 - Unit Tests for CodexProvider (WITHOUT integration build tag)
-// These tests use MockCLIExecutor to avoid external CLI dependencies
-
 func TestCodexProvider_Execute_Success(t *testing.T) {
 	tests := []struct {
 		name       string
@@ -69,7 +67,6 @@ func TestCodexProvider_Execute_Success(t *testing.T) {
 			require.NotNil(t, result)
 			assert.Equal(t, "codex", result.Provider)
 			assert.Equal(t, tt.wantOutput, result.Output)
-			// Token estimation is ~4 chars per token
 			expectedTokens := len(tt.wantOutput) / 4
 			assert.Equal(t, expectedTokens, result.Tokens)
 			assert.False(t, result.StartedAt.IsZero())
@@ -175,7 +172,6 @@ func TestCodexProvider_Execute_EmptyPrompt(t *testing.T) {
 			assert.Error(t, err)
 			assert.Contains(t, err.Error(), tt.wantErr)
 			assert.Nil(t, result)
-			// Executor should not be called
 			calls := mockExec.GetCalls()
 			assert.Empty(t, calls)
 		})
@@ -189,7 +185,6 @@ func TestCodexProvider_Execute_ValidationErrors(t *testing.T) {
 		options map[string]any
 		wantErr string
 	}{
-		// Valid models (should not error)
 		{
 			name:    "valid model: gpt-4o",
 			prompt:  "test",
@@ -226,7 +221,6 @@ func TestCodexProvider_Execute_ValidationErrors(t *testing.T) {
 			options: map[string]any{"model": "o4-mini"},
 			wantErr: "",
 		},
-		// Invalid models (should error)
 		{
 			name:    "invalid model: claude-3-opus (wrong provider)",
 			prompt:  "test",
@@ -269,7 +263,6 @@ func TestCodexProvider_Execute_ValidationErrors(t *testing.T) {
 			options: map[string]any{"model": "oracle"},
 			wantErr: "must start with 'gpt-', 'codex-', or be an o-series",
 		},
-		// Options that are silently ignored (no validation error)
 		{
 			name:    "max_tokens is silently ignored (no validation error)",
 			prompt:  "test",
@@ -396,6 +389,10 @@ func TestCodexProvider_Execute_CLIErrors(t *testing.T) {
 	}
 }
 
+// TestCodexProvider_Execute_StdoutStderrCombination exercises the non-NDJSON fallback path:
+// plain stdout/stderr carries no assistant_message event, so extractCodexAssistantText reports
+// hadText=false and Output is whatever baseCLIProvider.combineOutput produced. It validates the
+// base combine/fallback behavior, not the F103 NDJSON extraction overwrite.
 func TestCodexProvider_Execute_StdoutStderrCombination(t *testing.T) {
 	tests := []struct {
 		name       string
@@ -459,30 +456,13 @@ func TestCodexProvider_Execute_StdoutStderrCombination(t *testing.T) {
 
 func TestCodexProvider_Execute_TokenEstimation(t *testing.T) {
 	tests := []struct {
-		name           string
-		mockStdout     []byte
-		expectedTokens int
+		name       string
+		mockStdout []byte
 	}{
-		{
-			name:           "small output",
-			mockStdout:     []byte("test"),
-			expectedTokens: 1, // 4 chars / 4 = 1
-		},
-		{
-			name:           "medium output",
-			mockStdout:     []byte("This is a longer output with multiple words"),
-			expectedTokens: 10, // 44 chars / 4 = 10 (integer division)
-		},
-		{
-			name:           "large output",
-			mockStdout:     make([]byte, 1000),
-			expectedTokens: 250, // 1000 / 4 = 250
-		},
-		{
-			name:           "empty output",
-			mockStdout:     []byte(""),
-			expectedTokens: 0,
-		},
+		{name: "small output", mockStdout: []byte("test")},
+		{name: "medium output", mockStdout: []byte("This is a longer output with multiple words")},
+		{name: "large output", mockStdout: make([]byte, 1000)},
+		{name: "empty output", mockStdout: []byte("")},
 	}
 
 	for _, tt := range tests {
@@ -495,37 +475,18 @@ func TestCodexProvider_Execute_TokenEstimation(t *testing.T) {
 
 			require.NoError(t, err)
 			require.NotNil(t, result)
-			assert.Equal(t, tt.expectedTokens, result.Tokens)
+			// Plain (non-NDJSON) output is used verbatim, except an empty body becomes the " "
+			// fallback. The ApproximationTokenizer estimates len/4 tokens; derive the expectation
+			// from that contract instead of hardcoding so the test tracks the tokenizer's ratio.
+			wantOutput := string(tt.mockStdout)
+			if wantOutput == "" {
+				wantOutput = " "
+			}
+			assert.Equal(t, len(wantOutput)/4, result.Tokens)
 		})
 	}
 }
 
-func TestCodexProvider_Execute_TimestampOrdering(t *testing.T) {
-	mockExec := mocks.NewMockCLIExecutor()
-	mockExec.SetOutput([]byte("code"), nil)
-	provider := NewCodexProviderWithOptions(WithCodexExecutor(mockExec))
-
-	result, err := provider.Execute(context.Background(), "test", nil, nil, nil)
-
-	require.NoError(t, err)
-	require.NotNil(t, result)
-	assert.False(t, result.StartedAt.IsZero())
-	assert.False(t, result.CompletedAt.IsZero())
-	assert.True(t, result.CompletedAt.After(result.StartedAt) || result.CompletedAt.Equal(result.StartedAt))
-}
-
-func TestCodexProvider_Execute_ProviderName(t *testing.T) {
-	mockExec := mocks.NewMockCLIExecutor()
-	mockExec.SetOutput([]byte("code"), nil)
-	provider := NewCodexProviderWithOptions(WithCodexExecutor(mockExec))
-
-	result, err := provider.Execute(context.Background(), "test", nil, nil, nil)
-
-	require.NoError(t, err)
-	require.NotNil(t, result)
-	assert.Equal(t, "codex", result.Provider)
-}
-
 func TestCodexProvider_ExecuteConversation_Success(t *testing.T) {
 	tests := []struct {
 		name       string
@@ -698,7 +659,6 @@ func TestCodexProvider_ExecuteConversation_ValidationErrors(t *testing.T) {
 		options map[string]any
 		wantErr string
 	}{
-		// Valid models (should not error)
 		{
 			name:    "valid model: gpt-4o",
 			options: map[string]any{"model": "gpt-4o"},
@@ -714,7 +674,6 @@ func TestCodexProvider_ExecuteConversation_ValidationErrors(t *testing.T) {
 			options: map[string]any{"model": "o1"},
 			wantErr: "",
 		},
-		// Invalid models (should error)
 		{
 			name:    "invalid model: claude-3-opus",
 			options: map[string]any{"model": "claude-3-opus"},
@@ -735,7 +694,6 @@ func TestCodexProvider_ExecuteConversation_ValidationErrors(t *testing.T) {
 			options: map[string]any{"model": "ollama"},
 			wantErr: "must start with 'gpt-', 'codex-', or be an o-series",
 		},
-		// Options that are silently ignored (no validation error)
 		{
 			name:    "temperature is silently ignored (no validation error)",
 			options: map[string]any{"temperature": 2.5},
@@ -911,15 +869,12 @@ func TestCodexProvider_Name(t *testing.T) {
 	assert.Equal(t, "codex", provider.Name())
 }
 
-func TestCodexProvider_Validate_Success(t *testing.T) {
-	// Note: This test will fail if 'codex' is not in PATH
-	// For unit testing purposes, we skip this test if codex is not available
+func TestCodexProvider_Validate_DoesNotPanic(t *testing.T) {
+	// Validate() resolves 'codex' in PATH, so its result is environment-specific
+	// (error when absent, nil when installed). This test only guarantees the method
+	// can be invoked without panicking.
 	provider := NewCodexProvider()
-	err := provider.Validate()
-
-	// We don't assert anything specific here because this depends on the system
-	// The test verifies the method can be called without panicking
-	_ = err
+	_ = provider.Validate()
 }
 
 func TestCodexProvider_NewCodexProvider_DefaultExecutor(t *testing.T) {
@@ -930,7 +885,6 @@ func TestCodexProvider_NewCodexProvider_DefaultExecutor(t *testing.T) {
 	assert.True(t, ok, "default executor should be ExecCLIExecutor")
 }
 
-// T008: Explicit tests for `exec --json` arg structure assertion
 func TestCodexProvider_Execute_ExecJSONStructure(t *testing.T) {
 	tests := []struct {
 		name        string
@@ -986,7 +940,6 @@ func TestCodexProvider_Execute_ExecJSONStructure(t *testing.T) {
 	}
 }
 
-// T008: Verify --json flag is first arg after exec subcommand
 func TestCodexProvider_Execute_JSONFlagPosition(t *testing.T) {
 	mockExec := mocks.NewMockCLIExecutor()
 	mockExec.SetOutput([]byte("result"), nil)
@@ -1005,7 +958,6 @@ func TestCodexProvider_Execute_JSONFlagPosition(t *testing.T) {
 	assert.Equal(t, "test prompt", args[2])
 }
 
-// T008: ExecuteConversation uses exec --json on first turn
 func TestCodexProvider_ExecuteConversation_FirstTurnExecJSON(t *testing.T) {
 	mockExec := mocks.NewMockCLIExecutor()
 	mockExec.SetOutput([]byte("response"), nil)
@@ -1025,7 +977,481 @@ func TestCodexProvider_ExecuteConversation_FirstTurnExecJSON(t *testing.T) {
 	assert.Equal(t, "first prompt", args[2], "third arg should be prompt")
 }
 
-// T008: ExecuteConversation uses resume subcommand on resume turn
+func TestCodexProvider_extractCodexTextContent(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		wantText string
+	}{
+		{
+			name:     "single assistant_message",
+			input:    ndjsonLine("hello"),
+			wantText: "hello",
+		},
+		{
+			name:     "multiple assistant_messages joined with newline",
+			input:    ndjsonLine("foo") + "\n" + ndjsonLine("bar"),
+			wantText: "foo\nbar",
+		},
+		{
+			name:     "empty assistant_message text",
+			input:    ndjsonLine(""),
+			wantText: "",
+		},
+		{
+			name:     "plain non-NDJSON text",
+			input:    "second response",
+			wantText: "",
+		},
+		{
+			name:     "empty input",
+			input:    "",
+			wantText: "",
+		},
+		{
+			name:     "NUL byte inside JSON string value",
+			input:    "{\"type\":\"item.completed\",\"item\":{\"item_type\":\"assistant_message\",\"text\":\"hell\x00o\"}}",
+			wantText: "hell\x00o",
+		},
+		{
+			name:     "function_call only produces no text",
+			input:    `{"type":"item.completed","item":{"item_type":"function_call","name":"bash","arguments":"{}"}}`,
+			wantText: "",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			p := NewCodexProvider()
+			got := p.extractCodexTextContent(tt.input)
+			assert.Equal(t, tt.wantText, got)
+		})
+	}
+}
+
+func TestCodexProvider_extractCodexAssistantText(t *testing.T) {
+	tests := []struct {
+		name        string
+		input       string
+		wantText    string
+		wantHadText bool
+	}{
+		{
+			name:        "single assistant_message with text",
+			input:       ndjsonLine("hello"),
+			wantText:    "hello",
+			wantHadText: true,
+		},
+		{
+			name:        "multiple assistant_messages joined with newline",
+			input:       ndjsonLine("foo") + "\n" + ndjsonLine("bar"),
+			wantText:    "foo\nbar",
+			wantHadText: true,
+		},
+		{
+			name:        "empty assistant_message text — hadText true despite empty string",
+			input:       ndjsonLine(""),
+			wantText:    "",
+			wantHadText: true,
+		},
+		{
+			name:        "plain non-NDJSON text — hadText false",
+			input:       "second response",
+			wantText:    "",
+			wantHadText: false,
+		},
+		{
+			name:        "empty input",
+			input:       "",
+			wantText:    "",
+			wantHadText: false,
+		},
+		{
+			name:        "NUL byte inside JSON string value — no truncation",
+			input:       "{\"type\":\"item.completed\",\"item\":{\"item_type\":\"assistant_message\",\"text\":\"hell\x00o\"}}",
+			wantText:    "hell\x00o",
+			wantHadText: true,
+		},
+		{
+			name:        "function_call only — hadText false",
+			input:       `{"type":"item.completed","item":{"item_type":"function_call","name":"bash","arguments":"{}"}}`,
+			wantText:    "",
+			wantHadText: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			p := NewCodexProvider()
+			gotText, gotHadText := p.extractCodexAssistantText(tt.input)
+			assert.Equal(t, tt.wantText, gotText)
+			assert.Equal(t, tt.wantHadText, gotHadText)
+		})
+	}
+}
+
+func TestCodexProvider_extractCodexTokenUsage(t *testing.T) {
+	tests := []struct {
+		name       string
+		input      string
+		wantNil    bool
+		wantInput  int
+		wantOutput int
+		wantTotal  int
+	}{
+		{
+			name:       "turn.completed with full usage",
+			input:      `{"type":"turn.completed","usage":{"input_tokens":10,"output_tokens":5}}`,
+			wantInput:  10,
+			wantOutput: 5,
+			wantTotal:  15,
+		},
+		{
+			name:       "usage among multiple events",
+			input:      `{"type":"thread.started","thread_id":"t1"}` + "\n" + `{"type":"turn.completed","usage":{"input_tokens":3,"output_tokens":7}}`,
+			wantInput:  3,
+			wantOutput: 7,
+			wantTotal:  10,
+		},
+		{
+			name:       "partial usage defaults missing field to zero",
+			input:      `{"type":"turn.completed","usage":{"input_tokens":4}}`,
+			wantInput:  4,
+			wantOutput: 0,
+			wantTotal:  4,
+		},
+		{
+			name:    "turn.completed without usage field",
+			input:   `{"type":"turn.completed"}`,
+			wantNil: true,
+		},
+		{
+			name:    "usage present but not an object",
+			input:   `{"type":"turn.completed","usage":"nope"}`,
+			wantNil: true,
+		},
+		{
+			name:    "no turn.completed event",
+			input:   `{"type":"item.completed","item":{"item_type":"assistant_message","text":"hi"}}`,
+			wantNil: true,
+		},
+		{
+			name:    "empty output",
+			input:   "",
+			wantNil: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			p := NewCodexProvider()
+			usage := p.extractCodexTokenUsage(tt.input)
+			if tt.wantNil {
+				assert.Nil(t, usage)
+				return
+			}
+			require.NotNil(t, usage)
+			assert.Equal(t, tt.wantInput, usage.InputTokens)
+			assert.Equal(t, tt.wantOutput, usage.OutputTokens)
+			assert.Equal(t, tt.wantTotal, usage.TotalTokens)
+		})
+	}
+}
+
+// TestCodexProvider_RealStream_0133 feeds the exact NDJSON stream emitted by
+// codex-cli 0.133.0 (captured from `codex exec --json`) and asserts the assistant text
+// is extracted cleanly and the real usage is read. Regression guard for the schema drift
+// (item.type=agent_message) that previously left the raw envelope in state.Output.
+func TestCodexProvider_RealStream_0133(t *testing.T) {
+	stream := strings.Join([]string{
+		`{"type":"thread.started","thread_id":"019e91f0-2fd4-7a20-9fc2-03994766eec3"}`,
+		`{"type":"turn.started"}`,
+		`{"type":"item.completed","item":{"id":"item_0","type":"agent_message","text":"PONG"}}`,
+		`{"type":"turn.completed","usage":{"input_tokens":18809,"cached_input_tokens":3456,"output_tokens":6,"reasoning_output_tokens":0}}`,
+	}, "\n")
+
+	p := NewCodexProvider()
+
+	text, hadText := p.extractCodexAssistantText(stream)
+	assert.True(t, hadText, "agent_message must be recognized as assistant text")
+	assert.Equal(t, "PONG", text, "Output must be the clean answer, not the raw NDJSON envelope")
+
+	usage := p.extractCodexTokenUsage(stream)
+	require.NotNil(t, usage, "turn.completed.usage must be read")
+	assert.Equal(t, 18809, usage.InputTokens)
+	assert.Equal(t, 6, usage.OutputTokens)
+}
+
+func TestCodexProvider_Execute_OutputExtractionAllFormats(t *testing.T) {
+	ndjson := `{"type":"item.completed","item":{"item_type":"assistant_message","text":"clean extracted text"}}`
+	tests := []struct {
+		name         string
+		outputFormat string
+	}{
+		{name: "json format", outputFormat: "json"},
+		{name: "stream-json format", outputFormat: "stream-json"},
+		{name: "text format", outputFormat: "text"},
+		{name: "empty format", outputFormat: ""},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mockExec := mocks.NewMockCLIExecutor()
+			mockExec.SetOutput([]byte(ndjson), nil)
+			provider := NewCodexProviderWithOptions(WithCodexExecutor(mockExec))
+			options := map[string]any{"output_format": tt.outputFormat}
+
+			result, err := provider.Execute(context.Background(), "test", options, nil, nil)
+
+			require.NoError(t, err)
+			require.NotNil(t, result)
+			assert.Equal(t, "clean extracted text", result.Output)
+		})
+	}
+}
+
+func TestCodexProvider_Execute_ResponsePopulation_ValidJSON(t *testing.T) {
+	ndjson := `{"type":"item.completed","item":{"item_type":"assistant_message","text":"{\"answer\":42}"}}`
+	mockExec := mocks.NewMockCLIExecutor()
+	mockExec.SetOutput([]byte(ndjson), nil)
+	provider := NewCodexProviderWithOptions(WithCodexExecutor(mockExec))
+
+	result, err := provider.Execute(context.Background(), "test", nil, nil, nil)
+
+	require.NoError(t, err)
+	require.NotNil(t, result)
+	require.NotNil(t, result.Response)
+	assert.Equal(t, float64(42), result.Response["answer"])
+}
+
+func TestCodexProvider_Execute_ResponsePopulation_InvalidJSON(t *testing.T) {
+	ndjson := `{"type":"item.completed","item":{"item_type":"assistant_message","text":"plain prose text"}}`
+	mockExec := mocks.NewMockCLIExecutor()
+	mockExec.SetOutput([]byte(ndjson), nil)
+	provider := NewCodexProviderWithOptions(WithCodexExecutor(mockExec))
+
+	result, err := provider.Execute(context.Background(), "test", nil, nil, nil)
+
+	require.NoError(t, err)
+	require.NotNil(t, result)
+	assert.Nil(t, result.Response)
+	assert.Equal(t, "plain prose text", result.Output)
+}
+
+func TestCodexProvider_Execute_ResponsePopulation_EmptyExtraction(t *testing.T) {
+	rawMock := "raw plain text output"
+	mockExec := mocks.NewMockCLIExecutor()
+	mockExec.SetOutput([]byte(rawMock), nil)
+	provider := NewCodexProviderWithOptions(WithCodexExecutor(mockExec))
+
+	result, err := provider.Execute(context.Background(), "test", nil, nil, nil)
+
+	require.NoError(t, err)
+	require.NotNil(t, result)
+	assert.Nil(t, result.Response)
+	assert.Equal(t, rawMock, result.Output)
+}
+
+func TestCodexProvider_Execute_TokenRecountOnExtracted(t *testing.T) {
+	// recount uses extracted length ("hi" = 0 tokens), not raw NDJSON length (~19 tokens)
+	ndjson := `{"type":"item.completed","item":{"item_type":"assistant_message","text":"hi"}}`
+	mockExec := mocks.NewMockCLIExecutor()
+	mockExec.SetOutput([]byte(ndjson), nil)
+	provider := NewCodexProviderWithOptions(WithCodexExecutor(mockExec))
+
+	result, err := provider.Execute(context.Background(), "test", nil, nil, nil)
+
+	require.NoError(t, err)
+	require.NotNil(t, result)
+	assert.True(t, result.TokensEstimated)
+	assert.Equal(t, len("hi")/4, result.Tokens)
+}
+
+func TestCodexProvider_ExecuteConversation_EmptyAssistantMessage(t *testing.T) {
+	// Response nil: base uses struct literal init, not make(); update if base switches to pre-init.
+	ndjson := `{"type":"item.completed","item":{"item_type":"assistant_message","text":""}}`
+	mockExec := mocks.NewMockCLIExecutor()
+	mockExec.SetOutput([]byte(ndjson), nil)
+	provider := NewCodexProviderWithOptions(WithCodexExecutor(mockExec))
+
+	result, err := provider.ExecuteConversation(context.Background(), workflow.NewConversationState(""), "test", nil, nil, nil)
+
+	require.NoError(t, err)
+	require.NotNil(t, result)
+	assert.Equal(t, "", result.Output)
+	assert.Nil(t, result.Response)
+	// F103: the presence-aware overwrite must re-derive token counts from the extracted
+	// (empty) text rather than leaving the inflated count the base computed on the raw NDJSON.
+	assert.Equal(t, 0, result.TokensOutput)
+	assert.Equal(t, result.TokensInput, result.TokensTotal)
+	// And the trailing assistant turn Content must match the overwritten Output.
+	require.NotEmpty(t, result.State.Turns)
+	last := result.State.Turns[len(result.State.Turns)-1]
+	assert.Equal(t, workflow.TurnRoleAssistant, last.Role)
+	assert.Equal(t, "", last.Content)
+}
+
+func TestCodexProvider_ExecuteConversation_ResponsePopulation_ValidJSON(t *testing.T) {
+	ndjson := `{"type":"item.completed","item":{"item_type":"assistant_message","text":"{\"answer\":42}"}}`
+	mockExec := mocks.NewMockCLIExecutor()
+	mockExec.SetOutput([]byte(ndjson), nil)
+	provider := NewCodexProviderWithOptions(WithCodexExecutor(mockExec))
+
+	result, err := provider.ExecuteConversation(context.Background(), workflow.NewConversationState(""), "test", nil, nil, nil)
+
+	require.NoError(t, err)
+	require.NotNil(t, result)
+	require.NotNil(t, result.Response)
+	assert.Equal(t, float64(42), result.Response["answer"])
+}
+
+func TestCodexProvider_ExecuteConversation_JSONResponse(t *testing.T) {
+	ndjson := `{"type":"item.completed","item":{"item_type":"assistant_message","text":"{\"key\":\"val\"}"}}`
+	tests := []struct {
+		name         string
+		outputFormat string
+	}{
+		{name: "json format", outputFormat: "json"},
+		{name: "stream-json format", outputFormat: "stream-json"},
+		{name: "text format", outputFormat: "text"},
+		{name: "absent format", outputFormat: ""},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mockExec := mocks.NewMockCLIExecutor()
+			mockExec.SetOutput([]byte(ndjson), nil)
+			provider := NewCodexProviderWithOptions(WithCodexExecutor(mockExec))
+			options := map[string]any{"output_format": tt.outputFormat}
+
+			result, err := provider.ExecuteConversation(context.Background(), workflow.NewConversationState(""), "test", options, nil, nil)
+
+			require.NoError(t, err)
+			require.NotNil(t, result)
+			require.NotNil(t, result.Response, "Response must be populated for all output_format values")
+			assert.Equal(t, "val", result.Response["key"])
+		})
+	}
+}
+
+func TestCodexProvider_ExecuteConversation_NilResponseOnPlainText(t *testing.T) {
+	ndjson := `{"type":"item.completed","item":{"item_type":"assistant_message","text":"plain prose text"}}`
+	mockExec := mocks.NewMockCLIExecutor()
+	mockExec.SetOutput([]byte(ndjson), nil)
+	provider := NewCodexProviderWithOptions(WithCodexExecutor(mockExec))
+
+	result, err := provider.ExecuteConversation(context.Background(), workflow.NewConversationState(""), "test", nil, nil, nil)
+
+	require.NoError(t, err)
+	require.NotNil(t, result)
+	assert.Nil(t, result.Response)
+	assert.Equal(t, "plain prose text", result.Output)
+}
+
+func TestCodexProvider_ExecuteConversation_MultiEventNDJSON(t *testing.T) {
+	ndjson := ndjsonLine("foo") + "\n" + ndjsonLine("bar")
+
+	mockExec := mocks.NewMockCLIExecutor()
+	mockExec.SetOutput([]byte(ndjson), nil)
+	provider := NewCodexProviderWithOptions(WithCodexExecutor(mockExec))
+	state := workflow.NewConversationState("")
+
+	result, err := provider.ExecuteConversation(context.Background(), state, "test", nil, nil, nil)
+
+	require.NoError(t, err)
+	require.NotNil(t, result)
+	assert.Equal(t, "foo\nbar", result.Output)
+}
+
+func TestCodexProvider_ExecuteConversation_NULBytesInStream(t *testing.T) {
+	ndjson := "{\"type\":\"item.completed\",\"item\":{\"item_type\":\"assistant_message\",\"text\":\"before\x00after\"}}"
+
+	mockExec := mocks.NewMockCLIExecutor()
+	mockExec.SetOutput([]byte(ndjson), nil)
+	provider := NewCodexProviderWithOptions(WithCodexExecutor(mockExec))
+	state := workflow.NewConversationState("")
+
+	result, err := provider.ExecuteConversation(context.Background(), state, "test", nil, nil, nil)
+
+	require.NoError(t, err)
+	require.NotNil(t, result)
+	assert.Contains(t, result.Output, "after", "post-NUL characters must not be truncated")
+}
+
+func TestCodexProvider_ExecuteConversation_NonJSONText(t *testing.T) {
+	ndjson := `{"type":"item.completed","item":{"item_type":"assistant_message","text":"hello world"}}`
+
+	mockExec := mocks.NewMockCLIExecutor()
+	mockExec.SetOutput([]byte(ndjson), nil)
+	provider := NewCodexProviderWithOptions(WithCodexExecutor(mockExec))
+	state := workflow.NewConversationState("")
+
+	result, err := provider.ExecuteConversation(context.Background(), state, "test", nil, nil, nil)
+
+	require.NoError(t, err)
+	require.NotNil(t, result)
+	assert.Equal(t, "hello world", result.Output)
+	assert.Nil(t, result.Response)
+}
+
+func TestCodexProvider_Execute_MultiEventNDJSON(t *testing.T) {
+	ndjson := ndjsonLine("foo") + "\n" + ndjsonLine("bar")
+
+	mockExec := mocks.NewMockCLIExecutor()
+	mockExec.SetOutput([]byte(ndjson), nil)
+	provider := NewCodexProviderWithOptions(WithCodexExecutor(mockExec))
+
+	result, err := provider.Execute(context.Background(), "test", nil, nil, nil)
+
+	require.NoError(t, err)
+	require.NotNil(t, result)
+	assert.Equal(t, "foo\nbar", result.Output)
+}
+
+func TestCodexProvider_Execute_EmptyAssistantMessage(t *testing.T) {
+	ndjson := `{"type":"item.completed","item":{"item_type":"assistant_message","text":""}}`
+
+	mockExec := mocks.NewMockCLIExecutor()
+	mockExec.SetOutput([]byte(ndjson), nil)
+	provider := NewCodexProviderWithOptions(WithCodexExecutor(mockExec))
+
+	result, err := provider.Execute(context.Background(), "test", nil, nil, nil)
+
+	require.NoError(t, err)
+	require.NotNil(t, result)
+	assert.Equal(t, "", result.Output)
+	assert.Nil(t, result.Response)
+}
+
+func TestCodexProvider_Execute_NULBytesInStream(t *testing.T) {
+	ndjson := "{\"type\":\"item.completed\",\"item\":{\"item_type\":\"assistant_message\",\"text\":\"before\x00after\"}}"
+
+	mockExec := mocks.NewMockCLIExecutor()
+	mockExec.SetOutput([]byte(ndjson), nil)
+	provider := NewCodexProviderWithOptions(WithCodexExecutor(mockExec))
+
+	result, err := provider.Execute(context.Background(), "test", nil, nil, nil)
+
+	require.NoError(t, err)
+	require.NotNil(t, result)
+	assert.Contains(t, result.Output, "after", "post-NUL characters must not be truncated")
+}
+
+func TestCodexProvider_Execute_NonJSONText(t *testing.T) {
+	ndjson := `{"type":"item.completed","item":{"item_type":"assistant_message","text":"hello world"}}`
+
+	mockExec := mocks.NewMockCLIExecutor()
+	mockExec.SetOutput([]byte(ndjson), nil)
+	provider := NewCodexProviderWithOptions(WithCodexExecutor(mockExec))
+
+	result, err := provider.Execute(context.Background(), "test", nil, nil, nil)
+
+	require.NoError(t, err)
+	require.NotNil(t, result)
+	assert.Equal(t, "hello world", result.Output)
+	assert.Nil(t, result.Response)
+}
+
 func TestCodexProvider_ExecuteConversation_ResumeJSONStructure(t *testing.T) {
 	mockExec := mocks.NewMockCLIExecutor()
 	mockExec.SetOutput([]byte("response"), nil)
@@ -1040,9 +1466,10 @@ func TestCodexProvider_ExecuteConversation_ResumeJSONStructure(t *testing.T) {
 	require.Len(t, calls, 1)
 	args := calls[0].Args
 
-	require.Len(t, args, 4, "resume turn should have resume, sessionID, --json, prompt")
-	assert.Equal(t, "resume", args[0], "first arg should be resume subcommand")
-	assert.Equal(t, "codex-abc123def456", args[1], "second arg should be session ID")
-	assert.Equal(t, "--json", args[2], "third arg should be --json flag")
-	assert.Equal(t, "follow up", args[3], "fourth arg should be prompt")
+	require.Len(t, args, 5, "resume turn should have exec, resume, sessionID, --json, prompt")
+	assert.Equal(t, "exec", args[0], "first arg should be exec subcommand")
+	assert.Equal(t, "resume", args[1], "second arg should be resume subcommand")
+	assert.Equal(t, "codex-abc123def456", args[2], "third arg should be session ID")
+	assert.Equal(t, "--json", args[3], "fourth arg should be --json flag")
+	assert.Equal(t, "follow up", args[4], "fifth arg should be prompt")
 }
diff --git a/internal/infrastructure/agents/helpers.go b/internal/infrastructure/agents/helpers.go
index 73423837..efbc2b35 100644
--- a/internal/infrastructure/agents/helpers.go
+++ b/internal/infrastructure/agents/helpers.go
@@ -62,7 +62,7 @@ func tryParseJSONResponse(output string) map[string]any {
 // findFirstNDJSONEvent scans NDJSON output and returns the first parsed event
 // whose "type" field equals eventType. Returns nil if no match is found.
 func findFirstNDJSONEvent(output, eventType string) map[string]any {
-	for _, line := range strings.Split(output, "\n") {
+	for line := range strings.SplitSeq(output, "\n") {
 		line = strings.TrimSpace(line)
 		if line == "" {
 			continue
@@ -80,7 +80,7 @@ func findFirstNDJSONEvent(output, eventType string) map[string]any {
 
 func findLastNDJSONEvent(output, eventType string) map[string]any {
 	var found map[string]any
-	for _, line := range strings.Split(output, "\n") {
+	for line := range strings.SplitSeq(output, "\n") {
 		line = strings.TrimSpace(line)
 		if line == "" {
 			continue