From df2a446bb451ce8786956cf9f6bf123ae59edb89 Mon Sep 17 00:00:00 2001
From: "David J. Bianco" <davidjbianco@gmail.com>
Date: Fri, 15 May 2026 12:31:28 -0400
Subject: [PATCH] fix: always emit generation sidecars

---
 README.md                                     |  4 +-
 TODO.md                                       |  2 +-
 commands/eforge/generate.md                   |  7 +-
 .../eforge/references/evidence-formats.md     |  3 +-
 docs/design/PRD.md                            |  9 +-
 docs/reference/EVIDENCE_FORMATS.md            |  3 +-
 src/evidenceforge/cli/commands.py             | 10 +-
 src/evidenceforge/generation/engine/core.py   | 29 +++---
 src/evidenceforge/generation/ground_truth.py  | 10 +-
 tests/unit/test_cli.py                        | 95 +++++++++++++++++++
 tests/unit/test_engine.py                     | 73 +++++++++++++-
 tests/unit/test_ground_truth.py               | 12 +++
 12 files changed, 220 insertions(+), 37 deletions(-)

diff --git a/README.md b/README.md
index 95a5dd37..da506e9c 100644
--- a/README.md
+++ b/README.md
@@ -97,7 +97,7 @@ For details on the overlay system, manual editing, and cross-file dependencies,
 
 EvidenceForge creates multi-format security log datasets from YAML scenario definitions. You describe an environment (users, systems, network topology) and a storyline (attack events), and EvidenceForge generates temporally consistent logs across all formats simultaneously — complete with cross-referenced LogonIDs, PIDs, timestamps, and UIDs.
 
-Every attack scenario includes a `GROUND_TRUTH.md` file documenting exactly what happened, when, and where — making the datasets immediately usable for threat hunting training.
+Every generated scenario includes a `GROUND_TRUTH.md` file. Attack scenarios document exactly what happened, when, and where, while baseline-only scenarios explicitly document that no malicious events were generated.
 
 ### Key Capabilities
 
@@ -106,7 +106,7 @@ Every attack scenario includes a `GROUND_TRUTH.md` file documenting exactly what
 - **Realistic baseline noise** — 26 lateral movement patterns, process→network correlation, network-level red herrings, and 18 Linux syslog categories create noise that analysts must work through
 - **OS-aware generation** — Windows systems produce Windows Event + Sysmon logs; Linux systems produce syslog + bash history
 - **Network visibility modeling** — Define sensor placement (SPAN/TAP), direction, and monitored segments
-- **Ground truth documentation** — Every attack scenario generates a GROUND_TRUTH.md with narrative, timeline, and IOCs
+- **Ground truth documentation** — Every run generates a GROUND_TRUTH.md; attack scenarios include narrative, timeline, and IOCs
 - **Parallel generation** — Threaded emitters write all formats simultaneously with temporal consistency
 - **Scenario validation** — Cross-reference checking, uniqueness constraints, and network topology validation
 - **Data quality evaluation** — 5-dimension scoring framework (23 sub-scores) with acceptance criteria
diff --git a/TODO.md b/TODO.md
index dc5796e6..edb9a2ff 100644
--- a/TODO.md
+++ b/TODO.md
@@ -334,7 +334,7 @@ Verification is complete: dedicated `tests/unit/test_world_model.py` coverage wa
 - [x] Security: cap firewall deny baseline amplification (`deny_ratio`/hourly deny volume) to prevent scenario-driven local DoS — `NetworkSensor.deny_ratio` now enforces `<= 50.0`.
 - [x] Security: prevent IPv6 scenario DoS in DNS AAAA fallback (`_ipv4_to_fake_ipv6` no longer evaluates for IPv6 destination IPs; AAAA uses mapped IPv6 or preserves IPv6 literal).
 - [x] Security: bounded/pruned ActivityGenerator DNS cache (60s prune cadence, 600s TTL-horizon eviction, 50k hard cap) to prevent unbounded memory growth from unique `(src_ip, hostname)` keys.
-- [ ] `eforge generate --force` overwrite can fail for scenarios that do not emit `GROUND_TRUTH.md` — explicit-proxy smoke testing exposed that replacing an existing output directory expects staged ground truth even when fresh no-storyline generation produced only `data/`. Decide whether no-storyline generation should always write an empty `GROUND_TRUTH.md` or overwrite swap should tolerate its absence.
+- [x] `eforge generate --force` overwrite can fail for scenarios that do not emit `GROUND_TRUTH.md` — fixed the root contract so every successful generation emits a matched `data/`, `GROUND_TRUTH.md`, and `OBSERVATION_MANIFEST.json` sidecar set, including baseline-only scenarios. The CLI swap stays strict and now requires staged data, ground truth, and observation manifest before replacing old output. Verification passed with focused engine/CLI/ground-truth/manifest tests, `eforge validate-config`, Ruff checks, and full normal `uv run pytest -v` (`3051 passed, 15 skipped`).
 
 - [x] **`uv.lock` not committed** — gitignored, so CI `setup-uv@v4` cache fails. Remove from `.gitignore` and commit.
 - [x] **`eforge validate` can't find personas in dev mode** — works when installed (`eforge validate`) but not via `uv run eforge validate`. Blocks dev workflow.
diff --git a/commands/eforge/generate.md b/commands/eforge/generate.md
index 02bd927b..e1045757 100644
--- a/commands/eforge/generate.md
+++ b/commands/eforge/generate.md
@@ -93,7 +93,8 @@ Generation writes log files to a `data/` subdirectory alongside the scenario fil
 scenarios/<scenario-name>/
   scenario.yaml          ← input
   ENVIRONMENT.md         ← created by /eforge scenario
-  GROUND_TRUTH.md        ← generated (answer key)
+  GROUND_TRUTH.md        ← generated answer key (states no malicious events for baseline-only runs)
+  OBSERVATION_MANIFEST.json ← generated source-observation sidecar
   data/                  ← generated log files
     windows/
       security.xml
@@ -104,14 +105,14 @@ scenarios/<scenario-name>/
     ...
 ```
 
-If `data/`, `GROUND_TRUTH.md`, or `ENVIRONMENT.md` already exist, the CLI prompts before overwriting. Use `--force` to skip the prompt (for automation / AI use).
+If generated output (`data/`, `GROUND_TRUTH.md`, or `OBSERVATION_MANIFEST.json`) already exists, the CLI prompts before overwriting. Use `--force` to skip the prompt (for automation / AI use). `ENVIRONMENT.md` is scenario-authored and is preserved.
 
 ### 3. Post-Generation
 
 After successful generation:
 - List the generated files and their sizes
 - Check that expected formats were produced
-- If the scenario had a storyline, note that `GROUND_TRUTH.md` was generated alongside the scenario file — this is the answer key containing the full attack timeline and IOCs
+- Note that `GROUND_TRUTH.md` and `OBSERVATION_MANIFEST.json` were generated alongside the scenario file. For baseline-only runs, `GROUND_TRUTH.md` explicitly says no malicious events were generated.
 - `ENVIRONMENT.md` (created by `/eforge scenario`) is already in the same directory — no copying needed
 - Note that the causal expansion engine auto-generates prerequisite events (DNS lookups before connections, Kerberos TGT/TGS before logons, audit events from command patterns, etc.) — these appear in the logs but are not explicitly listed in the scenario YAML
 - Summarize the output for the user
diff --git a/commands/eforge/references/evidence-formats.md b/commands/eforge/references/evidence-formats.md
index 7db99be7..9b7ed006 100644
--- a/commands/eforge/references/evidence-formats.md
+++ b/commands/eforge/references/evidence-formats.md
@@ -10,7 +10,8 @@ This document lists every evidence type EvidenceForge can generate, where to fin
 
 ```
 output/
-  GROUND_TRUTH.md                          # Attack narrative, timeline, IOCs
+  GROUND_TRUTH.md                          # Ground truth sidecar; states no malicious events for baseline-only runs
+  OBSERVATION_MANIFEST.json                # Source-observation sidecar for eval
   ENVIRONMENT.md                           # Student-facing environment description (created by /eforge scenario skill)
   <hostname.domain>/                       # Per-host directories (FQDN)
     windows_event_security.xml             # Windows Security channel events
diff --git a/docs/design/PRD.md b/docs/design/PRD.md
index e63f414c..9617aecf 100644
--- a/docs/design/PRD.md
+++ b/docs/design/PRD.md
@@ -36,7 +36,7 @@ The tool addresses the need for realistic, large-volume training datasets withou
 - Schema validation for scenario files (Pydantic-based)
 - Cross-reference validation (users, systems, personas, groups referenced correctly)
 - Evaluation framework with concrete metrics (format compliance, consistency, statistical properties)
-- Ground truth documentation (GROUND_TRUTH.md) for scenarios with malicious activity
+- Ground truth documentation (GROUND_TRUTH.md) for every generated scenario
 - Network topology and sensor placement modeling for traffic visibility
 - Persona-based temporal activity distribution with configurable work hours, intensity, and risk profiles
 - Comprehensive test coverage (95%+) with pytest
@@ -154,7 +154,7 @@ eforge generate SCENARIO_FILE [--output DIR] [--verbose] [--debug]
 9. Write to organized directory structure with incremental flushing (10K event buffer)
 10. Show progress with Rich progress bars (per-hour baseline, per-event storyline)
 11. Log details to `generation.log` in output directory
-12. Generate GROUND_TRUTH.md when malicious/suspicious activities are present
+12. Generate GROUND_TRUTH.md and OBSERVATION_MANIFEST.json sidecars
 
 #### Workflow 6: Evaluate Output
 ```bash
@@ -430,7 +430,8 @@ Generated logs are written to a timestamped output directory:
 output/
   scenario-name-YYYYMMDD-HHMMSS/
     generation.log              # Detailed generation log
-    GROUND_TRUTH.md            # Attack ground truth (if malicious activity present)
+    GROUND_TRUTH.md            # Ground truth sidecar (states no malicious events for baseline-only scenarios)
+    OBSERVATION_MANIFEST.json  # Source-observation sidecar
     windows_events.xml         # Windows Event Logs
     zeek_conn.log              # Zeek connection logs
     ecar.json                  # ECAR events
@@ -442,7 +443,7 @@ output/
 
 **GROUND_TRUTH.md Format**
 
-When a scenario includes malicious or suspicious activities (not baseline-only scenarios), the generator creates a GROUND_TRUTH.md file documenting the attack for training and evaluation purposes.
+Every successful generation creates a GROUND_TRUTH.md file. Attack/red-herring scenarios document the narrative, timeline, and IOCs for training and evaluation; baseline-only scenarios explicitly state that no malicious events were generated.
 
 ```markdown
 # Ground Truth: [Scenario Name]
diff --git a/docs/reference/EVIDENCE_FORMATS.md b/docs/reference/EVIDENCE_FORMATS.md
index 7db99be7..9b7ed006 100644
--- a/docs/reference/EVIDENCE_FORMATS.md
+++ b/docs/reference/EVIDENCE_FORMATS.md
@@ -10,7 +10,8 @@ This document lists every evidence type EvidenceForge can generate, where to fin
 
 ```
 output/
-  GROUND_TRUTH.md                          # Attack narrative, timeline, IOCs
+  GROUND_TRUTH.md                          # Ground truth sidecar; states no malicious events for baseline-only runs
+  OBSERVATION_MANIFEST.json                # Source-observation sidecar for eval
   ENVIRONMENT.md                           # Student-facing environment description (created by /eforge scenario skill)
   <hostname.domain>/                       # Per-host directories (FQDN)
     windows_event_security.xml             # Windows Security channel events
diff --git a/src/evidenceforge/cli/commands.py b/src/evidenceforge/cli/commands.py
index 632dca4a..83aaf111 100644
--- a/src/evidenceforge/cli/commands.py
+++ b/src/evidenceforge/cli/commands.py
@@ -278,7 +278,7 @@ def generate(
     console.print(f"\n[bold]Data directory:[/bold] {data_dir}")
     console.print(f"[bold]Ground truth:[/bold] {ground_truth_dir / 'GROUND_TRUTH.md'}")
 
-    # Check for existing generated output (data/ and GROUND_TRUTH.md only).
+    # Check for existing generated output (data/ and generated sidecars only).
     # ENVIRONMENT.md is authored by /eforge scenario, not the engine — never touch it.
     existing = []
     if data_dir.exists():
@@ -387,8 +387,8 @@ def progress_callback(event_type: str, data: dict) -> None:
 
         # Transactional swap: backup old → install new → cleanup backup.
         # If any step fails (including KeyboardInterrupt), old output is
-        # restored from backup. data/ and GROUND_TRUTH.md are always kept
-        # as a matched pair — partial preservation is never valid.
+        # restored from backup. data/ and generated sidecars are always kept
+        # as a matched set — partial preservation is never valid.
         if staging_dir:
             staged_gt = gen_gt_dir / "GROUND_TRUTH.md"
             staged_manifest = gen_gt_dir / OBSERVATION_MANIFEST_FILENAME
@@ -396,6 +396,10 @@ def progress_callback(event_type: str, data: dict) -> None:
                 raise RuntimeError("Staged data/ directory missing after generation")
             if not staged_gt.exists():
                 raise RuntimeError("Staged GROUND_TRUTH.md missing after generation")
+            if not staged_manifest.exists():
+                raise RuntimeError(
+                    f"Staged {OBSERVATION_MANIFEST_FILENAME} missing after generation"
+                )
 
             # Clean up stale rollback dirs from prior killed runs
             for stale in ground_truth_dir.glob(".eforge_rollback_*"):
diff --git a/src/evidenceforge/generation/engine/core.py b/src/evidenceforge/generation/engine/core.py
index c3a1043e..703b8e61 100644
--- a/src/evidenceforge/generation/engine/core.py
+++ b/src/evidenceforge/generation/engine/core.py
@@ -119,7 +119,7 @@ def generate(self) -> None:
         2. Generate baseline activity (hour-by-hour iteration)
         3. Execute storyline events (if present)
         4. Finalize and close emitters
-        5. Generate GROUND_TRUTH.md (if malicious activity present)
+        5. Generate GROUND_TRUTH.md and OBSERVATION_MANIFEST.json sidecars
         """
         logger.info(f"Starting generation for scenario: {self.scenario.name}")
 
@@ -185,17 +185,20 @@ def generate(self) -> None:
             self._finalize()
             self._report_progress("phase_end", {"phase": "finalize"})
 
-        # Phase 5: Generate ground truth (if malicious activity or red herrings present)
-        if self.malicious_events or self.red_herring_events:
-            logger.info(
-                f"Generating GROUND_TRUTH.md with {len(self.malicious_events)} malicious events"
-            )
-            self._report_progress(
-                "phase_start",
-                {"phase": "ground_truth", "description": "Generating ground truth documentation"},
-            )
-            self._generate_ground_truth()
-            self._report_progress("phase_end", {"phase": "ground_truth"})
+        # Phase 5: Generate sidecars for every successful run. Baseline-only
+        # datasets still need a "no malicious events" GROUND_TRUTH.md so CLI
+        # overwrite swaps can keep data and sidecars as a matched set.
+        logger.info(
+            "Generating GROUND_TRUTH.md with %d malicious events and %d red herrings",
+            len(self.malicious_events),
+            len(self.red_herring_events),
+        )
+        self._report_progress(
+            "phase_start",
+            {"phase": "ground_truth", "description": "Generating ground truth documentation"},
+        )
+        self._generate_ground_truth()
+        self._report_progress("phase_end", {"phase": "ground_truth"})
 
         logger.info("Generation complete")
 
@@ -464,7 +467,7 @@ def _finalize(self) -> None:
         logger.info("All emitters closed")
 
     def _generate_ground_truth(self) -> None:
-        """Generate GROUND_TRUTH.md documentation."""
+        """Generate GROUND_TRUTH.md and observation manifest sidecars."""
         from evidenceforge.events.observation_manifest import (
             OBSERVATION_MANIFEST_FILENAME,
             write_observation_manifest,
diff --git a/src/evidenceforge/generation/ground_truth.py b/src/evidenceforge/generation/ground_truth.py
index d7cfb3f7..da21bd15 100644
--- a/src/evidenceforge/generation/ground_truth.py
+++ b/src/evidenceforge/generation/ground_truth.py
@@ -509,34 +509,34 @@ def _format_iocs(self, iocs: dict[str, set]) -> str:
         Returns:
             Formatted IOC sections (Markdown)
         """
-        if not iocs:
+        if not iocs or not any(values for values in iocs.values()):
             return "*No IOCs extracted.*\n"
 
         sections = []
 
         # Network IOCs
-        if "network" in iocs:
+        if iocs.get("network"):
             sections.append("### Network IOCs\n")
             for ioc in sorted(iocs["network"]):
                 sections.append(f"- {ioc}")
             sections.append("")
 
         # Process IOCs
-        if "processes" in iocs:
+        if iocs.get("processes"):
             sections.append("### Process IOCs\n")
             for ioc in sorted(iocs["processes"]):
                 sections.append(f"- {ioc}")
             sections.append("")
 
         # User IOCs
-        if "users" in iocs:
+        if iocs.get("users"):
             sections.append("### User IOCs\n")
             for ioc in sorted(iocs["users"]):
                 sections.append(f"- {ioc} (compromised account)")
             sections.append("")
 
         # File IOCs
-        if "files" in iocs:
+        if iocs.get("files"):
             sections.append("### File IOCs\n")
             for ioc in sorted(iocs["files"]):
                 sections.append(f"- {ioc}")
diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py
index 5ad5db32..1c0c20c2 100644
--- a/tests/unit/test_cli.py
+++ b/tests/unit/test_cli.py
@@ -35,6 +35,7 @@
     EXIT_SUCCESS,
     app,
 )
+from evidenceforge.events.observation_manifest import OBSERVATION_MANIFEST_FILENAME
 
 runner = CliRunner()
 
@@ -212,6 +213,7 @@ def _fake_generate():
                 (sd / "data").mkdir(exist_ok=True)
                 (sd / "data" / "new.xml").write_text("new data")
                 (sd / "GROUND_TRUTH.md").write_text("new ground truth")
+                (sd / OBSERVATION_MANIFEST_FILENAME).write_text('{"schema_version": 1}')
 
         mock_engine = Mock()
         mock_engine.generate.side_effect = _fake_generate
@@ -272,6 +274,7 @@ def _fake_generate():
                 (sd / "data").mkdir(exist_ok=True)
                 (sd / "data" / "new.xml").write_text("new data")
                 (sd / "GROUND_TRUTH.md").write_text("new ground truth")
+                (sd / OBSERVATION_MANIFEST_FILENAME).write_text('{"schema_version": 1}')
 
         mock_engine = Mock()
         mock_engine.generate.side_effect = _fake_generate
@@ -280,6 +283,7 @@ def _fake_generate():
         # Create existing output files
         (tmp_path / "data").mkdir()
         (tmp_path / "GROUND_TRUTH.md").write_text("old")
+        (tmp_path / OBSERVATION_MANIFEST_FILENAME).write_text("old manifest")
         (tmp_path / "ENVIRONMENT.md").write_text("old")
 
         result = runner.invoke(
@@ -297,11 +301,59 @@ def _fake_generate():
         assert "Overwrite existing output?" not in result.stdout
         assert mock_engine.generate.called
         assert (tmp_path / "GROUND_TRUTH.md").read_text() == "new ground truth"
+        assert (tmp_path / OBSERVATION_MANIFEST_FILENAME).read_text() == '{"schema_version": 1}'
         assert (tmp_path / "data" / "new.xml").read_text() == "new data"
         # ENVIRONMENT.md must be preserved (not engine output)
         assert (tmp_path / "ENVIRONMENT.md").exists()
         assert (tmp_path / "ENVIRONMENT.md").read_text() == "old"
 
+    @patch("evidenceforge.cli.commands.GenerationEngine")
+    def test_generate_force_baseline_only_replaces_complete_sidecar_set(
+        self, mock_engine_class, scenarios_dir, tmp_path
+    ):
+        """--force should swap baseline-only outputs with data, ground truth, and manifest."""
+
+        def _fake_generate():
+            staging_dirs = list(tmp_path.glob(".eforge_staging_*"))
+            if staging_dirs:
+                sd = staging_dirs[0]
+                (sd / "data").mkdir(exist_ok=True)
+                (sd / "data" / "baseline.log").write_text("new baseline data")
+                (sd / "GROUND_TRUTH.md").write_text(
+                    "# Ground Truth: baseline-only\n\n*No malicious activities in this scenario.*\n"
+                )
+                (sd / OBSERVATION_MANIFEST_FILENAME).write_text(
+                    '{"schema_version": 1, "scenario_name": "baseline-only"}'
+                )
+
+        mock_engine = Mock()
+        mock_engine.generate.side_effect = _fake_generate
+        mock_engine_class.return_value = mock_engine
+
+        (tmp_path / "data").mkdir()
+        (tmp_path / "data" / "old.log").write_text("old data")
+        (tmp_path / "GROUND_TRUTH.md").write_text("old ground truth")
+        (tmp_path / OBSERVATION_MANIFEST_FILENAME).write_text("old manifest")
+        (tmp_path / "ENVIRONMENT.md").write_text("scenario-authored")
+
+        result = runner.invoke(
+            app,
+            [
+                "generate",
+                str(scenarios_dir / "baseline-only.yaml"),
+                "--output",
+                str(tmp_path),
+                "--force",
+            ],
+        )
+
+        assert result.exit_code == EXIT_SUCCESS
+        assert not (tmp_path / "data" / "old.log").exists()
+        assert (tmp_path / "data" / "baseline.log").read_text() == "new baseline data"
+        assert "No malicious activities" in (tmp_path / "GROUND_TRUTH.md").read_text()
+        assert "baseline-only" in (tmp_path / OBSERVATION_MANIFEST_FILENAME).read_text()
+        assert (tmp_path / "ENVIRONMENT.md").read_text() == "scenario-authored"
+
     @patch("evidenceforge.cli.commands.GenerationEngine")
     def test_generate_force_preserves_old_output_on_failure(
         self, mock_engine_class, scenarios_dir, tmp_path
@@ -364,6 +416,7 @@ def _fake_generate():
                 (sd / "data").mkdir(exist_ok=True)
                 (sd / "data" / "new.xml").write_text("new data")
                 (sd / "GROUND_TRUTH.md").write_text("new ground truth")
+                (sd / OBSERVATION_MANIFEST_FILENAME).write_text('{"schema_version": 1}')
 
         mock_engine = Mock()
         mock_engine.generate.side_effect = _fake_generate
@@ -415,6 +468,7 @@ def _fake_generate():
                 (sd / "data").mkdir(exist_ok=True)
                 (sd / "data" / "new.xml").write_text("new data")
                 (sd / "GROUND_TRUTH.md").write_text("new ground truth")
+                (sd / OBSERVATION_MANIFEST_FILENAME).write_text('{"schema_version": 1}')
 
         mock_engine = Mock()
         mock_engine.generate.side_effect = _fake_generate
@@ -485,6 +539,7 @@ def _fake_generate():
                 (sd / "data").mkdir(exist_ok=True)
                 (sd / "data" / "new.xml").write_text("new data")
                 (sd / "GROUND_TRUTH.md").write_text("new ground truth")
+                (sd / OBSERVATION_MANIFEST_FILENAME).write_text('{"schema_version": 1}')
 
         mock_engine = Mock()
         mock_engine.generate.side_effect = _fake_generate
@@ -548,6 +603,45 @@ def _fake_generate_no_gt():
         assert (tmp_path / "data" / "old.xml").read_text() == "old data"
         assert (tmp_path / "GROUND_TRUTH.md").read_text() == "old ground truth"
 
+    @patch("evidenceforge.cli.commands.GenerationEngine")
+    def test_force_swap_requires_staged_manifest(self, mock_engine_class, scenarios_dir, tmp_path):
+        """If engine succeeds but staged observation manifest is missing, old output preserved."""
+
+        def _fake_generate_no_manifest():
+            staging_dirs = list(tmp_path.glob(".eforge_staging_*"))
+            if staging_dirs:
+                sd = staging_dirs[0]
+                (sd / "data").mkdir(exist_ok=True)
+                (sd / "data" / "new.xml").write_text("new data")
+                (sd / "GROUND_TRUTH.md").write_text("new ground truth")
+                # Deliberately skip creating OBSERVATION_MANIFEST.json
+
+        mock_engine = Mock()
+        mock_engine.generate.side_effect = _fake_generate_no_manifest
+        mock_engine_class.return_value = mock_engine
+
+        (tmp_path / "data").mkdir()
+        (tmp_path / "data" / "old.xml").write_text("old data")
+        (tmp_path / "GROUND_TRUTH.md").write_text("old ground truth")
+        (tmp_path / OBSERVATION_MANIFEST_FILENAME).write_text("old manifest")
+
+        result = runner.invoke(
+            app,
+            [
+                "generate",
+                str(scenarios_dir / "minimal.yaml"),
+                "--output",
+                str(tmp_path),
+                "--force",
+            ],
+        )
+
+        assert result.exit_code == EXIT_GENERATION_ERROR
+        assert (tmp_path / "data" / "old.xml").exists()
+        assert (tmp_path / "data" / "old.xml").read_text() == "old data"
+        assert (tmp_path / "GROUND_TRUTH.md").read_text() == "old ground truth"
+        assert (tmp_path / OBSERVATION_MANIFEST_FILENAME).read_text() == "old manifest"
+
     @patch("evidenceforge.cli.commands.GenerationEngine")
     def test_force_swap_cleans_stale_rollback(self, mock_engine_class, scenarios_dir, tmp_path):
         """Stale rollback dirs from prior killed runs are cleaned up."""
@@ -559,6 +653,7 @@ def _fake_generate():
                 (sd / "data").mkdir(exist_ok=True)
                 (sd / "data" / "new.xml").write_text("new data")
                 (sd / "GROUND_TRUTH.md").write_text("new ground truth")
+                (sd / OBSERVATION_MANIFEST_FILENAME).write_text('{"schema_version": 1}')
 
         mock_engine = Mock()
         mock_engine.generate.side_effect = _fake_generate
diff --git a/tests/unit/test_engine.py b/tests/unit/test_engine.py
index afa786b9..490b1ac9 100644
--- a/tests/unit/test_engine.py
+++ b/tests/unit/test_engine.py
@@ -27,6 +27,7 @@
 
 import pytest
 
+from evidenceforge.events.observation_manifest import OBSERVATION_MANIFEST_FILENAME
 from evidenceforge.generation.engine import GenerationEngine
 from evidenceforge.generation.engine.storyline import _estimate_process_lifetime
 from evidenceforge.models import (
@@ -872,7 +873,7 @@ def test_generate_calls_ground_truth_when_malicious_events(
     @patch("evidenceforge.generation.engine.emitter_setup.WindowsEventEmitter")
     @patch("evidenceforge.generation.engine.emitter_setup.SysmonEventEmitter")
     @patch("evidenceforge.generation.engine.emitter_setup.load_format")
-    def test_generate_skips_ground_truth_without_malicious_events(
+    def test_generate_calls_ground_truth_without_malicious_events(
         self,
         mock_load_format,
         mock_sysmon,
@@ -895,7 +896,67 @@ def test_generate_skips_ground_truth_without_malicious_events(
         minimal_scenario,
         tmp_path,
     ):
-        """Should NOT generate ground truth for baseline-only scenarios."""
+        """Baseline-only scenarios should still generate matched sidecars."""
+        mock_format_def = Mock()
+        mock_format_def.output.file_extension = ".log"
+        mock_load_format.return_value = mock_format_def
+
+        mock_activity_instance = Mock()
+        mock_activity_instance.get_baseline_pattern.return_value = []
+        mock_activity_gen.return_value = mock_activity_instance
+
+        mock_gt_instance = Mock()
+        mock_gt_gen.return_value = mock_gt_instance
+
+        engine = GenerationEngine(minimal_scenario, tmp_path)
+        engine.generate()
+
+        assert mock_gt_gen.called
+        assert mock_gt_gen.call_args.kwargs["malicious_events"] == []
+        assert mock_gt_gen.call_args.kwargs["red_herring_events"] == []
+        assert mock_gt_instance.generate.called
+        assert (tmp_path / OBSERVATION_MANIFEST_FILENAME).exists()
+
+    @patch("evidenceforge.generation.engine.core.ActivityGenerator")
+    @patch("evidenceforge.generation.engine.emitter_setup.ZeekReporterEmitter")
+    @patch("evidenceforge.generation.engine.emitter_setup.ZeekPacketFilterEmitter")
+    @patch("evidenceforge.generation.engine.emitter_setup.ZeekPeEmitter")
+    @patch("evidenceforge.generation.engine.emitter_setup.ZeekOcspEmitter")
+    @patch("evidenceforge.generation.engine.emitter_setup.ZeekX509Emitter")
+    @patch("evidenceforge.generation.engine.emitter_setup.ZeekWeirdEmitter")
+    @patch("evidenceforge.generation.engine.emitter_setup.ZeekNtpEmitter")
+    @patch("evidenceforge.generation.engine.emitter_setup.ZeekDhcpEmitter")
+    @patch("evidenceforge.generation.engine.emitter_setup.ZeekFilesEmitter")
+    @patch("evidenceforge.generation.engine.emitter_setup.ZeekSslEmitter")
+    @patch("evidenceforge.generation.engine.emitter_setup.ZeekHttpEmitter")
+    @patch("evidenceforge.generation.engine.emitter_setup.ZeekDnsEmitter")
+    @patch("evidenceforge.generation.engine.emitter_setup.ZeekEmitter")
+    @patch("evidenceforge.generation.engine.emitter_setup.WindowsEventEmitter")
+    @patch("evidenceforge.generation.engine.emitter_setup.SysmonEventEmitter")
+    @patch("evidenceforge.generation.engine.emitter_setup.load_format")
+    def test_generate_baseline_only_writes_ground_truth_and_manifest(
+        self,
+        mock_load_format,
+        mock_sysmon,
+        mock_windows,
+        mock_zeek,
+        mock_zeek_dns,
+        mock_zeek_http,
+        mock_zeek_ssl,
+        mock_zeek_files,
+        mock_zeek_dhcp,
+        mock_zeek_ntp,
+        mock_zeek_weird,
+        mock_zeek_x509,
+        mock_zeek_ocsp,
+        mock_zeek_pe,
+        mock_zeek_pf,
+        mock_zeek_reporter,
+        mock_activity_gen,
+        minimal_scenario,
+        tmp_path,
+    ):
+        """A successful baseline-only generation writes the complete sidecar set."""
         mock_format_def = Mock()
         mock_format_def.output.file_extension = ".log"
         mock_load_format.return_value = mock_format_def
@@ -907,8 +968,12 @@ def test_generate_skips_ground_truth_without_malicious_events(
         engine = GenerationEngine(minimal_scenario, tmp_path)
         engine.generate()
 
-        # Ground truth generator should NOT be called
-        assert not mock_gt_gen.called
+        ground_truth = tmp_path / "GROUND_TRUTH.md"
+        manifest = tmp_path / OBSERVATION_MANIFEST_FILENAME
+        assert ground_truth.exists()
+        assert manifest.exists()
+        assert "No malicious activities" in ground_truth.read_text()
+        assert "No malicious events were generated" in ground_truth.read_text()
 
     @patch("evidenceforge.generation.engine.core.ActivityGenerator")
     @patch("evidenceforge.generation.engine.emitter_setup.ZeekReporterEmitter")
diff --git a/tests/unit/test_ground_truth.py b/tests/unit/test_ground_truth.py
index 8c9b704e..15e1f08e 100644
--- a/tests/unit/test_ground_truth.py
+++ b/tests/unit/test_ground_truth.py
@@ -469,6 +469,18 @@ def test_format_iocs_empty(self, minimal_scenario, malicious_events):
 
         assert "No IOCs extracted" in formatted
 
+    def test_format_iocs_empty_categories(self, minimal_scenario, malicious_events):
+        """_format_iocs() should not emit blank headings for empty IOC categories."""
+        generator = GroundTruthGenerator(minimal_scenario, malicious_events)
+
+        formatted = generator._format_iocs(
+            {"network": set(), "processes": set(), "users": set(), "files": set()}
+        )
+
+        assert "No IOCs extracted" in formatted
+        assert "### Network IOCs" not in formatted
+        assert "### Process IOCs" not in formatted
+
     def test_format_iocs_sorted(self, minimal_scenario, malicious_events):
         """_format_iocs() should sort IOCs alphabetically."""
         iocs = {"users": {"zebra", "alpha", "beta"}}