From df2a446bb451ce8786956cf9f6bf123ae59edb89 Mon Sep 17 00:00:00 2001 From: "David J. Bianco" Date: Fri, 15 May 2026 12:31:28 -0400 Subject: [PATCH] fix: always emit generation sidecars --- README.md | 4 +- TODO.md | 2 +- commands/eforge/generate.md | 7 +- .../eforge/references/evidence-formats.md | 3 +- docs/design/PRD.md | 9 +- docs/reference/EVIDENCE_FORMATS.md | 3 +- src/evidenceforge/cli/commands.py | 10 +- src/evidenceforge/generation/engine/core.py | 29 +++--- src/evidenceforge/generation/ground_truth.py | 10 +- tests/unit/test_cli.py | 95 +++++++++++++++++++ tests/unit/test_engine.py | 73 +++++++++++++- tests/unit/test_ground_truth.py | 12 +++ 12 files changed, 220 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index 95a5dd37..da506e9c 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,7 @@ For details on the overlay system, manual editing, and cross-file dependencies, EvidenceForge creates multi-format security log datasets from YAML scenario definitions. You describe an environment (users, systems, network topology) and a storyline (attack events), and EvidenceForge generates temporally consistent logs across all formats simultaneously — complete with cross-referenced LogonIDs, PIDs, timestamps, and UIDs. -Every attack scenario includes a `GROUND_TRUTH.md` file documenting exactly what happened, when, and where — making the datasets immediately usable for threat hunting training. +Every generated scenario includes a `GROUND_TRUTH.md` file. Attack scenarios document exactly what happened, when, and where, while baseline-only scenarios explicitly document that no malicious events were generated. 
### Key Capabilities @@ -106,7 +106,7 @@ Every attack scenario includes a `GROUND_TRUTH.md` file documenting exactly what - **Realistic baseline noise** — 26 lateral movement patterns, process→network correlation, network-level red herrings, and 18 Linux syslog categories create noise that analysts must work through - **OS-aware generation** — Windows systems produce Windows Event + Sysmon logs; Linux systems produce syslog + bash history - **Network visibility modeling** — Define sensor placement (SPAN/TAP), direction, and monitored segments -- **Ground truth documentation** — Every attack scenario generates a GROUND_TRUTH.md with narrative, timeline, and IOCs +- **Ground truth documentation** — Every run generates a GROUND_TRUTH.md; attack scenarios include narrative, timeline, and IOCs - **Parallel generation** — Threaded emitters write all formats simultaneously with temporal consistency - **Scenario validation** — Cross-reference checking, uniqueness constraints, and network topology validation - **Data quality evaluation** — 5-dimension scoring framework (23 sub-scores) with acceptance criteria diff --git a/TODO.md b/TODO.md index dc5796e6..edb9a2ff 100644 --- a/TODO.md +++ b/TODO.md @@ -334,7 +334,7 @@ Verification is complete: dedicated `tests/unit/test_world_model.py` coverage wa - [x] Security: cap firewall deny baseline amplification (`deny_ratio`/hourly deny volume) to prevent scenario-driven local DoS — `NetworkSensor.deny_ratio` now enforces `<= 50.0`. - [x] Security: prevent IPv6 scenario DoS in DNS AAAA fallback (`_ipv4_to_fake_ipv6` no longer evaluates for IPv6 destination IPs; AAAA uses mapped IPv6 or preserves IPv6 literal). - [x] Security: bounded/pruned ActivityGenerator DNS cache (60s prune cadence, 600s TTL-horizon eviction, 50k hard cap) to prevent unbounded memory growth from unique `(src_ip, hostname)` keys. 
-- [ ] `eforge generate --force` overwrite can fail for scenarios that do not emit `GROUND_TRUTH.md` — explicit-proxy smoke testing exposed that replacing an existing output directory expects staged ground truth even when fresh no-storyline generation produced only `data/`. Decide whether no-storyline generation should always write an empty `GROUND_TRUTH.md` or overwrite swap should tolerate its absence. +- [x] `eforge generate --force` overwrite can fail for scenarios that do not emit `GROUND_TRUTH.md` — fixed the root contract so every successful generation emits a matched `data/`, `GROUND_TRUTH.md`, and `OBSERVATION_MANIFEST.json` sidecar set, including baseline-only scenarios. The CLI swap stays strict and now requires staged data, ground truth, and observation manifest before replacing old output. Verification passed with focused engine/CLI/ground-truth/manifest tests, `eforge validate-config`, Ruff checks, and full normal `uv run pytest -v` (`3051 passed, 15 skipped`). - [x] **`uv.lock` not committed** — gitignored, so CI `setup-uv@v4` cache fails. Remove from `.gitignore` and commit. - [x] **`eforge validate` can't find personas in dev mode** — works when installed (`eforge validate`) but not via `uv run eforge validate`. Blocks dev workflow. diff --git a/commands/eforge/generate.md b/commands/eforge/generate.md index 02bd927b..e1045757 100644 --- a/commands/eforge/generate.md +++ b/commands/eforge/generate.md @@ -93,7 +93,8 @@ Generation writes log files to a `data/` subdirectory alongside the scenario fil scenarios// scenario.yaml ← input ENVIRONMENT.md ← created by /eforge scenario - GROUND_TRUTH.md ← generated (answer key) + GROUND_TRUTH.md ← generated answer key (states no malicious events for baseline-only runs) + OBSERVATION_MANIFEST.json ← generated source-observation sidecar data/ ← generated log files windows/ security.xml @@ -104,14 +105,14 @@ scenarios// ... 
``` -If `data/`, `GROUND_TRUTH.md`, or `ENVIRONMENT.md` already exist, the CLI prompts before overwriting. Use `--force` to skip the prompt (for automation / AI use). +If generated output (`data/`, `GROUND_TRUTH.md`, or `OBSERVATION_MANIFEST.json`) already exists, the CLI prompts before overwriting. Use `--force` to skip the prompt (for automation / AI use). `ENVIRONMENT.md` is scenario-authored and is preserved. ### 3. Post-Generation After successful generation: - List the generated files and their sizes - Check that expected formats were produced -- If the scenario had a storyline, note that `GROUND_TRUTH.md` was generated alongside the scenario file — this is the answer key containing the full attack timeline and IOCs +- Note that `GROUND_TRUTH.md` and `OBSERVATION_MANIFEST.json` were generated alongside the scenario file. For baseline-only runs, `GROUND_TRUTH.md` explicitly says no malicious events were generated. - `ENVIRONMENT.md` (created by `/eforge scenario`) is already in the same directory — no copying needed - Note that the causal expansion engine auto-generates prerequisite events (DNS lookups before connections, Kerberos TGT/TGS before logons, audit events from command patterns, etc.) 
— these appear in the logs but are not explicitly listed in the scenario YAML - Summarize the output for the user diff --git a/commands/eforge/references/evidence-formats.md b/commands/eforge/references/evidence-formats.md index 7db99be7..9b7ed006 100644 --- a/commands/eforge/references/evidence-formats.md +++ b/commands/eforge/references/evidence-formats.md @@ -10,7 +10,8 @@ This document lists every evidence type EvidenceForge can generate, where to fin ``` output/ - GROUND_TRUTH.md # Attack narrative, timeline, IOCs + GROUND_TRUTH.md # Ground truth sidecar; states no malicious events for baseline-only runs + OBSERVATION_MANIFEST.json # Source-observation sidecar for eval ENVIRONMENT.md # Student-facing environment description (created by /eforge scenario skill) / # Per-host directories (FQDN) windows_event_security.xml # Windows Security channel events diff --git a/docs/design/PRD.md b/docs/design/PRD.md index e63f414c..9617aecf 100644 --- a/docs/design/PRD.md +++ b/docs/design/PRD.md @@ -36,7 +36,7 @@ The tool addresses the need for realistic, large-volume training datasets withou - Schema validation for scenario files (Pydantic-based) - Cross-reference validation (users, systems, personas, groups referenced correctly) - Evaluation framework with concrete metrics (format compliance, consistency, statistical properties) -- Ground truth documentation (GROUND_TRUTH.md) for scenarios with malicious activity +- Ground truth documentation (GROUND_TRUTH.md) for every generated scenario - Network topology and sensor placement modeling for traffic visibility - Persona-based temporal activity distribution with configurable work hours, intensity, and risk profiles - Comprehensive test coverage (95%+) with pytest @@ -154,7 +154,7 @@ eforge generate SCENARIO_FILE [--output DIR] [--verbose] [--debug] 9. Write to organized directory structure with incremental flushing (10K event buffer) 10. Show progress with Rich progress bars (per-hour baseline, per-event storyline) 11. 
Log details to `generation.log` in output directory -12. Generate GROUND_TRUTH.md when malicious/suspicious activities are present +12. Generate GROUND_TRUTH.md and OBSERVATION_MANIFEST.json sidecars #### Workflow 6: Evaluate Output ```bash @@ -430,7 +430,8 @@ Generated logs are written to a timestamped output directory: output/ scenario-name-YYYYMMDD-HHMMSS/ generation.log # Detailed generation log - GROUND_TRUTH.md # Attack ground truth (if malicious activity present) + GROUND_TRUTH.md # Ground truth sidecar (states no malicious events for baseline-only scenarios) + OBSERVATION_MANIFEST.json # Source-observation sidecar windows_events.xml # Windows Event Logs zeek_conn.log # Zeek connection logs ecar.json # ECAR events @@ -442,7 +443,7 @@ output/ **GROUND_TRUTH.md Format** -When a scenario includes malicious or suspicious activities (not baseline-only scenarios), the generator creates a GROUND_TRUTH.md file documenting the attack for training and evaluation purposes. +Every successful generation creates a GROUND_TRUTH.md file. Attack/red-herring scenarios document the narrative, timeline, and IOCs for training and evaluation; baseline-only scenarios explicitly state that no malicious events were generated. 
```markdown # Ground Truth: [Scenario Name] diff --git a/docs/reference/EVIDENCE_FORMATS.md b/docs/reference/EVIDENCE_FORMATS.md index 7db99be7..9b7ed006 100644 --- a/docs/reference/EVIDENCE_FORMATS.md +++ b/docs/reference/EVIDENCE_FORMATS.md @@ -10,7 +10,8 @@ This document lists every evidence type EvidenceForge can generate, where to fin ``` output/ - GROUND_TRUTH.md # Attack narrative, timeline, IOCs + GROUND_TRUTH.md # Ground truth sidecar; states no malicious events for baseline-only runs + OBSERVATION_MANIFEST.json # Source-observation sidecar for eval ENVIRONMENT.md # Student-facing environment description (created by /eforge scenario skill) / # Per-host directories (FQDN) windows_event_security.xml # Windows Security channel events diff --git a/src/evidenceforge/cli/commands.py b/src/evidenceforge/cli/commands.py index 632dca4a..83aaf111 100644 --- a/src/evidenceforge/cli/commands.py +++ b/src/evidenceforge/cli/commands.py @@ -278,7 +278,7 @@ def generate( console.print(f"\n[bold]Data directory:[/bold] {data_dir}") console.print(f"[bold]Ground truth:[/bold] {ground_truth_dir / 'GROUND_TRUTH.md'}") - # Check for existing generated output (data/ and GROUND_TRUTH.md only). + # Check for existing generated output (data/ and generated sidecars only). # ENVIRONMENT.md is authored by /eforge scenario, not the engine — never touch it. existing = [] if data_dir.exists(): @@ -387,8 +387,8 @@ def progress_callback(event_type: str, data: dict) -> None: # Transactional swap: backup old → install new → cleanup backup. # If any step fails (including KeyboardInterrupt), old output is - # restored from backup. data/ and GROUND_TRUTH.md are always kept - # as a matched pair — partial preservation is never valid. + # restored from backup. data/ and generated sidecars are always kept + # as a matched set — partial preservation is never valid. 
if staging_dir: staged_gt = gen_gt_dir / "GROUND_TRUTH.md" staged_manifest = gen_gt_dir / OBSERVATION_MANIFEST_FILENAME @@ -396,6 +396,10 @@ def progress_callback(event_type: str, data: dict) -> None: raise RuntimeError("Staged data/ directory missing after generation") if not staged_gt.exists(): raise RuntimeError("Staged GROUND_TRUTH.md missing after generation") + if not staged_manifest.exists(): + raise RuntimeError( + f"Staged {OBSERVATION_MANIFEST_FILENAME} missing after generation" + ) # Clean up stale rollback dirs from prior killed runs for stale in ground_truth_dir.glob(".eforge_rollback_*"): diff --git a/src/evidenceforge/generation/engine/core.py b/src/evidenceforge/generation/engine/core.py index c3a1043e..703b8e61 100644 --- a/src/evidenceforge/generation/engine/core.py +++ b/src/evidenceforge/generation/engine/core.py @@ -119,7 +119,7 @@ def generate(self) -> None: 2. Generate baseline activity (hour-by-hour iteration) 3. Execute storyline events (if present) 4. Finalize and close emitters - 5. Generate GROUND_TRUTH.md (if malicious activity present) + 5. Generate GROUND_TRUTH.md and OBSERVATION_MANIFEST.json sidecars """ logger.info(f"Starting generation for scenario: {self.scenario.name}") @@ -185,17 +185,20 @@ def generate(self) -> None: self._finalize() self._report_progress("phase_end", {"phase": "finalize"}) - # Phase 5: Generate ground truth (if malicious activity or red herrings present) - if self.malicious_events or self.red_herring_events: - logger.info( - f"Generating GROUND_TRUTH.md with {len(self.malicious_events)} malicious events" - ) - self._report_progress( - "phase_start", - {"phase": "ground_truth", "description": "Generating ground truth documentation"}, - ) - self._generate_ground_truth() - self._report_progress("phase_end", {"phase": "ground_truth"}) + # Phase 5: Generate sidecars for every successful run. 
Baseline-only + # datasets still need a no-attack GROUND_TRUTH.md so CLI overwrite swaps + # can keep data and its sidecars as a matched set. + logger.info( + "Generating GROUND_TRUTH.md with %d malicious events and %d red herrings", + len(self.malicious_events), + len(self.red_herring_events), + ) + self._report_progress( + "phase_start", + {"phase": "ground_truth", "description": "Generating ground truth documentation"}, + ) + self._generate_ground_truth() + self._report_progress("phase_end", {"phase": "ground_truth"}) logger.info("Generation complete") @@ -464,7 +467,7 @@ def _finalize(self) -> None: logger.info("All emitters closed") def _generate_ground_truth(self) -> None: - """Generate GROUND_TRUTH.md documentation.""" + """Generate GROUND_TRUTH.md and observation manifest sidecars.""" from evidenceforge.events.observation_manifest import ( OBSERVATION_MANIFEST_FILENAME, write_observation_manifest, diff --git a/src/evidenceforge/generation/ground_truth.py b/src/evidenceforge/generation/ground_truth.py index d7cfb3f7..da21bd15 100644 --- a/src/evidenceforge/generation/ground_truth.py +++ b/src/evidenceforge/generation/ground_truth.py @@ -509,34 +509,34 @@ def _format_iocs(self, iocs: dict[str, set]) -> str: Returns: Formatted IOC sections (Markdown) """ - if not iocs: + if not iocs or not any(values for values in iocs.values()): return "*No IOCs extracted.*\n" sections = [] # Network IOCs - if "network" in iocs: + if iocs.get("network"): sections.append("### Network IOCs\n") for ioc in sorted(iocs["network"]): sections.append(f"- {ioc}") sections.append("") # Process IOCs - if "processes" in iocs: + if iocs.get("processes"): sections.append("### Process IOCs\n") for ioc in sorted(iocs["processes"]): sections.append(f"- {ioc}") sections.append("") # User IOCs - if "users" in iocs: + if iocs.get("users"): sections.append("### User IOCs\n") for ioc in sorted(iocs["users"]): sections.append(f"- {ioc} (compromised account)") sections.append("") # File IOCs - if "files" 
in iocs: + if iocs.get("files"): sections.append("### File IOCs\n") for ioc in sorted(iocs["files"]): sections.append(f"- {ioc}") diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index 5ad5db32..1c0c20c2 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -35,6 +35,7 @@ EXIT_SUCCESS, app, ) +from evidenceforge.events.observation_manifest import OBSERVATION_MANIFEST_FILENAME runner = CliRunner() @@ -212,6 +213,7 @@ def _fake_generate(): (sd / "data").mkdir(exist_ok=True) (sd / "data" / "new.xml").write_text("new data") (sd / "GROUND_TRUTH.md").write_text("new ground truth") + (sd / OBSERVATION_MANIFEST_FILENAME).write_text('{"schema_version": 1}') mock_engine = Mock() mock_engine.generate.side_effect = _fake_generate @@ -272,6 +274,7 @@ def _fake_generate(): (sd / "data").mkdir(exist_ok=True) (sd / "data" / "new.xml").write_text("new data") (sd / "GROUND_TRUTH.md").write_text("new ground truth") + (sd / OBSERVATION_MANIFEST_FILENAME).write_text('{"schema_version": 1}') mock_engine = Mock() mock_engine.generate.side_effect = _fake_generate @@ -280,6 +283,7 @@ def _fake_generate(): # Create existing output files (tmp_path / "data").mkdir() (tmp_path / "GROUND_TRUTH.md").write_text("old") + (tmp_path / OBSERVATION_MANIFEST_FILENAME).write_text("old manifest") (tmp_path / "ENVIRONMENT.md").write_text("old") result = runner.invoke( @@ -297,11 +301,59 @@ def _fake_generate(): assert "Overwrite existing output?" 
not in result.stdout assert mock_engine.generate.called assert (tmp_path / "GROUND_TRUTH.md").read_text() == "new ground truth" + assert (tmp_path / OBSERVATION_MANIFEST_FILENAME).read_text() == '{"schema_version": 1}' assert (tmp_path / "data" / "new.xml").read_text() == "new data" # ENVIRONMENT.md must be preserved (not engine output) assert (tmp_path / "ENVIRONMENT.md").exists() assert (tmp_path / "ENVIRONMENT.md").read_text() == "old" + @patch("evidenceforge.cli.commands.GenerationEngine") + def test_generate_force_baseline_only_replaces_complete_sidecar_set( + self, mock_engine_class, scenarios_dir, tmp_path + ): + """--force should swap baseline-only outputs with data, ground truth, and manifest.""" + + def _fake_generate(): + staging_dirs = list(tmp_path.glob(".eforge_staging_*")) + if staging_dirs: + sd = staging_dirs[0] + (sd / "data").mkdir(exist_ok=True) + (sd / "data" / "baseline.log").write_text("new baseline data") + (sd / "GROUND_TRUTH.md").write_text( + "# Ground Truth: baseline-only\n\n*No malicious activities in this scenario.*\n" + ) + (sd / OBSERVATION_MANIFEST_FILENAME).write_text( + '{"schema_version": 1, "scenario_name": "baseline-only"}' + ) + + mock_engine = Mock() + mock_engine.generate.side_effect = _fake_generate + mock_engine_class.return_value = mock_engine + + (tmp_path / "data").mkdir() + (tmp_path / "data" / "old.log").write_text("old data") + (tmp_path / "GROUND_TRUTH.md").write_text("old ground truth") + (tmp_path / OBSERVATION_MANIFEST_FILENAME).write_text("old manifest") + (tmp_path / "ENVIRONMENT.md").write_text("scenario-authored") + + result = runner.invoke( + app, + [ + "generate", + str(scenarios_dir / "baseline-only.yaml"), + "--output", + str(tmp_path), + "--force", + ], + ) + + assert result.exit_code == EXIT_SUCCESS + assert not (tmp_path / "data" / "old.log").exists() + assert (tmp_path / "data" / "baseline.log").read_text() == "new baseline data" + assert "No malicious activities" in (tmp_path / 
"GROUND_TRUTH.md").read_text() + assert "baseline-only" in (tmp_path / OBSERVATION_MANIFEST_FILENAME).read_text() + assert (tmp_path / "ENVIRONMENT.md").read_text() == "scenario-authored" + @patch("evidenceforge.cli.commands.GenerationEngine") def test_generate_force_preserves_old_output_on_failure( self, mock_engine_class, scenarios_dir, tmp_path @@ -364,6 +416,7 @@ def _fake_generate(): (sd / "data").mkdir(exist_ok=True) (sd / "data" / "new.xml").write_text("new data") (sd / "GROUND_TRUTH.md").write_text("new ground truth") + (sd / OBSERVATION_MANIFEST_FILENAME).write_text('{"schema_version": 1}') mock_engine = Mock() mock_engine.generate.side_effect = _fake_generate @@ -415,6 +468,7 @@ def _fake_generate(): (sd / "data").mkdir(exist_ok=True) (sd / "data" / "new.xml").write_text("new data") (sd / "GROUND_TRUTH.md").write_text("new ground truth") + (sd / OBSERVATION_MANIFEST_FILENAME).write_text('{"schema_version": 1}') mock_engine = Mock() mock_engine.generate.side_effect = _fake_generate @@ -485,6 +539,7 @@ def _fake_generate(): (sd / "data").mkdir(exist_ok=True) (sd / "data" / "new.xml").write_text("new data") (sd / "GROUND_TRUTH.md").write_text("new ground truth") + (sd / OBSERVATION_MANIFEST_FILENAME).write_text('{"schema_version": 1}') mock_engine = Mock() mock_engine.generate.side_effect = _fake_generate @@ -548,6 +603,45 @@ def _fake_generate_no_gt(): assert (tmp_path / "data" / "old.xml").read_text() == "old data" assert (tmp_path / "GROUND_TRUTH.md").read_text() == "old ground truth" + @patch("evidenceforge.cli.commands.GenerationEngine") + def test_force_swap_requires_staged_manifest(self, mock_engine_class, scenarios_dir, tmp_path): + """If engine succeeds but staged observation manifest is missing, old output preserved.""" + + def _fake_generate_no_manifest(): + staging_dirs = list(tmp_path.glob(".eforge_staging_*")) + if staging_dirs: + sd = staging_dirs[0] + (sd / "data").mkdir(exist_ok=True) + (sd / "data" / "new.xml").write_text("new data") + (sd 
/ "GROUND_TRUTH.md").write_text("new ground truth") + # Deliberately skip creating OBSERVATION_MANIFEST.json + + mock_engine = Mock() + mock_engine.generate.side_effect = _fake_generate_no_manifest + mock_engine_class.return_value = mock_engine + + (tmp_path / "data").mkdir() + (tmp_path / "data" / "old.xml").write_text("old data") + (tmp_path / "GROUND_TRUTH.md").write_text("old ground truth") + (tmp_path / OBSERVATION_MANIFEST_FILENAME).write_text("old manifest") + + result = runner.invoke( + app, + [ + "generate", + str(scenarios_dir / "minimal.yaml"), + "--output", + str(tmp_path), + "--force", + ], + ) + + assert result.exit_code == EXIT_GENERATION_ERROR + assert (tmp_path / "data" / "old.xml").exists() + assert (tmp_path / "data" / "old.xml").read_text() == "old data" + assert (tmp_path / "GROUND_TRUTH.md").read_text() == "old ground truth" + assert (tmp_path / OBSERVATION_MANIFEST_FILENAME).read_text() == "old manifest" + @patch("evidenceforge.cli.commands.GenerationEngine") def test_force_swap_cleans_stale_rollback(self, mock_engine_class, scenarios_dir, tmp_path): """Stale rollback dirs from prior killed runs are cleaned up.""" @@ -559,6 +653,7 @@ def _fake_generate(): (sd / "data").mkdir(exist_ok=True) (sd / "data" / "new.xml").write_text("new data") (sd / "GROUND_TRUTH.md").write_text("new ground truth") + (sd / OBSERVATION_MANIFEST_FILENAME).write_text('{"schema_version": 1}') mock_engine = Mock() mock_engine.generate.side_effect = _fake_generate diff --git a/tests/unit/test_engine.py b/tests/unit/test_engine.py index afa786b9..490b1ac9 100644 --- a/tests/unit/test_engine.py +++ b/tests/unit/test_engine.py @@ -27,6 +27,7 @@ import pytest +from evidenceforge.events.observation_manifest import OBSERVATION_MANIFEST_FILENAME from evidenceforge.generation.engine import GenerationEngine from evidenceforge.generation.engine.storyline import _estimate_process_lifetime from evidenceforge.models import ( @@ -872,7 +873,7 @@ def 
test_generate_calls_ground_truth_when_malicious_events( @patch("evidenceforge.generation.engine.emitter_setup.WindowsEventEmitter") @patch("evidenceforge.generation.engine.emitter_setup.SysmonEventEmitter") @patch("evidenceforge.generation.engine.emitter_setup.load_format") - def test_generate_skips_ground_truth_without_malicious_events( + def test_generate_calls_ground_truth_without_malicious_events( self, mock_load_format, mock_sysmon, @@ -895,7 +896,67 @@ def test_generate_skips_ground_truth_without_malicious_events( minimal_scenario, tmp_path, ): - """Should NOT generate ground truth for baseline-only scenarios.""" + """Baseline-only scenarios should still generate matched sidecars.""" + mock_format_def = Mock() + mock_format_def.output.file_extension = ".log" + mock_load_format.return_value = mock_format_def + + mock_activity_instance = Mock() + mock_activity_instance.get_baseline_pattern.return_value = [] + mock_activity_gen.return_value = mock_activity_instance + + mock_gt_instance = Mock() + mock_gt_gen.return_value = mock_gt_instance + + engine = GenerationEngine(minimal_scenario, tmp_path) + engine.generate() + + assert mock_gt_gen.called + assert mock_gt_gen.call_args.kwargs["malicious_events"] == [] + assert mock_gt_gen.call_args.kwargs["red_herring_events"] == [] + assert mock_gt_instance.generate.called + assert (tmp_path / OBSERVATION_MANIFEST_FILENAME).exists() + + @patch("evidenceforge.generation.engine.core.ActivityGenerator") + @patch("evidenceforge.generation.engine.emitter_setup.ZeekReporterEmitter") + @patch("evidenceforge.generation.engine.emitter_setup.ZeekPacketFilterEmitter") + @patch("evidenceforge.generation.engine.emitter_setup.ZeekPeEmitter") + @patch("evidenceforge.generation.engine.emitter_setup.ZeekOcspEmitter") + @patch("evidenceforge.generation.engine.emitter_setup.ZeekX509Emitter") + @patch("evidenceforge.generation.engine.emitter_setup.ZeekWeirdEmitter") + @patch("evidenceforge.generation.engine.emitter_setup.ZeekNtpEmitter") + 
@patch("evidenceforge.generation.engine.emitter_setup.ZeekDhcpEmitter") + @patch("evidenceforge.generation.engine.emitter_setup.ZeekFilesEmitter") + @patch("evidenceforge.generation.engine.emitter_setup.ZeekSslEmitter") + @patch("evidenceforge.generation.engine.emitter_setup.ZeekHttpEmitter") + @patch("evidenceforge.generation.engine.emitter_setup.ZeekDnsEmitter") + @patch("evidenceforge.generation.engine.emitter_setup.ZeekEmitter") + @patch("evidenceforge.generation.engine.emitter_setup.WindowsEventEmitter") + @patch("evidenceforge.generation.engine.emitter_setup.SysmonEventEmitter") + @patch("evidenceforge.generation.engine.emitter_setup.load_format") + def test_generate_baseline_only_writes_ground_truth_and_manifest( + self, + mock_load_format, + mock_sysmon, + mock_windows, + mock_zeek, + mock_zeek_dns, + mock_zeek_http, + mock_zeek_ssl, + mock_zeek_files, + mock_zeek_dhcp, + mock_zeek_ntp, + mock_zeek_weird, + mock_zeek_x509, + mock_zeek_ocsp, + mock_zeek_pe, + mock_zeek_pf, + mock_zeek_reporter, + mock_activity_gen, + minimal_scenario, + tmp_path, + ): + """A successful baseline-only generation writes the complete sidecar set.""" mock_format_def = Mock() mock_format_def.output.file_extension = ".log" mock_load_format.return_value = mock_format_def @@ -907,8 +968,12 @@ def test_generate_skips_ground_truth_without_malicious_events( engine = GenerationEngine(minimal_scenario, tmp_path) engine.generate() - # Ground truth generator should NOT be called - assert not mock_gt_gen.called + ground_truth = tmp_path / "GROUND_TRUTH.md" + manifest = tmp_path / OBSERVATION_MANIFEST_FILENAME + assert ground_truth.exists() + assert manifest.exists() + assert "No malicious activities" in ground_truth.read_text() + assert "No malicious events were generated" in ground_truth.read_text() @patch("evidenceforge.generation.engine.core.ActivityGenerator") @patch("evidenceforge.generation.engine.emitter_setup.ZeekReporterEmitter") diff --git a/tests/unit/test_ground_truth.py 
b/tests/unit/test_ground_truth.py index 8c9b704e..15e1f08e 100644 --- a/tests/unit/test_ground_truth.py +++ b/tests/unit/test_ground_truth.py @@ -469,6 +469,18 @@ def test_format_iocs_empty(self, minimal_scenario, malicious_events): assert "No IOCs extracted" in formatted + def test_format_iocs_empty_categories(self, minimal_scenario, malicious_events): + """_format_iocs() should not emit blank headings for empty IOC categories.""" + generator = GroundTruthGenerator(minimal_scenario, malicious_events) + + formatted = generator._format_iocs( + {"network": set(), "processes": set(), "users": set(), "files": set()} + ) + + assert "No IOCs extracted" in formatted + assert "### Network IOCs" not in formatted + assert "### Process IOCs" not in formatted + def test_format_iocs_sorted(self, minimal_scenario, malicious_events): """_format_iocs() should sort IOCs alphabetically.""" iocs = {"users": {"zebra", "alpha", "beta"}}