Cisco-Talos · DavidJBianco · May 15, 2026 · May 13, 2026 · May 13, 2026 · May 13, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -7,14 +7,62 @@ on:
     branches: [main, dev]
 
 jobs:
-  test:
-    name: Test on Python ${{ matrix.python-version }}
+  fast-tests:
+    name: Fast tests (Python 3.12, no coverage)
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    if: github.event_name != 'pull_request' || github.base_ref != 'main'
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          enable-cache: true
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install dependencies
+        run: uv sync --all-extras
+
+      - name: Run tests without coverage
+        run: uv run pytest --no-cov
+
+  compatibility:
+    name: Compatibility tests (Python 3.11, no coverage)
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          enable-cache: true
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install dependencies
+        run: uv sync --all-extras
+
+      - name: Run tests without coverage
+        run: uv run pytest --no-cov
+
+  coverage:
+    name: Release coverage gate (Python 3.12)
     runs-on: ubuntu-latest
     timeout-minutes: 15
-    strategy:
-      matrix:
-        python-version: ["3.11", "3.12"]
-      fail-fast: false
+    if: github.event_name == 'pull_request' && github.base_ref == 'main'
 
     steps:
       - name: Checkout code
@@ -25,29 +73,49 @@ jobs:
         with:
           enable-cache: true
 
-      - name: Set up Python ${{ matrix.python-version }}
+      - name: Set up Python
         uses: actions/setup-python@v5
         with:
-          python-version: ${{ matrix.python-version }}
+          python-version: "3.12"
 
       - name: Install dependencies
         run: uv sync --all-extras
 
       - name: Run tests with coverage
-        if: github.ref_name != 'dev' && github.base_ref != 'dev'
-        run: uv run pytest --include-slow --cov-report=xml
-
-      - name: Run fast unit tests (dev)
-        if: github.ref_name == 'dev' || github.base_ref == 'dev'
-        run: uv run pytest tests/unit --no-cov
+        run: uv run pytest --cov=evidenceforge --cov-report=term-missing --cov-report=xml --cov-fail-under=70
 
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v4
-        if: matrix.python-version == '3.12' && github.ref_name != 'dev' && github.base_ref != 'dev'
         with:
           file: ./coverage.xml
           fail_ci_if_error: false
 
+  slow-comprehensive:
+    name: Slow comprehensive tests
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+    if: github.event_name == 'pull_request' && github.base_ref == 'main'
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          enable-cache: true
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install dependencies
+        run: uv sync --all-extras
+
+      - name: Run slow comprehensive tests without coverage
+        run: uv run pytest --include-slow -m slow --no-cov --durations=20
+
   lint:
     name: Lint
     runs-on: ubuntu-latest

diff --git a/AGENTS.md b/AGENTS.md
@@ -69,7 +69,11 @@ When a phase is fully complete, collapse its tasks in `TODO.md` to a 2-3 line su
 
 **Testing:**
 - pytest with pytest-cov, pytest-asyncio, pytest-mock, pytest-benchmark
-- Separate test markers: `@pytest.mark.slow` for large dataset tests (not run by default)
+- Default test runs should avoid coverage instrumentation: use `uv run pytest --no-cov`
+  for normal local and feature-PR validation. Coverage is a release/readiness
+  gate before `dev` → `main`, run explicitly with
+  `uv run pytest --cov=evidenceforge --cov-report=term-missing --cov-report=xml --cov-fail-under=70`.
+- Separate test markers: `@pytest.mark.slow` for large dataset/workload tests (not run by default). Run slow tests with `--no-cov` unless you are specifically profiling coverage behavior, because coverage instrumentation makes the generator workload much slower.
 - Target coverage: 95%+ overall, 95%+ for core generation engine
 
 **Format Support:**
@@ -324,6 +328,10 @@ When adding or significantly modifying event types, emitters, or the event schem
 
 **Coverage targets:** 95%+ overall, 95%+ core engine, 90%+ formats, 85%+ CLI. Exclude: `__main__.py`, type stubs, test fixtures.
 
+**Default validation:** run `uv run pytest --no-cov` for normal development and
+feature PRs. Run the explicit coverage command only for release readiness before
+opening or updating a `dev` → `main` PR.
+
 **Conventions:**
 - Test naming: `test_<function>_<scenario>_<expected_result>`
 - Use Arrange/Act/Assert pattern

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,31 @@ Detailed development history for the EvidenceForge project. Transferred from TOD
 
 ---
 
+## v0.7.0 (2026-05-15)
+
+This minor release packages the latest `dev` branch realism, observation, and CI work since v0.6.3. The branch includes `feat:` commits, so the version moves from `0.6.3` to `0.7.0` under the pre-1.0 semver policy.
+
+**Observation and evaluation realism**
+
+- Added observation profiles and an observation-aware evaluation manifest so generated datasets can model source-specific coverage and missingness more explicitly (`0ed18df`, `599a40e`).
+- Improved source identity metadata, endpoint baseline noise policy, and host activity distribution realism for more believable source-native evidence (`317decd`, `5931c8a`, `c8f6226`).
+- Cleaned calibration evaluation warnings by tightening observation-aware causality matching, sensor-filtered observation-manifest accounting, OCSP optional-field rendering, and visible Windows logon-before-process ordering (`e771e77`).
+
+**Source-native timing and log texture**
+
+- Emitted syslog in RFC 5424 format and improved web sessions, sensor timing, auth noise, and Zeek timing realism (`0247cc7`, `90e96cf`, `30c8217`).
+- Fixed generation sidecar emission so overwrite swaps preserve the expected matched output contract (`df2a446`).
+
+**CI and developer workflow**
+
+- Split slow comprehensive tests from coverage instrumentation, keeping normal coverage on fast/default tests while running slow workload tests separately with `--no-cov` (`a6d7583`).
+- Stabilized the slow release gate by skipping the non-gating 500MB `tracemalloc` ceiling check and fixing observation manifests for scenarios that use explicit end times instead of durations (`6e6c9f3`).
+
+**Validation**
+
+- Release-prep validation passed `uv run ruff check .`, `uv run ruff format --check .`, `uv run pytest --cov-report=xml` (`3030 passed`, `37 skipped`, `79.82%` coverage), and `uv run pytest --include-slow -m slow --no-cov --durations=20` (`13 passed`, `1 skipped`, `1:08`).
+- PR #162 cleanup validation passed `uv run eforge validate-config`, `uv run eforge validate scenarios/iteration-test/scenario.yaml`, `uv run eforge generate scenarios/iteration-test/scenario.yaml --verbose --force`, `uv run eforge eval scenarios/iteration-test/data --scenario scenarios/iteration-test/scenario.yaml --format json --verbose` (`94.64`, all hard gates passing), focused regressions (`164 passed`), and `uv run pytest -v` (`3075 passed`, `15 skipped`).
+
 ## v0.6.3 (2026-05-13)
 
 This patch release packages the latest `dev` branch realism work since v0.6.2. The branch contains only `fix:` and `docs:` commits, so the version moves from `0.6.2` to `0.6.3` under the pre-1.0 semver policy.

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -46,11 +46,27 @@ We expect new pull requests to include tests for any affected behavior, and, as
 we follow semantic versioning, we may reserve breaking changes until the next
 major version release.
 
-Before submitting, run the full test suite (including slow tests) and confirm
-all tests pass:
+Before submitting a regular feature or fix pull request, run the normal suite
+without coverage instrumentation plus lint/format checks:
 
 ```bash
-uv run pytest --include-slow
+uv run pytest --no-cov
+uv run ruff check .
+uv run ruff format --check .
+```
+
+Run the slow comprehensive workload suite without coverage whenever your change
+touches generation behavior, and always before opening a release PR:
+
+```bash
+uv run pytest --include-slow -m slow --no-cov --durations=20
+```
+
+Coverage is reserved for final readiness checks before opening a `dev` → `main`
+release PR:
+
+```bash
+uv run pytest --cov=evidenceforge --cov-report=term-missing --cov-report=xml --cov-fail-under=70
 ```
 
 ### Commit Messages
@@ -89,22 +105,28 @@ cd EvidenceForge
 # Install dependencies (requires uv: https://docs.astral.sh/uv/)
 uv sync
 
-# Run the test suite (1100+ tests, skips slow by default)
-uv run pytest
+# Run the test suite without coverage instrumentation (skips slow by default)
+uv run pytest --no-cov
 
 # Lint and format
-uv run ruff check src/ tests/
-uv run ruff format src/ tests/
+uv run ruff check .
+uv run ruff format --check .   # verifies formatting only; omit --check to apply fixes
 ```
 
 ### Test Markers
 
-- `@pytest.mark.slow`: large dataset tests (100+ users), skipped by default
+- `@pytest.mark.slow`: large dataset and workload tests, skipped by default and normally
+  run without coverage instrumentation
 
 ```bash
-uv run pytest                  # Quick run (skips slow tests)
-uv run pytest --include-slow   # Full run (all tests, required before PRs)
-uv run pytest -m slow          # Only slow tests
+# Normal fast run
+uv run pytest --no-cov
+
+# Slow comprehensive run
+uv run pytest --include-slow -m slow --no-cov --durations=20
+
+# Release coverage gate
+uv run pytest --cov=evidenceforge --cov-report=term-missing --cov-report=xml --cov-fail-under=70
 ```
 
 ## Code Style

diff --git a/README.md b/README.md
@@ -97,7 +97,7 @@ For details on the overlay system, manual editing, and cross-file dependencies,
 
 EvidenceForge creates multi-format security log datasets from YAML scenario definitions. You describe an environment (users, systems, network topology) and a storyline (attack events), and EvidenceForge generates temporally consistent logs across all formats simultaneously — complete with cross-referenced LogonIDs, PIDs, timestamps, and UIDs.
 
-Every attack scenario includes a `GROUND_TRUTH.md` file documenting exactly what happened, when, and where — making the datasets immediately usable for threat hunting training.
+Every generated scenario includes a `GROUND_TRUTH.md` file. For attack scenarios, it documents exactly what happened, when, and where; for baseline-only scenarios, it explicitly states that no malicious events were generated.
 
 ### Key Capabilities
 
@@ -106,7 +106,7 @@ Every attack scenario includes a `GROUND_TRUTH.md` file documenting exactly what
 - **Realistic baseline noise** — 26 lateral movement patterns, process→network correlation, network-level red herrings, and 18 Linux syslog categories create noise that analysts must work through
 - **OS-aware generation** — Windows systems produce Windows Event + Sysmon logs; Linux systems produce syslog + bash history
 - **Network visibility modeling** — Define sensor placement (SPAN/TAP), direction, and monitored segments
-- **Ground truth documentation** — Every attack scenario generates a GROUND_TRUTH.md with narrative, timeline, and IOCs
+- **Ground truth documentation** — Every run generates a GROUND_TRUTH.md; for attack scenarios it includes the narrative, timeline, and IOCs
 - **Parallel generation** — Threaded emitters write all formats simultaneously with temporal consistency
 - **Scenario validation** — Cross-reference checking, uniqueness constraints, and network topology validation
 - **Data quality evaluation** — 5-dimension scoring framework (23 sub-scores) with acceptance criteria
@@ -241,15 +241,21 @@ See [Architecture Documentation](docs/ARCHITECTURE.md) for the full deep dive in
 # Install dependencies
 uv sync
 
-# Run tests (1400+ tests)
-uv run pytest
+# Run tests without coverage instrumentation (skips slow by default)
+uv run pytest --no-cov
+
+# Run slow comprehensive workload tests without coverage instrumentation
+uv run pytest --include-slow -m slow --no-cov --durations=20
+
+# Run the release coverage gate before a dev -> main PR
+uv run pytest --cov=evidenceforge --cov-report=term-missing --cov-report=xml --cov-fail-under=70
 
 # Run specific test suite
 uv run pytest tests/unit/test_network_visibility.py -v
 
 # Lint and format
-uv run ruff check src/ tests/
-uv run ruff format src/ tests/
+uv run ruff check .
+uv run ruff format --check .   # verifies formatting only; omit --check to apply fixes
 ```
 
 ### Tech Stack