From c8f622626797eb4f6867571cfb48a4e2f04e0475 Mon Sep 17 00:00:00 2001
From: "David J. Bianco" <davidjbianco@gmail.com>
Date: Thu, 14 May 2026 15:44:20 -0400
Subject: [PATCH] feat: add host activity realism profiles

---
 TODO.md                                       |  17 +-
 commands/eforge/config.md                     |   1 +
 .../references/config-dependency-graph.md     |   8 +
 .../eforge/references/config-host-activity.md |  56 +++-
 .../eforge/references/config-validation.md    |   3 +-
 docs/reference/CUSTOMIZING_CONFIG.md          |   1 +
 scenarios/COVERAGE-TEST-PROMPT.md             |  18 +-
 scenarios/ITERATION-TEST-PROMPT.md            |  18 +-
 scenarios/LARGE-SCALE-COVERAGE-TEST-PROMPT.md |  25 +-
 src/evidenceforge/cli/validate_config.py      |  23 ++
 src/evidenceforge/config/activity/README.md   |   1 +
 .../activity/host_activity_profiles.yaml      | 199 ++++++++++++
 src/evidenceforge/config/schemas.py           | 164 ++++++++++
 src/evidenceforge/events/contexts.py          |   2 +
 .../activity/host_activity_profiles.py        | 281 +++++++++++++++++
 .../generation/activity/suspicious_benign.py  |  54 ++--
 .../generation/emitters/cisco_asa.py          |   6 +-
 .../generation/engine/baseline.py             | 284 ++++++++++++++++--
 tests/unit/test_baseline_canonical.py         |   4 +-
 tests/unit/test_cisco_asa_emitter.py          |   3 +
 tests/unit/test_host_activity_profiles.py     | 141 +++++++++
 tests/unit/test_validate_config.py            |  28 ++
 22 files changed, 1252 insertions(+), 85 deletions(-)
 create mode 100644 src/evidenceforge/config/activity/host_activity_profiles.yaml
 create mode 100644 src/evidenceforge/generation/activity/host_activity_profiles.py
 create mode 100644 tests/unit/test_host_activity_profiles.py

diff --git a/TODO.md b/TODO.md
index 663a96e6..b97146cf 100644
--- a/TODO.md
+++ b/TODO.md
@@ -2,7 +2,7 @@
 
 **Status:** Phase 8.5 (Dual src/dst HostContext) COMPLETE; Pre-MVP quality fixes ongoing
 **Started:** 2026-03-11
-**Last Updated:** 2026-04-29
+**Last Updated:** 2026-05-14
 
 See [CHANGELOG.md](CHANGELOG.md) for detailed development history of completed phases.
 
@@ -241,7 +241,7 @@ Replaced manual per-emitter field coordination with SecurityEvent intermediate r
 - [x] **P1** Web application response/session realism follow-up — Added data-driven inbound `web_server` visitor profiles so human visitors consume `traffic_rates.web` as top-level actions, then fan out into required page assets/API calls through `site_maps.yaml`; crawler, health-check, API-client, and opportunistic-probe traffic now uses source-native configured request/status/User-Agent profiles. Static resource sizes are stable per host/path, human navigation and render fanout timing use `timing_profiles.yaml`, and docs/skill references now explain the budget and config ownership. Verification passed: focused web/timing/baseline tests (`107 passed, 1 skipped`), config-related tests (`64 passed`), `uv run eforge validate-config`, repo-wide Ruff checks/format checks, full normal `uv run pytest -q` (`3012 passed, 15 skipped`), and `git diff --check`.
 - [x] **P1** Well-synced network sensor timing follow-up — Replaced hardcoded multi-sensor Zeek +/-400ms skew plus broad path delay with a validated `network_sensor_observation` timing profile. The default `well_synced` profile keeps stable per-sensor clock skew within +/-1.5ms and per-flow capture/path delay within 50-2000us while preserving canonical packet/byte truth unless source-native observation variance is explicitly enabled. Verification passed with focused Zeek/timing tests, `uv run eforge validate-config`, repo-wide Ruff checks/format checks, full normal `uv run pytest -q` (`3012 passed, 15 skipped`), and `git diff --check`.
 - [x] **P1** Source identity and endpoint baseline realism sprint — completed TLS/X.509 issuer-compatible chain signatures, Sysmon Event 7 native third-party module identity, config-driven Windows scheduled-process timing, and DHCP registry emission policy tied to lease activity. Verified with `uv run eforge validate-config`, focused regressions, Ruff, normal pytest, and slow-inclusive pytest.
-- [ ] **P2** Endpoint/eCAR baseline variance follow-up — Loop 96 found workstation eCAR category volumes and Linux process lifecycle evidence too uniform and complete. The realistic endpoint observation-gap portion is now handled by named observation profiles; remaining work should focus on host/persona-specific volume variance, long-lived process state, and benign unmatched endpoint artifacts.
+- [x] **P2** Endpoint/eCAR baseline variance follow-up — addressed through the host/activity profile realism layer. Host family, role, persona, and stable per-host multipliers now shape endpoint, process, registry, scheduled-task, syslog, bash, eCAR, Windows, Zeek, firewall, IDS, web, and proxy rates; config-driven encoded PowerShell variants and benign endpoint texture reduce repeated per-host artifacts. Verification passed with focused host-activity/config/ASA/baseline tests, `uv run eforge validate-config`, Ruff checks/format checks, full normal `uv run pytest -v`, and slow-inclusive `uv run pytest -v --include-slow --no-cov` (`3057 passed, 1 skipped`).
 - [x] **Later architectural sprint: imperfect observation and source coverage** — implemented a training-friendly `complete` default plus overlay-compatible named observation profiles that apply deterministic source-level drop/delay/coverage semantics without modeling contradictions. The policy covers endpoint, network, proxy/web, firewall, IDS, Windows, Sysmon, Zeek, syslog, bash history, and eCAR source families, while ground truth preserves canonical truth and records source evidence status. Verification passed: focused observation/config/ground-truth tests, `uv run eforge validate-config`, Ruff checks/format checks, full normal `uv run pytest -v` (`3036 passed, 15 skipped`), and slow-inclusive `uv run pytest -v --include-slow` (`3050 passed, 1 skipped`).
 - [x] Full slow-suite regression cleanup after loop-65 merge — explicit-proxy storyline beacons now preserve authored hostname+destination IP pairs only when the storyline marks that pair as intentional, normal proxy-origin DNS resolution remains intact, and the parallel-generation LogonID assertion treats Type 7 unlock reuse as valid slice-of-time Windows behavior. Verified with targeted proxy/parallel tests, `uv run ruff check .`, `uv run ruff format --check .`, and `uv run pytest -v --include-slow` (`2875 passed, 23 skipped`).
   Detection Engineer blind review completed for the regenerated Loop 61 dataset at `scenarios/iteration-test/data`; reviewer verdict: Synthetic, 63/100 confidence. Main findings: one PROXY-01 sshd accepted-login lifecycle gap/self-source artifact and Windows 4648 explicit-credential caller PID/image provenance ambiguity around `WS-MCHEN-01`.
@@ -437,7 +437,7 @@ Data works but experienced analysts spot tells. Grouped by format for efficient
 - [x] Event 10 source/target pairs too narrow — fixed by widening `process_access_patterns.yaml` and seeded long-lived process actors. Verification audit output: 950 Event 10 records used 16 source/target pairs.
 - [x] Registry writer processes too narrow — fixed with key-family-aware writer selection. Verification audit output: Event 12/13 records used 12 writer process images and 1,968 unique TargetObject paths with 0 template artifacts.
 - [x] Event 7 residual attribution issues — tightened generic module/process matching and retained process-aware DLL materialization. Verification audit output: 380 Event 7 records used 42 unique ImageLoaded paths.
-- [ ] Cross-source distribution realism layer — defer until data-source reviews are complete. Independent Sysmon reviews found that field-level realism improved, but per-host event volumes and recipe selection remain too uniform. Design a deterministic host/activity profile layer derived from scenario facts (host type, roles, assigned_user, persona, services, stable seed) and use it to shape Sysmon, Windows Security, Zeek, syslog, firewall, web, proxy, and eCAR/EDR rates. Avoid implementing Sysmon-only profile logic unless needed as a narrow bug fix.
+- [x] Cross-source distribution realism layer — implemented a deterministic, overlay-capable host/activity profile layer derived from host family, roles, persona/risk, services, and stable per-host variance. Baseline generation now uses these profiles to scale Windows Security/Sysmon/eCAR, Zeek/network/web/proxy, Linux syslog/bash, firewall/ASA, IDS, auth, endpoint registry, scheduled process, and service-noise volumes without requiring scenario YAML changes.
 
 **Zeek:**
 - [x] Zeek DNS / network support log review — fixed DNS/TLS PTR coherence, added realistic TXT lookup variety, prevented CDN-hostname MX artifacts, increased file-server SMB target coverage, and made SSH pivot UIDs respect sensor visibility. Tests, docs, skills, and skill references updated where needed.
@@ -583,8 +583,8 @@ Data works but experienced analysts spot tells. Grouped by format for efficient
 - [x] Security: bound threat-detection deny timestamp tracking window to prevent unbounded memory/CPU growth
 - [x] ASA imperfect-observation realism — addressed by the general observation profile layer. `complete` preserves paired training-friendly firewall evidence, while non-default profiles can apply deterministic ASA source-family gaps that create realistic missing/partial firewall evidence without rewriting canonical truth.
 - [ ] ASA message type diversity limited to 106023/302013-16/305011-12 — missing 111008, 113004, 733100, 106001, 725001, 304001
-- [ ] ASA deny baseline burstiness/profile variance — defer to a general per-source activity profile rather than a one-off ASA fix. Current deny events are uniformly spaced (3-7s); real scans should have configurable burst/quiet periods, campaign-level cadence, and source-specific variance.
-- [ ] ASA deny metadata diversity — defer to a general field-distribution realism layer. Current deny events use `[0x0, 0x0]` hash values uniformly; a later profile should model when hashes remain zero vs vary by platform/message/context.
+- [x] ASA deny baseline burstiness/profile variance — fixed through host activity profiles and firewall-deny burst scheduling. Baseline denies now use deterministic burst/quiet periods and host/profile variance instead of uniform 3-7 second spacing.
+- [x] ASA deny metadata diversity — fixed by carrying deny hash metadata on canonical firewall context and rendering stable varied ASA hash values where appropriate instead of hardcoded `[0x0, 0x0]`.
 - [ ] Recognizable 45.33.32.x public IPs remain in built-in scan/attacker pools — the original `45.33.32.1` NAT PAT finding is stale, but code still uses `45.33.32.156` in scan/attacker pools. Move these values into data/config or replace them with less recognizable public-looking lab addresses during the broader public-IP/profile cleanup.
 
 **eCAR:**
@@ -598,10 +598,11 @@ Data works but experienced analysts spot tells. Grouped by format for efficient
 **Cross-Source / General:**
 - [x] Configurable cross-source evidence disagreement — implemented as named observation profiles with `complete` as the default. Non-default profiles can introduce deterministic dropped/delayed/filtered/out-of-window evidence across Zeek, web, proxy, firewall, IDS, Windows, Sysmon, syslog, bash history, and eCAR without contradictions or ambiguous rewrites; ground truth retains source evidence status for traceability.
 - [x] Cross-sensor timestamp precision identical to 15+ decimal places — microsecond jitter added in snort.py, windows.py, and storyline.py
-- [ ] **P2** Per-host-type event rate multiplier — Domain controllers generate ~50 events/hr but real DCs running AD/DNS/DFS/GPO produce thousands/hr. `system.type` is used for routing but never for volume scaling. Need `event_rate_multiplier` on System model (or implicit per-type defaults) applied in `_calculate_events_for_hour()` and `_generate_system_traffic()`. DCs should be 3-5x workstation baseline; file servers and web servers similarly elevated.
-- [ ] Configurable per-entity artifact variation — deferred to the general host/activity profile layer. Encoded PowerShell baseline noise is currently identical across hosts (same Get-Service blob); later profiles should derive stable per-host command variants, encoded payloads, tool versions, and operator habits.
-- [ ] Configurable per-host volume variance — deferred to the general host/activity profile layer. Workstation connection counts are suspiciously uniform (808-1068 range); later profiles should widen variance by role, persona, weekday, installed apps, and stable host-specific multipliers.
+- [x] **P2** Per-host-type event rate multiplier — implemented as implicit host/activity profile defaults rather than scenario YAML fields. Domain controllers, file servers, web servers, proxies, Linux servers, and workstations now receive role/family/persona-specific multipliers across baseline activity, auth, endpoint, network, and source-specific noise.
+- [x] Configurable per-entity artifact variation — implemented in the host/activity profile layer for baseline artifact texture, including stable per-host encoded PowerShell variants and profile-owned endpoint activity scaling.
+- [x] Configurable per-host volume variance — implemented via stable host/persona/role multipliers applied across major activity families so hosts no longer share narrow uniform volume bands by construction.
 - [ ] Configurable per-host/source log deployment coverage — observation profiles now support source-family gaps and host-scoped missingness multipliers, but explicit per-host source enablement/disablement remains future work. A later setting should model named host groups, disabled sensors, partial deployments, and collection windows when users need topology-level telemetry coverage differences rather than event-level missingness.
+- [ ] **P2** Generation speed and efficiency follow-up — Sprint 4 host/activity realism is functionally verified, but the slow-inclusive suite exposed that `pytest-cov` plus `tracemalloc` can make the medium dataset memory test pathological. A future sprint should profile generation without instrumentation noise, identify hot paths introduced by richer host activity/web fanout/firewall texture, and decide whether to optimize generation, run the memory test without coverage (`--no-cov`), or relax/update stale performance assertions.
 - [x] DNS IP pool reuse causes cross-provider resolution (CloudFront→Microsoft IPs, etc.) — domain-first selection ensures consistent domain→IP mapping via FORWARD_DNS
 - [x] AWS region mismatch between DNS PTR and SSL SNI for same IP — AWS hostname/PTR generation now derives a stable per-IP region/edge identity and PTR generation respects known forward hostname context.
 - [x] TLS volume clustering design — added data-driven TLS destination profiles with overlay support and `eforge validate-config` schema/tag checks. Auto-generated external TLS now uses weighted enterprise, certificate-infra, package-update, developer-tool, and long-tail browsing profiles with stable per-host preferences. Smoke output had 28,544 TLS SNI rows, 116 distinct names, top SNI share 5.5%, and top-5 share 18.0%.
diff --git a/commands/eforge/config.md b/commands/eforge/config.md
index b2d8b88b..17a026e3 100644
--- a/commands/eforge/config.md
+++ b/commands/eforge/config.md
@@ -70,6 +70,7 @@ When writing to the overlay, files are partial — they contain ONLY the user's
 | Modify Windows auth realism | `windows_auth_realism.yaml` | (standalone — Security log auth timing and failed-logon profile knobs) |
 | Modify baseline auth noise | `auth_noise.yaml` | (standalone — stale scheduled-credential accounts and irregular recurrence timing) |
 | Modify endpoint background noise | `endpoint_noise.yaml` | (standalone — scheduled-process timing and DHCP registry emission policy) |
+| Modify host activity distribution | `host_activity_profiles.yaml` | (standalone — host/persona/role rate-family multipliers, firewall deny bursts, and artifact variants) |
 | Modify source observation coverage | `observation_profiles.yaml` | Scenario `observation_profile` selects the named profile; keep `complete` as the default training profile |
 | Modify causal/source timing | `timing_profiles.yaml` | (standalone — causal prerequisite, source latency, teardown, and Windows/Sysmon collision-spacing knobs) |
 | ~~Format definitions~~ | Not user-customizable | Engine internals — requires code changes |
diff --git a/commands/eforge/references/config-dependency-graph.md b/commands/eforge/references/config-dependency-graph.md
index 95a720b2..c3ee6dd8 100644
--- a/commands/eforge/references/config-dependency-graph.md
+++ b/commands/eforge/references/config-dependency-graph.md
@@ -49,6 +49,14 @@ Each row is a file; columns show what it depends on and what depends on it.
 | depends on | nothing | Standalone rate table |
 | **depended on by** | Engine (runtime) | Drives all baseline traffic rate calculations (user activity, web top-level actions, DNS, SMB, Kerberos, LDAP, persona connections) |
 
+### host_activity_profiles.yaml
+| Direction | File | Relationship |
+|-----------|------|-------------|
+| depends on | scenario host metadata | Uses system type, roles, assigned users, primary systems, and user personas to resolve coarse activity multipliers |
+| depends on | `traffic_rates.yaml` | Multiplies resolved baseline rates after global intensity and scenario `baseline_activity.traffic_rates` overrides are applied |
+| **depended on by** | Engine (runtime) | Shapes host/persona/role baseline volume, endpoint noise, Linux syslog and shell activity, firewall deny bursts, IDS/ICMP rates, and encoded PowerShell artifact variation |
+| validated by | `eforge validate-config` | Enforces known rate-family names, ordered positive bounds, core host types, firewall deny burst settings, and artifact variant pools |
+
 ### web_session_profiles.yaml
 | Direction | File | Relationship |
 |-----------|------|-------------|
diff --git a/commands/eforge/references/config-host-activity.md b/commands/eforge/references/config-host-activity.md
index fae076df..33634892 100644
--- a/commands/eforge/references/config-host-activity.md
+++ b/commands/eforge/references/config-host-activity.md
@@ -15,9 +15,10 @@ Schema documentation for host-level activity config files. User customizations g
 5. [windows_auth_realism.yaml](#windows_auth_realismyaml)
 6. [auth_noise.yaml](#auth-noise-auth_noiseyaml)
 7. [endpoint_noise.yaml](#endpoint-noise-endpoint_noiseyaml)
-8. [observation_profiles.yaml](#observation-profiles-observation_profilesyaml)
-9. [timing_profiles.yaml](#timing_profilesyaml)
-10. [Domain Controller Baseline Activity](#domain-controller-baseline-activity)
+8. [host_activity_profiles.yaml](#host-activity-profiles-host_activity_profilesyaml)
+9. [observation_profiles.yaml](#observation-profiles-observation_profilesyaml)
+10. [timing_profiles.yaml](#timing_profilesyaml)
+11. [Domain Controller Baseline Activity](#domain-controller-baseline-activity)
 
 ---
 
@@ -350,6 +351,55 @@ registry_noise:
 
 ---
 
+## Host Activity Profiles (`host_activity_profiles.yaml`)
+
+Controls coarse host/persona/role volume multipliers for baseline realism. This layer is intentionally rate-family-based rather than event-type-based: it keeps scenario authors from managing per-emitter matrices while still making domain controllers, servers, workstations, sysadmins, developers, and exposed roles produce distinct volumes.
+
+```yaml
+rate_families:
+  default_bounds: [0.25, 6.0]
+  bounds:
+    windows_machine_auth: [0.5, 8.0]
+    firewall_deny: [0.4, 5.0]
+
+host_types:
+  workstation:
+    base_multiplier: 1.0
+    variance: [0.75, 1.35]
+    families:
+      inbound_network: 0.65
+  server:
+    base_multiplier: 1.8
+    variance: [0.85, 1.45]
+    families:
+      windows_service_process: 1.15
+  domain_controller:
+    base_multiplier: 4.0
+    variance: [0.9, 1.3]
+    families:
+      dc_kerberos: 1.5
+
+role_profiles:
+  web_server:
+    families:
+      inbound_network: 2.0
+      firewall_deny: 1.35
+
+persona_profiles:
+  sysadmin:
+    families:
+      linux_remote_admin: 1.45
+      windows_remote_admin: 1.35
+```
+
+Resolved multipliers apply after global intensity defaults and scenario `baseline_activity.traffic_rates` overrides. Use `traffic_rates.yaml` for global low/medium/high defaults; use `host_activity_profiles.yaml` when the rate should differ by host type, role, persona, or deterministic per-host variance.
+
+Valid rate families are: `user_activity`, `web`, `dns_interval`, `ntp`, `smb_interval`, `kerberos`, `ldap`, `persona_connections`, `role_network`, `inbound_network`, `windows_service_process`, `windows_registry`, `windows_scheduled_task`, `windows_remote_thread`, `windows_process_access`, `windows_module_load`, `windows_remote_admin`, `windows_service_logon`, `windows_machine_auth`, `dc_kerberos`, `linux_syslog`, `linux_remote_admin`, `linux_shell`, `firewall_deny`, `ids_alert`, and `icmp_monitoring`.
+
+`artifact_variants.powershell_encoded` provides data-driven benign encoded PowerShell payload templates and parameter pools. `firewall_deny` controls ASA deny burst windows, quiet periods, and how often deny metadata hashes stay zero rather than vary. Run `eforge validate-config` after overlay changes; it rejects unknown rate-family names, missing core host types, inverted ranges, invalid probabilities, and empty artifact pools.
+
+---
+
 ## Observation Profiles (`observation_profiles.yaml`)
 
 Defines named source-observation profiles selected by scenario `observation_profile`. Keep `complete` as the default for training-friendly perfect source coverage and correlation. Use non-default profiles only when a scenario intentionally needs realistic source gaps or ingestion delays.
diff --git a/commands/eforge/references/config-validation.md b/commands/eforge/references/config-validation.md
index a0aa6ac9..86db24c5 100644
--- a/commands/eforge/references/config-validation.md
+++ b/commands/eforge/references/config-validation.md
@@ -85,7 +85,8 @@ Run `eforge info <field>` to get specific values (e.g., `eforge info paths.activ
 | 38 | auth_noise.yaml structure | ERROR | Invalid stale scheduled-credential account pool, host-count range, recurrence interval range, jitter range, skip probability, or backoff bounds |
 | 39 | endpoint_noise.yaml structure | ERROR | Invalid Windows scheduled-process timing bounds, skip probability, or DHCP registry emission policy |
 | 40 | observation_profiles.yaml structure | ERROR | Invalid source-family name, missing `complete` profile, invalid missingness probability, or inverted delay/host multiplier range |
-| 41 | tls_realism.yaml chain metadata | ERROR | Invalid TLS subject-key profile fields or RSA/ECDSA child signature algorithm mismatch |
+| 41 | host_activity_profiles.yaml structure | ERROR | Invalid host/persona/role rate-family name, missing core host type, malformed multiplier/bounds range, malformed firewall deny burst settings, or invalid artifact variant pools |
+| 42 | tls_realism.yaml chain metadata | ERROR | Invalid TLS subject-key profile fields or RSA/ECDSA child signature algorithm mismatch |
 
 ## Scenario Validation: traffic_rates
 
diff --git a/docs/reference/CUSTOMIZING_CONFIG.md b/docs/reference/CUSTOMIZING_CONFIG.md
index c2d0a76d..286baf38 100644
--- a/docs/reference/CUSTOMIZING_CONFIG.md
+++ b/docs/reference/CUSTOMIZING_CONFIG.md
@@ -163,6 +163,7 @@ Configuration files are interconnected. When you add an entry to one file, other
 | Windows auth realism | `windows_auth_realism.yaml` (`workstation_lock.min_unlock_gap_seconds`, failed-logon local/network profiles, and optional companion network connection rates) |
 | Baseline auth noise | `auth_noise.yaml` (stale scheduled-credential account pools, host counts, recurrence intervals, jitter, skips, and backoff) |
 | Endpoint background noise | `endpoint_noise.yaml` (Windows scheduled-process trigger windows, host drift, skip probability, and DHCP registry emission policy) |
+| Host/persona/role volume realism | `host_activity_profiles.yaml` (coarse rate-family multipliers, firewall deny burst shaping, and data-driven artifact variants) |
 | Observation/source coverage | `observation_profiles.yaml` (named source-level missingness/delay profiles selected by scenario `observation_profile`; default `complete` keeps perfect coverage) |
 | Causal/source-native timing | `timing_profiles.yaml` (`relationships` for causal prerequisites, source latency, teardown margins, Zeek analyzer offsets and TLS duration floors, plus Windows/Sysmon collision spacing) |
 | Public NTP fallback servers and DNS tunnel timing | `network_params.yaml` (`public_ntp_servers`, `dns_tunnel_rtt`; scenario-defined internal/domain NTP servers still take precedence) |
diff --git a/scenarios/COVERAGE-TEST-PROMPT.md b/scenarios/COVERAGE-TEST-PROMPT.md
index 44637578..200e0d0f 100644
--- a/scenarios/COVERAGE-TEST-PROMPT.md
+++ b/scenarios/COVERAGE-TEST-PROMPT.md
@@ -8,6 +8,8 @@
   first minute of output is realistic rather than cold-start).
   logon_grace_period: "30m" (suppresses "no prior logon" warnings for users assumed already
   at their desk at time_window.start).
+  observation_profile: complete (explicit default — preserves training-friendly complete source
+  coverage; use non-default profiles only when specifically testing collection gaps).
 
   Systems (mix of Windows and Linux, ~20+ total):
   - One workstation per user, distributed across departments: dev, IT,
@@ -80,7 +82,7 @@
   - Service account (svc_backup) authenticating from an unusual host (not its normal server) —
     legitimate scheduled task migration, but looks like lateral movement.
 
-  All 10 log format groups: windows, zeek, ecar, syslog, bash_history, snort_alert, cisco_asa,
+  All 9 log format groups: windows, zeek, ecar, syslog, bash_history, snort_alert, cisco_asa,
    web_access, proxy_access.
   (Note: "windows" expands to windows_event_security + windows_event_sysmon; "zeek" expands to
    zeek_conn, zeek_dns, zeek_http, zeek_ssl, zeek_files, zeek_dhcp, zeek_ntp, zeek_weird,
@@ -238,6 +240,7 @@
   - 4634 logoff pairs with 4624 on matching TargetLogonId, including type 3 network logons
     and DC machine-account logons (after short delays)
   - Certificate validity periods match issuer (Let's Encrypt = 90 days, DigiCert = 397 days)
+  - X.509 child certificate signatures are compatible with the issuer key family and CA profile
   - Certificate chain depth and CA reuse driven by tls_realism.yaml/tls_issuers.yaml —
     intermediate CAs appear as shared profiles, not unique per leaf
   - MAC addresses use diverse OUI prefixes from network_params.yaml (Dell, HP, Lenovo,
@@ -288,7 +291,8 @@
     process terminations with realistic delays (recon: 0.3-5s, attack tools: 5-30s, persistent/C2: no termination);
     paired 1:1 with Security 4689 + eCAR PROCESS/TERMINATE for the same exit
   - Event 7 (ImageLoad): baseline DLL loads (ntdll.dll, kernel32.dll, etc.) with
-    signing status and signature details
+    signing status and signature details. Third-party DLLs preserve source-native signer,
+    company, product, and version metadata instead of falling back to Microsoft identity.
   - Event 8 (CreateRemoteThread): baseline benign pairs 1-3/hr (MsMpEng->explorer,
     csrss->svchost, etc.) plus storyline mimikatz create_remote_thread targeting lsass;
     correlated with eCAR THREAD/REMOTE_CREATE
@@ -315,6 +319,9 @@
   - Correct interface resolution: internal IPs -> "inside", DMZ IPs -> "dmz", external IPs -> "outside"
   - Per-sensor directory output: fw-perimeter/cisco_asa.log
   - Deny baseline volume proportional to deny_ratio (~5x allows)
+  - Deny baseline timing uses burst/quiet cadence from host_activity_profiles.yaml, not evenly
+    spaced attempts; 106023 hash pairs should vary when the profile calls for it, not always
+    render as [0x0, 0x0]
   - Firewall policy enforcement: external -> corporate_lan denied, external -> dmz:80/443 allowed
   - Storyline connections through the firewall produce ASA allow records correlated with Zeek conn records
   - 305011 (Built NAT translation) present when nat_rules configured
@@ -334,6 +341,9 @@
     Verify DNS-to-TCP offsets are not uniform; verify Sysmon Events 1/5/8/10 for the same
     process chain are not bucketed at identical timestamps.
   - Hawkes temporal model: user events show bursty clusters (CV > 1.0 in eval), not uniform spacing
+  - Host activity profiles: host type, roles, and persona shape broad rate families after
+    traffic_rates/scenario overrides. Verify DC/file/web/proxy/server hosts and user workstations
+    have distinct event-volume profiles rather than uniform per-host counts.
   - Typing cadence: multi-event storyline steps (e.g., step 4 discovery commands, step 10 AD enum)
     have 1-15 second gaps between events, not identical timestamps
   - Day-of-week variation: if scenario spans a weekend, Saturday/Sunday activity near-zero
@@ -353,6 +363,8 @@
   - Entity lifecycle: no process_access events targeting PIDs that don't exist in running_processes
   - Workstation lock/unlock (4800/4801): persona-driven lock frequency during work hours
   - Explicit credentials (4648): RunAs and scheduled task execution with alternate credentials
+  - Observation profile: `complete` keeps cross-source coverage training-friendly; source gaps,
+    delays, and partial collection belong to named non-default profiles and should not appear here.
 
   Proxy coverage (verify in generated data):
   - Forward proxy (PROXY-01 with roles: [forward_proxy]) routes web traffic for internal systems
@@ -377,6 +389,8 @@
     dirb/nmap_http always blank
   - Nikto User-Agent rotates per request via @NIKTO_TESTID@ token (6-digit IDs unique per
     request), not a single static string
+  - Browser-like page loads fan out into realistic CSS/JS/image/API subresource requests; the
+    top-level request budget counts user-driven page/tool requests, not every render component
   - Event-specific jitter defaults: beacon 0.15 (tight), web_scan 0.4 (wide), credential_spray
     0.5 (self-pacing), dga_queries 0.3, dns_tunnel 0.25 — can be overridden per event
 
diff --git a/scenarios/ITERATION-TEST-PROMPT.md b/scenarios/ITERATION-TEST-PROMPT.md
index c21d49ca..199cf680 100644
--- a/scenarios/ITERATION-TEST-PROMPT.md
+++ b/scenarios/ITERATION-TEST-PROMPT.md
@@ -10,6 +10,8 @@
   warmup: "2h" (minimum viable to pre-populate DNS cache, process trees, and sessions —
   cold-start artifacts are immediately visible to forensic reviewers).
   logon_grace_period: "30m"
+  observation_profile: enterprise_standard (intentionally exercises realistic source-level
+  observation gaps, delays, and coverage variation for blind-review improvement loops).
 
   Systems (mix of Windows and Linux, ~15 total):
   - 8 workstations, one per user (1:1 mapping — create one workstation per user):
@@ -253,6 +255,7 @@
     LDAP/RPC connections to DC, type 3 logon on DC — all within seconds
   - 4634 logoff pairs with 4624 on matching TargetLogonId
   - Certificate validity periods match issuer (Let's Encrypt = 90 days, DigiCert = 397 days)
+  - X.509 child certificate signatures are compatible with the issuer key family and CA profile
   - PID 4 resolves to "System" in parent process lookups
   - NAT rules produce: dynamic PAT for outbound (mapped_src_ip + translated port), static NAT
     for WEB-EXT-01 VIP. Outside Zeek sensors see post-NAT IPs; inside sensors see real IPs
@@ -284,7 +287,9 @@
     command line; ParentImage reflects spawn_rules.yaml chains
   - Event 3 (NetworkConnect): outbound connections attributed to originating process
   - Event 5 (ProcessTerminate): paired 1:1 with Security 4689 + eCAR PROCESS/TERMINATE
-  - Event 7 (ImageLoad): baseline DLL loads with signing status
+  - Event 7 (ImageLoad): baseline DLL loads with signing status. Third-party DLLs preserve
+    source-native signer, company, product, and version metadata instead of falling back to
+    Microsoft identity.
   - Event 8 (CreateRemoteThread): baseline benign pairs (1-3/hr) plus storyline mimikatz
   - Event 10 (ProcessAccess): baseline benign pairs (3-8/hr) plus storyline mimikatz on lsass
   - Event 11/12/13: emitted for persistence steps (service install, scheduled task)
@@ -296,6 +301,9 @@
   - Built/Teardown pairs (302013/302014) for permitted TCP connections
   - Built/Teardown pairs (302015/302016) for permitted UDP connections (DNS, NTP)
   - Deny records (106023) for blocked traffic
+  - Deny baseline timing uses burst/quiet cadence from host_activity_profiles.yaml, not evenly
+    spaced attempts; 106023 hash pairs should vary when the profile calls for it, not always
+    render as [0x0, 0x0]
   - 733100 threat-detection alerts during port_scan and web_scan phases (burst exceeds
     threat_detection_rate of 10 drops/sec). Verify rate_id, current_burst, max_burst,
     total_count fields present.
@@ -309,6 +317,9 @@
   - Causal expansion: DNS queries precede TCP connections; Kerberos 4768/4769 precede 4624
     domain logons; process_access follows create_remote_thread targeting lsass
   - Hawkes temporal model: user events show bursty clusters (CV > 1.0), not uniform spacing
+  - Host activity profiles: host type, roles, and persona shape broad rate families after
+    traffic_rates/scenario overrides. Verify DC/file/web/proxy/server hosts and user workstations
+    have distinct event-volume profiles rather than uniform per-host counts.
   - Typing cadence: multi-event storyline steps have 1-15 second gaps, not identical timestamps
   - Process→network correlation: chrome.exe/git/sqlcmd baseline processes produce matching connections
   - Stale account enrichment: Kerberos 4771 (0x12) failures plus failed batch and service logons
@@ -321,6 +332,9 @@
   - Workstation lock/unlock (4800/4801): workstation_lock always precedes workstation_unlock
     for the same session — semantic ordering enforced
   - Explicit credentials (4648): RunAs and scheduled task execution with alternate credentials
+  - Observation profile: `enterprise_standard` introduces realistic source-level gaps, delays,
+    and coverage variation without contradictions. Ground truth should still preserve canonical
+    truth and source-evidence status for reviewer traceability.
 
   Proxy coverage (verify in generated data):
   - PROXY-01 (forward_proxy) routes web traffic for internal systems
@@ -337,6 +351,8 @@
   - Nikto User-Agent rotates per request via @NIKTO_TESTID@ token (unique 6-digit IDs),
     not a single static string
   - Web-scan Referer for nikto: ~30% same-origin; for sqlmap/dirb/nmap_http: always blank
+  - Browser-like page loads fan out into realistic CSS/JS/image/API subresource requests; the
+    top-level request budget counts user-driven page/tool requests, not every render component
 
   Ground truth / answer key:
   - GROUND_TRUTH.md generated automatically from storyline events
diff --git a/scenarios/LARGE-SCALE-COVERAGE-TEST-PROMPT.md b/scenarios/LARGE-SCALE-COVERAGE-TEST-PROMPT.md
index 12a66769..8657aa9f 100644
--- a/scenarios/LARGE-SCALE-COVERAGE-TEST-PROMPT.md
+++ b/scenarios/LARGE-SCALE-COVERAGE-TEST-PROMPT.md
@@ -8,6 +8,8 @@
   Duration: 72 hours (3 full business days), starting 2024-03-18T06:00:00Z (Monday morning).
   Timezone: America/Chicago. This spans Monday–Wednesday, exercising day-of-week variation with
   full business-day cycles including morning ramp-up, lunch dips, and evening wind-down.
+  observation_profile: complete (explicit default — preserves training-friendly complete source
+  coverage; use non-default profiles only when specifically testing collection gaps).
 
   Scenario name: apt-healthcare-breach-large
 
@@ -249,10 +251,12 @@
   Key requirements:
   - Exercise all typed event types: process, logon, failed_logon, logoff (baseline), connection,
     ssh_session, rdp_session, account_created, account_deleted, group_member_added, service_installed,
-    scheduled_task_created, log_cleared, create_remote_thread, dhcp_lease, port_scan, beacon, dns_query,
-    web_scan, credential_spray, dga_queries, dns_tunnel, raw
-  - NOTE: process_access is NOT a scenario event type — it is auto-generated by create_remote_thread
-    targeting lsass.exe via the causal expansion engine. Do not declare it in the YAML.
+    scheduled_task_created, log_cleared, create_remote_thread, process_access, dhcp_lease,
+    port_scan, beacon, dns_query, web_scan, credential_spray, dga_queries, dns_tunnel, raw
+  - NOTE: process_access IS a valid scenario event type and can be declared directly for a standalone
+    Sysmon Event 10. However, create_remote_thread targeting lsass.exe auto-generates correlated
+    process_access via the causal expansion engine. Do not declare a second process_access on lsass
+    in the same step.
   - Use connection events with HTTP fields (method, uri, status_code, user_agent) for web access log
     entries showing the SQLi, web shell access, and failed exploit attempts — NOT raw events
   - All base64 payloads must be real (generated via Bash tool)
@@ -266,6 +270,7 @@
   - DHCP events are routed to sensors by segment visibility (not duplicated across all sensors)
   - Windows service account events (SYSTEM, NETWORK SERVICE) show "NT AUTHORITY" as SubjectDomainName
   - Certificate validity periods match issuer (Let's Encrypt = 90 days, DigiCert = 397 days)
+  - X.509 child certificate signatures are compatible with the issuer key family and CA profile
   - MAC addresses use diverse OUI prefixes (Dell, HP, Lenovo, Intel, VMware)
   - PID 4 resolves to "System" in parent process lookups
 
@@ -288,6 +293,8 @@
   Sysmon coverage (verify in generated data):
   - Event 1 (ProcessCreate): baseline + storyline process events
   - Event 5 (ProcessTerminate): baseline process terminations plus storyline with realistic delays
+  - Event 7 (ImageLoad): third-party DLLs preserve source-native signer, company, product, and
+    version metadata instead of falling back to Microsoft identity
   - Event 8 (CreateRemoteThread): baseline benign pairs plus storyline mimikatz
   - Event 10 (ProcessAccess): baseline benign pairs plus storyline mimikatz on lsass
   - Baseline Event 8/10 noise ensures storyline attack events are not instant red flags
@@ -302,6 +309,9 @@
   - Correct interface resolution per firewall: fw-external uses inside/dmz/outside; fw-internal
     uses db-zone/mgmt-zone/outside
   - Deny baseline proportional to deny_ratio: ~8x for external firewall, ~3x for internal
+  - Deny baseline timing uses burst/quiet cadence from host_activity_profiles.yaml, not evenly
+    spaced attempts; 106023 hash pairs should vary when the profile calls for it, not always
+    render as [0x0, 0x0]
   - Policy enforcement: external → corporate_lan denied, external → dmz:80/443 allowed,
     app_vlan → database_vlan:3306 allowed, corporate_lan → database_vlan denied
   - Storyline step 23 (failed exfil from DC-01) should produce a firewall deny record since
@@ -317,6 +327,9 @@
   - Causal expansion: DNS queries precede TCP connections; Kerberos precede domain logons;
     process_access follows create_remote_thread targeting lsass
   - Hawkes temporal model: user events show bursty clusters (CV > 1.0), not uniform spacing
+  - Host activity profiles: host type, roles, and persona shape broad rate families after
+    traffic_rates/scenario overrides. Verify DC/file/web/proxy/server hosts and user workstations
+    have distinct event-volume profiles rather than uniform per-host counts.
   - Typing cadence: multi-event storyline steps have 1-15 second gaps between events
   - Day-of-week variation: 3-day span exercises full weekday patterns
   - Lateral movement: backup/monitoring/AD replication/mail routing between servers
@@ -326,5 +339,9 @@
   - Linux syslog depth: SSH login messages, package management, systemd timers, logrotate, journald
   - Command diversification: user-specific paths and varied project/document names
   - Entity lifecycle: no process_access targeting nonexistent PIDs
+  - Browser-like page loads fan out into realistic CSS/JS/image/API subresource requests; the
+    top-level request budget counts user-driven page/tool requests, not every render component
+  - Observation profile: `complete` keeps cross-source coverage training-friendly; source gaps,
+    delays, and partial collection belong to named non-default profiles and should not appear here.
 
   Save to scenarios/apt-healthcare-breach-large/scenario.yaml with accompanying ENVIRONMENT.md.
diff --git a/src/evidenceforge/cli/validate_config.py b/src/evidenceforge/cli/validate_config.py
index 80ac0aaf..42fad68d 100644
--- a/src/evidenceforge/cli/validate_config.py
+++ b/src/evidenceforge/cli/validate_config.py
@@ -233,6 +233,16 @@ def validate_config() -> ValidationResult:
         "activity/endpoint_noise.yaml": {
             "dict_fields": {"windows_scheduled_processes", "registry_noise"},
         },
+        "activity/host_activity_profiles.yaml": {
+            "dict_fields": {
+                "rate_families",
+                "host_types",
+                "role_profiles",
+                "persona_profiles",
+                "artifact_variants",
+                "firewall_deny",
+            },
+        },
         "activity/ids_signatures.yaml": {
             "list_fields": {"signatures": None},
         },
@@ -450,6 +460,9 @@ def validate_config() -> ValidationResult:
     )
     from evidenceforge.generation.activity.dns_registry import load_dns_registry
     from evidenceforge.generation.activity.endpoint_noise import load_endpoint_noise
+    from evidenceforge.generation.activity.host_activity_profiles import (
+        load_host_activity_profiles,
+    )
     from evidenceforge.generation.activity.ids_signatures import load_ids_signatures
     from evidenceforge.generation.activity.process_access_patterns import (
         load_process_access_patterns,
@@ -481,6 +494,7 @@ def validate_config() -> ValidationResult:
     site_data = load_site_maps()
     sys_proc_data = load_system_processes()
     endpoint_noise_data = load_endpoint_noise()
+    host_activity_profiles_data = load_host_activity_profiles()
     observation_profiles_data = load_observation_profiles()
     tls_realism_data = load_tls_realism()
     windows_auth_data = load_windows_auth_realism()
@@ -1697,6 +1711,7 @@ def _record_ids_rule_identity(
         DnsTunnelTtlEntry,
         EdrFileSideEffectProfile,
         EndpointNoiseConfig,
+        HostActivityProfilesConfig,
         KerberosRealismConfig,
         ObservationProfilesConfig,
         OuiEntry,
@@ -1830,6 +1845,14 @@ def _record_ids_rule_identity(
         _SCHEMA_CHECKS.append(
             ([observation_profiles_data], ObservationProfilesConfig, "observation_profiles.yaml")
         )
+    if host_activity_profiles_data:
+        _SCHEMA_CHECKS.append(
+            (
+                [host_activity_profiles_data],
+                HostActivityProfilesConfig,
+                "host_activity_profiles.yaml",
+            )
+        )
 
     # traffic_profiles.yaml: connection entries
     all_traffic_connection_entries = []
diff --git a/src/evidenceforge/config/activity/README.md b/src/evidenceforge/config/activity/README.md
index 84f8050b..684bbb1a 100644
--- a/src/evidenceforge/config/activity/README.md
+++ b/src/evidenceforge/config/activity/README.md
@@ -23,6 +23,7 @@ caches data after first load. Two files (`network_params.yaml`,
 | `windows_auth_realism.yaml` | `windows_auth_realism.py` | Windows Security authentication realism knobs such as minimum 4800→4801 lock/unlock gap, failed-logon validation paths, companion network evidence, and 4672 privilege profiles. |
 | `auth_noise.yaml` | `auth_noise.py` | Baseline authentication-noise profiles such as stale scheduled-credential account pools and irregular recurrence timing. |
 | `endpoint_noise.yaml` | `endpoint_noise.py` | Endpoint background timing and registry-emission policies for Windows scheduled processes and DHCP interface registry writes. |
+| `host_activity_profiles.yaml` | `host_activity_profiles.py` | Coarse host/persona/role rate multipliers for baseline volume, endpoint noise, firewall deny bursts, and data-driven artifact variation. |
 | `observation_profiles.yaml` | `config/observation_profiles.py` | Named source-observation profiles for optional source-level missingness and delays. Scenario `observation_profile` defaults to `complete`. |
 | `proxy_uri_templates.yaml` | `proxy_uri.py` | Per-domain URI path templates for proxy logs (Windows Update, CRL, OCSP, Azure AD, etc.). |
 | `network_params.yaml` | `network_params.py`, `engine/emitter_setup.py` | MAC address OUI prefixes, public NTP fallback servers, and DNS tunnel RTT bounds. |
diff --git a/src/evidenceforge/config/activity/host_activity_profiles.yaml b/src/evidenceforge/config/activity/host_activity_profiles.yaml
new file mode 100644
index 00000000..fed3eb39
--- /dev/null
+++ b/src/evidenceforge/config/activity/host_activity_profiles.yaml
@@ -0,0 +1,199 @@
+# Host/persona/role activity multipliers for baseline realism.
+#
+# These profiles are intentionally coarse. They shape broad source families
+# without forcing every emitter/event type to carry its own micro-profile.
+#
+# Overridable via .eforge/config/activity/host_activity_profiles.yaml.
+#
+# Depended on by: baseline generation engine, suspicious benign activity
+# Depends on: scenario system.type, roles, assigned_user, user.persona
+
+rate_families:
+  default_bounds: [0.25, 6.0]
+  bounds:
+    web: [0.4, 2.5]
+    dns_interval: [0.5, 4.0]
+    smb_interval: [0.4, 5.0]
+    kerberos: [0.5, 6.0]
+    ldap: [0.5, 6.0]
+    windows_machine_auth: [0.5, 8.0]
+    dc_kerberos: [0.8, 8.0]
+    linux_syslog: [0.4, 5.0]
+    firewall_deny: [0.4, 5.0]
+
+host_types:
+  workstation:
+    base_multiplier: 1.0
+    variance: [0.75, 1.35]
+    families:
+      user_activity: 0.8
+      role_network: 0.85
+      inbound_network: 0.65
+      windows_service_logon: 0.75
+      windows_machine_auth: 0.9
+      linux_syslog: 0.85
+      firewall_deny: 0.8
+
+  server:
+    base_multiplier: 1.8
+    variance: [0.85, 1.45]
+    families:
+      user_activity: 0.45
+      persona_connections: 0.55
+      web: 0.65
+      dns_interval: 0.8
+      smb_interval: 0.85
+      kerberos: 0.9
+      ldap: 0.9
+      windows_service_process: 1.15
+      windows_registry: 1.25
+      windows_scheduled_task: 1.15
+      windows_process_access: 1.15
+      windows_module_load: 1.2
+      windows_service_logon: 1.25
+      windows_machine_auth: 1.0
+      linux_syslog: 1.25
+      linux_remote_admin: 1.2
+      linux_shell: 0.8
+      firewall_deny: 1.1
+
+  domain_controller:
+    base_multiplier: 4.0
+    variance: [0.9, 1.3]
+    families:
+      user_activity: 0.2
+      persona_connections: 0.25
+      web: 0.35
+      dns_interval: 0.45
+      smb_interval: 0.65
+      kerberos: 1.15
+      ldap: 1.05
+      role_network: 1.35
+      inbound_network: 1.35
+      windows_service_process: 1.35
+      windows_registry: 1.35
+      windows_scheduled_task: 1.2
+      windows_process_access: 1.25
+      windows_module_load: 1.3
+      windows_service_logon: 1.4
+      windows_machine_auth: 1.7
+      dc_kerberos: 1.5
+      firewall_deny: 1.1
+
+role_profiles:
+  file_server:
+    families:
+      role_network: 1.35
+      inbound_network: 2.2
+      smb_interval: 1.8
+      windows_registry: 1.1
+      windows_service_logon: 1.2
+
+  web_server:
+    families:
+      web: 1.2
+      role_network: 1.25
+      inbound_network: 2.0
+      linux_syslog: 1.45
+      firewall_deny: 1.35
+
+  database:
+    families:
+      role_network: 1.3
+      inbound_network: 1.8
+      linux_syslog: 1.25
+      windows_service_process: 1.15
+
+  app_server:
+    families:
+      role_network: 1.25
+      inbound_network: 1.6
+      windows_service_process: 1.1
+      linux_syslog: 1.15
+
+  log_server:
+    families:
+      role_network: 1.2
+      inbound_network: 2.1
+      linux_syslog: 1.7
+
+  forward_proxy:
+    families:
+      role_network: 1.35
+      inbound_network: 1.7
+      linux_syslog: 1.35
+      firewall_deny: 1.2
+
+  dns_server:
+    families:
+      dns_interval: 1.7
+      role_network: 1.25
+      inbound_network: 1.8
+      linux_syslog: 1.2
+
+  domain_controller:
+    families:
+      dns_interval: 1.4
+      kerberos: 1.25
+      ldap: 1.25
+      role_network: 1.35
+      inbound_network: 1.5
+      windows_machine_auth: 1.35
+      dc_kerberos: 1.35
+
+persona_profiles:
+  developer:
+    families:
+      persona_connections: 1.25
+      linux_shell: 1.35
+
+  sysadmin:
+    families:
+      user_activity: 1.05
+      persona_connections: 1.15
+      linux_remote_admin: 1.45
+      linux_shell: 1.45
+      windows_remote_admin: 1.35
+
+  security_analyst:
+    families:
+      user_activity: 1.05
+      persona_connections: 1.2
+      linux_remote_admin: 1.2
+      windows_remote_admin: 1.2
+
+  executive:
+    families:
+      user_activity: 0.8
+      persona_connections: 0.9
+      linux_shell: 0.6
+
+artifact_variants:
+  powershell_encoded:
+    host_preferred_template_count: 3
+    templates:
+      - "Get-Service -Name {svc}"
+      - "Get-EventLog -LogName {log} -Newest {n}"
+      - "Test-NetConnection {host} -Port {port}"
+      - "Get-Process -Name {proc}"
+      - "Get-ChildItem -Path C:\\{dir} -Recurse | Measure-Object"
+      - "Get-WmiObject Win32_LogicalDisk | Select-Object DeviceID, FreeSpace"
+      - "Get-HotFix | Sort-Object InstalledOn -Descending | Select-Object -First {n}"
+      - "Get-CimInstance Win32_Service | Where-Object {$_.State -eq '{svc_state}'}"
+      - "Get-ScheduledTask | Where-Object {$_.State -eq '{task_state}'}"
+    params:
+      svc: ["Spooler", "W32Time", "wuauserv", "BITS", "WinRM", "Dhcp", "Dnscache", "EventLog"]
+      svc_state: ["Running", "Stopped"]
+      task_state: ["Ready", "Running", "Disabled"]
+      log: ["System", "Application", "Security", "Setup"]
+      n: ["10", "25", "50", "100"]
+      host: ["dc01", "fileserver", "10.0.0.1", "localhost", "gateway"]
+      port: ["80", "443", "3389", "5985", "22"]
+      proc: ["svchost", "explorer", "chrome", "outlook", "code", "winlogon"]
+      dir: ["Logs", "Temp", "Reports", "Users\\Public"]
+
+firewall_deny:
+  burst_window_count: [2, 5]
+  burst_width_seconds: [20, 180]
+  quiet_probability: 0.08
+  metadata_hash_nonzero_probability: 0.18
diff --git a/src/evidenceforge/config/schemas.py b/src/evidenceforge/config/schemas.py
index 99862ea6..66fcfd08 100644
--- a/src/evidenceforge/config/schemas.py
+++ b/src/evidenceforge/config/schemas.py
@@ -1413,6 +1413,170 @@ def validate_rate_range(cls, v: Any) -> Any:
         return v
 
 
+# --- Host Activity Profiles ---
+
+
+_HOST_ACTIVITY_RATE_FAMILIES = frozenset(
+    {
+        "user_activity",
+        "web",
+        "dns_interval",
+        "ntp",
+        "smb_interval",
+        "kerberos",
+        "ldap",
+        "persona_connections",
+        "role_network",
+        "inbound_network",
+        "windows_service_process",
+        "windows_registry",
+        "windows_scheduled_task",
+        "windows_remote_thread",
+        "windows_process_access",
+        "windows_module_load",
+        "windows_remote_admin",
+        "windows_service_logon",
+        "windows_machine_auth",
+        "dc_kerberos",
+        "linux_syslog",
+        "linux_remote_admin",
+        "linux_shell",
+        "firewall_deny",
+        "ids_alert",
+        "icmp_monitoring",
+    }
+)
+
+
+class HostActivityRateFamiliesConfig(BaseModel, extra="forbid"):
+    """Schema for ``rate_families``: default and per-family [min, max] multiplier bounds."""
+
+    default_bounds: list[float]  # fallback [min, max] pair
+    bounds: dict[str, list[float]] = Field(default_factory=dict)  # rate family -> [min, max]
+
+    @field_validator("default_bounds")
+    @classmethod
+    def default_bounds_valid(cls, v: list[float]) -> list[float]:
+        return _validate_positive_pair(v, "default_bounds")
+
+    @field_validator("bounds")
+    @classmethod
+    def bounds_valid(cls, v: dict[str, list[float]]) -> dict[str, list[float]]:
+        unknown = sorted(set(v) - _HOST_ACTIVITY_RATE_FAMILIES)  # names outside the known set
+        if unknown:
+            raise ValueError(f"unknown rate family bounds: {unknown}")
+        for family, bounds in v.items():
+            _validate_positive_pair(bounds, f"bounds.{family}")
+        return v
+
+
+def _validate_positive_pair(v: list[float], field_name: str) -> list[float]:
+    """Check that ``v`` is a [min, max] pair of positive numbers with min <= max."""
+    if len(v) != 2:
+        raise ValueError(f"{field_name} must be a two-value [min, max] list")
+    if any(not (isinstance(item, int | float) and item > 0) for item in v):
+        raise ValueError(f"{field_name} values must be positive numbers")
+    if v[1] < v[0]:
+        raise ValueError(f"{field_name} min must be <= max")
+    return v
+
+
+class HostActivityProfileEntry(BaseModel, extra="forbid"):
+    """Multiplier profile entry used for host_types, role_profiles, and persona_profiles."""
+
+    base_multiplier: float = Field(default=1.0, gt=0)
+    variance: list[float] | None = None  # optional positive [min, max] pair
+    families: dict[str, float] = Field(default_factory=dict)  # rate family -> multiplier
+
+    @field_validator("variance")
+    @classmethod
+    def variance_valid(cls, v: list[float] | None) -> list[float] | None:
+        if v is None:
+            return v
+        return _validate_positive_pair(v, "variance")
+
+    @field_validator("families")
+    @classmethod
+    def families_valid(cls, v: dict[str, float]) -> dict[str, float]:
+        unknown = sorted(set(v) - _HOST_ACTIVITY_RATE_FAMILIES)  # names outside the known set
+        if unknown:
+            raise ValueError(f"unknown activity families: {unknown}")
+        for family, multiplier in v.items():
+            if not isinstance(multiplier, int | float) or multiplier <= 0:
+                raise ValueError(f"family multiplier {family!r} must be positive")
+        return v
+
+
+class PowerShellEncodedVariantsConfig(BaseModel, extra="forbid"):
+    """Schema for ``artifact_variants.powershell_encoded``: command templates and params."""
+
+    host_preferred_template_count: int = Field(default=3, gt=0)
+    templates: list[str]  # must be a non-empty list of non-empty strings
+    params: dict[str, list[str]] = Field(default_factory=dict)  # name -> non-empty value list
+
+    @field_validator("templates")
+    @classmethod
+    def templates_non_empty(cls, v: list[str]) -> list[str]:
+        if not v or any(not template for template in v):
+            raise ValueError("templates must contain non-empty strings")
+        return v
+
+    @field_validator("params")
+    @classmethod
+    def params_non_empty(cls, v: dict[str, list[str]]) -> dict[str, list[str]]:
+        for key, values in v.items():
+            if not key or not values or any(not value for value in values):
+                raise ValueError("params keys and values must be non-empty")
+        return v
+
+
+class HostActivityArtifactVariantsConfig(BaseModel, extra="forbid"):
+    """Schema for the ``artifact_variants`` section of host_activity_profiles.yaml."""
+
+    powershell_encoded: PowerShellEncodedVariantsConfig
+
+
+class HostActivityFirewallDenyConfig(BaseModel, extra="forbid"):
+    """Schema for the ``firewall_deny`` section: burst ranges and probability knobs."""
+
+    burst_window_count: list[int]  # positive integer [min, max]
+    burst_width_seconds: list[int]  # positive integer [min, max]
+    quiet_probability: float = Field(ge=0.0, le=1.0)
+    metadata_hash_nonzero_probability: float = Field(ge=0.0, le=1.0)
+
+    @field_validator("burst_window_count", "burst_width_seconds")
+    @classmethod
+    def integer_range_valid(cls, v: list[int]) -> list[int]:
+        if len(v) != 2:
+            raise ValueError("must be a two-value [min, max] list")
+        if not all(isinstance(item, int) and item > 0 for item in v):
+            raise ValueError("values must be positive integers")
+        if v[0] > v[1]:
+            raise ValueError("min must be <= max")
+        return v
+
+
+class HostActivityProfilesConfig(BaseModel, extra="forbid"):
+    """Root schema for host_activity_profiles.yaml; requires the three core host types."""
+
+    rate_families: HostActivityRateFamiliesConfig
+    host_types: dict[str, HostActivityProfileEntry]  # keyed by system type
+    role_profiles: dict[str, HostActivityProfileEntry] = Field(default_factory=dict)
+    persona_profiles: dict[str, HostActivityProfileEntry] = Field(default_factory=dict)
+    artifact_variants: HostActivityArtifactVariantsConfig
+    firewall_deny: HostActivityFirewallDenyConfig
+
+    @field_validator("host_types")
+    @classmethod
+    def required_host_types_present(
+        cls, v: dict[str, HostActivityProfileEntry]
+    ) -> dict[str, HostActivityProfileEntry]:
+        missing = sorted({"workstation", "server", "domain_controller"} - set(v))
+        if missing:
+            raise ValueError(f"missing host type profiles: {missing}")
+        return v
+
+
 # --- Validation helper ---
 
 
diff --git a/src/evidenceforge/events/contexts.py b/src/evidenceforge/events/contexts.py
index cce47207..48574b55 100644
--- a/src/evidenceforge/events/contexts.py
+++ b/src/evidenceforge/events/contexts.py
@@ -547,6 +547,8 @@ class FirewallContext:
     access_group: str = ""  # ACL name for deny logs
     bytes_sent: int = 0  # For teardown records
     duration: str = ""  # "H:MM:SS" for teardown
+    deny_hash_a: str = "0x0"  # ASA deny metadata hash field
+    deny_hash_b: str = "0x0"  # ASA deny metadata hash field
 
 
 @dataclass(slots=True)
diff --git a/src/evidenceforge/generation/activity/host_activity_profiles.py b/src/evidenceforge/generation/activity/host_activity_profiles.py
new file mode 100644
index 00000000..5d7e1c0a
--- /dev/null
+++ b/src/evidenceforge/generation/activity/host_activity_profiles.py
@@ -0,0 +1,281 @@
+# Copyright (c) 2026 Cisco Systems, Inc. and its affiliates
+# SPDX-License-Identifier: MIT
+
+"""Host/persona/role activity profile loader and resolver.
+
+The resolver intentionally works at coarse rate-family granularity. This keeps
+baseline realism configurable without making every emitter and event subtype
+carry its own profile knobs.
+"""
+
+from __future__ import annotations
+
+import base64
+import random
+from dataclasses import dataclass
+from typing import Any
+
+from evidenceforge.config import get_activity_directory
+from evidenceforge.config.overlay import deep_merge_dict, load_with_overlay
+from evidenceforge.utils.rng import _stable_seed
+
+_PROFILES_PATH = get_activity_directory() / "host_activity_profiles.yaml"
+_CACHED_DATA: dict[str, Any] | None = None
+
+RATE_FAMILIES = frozenset(
+    {
+        "user_activity",
+        "web",
+        "dns_interval",
+        "ntp",
+        "smb_interval",
+        "kerberos",
+        "ldap",
+        "persona_connections",
+        "role_network",
+        "inbound_network",
+        "windows_service_process",
+        "windows_registry",
+        "windows_scheduled_task",
+        "windows_remote_thread",
+        "windows_process_access",
+        "windows_module_load",
+        "windows_remote_admin",
+        "windows_service_logon",
+        "windows_machine_auth",
+        "dc_kerberos",
+        "linux_syslog",
+        "linux_remote_admin",
+        "linux_shell",
+        "firewall_deny",
+        "ids_alert",
+        "icmp_monitoring",
+    }
+)
+
+
+@dataclass(frozen=True)
+class HostActivityProfile:
+    """Resolved per-rate-family activity multipliers for one host/persona view."""
+
+    hostname: str
+    multipliers: dict[str, float]  # rate family -> multiplier
+
+    def multiplier(self, family: str) -> float:
+        """Return the stored multiplier for ``family``, or 1.0 if it has no entry."""
+        return self.multipliers.get(family, 1.0)
+
+
+def load_host_activity_profiles() -> dict[str, Any]:
+    """Return the overlay-merged host activity profile data, loading it on first use."""
+    global _CACHED_DATA  # noqa: PLW0603
+    if _CACHED_DATA is None:
+        # First call: read the base YAML and merge any overlay on top of it.
+        _CACHED_DATA = load_with_overlay(
+            _PROFILES_PATH,
+            "activity/host_activity_profiles.yaml",
+            deep_merge_dict,
+        )
+    return _CACHED_DATA
+
+
+def reset_cache() -> None:
+    """Drop the cached profile data so the next load re-reads it (for tests)."""
+    global _CACHED_DATA  # noqa: PLW0603
+    _CACHED_DATA = None
+
+
+def _as_float(value: Any, default: float) -> float:
+    try:  # best-effort coercion: unusable config values fall back to ``default``
+        return float(value)
+    except (TypeError, ValueError, OverflowError):  # OverflowError: int too large for float
+        return default
+
+
+def _range_pair(value: Any, default: tuple[float, float]) -> tuple[float, float]:
+    """Coerce a [min, max] config value to an ordered positive pair, else ``default``."""
+    if not isinstance(value, list | tuple) or len(value) != 2:
+        return default
+    lo = _as_float(value[0], default[0])
+    hi = _as_float(value[1], default[1])
+    # Written as "not (> 0)" so NaN bounds are rejected too; "<= 0" is False for NaN.
+    if not (lo > 0 and hi > 0):
+        return default
+    return (hi, lo) if lo > hi else (lo, hi)
+
+
+def _family_multiplier(profile: dict[str, Any] | None, family: str) -> float:
+    """Return the profile's multiplier for ``family``; 1.0 when absent or malformed."""
+    families = profile.get("families", {}) if isinstance(profile, dict) else None
+    if not isinstance(families, dict):
+        return 1.0
+    # Negative multipliers are clamped to zero.
+    return max(0.0, _as_float(families.get(family), 1.0))
+
+
+def _bounds_for_family(data: dict[str, Any], family: str) -> tuple[float, float]:
+    """Return the (min, max) clamp for ``family``; (0.25, 6.0) when unconfigured."""
+    rate_families = data.get("rate_families") if isinstance(data, dict) else None
+    if not isinstance(rate_families, dict):
+        return (0.25, 6.0)
+    fallback = _range_pair(rate_families.get("default_bounds"), (0.25, 6.0))
+    per_family = rate_families.get("bounds")
+    override = per_family.get(family) if isinstance(per_family, dict) else None
+    return _range_pair(override, fallback)  # a missing or invalid override yields fallback
+
+
+def resolve_host_activity_profile(
+    *,
+    scenario_name: str,
+    system: Any,
+    roles: list[str] | None = None,
+    persona: str | None = None,
+) -> HostActivityProfile:
+    """Resolve deterministic, bounds-clamped activity multipliers for a host/persona pair."""
+    data = load_host_activity_profiles()
+    host_type = str(getattr(system, "type", "workstation") or "workstation").lower()
+    hostname = str(getattr(system, "hostname", "") or "<unknown>")
+    normalized_roles = [role.lower() for role in roles or getattr(system, "roles", []) or []]
+    if host_type == "domain_controller" and "domain_controller" not in normalized_roles:
+        normalized_roles.append("domain_controller")
+
+    host_profiles = data.get("host_types", {}) if isinstance(data, dict) else {}
+    role_profiles = data.get("role_profiles", {}) if isinstance(data, dict) else {}
+    persona_profiles = data.get("persona_profiles", {}) if isinstance(data, dict) else {}
+    host_profile = (
+        host_profiles.get(host_type)
+        if isinstance(host_profiles, dict) and isinstance(host_profiles.get(host_type), dict)
+        else {}
+    )
+    base_multiplier = max(0.0, _as_float(host_profile.get("base_multiplier"), 1.0))
+    variance_min, variance_max = _range_pair(host_profile.get("variance"), (1.0, 1.0))
+    persona_profile = (
+        persona_profiles.get(str(persona).lower())
+        if persona and isinstance(persona_profiles, dict)
+        else None
+    )
+
+    multipliers: dict[str, float] = {}
+    # Sorted so the dict's key order is stable; frozenset order varies with hash randomization.
+    for family in sorted(RATE_FAMILIES):
+        host_variance_rng = random.Random(
+            _stable_seed(f"host_activity:{scenario_name}:{hostname}:{family}")
+        )
+        multiplier = base_multiplier * host_variance_rng.uniform(variance_min, variance_max)
+        multiplier *= _family_multiplier(host_profile, family)
+        if isinstance(role_profiles, dict):
+            for role in normalized_roles:
+                multiplier *= _family_multiplier(role_profiles.get(role), family)
+        multiplier *= _family_multiplier(persona_profile, family)
+
+        low, high = _bounds_for_family(data, family)
+        multipliers[family] = max(low, min(high, multiplier))  # clamp to family bounds
+
+    return HostActivityProfile(hostname=hostname, multipliers=multipliers)
+
+
+def scale_count_range(lo: int, hi: int, multiplier: float) -> tuple[int, int]:
+    """Scale an inclusive randint-style count range by ``multiplier``.
+
+    Reversed bounds are swapped first. A range whose lower bound is positive
+    never scales below 1; otherwise the floor is 0. The scaled upper bound is
+    never smaller than the scaled lower bound.
+    """
+    bounds = (int(lo), int(hi))
+    low, high = min(bounds), max(bounds)
+    # Positive ranges must keep yielding at least one after scaling.
+    floor = 1 if low > 0 else 0
+    new_low = max(floor, int(round(low * multiplier)))
+    # Clamping the lower bound can overtake the upper one; keep the pair ordered.
+    new_high = max(new_low, int(round(high * multiplier)))
+    return new_low, new_high
+
+
+def scale_interval_range(lo: int, hi: int, multiplier: float) -> tuple[int, int]:
+    """Scale a seconds-between-events range inversely: larger multipliers shorten it.
+
+    Multipliers below 0.01 are treated as 0.01; both results are at least 1 second.
+    """
+    low, high = sorted((int(lo), int(hi)))
+    rate = multiplier if multiplier > 0.01 else 0.01
+    new_low = max(1, int(round(low / rate)))
+    new_high = max(new_low, int(round(high / rate)))
+    return new_low, new_high
+
+
+def pick_firewall_deny_offset(
+    *,
+    rng: random.Random,
+    sensor_name: str,
+    current_hour_epoch: int,
+    generated_index: int,
+    multiplier: float,
+) -> float | None:
+    """Pick a bursty deny-event offset in seconds, or None when the event is suppressed.
+
+    A null or non-mapping ``firewall_deny`` section falls back to built-in defaults.
+    """
+    data = load_host_activity_profiles()
+    config = data.get("firewall_deny") if isinstance(data, dict) else None
+    config = config if isinstance(config, dict) else {}  # null/scalar section: defaults
+    if rng.random() < _as_float(config.get("quiet_probability"), 0.08) / max(0.5, multiplier):
+        return None
+    count_lo, count_hi = _range_pair(config.get("burst_window_count"), (2.0, 5.0))
+    width_lo, width_hi = _range_pair(config.get("burst_width_seconds"), (20.0, 180.0))
+    lo, hi = sorted((int(count_lo), int(count_hi)))  # randint raises if lo > hi
+    burst_index = generated_index % max(1, int(round(rng.randint(lo, hi) * multiplier)))
+    seed = _stable_seed(f"firewall_deny_burst:{sensor_name}:{current_hour_epoch}:{burst_index}")
+    burst_rng = random.Random(seed)  # same sensor/hour/burst index => same burst center
+    center, width = burst_rng.uniform(120, 3480), burst_rng.uniform(width_lo, width_hi)
+    return max(0.0, min(3599.0, center + rng.gauss(0, width / 3.0)))
+
+
+def firewall_deny_hash_values(rng: random.Random) -> tuple[str, str]:
+    """Return the ASA deny hash pair: all-zero, or random 16-bit hex at the configured rate."""
+    data = load_host_activity_profiles()
+    config = data.get("firewall_deny") if isinstance(data, dict) else None
+    config = config if isinstance(config, dict) else {}  # null/scalar section: defaults
+    nonzero = _as_float(config.get("metadata_hash_nonzero_probability"), 0.18)
+    # Clamp to [0, 1]; a draw at or above the threshold keeps the all-zero pair.
+    if rng.random() >= max(0.0, min(1.0, nonzero)):
+        return ("0x0", "0x0")
+    return (f"0x{rng.getrandbits(16):04x}", f"0x{rng.getrandbits(16):04x}")
+
+
+def generate_encoded_powershell_command(
+    *,
+    rng: random.Random,
+    hostname: str,
+    username: str,
+) -> str:
+    """Generate a host-biased UTF-16LE PowerShell EncodedCommand payload.
+
+    Each hostname/username pair draws from a seeded subset of the configured
+    templates. A null or non-mapping ``powershell_encoded`` section falls back
+    to a single built-in template instead of raising ``AttributeError``.
+    """
+    data = load_host_activity_profiles()
+    variants = data.get("artifact_variants") if isinstance(data, dict) else None
+    ps_config = variants.get("powershell_encoded") if isinstance(variants, dict) else None
+    if not isinstance(ps_config, dict):
+        ps_config = {}
+    templates = ps_config.get("templates")
+    if not isinstance(templates, list) or not templates:
+        templates = ["Get-Service -Name {svc}"]
+
+    # The preferred subset is seeded from hostname and username only, not from ``rng``.
+    preferred_count = max(1, int(ps_config.get("host_preferred_template_count", 3)))
+    host_rng = random.Random(_stable_seed(f"ps_encoded_templates:{hostname}:{username}"))
+    preferred = list(templates)
+    if len(preferred) > preferred_count:
+        preferred = host_rng.sample(preferred, preferred_count)
+    command = str(rng.choice(preferred))
+
+    params = ps_config.get("params")
+    for key, values in (params.items() if isinstance(params, dict) else ()):
+        placeholder = "{" + str(key) + "}"
+        if placeholder not in command or not isinstance(values, list) or not values:
+            continue
+        seed = _stable_seed(f"ps_encoded_param:{hostname}:{username}:{key}:{rng.random()}")
+        command = command.replace(placeholder, str(random.Random(seed).choice(values)))
+    return base64.b64encode(command.encode("utf-16-le")).decode("ascii")
diff --git a/src/evidenceforge/generation/activity/suspicious_benign.py b/src/evidenceforge/generation/activity/suspicious_benign.py
index 52af7722..98a73566 100644
--- a/src/evidenceforge/generation/activity/suspicious_benign.py
+++ b/src/evidenceforge/generation/activity/suspicious_benign.py
@@ -30,11 +30,13 @@
   low=~1/hr, medium=~2/hr, high=~3/hr, ludicrous=~5/hr
 """
 
-import base64
 import logging
 import random
 from datetime import datetime, timedelta
 
+from evidenceforge.generation.activity.host_activity_profiles import (
+    generate_encoded_powershell_command,
+)
 from evidenceforge.models.scenario import Persona, System, User
 
 logger = logging.getLogger(__name__)
@@ -523,43 +525,22 @@ def generate_temp_dir_execution(
     }
 
 
-# Benign PowerShell command templates for base64-encoded commands.
-# Each invocation picks a template, substitutes parameters, then encodes
-# as UTF-16LE + base64 (matching real PowerShell -EncodedCommand format).
-_ENCODED_PS_TEMPLATES = [
-    "Get-Service -Name {svc}",
-    "Get-EventLog -LogName {log} -Newest {n}",
-    "Test-NetConnection {host} -Port {port}",
-    "Get-Process -Name {proc}",
-    "Get-ChildItem -Path C:\\{dir} -Recurse | Measure-Object",
-    "Get-WmiObject Win32_LogicalDisk | Select-Object DeviceID, FreeSpace",
-    "Get-HotFix | Sort-Object InstalledOn -Descending | Select-Object -First {n}",
-]
-
-_ENCODED_PS_PARAMS: dict[str, list[str]] = {
-    "svc": ["Spooler", "W32Time", "wuauserv", "BITS", "WinRM", "Dhcp", "Dnscache", "EventLog"],
-    "log": ["System", "Application", "Security", "Setup"],
-    "n": ["10", "25", "50", "100"],
-    "host": ["dc01", "fileserver", "10.0.0.1", "localhost", "gateway"],
-    "port": ["80", "443", "3389", "5985", "22"],
-    "proc": ["svchost", "explorer", "chrome", "outlook", "code", "winlogon"],
-    "dir": ["Logs", "Temp", "Reports", "Users\\Public"],
-}
-
-
-def _generate_encoded_command(rng: random.Random) -> str:
+def _generate_encoded_command(
+    rng: random.Random,
+    *,
+    hostname: str = "",
+    username: str = "",
+) -> str:
     """Generate a unique base64-encoded benign PowerShell command.
 
-    Picks a random template, substitutes parameters, then encodes as
-    UTF-16LE base64 — matching real Windows PowerShell -EncodedCommand format.
+    Delegates to the data-driven, host-biased generator, which returns UTF-16LE
+    base64 (PowerShell -EncodedCommand format); empty names become "unknown".
     """
-    template = rng.choice(_ENCODED_PS_TEMPLATES)
-    cmd = template
-    for key, values in _ENCODED_PS_PARAMS.items():
-        placeholder = "{" + key + "}"
-        if placeholder in cmd:
-            cmd = cmd.replace(placeholder, rng.choice(values))
-    return base64.b64encode(cmd.encode("utf-16-le")).decode("ascii")
+    return generate_encoded_powershell_command(
+        rng=rng,
+        hostname=hostname or "unknown",
+        username=username or "unknown",
+    )
 
 
 def generate_unusual_powershell(
@@ -603,7 +584,8 @@ def generate_unusual_powershell(
 
     suspicious_ps = [
         rf'powershell.exe -WindowStyle Hidden -Command "Get-WinEvent -LogName Security -MaxEvents {rng.choice([50, 100, 200, 500])} | Export-Csv C:\Reports\{report}.csv"',
-        f"powershell.exe -EncodedCommand {_generate_encoded_command(rng)}",
+        "powershell.exe -EncodedCommand "
+        f"{_generate_encoded_command(rng, hostname=system.hostname, username=user.username)}",
         rf"powershell.exe -Exec Bypass -File C:\Scripts\{script}",
         rf'powershell.exe -NonInteractive -Command "Invoke-RestMethod -Uri https://{internal_api}{api_path}"',
         rf'powershell.exe -WindowStyle Hidden -Command "Compress-Archive -Path C:\{log_dir}\*.log -DestinationPath C:\Backups\{backup}.zip"',
diff --git a/src/evidenceforge/generation/emitters/cisco_asa.py b/src/evidenceforge/generation/emitters/cisco_asa.py
index f05b15b0..0233cd0f 100644
--- a/src/evidenceforge/generation/emitters/cisco_asa.py
+++ b/src/evidenceforge/generation/emitters/cisco_asa.py
@@ -522,6 +522,8 @@ def _emit_deny(
         """Emit a Deny record (106023)."""
         protocol = (net.protocol or "tcp").lower()
         acl_name = (fw.access_group if fw else "") or "outside_access_in"
+        deny_hash_a = getattr(fw, "deny_hash_a", "0x0") if fw else "0x0"
+        deny_hash_b = getattr(fw, "deny_hash_b", "0x0") if fw else "0x0"
 
         if protocol == "icmp":
             icmp_type = net.dst_port if net.dst_port else 8
@@ -530,13 +532,13 @@ def _emit_deny(
                 f"Deny {protocol} src {src_iface}:{net.src_ip} "
                 f"dst {dst_iface}:{net.dst_ip} "
                 f"(type {icmp_type}, code {icmp_code}) "
-                f'by access-group "{acl_name}" [0x0, 0x0]'
+                f'by access-group "{acl_name}" [{deny_hash_a}, {deny_hash_b}]'
             )
         else:
             message = (
                 f"Deny {protocol} src {src_iface}:{net.src_ip}/{net.src_port} "
                 f"dst {dst_iface}:{net.dst_ip}/{net.dst_port} "
-                f'by access-group "{acl_name}" [0x0, 0x0]'
+                f'by access-group "{acl_name}" [{deny_hash_a}, {deny_hash_b}]'
             )
 
         event_data = {
diff --git a/src/evidenceforge/generation/engine/baseline.py b/src/evidenceforge/generation/engine/baseline.py
index 14a3b784..762133f7 100644
--- a/src/evidenceforge/generation/engine/baseline.py
+++ b/src/evidenceforge/generation/engine/baseline.py
@@ -54,6 +54,13 @@
     _windows_foreground_lifetime,
 )
 from evidenceforge.generation.activity.helpers import _get_os_category
+from evidenceforge.generation.activity.host_activity_profiles import (
+    firewall_deny_hash_values,
+    pick_firewall_deny_offset,
+    resolve_host_activity_profile,
+    scale_count_range,
+    scale_interval_range,
+)
 from evidenceforge.generation.activity.ids_signatures import (
     load_ids_signatures,
     render_dns_query_template,
@@ -525,6 +532,7 @@ def _windows_scheduled_task_offsets(
     current_hour: datetime,
     system: Any,
     rng: random.Random,
+    count_multiplier: float = 1.0,
 ) -> list[float]:
     """Return config-driven Windows scheduled/background task offsets for this hour."""
     from evidenceforge.generation.activity.endpoint_noise import windows_scheduled_process_config
@@ -532,6 +540,7 @@ def _windows_scheduled_task_offsets(
     cfg = windows_scheduled_process_config()
     count_min = max(0, int(cfg.get("count_min", 2)))
     count_max = max(count_min, int(cfg.get("count_max", 5)))
+    count_min, count_max = scale_count_range(count_min, count_max, count_multiplier)
     start = max(0, min(3599, int(cfg.get("trigger_window_start_seconds", 90))))
     end = max(start + 1, min(3599, int(cfg.get("trigger_window_end_seconds", 3510))))
     spacing = max(1, int(cfg.get("slot_spacing_seconds", 300)))
@@ -726,6 +735,92 @@ def _resolve_traffic_rate(self, traffic_type: str) -> tuple[int, int]:
         rate = defaults[traffic_type]
         return (rate[0], rate[1])
 
+    def _activity_roles_for_system(self, system: Any) -> list[str]:
+        """List the roles that drive activity-profile resolution for ``system``."""
+        dc_role = "domain_controller"
+        if hasattr(self, "world_model") and system.hostname in self.world_model.hosts:
+            found = [*self.world_model.hosts[system.hostname].canonical_roles]
+        else:
+            found = [name.lower() for name in getattr(system, "roles", None) or ()]
+        implied = (getattr(system, "type", None) or "workstation").lower() == dc_role
+        # A domain-controller host type implies the role even when it is not listed.
+        return found + [dc_role] if implied and dc_role not in found else found
+
+    def _resolve_activity_profile(self, system: Any, persona: str | None = None) -> Any:
+        """Return the memoized activity profile for a host/persona pair."""
+        memo = getattr(self, "_host_activity_profile_cache", None)
+        if memo is None:
+            # Create the per-instance cache lazily on first use.
+            memo = self._host_activity_profile_cache = {}
+        cache_key = (getattr(system, "hostname", ""), persona or "")
+        if cache_key in memo:
+            return memo[cache_key]
+        scenario_name = getattr(self.scenario, "name", "scenario")
+        roles = self._activity_roles_for_system(system)
+        memo[cache_key] = resolve_host_activity_profile(
+            scenario_name=scenario_name, system=system, roles=roles, persona=persona
+        )
+        return memo[cache_key]
+
+    def _activity_multiplier(
+        self,
+        system: Any | None,
+        family: str,
+        persona: str | None = None,
+    ) -> float:
+        """Look up the ``family`` multiplier for a host; a ``None`` host yields 1.0."""
+        if system is not None:
+            return self._resolve_activity_profile(system, persona).multiplier(family)
+        return 1.0
+
+    def _scaled_count_range(
+        self, system: Any | None, family: str, lo: int, hi: int, *, persona: str | None = None
+    ) -> tuple[int, int]:
+        """Scale an inclusive count range by this host's multiplier for ``family``.
+
+        ``persona`` is forwarded to the multiplier lookup. When ``system`` is
+        ``None`` the lookup yields 1.0, so the range is scaled by a neutral
+        factor.
+        """
+        factor = self._activity_multiplier(system, family, persona)
+        return scale_count_range(lo, hi, factor)
+
+    def _scaled_randint(
+        self,
+        rng: random.Random,
+        system: Any | None,
+        family: str,
+        lo: int,
+        hi: int,
+        *,
+        persona: str | None = None,
+    ) -> int:
+        """Draw a count from ``[lo, hi]`` after host activity profile scaling."""
+        bounds = self._scaled_count_range(system, family, lo, hi, persona=persona)
+        return rng.randint(*bounds)
+
+    def _scaled_interval_range(
+        self, system: Any | None, family: str, lo: int, hi: int
+    ) -> tuple[int, int]:
+        """Scale a seconds-between-events range by this host's multiplier for ``family``.
+
+        No persona is applied here; only the host-level multiplier is used.
+        """
+        factor = self._activity_multiplier(system, family)
+        return scale_interval_range(lo, hi, factor)
+
+    def _activity_system_for_user(self, user: User) -> Any | None:
+        """Pick the host whose profile shapes ``user``: primary, then assigned, then first."""
+        hosts = self.scenario.environment.systems
+        wanted = user.primary_system
+        for host in (hosts if wanted else ()):
+            if host.hostname == wanted:
+                return host
+        for host in hosts:
+            if host.assigned_user == user.username:
+                return host
+        return hosts[0] if hosts else None
+
     def _emit_dhcp_registry_side_effect(
         self,
         *,
@@ -2234,8 +2329,25 @@ def _pick_public_scan_target(
                 offset = rng.randint(1, cidr.num_addresses - 2)
                 return str(cidr.network_address + offset)
 
-            # Estimate allow traffic: ~10-20 connections per internal system per hour
-            estimated_allows = len(internal_ips) * rng.randint(10, 20)
+            sensor_systems = []
+            for candidate in self.scenario.environment.systems:
+                try:
+                    candidate_ip = ipaddress.ip_address(candidate.ip)
+                except ValueError:
+                    continue
+                if any(
+                    seg_name in sensor.monitoring_segments and candidate_ip in cidr
+                    for seg_name, cidr in segment_cidrs.items()
+                ):
+                    sensor_systems.append(candidate)
+            sensor_systems = sensor_systems or self.scenario.environment.systems
+            avg_multiplier = sum(
+                self._activity_multiplier(system, "firewall_deny") for system in sensor_systems
+            ) / max(1, len(sensor_systems))
+
+            # Estimate allow traffic: ~10-20 connections per internal system per hour.
+            allows_lo, allows_hi = scale_count_range(10, 20, avg_multiplier)
+            estimated_allows = len(internal_ips) * rng.randint(allows_lo, allows_hi)
             deny_count = int(estimated_allows * sensor.deny_ratio)
             if deny_count <= 0:
                 continue
@@ -2319,13 +2431,22 @@ def _resolve_iface(ip: str, _ifaces: dict = sensor_interfaces) -> str:  # noqa:
                 ):
                     continue
 
-                offset_sec = rng.uniform(0, 3600)
+                offset_sec = pick_firewall_deny_offset(
+                    rng=rng,
+                    sensor_name=sensor.hostname or sensor.name,
+                    current_hour_epoch=int(current_hour.timestamp()),
+                    generated_index=generated,
+                    multiplier=avg_multiplier,
+                )
+                if offset_sec is None:
+                    continue
                 ts = current_hour + timedelta(seconds=offset_sec)
                 self.state_manager.set_current_time(ts)
 
                 src_iface = _resolve_iface(src_ip)
                 dst_iface = _resolve_iface(dst_ip)
                 acl_name = f"{src_iface}_access_in"
+                deny_hash_a, deny_hash_b = firewall_deny_hash_values(rng)
 
                 fw_ctx = FirewallContext(
                     action="deny",
@@ -2334,6 +2455,8 @@ def _resolve_iface(ip: str, _ifaces: dict = sensor_interfaces) -> str:  # noqa:
                     src_interface=src_iface,
                     dst_interface=dst_iface,
                     access_group=acl_name,
+                    deny_hash_a=deny_hash_a,
+                    deny_hash_b=deny_hash_b,
                 )
 
                 self.activity_generator.generate_connection(
@@ -2543,6 +2666,13 @@ def _calculate_events_for_hour(
         """Calculate number of events for user this hour."""
         lo, hi = self._resolve_traffic_rate("user_activity")
         base_events = lo if lo == hi else _get_rng().randint(lo, hi)
+        activity_system = self._activity_system_for_user(user)
+        base_events = int(
+            round(
+                base_events
+                * self._activity_multiplier(activity_system, "user_activity", user.persona)
+            )
+        )
 
         if persona and persona.risk_profile:
             risk_mult = {"low": 0.7, "medium": 1.0, "high": 1.3}
@@ -3365,7 +3495,10 @@ def _burst_offset() -> float:
         if role_conns:
             weights = [c.get("weight", 1) for c in role_conns]
             # Scale connection count by time-of-day (fewer at night)
-            base_count = rng.randint(8, 20) if is_business else rng.randint(2, 6)
+            if is_business:
+                base_count = self._scaled_randint(rng, system, "role_network", 8, 20)
+            else:
+                base_count = self._scaled_randint(rng, system, "role_network", 2, 6)
 
             for _ in range(base_count):
                 conn = rng.choices(role_conns, weights=weights, k=1)[0]
@@ -3492,7 +3625,10 @@ def _fw_is_on_path(fw_sensor, src_ip: str, dst_ip: str) -> bool:
                 from evidenceforge.events.contexts import FirewallContext as _InboundFwCtx
 
                 inbound_weights = [c.get("weight", 1) for c in inbound_conns]
-                num_inbound = rng.randint(4, 15) if is_business else rng.randint(1, 4)
+                if is_business:
+                    num_inbound = self._scaled_randint(rng, system, "inbound_network", 4, 15)
+                else:
+                    num_inbound = self._scaled_randint(rng, system, "inbound_network", 1, 4)
                 for _ in range(num_inbound):
                     conn = rng.choices(inbound_conns, weights=inbound_weights, k=1)[0]
                     is_external_src = conn["role"] == "_external"
@@ -3566,6 +3702,7 @@ def _fw_is_on_path(fw_sensor, src_ip: str, dst_ip: str) -> bool:
                         dst_hostname = self.world_model.fqdn_for_system(system)
 
                     if fw_denied and denying_sensor:
+                        deny_hash_a, deny_hash_b = firewall_deny_hash_values(rng)
                         # Emit as a deny record from the actual in-path firewall
                         deny_state = "REJ" if denying_sensor.drop_mode == "reject" else "S0"
                         self.activity_generator.generate_connection(
@@ -3583,6 +3720,8 @@ def _fw_is_on_path(fw_sensor, src_ip: str, dst_ip: str) -> bool:
                                 src_interface=_fw_iface_for(src_ip, denying_sensor),
                                 dst_interface=_fw_iface_for(system.ip, denying_sensor),
                                 access_group=f"{_fw_iface_for(src_ip, denying_sensor)}_access_in",
+                                deny_hash_a=deny_hash_a,
+                                deny_hash_b=deny_hash_b,
                             ),
                             emit_dns=False,
                         )
@@ -3655,6 +3794,13 @@ def _fw_is_on_path(fw_sensor, src_ip: str, dst_ip: str) -> bool:
             p_weights = [c.get("weight", 1) for c in persona_conns]
             # Fewer persona connections than role connections; scaled by intensity
             _pc_lo, _pc_hi = self._resolve_traffic_rate("persona_connections")
+            _pc_lo, _pc_hi = self._scaled_count_range(
+                system,
+                "persona_connections",
+                _pc_lo,
+                _pc_hi,
+                persona=persona,
+            )
             num_persona = rng.randint(_pc_lo, _pc_hi) if is_business else 0
             # Clamp timestamps to session lifetime within this hour
             session_start_sec = max(0.0, (session.start_time - current_hour).total_seconds())
@@ -3948,6 +4094,9 @@ def _svc_pid(*keys: str, _pids: dict = sys_pids) -> int:  # noqa: B006
             # DNS lookups: truly periodic with small jitter, using global schedule
             if "dns-client" in services:
                 _dns_lo, _dns_hi = self._resolve_traffic_rate("dns_interval")
+                _dns_lo, _dns_hi = self._scaled_interval_range(
+                    system, "dns_interval", _dns_lo, _dns_hi
+                )
                 _dns_range = max(1, _dns_hi - _dns_lo)
                 dns_interval = _dns_lo + (_stable_seed(f"dns_iv_{system.hostname}") % _dns_range)
                 dns_phase = _stable_seed(f"dns_ph_{system.hostname}") % dns_interval
@@ -4064,6 +4213,9 @@ def _svc_pid(*keys: str, _pids: dict = sys_pids) -> int:  # noqa: B006
                 smb_targets, fs_targets = self._build_smb_targets(system, dc_ips)
                 if smb_targets:
                     _smb_lo, _smb_hi = self._resolve_traffic_rate("smb_interval")
+                    _smb_lo, _smb_hi = self._scaled_interval_range(
+                        system, "smb_interval", _smb_lo, _smb_hi
+                    )
                     _smb_range = max(1, _smb_hi - _smb_lo)
                     smb_interval = _smb_lo + (
                         _stable_seed(f"smb_iv_{system.hostname}") % _smb_range
@@ -4143,6 +4295,7 @@ def _svc_pid(*keys: str, _pids: dict = sys_pids) -> int:  # noqa: B006
             # Kerberos
             if "kerberos-client" in services and os_cat == "windows" and dc_targets:
                 _krb_lo, _krb_hi = self._resolve_traffic_rate("kerberos")
+                _krb_lo, _krb_hi = self._scaled_count_range(system, "kerberos", _krb_lo, _krb_hi)
                 num_krb = rng.randint(_krb_lo, _krb_hi)
                 base_interval = 3600 / (num_krb + 1)
                 for i in range(num_krb):
@@ -4168,6 +4321,7 @@ def _svc_pid(*keys: str, _pids: dict = sys_pids) -> int:  # noqa: B006
             # LDAP
             if "ldap-client" in services and os_cat == "windows" and dc_targets:
                 _ldap_lo, _ldap_hi = self._resolve_traffic_rate("ldap")
+                _ldap_lo, _ldap_hi = self._scaled_count_range(system, "ldap", _ldap_lo, _ldap_hi)
                 num_ldap = rng.randint(_ldap_lo, _ldap_hi)
                 base_interval = 3600 / (num_ldap + 1)
                 for i in range(num_ldap):
@@ -4210,7 +4364,7 @@ def _svc_pid(*keys: str, _pids: dict = sys_pids) -> int:  # noqa: B006
                 )
 
                 sys_type_str = (system.type or "workstation").lower()
-                num_svc = rng.randint(3, 8)
+                num_svc = self._scaled_randint(rng, system, "windows_service_process", 3, 8)
                 for _si in range(num_svc):
                     svc_offset = rng.uniform(0, 3599)
                     svc_ts = current_hour + timedelta(seconds=svc_offset)
@@ -4247,7 +4401,7 @@ def _svc_pid(*keys: str, _pids: dict = sys_pids) -> int:  # noqa: B006
 
                 _REG_KEYS_HKCU = get_registry_keys_hkcu()
                 _REG_KEYS_HKLM = get_registry_keys_hklm()
-                _reg_count = rng.randint(18, 42)
+                _reg_count = self._scaled_randint(rng, system, "windows_registry", 18, 42)
                 _svc_pid = sys_pids.get("svchost_netsvcs", sys_pids.get("services", 4))
                 _host_ctx = self.activity_generator._build_host_context(system)
                 _registry_cfg = registry_noise_config()
@@ -4388,7 +4542,15 @@ def _svc_pid(*keys: str, _pids: dict = sys_pids) -> int:  # noqa: B006
                     pick_scheduled_task,
                 )
 
-                for offset in _windows_scheduled_task_offsets(current_hour, system, rng):
+                for offset in _windows_scheduled_task_offsets(
+                    current_hour,
+                    system,
+                    rng,
+                    count_multiplier=self._activity_multiplier(
+                        system,
+                        "windows_scheduled_task",
+                    ),
+                ):
                     ts = current_hour + timedelta(seconds=offset)
                     self.state_manager.set_current_time(ts)
                     task_image, task_cmd, task_parent_key = pick_scheduled_task(rng)
@@ -4474,7 +4636,8 @@ def _svc_pid(*keys: str, _pids: dict = sys_pids) -> int:  # noqa: B006
                 noise_cfg = load_create_remote_thread_noise_config()
                 probability = float(noise_cfg.get("probability_per_host_hour", 0.08))
                 max_events = int(noise_cfg.get("max_events_per_hour", 1))
-                if valid_crt and max_events > 0 and rng.random() < probability:
+                probability *= self._activity_multiplier(system, "windows_remote_thread")
+                if valid_crt and max_events > 0 and rng.random() < min(0.95, probability):
                     num_crt = rng.randint(1, max_events)
                     for _ in range(num_crt):
                         pattern = pick_create_remote_thread_pattern(valid_crt, rng)
@@ -4507,7 +4670,7 @@ def _svc_pid(*keys: str, _pids: dict = sys_pids) -> int:  # noqa: B006
                     if p.get("source_pid_key") in sys_pids and p.get("target_pid_key") in sys_pids
                 ]
                 if valid_pa:
-                    num_pa = rng.randint(3, 8)
+                    num_pa = self._scaled_randint(rng, system, "windows_process_access", 3, 8)
                     for _ in range(num_pa):
                         pattern = rng.choice(valid_pa)
                         src_key = pattern["source_pid_key"]
@@ -4546,7 +4709,7 @@ def _svc_pid(*keys: str, _pids: dict = sys_pids) -> int:  # noqa: B006
                 running = self.state_manager.get_processes_on_system(system.hostname)
                 if running:
                     generic_dll_pool = get_dll_pool()
-                    num_dll = rng.randint(20, 45)
+                    num_dll = self._scaled_randint(rng, system, "windows_module_load", 20, 45)
                     for _ in range(num_dll):
                         offset = rng.uniform(0, 3599)
                         ts = current_hour + timedelta(seconds=offset)
@@ -4607,7 +4770,7 @@ def _svc_pid(*keys: str, _pids: dict = sys_pids) -> int:  # noqa: B006
                         pick_bash_command_entry,
                     )
 
-                    num_ssh = rng.randint(1, 3)
+                    num_ssh = self._scaled_randint(rng, system, "linux_remote_admin", 1, 3)
                     for _ in range(num_ssh):
                         ssh_user = rng.choice(roster)
                         offset = rng.uniform(0, 3599)
@@ -4624,11 +4787,32 @@ def _svc_pid(*keys: str, _pids: dict = sys_pids) -> int:  # noqa: B006
 
                         persona_lower = (ssh_user.persona or "").lower()
                         if persona_lower == "sysadmin":
-                            n_cmds = rng.randint(3, 8)
+                            n_cmds = self._scaled_randint(
+                                rng,
+                                system,
+                                "linux_shell",
+                                3,
+                                8,
+                                persona=ssh_user.persona,
+                            )
                         elif persona_lower == "developer":
-                            n_cmds = rng.randint(2, 6)
+                            n_cmds = self._scaled_randint(
+                                rng,
+                                system,
+                                "linux_shell",
+                                2,
+                                6,
+                                persona=ssh_user.persona,
+                            )
                         else:
-                            n_cmds = rng.randint(1, 4)
+                            n_cmds = self._scaled_randint(
+                                rng,
+                                system,
+                                "linux_shell",
+                                1,
+                                4,
+                                persona=ssh_user.persona,
+                            )
                         hour_end = current_hour + timedelta(hours=1)
                         cumulative_gap = 0
                         _SLOW_CMD_KEYWORDS = frozenset(
@@ -4701,7 +4885,14 @@ def _svc_pid(*keys: str, _pids: dict = sys_pids) -> int:  # noqa: B006
                         pick_bash_command_entry,
                     )
 
-                    n_cmds = rng.randint(1, 4)
+                    n_cmds = self._scaled_randint(
+                        rng,
+                        system,
+                        "linux_shell",
+                        1,
+                        4,
+                        persona=ws_user.persona,
+                    )
                     ts0 = current_hour + timedelta(seconds=rng.uniform(0, 3599))
                     hour_end = current_hour + timedelta(hours=1)
                     cumulative = 0
@@ -4735,8 +4926,9 @@ def _svc_pid(*keys: str, _pids: dict = sys_pids) -> int:  # noqa: B006
             if os_cat_rdp != "windows" or sys_type_rdp not in ("server", "domain_controller"):
                 continue
 
-            # 1-3 RDP admin sessions per hour to servers, ~60% probability
-            if rng.random() > 0.60:
+            # 1-3 RDP admin sessions per hour to servers, shaped by host role/profile.
+            rdp_multiplier = self._activity_multiplier(system, "windows_remote_admin")
+            if rng.random() > min(0.95, 0.60 * rdp_multiplier):
                 continue
 
             if not any(
@@ -4745,7 +4937,7 @@ def _svc_pid(*keys: str, _pids: dict = sys_pids) -> int:  # noqa: B006
             ):
                 continue
 
-            num_rdp = rng.randint(1, 3)
+            num_rdp = self._scaled_randint(rng, system, "windows_remote_admin", 1, 3)
             roster = self._get_server_ssh_users(system)
             if not roster:
                 continue
@@ -4773,7 +4965,10 @@ def _svc_pid(*keys: str, _pids: dict = sys_pids) -> int:  # noqa: B006
                 continue
 
             sys_type_svc = (system.type or "workstation").lower()
-            num_svc = rng.randint(2, 5) if sys_type_svc != "workstation" else rng.randint(1, 2)
+            if sys_type_svc != "workstation":
+                num_svc = self._scaled_randint(rng, system, "windows_service_logon", 2, 5)
+            else:
+                num_svc = self._scaled_randint(rng, system, "windows_service_logon", 1, 2)
             for _ in range(num_svc):
                 offset = rng.uniform(0, 3599)
                 ts = current_hour + timedelta(seconds=offset)
@@ -4786,7 +4981,7 @@ def _svc_pid(*keys: str, _pids: dict = sys_pids) -> int:  # noqa: B006
                 )
 
             if sys_type_svc in ("server", "domain_controller"):
-                num_anon = rng.randint(1, 3)
+                num_anon = self._scaled_randint(rng, system, "windows_service_logon", 1, 3)
                 for _ in range(num_anon):
                     offset = rng.uniform(0, 3599)
                     ts = current_hour + timedelta(seconds=offset)
@@ -4807,7 +5002,7 @@ def _svc_pid(*keys: str, _pids: dict = sys_pids) -> int:  # noqa: B006
                 if os_cat != "windows" or system.ip in dc_ips:
                     continue
 
-                num_auth = rng.randint(2, 6)
+                num_auth = self._scaled_randint(rng, system, "windows_machine_auth", 2, 6)
                 base_interval = 3600 / (num_auth + 1)
                 for i in range(num_auth):
                     offset = base_interval * (i + 1) + rng.gauss(0, base_interval * 0.1)
@@ -4832,8 +5027,12 @@ def _svc_pid(*keys: str, _pids: dict = sys_pids) -> int:  # noqa: B006
                 if _get_os_category(s.os) == "windows" and s.ip not in dc_ips
             ]
             for _dc_idx, dc_hostname in enumerate(dc_hostnames):
+                dc_system = next(
+                    (s for s in self.scenario.environment.systems if s.hostname == dc_hostname),
+                    None,
+                )
                 for client in windows_clients:
-                    num_cycles = rng.randint(3, 8)
+                    num_cycles = self._scaled_randint(rng, dc_system, "dc_kerberos", 3, 8)
                     base_interval = 3600 / (num_cycles + 1)
                     for i in range(num_cycles):
                         offset = base_interval * (i + 1) + rng.gauss(0, base_interval * 0.15)
@@ -4848,7 +5047,16 @@ def _svc_pid(*keys: str, _pids: dict = sys_pids) -> int:  # noqa: B006
                             dc_hostname=dc_hostname,
                             time=ts,
                         )
-                        num_tgs = 0 if rng.random() < 0.22 else rng.randint(1, 5)
+                        if rng.random() < 0.22:
+                            num_tgs = 0
+                        else:
+                            num_tgs = self._scaled_randint(
+                                rng,
+                                dc_system,
+                                "dc_kerberos",
+                                1,
+                                5,
+                            )
                         member_servers = [
                             s.hostname
                             for s in self.scenario.environment.systems
@@ -4935,7 +5143,10 @@ def _svc_pid(*keys: str, _pids: dict = sys_pids) -> int:  # noqa: B006
                 or "web" in system.hostname.lower()
             )
             has_ntp_client = "ntp-client" in self._system_service_defaults.get(system.hostname, [])
-            num_events = rng.randint(100, 300) if is_dmz else rng.randint(50, 120)
+            if is_dmz:
+                num_events = self._scaled_randint(rng, system, "linux_syslog", 100, 300)
+            else:
+                num_events = self._scaled_randint(rng, system, "linux_syslog", 50, 120)
 
             scenario_start = self.scenario.time_window.start
             boot_uptime = self._kernel_boot_uptimes.get(system.hostname, 500000.0)
@@ -5332,7 +5543,11 @@ def _svc_pid(*keys: str, _pids: dict = sys_pids) -> int:  # noqa: B006
         # ICMP ping between systems on same subnet
         systems = self.scenario.environment.systems
         if len(systems) >= 2:
-            num_pings = rng.randint(1, 3)
+            avg_multiplier = sum(
+                self._activity_multiplier(system, "icmp_monitoring") for system in systems
+            ) / len(systems)
+            ping_lo, ping_hi = scale_count_range(1, 3, avg_multiplier)
+            num_pings = rng.randint(ping_lo, ping_hi)
             base_interval = 3600 / (num_pings + 1)
             for i in range(num_pings):
                 src_sys = rng.choice(systems)
@@ -5388,7 +5603,11 @@ def _svc_pid(*keys: str, _pids: dict = sys_pids) -> int:  # noqa: B006
                     monitored_systems.extend(segment_systems.get(seg_name, []))
                 if not monitored_systems:
                     continue
-                num_alerts = rng.randint(5, 15)
+                avg_multiplier = sum(
+                    self._activity_multiplier(system, "ids_alert") for system in monitored_systems
+                ) / len(monitored_systems)
+                alerts_lo, alerts_hi = scale_count_range(5, 15, avg_multiplier)
+                num_alerts = rng.randint(alerts_lo, alerts_hi)
                 # For IDS sensors (typically perimeter), generate alerts with
                 # external source IPs targeting monitored systems.
                 _EXTERNAL_SCAN_IPS = getattr(
@@ -5535,6 +5754,17 @@ def _emit_web_server_access(
         )
 
         web_lo, web_hi = self._resolve_traffic_rate("web")
+        scale_method = getattr(self, "_scaled_count_range", None)
+        if callable(scale_method):
+            scaled_range: tuple[int, int] | None = None
+            try:
+                candidate = scale_method(sys_obj, "web", web_lo, web_hi)
+            except (AttributeError, TypeError, ValueError):
+                candidate = None
+            if isinstance(candidate, (tuple, list)) and len(candidate) == 2:
+                scaled_range = (int(candidate[0]), int(candidate[1]))
+            if scaled_range is not None:
+                web_lo, web_hi = scaled_range
         top_level_budget = rng.randint(web_lo, web_hi)
         if top_level_budget <= 0:
             return
diff --git a/tests/unit/test_baseline_canonical.py b/tests/unit/test_baseline_canonical.py
index 2cdbe72e..146f0c0e 100644
--- a/tests/unit/test_baseline_canonical.py
+++ b/tests/unit/test_baseline_canonical.py
@@ -875,7 +875,9 @@ def test_registry_noise_prefers_dynamic_pools_and_filters_repeated_tells(self):
         from evidenceforge.generation.engine.baseline import BaselineMixin
 
         source = inspect.getsource(BaselineMixin)
-        assert "_reg_count = rng.randint(18, 42)" in source
+        assert (
+            '_reg_count = self._scaled_randint(rng, system, "windows_registry", 18, 42)' in source
+        )
         assert "Office\\\\16.0\\\\Word\\\\Reading Locations\\\\Document 1" in source
         assert "Windows NT\\\\CurrentVersion\\\\Winlogon" in source
         assert "Services\\\\EventLog\\\\Application" in source
diff --git a/tests/unit/test_cisco_asa_emitter.py b/tests/unit/test_cisco_asa_emitter.py
index 6af47a95..43cac3d0 100644
--- a/tests/unit/test_cisco_asa_emitter.py
+++ b/tests/unit/test_cisco_asa_emitter.py
@@ -467,6 +467,8 @@ def test_deny_produces_single_record(self, asa_emitter, tmp_path):
                 src_interface="outside",
                 dst_interface="inside",
                 access_group="outside_access_in",
+                deny_hash_a="0x2a1b",
+                deny_hash_b="0x031f",
             ),
         )
         asa_emitter.emit(event)
@@ -479,6 +481,7 @@ def test_deny_produces_single_record(self, asa_emitter, tmp_path):
         assert "Deny tcp src outside:198.51.100.1/54321" in lines[0]
         assert "dst inside:10.0.10.50/445" in lines[0]
         assert 'by access-group "outside_access_in"' in lines[0]
+        assert "[0x2a1b, 0x031f]" in lines[0]
 
     def test_icmp_deny_includes_type_code(self, asa_emitter, tmp_path):
         """ICMP deny should include (type N, code N) in the message."""
diff --git a/tests/unit/test_host_activity_profiles.py b/tests/unit/test_host_activity_profiles.py
new file mode 100644
index 00000000..1ab7fe9f
--- /dev/null
+++ b/tests/unit/test_host_activity_profiles.py
@@ -0,0 +1,141 @@
+# Copyright (c) 2026 Cisco Systems, Inc. and its affiliates
+# SPDX-License-Identifier: MIT
+
+"""Tests for host/persona/role activity profile configuration."""
+
+import base64
+import random
+from types import SimpleNamespace
+
+import pytest
+
+from evidenceforge.generation.activity.host_activity_profiles import (
+    RATE_FAMILIES,
+    firewall_deny_hash_values,
+    generate_encoded_powershell_command,
+    load_host_activity_profiles,
+    reset_cache,
+    resolve_host_activity_profile,
+    scale_count_range,
+    scale_interval_range,
+)
+from evidenceforge.generation.engine.baseline import BaselineMixin
+
+
+@pytest.fixture(autouse=True)  # applied to every test in this module without being requested
+def _reset_host_activity_profiles_cache():
+    reset_cache()  # runs before the test body
+    yield
+    reset_cache()  # runs after the test body; presumably stops cache leakage across tests
+
+
+def _system(  # build a lightweight stand-in exposing hostname, type and roles attributes
+    hostname: str,
+    system_type: str,  # exposed on the stub as "type"
+    roles: list[str] | None = None,  # None (or any falsy value) becomes an empty list
+) -> SimpleNamespace:
+    return SimpleNamespace(hostname=hostname, type=system_type, roles=roles or [])
+
+
+def test_host_activity_profiles_cover_core_families():  # checks keys of the loaded profile data
+    data = load_host_activity_profiles()
+
+    assert {"workstation", "server", "domain_controller"} <= set(data["host_types"])  # subset
+    assert set(data["rate_families"]["bounds"]) <= RATE_FAMILIES  # bounds name known families
+    assert set(data["host_types"]["domain_controller"]["families"]) <= RATE_FAMILIES
+
+
+def test_resolved_profiles_shape_infrastructure_hosts_differently():  # compares multipliers
+    workstation = resolve_host_activity_profile(  # workstation type, no roles
+        scenario_name="profile-test",
+        system=_system("wkstn01", "workstation"),
+    )
+    server = resolve_host_activity_profile(  # server type with the file_server role
+        scenario_name="profile-test",
+        system=_system("files01", "server", ["file_server"]),
+    )
+    dc = resolve_host_activity_profile(  # domain_controller as both type and role
+        scenario_name="profile-test",
+        system=_system("dc01", "domain_controller", ["domain_controller"]),
+    )
+
+    assert dc.multiplier("dc_kerberos") > workstation.multiplier("dc_kerberos")
+    assert dc.multiplier("windows_machine_auth") > workstation.multiplier("windows_machine_auth")
+    assert server.multiplier("inbound_network") > workstation.multiplier("inbound_network")
+
+
+def test_count_and_interval_scaling_preserve_sensible_bounds():
+    assert scale_count_range(2, 6, 2.0) == (4, 12)  # 2.0 doubles both count bounds
+    assert scale_count_range(0, 3, 0.25) == (0, 1)  # 3 scaled by 0.25 still gives an upper 1
+    assert scale_interval_range(300, 900, 2.0) == (150, 450)  # intervals shrink as rate grows
+    assert scale_interval_range(300, 900, 0.5) == (600, 1800)  # 0.5 doubles both intervals
+
+
+def test_host_activity_profiles_overlay_merges(tmp_path, monkeypatch):
+    overlay_dir = tmp_path / ".eforge" / "config" / "activity"
+    overlay_dir.mkdir(parents=True)
+    (overlay_dir / "host_activity_profiles.yaml").write_text(  # overrides two sections only
+        """
+role_profiles:
+  web_server:
+    families:
+      firewall_deny: 2.0
+firewall_deny:
+  metadata_hash_nonzero_probability: 1.0
+""",
+        encoding="utf-8",
+    )
+
+    monkeypatch.chdir(tmp_path)  # overlay is presumably looked up relative to the cwd
+    reset_cache()  # presumably forces the next load to re-read configuration
+
+    data = load_host_activity_profiles()
+    assert data["host_types"]["workstation"]  # section absent from the overlay is still loaded
+    assert data["role_profiles"]["web_server"]["families"]["firewall_deny"] == 2.0  # overlay value
+    assert firewall_deny_hash_values(random.Random(4)) != ("0x0", "0x0")  # overlay probability 1.0
+
+
+def test_encoded_powershell_variants_are_data_driven_and_decodable():
+    encoded = generate_encoded_powershell_command(  # fixed seed, presumably for repeatability
+        rng=random.Random(7),
+        hostname="wkstn01",
+        username="alice",
+    )
+
+    decoded = base64.b64decode(encoded).decode("utf-16-le")  # base64, then UTF-16LE text
+    assert "{" not in decoded  # presumably catches unfilled template placeholders
+    assert any(  # decoded text must start with one of the cmdlet names below
+        decoded.startswith(prefix)
+        for prefix in (
+            "Get-Service",
+            "Get-EventLog",
+            "Test-NetConnection",
+            "Get-Process",
+            "Get-ChildItem",
+            "Get-WmiObject",
+            "Get-HotFix",
+            "Get-CimInstance",
+            "Get-ScheduledTask",
+        )
+    )
+
+
+def test_baseline_mixin_resolves_primary_host_activity_profile():
+    class Harness(BaselineMixin):  # bare subclass so the mixin helpers can be called directly
+        pass
+
+    workstation = _system("wkstn01", "workstation")
+    server = _system("files01", "server", ["file_server"])
+    harness = Harness()
+    harness.scenario = SimpleNamespace(  # only name and environment.systems are provided
+        name="baseline-profile-test",
+        environment=SimpleNamespace(systems=[workstation, server]),
+    )
+
+    user = SimpleNamespace(username="alice", primary_system="wkstn01", persona="developer")
+
+    assert harness._activity_system_for_user(user) is workstation  # identical object expected
+    assert harness._activity_multiplier(server, "inbound_network") > harness._activity_multiplier(
+        workstation,
+        "inbound_network",
+    )
diff --git a/tests/unit/test_validate_config.py b/tests/unit/test_validate_config.py
index 6728f400..fe7b0794 100644
--- a/tests/unit/test_validate_config.py
+++ b/tests/unit/test_validate_config.py
@@ -113,6 +113,34 @@ def load_invalid_observation_profiles():
             for issue in result.issues
         )
 
+    def test_validate_config_rejects_unknown_host_activity_family(self, monkeypatch):
+        from evidenceforge.generation.activity import host_activity_profiles
+
+        real_loader = host_activity_profiles.load_host_activity_profiles  # taken before patching
+
+        def load_invalid_host_activity_profiles():  # real data plus one extra family key
+            data = real_loader()
+            host_types = dict(data["host_types"])  # copy so the loaded mapping is not mutated
+            workstation = dict(host_types["workstation"])
+            workstation["families"] = {**workstation.get("families", {}), "zeek_magic": 1.5}
+            host_types["workstation"] = workstation
+            return {**data, "host_types": host_types}
+
+        monkeypatch.setattr(  # pytest undoes this replacement after the test
+            host_activity_profiles,
+            "load_host_activity_profiles",
+            load_invalid_host_activity_profiles,
+        )
+
+        result = validate_config()
+
+        assert any(  # needs an ERROR for host_activity_profiles.yaml citing unknown families
+            issue.severity == "ERROR"
+            and issue.file == "host_activity_profiles.yaml"
+            and "unknown activity families" in issue.message
+            for issue in result.issues
+        )
+
     def test_validate_config_rejects_third_party_module_with_microsoft_identity(self, monkeypatch):
         from evidenceforge.generation.activity import application_catalog