diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml index 52de3267..1d86c9c7 100644 --- a/.github/workflows/audit.yml +++ b/.github/workflows/audit.yml @@ -22,6 +22,6 @@ jobs: contents: read issues: write steps: - - uses: actions/checkout@v6 - - uses: actions-rust-lang/audit@v1 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - uses: actions-rust-lang/audit@72c09e02f132669d52284a3323acdb503cfc1a24 # v1.2.7 name: Audit Rust Dependencies diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index f4f846af..c259e297 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -1,6 +1,23 @@ name: Benchmarks on: + # The eventbus-latency-slo job gates PRs on the END-297 local-IPC round-trip + # p99 latency SLO (<1ms). Scoped to paths that can plausibly affect IPC + # latency so we don't run a bench on every PR. Other jobs in this workflow + # (benchmarks, load-tests) remain workflow_dispatch-only and are gated by + # per-job `if` conditions below; they are informational and expensive to + # run on every PR. + pull_request: + branches: [ main ] + paths: + - "daemoneye-eventbus/**" + - "collector-core/**" + - ".github/workflows/benchmarks.yml" + # Toolchain or workspace-level dep changes can shift Tokio/compiler + # behavior and move p99 without touching the eventbus crate itself. + - "Cargo.toml" + - "Cargo.lock" + - "rust-toolchain.toml" workflow_dispatch: inputs: suite: @@ -30,12 +47,15 @@ env: jobs: benchmarks: + # Informational benchmark suite; runs only on manual dispatch to avoid + # spending PR CI time on non-gating criterion runs. 
+ if: github.event_name == 'workflow_dispatch' runs-on: ubuntu-latest timeout-minutes: 15 steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - uses: jdx/mise-action@5228313ee0372e111a38da051671ca30fc5a96db # v3.6.3 + - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1 with: install: true cache: true @@ -45,7 +65,8 @@ jobs: uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: target/criterion - key: criterion-baseline-${{ runner.os }}-${{ hashFiles('rust-toolchain.toml', 'Cargo.lock') }} + key: criterion-baseline-${{ runner.os }}-${{ hashFiles('rust-toolchain.toml', + 'Cargo.lock') }} - name: Run benchmarks env: @@ -80,10 +101,11 @@ jobs: if: github.ref == 'refs/heads/main' with: path: target/criterion - key: criterion-baseline-${{ runner.os }}-${{ hashFiles('rust-toolchain.toml', 'Cargo.lock') }} + key: criterion-baseline-${{ runner.os }}-${{ hashFiles('rust-toolchain.toml', + 'Cargo.lock') }} - name: Upload benchmark results - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 if: always() with: name: benchmark-results @@ -91,12 +113,14 @@ jobs: retention-days: 30 load-tests: + # Informational load tests; runs only on manual dispatch. + if: github.event_name == 'workflow_dispatch' runs-on: ubuntu-latest timeout-minutes: 10 steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - uses: jdx/mise-action@5228313ee0372e111a38da051671ca30fc5a96db # v3.6.3 + - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1 with: install: true cache: true @@ -114,3 +138,50 @@ jobs: name: load-test-results path: load-test-output.txt retention-days: 30 + + # END-297 acceptance-criterion gate: local-IPC round-trip p99 latency < 1ms. + # See daemoneye-eventbus/benches/ipc_performance.rs::latency_p99_slo. 
+ # Gated on Linux and macOS only; Windows and FreeBSD are informational per + # the END-297 plan (Key Technical Decisions) and the AGENTS.md OS support + # matrix. + eventbus-latency-slo: + strategy: + fail-fast: false + matrix: + os: + - ubuntu-latest + - macos-latest + runs-on: ${{ matrix.os }} + timeout-minutes: 15 + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1 + with: + install: true + cache: true + github_token: ${{ secrets.GITHUB_TOKEN }} + + # Cache `target/` across runs so the cold-build time doesn't eat the + # 15-minute job timeout. Key includes the OS and Cargo.lock so the + # cache invalidates on toolchain/dep changes. + - uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1 + with: + shared-key: eventbus-latency-slo-${{ matrix.os }} + cache-targets: "true" + cache-all-crates: "true" + + - name: Run END-297 latency p99 SLO bench (<1ms) + # Filter anchored with ^...$ so it matches only the SLO bench ID + # and not the pre-existing `latency/*` bench IDs in the same file. 
+ run: | + set -o pipefail + mise x -- cargo bench --package daemoneye-eventbus --bench ipc_performance -- '^latency_p99_slo$' 2>&1 | tee eventbus-latency-slo.txt + + - name: Upload latency SLO results + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + if: always() + with: + name: eventbus-latency-slo-${{ matrix.os }} + path: eventbus-latency-slo.txt + retention-days: 30 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e111f4b2..a7ce7139 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,12 +2,12 @@ name: CI on: push: - branches: [main] + branches: [ main ] pull_request: - branches: [main] + branches: [ main ] workflow_dispatch: -# Restrict permissions to minimum required (principle of least privilege) + # Restrict permissions to minimum required (principle of least privilege) permissions: contents: read @@ -38,7 +38,7 @@ jobs: github_token: ${{ secrets.GITHUB_TOKEN }} - name: Ensure rustfmt and clippy are installed - run: rustup component add rustfmt clippy + run: mise x -- rustup component add rustfmt clippy - name: Check formatting run: just lint-rust @@ -114,7 +114,7 @@ jobs: coverage: runs-on: ubuntu-latest timeout-minutes: 30 - needs: [test, test-cross-platform, quality] + needs: [ test, test-cross-platform, quality ] steps: - name: Checkout repository uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -138,8 +138,3 @@ jobs: fail_ci_if_error: false token: ${{ secrets.CODECOV_TOKEN }} slug: EvilBit-Labs/daemoneye - - name: Upload to Qlty - uses: qltysh/qlty-action/coverage@a19242102d17e497f437d7466aa01b528537e899 # v2.2.0 - with: - token: ${{ secrets.QLTY_COVERAGE_TOKEN }} - files: target/lcov.info diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index b03e07fc..d499832e 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -2,9 +2,9 @@ name: CodeQL on: push: - branches: [main] + branches: [ main ] 
pull_request: - branches: [main] + branches: [ main ] schedule: - cron: "43 22 * * 1" workflow_dispatch: @@ -19,8 +19,8 @@ jobs: name: CodeQL Analyze runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v6 - - uses: jdx/mise-action@v3 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1 with: install: true cache: true diff --git a/.github/workflows/copilot-setup-steps.yml b/.github/workflows/copilot-setup-steps.yml index 4f9a9868..910c1fa1 100644 --- a/.github/workflows/copilot-setup-steps.yml +++ b/.github/workflows/copilot-setup-steps.yml @@ -28,8 +28,8 @@ jobs: # You can define any steps you want, and they will run before the agent starts. # If you do not check out your code, Copilot will do this for you. steps: - - uses: actions/checkout@v6 - - uses: jdx/mise-action@v3 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1 with: install: true cache: true diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index acb0fc8a..85a881cf 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -2,9 +2,9 @@ name: Deploy Documentation on: push: - branches: [main] + branches: [ main ] pull_request: - branches: [main] + branches: [ main ] workflow_dispatch: permissions: @@ -24,15 +24,15 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 - - uses: jdx/mise-action@v3 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1 with: install: true cache: true github_token: ${{ secrets.GITHUB_TOKEN }} - name: Setup mdBook - uses: jontze/action-mdbook@v4 + uses: jontze/action-mdbook@6c0be56d14c4bf16861b00af61f50ff7400ce502 # v4.0.0 with: token: ${{ secrets.GITHUB_TOKEN }} mdbook-version: latest @@ -41,29 +41,30 @@ jobs: 
use-admonish: true - name: Install cargo-binstall - uses: cargo-bins/cargo-binstall@main + uses: cargo-bins/cargo-binstall@dc19f1e48450eefe5a29b8da6c6b00a87d730b37 # v1.18.1 - name: Install mdbook plugins - run: cargo binstall mdbook-tabs mdbook-i18n-helpers mdbook-yml-header mdbook-image-size --no-confirm + run: cargo binstall mdbook-tabs mdbook-i18n-helpers mdbook-yml-header + mdbook-image-size --no-confirm - name: Build rustdoc run: | - cargo doc --no-deps --document-private-items --target-dir target + mise x -- cargo doc --no-deps --document-private-items --target-dir target mkdir -p docs/book/api cp -r target/doc/* docs/book/api/ - name: Build mdBook run: | cd docs - mdbook build + mise x -- mdbook build - name: Setup Pages if: github.ref == 'refs/heads/main' - uses: actions/configure-pages@v5 + uses: actions/configure-pages@45bfe0192ca1faeb007ade9deae92b16b8254a0d # v6.0.0 - name: Upload artifact if: github.ref == 'refs/heads/main' - uses: actions/upload-pages-artifact@v4 + uses: actions/upload-pages-artifact@fc324d3547104276b827a68afc52ff2a11cc49c9 # v5.0.0 with: path: docs/book @@ -77,4 +78,4 @@ jobs: steps: - name: Deploy to GitHub Pages id: deployment - uses: actions/deploy-pages@v4 + uses: actions/deploy-pages@cd2ce8fcbc39b97be8ca5fce6e763baed58fa128 # v5.0.0 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 05b5f955..53a4a134 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -27,18 +27,18 @@ jobs: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 0 - - uses: jdx/mise-action@5228313ee0372e111a38da051671ca30fc5a96db # v3.6.3 + - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1 with: install: true cache: true github_token: ${{ secrets.GITHUB_TOKEN }} - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@1.91.0 + uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # v1 with: + toolchain: stable targets: 
>- - x86_64-unknown-linux-gnu, - aarch64-unknown-linux-gnu + x86_64-unknown-linux-gnu, aarch64-unknown-linux-gnu - name: Install cross-compilation dependencies run: | @@ -46,7 +46,7 @@ jobs: sudo apt-get install -y gcc-aarch64-linux-gnu - name: Run GoReleaser - uses: goreleaser/goreleaser-action@ec59f474b9834571250b370d4735c50f8e2d1e29 # v7.0.0 + uses: goreleaser/goreleaser-action@e24998b8b67b290c2fa8b7c14fcfa7de2c5c9b8c # v7.1.0 with: distribution: goreleaser version: "~> v2" @@ -61,21 +61,21 @@ jobs: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 0 - - uses: jdx/mise-action@5228313ee0372e111a38da051671ca30fc5a96db # v3.6.3 + - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1 with: install: true cache: true github_token: ${{ secrets.GITHUB_TOKEN }} - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@1.91.0 + uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # v1 with: + toolchain: stable targets: >- - x86_64-apple-darwin, - aarch64-apple-darwin + x86_64-apple-darwin, aarch64-apple-darwin - name: Run GoReleaser - uses: goreleaser/goreleaser-action@ec59f474b9834571250b370d4735c50f8e2d1e29 # v7.0.0 + uses: goreleaser/goreleaser-action@e24998b8b67b290c2fa8b7c14fcfa7de2c5c9b8c # v7.1.0 with: distribution: goreleaser version: "~> v2" @@ -91,21 +91,21 @@ jobs: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 0 - - uses: jdx/mise-action@5228313ee0372e111a38da051671ca30fc5a96db # v3.6.3 + - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1 with: install: true cache: true github_token: ${{ secrets.GITHUB_TOKEN }} - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@1.91.0 + uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # v1 with: + toolchain: stable targets: >- - x86_64-pc-windows-msvc, - aarch64-pc-windows-msvc + x86_64-pc-windows-msvc, aarch64-pc-windows-msvc - name: Run 
GoReleaser - uses: goreleaser/goreleaser-action@ec59f474b9834571250b370d4735c50f8e2d1e29 # v7.0.0 + uses: goreleaser/goreleaser-action@e24998b8b67b290c2fa8b7c14fcfa7de2c5c9b8c # v7.1.0 with: distribution: goreleaser version: "~> v2" diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index c9cc7571..a96a3887 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -4,80 +4,77 @@ name: Scorecard supply-chain security on: - # For Branch-Protection check. Only the default branch is supported. See - # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection - branch_protection_rule: - # To guarantee Maintained check is occasionally updated. See - # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained - schedule: - - cron: '28 11 * * 2' - push: - branches: [ "main" ] - workflow_dispatch: + # For Branch-Protection check. Only the default branch is supported. See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection + branch_protection_rule: # To guarantee Maintained check is occasionally updated. See -# Declare default permissions as read only and least-privilege. + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained + schedule: + - cron: "28 11 * * 2" + push: + branches: [ "main" ] + workflow_dispatch: + + # Declare default permissions as read only and least-privilege. permissions: - contents: read - actions: read + contents: read + actions: read jobs: - analysis: - name: Scorecard analysis - runs-on: ubuntu-latest - # `publish_results: true` only works when run from the default branch. conditional can be removed if disabled. - if: github.event.repository.default_branch == github.ref_name - permissions: - # Needed to upload the results to code-scanning dashboard. - security-events: write - # Needed to publish results and get a badge (see publish_results below). 
- id-token: write - # Uncomment the permissions below if installing in a private repository. - # contents: read - # actions: read - - steps: - - name: "Checkout code" - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - persist-credentials: false + analysis: + name: Scorecard analysis + runs-on: ubuntu-latest + # `publish_results: true` only works when run from the default branch. conditional can be removed if disabled. + if: github.event.repository.default_branch == github.ref_name + permissions: + # Needed to upload the results to code-scanning dashboard. + security-events: write + # Needed to publish results and get a badge (see publish_results below). + id-token: write + # Uncomment the permissions below if installing in a private repository. + # contents: read + # actions: read - - name: "Run analysis" - uses: ossf/scorecard-action@f49aabe0b5af0936a0987cfb85d86b75731b0186 # v2.4.1 - with: - results_file: results.sarif - results_format: sarif - # (Optional) "write" PAT token. Uncomment the `repo_token` line below if: - # - you want to enable the Branch-Protection check on a *public* repository, or - # - you are installing Scorecard on a *private* repository - # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action?tab=readme-ov-file#authentication-with-fine-grained-pat-optional. - # repo_token: ${{ secrets.SCORECARD_TOKEN }} + steps: + - name: "Checkout code" + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false - # Public repositories: - # - Publish results to OpenSSF REST API for easy access by consumers - # - Allows the repository to include the Scorecard badge. - # - See https://github.com/ossf/scorecard-action#publishing-results. - # For private repositories: - # - `publish_results` will always be set to `false`, regardless - # of the value entered here. 
- # Disabled because OSSF workflow verification currently rejects - # github/codeql-action/upload-sarif as an "imposter commit". - publish_results: false + - name: "Run analysis" + uses: ossf/scorecard-action@99c09fe975337306107572b4fdf4db224cf8e2f2 # v2.4.3 + with: + results_file: results.sarif + results_format: sarif + # (Optional) "write" PAT token. Uncomment the `repo_token` line below if: + # - you want to enable the Branch-Protection check on a *public* repository, or + # - you are installing Scorecard on a *private* repository + # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action?tab=readme-ov-file#authentication-with-fine-grained-pat-optional. + # repo_token: ${{ secrets.SCORECARD_TOKEN }} - # (Optional) Uncomment file_mode if you have a .gitattributes with files marked export-ignore - # file_mode: git + # Public repositories: + # - Publish results to OpenSSF REST API for easy access by consumers + # - Allows the repository to include the Scorecard badge. + # - See https://github.com/ossf/scorecard-action#publishing-results. + # For private repositories: + # - `publish_results` will always be set to `false`, regardless + # of the value entered here. + publish_results: true + # (Optional) Uncomment file_mode if you have a .gitattributes with files marked export-ignore + # file_mode: git - # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF - # format to the repository Actions tab. - - name: "Upload artifact" - uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 # v4.6.1 - with: - name: SARIF file - path: results.sarif - retention-days: 5 + # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF + # format to the repository Actions tab. 
+ - name: "Upload artifact" + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: SARIF file + path: results.sarif + retention-days: 5 - # Upload the results to GitHub's code scanning dashboard (optional). - # Commenting out will disable upload of results to your repo's Code Scanning dashboard - - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@33119e582d3ab4ed79c2610af108cb08ff983917 # v3 - with: - sarif_file: results.sarif + # Upload the results to GitHub's code scanning dashboard (optional). + # Commenting out will disable upload of results to your repo's Code Scanning dashboard + - name: "Upload to code-scanning" + uses: github/codeql-action/upload-sarif@33119e582d3ab4ed79c2610af108cb08ff983917 # v3 + with: + sarif_file: results.sarif diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index e9ea532e..0f4cf3bc 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -2,8 +2,8 @@ name: Security on: workflow_run: - workflows: [CI] - types: [completed] + workflows: [ CI ] + types: [ completed ] schedule: - cron: "0 6 * * *" workflow_dispatch: @@ -24,18 +24,18 @@ jobs: audit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 - - uses: jdx/mise-action@v3 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1 with: install: true cache: true github_token: ${{ secrets.GITHUB_TOKEN }} - name: Run cargo deny check - run: cargo deny check --config deny.toml + run: mise x -- cargo deny check --config deny.toml - name: Run cargo outdated - run: cargo outdated --depth=1 --exit-code=1 + run: mise x -- cargo outdated --depth=1 --exit-code=1 - name: Run goreleaser check - run: goreleaser check + run: mise x -- goreleaser check diff --git a/.gitignore b/.gitignore index f9fe897f..fa02db41 100644 --- a/.gitignore +++ b/.gitignore @@ -140,5 +140,5 @@ 
docs/plans **/*.local.md .tessl/tiles/ **/*.local.* -.context/**/*.md +.context/ todos/ diff --git a/.python-version b/.python-version index 2c45fe3a..0104088a 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.13.11 +3.14.4 diff --git a/AGENTS.md b/AGENTS.md index 3fc9132d..44b3e938 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -84,15 +84,16 @@ Commit style: [.github/commit-instructions.md](.github/commit-instructions.md) ### Behavior Guidelines -1. **No Auto-Commits**: Never commit without explicit permission. Always present diffs for approval. -2. **Security-First**: All changes must maintain least privilege and undergo security review. -3. **Zero-Warnings Policy**: `cargo clippy -- -D warnings` with no exceptions. -4. **Operator-Centric**: Prioritize workflows efficient in contested/airgapped environments. -5. **Documentation**: Mermaid for diagrams, relative links, maintain link hygiene. -6. **Testing Required**: All code changes must include appropriate tests. -7. **Linter Restrictions**: Never remove clippy restrictions or `deny` attributes. -8. **File Size Limit**: Keep source files under 500-600 lines when possible. -9. **AI Disclosure**: Always disclose AI usage in PR descriptions, following the AI Usage Policy [AI Usage Policy](AI_POLICY.md). Be transparent, but brief — no need to list every prompt, just the tools used (e.g., "Used Claude Code (`Claude Opus 4.7 (1M Context)`) for initial draft of detection engine refactor. All code reviewed and tested."). +01. **No Merging**: Never merge without a passing CI check and code review approval on a PR. This must be performed by a human maintainer, not an AI assistant. +02. **No Auto-Commits**: Never commit without explicit permission. Always present diffs for approval before staging or committing. This applies to every AI assistant regardless of tooling. +03. **Security-First**: All changes must maintain least privilege and undergo security review. +04. 
**Zero-Warnings Policy**: `cargo clippy -- -D warnings` with no exceptions. +05. **Operator-Centric**: Prioritize workflows efficient in contested/airgapped environments. +06. **Documentation**: Mermaid for diagrams, relative links, maintain link hygiene. +07. **Testing Required**: All code changes must include appropriate tests. +08. **Linter Restrictions**: Never remove clippy restrictions or `deny` attributes. +09. **File Size Limit**: Keep source files under 500-600 lines when possible. +10. **AI Disclosure**: Always disclose AI usage in PR descriptions, following the AI Usage Policy [AI Usage Policy](AI_POLICY.md). Be transparent, but brief — no need to list every prompt, just the tools used (e.g., "Used Claude Code (`Claude Opus 4.7 (1M Context)`) for initial draft of detection engine refactor. All code reviewed and tested."). ### Rule Precedence diff --git a/Cargo.lock b/Cargo.lock index 54c0e157..50889a86 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -71,9 +71,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" [[package]] name = "anstyle-parse" @@ -133,9 +133,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "assert_cmd" -version = "2.2.0" +version = "2.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a686bbee5efb88a82df0621b236e74d925f470e5445d3220a5648b892ec99c9" +checksum = "39bae1d3fa576f7c6519514180a72559268dd7d1fe104070956cb687bc6673bd" dependencies = [ "anstyle", "bstr", @@ -189,9 +189,9 @@ checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" [[package]] name = "bitflags" -version = "2.11.0" +version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" +checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" dependencies = [ "serde_core", ] @@ -313,9 +313,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.56" +version = "1.2.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +checksum = "43c5703da9466b66a946814e1adf53ea2c90f10063b86290cc9eb67ce3478a20" dependencies = [ "find-msvc-tools", "shlex", @@ -341,7 +341,7 @@ checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601" dependencies = [ "cfg-if", "cpufeatures 0.3.0", - "rand_core 0.10.0", + "rand_core 0.10.1", ] [[package]] @@ -387,9 +387,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.6.0" +version = "4.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" dependencies = [ "clap_builder", "clap_derive", @@ -409,9 +409,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.6.0" +version = "4.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1110bd8a634a1ab8cb04345d8d878267d57c3cf1b38d91b71af6686408bbca6a" +checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9" dependencies = [ "heck", "proc-macro2", @@ -421,9 +421,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "1.0.0" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" [[package]] name = "cobs" @@ -453,7 +453,7 @@ dependencies = [ "postcard", "proptest", "prost", - "rand 0.10.0", 
+ "rand 0.10.1", "serde", "serde_json", "sqlparser", @@ -468,9 +468,9 @@ dependencies = [ [[package]] name = "colorchoice" -version = "1.0.4" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" [[package]] name = "console" @@ -705,7 +705,7 @@ dependencies = [ "interprocess", "nix", "postcard", - "rand 0.10.0", + "rand 0.10.1", "regex", "serde", "serde_json", @@ -743,7 +743,7 @@ dependencies = [ "prost-build", "prost-types", "quick_cache", - "rand 0.10.0", + "rand 0.10.1", "redb", "rs_merkle", "serde", @@ -834,9 +834,9 @@ dependencies = [ [[package]] name = "doctest-file" -version = "1.0.0" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aac81fa3e28d21450aa4d2ac065992ba96a1d7303efbce51a95f4fd175b67562" +checksum = "c2db04e74f0a9a93103b50e90b96024c9b2bdca8bce6a632ec71b88736d3d359" [[package]] name = "either" @@ -880,9 +880,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.3.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" [[package]] name = "figment" @@ -1073,7 +1073,7 @@ dependencies = [ "cfg-if", "libc", "r-efi 6.0.0", - "rand_core 0.10.0", + "rand_core 0.10.1", "wasip2", "wasip3", ] @@ -1110,6 +1110,12 @@ version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +[[package]] +name = "hashbrown" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51" + [[package]] name = "heck" version = "0.5.0" @@ -1163,12 
+1169,12 @@ checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" [[package]] name = "indexmap" -version = "2.13.0" +version = "2.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" dependencies = [ "equivalent", - "hashbrown 0.16.1", + "hashbrown 0.17.0", "serde", "serde_core", ] @@ -1195,9 +1201,9 @@ dependencies = [ [[package]] name = "interprocess" -version = "2.4.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6be5e5c847dbdb44564bd85294740d031f4f8aeb3464e5375ef7141f7538db69" +checksum = "ce4c3c8f298ee3d5467f2616384e3560c750226cc3c620e5456d3b95783156f6" dependencies = [ "doctest-file", "futures-core", @@ -1205,7 +1211,7 @@ dependencies = [ "recvmsg", "tokio", "widestring", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -1215,7 +1221,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20fd6de4ccfcc187e38bc21cfa543cb5a302cb86a8b114eb7f0bf0dc9f8ac00f" dependencies = [ "io-lifetimes 3.0.1", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -1262,15 +1268,15 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" [[package]] name = "js-sys" -version = "0.3.91" +version = "0.3.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" +checksum = "2964e92d1d9dc3364cae4d718d93f227e3abb088e747d92e0395bfdedf1c12ca" dependencies = [ "once_cell", "wasm-bindgen", @@ -1300,20 +1306,20 @@ checksum = 
"09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" [[package]] name = "libc" -version = "0.2.184" +version = "0.2.185" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4af" +checksum = "52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8f" [[package]] name = "libredox" -version = "0.1.14" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a" +checksum = "e02f3bb43d335493c96bf3fd3a321600bf6bd07ed34bc64118e9293bdffea46c" dependencies = [ "bitflags", "libc", "plain", - "redox_syscall 0.7.3", + "redox_syscall 0.7.4", ] [[package]] @@ -1469,9 +1475,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.21.3" +version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" [[package]] name = "once_cell_polyfill" @@ -1730,7 +1736,7 @@ dependencies = [ "bit-vec", "bitflags", "num-traits", - "rand 0.9.2", + "rand 0.9.4", "rand_chacha", "rand_xorshift", "regex-syntax", @@ -1841,9 +1847,9 @@ checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" [[package]] name = "rand" -version = "0.9.2" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" dependencies = [ "rand_chacha", "rand_core 0.9.5", @@ -1851,13 +1857,13 @@ dependencies = [ [[package]] name = "rand" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc266eb313df6c5c09c1c7b1fbe2510961e5bcd3add930c1e31f7ed9da0feff8" +checksum = 
"d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207" dependencies = [ "chacha20", "getrandom 0.4.2", - "rand_core 0.10.0", + "rand_core 0.10.1", ] [[package]] @@ -1881,9 +1887,9 @@ dependencies = [ [[package]] name = "rand_core" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c8d0fd677905edcbeedbf2edb6494d676f0e98d54d5cf9bda0b061cb8fb8aba" +checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69" [[package]] name = "rand_xorshift" @@ -1896,9 +1902,9 @@ dependencies = [ [[package]] name = "rayon" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d" dependencies = [ "either", "rayon-core", @@ -1960,9 +1966,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ce70a74e890531977d37e532c34d45e9055d2409ed08ddba14529471ed0be16" +checksum = "f450ad9c3b1da563fb6948a8e0fb0fb9269711c9c73d9ea1de5058c79c8d643a" dependencies = [ "bitflags", ] @@ -2106,9 +2112,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.27" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" [[package]] name = "serde" @@ -2370,9 +2376,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.51.1" +version = "1.52.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f66bf9585cda4b724d3e78ab34b73fb2bbaba9011b9bfdf69dc836382ea13b8c" +checksum = "b67dee974fe86fd92cc45b7a95fdd2f99a36a6d7b0d431a231178d3d670bbcc6" dependencies = [ "bytes", "libc", @@ 
-2614,9 +2620,9 @@ checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" [[package]] name = "unidirs" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a93c94ee9b12aeb67d6455e3c991df1da11b7037ac9814d7cb4efe671a803f0c" +checksum = "7eff61b0db9834db688c174dde33450e6747999d9a14e44d77045838a81774ab" dependencies = [ "camino", "directories", @@ -2631,9 +2637,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.23.0" +version = "1.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ac8b6f42ead25368cf5b098aeb3dc8a1a2c05a3eee8a9a1a68c640edbfc79d9" +checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76" dependencies = [ "getrandom 0.4.2", "js-sys", @@ -2699,11 +2705,11 @@ dependencies = [ [[package]] name = "wasip2" -version = "1.0.2+wasi-0.2.9" +version = "1.0.3+wasi-0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6" dependencies = [ - "wit-bindgen", + "wit-bindgen 0.57.1", ] [[package]] @@ -2712,7 +2718,7 @@ version = "0.4.0+wasi-0.3.0-rc-2026-01-06" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" dependencies = [ - "wit-bindgen", + "wit-bindgen 0.51.0", ] [[package]] @@ -2732,9 +2738,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.114" +version = "0.2.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" +checksum = "0bf938a0bacb0469e83c1e148908bd7d5a6010354cf4fb73279b7447422e3a89" dependencies = [ "cfg-if", "once_cell", @@ -2745,9 +2751,9 @@ dependencies = [ [[package]] 
name = "wasm-bindgen-macro" -version = "0.2.114" +version = "0.2.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" +checksum = "eeff24f84126c0ec2db7a449f0c2ec963c6a49efe0698c4242929da037ca28ed" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2755,9 +2761,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.114" +version = "0.2.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" +checksum = "9d08065faf983b2b80a79fd87d8254c409281cf7de75fc4b773019824196c904" dependencies = [ "bumpalo", "proc-macro2", @@ -2768,9 +2774,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.114" +version = "0.2.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" +checksum = "5fd04d9e306f1907bd13c6361b5c6bfc7b3b3c095ed3f8a9246390f8dbdee129" dependencies = [ "unicode-ident", ] @@ -2811,9 +2817,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.91" +version = "0.3.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9" +checksum = "4f2dfbb17949fa2088e5d39408c48368947b86f7834484e87b73de55bc14d97d" dependencies = [ "js-sys", "wasm-bindgen", @@ -2994,20 +3000,20 @@ dependencies = [ [[package]] name = "windows-sys" -version = "0.52.0" +version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ - "windows-targets", + "windows-targets 0.52.6", ] [[package]] name = "windows-sys" -version = "0.59.0" +version = "0.60.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" dependencies = [ - "windows-targets", + "windows-targets 0.53.5", ] [[package]] @@ -3025,14 +3031,31 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", ] [[package]] @@ -3050,48 +3073,96 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + [[package]] name = "windows_i686_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + [[package]] 
name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + [[package]] name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + [[package]] name = "winnow" version = "0.7.15" @@ -3136,6 +3207,12 @@ dependencies = [ "wit-bindgen-rust-macro", ] +[[package]] +name = "wit-bindgen" +version = "0.57.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" + [[package]] name = "wit-bindgen-core" version = "0.51.0" @@ -3223,18 +3300,18 @@ checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" [[package]] name = "zerocopy" -version = "0.8.40" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a789c6e490b576db9f7e6b6d661bcc9799f7c0ac8352f56ea20193b2681532e5" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.40" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f65c489a7071a749c849713807783f70672b28094011623e200cb86dcb835953" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml 
b/Cargo.toml index 44f3473c..d266b203 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,12 +16,12 @@ resolver = "3" version = "0.1.0" authors = [ "UncleSp1d3r ", - "KryptoKat ", + "DaemonEye Contributors" ] license = "Apache-2.0" keywords = ["security", "monitoring", "process", "detection"] categories = ["security"] -rust-version = "1.91" +rust-version = "1.95" repository = "https://github.com/EvilBit-Labs/DaemonEye" homepage = "https://evilbitlabs.io/DaemonEye" edition = "2024" @@ -44,11 +44,11 @@ exclude = [ anyhow = "1.0.102" # Testing and development -assert_cmd = "2.2.0" +assert_cmd = "2.2.1" # Async runtime and traits async-trait = "0.1.89" -bitflags = { version = "2.11.0", features = ["serde"] } +bitflags = { version = "2.11.1", features = ["serde"] } # Cryptographic operations blake3 = { version = "1.8.4", default-features = false, features = ["std"] } @@ -64,7 +64,7 @@ cap-std = "=4.0.2" chrono = { version = "0.4.44", features = ["serde"] } # CLI and configuration -clap = { version = "4.6.0", features = ["derive"] } +clap = { version = "4.6.1", features = ["derive"] } # Internal libraries collector-core = { path = "collector-core" } @@ -86,11 +86,11 @@ futures-util = "0.3.32" # System information and IPC hostname-validator = "1.1.1" insta = { version = "1.47.2", features = ["filters"] } -interprocess = { version = "2.4.0", features = ["tokio"] } +interprocess = { version = "2.4.1", features = ["tokio"] } # Exact-pinned: libc exposes raw OS constants (O_NOFOLLOW, ELOOP, etc.) # that security-critical code in procmond relies on. See AGENTS.md # "Pin security-critical deps". -libc = "=0.2.184" +libc = "=0.2.185" parking_lot = "0.12.5" # Serialization @@ -110,7 +110,7 @@ prost-types = "0.14.3" # holds authoritative hash results so a silent upstream change here # could affect integrity decisions. 
quick_cache = "=0.6.21" -rand = "0.10.0" +rand = "0.10.1" # Database and storage redb = "4.0.0" @@ -134,7 +134,7 @@ thiserror = "2.0.18" # Exact-pinned: Tokio is the async runtime underpinning privilege # separation, IPC, and agent coordination. See AGENTS.md "Pin # security-critical deps". -tokio = { version = "=1.51.1", features = [ +tokio = { version = "=1.52.1", features = [ "rt", "rt-multi-thread", "net", @@ -153,8 +153,8 @@ toml = "1.1.2+spec-1.1.0" tracing = "0.1.44" tracing-subscriber = { version = "0.3.23", features = ["env-filter"] } tracing-test = "0.2.6" -unidirs = "0.1.2" -uuid = { version = "1.23.0", features = ["v4", "serde"] } +unidirs = "0.1.3" +uuid = { version = "1.23.1", features = ["v4", "serde"] } uzers = "0.12.2" whoami = "2.1.1" diff --git a/collector-core/benches/collector_benchmarks.rs b/collector-core/benches/collector_benchmarks.rs index 2ac83a21..26ca0d3d 100644 --- a/collector-core/benches/collector_benchmarks.rs +++ b/collector-core/benches/collector_benchmarks.rs @@ -454,7 +454,7 @@ fn bench_performance_monitoring_overhead(c: &mut Criterion) { PerformanceConfig { enabled: true, enable_trigger_latency_tracking: false, - collection_interval: Duration::from_secs(60), + collection_interval: Duration::from_mins(1), ..Default::default() }, ), diff --git a/collector-core/src/analysis_chain.rs b/collector-core/src/analysis_chain.rs index 56987fd0..48fe540b 100644 --- a/collector-core/src/analysis_chain.rs +++ b/collector-core/src/analysis_chain.rs @@ -93,11 +93,11 @@ impl Default for AnalysisChainConfig { fn default() -> Self { Self { max_concurrent_workflows: 50, - default_stage_timeout: Duration::from_secs(300), // 5 minutes - max_workflow_timeout: Duration::from_secs(1800), // 30 minutes + default_stage_timeout: Duration::from_mins(5), + max_workflow_timeout: Duration::from_mins(30), max_retry_attempts: 3, retry_base_delay: Duration::from_secs(2), - max_retry_delay: Duration::from_secs(60), + max_retry_delay: Duration::from_mins(1), 
status_monitoring_interval: Duration::from_secs(30), max_completed_workflows: 100, enable_debug_logging: false, @@ -794,8 +794,7 @@ impl AnalysisChainCoordinator { .duration_since(execution.started_at) .unwrap_or(Duration::from_secs(0)); - if elapsed > Duration::from_secs(600) { - // 10 minutes + if elapsed > Duration::from_mins(10) { warn!( execution_id = %execution_id, workflow_id = %execution.workflow_definition.workflow_id, diff --git a/collector-core/src/config.rs b/collector-core/src/config.rs index b201a401..7ce0209a 100644 --- a/collector-core/src/config.rs +++ b/collector-core/src/config.rs @@ -92,7 +92,7 @@ impl Default for CollectorConfig { max_event_sources: 16, event_buffer_size: 1000, shutdown_timeout: Duration::from_secs(30), - health_check_interval: Duration::from_secs(60), + health_check_interval: Duration::from_mins(1), startup_timeout: Duration::from_secs(10), enable_debug_logging: false, max_batch_size: 100, @@ -515,7 +515,7 @@ mod tests { assert_eq!(config.max_event_sources, 16); assert_eq!(config.event_buffer_size, 1000); assert_eq!(config.shutdown_timeout, Duration::from_secs(30)); - assert_eq!(config.health_check_interval, Duration::from_secs(60)); + assert_eq!(config.health_check_interval, Duration::from_mins(1)); assert_eq!(config.startup_timeout, Duration::from_secs(10)); assert!(!config.enable_debug_logging); assert_eq!(config.max_batch_size, 100); @@ -626,14 +626,14 @@ mod tests { let config = CollectorConfig::new() .with_max_event_sources(32) .with_event_buffer_size(2000) - .with_shutdown_timeout(Duration::from_secs(60)) - .with_health_check_interval(Duration::from_secs(120)) + .with_shutdown_timeout(Duration::from_mins(1)) + .with_health_check_interval(Duration::from_mins(2)) .with_debug_logging(true); assert_eq!(config.max_event_sources, 32); assert_eq!(config.event_buffer_size, 2000); - assert_eq!(config.shutdown_timeout, Duration::from_secs(60)); - assert_eq!(config.health_check_interval, Duration::from_secs(120)); + 
assert_eq!(config.shutdown_timeout, Duration::from_mins(1)); + assert_eq!(config.health_check_interval, Duration::from_mins(2)); assert!(config.enable_debug_logging); } diff --git a/collector-core/src/daemoneye_event_bus.rs b/collector-core/src/daemoneye_event_bus.rs index e1c0627f..f7740b87 100644 --- a/collector-core/src/daemoneye_event_bus.rs +++ b/collector-core/src/daemoneye_event_bus.rs @@ -451,6 +451,11 @@ impl DaemoneyeEventBus { }), topic_patterns: subscription.topic_patterns.clone(), enable_wildcards: subscription.enable_wildcards, + // collector-core subscriptions do not opt into Control delivery — + // this bridge layer only wires Event subscriptions into the + // daemoneye-eventbus `subscribe()` API. See END-297 for the + // opt-in path via `subscribe_with_control`. + include_control: false, } } @@ -1629,7 +1634,7 @@ mod tests { HealthStatus::Healthy | HealthStatus::Starting )); // Uptime might be 0 in fast tests - assert!(detailed_metrics.broker_health.uptime < Duration::from_secs(3600)); + assert!(detailed_metrics.broker_health.uptime < Duration::from_hours(1)); assert!(detailed_metrics.broker_health.active_connections >= 1); assert!(detailed_metrics.broker_health.message_throughput >= 0.0); @@ -1685,7 +1690,7 @@ mod tests { HealthStatus::Healthy | HealthStatus::Starting )); // Uptime might be 0 in fast tests - assert!(health_status.uptime < Duration::from_secs(3600)); + assert!(health_status.uptime < Duration::from_hours(1)); assert_eq!(health_status.active_connections, 0); // No subscribers yet assert!(health_status.message_throughput >= 0.0); assert_eq!(health_status.error_rate, 0.0); // No errors expected @@ -1860,7 +1865,7 @@ mod tests { assert_eq!(stats.events_delivered, 0); assert_eq!(stats.active_subscribers, 0); // Uptime might be 0 in fast tests - assert!(stats.uptime < Duration::from_secs(3600)); + assert!(stats.uptime < Duration::from_hours(1)); // Create subscription and publish event let subscription = EventSubscription { diff --git 
a/collector-core/src/high_performance_event_bus.rs b/collector-core/src/high_performance_event_bus.rs index a28e7aa8..08feef8b 100644 --- a/collector-core/src/high_performance_event_bus.rs +++ b/collector-core/src/high_performance_event_bus.rs @@ -785,7 +785,7 @@ mod tests { .unwrap(); // Wait for the event to be delivered using crossbeam channel - tokio::time::timeout(Duration::from_millis(1000), async { + tokio::time::timeout(Duration::from_secs(1), async { loop { if let Ok(event) = event_queue.try_recv() { break event; diff --git a/collector-core/src/monitor_collector.rs b/collector-core/src/monitor_collector.rs index 014c45c7..cf564f10 100644 --- a/collector-core/src/monitor_collector.rs +++ b/collector-core/src/monitor_collector.rs @@ -81,7 +81,7 @@ impl MonitorCollectorConfig { anyhow::bail!("Max events in flight must be between 1 and 100,000 for memory safety"); } - if self.shutdown_timeout > Duration::from_secs(300) { + if self.shutdown_timeout > Duration::from_mins(5) { anyhow::bail!("Shutdown timeout must not exceed 5 minutes"); } diff --git a/collector-core/src/shutdown_coordinator.rs b/collector-core/src/shutdown_coordinator.rs index 85ff0195..92cdaf72 100644 --- a/collector-core/src/shutdown_coordinator.rs +++ b/collector-core/src/shutdown_coordinator.rs @@ -41,7 +41,7 @@ pub struct ShutdownConfig { impl Default for ShutdownConfig { fn default() -> Self { Self { - graceful_timeout: Duration::from_secs(60), + graceful_timeout: Duration::from_mins(1), forced_timeout: Duration::from_secs(10), sequence_delay: Duration::from_millis(500), enable_parallel_shutdown: true, diff --git a/collector-core/src/trigger.rs b/collector-core/src/trigger.rs index f2c0bc1b..9b9f01a6 100644 --- a/collector-core/src/trigger.rs +++ b/collector-core/src/trigger.rs @@ -547,16 +547,12 @@ impl TriggerManager { pub fn is_backpressure_active(&self) -> bool { self.trigger_queue .lock() - .map(|queue| queue.is_backpressure_active()) - .unwrap_or(false) + .is_ok_and(|queue| 
queue.is_backpressure_active()) } /// Returns the current queue depth. pub fn get_queue_depth(&self) -> usize { - self.trigger_queue - .lock() - .map(|queue| queue.len()) - .unwrap_or(0) + self.trigger_queue.lock().map_or(0, |queue| queue.len()) } /// Returns collector capabilities for a specific collector. @@ -594,8 +590,7 @@ impl TriggerManager { pub fn is_trigger_tracked(&self, trigger_id: &str) -> bool { self.timeout_tracker .lock() - .map(|tracker| tracker.contains_key(trigger_id)) - .unwrap_or(false) + .is_ok_and(|tracker| tracker.contains_key(trigger_id)) } /// Evaluates trigger conditions against process event data. @@ -1162,7 +1157,7 @@ impl TriggerManager { /// Returns current trigger statistics for monitoring. pub fn get_statistics(&self) -> Result { - let pending_count = self.pending_count.lock().map(|count| *count).unwrap_or(0); + let pending_count = self.pending_count.lock().map_or(0, |count| *count); // Batch lock acquisitions to minimize lock contention let ( @@ -1176,18 +1171,12 @@ impl TriggerManager { let dedup_cache_size = self .deduplication_cache .lock() - .map(|cache| cache.len()) - .unwrap_or(0); - let rate_limit_states = self - .rate_limits - .lock() - .map(|limits| limits.len()) - .unwrap_or(0); + .map_or(0, |cache| cache.len()); + let rate_limit_states = self.rate_limits.lock().map_or(0, |limits| limits.len()); let registered_capabilities = self .collector_capabilities .lock() - .map(|caps| caps.len()) - .unwrap_or(0); + .map_or(0, |caps| caps.len()); let queue_stats = self .trigger_queue .lock() @@ -2727,8 +2716,8 @@ mod tests { let mut tracker = manager.timeout_tracker.lock().unwrap(); let expired_timeout = TriggerTimeout { target_collector: trigger.target_collector.clone(), - emitted_at: SystemTime::now() - Duration::from_secs(60), // 1 minute ago - timeout_duration: Duration::from_secs(30), // 30 second timeout + emitted_at: SystemTime::now() - Duration::from_mins(1), + timeout_duration: Duration::from_secs(30), // 30 second timeout 
correlation_id: trigger.correlation_id.clone(), }; tracker.insert(trigger_id.clone(), expired_timeout); diff --git a/collector-core/tests/compatibility_integration_test.rs b/collector-core/tests/compatibility_integration_test.rs index de7fc8e1..6cb4546e 100644 --- a/collector-core/tests/compatibility_integration_test.rs +++ b/collector-core/tests/compatibility_integration_test.rs @@ -622,7 +622,7 @@ async fn test_configuration_compatibility() { .with_component_name("procmond".to_string()) .with_max_event_sources(16) // Match existing default .with_event_buffer_size(1000) // Match existing default - .with_health_check_interval(Duration::from_secs(60)); // Match existing default + .with_health_check_interval(Duration::from_mins(1)); // Match existing default // Verify configuration validation assert!( diff --git a/collector-core/tests/comprehensive_test_suite.rs b/collector-core/tests/comprehensive_test_suite.rs index a6baa5bd..c1f4292e 100644 --- a/collector-core/tests/comprehensive_test_suite.rs +++ b/collector-core/tests/comprehensive_test_suite.rs @@ -358,7 +358,7 @@ async fn test_comprehensive_multi_source_integration() { // Run collector for comprehensive testing let start_time = Instant::now(); let collector_handle = tokio::spawn(async move { - let result = timeout(Duration::from_millis(2000), collector.run()).await; + let result = timeout(Duration::from_secs(2), collector.run()).await; match result { Ok(Ok(())) => info!("Collector completed successfully"), Ok(Err(e)) => warn!("Collector failed: {}", e), diff --git a/collector-core/tests/integration_test.rs b/collector-core/tests/integration_test.rs index 59ac1889..30570736 100644 --- a/collector-core/tests/integration_test.rs +++ b/collector-core/tests/integration_test.rs @@ -291,14 +291,14 @@ async fn test_config_builder_pattern() { let config = CollectorConfig::new() .with_max_event_sources(32) .with_event_buffer_size(2000) - .with_shutdown_timeout(Duration::from_secs(60)) - 
.with_health_check_interval(Duration::from_secs(120)) + .with_shutdown_timeout(Duration::from_mins(1)) + .with_health_check_interval(Duration::from_mins(2)) .with_debug_logging(true); assert_eq!(config.max_event_sources, 32); assert_eq!(config.event_buffer_size, 2000); - assert_eq!(config.shutdown_timeout, Duration::from_secs(60)); - assert_eq!(config.health_check_interval, Duration::from_secs(120)); + assert_eq!(config.shutdown_timeout, Duration::from_mins(1)); + assert_eq!(config.health_check_interval, Duration::from_mins(2)); assert!(config.enable_debug_logging); // Validate the built config diff --git a/collector-core/tests/rpc_server_integration.rs b/collector-core/tests/rpc_server_integration.rs index d4c99eaf..7da0bb2b 100644 --- a/collector-core/tests/rpc_server_integration.rs +++ b/collector-core/tests/rpc_server_integration.rs @@ -227,7 +227,7 @@ async fn test_health_check_response() -> anyhow::Result<()> { // Wait for collector to start, register, and RPC service to be ready eprintln!("TEST: Waiting for collector to start (2 seconds)"); - tokio::time::sleep(Duration::from_millis(2000)).await; + tokio::time::sleep(Duration::from_secs(2)).await; eprintln!("TEST: Done waiting"); // Create RPC client @@ -289,7 +289,7 @@ async fn test_lifecycle_operation_handling() -> anyhow::Result<()> { }); // Wait for collector to start, register, and RPC service to be ready - tokio::time::sleep(Duration::from_millis(1000)).await; + tokio::time::sleep(Duration::from_secs(1)).await; // Create RPC client let rpc_client = CollectorRpcClient::new( @@ -360,7 +360,7 @@ async fn test_graceful_shutdown_via_rpc() -> anyhow::Result<()> { }); // Wait for collector to start, register, and RPC service to be ready - tokio::time::sleep(Duration::from_millis(1000)).await; + tokio::time::sleep(Duration::from_secs(1)).await; // Create RPC client let rpc_client = CollectorRpcClient::new( diff --git a/daemoneye-agent/src/broker_manager.rs b/daemoneye-agent/src/broker_manager.rs index 
a22ecb31..bacc81a2 100644 --- a/daemoneye-agent/src/broker_manager.rs +++ b/daemoneye-agent/src/broker_manager.rs @@ -2162,7 +2162,7 @@ mod tests { manager .set_collectors_config(CollectorsConfig::default()) .await; - assert_eq!(manager.get_startup_timeout().await, Duration::from_secs(60)); + assert_eq!(manager.get_startup_timeout().await, Duration::from_mins(1)); // With collectors, use max timeout let collectors_config = CollectorsConfig { diff --git a/daemoneye-agent/src/main.rs b/daemoneye-agent/src/main.rs index ed101cb4..8213dd69 100644 --- a/daemoneye-agent/src/main.rs +++ b/daemoneye-agent/src/main.rs @@ -45,10 +45,7 @@ async fn run() -> Result<(), Box> { tracing_subscriber::fmt::init(); // Test mode: exit early to keep existing integration test semantics (set DAEMONEYE_AGENT_TEST_MODE=1) - if std::env::var("DAEMONEYE_AGENT_TEST_MODE") - .map(|v| v == "1") - .unwrap_or(false) - { + if std::env::var("DAEMONEYE_AGENT_TEST_MODE").is_ok_and(|v| v == "1") { #[allow(clippy::print_stdout, clippy::semicolon_if_nothing_returned)] { println!("daemoneye-agent started successfully") diff --git a/daemoneye-agent/src/recovery.rs b/daemoneye-agent/src/recovery.rs index 70854763..b32e716b 100644 --- a/daemoneye-agent/src/recovery.rs +++ b/daemoneye-agent/src/recovery.rs @@ -29,7 +29,7 @@ const GRACEFUL_SHUTDOWN_TIMEOUT: Duration = Duration::from_secs(30); const FORCE_KILL_TIMEOUT: Duration = Duration::from_secs(10); /// Default timeout for restart. -const RESTART_TIMEOUT: Duration = Duration::from_secs(60); +const RESTART_TIMEOUT: Duration = Duration::from_mins(1); /// Recovery actions in escalating order. 
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] diff --git a/daemoneye-agent/tests/heartbeat_detection_integration.rs b/daemoneye-agent/tests/heartbeat_detection_integration.rs index 4f235742..cc928303 100644 --- a/daemoneye-agent/tests/heartbeat_detection_integration.rs +++ b/daemoneye-agent/tests/heartbeat_detection_integration.rs @@ -207,7 +207,7 @@ fn test_recovery_action_from_degraded_status() { fn test_recovery_action_from_failed_status() { let status = HeartbeatStatus::Failed { missed_count: MAX_MISSED_HEARTBEATS, - time_since_last: Duration::from_secs(120), + time_since_last: Duration::from_mins(2), }; // Failed status should trigger graceful shutdown (skip health check) diff --git a/daemoneye-agent/tests/rpc_lifecycle_integration.rs b/daemoneye-agent/tests/rpc_lifecycle_integration.rs index 67546c52..fd633ebd 100644 --- a/daemoneye-agent/tests/rpc_lifecycle_integration.rs +++ b/daemoneye-agent/tests/rpc_lifecycle_integration.rs @@ -213,7 +213,7 @@ async fn test_cross_process_rpc_workflow() -> anyhow::Result<()> { // Intentional: waiting for the spawned collector process to complete its // async registration handshake with the broker before probing via RPC. 
- tokio::time::sleep(Duration::from_millis(1000)).await; + tokio::time::sleep(Duration::from_secs(1)).await; // Verify collector is registered by checking RPC client can be created let _rpc_client = broker_manager.get_rpc_client(collector_id).await?; diff --git a/daemoneye-eventbus/COMPREHENSIVE_REVIEW.md b/daemoneye-eventbus/COMPREHENSIVE_REVIEW.md index e4fdb27b..39a07b93 100644 --- a/daemoneye-eventbus/COMPREHENSIVE_REVIEW.md +++ b/daemoneye-eventbus/COMPREHENSIVE_REVIEW.md @@ -2,7 +2,7 @@ ## Executive Summary -- Overall completion status: 95% complete, fully operational +- Overall completion status: complete, fully operational - Requirements satisfaction: All Requirements 15.1-15.5 and 16.1-16.4 satisfied - Code quality: Excellent adherence to project standards - Security posture: Strong with no unsafe code and comprehensive input validation @@ -34,7 +34,7 @@ - `rpc.rs` implements complete RPC service with `CollectorRpcClient` and `CollectorRpcService` - All lifecycle operations implemented: Start, Stop, Restart, HealthCheck, UpdateConfig, GracefulShutdown, ForceShutdown -- `docs/rpc-patterns.md` provides 627 lines of comprehensive documentation +- `docs/rpc-patterns.md` provides 631 lines of comprehensive documentation - Integration tests in `tests/rpc_integration_tests.rs` validate all RPC patterns **Key Components**: @@ -44,7 +44,7 @@ - `HealthCheckData` with component-level health tracking - Timeout handling and retry logic with circuit breaker pattern -**Minor Note**: Pause/Resume operations are stubbed with handlers that return immediate success, documented as planned for future implementation. +**Implementation Note**: Pause/Resume RPCs are fully implemented. `handle_pause_request` and `handle_resume_request` in `rpc.rs` extract the collector ID from the payload and delegate to `ProcessManager::pause_collector` / `resume_collector`, honoring request deadlines and mapping process-manager errors through `map_process_error_to_rpc_error`. 
### Requirement 15.3: Event coordination and task distribution @@ -52,7 +52,7 @@ **Evidence**: -- `task_distribution.rs` implements complete task distribution system (873 lines) +- `task_distribution.rs` implements complete task distribution system (915 lines) - Capability-based routing with `CollectorCapability` registration - Priority queue with `BinaryHeap` for task ordering - 4 routing strategies: RoundRobin, LeastLoaded, FirstAvailable, Random @@ -71,11 +71,11 @@ **Evidence**: -- `result_aggregation.rs` implements complete aggregation system (768 lines) +- `result_aggregation.rs` implements complete aggregation system (932 lines) - `CorrelationMetadata` in `message.rs` supports hierarchical correlation tracking - Deduplication cache prevents duplicate processing - Backpressure handling with configurable thresholds -- `docs/correlation-metadata.md` provides 404 lines of documentation +- `docs/correlation-metadata.md` provides 403 lines of documentation **Key Components**: @@ -262,7 +262,7 @@ ## RPC Patterns Documentation -### Documented Operations (11 total) +### Documented Operations (10 total) 01. ✅ Start - Collector startup with configuration 02. ✅ Stop - Graceful collector stop @@ -272,12 +272,12 @@ 06. ✅ GetCapabilities - Capability discovery 07. ✅ GracefulShutdown - Coordinated graceful shutdown 08. ✅ ForceShutdown - Emergency shutdown -09. ⚠️ Pause - Stubbed, documented as planned -10. ⚠️ Resume - Stubbed, documented as planned +09. ✅ Pause - Delegates to `ProcessManager::pause_collector` +10. ✅ Resume - Delegates to `ProcessManager::resume_collector` ### Documentation Quality -- ✅ 627 lines in `docs/rpc-patterns.md` +- ✅ 631 lines in `docs/rpc-patterns.md` - ✅ Mermaid sequence diagrams for communication flows - ✅ Complete request/response examples with Rust code - ✅ Error handling patterns with retry and circuit breaker @@ -467,15 +467,16 @@ ### Core Documentation Files -1. ✅ `README.md` (242 lines) - Overview, features, usage examples -2. 
✅ `IMPLEMENTATION_SUMMARY.md` (245 lines) - RPC implementation summary -3. ✅ `docs/rpc-patterns.md` (627 lines) - Comprehensive RPC documentation -4. ✅ `docs/topic-hierarchy.md` (286 lines) - Complete topic hierarchy -5. ✅ `docs/correlation-metadata.md` (404 lines) - Correlation tracking guide -6. ✅ `docs/task-distribution.md` (248 lines) - Task distribution guide -7. ✅ `docs/integration-guide.md` - Integration instructions -8. ✅ `docs/message-schemas.md` - Message format documentation -9. ✅ `docs/process-management.md` - Process lifecycle management +01. ✅ `README.md` (242 lines) - Overview, features, usage examples +02. ✅ `IMPLEMENTATION_SUMMARY.md` (252 lines) - RPC implementation summary +03. ✅ `docs/rpc-patterns.md` (631 lines) - Comprehensive RPC documentation +04. ✅ `docs/topic-hierarchy.md` (286 lines) - Complete topic hierarchy +05. ✅ `docs/correlation-metadata.md` (403 lines) - Correlation tracking guide +06. ✅ `docs/task-distribution.md` (250 lines) - Task distribution guide +07. ✅ `docs/integration-guide.md` - Integration instructions +08. ✅ `docs/message-schemas.md` - Message format documentation +09. ✅ `docs/process-management.md` - Process lifecycle management +10. ✅ `docs/topic-hierarchy-design.md` - Topic hierarchy design rationale ### Documentation Quality @@ -496,15 +497,7 @@ ### Stubbed Operations -**Issue**: Pause and Resume operations in RPC are stubbed - -**Location**: `rpc.rs` - `CollectorRpcService::handle_request()` - -**Status**: Documented as planned for future implementation - -**Recommendation**: Add TODO comments with issue tracker references - -**Impact**: Low - operations are documented and handlers exist +No stubbed operations remain. Pause and Resume RPCs now delegate to `ProcessManager::pause_collector` / `resume_collector` in `rpc.rs::handle_pause_request` and `handle_resume_request`, matching the lifecycle semantics of Start, Stop, and Restart. 
### Dead Code Attributes @@ -583,8 +576,7 @@ None - all critical features are implemented and tested. ### Medium Priority (Enhance Quality) 1. **Add Coverage Metrics**: Integrate `cargo llvm-cov` into CI pipeline -2. **Document Stubbed Operations**: Add TODO comments with issue tracker references for Pause/Resume -3. **Cross-Reference Documentation**: Add "See Also" sections between related docs +2. **Cross-Reference Documentation**: Add "See Also" sections between related docs ### Low Priority (Future Enhancements) @@ -611,10 +603,9 @@ The daemoneye-eventbus implementation is **comprehensive, well-architected, and **Minor Gaps**: -- Pause/Resume operations stubbed (documented as planned) - Some dead code attributes for future features (properly documented) - Documentation could benefit from more cross-references -**Overall Assessment**: 95% complete, fully operational, ready for integration with daemoneye-agent and collector-core. +**Overall Assessment**: Complete, fully operational, ready for integration with daemoneye-agent and collector-core. **Recommendation**: Proceed with integration into the broader DaemonEye system. The minor gaps identified do not block deployment and can be addressed in future iterations. 
diff --git a/daemoneye-eventbus/IMPLEMENTATION_SUMMARY.md b/daemoneye-eventbus/IMPLEMENTATION_SUMMARY.md index 03a98ccd..95d6837f 100644 --- a/daemoneye-eventbus/IMPLEMENTATION_SUMMARY.md +++ b/daemoneye-eventbus/IMPLEMENTATION_SUMMARY.md @@ -29,8 +29,8 @@ The RPC call patterns have been fully implemented in the `daemoneye-eventbus` cr - `GetCapabilities` - Get collector capabilities - `GracefulShutdown` - Coordinate graceful shutdown - `ForceShutdown` - Emergency shutdown - - `Pause` - Planned (stubbed handlers in `daemoneye-eventbus/src/rpc.rs`) - - `Resume` - Planned (stubbed handlers in `daemoneye-eventbus/src/rpc.rs`) + - `Pause` - Suspend a running collector via `ProcessManager::pause_collector` + - `Resume` - Resume a paused collector via `ProcessManager::resume_collector` #### 2. Health Check RPC Patterns with Heartbeat and Status Reporting ✅ @@ -113,7 +113,7 @@ Use helper functions `collector::lifecycle_topic(collector_id)` and `collector:: - **Error Categories**: Configuration, Resource, Communication, Permission, Internal, Timeout - **Audit Logging**: All RPC operations logged for security audit -> **Note:** Pause/Resume RPCs are currently placeholders. The handlers exist in `CollectorRpcService` but return immediate success until coordination logic lands. The functionality is tracked in the collector lifecycle backlog and will be documented once implemented. +> **Note:** Pause and Resume RPCs are fully implemented. `CollectorRpcService::handle_pause_request` and `handle_resume_request` extract the target `collector_id` from the payload, honor the request deadline, and delegate to `ProcessManager::pause_collector` / `resume_collector`. Process-manager errors are mapped through `map_process_error_to_rpc_error` to structured `RpcError` responses. 
### Testing Coverage diff --git a/daemoneye-eventbus/README.md b/daemoneye-eventbus/README.md index a80267fe..5cc12c67 100644 --- a/daemoneye-eventbus/README.md +++ b/daemoneye-eventbus/README.md @@ -117,7 +117,7 @@ For complete topic hierarchy documentation, see [docs/topic-hierarchy.md](docs/t - **Message Throughput**: 10,000+ messages per second - **Connection Overhead**: Minimal with connection pooling - **Memory Usage**: Bounded with configurable limits -- **Latency**: Sub-millisecond for local IPC +- **Latency**: \<1ms p99 for local IPC, asserted in CI on Linux and macOS by a dedicated criterion bench in `benches/ipc_performance.rs`. Windows and FreeBSD results are informational only. ## Cross-Platform Support diff --git a/daemoneye-eventbus/VALIDATION_CHECKLIST.md b/daemoneye-eventbus/VALIDATION_CHECKLIST.md index 03319816..2aabcaf0 100644 --- a/daemoneye-eventbus/VALIDATION_CHECKLIST.md +++ b/daemoneye-eventbus/VALIDATION_CHECKLIST.md @@ -15,8 +15,8 @@ This checklist provides a quick reference for validating the daemoneye-eventbus **Validation Commands**: ```bash -# Verify topic hierarchy -grep -r "pub const" daemoneye-eventbus/src/topics.rs | wc -l # Should show 26+ topics +# Verify topic hierarchy (35 named topic constants + 1 `pub const fn new`) +grep -r "pub const" daemoneye-eventbus/src/topics.rs | wc -l # Should show 36 # Run integration tests cargo test -p daemoneye-eventbus --test task_distribution_integration @@ -41,7 +41,7 @@ grep "pub enum CollectorOperation" daemoneye-eventbus/src/rpc.rs -A 20 cargo test -p daemoneye-eventbus --test rpc_integration_tests # Check documentation completeness -wc -l daemoneye-eventbus/docs/rpc-patterns.md # Should show 627 lines +wc -l daemoneye-eventbus/docs/rpc-patterns.md # Should show 631 lines ``` ### Requirement 15.3: Event coordination and task distribution @@ -63,7 +63,7 @@ grep "pub enum RoutingStrategy" daemoneye-eventbus/src/task_distribution.rs -A 1 cargo test -p daemoneye-eventbus task_distribution # 
Check documentation -wc -l daemoneye-eventbus/docs/task-distribution.md # Should show 248 lines +wc -l daemoneye-eventbus/docs/task-distribution.md # Should show 250 lines ``` ### Requirement 15.4: Result aggregation and correlation across collectors @@ -173,8 +173,8 @@ cargo test -p daemoneye-eventbus health **Validation Commands**: ```bash -# Count event topics -grep "pub const" daemoneye-eventbus/src/topics.rs | grep "events\." | wc -l # Should be 17 +# Count event topic constants (17 named topics + 4 `ALL` wildcard aliases) +grep "pub const" daemoneye-eventbus/src/topics.rs | grep "events\." | wc -l # Should be 21 # Verify all process topics grep "pub mod process" daemoneye-eventbus/src/topics.rs -A 30 @@ -189,8 +189,8 @@ grep "pub mod process" daemoneye-eventbus/src/topics.rs -A 30 **Validation Commands**: ```bash -# Count control topics -grep "pub const" daemoneye-eventbus/src/topics.rs | grep "control\." | wc -l # Should be 9 +# Count control topic constants (9 named topics + 3 `ALL` wildcard aliases + 2 shutdown constants) +grep "pub const" daemoneye-eventbus/src/topics.rs | grep "control\." 
| wc -l # Should be 14 # Verify all control topics grep "pub mod collector" daemoneye-eventbus/src/topics.rs -A 20 @@ -304,8 +304,8 @@ grep "pub fn matches" daemoneye-eventbus/src/message.rs -A 50 - [x] GetCapabilities - Capability discovery - [x] GracefulShutdown - Coordinated graceful shutdown - [x] ForceShutdown - Emergency shutdown -- [x] Pause - Stubbed, documented as planned -- [x] Resume - Stubbed, documented as planned +- [x] Pause - Delegates to `ProcessManager::pause_collector` +- [x] Resume - Delegates to `ProcessManager::resume_collector` **Validation Commands**: @@ -319,7 +319,7 @@ cargo test -p daemoneye-eventbus rpc ### Documentation Quality -- [x] 627 lines in docs/rpc-patterns.md +- [x] 631 lines in docs/rpc-patterns.md - [x] Mermaid sequence diagrams for communication flows - [x] Complete request/response examples with Rust code - [x] Error handling patterns with retry and circuit breaker @@ -354,8 +354,8 @@ grep "```rust" daemoneye-eventbus/docs/rpc-patterns.md | wc -l # Should have ex # Verify interprocess dependency grep "interprocess" daemoneye-eventbus/Cargo.toml -# Check for unsafe code -grep -r "unsafe" daemoneye-eventbus/src/ | grep -v "#\[deny(unsafe_code)\]" | wc -l # Should be 0 +# Check for unsafe blocks (doc-comment references to "unsafe" are not unsafe code) +grep -rn "^[^/]*unsafe[[:space:]]*[{]" daemoneye-eventbus/src/ | wc -l # Should be 0 ``` ### Platform Testing @@ -484,14 +484,15 @@ grep "MAX_RESULTS_PER_CORRELATION" daemoneye-eventbus/src/result_aggregation.rs ### Core Documentation Files - [x] README.md (242 lines) - Overview, features, usage examples -- [x] IMPLEMENTATION_SUMMARY.md (245 lines) - RPC implementation summary -- [x] docs/rpc-patterns.md (627 lines) - Comprehensive RPC documentation +- [x] IMPLEMENTATION_SUMMARY.md (252 lines) - RPC implementation summary +- [x] docs/rpc-patterns.md (631 lines) - Comprehensive RPC documentation - [x] docs/topic-hierarchy.md (286 lines) - Complete topic hierarchy -- [x] 
docs/correlation-metadata.md (404 lines) - Correlation tracking guide -- [x] docs/task-distribution.md (248 lines) - Task distribution guide +- [x] docs/correlation-metadata.md (403 lines) - Correlation tracking guide +- [x] docs/task-distribution.md (250 lines) - Task distribution guide - [x] docs/integration-guide.md - Integration instructions - [x] docs/message-schemas.md - Message format documentation - [x] docs/process-management.md - Process lifecycle management +- [x] docs/topic-hierarchy-design.md - Topic hierarchy design rationale **Validation Commands**: @@ -558,7 +559,7 @@ cargo test -p collector-core daemoneye_eventbus_integration ## Summary -**Overall Status**: ✅ 95% Complete, Fully Operational +**Overall Status**: ✅ Complete, Fully Operational **Requirements Satisfied**: 9/9 (100%) @@ -579,9 +580,8 @@ cargo test -p collector-core daemoneye_eventbus_integration - Comprehensive test coverage - Well-documented APIs -**Minor Issues**: 2 +**Minor Issues**: 1 -- ⚠️ Pause/Resume operations stubbed (documented as planned) -- ⚠️ Some dead code attributes for future features +- ⚠️ Some dead code attributes for future features (`CollectorStatus::ShuttingDown`, deduplication cache fields) **Recommendation**: ✅ Ready for integration with daemoneye-agent and collector-core diff --git a/daemoneye-eventbus/benches/ipc_performance.rs b/daemoneye-eventbus/benches/ipc_performance.rs index 1e56128c..94ab12ad 100644 --- a/daemoneye-eventbus/benches/ipc_performance.rs +++ b/daemoneye-eventbus/benches/ipc_performance.rs @@ -36,14 +36,30 @@ //! - Backpressure handling //! - Cross-platform performance //! - Zero-copy optimization improvements +//! 
- END-297 SLO gate: <1ms p99 local-IPC round-trip latency use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; +use daemoneye_eventbus::message::{CollectionEvent, ProcessEvent}; use daemoneye_eventbus::transport::{SocketConfig, TransportClient, TransportServer}; +use std::collections::HashMap; use std::hint::black_box; -use std::time::{Duration, Instant}; +use std::time::{Duration, Instant, SystemTime}; use tempfile::TempDir; use tokio::runtime::Runtime; +/// END-297 acceptance criterion: local-IPC round-trip p99 latency must stay under 1ms. +/// +/// This threshold is machine-checked by [`latency_p99_slo`]. Changing it requires +/// updating the ticket and the acceptance-evidence artifact. +/// Ticket: END-297 +const LATENCY_P99_SLO: Duration = Duration::from_millis(1); + +/// Number of warmup iterations discarded before p99 measurement begins. +const SLO_WARMUP_ITERS: usize = 1_000; + +/// Number of round-trip samples collected for the p99 computation. +const SLO_SAMPLE_COUNT: usize = 10_000; + fn throughput_benchmark(c: &mut Criterion) { let rt = Runtime::new().unwrap(); @@ -256,6 +272,153 @@ fn cross_platform_benchmark(c: &mut Criterion) { }); } +/// Build a representative `CollectionEvent::Process` payload for SLO latency runs. +/// +/// Mirrors the shape used by `throughput.rs` so the SLO assertion reflects +/// production-like envelope size rather than an artificially trivial one. +fn build_slo_payload() -> Vec<u8> { + let event = CollectionEvent::Process(ProcessEvent { + pid: 4242, + name: "slo_bench_process".to_owned(), + command_line: Some("slo_bench_process --latency-gate".to_owned()), + executable_path: Some("/usr/local/bin/slo_bench_process".to_owned()), + ppid: Some(1000), + start_time: Some(SystemTime::now()), + metadata: HashMap::new(), + }); + postcard::to_allocvec(&event).expect("serialize CollectionEvent::Process") +} + +/// p99 latency SLO gate for local-IPC round-trip (END-297 R9). 
+/// +/// Collects [`SLO_SAMPLE_COUNT`] round-trip samples after a +/// [`SLO_WARMUP_ITERS`]-iteration warmup, computes p99 from a sorted sample +/// vector (no external histogram dep), and panics if the observed p99 exceeds +/// [`LATENCY_P99_SLO`]. The panic fails the benchmark job so the SLO cannot +/// silently regress. +/// +/// Uses the existing `TransportServer`/`TransportClient` echo path for a realistic +/// send/receive round trip over the local transport. +/// +/// The histogram collection and SLO assertion run once in the function body. +/// A trivial `bench_function` pass-through is registered so criterion emits a +/// row for this target in standard output and the filter +/// `latency_p99_slo` targets it precisely via `cargo bench ... -- latency_p99_slo`. +fn latency_p99_slo(c: &mut Criterion) { + let rt = Runtime::new().unwrap(); + + rt.block_on(async { + let temp_dir = TempDir::new().unwrap(); + let socket_path = temp_dir.path().join("slo-latency-bench.sock"); + + let socket_config = SocketConfig { + unix_path: socket_path.to_string_lossy().to_string(), + windows_pipe: socket_path.to_string_lossy().to_string(), + connection_limit: 100, + #[cfg(target_os = "freebsd")] + freebsd_path: None, + auth_token: None, + per_client_byte_limit: 10 * 1024 * 1024, + rate_limit_config: None, + correlation_config: None, + }; + + let server = TransportServer::new(socket_config.clone()) + .await + .expect("Failed to create server"); + + // start_echo_handler is an infinite accept-loop; run it on a background + // task so the bench can drive the client on the main runtime. + tokio::spawn(async move { + let _ = server.start_echo_handler().await; + }); + + let mut client = TransportClient::connect(&socket_config) + .await + .expect("Failed to connect"); + + let payload = build_slo_payload(); + + // Warmup — drain cold-path effects (first-fault page faults, socket + // buffer priming, allocator warm-up) before measuring. 
+ for _ in 0..SLO_WARMUP_ITERS { + client.send(&payload).await.unwrap(); + let _ = client.receive().await.unwrap(); + } + + // Collect SLO_SAMPLE_COUNT round-trip latencies in nanoseconds. + let mut samples: Vec<u64> = Vec::with_capacity(SLO_SAMPLE_COUNT); + for _ in 0..SLO_SAMPLE_COUNT { + let start = Instant::now(); + client.send(&payload).await.unwrap(); + let response = client.receive().await.unwrap(); + // Duration::as_nanos -> u128; local-IPC round-trip will never exceed u64 nanos. + let elapsed = start.elapsed().as_nanos() as u64; + samples.push(elapsed); + black_box(response); + } + + // Compute p99 from a sorted sample vector. + // + // hdrhistogram is not a workspace dependency (checked Cargo.lock before + // authoring this bench); the AGENTS.md rule against adding external + // deps without approval steers us toward a sorted-Vec implementation. + // For N=10k samples this is O(N log N) ~ negligible vs. the measured + // round-trips themselves. + samples.sort_unstable(); + // Nearest-rank p99: for 0-indexed samples, the p99 boundary is at + // index ceil(N * 0.99) - 1. For N=10000 this is 9899 (the 9900th element). + let p99_index = samples + .len() + .saturating_mul(99) + .div_ceil(100) + .saturating_sub(1); + let p99_nanos = samples.get(p99_index).copied().unwrap(); + let p99 = Duration::from_nanos(p99_nanos); + + // Also surface p50 and max for diagnostic output when the assertion fires. + let p50_index = samples.len() / 2; + let p50 = Duration::from_nanos(samples.get(p50_index).copied().unwrap()); + let max = Duration::from_nanos(samples.last().copied().unwrap()); + + println!( + "latency_p99_slo: samples={} p50={:?} p99={:?} max={:?} threshold={:?}", + samples.len(), + p50, + p99, + max, + LATENCY_P99_SLO, + ); + + // Gate the SLO assertion on Linux and macOS only (any architecture — + // the observed p99 on macOS aarch64 is already in the ~21µs range). 
+ // Windows and FreeBSD remain informational per AGENTS.md OS support + // matrix and the END-297 plan. + #[cfg(any(target_os = "linux", target_os = "macos"))] + { + assert!( + p99 <= LATENCY_P99_SLO, + "END-297 SLO breach: local-IPC round-trip p99 = {p99:?} exceeds \ + {LATENCY_P99_SLO:?} threshold (samples={sample_count}, p50={p50:?}, max={max:?}). \ + See daemoneye-eventbus/benches/ipc_performance.rs::latency_p99_slo.", + sample_count = samples.len(), + ); + } + }); + + // Register a trivial bench entry so this target shows up in criterion's + // standard output and is filterable via `cargo bench -- latency_p99_slo`. + // The real measurement and SLO assertion happened above; this runs a + // no-op computation to satisfy criterion's reporter. + c.bench_function("latency_p99_slo", |b| { + b.iter(|| black_box(LATENCY_P99_SLO)); + }); +} + +// END-297 SLO bench runs FIRST so its assertion fires before any pre-existing +// bench that may exhibit nested-runtime behavior. The SLO group is separate so +// `cargo bench --bench ipc_performance -- latency_p99_slo` targets it cleanly. 
+criterion_group!(slo_benches, latency_p99_slo); criterion_group!( benches, throughput_benchmark, @@ -263,4 +426,4 @@ criterion_group!( backpressure_benchmark, cross_platform_benchmark ); -criterion_main!(benches); +criterion_main!(slo_benches, benches); diff --git a/daemoneye-eventbus/benches/throughput.rs b/daemoneye-eventbus/benches/throughput.rs index d5c1111f..e1fc88a8 100644 --- a/daemoneye-eventbus/benches/throughput.rs +++ b/daemoneye-eventbus/benches/throughput.rs @@ -50,6 +50,7 @@ fn create_test_subscription() -> EventSubscription { correlation_filter: None, topic_patterns: Some(vec!["events.process.*".to_string()]), enable_wildcards: true, + include_control: false, } } diff --git a/daemoneye-eventbus/src/broker.rs b/daemoneye-eventbus/src/broker.rs index daa743e2..4435292b 100644 --- a/daemoneye-eventbus/src/broker.rs +++ b/daemoneye-eventbus/src/broker.rs @@ -1207,6 +1207,7 @@ mod tests { correlation_filter: None, topic_patterns: Some(vec!["events.process.*".to_string()]), enable_wildcards: true, + include_control: false, }; let receiver = event_bus.subscribe(subscription).await.unwrap(); // Verify receiver is open and ready to receive messages diff --git a/daemoneye-eventbus/src/client.rs b/daemoneye-eventbus/src/client.rs index d03508c5..947f6e70 100644 --- a/daemoneye-eventbus/src/client.rs +++ b/daemoneye-eventbus/src/client.rs @@ -49,6 +49,14 @@ struct SubscriptionInfo { patterns: Vec<String>, /// Event sender (bounded for backpressure) sender: mpsc::Sender<BusEvent>, + /// Optional control-message sender (bounded for backpressure) + /// + /// Populated when the subscription was created via + /// [`EventBusClient::subscribe_with_control`] with + /// [`EventSubscription::include_control`] set to `true`. + /// `None` preserves legacy behavior — Control messages are dropped + /// at the client-side filter and never surface to the subscriber. 
+ control_sender: Option<mpsc::Sender<Message>>, /// Subscription timestamp (for future metrics) #[allow(dead_code)] created_at: Instant, @@ -203,6 +211,20 @@ impl EventBusClient { sg.messages_received = sg.messages_received.saturating_add(1); drop(sg); debug!("Received control message for client: {}", client_id); + // Deliver to opted-in subscribers. + // Ignore errors — delivery is best-effort and + // legacy subscribers simply do not opt in. + if let Err(e) = Self::handle_control_message_internal( + &subscriptions, + &message, + &client_id, + ) + .await + { + debug!( + "Error delivering control message for client {client_id}: {e}" + ); + } } crate::message::MessageType::Heartbeat => { // Update statistics for heartbeat messages @@ -415,6 +437,11 @@ impl EventBusClient { } /// Subscribe to topic patterns with backpressure support + /// + /// Delivers only [`MessageType::Event`] envelopes. To also receive + /// [`MessageType::Control`] envelopes use + /// [`subscribe_with_control`](Self::subscribe_with_control) with + /// [`EventSubscription::include_control`] set to `true`. pub async fn subscribe( &self, subscription: EventSubscription, @@ -436,6 +463,7 @@ impl EventBusClient { let subscription_info = SubscriptionInfo { patterns: patterns.clone(), sender: tx, + control_sender: None, created_at: Instant::now(), last_message: None, }; @@ -456,6 +484,75 @@ impl EventBusClient { Ok(rx) } + /// Subscribe to topic patterns and opt into [`MessageType::Control`] delivery. + /// + /// Returns a tuple `(event_rx, control_rx)` of parallel bounded receivers: + /// - `event_rx` carries [`BusEvent`] envelopes decoded from `MessageType::Event` + /// messages, matching the default behavior of [`subscribe`](Self::subscribe). + /// - `control_rx` carries raw [`Message`] envelopes for `MessageType::Control` + /// messages whose topic matches one of the subscription's topic patterns. 
+ /// + /// If [`EventSubscription::include_control`] is `false`, the returned + /// `control_rx` is closed immediately and receives no messages — callers + /// who do not opt in should use [`subscribe`](Self::subscribe) instead. + /// + /// Both receivers use bounded 1000-slot channels for backpressure. Full + /// channels drop messages with a `warn!` log rather than blocking the + /// background message-processing task. + pub async fn subscribe_with_control( + &self, + subscription: EventSubscription, + ) -> Result<(mpsc::Receiver<BusEvent>, mpsc::Receiver<Message>)> { + let subscription_id = subscription.subscriber_id.clone(); + let patterns = subscription.topic_patterns.clone().unwrap_or_default(); + let include_control = subscription.include_control; + + // Validate topic patterns + for pattern in &patterns { + TopicPattern::new(pattern).map_err(|e| { + EventBusError::topic(format!("Invalid topic pattern '{pattern}': {e}")) + })?; + } + + // Create bounded channels for backpressure (default: 1000 messages) + let (event_tx, event_rx) = mpsc::channel(1000); + let (control_tx, control_rx) = mpsc::channel(1000); + + // Store subscription info + let subscription_info = SubscriptionInfo { + patterns: patterns.clone(), + sender: event_tx, + control_sender: if include_control { + Some(control_tx) + } else { + // Drop the control sender so the paired receiver closes + // immediately — legacy callers that did not opt in observe + // exactly the same behavior as `subscribe()`. + drop(control_tx); + None + }, + created_at: Instant::now(), + last_message: None, + }; + + self.subscriptions + .write() + .await + .insert(subscription_id.clone(), subscription_info); + + // Update statistics + let sub_count = self.subscriptions.read().await.len(); + self.stats.lock().await.active_subscriptions = sub_count; + + // debug! 
(not info!): subscription topology including per-collector IDs + // should not appear in default-level logs that may ship to less-trusted + // SIEM pipelines (END-297 review SEC-004). + debug!( + "Subscribed (control={include_control}) to patterns: {patterns:?} (subscription: {subscription_id})" + ); + Ok((event_rx, control_rx)) + } + /// Unsubscribe from topics pub async fn unsubscribe(&self, subscription_id: &str) -> Result<()> { let removed = { @@ -540,12 +637,8 @@ impl EventBusClient { for (subscription_id, subscription_info) in subscriptions_guard.iter() { // Check if any pattern matches the topic - let matches = subscription_info.patterns.iter().any(|pattern| { - TopicPattern::new(pattern).is_ok_and(|topic_pattern| { - Topic::new(&message.topic) - .is_ok_and(|topic_obj| topic_pattern.matches(&topic_obj)) - }) - }); + let matches = + Self::subscription_matches_topic(&subscription_info.patterns, &message.topic); if matches { let mut event_copy = bus_event.clone(); @@ -582,6 +675,17 @@ impl EventBusClient { Ok(()) } + /// Return true when any of the subscription's topic patterns matches the + /// given topic string. Parsing errors on either side yield a non-match + /// (failing closed) rather than panicking. + fn subscription_matches_topic(patterns: &[String], topic: &str) -> bool { + patterns.iter().any(|pattern| { + TopicPattern::new(pattern).is_ok_and(|topic_pattern| { + Topic::new(topic).is_ok_and(|topic_obj| topic_pattern.matches(&topic_obj)) + }) + }) + } + /// Handle event messages async fn handle_event_message(&self, message: Message) -> Result<()> { Self::handle_event_message_internal( @@ -593,11 +697,69 @@ impl EventBusClient { .await } - /// Handle control messages - #[allow(clippy::unused_async)] + /// Handle control messages (instance-method entry point). + /// + /// Delegates to [`handle_control_message_internal`](Self::handle_control_message_internal) + /// so the background task and public entry points share delivery logic. 
async fn handle_control_message(&self, message: Message) -> Result<()> { debug!("Received control message: {}", message.topic); - // Control message handling can be extended as needed + Self::handle_control_message_internal(&self.subscriptions, &message, &self.client_id).await + } + + /// Deliver a [`MessageType::Control`] message to matching opted-in + /// subscribers via their control channel. + /// + /// Subscriptions with `control_sender = None` are skipped — that is the + /// legacy path where Control messages are silently dropped to preserve + /// source compatibility for callers that only use [`subscribe`](Self::subscribe). + /// + /// A full control channel logs a `warn!` and drops the message rather + /// than blocking the background receive task. A closed control channel + /// is logged at `warn!` and the stale subscription is not auto-removed + /// here (unsubscribe is the caller's responsibility). + async fn handle_control_message_internal( + subscriptions: &Arc>>, + message: &Message, + _client_id: &str, + ) -> Result<()> { + // Guard: only deliver Control envelopes through this path. + if message.message_type != MessageType::Control { + return Ok(()); + } + + let subscriptions_guard = subscriptions.read().await; + let mut delivered_count = 0_usize; + + for (subscription_id, subscription_info) in subscriptions_guard.iter() { + // Skip subscribers that did not opt into Control delivery. + let Some(ref control_sender) = subscription_info.control_sender else { + continue; + }; + + // Match the incoming topic against the subscriber's patterns. 
+ if !Self::subscription_matches_topic(&subscription_info.patterns, &message.topic) { + continue; + } + + match control_sender.try_send(message.clone()) { + Ok(()) => { + delivered_count = delivered_count.saturating_add(1); + } + Err(tokio::sync::mpsc::error::TrySendError::Full(_)) => { + warn!( + "Subscription {subscription_id} control queue full, control message dropped" + ); + } + Err(tokio::sync::mpsc::error::TrySendError::Closed(_)) => { + warn!( + "Subscription {subscription_id} control channel closed; control message not delivered" + ); + } + } + } + drop(subscriptions_guard); + + debug!("Delivered control message to {delivered_count} subscriptions"); Ok(()) } @@ -646,6 +808,28 @@ impl EventBusClient { transport.is_alive().await } + /// Signal background tasks to shut down without consuming the client. + /// + /// Sends on the internal broadcast channel so background receive/heartbeat + /// tasks exit at their next loop iteration. Safe to call when the client + /// is shared via `Arc` — the broadcast `Sender::send(&self)` signature does + /// not require ownership. + /// + /// Returns `true` if the signal was delivered. Returns `false` only when + /// no live receivers are subscribed to the broadcast channel — for example + /// because background tasks have already exited and closed their receivers. + /// (The "already taken by a consuming `shutdown()` call" case cannot be + /// observed here: this method takes `&self`, so any caller holding such a + /// reference proves that `shutdown(self)` has not yet consumed the client.) + /// + /// Callers that also want to await background-task completion should use + /// the consuming [`shutdown`](Self::shutdown) method after this signal. 
+ pub fn shutdown_signal(&self) -> bool { + self.shutdown_tx + .as_ref() + .is_some_and(|tx| tx.send(()).is_ok()) + } + /// Shutdown the client pub async fn shutdown(mut self) -> Result<()> { info!("Shutting down EventBus client: {}", self.client_id); @@ -826,9 +1010,305 @@ mod tests { correlation_filter: None, topic_patterns: Some(vec!["events.#.invalid".to_string()]), enable_wildcards: true, + include_control: false, }; let result = client.subscribe(subscription).await; assert!(result.is_err()); } + + /// Build a `SubscriptionInfo` with the given topic patterns and optional + /// control sender. Used by the control-delivery unit tests below to + /// exercise `handle_control_message_internal` without wiring a full + /// transport stack. + fn make_subscription_info( + patterns: Vec, + control_sender: Option>, + ) -> (SubscriptionInfo, mpsc::Receiver) { + let (event_tx, event_rx) = mpsc::channel::(1000); + let info = SubscriptionInfo { + patterns, + sender: event_tx, + control_sender, + created_at: Instant::now(), + last_message: None, + }; + (info, event_rx) + } + + /// Happy path (Unit 1): A subscriber that opts into Control delivery + /// receives Control messages on matching topics with correlation metadata + /// intact. + #[tokio::test] + async fn test_control_message_delivered_when_opted_in() { + let subscriptions: Arc>> = + Arc::new(RwLock::new(HashMap::new())); + + // Set up a subscriber that opted in to Control delivery. + let (control_tx, mut control_rx) = mpsc::channel::(10); + let (info, _event_rx) = make_subscription_info( + vec!["control.collector.lifecycle".to_string()], + Some(control_tx), + ); + subscriptions + .write() + .await + .insert("test-sub".to_string(), info); + + // Build a Control message on the subscribed topic with known correlation. 
+ let message = Message::control( + "control.collector.lifecycle".to_string(), + "corr-xyz".to_string(), + b"{\"type\":\"BeginMonitoring\"}".to_vec(), + 42, + ); + + EventBusClient::handle_control_message_internal(&subscriptions, &message, "test-client") + .await + .expect("delivery should succeed"); + + let delivered = control_rx.recv().await.expect("should receive control msg"); + assert_eq!(delivered.topic, "control.collector.lifecycle"); + assert_eq!(delivered.correlation_metadata.correlation_id, "corr-xyz"); + assert_eq!(delivered.message_type, MessageType::Control); + } + + /// Edge case (Unit 1): A subscriber that did NOT opt into Control delivery + /// does not receive Control messages. Legacy Event-only subscribers are + /// unaffected by the new field. + #[tokio::test] + async fn test_control_message_not_delivered_when_not_opted_in() { + let subscriptions: Arc>> = + Arc::new(RwLock::new(HashMap::new())); + + // Set up a subscriber with NO control_sender (legacy default). + let (info, mut event_rx) = + make_subscription_info(vec!["control.collector.lifecycle".to_string()], None); + subscriptions + .write() + .await + .insert("legacy-sub".to_string(), info); + + let message = Message::control( + "control.collector.lifecycle".to_string(), + "corr-legacy".to_string(), + Vec::new(), + 7, + ); + + // Should not error; legacy subscriber is simply skipped. + EventBusClient::handle_control_message_internal(&subscriptions, &message, "test-client") + .await + .expect("handler tolerates legacy subscribers"); + + // Strengthened assertion (END-297 review T-004): verify the Control + // envelope did not leak onto the Event channel. `try_recv` returning + // `TryRecvError::Empty` confirms the channel is open but no message + // was delivered — a bug that routed Control to the event path would + // instead have produced `Ok(_)` with a delivered message. 
+ assert!( + matches!( + event_rx.try_recv(), + Err(tokio::sync::mpsc::error::TryRecvError::Empty) + ), + "Control message must not leak onto the Event channel for opt-out subscribers" + ); + } + + /// Edge case (Unit 1): A Control message on a topic that does not match + /// the subscriber's patterns is not delivered even if the subscriber + /// opted into Control delivery. + #[tokio::test] + async fn test_control_message_topic_filter_still_applies_when_opted_in() { + let subscriptions: Arc>> = + Arc::new(RwLock::new(HashMap::new())); + + // Subscriber opts in but only for a DIFFERENT topic pattern. + let (control_tx, mut control_rx) = mpsc::channel::(10); + let (info, _event_rx) = + make_subscription_info(vec!["events.process.+".to_string()], Some(control_tx)); + subscriptions + .write() + .await + .insert("events-only-sub".to_string(), info); + + // Publish a Control message on a non-matching topic. + let message = Message::control( + "control.collector.lifecycle".to_string(), + "corr-noisy".to_string(), + Vec::new(), + 5, + ); + + EventBusClient::handle_control_message_internal(&subscriptions, &message, "test-client") + .await + .unwrap(); + + // Receiver should have nothing — the topic filter gated the delivery. + assert!( + control_rx.try_recv().is_err(), + "control msg on unrelated topic must not leak through topic filter" + ); + } + + /// Edge case (Unit 1): `handle_control_message_internal` is a no-op for + /// Event-type messages. This guards against future refactors that might + /// accidentally duplicate Event delivery via the control path. 
+ #[tokio::test] + async fn test_control_handler_ignores_event_messages() { + let subscriptions: Arc>> = + Arc::new(RwLock::new(HashMap::new())); + + let (control_tx, mut control_rx) = mpsc::channel::(10); + let (info, _event_rx) = + make_subscription_info(vec!["events.process.+".to_string()], Some(control_tx)); + subscriptions + .write() + .await + .insert("guard-sub".to_string(), info); + + // Event-typed message — should NEVER flow through control channel. + let message = Message::event( + "events.process.new".to_string(), + "corr-event".to_string(), + Vec::new(), + 1, + ); + + EventBusClient::handle_control_message_internal(&subscriptions, &message, "test-client") + .await + .unwrap(); + + assert!( + control_rx.try_recv().is_err(), + "control handler must not forward Event-type messages" + ); + } + + /// Integration-style (Unit 1): `subscribe_with_control` wires the + /// returned `control_rx` to the per-subscription `control_sender` so the + /// background task's delivery path reaches it. 
+ #[tokio::test] + async fn test_subscribe_with_control_round_trip_when_opted_in() { + let temp_dir = tempdir().unwrap(); + let socket_path = temp_dir.path().join("test-control-opt-in.sock"); + let socket_config = SocketConfig { + unix_path: socket_path.to_string_lossy().to_string(), + windows_pipe: socket_path.to_string_lossy().to_string(), + connection_limit: 100, + #[cfg(target_os = "freebsd")] + freebsd_path: None, + auth_token: None, + per_client_byte_limit: 10 * 1024 * 1024, + rate_limit_config: None, + correlation_config: None, + }; + + let _server = TransportServer::new(socket_config.clone()).await.unwrap(); + + let client = EventBusClient::new( + "test-client".to_string(), + socket_config, + ClientConfig::default(), + ) + .await + .unwrap(); + + let subscription = EventSubscription { + subscriber_id: "opt-in-sub".to_string(), + capabilities: crate::message::SourceCaps::default(), + event_filter: None, + correlation_filter: None, + topic_patterns: Some(vec!["control.collector.lifecycle".to_string()]), + enable_wildcards: true, + include_control: true, + }; + + let (_events_rx, mut control_rx) = client + .subscribe_with_control(subscription) + .await + .expect("subscribe_with_control should succeed"); + + // Simulate an inbound Control message by directly invoking the + // same entry point the background task uses. + let message = Message::control( + "control.collector.lifecycle".to_string(), + "round-trip-corr".to_string(), + b"BeginMonitoring".to_vec(), + 99, + ); + client.handle_control_message(message).await.unwrap(); + + let delivered = control_rx.recv().await.expect("should receive msg"); + assert_eq!( + delivered.correlation_metadata.correlation_id, + "round-trip-corr" + ); + } + + /// Integration-style (Unit 1): `subscribe_with_control` with + /// `include_control=false` returns a closed control receiver and never + /// delivers Control messages — matches the behavior of `subscribe()`. 
+ #[tokio::test] + async fn test_subscribe_with_control_closed_channel_when_not_opted_in() { + let temp_dir = tempdir().unwrap(); + let socket_path = temp_dir.path().join("test-control-legacy.sock"); + let socket_config = SocketConfig { + unix_path: socket_path.to_string_lossy().to_string(), + windows_pipe: socket_path.to_string_lossy().to_string(), + connection_limit: 100, + #[cfg(target_os = "freebsd")] + freebsd_path: None, + auth_token: None, + per_client_byte_limit: 10 * 1024 * 1024, + rate_limit_config: None, + correlation_config: None, + }; + + let _server = TransportServer::new(socket_config.clone()).await.unwrap(); + + let client = EventBusClient::new( + "test-client".to_string(), + socket_config, + ClientConfig::default(), + ) + .await + .unwrap(); + + let subscription = EventSubscription { + subscriber_id: "no-opt-sub".to_string(), + capabilities: crate::message::SourceCaps::default(), + event_filter: None, + correlation_filter: None, + topic_patterns: Some(vec!["control.collector.lifecycle".to_string()]), + enable_wildcards: true, + include_control: false, + }; + + let (_events_rx, mut control_rx) = client + .subscribe_with_control(subscription) + .await + .expect("subscribe_with_control should still succeed for legacy"); + + // Fire a control message — should NOT land on the channel. + let message = Message::control( + "control.collector.lifecycle".to_string(), + "legacy-corr".to_string(), + Vec::new(), + 1, + ); + client.handle_control_message(message).await.unwrap(); + + // The channel must be CLOSED (not just empty) — subscribe_with_control + // drops the paired sender when include_control=false. A blocking `recv()` + // on a closed channel returns None promptly, which is the stronger + // assertion the opt-out contract requires (PR #178 review). + // `Message` doesn't impl PartialEq, so pattern-match instead of assert_eq!. 
+ let recv_result = + tokio::time::timeout(std::time::Duration::from_millis(100), control_rx.recv()).await; + assert!( + matches!(recv_result, Ok(None)), + "control channel must be closed (not just empty) for non-opted-in subscriber; got {recv_result:?}" + ); + } } diff --git a/daemoneye-eventbus/src/correlation.rs b/daemoneye-eventbus/src/correlation.rs index b33d0666..5373bf6a 100644 --- a/daemoneye-eventbus/src/correlation.rs +++ b/daemoneye-eventbus/src/correlation.rs @@ -33,7 +33,7 @@ impl Default for CorrelationTrackerConfig { Self { max_history_size: 10_000, max_active_workflows: 1_000, - workflow_timeout: Duration::from_secs(300), + workflow_timeout: Duration::from_mins(5), } } } diff --git a/daemoneye-eventbus/src/message.rs b/daemoneye-eventbus/src/message.rs index 6534b2e8..d6953721 100644 --- a/daemoneye-eventbus/src/message.rs +++ b/daemoneye-eventbus/src/message.rs @@ -589,7 +589,34 @@ pub struct TriggerRequest { } /// Event subscription configuration -#[derive(Debug, Clone, Serialize, Deserialize)] +/// +/// A subscription declares which topics a consumer wants to receive messages +/// on. By default, only [`MessageType::Event`] envelopes are delivered to the +/// subscriber's channel — legacy behavior that keeps typed `CollectionEvent` +/// consumers free of RPC/lifecycle noise. +/// +/// To receive [`MessageType::Control`] envelopes (lifecycle commands such as +/// `BeginMonitoring`, per-collector RPC requests, etc.) set +/// [`include_control`](Self::include_control) to `true` AND call +/// [`EventBusClient::subscribe_with_control`](crate::EventBusClient::subscribe_with_control), +/// which returns a parallel [`tokio::sync::mpsc::Receiver`] of raw +/// [`Message`] envelopes. The legacy [`EventBusClient::subscribe`](crate::EventBusClient::subscribe) +/// method never delivers Control messages, preserving source- and +/// behavior-compatibility for existing subscribers. 
+/// +/// `include_control` defaults to `false` so existing call sites remain +/// source-compatible; new callers should use struct-update syntax with +/// [`Default`] to pick up future fields without churn. +/// +/// Note: this struct is **not** currently marked `#[non_exhaustive]`. +/// Cross-crate `struct-literal` construction (including FRU) is forbidden +/// for non-exhaustive types, which means adding the attribute requires +/// introducing a builder or `with_*` setter API for external consumers +/// (collector-core's bridge, tests, benches). That refactor is deferred +/// as local follow-up work. In the meantime, always construct via +/// `..Default::default()` so future field additions cause the smallest +/// possible diff. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] pub struct EventSubscription { /// Unique identifier for the subscriber pub subscriber_id: String, @@ -601,12 +628,33 @@ pub struct EventSubscription { pub correlation_filter: Option, /// Optional explicit topic patterns pub topic_patterns: Option>, - /// Enable wildcarding support for topic patterns + /// Advisory flag reserved for a future wildcard-enforcement policy. + /// + /// The client-side subscription matcher in `client.rs` currently parses + /// `+` and `#` wildcards unconditionally via + /// [`crate::TopicPattern`], so this field has no runtime effect + /// today — callers get wildcard matching regardless of the value. + /// Setting it to `true` is therefore the honest default until + /// enforcement lands. + /// + /// A follow-up will either (a) gate wildcard parsing on this flag and + /// reject wildcard-containing patterns when `false`, or (b) remove the + /// field entirely in favor of an always-on semantics. pub enable_wildcards: bool, + /// Opt into delivery of [`MessageType::Control`] envelopes on matching topics. 
+ /// + /// When `false` (default) Control messages are silently dropped by the + /// client so legacy event-only consumers keep their current behavior. + /// When `true`, the subscribed topic patterns are tracked for Control + /// delivery and callers should use + /// [`EventBusClient::subscribe_with_control`](crate::EventBusClient::subscribe_with_control) + /// to obtain the parallel Control receiver. + #[serde(default)] + pub include_control: bool, } /// Source capabilities -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Default, Serialize, Deserialize)] pub struct SourceCaps { /// Supported event types pub event_types: Vec, diff --git a/daemoneye-eventbus/src/process_manager.rs b/daemoneye-eventbus/src/process_manager.rs index a2ecfa88..1d03a24c 100644 --- a/daemoneye-eventbus/src/process_manager.rs +++ b/daemoneye-eventbus/src/process_manager.rs @@ -215,7 +215,7 @@ impl Default for ProcessManagerConfig { collector_binaries: HashMap::new(), default_graceful_timeout: Duration::from_secs(30), default_force_timeout: Duration::from_secs(5), - health_check_interval: Duration::from_secs(60), + health_check_interval: Duration::from_mins(1), enable_auto_restart: false, heartbeat_timeout_multiplier: 3, } @@ -792,18 +792,15 @@ impl CollectorProcessManager { .unwrap_or(Duration::from_secs(0)); // Calculate expected heartbeats missed based on elapsed time. - // Use millisecond precision and guard against sub-interval rounding to avoid division by zero. + // `checked_div` returns None when the divisor is zero, which collapses + // to the "no intervals missed yet" case via `unwrap_or(0)`. let intervals_missed: u64 = { let interval_ms = heartbeat_interval.as_millis(); - if interval_ms == 0 { - 0 - } else { - let elapsed_ms = elapsed_since_heartbeat.as_millis(); - // SAFETY: both values are u128 millisecond counts; quotient fits u64 in practice. 
- #[allow(clippy::arithmetic_side_effects, clippy::integer_division)] - let n = elapsed_ms / interval_ms; - u64::try_from(n).unwrap_or(u64::MAX) - } + let elapsed_ms = elapsed_since_heartbeat.as_millis(); + // SAFETY: both values are u128 millisecond counts; the quotient + // fits u64 in practice and we saturate on overflow. + let n = elapsed_ms.checked_div(interval_ms).unwrap_or(0); + u64::try_from(n).unwrap_or(u64::MAX) }; // Increment heartbeat sequence diff --git a/daemoneye-eventbus/src/result_aggregation.rs b/daemoneye-eventbus/src/result_aggregation.rs index debcfcd2..c4c2e4d8 100644 --- a/daemoneye-eventbus/src/result_aggregation.rs +++ b/daemoneye-eventbus/src/result_aggregation.rs @@ -88,8 +88,8 @@ impl Default for AggregationConfig { fn default() -> Self { Self { max_pending_results: 10000, - deduplication_window: Duration::from_secs(300), - correlation_timeout: Duration::from_secs(60), + deduplication_window: Duration::from_mins(5), + correlation_timeout: Duration::from_mins(1), backpressure_threshold: 8000, health_check_interval: Duration::from_secs(10), } @@ -468,7 +468,7 @@ impl ResultAggregator { for (collector_id, status) in health_map.iter_mut() { // Check if collector has been inactive if let Ok(elapsed) = now.duration_since(status.last_success) - && elapsed > Duration::from_secs(60) + && elapsed > Duration::from_mins(1) { // Mark as unhealthy if no results for 60 seconds if status.health != CollectorHealth::Unhealthy @@ -532,7 +532,7 @@ impl ResultAggregator { let deduplication_window = self.config.deduplication_window; tokio::spawn(async move { - let mut interval = tokio::time::interval(Duration::from_secs(60)); + let mut interval = tokio::time::interval(Duration::from_mins(1)); loop { interval.tick().await; diff --git a/daemoneye-eventbus/tests/correlation_metadata_tests.rs b/daemoneye-eventbus/tests/correlation_metadata_tests.rs index 9bf7ad47..a2d11158 100644 --- a/daemoneye-eventbus/tests/correlation_metadata_tests.rs +++ 
b/daemoneye-eventbus/tests/correlation_metadata_tests.rs @@ -431,6 +431,7 @@ async fn test_multi_collector_workflow_correlation() { ), topic_patterns: Some(vec!["events.process.*".to_string()]), enable_wildcards: true, + include_control: false, }; let _receiver = event_bus.subscribe(subscription).await.unwrap(); diff --git a/daemoneye-eventbus/tests/correlation_tracker_tests.rs b/daemoneye-eventbus/tests/correlation_tracker_tests.rs index d0b61593..abb9493e 100644 --- a/daemoneye-eventbus/tests/correlation_tracker_tests.rs +++ b/daemoneye-eventbus/tests/correlation_tracker_tests.rs @@ -136,7 +136,7 @@ async fn test_event_history_bounded() { let config = CorrelationTrackerConfig { max_history_size: 3, max_active_workflows: 100, - workflow_timeout: Duration::from_secs(300), + workflow_timeout: Duration::from_mins(5), }; let tracker = CorrelationTracker::new(config); @@ -285,7 +285,7 @@ async fn test_zero_history_size_does_not_loop() { let config = CorrelationTrackerConfig { max_history_size: 0, max_active_workflows: 100, - workflow_timeout: Duration::from_secs(300), + workflow_timeout: Duration::from_mins(5), }; let tracker = CorrelationTracker::new(config); diff --git a/daemoneye-eventbus/tests/e2e_multi_collector.rs b/daemoneye-eventbus/tests/e2e_multi_collector.rs new file mode 100644 index 00000000..60c440a5 --- /dev/null +++ b/daemoneye-eventbus/tests/e2e_multi_collector.rs @@ -0,0 +1,556 @@ +// This file is an integration test — it compiles as its own binary crate +// under `tests/` (no `#[cfg(test)]` wrapping). A few workspace-level lints +// are standard to relax at the test-file level so we can write expressive, +// easy-to-read assertions without noise. The set below is minimised to +// lints that trip genuinely benign patterns in this file (matches common +// practice across the crate's test files; see PR #178 review). +#![allow( + clippy::unwrap_used, // Tests panic on unexpected Err; that's the diagnostic. 
+ clippy::expect_used, // Same rationale as unwrap_used. + clippy::panic, // Test-only assertions that panic on bad state. + clippy::arithmetic_side_effects, // Test iteration counters and expected-value math. + clippy::as_conversions, // Small numeric casts in test data. + clippy::cast_possible_truncation,// Same as as_conversions. + clippy::cast_sign_loss, // Same. + clippy::indexing_slicing, // Fixed-size test fixtures indexed directly. + clippy::items_after_statements, // Test-local const declarations near their use. + clippy::ignore_without_reason, // #[ignore] markers used in this file carry rationale in docstrings. + clippy::shadow_unrelated, // Test scenarios re-use short binding names (e.g. `stats`) across phases for readability. + clippy::doc_markdown, // Integration-test docstrings reference protobuf-style identifiers and plain text labels. + dead_code // Test helpers used in a subset of tests. +)] +//! End-to-end multi-collector coordination tests (END-297 Unit 4). +//! +//! # Scope +//! +//! These tests prove the acceptance criteria R5 (load balancing), R6 (result +//! aggregation), R8 (failover), and R13 (multi-collector integration) from +//! END-297 by exercising the **real broker, task distributor, and result +//! aggregator** through the same code paths production uses. +//! +//! All clients in this file are **in-process** subscribers that represent +//! logical collectors (`procmond-1`, `procmond-2`, ...). The broker itself is +//! constructed with [`DaemoneyeBroker::new`] against a temp-dir socket path +//! because the constructor requires one, but no separate OS processes are +//! spawned and no traffic crosses the `interprocess` transport layer. +//! +//! # Why in-process, not cross-process? +//! +//! Unit 1 of this closure pass (see +//! `docs/plans/2026-04-18-001-feat-close-end-297-message-broker-plan.md`) +//! wired `MessageType::Control` delivery through the *in-process* broker +//! subscription path ([`DaemoneyeBroker::subscribe_raw`]). 
A latent gap +//! remains at the transport layer: the broker does not currently run a +//! per-connection receive loop that registers subscriptions from remote +//! clients over `interprocess`. `ClientConnectionManager::subscribe_client` +//! exists and is exercised by an in-process unit test in `transport.rs`, but +//! cross-process subscription registration is not wired end-to-end. +//! +//! That gap is explicitly out of scope for END-297 closure. The acceptance +//! criteria are about broker coordination semantics — queue groups, routing +//! strategies, capability filters, failover, correlation — and those live in +//! [`TaskDistributor`], [`ResultAggregator`], and [`DaemoneyeBroker`]. This +//! file exercises every one of them through the production APIs. +//! +//! # Queue group convention +//! +//! "Queue group" in the END-297 ticket maps onto a set of collectors +//! registered under the same `collector_type` with the same +//! `supported_operations`. [`TaskDistributor::distribute_task`] picks exactly +//! one member per task via the configured [`RoutingStrategy`]. Broadcast +//! topics (config reload, wildcard observation) are delivered to *all* +//! matching subscribers by [`DaemoneyeBroker::publish`]. + +use daemoneye_eventbus::{ + AggregationConfig, CollectionEvent, CollectorCapability, CollectorResult, DaemoneyeBroker, + ProcessEvent, ResultAggregator, RoutingStrategy, TaskDistributor, TaskRequest, collector, +}; +use std::collections::{HashMap, HashSet}; +use std::sync::Arc; +use std::time::{Duration, SystemTime}; +use tempfile::TempDir; +use tokio::sync::mpsc; +use tokio::time::timeout; +use uuid::Uuid; + +// --------------------------------------------------------------------------- +// Constants +// --------------------------------------------------------------------------- + +/// Queue-group name used throughout these tests, mirroring the ticket's +/// `local-procmond-workers` example via the canonical `procmond` +/// collector_type. 
+const QUEUE_GROUP_TYPE: &str = "procmond"; +const QUEUE_GROUP_OPERATION: &str = "enumerate_processes"; +const MEMBER_1: &str = "procmond-1"; +const MEMBER_2: &str = "procmond-2"; + +/// Wall-clock budget for any single `recv` attempt. Keeps watchdog timeouts +/// small so a regression hangs fast. +const RECV_TIMEOUT: Duration = Duration::from_secs(5); + +// --------------------------------------------------------------------------- +// Fixture helpers (kept inline — no `tests/common/mod.rs` convention in this +// crate today). +// --------------------------------------------------------------------------- + +/// Bring up a fresh broker backed by a temp-dir socket path. The broker is +/// not `start()`-ed because the in-process subscription paths do not need the +/// transport server. Returns the broker and the `TempDir` guard so the +/// caller can keep the temp dir alive for the full test. +async fn spawn_broker(test_name: &str) -> (Arc, TempDir) { + let temp_dir = TempDir::new().expect("create temp dir"); + let socket_path = temp_dir.path().join(format!("{test_name}.sock")); + let broker = Arc::new( + DaemoneyeBroker::new(&socket_path.to_string_lossy()) + .await + .expect("construct broker"), + ); + (broker, temp_dir) +} + +/// Build a `CollectorCapability` for a queue-group member. All members share +/// `collector_type` and `supported_operations` so the distributor routes +/// between them. +fn queue_group_capability(collector_id: &str, max_concurrent: u32) -> CollectorCapability { + CollectorCapability { + collector_id: collector_id.to_owned(), + collector_type: QUEUE_GROUP_TYPE.to_owned(), + supported_operations: vec![QUEUE_GROUP_OPERATION.to_owned()], + max_concurrent_tasks: max_concurrent, + priority_levels: vec![1, 2, 3, 4, 5], + metadata: HashMap::new(), + } +} + +/// Build a minimal `TaskRequest` targeting the queue group's operation. 
+fn make_task(task_id: &str, priority: u8) -> TaskRequest { + let now = SystemTime::now(); + TaskRequest { + task_id: task_id.to_owned(), + operation: QUEUE_GROUP_OPERATION.to_owned(), + priority, + payload: Vec::new(), + timeout_ms: 30_000, + metadata: HashMap::new(), + correlation_id: Some(format!("corr-{task_id}")), + created_at: now, + deadline: now + Duration::from_secs(30), + } +} + +/// Register a logical collector on the distributor *and* subscribe its raw +/// task-topic receiver on the broker. Returns the receiver so the test can +/// assert which tasks each member received. +async fn join_queue_group( + broker: &Arc, + distributor: &TaskDistributor, + collector_id: &str, + max_concurrent: u32, +) -> mpsc::UnboundedReceiver { + let task_topic = collector::task_topic(QUEUE_GROUP_TYPE, collector_id); + let subscriber_id = Uuid::new_v4(); + let rx = broker + .subscribe_raw(&task_topic, subscriber_id) + .await + .expect("subscribe to per-collector task topic"); + distributor + .register_collector(queue_group_capability(collector_id, max_concurrent)) + .await + .expect("register collector with distributor"); + rx +} + +/// Drain pending messages from a receiver without blocking. Returns the +/// drained messages and leaves the receiver open. +fn drain_now( + rx: &mut mpsc::UnboundedReceiver, +) -> Vec { + let mut out = Vec::new(); + while let Ok(msg) = rx.try_recv() { + out.push(msg); + } + out +} + +/// Wait for `target` messages across `N` receivers, budgeted by `RECV_TIMEOUT`. +/// Returns the per-receiver counts. Polls each receiver with `try_recv()` and +/// drains all immediately-available messages; if no receiver has anything +/// ready, sleeps briefly before retrying so the test stays responsive +/// without busy-looping. Delivery order across receivers is not guaranteed, +/// so this helper is order-independent by design. 
+async fn collect_until( + receivers: &mut [&mut mpsc::UnboundedReceiver; N], + target: usize, +) -> [usize; N] { + let mut counts = [0_usize; N]; + + let deadline = tokio::time::Instant::now() + RECV_TIMEOUT; + loop { + let total: usize = counts.iter().sum(); + if total >= target { + break; + } + if tokio::time::Instant::now() >= deadline { + break; + } + + // Try each receiver non-blockingly first. + let mut made_progress = false; + for (idx, rx) in receivers.iter_mut().enumerate() { + while let Ok(_msg) = rx.try_recv() { + counts[idx] = counts[idx].saturating_add(1); + made_progress = true; + } + } + + if !made_progress { + // Nothing was immediately available — yield so the broker can + // finish delivering in-flight messages. A short sleep keeps the + // test responsive without busy-looping. + tokio::time::sleep(Duration::from_millis(10)).await; + } + } + + counts +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +/// Load balancing across a queue group: 100 tasks distributed round-robin +/// between two members should reach both; neither member should starve and +/// neither should monopolize the group. +#[tokio::test] +async fn load_balancing_across_queue_group_members() { + let (broker, _temp) = spawn_broker("load-balancing").await; + let distributor = TaskDistributor::new(Arc::clone(&broker)) + .await + .expect("construct distributor"); + distributor + .set_routing_strategy(RoutingStrategy::RoundRobin) + .await; + + // Two queue-group members with enough capacity to absorb the full 100-task + // burst so nothing gets queued — load-balancing assertion is on the + // distribution, not the queue. 
+ let mut rx1 = join_queue_group(&broker, &distributor, MEMBER_1, 200).await; + let mut rx2 = join_queue_group(&broker, &distributor, MEMBER_2, 200).await; + + const TASK_COUNT: usize = 100; + for i in 0..TASK_COUNT { + let task = make_task(&format!("lb-task-{i}"), 3); + let selected = distributor + .distribute_task(task) + .await + .expect("distribute task"); + assert!( + selected == MEMBER_1 || selected == MEMBER_2, + "unexpected collector selection: {selected}" + ); + } + + let counts = collect_until(&mut [&mut rx1, &mut rx2], TASK_COUNT).await; + let received_1 = counts[0]; + let received_2 = counts[1]; + let total_received = received_1.saturating_add(received_2); + + assert_eq!( + total_received, TASK_COUNT, + "every distributed task should be delivered to exactly one member", + ); + assert!( + received_1 > 0 && received_2 > 0, + "both members must receive at least one task (got {received_1}/{received_2})", + ); + // Plan acceptance: neither member gets > 95 of 100. + assert!( + received_1 < 95 && received_2 < 95, + "neither member should monopolize the queue group \ + (got {received_1}/{received_2})", + ); +} + +/// Broadcast: when the agent publishes a single config reload on a shared +/// broadcast topic, every member subscribed to that topic receives it. The +/// broker's fan-out delivers one copy per subscriber — this is the opposite +/// semantic to load balancing. +#[tokio::test] +async fn broadcast_config_reload_reaches_all_members() { + let (broker, _temp) = spawn_broker("broadcast-config").await; + + // Shared broadcast topic — "config" for the whole procmond family. 
+ let config_topic = format!("{}.{}", collector::CONFIG, QUEUE_GROUP_TYPE); + + let sub1_id = Uuid::new_v4(); + let sub2_id = Uuid::new_v4(); + let mut rx1 = broker + .subscribe_raw(&config_topic, sub1_id) + .await + .expect("member 1 subscribes to config topic"); + let mut rx2 = broker + .subscribe_raw(&config_topic, sub2_id) + .await + .expect("member 2 subscribes to config topic"); + + let correlation_id = "config-reload-1"; + let payload = b"config-version=2".to_vec(); + broker + .publish(&config_topic, correlation_id, payload.clone()) + .await + .expect("publish config reload"); + + let msg1 = timeout(RECV_TIMEOUT, rx1.recv()) + .await + .expect("member 1 receives config reload before timeout") + .expect("member 1 channel open"); + let msg2 = timeout(RECV_TIMEOUT, rx2.recv()) + .await + .expect("member 2 receives config reload before timeout") + .expect("member 2 channel open"); + + assert_eq!(msg1.topic, config_topic); + assert_eq!(msg2.topic, config_topic); + assert_eq!(msg1.payload, payload); + assert_eq!(msg2.payload, payload); + assert_eq!(msg1.correlation_metadata.correlation_id, correlation_id); + assert_eq!(msg2.correlation_metadata.correlation_id, correlation_id); +} + +/// Failover: after one queue-group member is deregistered mid-run, every +/// subsequent task routes to the surviving member; no task is silently +/// dropped. +#[tokio::test] +async fn failover_redirects_tasks_after_member_deregisters() { + let (broker, _temp) = spawn_broker("failover").await; + let distributor = TaskDistributor::new(Arc::clone(&broker)) + .await + .expect("construct distributor"); + distributor + .set_routing_strategy(RoutingStrategy::RoundRobin) + .await; + + let mut rx1 = join_queue_group(&broker, &distributor, MEMBER_1, 200).await; + let mut rx2 = join_queue_group(&broker, &distributor, MEMBER_2, 200).await; + + // Phase 1: distribute an initial batch while both members are healthy. 
+ const PHASE_1_COUNT: usize = 10; + for i in 0..PHASE_1_COUNT { + let task = make_task(&format!("phase1-{i}"), 2); + distributor.distribute_task(task).await.expect("phase 1"); + } + + // Give the broker a moment to fan everything out. + let phase1_counts = collect_until(&mut [&mut rx1, &mut rx2], PHASE_1_COUNT).await; + let phase1_total = phase1_counts[0].saturating_add(phase1_counts[1]); + assert_eq!( + phase1_total, PHASE_1_COUNT, + "phase 1 tasks should all be delivered while both members are healthy", + ); + + // Fail member 1 — simulates a procmond crash. There is no kill primitive + // for an in-process subscriber, so the canonical way for a queue group to + // drop a member is `deregister_collector`, which is exactly the + // code path the production restart-supervisor would take. + distributor + .deregister_collector(MEMBER_1) + .await + .expect("deregister member 1"); + + // Phase 2: every new task must route to member 2. Member 1's raw + // receiver is still open (broker doesn't force-close subscriptions on + // collector deregistration — that is a distributor-level state change), + // but the distributor will no longer route to it. + const PHASE_2_COUNT: usize = 15; + for i in 0..PHASE_2_COUNT { + let task = make_task(&format!("phase2-{i}"), 2); + let selected = distributor + .distribute_task(task) + .await + .expect("phase 2 distribute"); + assert_eq!( + selected, MEMBER_2, + "after failover, all tasks must route to the surviving member", + ); + } + + // Drain phase-2 traffic and confirm member 1 received nothing new while + // member 2 received the full phase-2 burst. 
+ let rx1_phase2 = drain_now(&mut rx1); + assert!( + rx1_phase2.is_empty(), + "failed-over member 1 should receive zero phase-2 tasks (got {})", + rx1_phase2.len(), + ); + let phase2_counts = collect_until(&mut [&mut rx2], PHASE_2_COUNT).await; + assert_eq!( + phase2_counts[0], PHASE_2_COUNT, + "member 2 should receive every phase-2 task ({PHASE_2_COUNT} expected)", + ); + + // No tasks lost overall. + let total_delivered = phase1_total.saturating_add(phase2_counts[0]); + assert_eq!( + total_delivered, + PHASE_1_COUNT.saturating_add(PHASE_2_COUNT), + "no tasks should be lost across the failover boundary", + ); +} + +/// Result aggregation: both queue-group members publish results tagged with +/// the same correlation ID. The aggregator collects them, preserves the +/// correlation ID, and the deduplication cache is not bypassed when +/// sequence numbers differ. +#[tokio::test] +async fn result_aggregation_preserves_correlation_ids() { + let (broker, _temp) = spawn_broker("result-aggregation").await; + let aggregator = ResultAggregator::new(Arc::clone(&broker), AggregationConfig::default()) + .await + .expect("construct aggregator"); + + let correlation_id = "multi-collector-corr"; + let make_result = |collector_id: &str, pid: u32, sequence: u64| -> CollectorResult { + let mut metadata = HashMap::new(); + metadata.insert("correlation_id".to_owned(), correlation_id.to_owned()); + CollectorResult { + collector_id: collector_id.to_owned(), + collector_type: QUEUE_GROUP_TYPE.to_owned(), + event: CollectionEvent::Process(ProcessEvent { + pid, + name: format!("proc-{collector_id}"), + command_line: Some(format!("run --from {collector_id}")), + executable_path: Some(format!("/bin/{collector_id}")), + ppid: Some(1), + start_time: Some(SystemTime::now()), + metadata, + }), + timestamp: SystemTime::now(), + sequence, + } + }; + + // Each member contributes two results under the same correlation ID. 
+ aggregator + .collect_result(make_result(MEMBER_1, 1001, 1)) + .await + .expect("collect member 1 result 1"); + aggregator + .collect_result(make_result(MEMBER_1, 1002, 2)) + .await + .expect("collect member 1 result 2"); + aggregator + .collect_result(make_result(MEMBER_2, 2001, 1)) + .await + .expect("collect member 2 result 1"); + aggregator + .collect_result(make_result(MEMBER_2, 2002, 2)) + .await + .expect("collect member 2 result 2"); + + let stats = aggregator.get_stats().await; + assert_eq!( + stats.results_collected, 4, + "every unique result across both members should be collected", + ); + assert_eq!( + stats.results_deduplicated, 0, + "results with distinct (pid, sequence) tuples should not be deduplicated", + ); + assert_eq!( + stats.results_pending, 4, + "pending count should equal results collected before any aggregation emits", + ); + + // Re-submitting an identical result should be caught by the + // deduplication cache rather than double-counted. + aggregator + .collect_result(make_result(MEMBER_1, 1001, 1)) + .await + .expect("collect duplicate"); + + let stats = aggregator.get_stats().await; + assert_eq!( + stats.results_collected, 4, + "duplicate result must not increment results_collected", + ); + assert_eq!( + stats.results_deduplicated, 1, + "duplicate result must be counted as deduplicated", + ); +} + +/// Wildcard observer: a third subscriber listens on `control.collector.task.#` +/// (multi-level wildcard covering both `collector_type` and `collector_id` +/// segments) and observes every task distribution **without** participating +/// in the queue group. Queue-group routing is unaffected. 
+#[tokio::test] +async fn wildcard_observer_sees_all_task_broadcasts() { + let (broker, _temp) = spawn_broker("wildcard-observer").await; + let distributor = TaskDistributor::new(Arc::clone(&broker)) + .await + .expect("construct distributor"); + distributor + .set_routing_strategy(RoutingStrategy::RoundRobin) + .await; + + let mut rx1 = join_queue_group(&broker, &distributor, MEMBER_1, 50).await; + let mut rx2 = join_queue_group(&broker, &distributor, MEMBER_2, 50).await; + + // Observer subscribes to the multi-level wildcard. `#` matches zero or + // more segments; for every `control.collector.task.procmond.` topic + // this pattern matches on the trailing segments. + let observer_id = Uuid::new_v4(); + let observer_pattern = format!("{}.#", collector::TASK); + let mut observer_rx = broker + .subscribe_raw(&observer_pattern, observer_id) + .await + .expect("observer subscribes to wildcard pattern"); + + const TASK_COUNT: usize = 12; + let mut published_ids = HashSet::new(); + for i in 0..TASK_COUNT { + let task_id = format!("wild-{i}"); + published_ids.insert(task_id.clone()); + distributor + .distribute_task(make_task(&task_id, 2)) + .await + .expect("distribute wildcard-observed task"); + } + + // Queue group still gets balanced routing. + let qg_counts = collect_until(&mut [&mut rx1, &mut rx2], TASK_COUNT).await; + assert_eq!( + qg_counts[0].saturating_add(qg_counts[1]), + TASK_COUNT, + "queue-group delivery should be unaffected by the wildcard observer", + ); + + // Observer receives one copy per task — the broker fans the wildcard + // subscriber in alongside the targeted member, not instead of it. 
+ let mut observed_ids = HashSet::new(); + let deadline = tokio::time::Instant::now() + RECV_TIMEOUT; + while observed_ids.len() < TASK_COUNT && tokio::time::Instant::now() < deadline { + match timeout(Duration::from_millis(50), observer_rx.recv()).await { + Ok(Some(msg)) => { + assert!( + msg.topic.starts_with(collector::TASK), + "observed topic must be under the task prefix, got {}", + msg.topic, + ); + let task: TaskRequest = + postcard::from_bytes(&msg.payload).expect("payload decodes as TaskRequest"); + observed_ids.insert(task.task_id); + } + Ok(None) => panic!("observer channel closed unexpectedly"), + Err(_elapsed) => { + // no message this tick — loop until deadline + } + } + } + + assert_eq!( + observed_ids, published_ids, + "wildcard observer should see every published task exactly once", + ); +} diff --git a/daemoneye-eventbus/tests/integration_tests.rs b/daemoneye-eventbus/tests/integration_tests.rs index 67831e79..a19450e9 100644 --- a/daemoneye-eventbus/tests/integration_tests.rs +++ b/daemoneye-eventbus/tests/integration_tests.rs @@ -145,6 +145,7 @@ async fn test_event_subscription() { correlation_filter: None, topic_patterns: Some(vec!["events.process.*".to_string()]), enable_wildcards: true, + include_control: false, }; // Subscribe @@ -249,3 +250,53 @@ async fn test_statistics_tracking() { "Uptime should be reasonable for a short test" ); } + +/// Unit 1 / END-297: Verify `EventSubscription::include_control` threads +/// through to the serialized form (postcard round-trip preserves the field). +/// +/// This guards against a later `#[serde(default)]` regression that would +/// silently drop the flag on the wire and break control delivery. +#[tokio::test] +async fn test_event_subscription_serialization_preserves_include_control() { + // Build subscription with include_control=true. 
+ let sub = EventSubscription { + subscriber_id: "round-trip".to_string(), + capabilities: SourceCaps { + event_types: vec!["control".to_string()], + collectors: vec![], + max_priority: 0, + }, + event_filter: None, + correlation_filter: None, + topic_patterns: Some(vec!["control.collector.lifecycle".to_string()]), + enable_wildcards: true, + include_control: true, + }; + + let encoded = postcard::to_allocvec(&sub).expect("encode subscription"); + let decoded: EventSubscription = postcard::from_bytes(&encoded).expect("decode subscription"); + assert!( + decoded.include_control, + "include_control=true must survive postcard round-trip" + ); + + // And the legacy default path. + let legacy = EventSubscription { + include_control: false, + ..sub.clone() + }; + let encoded2 = postcard::to_allocvec(&legacy).expect("encode"); + let decoded2: EventSubscription = postcard::from_bytes(&encoded2).expect("decode"); + assert!(!decoded2.include_control); +} + +/// Unit 1 / END-297: `EventSubscription::default()` produces a legacy-safe +/// subscription that does NOT opt into Control delivery. 
+#[test] +fn test_event_subscription_default_does_not_opt_into_control() { + let sub = EventSubscription::default(); + assert!( + !sub.include_control, + "Default EventSubscription must remain legacy (Event-only) to preserve compatibility" + ); +} diff --git a/daemoneye-eventbus/tests/rpc_integration_tests.rs b/daemoneye-eventbus/tests/rpc_integration_tests.rs index b5e83118..ff3b2619 100644 --- a/daemoneye-eventbus/tests/rpc_integration_tests.rs +++ b/daemoneye-eventbus/tests/rpc_integration_tests.rs @@ -464,7 +464,7 @@ async fn test_graceful_shutdown_request() { "test-client".to_string(), shutdown::shutdown_topic("test-collector"), shutdown_req, - Duration::from_secs(60), + Duration::from_mins(1), ); // Handle request @@ -1514,7 +1514,7 @@ async fn test_rpc_start_collector_with_process_manager() -> Result<()> { operation: CollectorOperation::Start, payload: RpcPayload::Lifecycle(lifecycle_req), timestamp: now, - deadline: now.checked_add(Duration::from_millis(10000)).unwrap_or(now), + deadline: now.checked_add(Duration::from_secs(10)).unwrap_or(now), correlation_metadata: RpcCorrelationMetadata::new(Uuid::new_v4().to_string()), }; // Start collector let response = service.handle_request(request.clone()).await; @@ -1588,7 +1588,7 @@ async fn test_rpc_stop_collector_with_process_manager() -> Result<()> { operation: CollectorOperation::Stop, payload: RpcPayload::Lifecycle(lifecycle_req), timestamp: now, - deadline: now.checked_add(Duration::from_millis(10000)).unwrap_or(now), + deadline: now.checked_add(Duration::from_secs(10)).unwrap_or(now), correlation_metadata: RpcCorrelationMetadata::new(Uuid::new_v4().to_string()), }; @@ -1664,7 +1664,7 @@ async fn test_rpc_restart_collector_with_process_manager() -> Result<()> { operation: CollectorOperation::Restart, payload: RpcPayload::Lifecycle(lifecycle_req), timestamp: now, - deadline: now.checked_add(Duration::from_millis(15000)).unwrap_or(now), + deadline: now.checked_add(Duration::from_secs(15)).unwrap_or(now), 
correlation_metadata: RpcCorrelationMetadata::new(Uuid::new_v4().to_string()), }; @@ -1745,7 +1745,7 @@ async fn test_rpc_health_check_with_running_collector() -> Result<()> { operation: CollectorOperation::HealthCheck, payload: RpcPayload::Generic(payload_map), timestamp: now, - deadline: now.checked_add(Duration::from_millis(5000)).unwrap_or(now), + deadline: now.checked_add(Duration::from_secs(5)).unwrap_or(now), correlation_metadata: RpcCorrelationMetadata::new(Uuid::new_v4().to_string()), }; @@ -1803,7 +1803,7 @@ async fn test_rpc_health_check_with_stopped_collector() -> Result<()> { operation: CollectorOperation::HealthCheck, payload: RpcPayload::Empty, timestamp: now, - deadline: now.checked_add(Duration::from_millis(5000)).unwrap_or(now), + deadline: now.checked_add(Duration::from_secs(5)).unwrap_or(now), correlation_metadata: RpcCorrelationMetadata::new(Uuid::new_v4().to_string()), }; @@ -1880,7 +1880,7 @@ async fn test_rpc_pause_collector_with_process_manager() -> Result<()> { operation: CollectorOperation::Pause, payload: RpcPayload::Lifecycle(lifecycle_req), timestamp: now, - deadline: now.checked_add(Duration::from_millis(5000)).unwrap_or(now), + deadline: now.checked_add(Duration::from_secs(5)).unwrap_or(now), correlation_metadata: RpcCorrelationMetadata::new(Uuid::new_v4().to_string()), }; @@ -1967,7 +1967,7 @@ async fn test_rpc_resume_collector_with_process_manager() -> Result<()> { operation: CollectorOperation::Resume, payload: RpcPayload::Lifecycle(lifecycle_req), timestamp: now, - deadline: now.checked_add(Duration::from_millis(5000)).unwrap_or(now), + deadline: now.checked_add(Duration::from_secs(5)).unwrap_or(now), correlation_metadata: RpcCorrelationMetadata::new(Uuid::new_v4().to_string()), }; diff --git a/daemoneye-lib/src/integrity/mod.rs b/daemoneye-lib/src/integrity/mod.rs index 672280a3..f8cf146b 100644 --- a/daemoneye-lib/src/integrity/mod.rs +++ b/daemoneye-lib/src/integrity/mod.rs @@ -420,8 +420,7 @@ pub struct HasherConfig { impl 
Default for HasherConfig { fn default() -> Self { let max_concurrent = std::thread::available_parallelism() - .map(std::num::NonZero::get) - .unwrap_or(MIN_CONCURRENCY) + .map_or(MIN_CONCURRENCY, std::num::NonZero::get) .clamp(MIN_CONCURRENCY, MAX_CONCURRENCY); Self { algorithms: vec![HashAlgorithm::Sha256, HashAlgorithm::Blake3], diff --git a/daemoneye-lib/src/ipc/client.rs b/daemoneye-lib/src/ipc/client.rs index f8f0272c..7ddd2da7 100644 --- a/daemoneye-lib/src/ipc/client.rs +++ b/daemoneye-lib/src/ipc/client.rs @@ -612,8 +612,8 @@ impl ConnectionPool { connections: HashMap::new(), max_connections_per_endpoint, max_total_connections, - max_idle_time: Duration::from_secs(300), // 5 minutes - max_lifetime: Duration::from_secs(3600), // 1 hour + max_idle_time: Duration::from_mins(5), + max_lifetime: Duration::from_hours(1), connection_semaphore: Arc::new(Semaphore::new(max_total_connections)), } } @@ -948,7 +948,7 @@ impl ResilientIpcClient { /// Refresh capabilities for all endpoints that need it pub async fn refresh_capabilities(&self) -> IpcResult<()> { - let capability_refresh_interval = Duration::from_secs(300); // 5 minutes + let capability_refresh_interval = Duration::from_mins(5); let endpoints_to_refresh = { let endpoints = self.endpoints.read().await; endpoints diff --git a/daemoneye-lib/src/models/alert.rs b/daemoneye-lib/src/models/alert.rs index d31b7609..c1261629 100644 --- a/daemoneye-lib/src/models/alert.rs +++ b/daemoneye-lib/src/models/alert.rs @@ -383,7 +383,7 @@ impl Alert { /// println!("alert age: {}s", age); /// ``` pub fn age_seconds(&self) -> u64 { - self.timestamp.elapsed().map(|d| d.as_secs()).unwrap_or(0) + self.timestamp.elapsed().map_or(0, |d| d.as_secs()) } /// Check if the alert is recent (within the specified threshold). 
diff --git a/daemoneye-lib/src/telemetry.rs b/daemoneye-lib/src/telemetry.rs index b6bf4c66..32474ed2 100644 --- a/daemoneye-lib/src/telemetry.rs +++ b/daemoneye-lib/src/telemetry.rs @@ -571,7 +571,7 @@ mod tests { let mut collector = TelemetryCollector::new("test-component".to_owned()); // Record a slow operation (> 5000ms) - collector.record_operation(Duration::from_millis(6000)); + collector.record_operation(Duration::from_secs(6)); let health_check = collector.health_check(); assert_eq!(health_check.status, HealthStatus::Degraded); diff --git a/daemoneye-lib/tests/ipc_capability_negotiation.rs b/daemoneye-lib/tests/ipc_capability_negotiation.rs index f9f70931..d85b6d6c 100644 --- a/daemoneye-lib/tests/ipc_capability_negotiation.rs +++ b/daemoneye-lib/tests/ipc_capability_negotiation.rs @@ -227,7 +227,7 @@ async fn test_capability_refresh_timing() { ); // Initially should need refresh (never checked) - assert!(endpoint.needs_capability_refresh(Duration::from_secs(300))); + assert!(endpoint.needs_capability_refresh(Duration::from_mins(5))); // Update capabilities endpoint.update_capabilities(CollectionCapabilities { @@ -236,7 +236,7 @@ async fn test_capability_refresh_timing() { }); // Should not need refresh immediately after update - assert!(!endpoint.needs_capability_refresh(Duration::from_secs(300))); + assert!(!endpoint.needs_capability_refresh(Duration::from_mins(5))); // Should need refresh with very short max age (wait a bit first) tokio::time::sleep(Duration::from_millis(2)).await; diff --git a/daemoneye-lib/tests/ipc_comprehensive_integration.rs b/daemoneye-lib/tests/ipc_comprehensive_integration.rs index dfe54268..9a6d89f2 100644 --- a/daemoneye-lib/tests/ipc_comprehensive_integration.rs +++ b/daemoneye-lib/tests/ipc_comprehensive_integration.rs @@ -708,7 +708,7 @@ async fn test_timeout_handling() { server.set_handler(|task: DetectionTask| async move { // Simulate slow processing that exceeds timeout - sleep(Duration::from_millis(2000)).await; + 
sleep(Duration::from_secs(2)).await; Ok(DetectionResult { task_id: task.task_id, diff --git a/daemoneye-lib/tests/ipc_cross_platform_tests.rs b/daemoneye-lib/tests/ipc_cross_platform_tests.rs index 52fd0215..fae3733b 100644 --- a/daemoneye-lib/tests/ipc_cross_platform_tests.rs +++ b/daemoneye-lib/tests/ipc_cross_platform_tests.rs @@ -553,7 +553,7 @@ async fn test_cross_platform_timeout_behavior() { server.set_handler(|task: DetectionTask| async move { // Simulate slow processing - sleep(Duration::from_millis(2000)).await; + sleep(Duration::from_secs(2)).await; Ok(DetectionResult { task_id: task.task_id, diff --git a/daemoneye-lib/tests/ipc_integration.rs b/daemoneye-lib/tests/ipc_integration.rs index f5c1a901..f91bbd87 100644 --- a/daemoneye-lib/tests/ipc_integration.rs +++ b/daemoneye-lib/tests/ipc_integration.rs @@ -205,7 +205,7 @@ mod tests { "Server connection test failed (attempt {retries}/{max_retries}): {e}, retrying in {retry_delay:?}" ); tokio::time::sleep(retry_delay).await; - retry_delay = std::cmp::min(retry_delay * 2, Duration::from_millis(1000)); + retry_delay = std::cmp::min(retry_delay * 2, Duration::from_secs(1)); } Err(_) => { retries += 1; @@ -219,7 +219,7 @@ mod tests { "Server connection test timed out (attempt {retries}/{max_retries}), retrying in {retry_delay:?}" ); tokio::time::sleep(retry_delay).await; - retry_delay = std::cmp::min(retry_delay * 2, Duration::from_millis(1000)); + retry_delay = std::cmp::min(retry_delay * 2, Duration::from_secs(1)); } } } diff --git a/daemoneye-lib/tests/ipc_security_validation.rs b/daemoneye-lib/tests/ipc_security_validation.rs index 6db28ef2..db84153c 100644 --- a/daemoneye-lib/tests/ipc_security_validation.rs +++ b/daemoneye-lib/tests/ipc_security_validation.rs @@ -188,7 +188,7 @@ async fn test_connection_limits_enforcement() { let conn_num = counter.fetch_add(1, Ordering::SeqCst); // Hold connection open to test limits - sleep(Duration::from_millis(1000)).await; + sleep(Duration::from_secs(1)).await; 
Ok(DetectionResult { task_id: task.task_id, @@ -534,7 +534,7 @@ async fn test_timeout_dos_resistance() { // First few requests are slow (potential DoS) if request_num < 3 { - sleep(Duration::from_millis(2000)).await; // Exceed timeout + sleep(Duration::from_secs(2)).await; // Exceed timeout } Ok(DetectionResult { diff --git a/docs/solutions/best-practices/rust-async-arc-rwlock-await-holding-lock-pattern-2026-04-18.md b/docs/solutions/best-practices/rust-async-arc-rwlock-await-holding-lock-pattern-2026-04-18.md new file mode 100644 index 00000000..d4c8787d --- /dev/null +++ b/docs/solutions/best-practices/rust-async-arc-rwlock-await-holding-lock-pattern-2026-04-18.md @@ -0,0 +1,227 @@ +--- +title: 'Rust Async: Wrap RwLock-held resources in Arc to satisfy await_holding_lock' +date: 2026-04-18 +category: best-practices +module: procmond/event_bus_connector +problem_type: best_practice +component: tooling +severity: medium +applies_when: + - A struct holds an `Option<T>` or `T` behind `Arc<Mutex<...>>` or `Arc<RwLock<...>>` and a caller needs to `await` on that inner value + - Clippy denies `await_holding_lock` and a lock guard is live across an `.await` point + - The inner resource's async methods take `&self` and the caller can work through a cloned `Arc` (no need for `&mut self`) + - Shutdown needs exclusive ownership of a resource that is normally shared via `Arc` + - A per-iteration timeout must not reset on each loop iteration — use a single absolute `timeout_at` deadline instead +tags: + - rust + - async + - tokio + - concurrency + - rwlock + - arc + - clippy + - await-holding-lock + - shutdown +--- + +# Rust Async: Wrap RwLock-held resources in Arc to satisfy `await_holding_lock` + +## Context + +During the END-297 PR #178 review, `procmond`'s startup code introduced a shared `EventBusConnector` behind `Arc<RwLock<EventBusConnector>>` and added an opt-in control-message subscription call path. 
The initial implementation acquired the read guard and then awaited on `bus_guard.subscribe_with_control(...)` — the guard was live across the `.await` point, which the workspace rule `clippy::await_holding_lock = "deny"` correctly flagged. + +The pattern below is the canonical project-wide fix for this class of problem. It applies anywhere a resource lives inside a `tokio::sync::Mutex` or `RwLock` and callers need to issue async calls on it. + +The `clippy::await_holding_lock = "deny"` rule was originally not inherited on the `collector-core` and `daemoneye-eventbus` crates (session history); once the workspace-lints inheritance landed, any async call made while holding a lock guard started failing to compile. This pattern is the canonical response — expect it to be needed again whenever an `Arc<RwLock<T>>` field is introduced (session history). + +## Guidance + +### 1. The anti-pattern: lock guard held across `.await` + +```rust +// WRONG — read guard is live when .await executes +let subscribe_result = { + let bus_guard = event_bus.read().await; // guard acquired + bus_guard + .subscribe_with_control(subscriber_id, topics) + .await // clippy::await_holding_lock fires here +}; +``` + +Other tasks needing the lock are blocked for the full duration of the async call, which can be unbounded. The lint denies this shape workspace-wide. + +### 2. The fix: clone an `Arc` inside a short guard, drop the guard, then await + +Wrap the inner resource in `Arc` inside the lockable container. Add a cheap accessor that clones the `Arc`. Callers acquire the guard only long enough to take the clone, drop the guard, then `.await` on the owned `Arc`. + +In `procmond/src/event_bus_connector.rs`: + +```rust +pub struct EventBusConnector { + // store the client behind Arc so callers can clone it out of the lock + client: Option<Arc<EventBusClient>>, + /* ... */ +} + +/// Cheap accessor — just clones the Arc pointer. No async, no I/O. 
+#[must_use] +pub fn client_arc(&self) -> Option<Arc<EventBusClient>> { + self.client.clone() +} +``` + +At the call site in `procmond/src/main.rs`: + +```rust +// CORRECT — guard scope ends before .await +let client_arc = { + let bus_guard = event_bus.read().await; + bus_guard.client_arc() // clone; guard drops at block end +}; +let subscribe_result = match client_arc { + Some(client) => client.subscribe_with_control(subscription).await, + None => return Err(anyhow::anyhow!("not connected to broker")), +}; +``` + +The `{ let bus_guard = ...; bus_guard.client_arc() }` block shape is what makes the lint pass — the guard drops at the end of the block, before the expression value feeds into the subsequent `.await`. + +### 3. `Arc::into_inner` for shutdown when the inner type consumes `self` + +Some inner types have `async fn shutdown(self) -> Result<(), E>` that consumes `self`. When the value is normally shared via `Arc`, use `Arc::into_inner` to reclaim ownership only when the strong count is 1 — if any clones are in flight, log and drop so background tasks exit naturally. + +From `procmond/src/event_bus_connector.rs`: + +```rust +if let Some(client_arc) = self.client.take() { + match Arc::into_inner(client_arc) { + Some(client) => { + if let Err(e) = client.shutdown().await { + error!(error = %e, "Error during client shutdown"); + } + } + None => { + warn!( + "EventBusClient has outstanding Arc references; skipping \ + explicit shutdown - background tasks will exit when the \ + Arc drops" + ); + } + } +} +``` + +`self.client.take()` removes the connector's own reference. If `Arc::into_inner` returns `None`, outstanding callers still keep the client alive; they will drop it when they go out of scope and the client's `Drop` / background-task shutdown paths take over. + +### 4. Single absolute deadline with `timeout_at` for wait loops + +When waiting for a signal across multiple loop iterations, compute the deadline once before the loop. 
`tokio::time::timeout(dur, fut)` applied per iteration resets the window on every received message, which lets a chatty or adversarial stream push the deadline out indefinitely (session history: this was a CodeRabbit Major finding on the first implementation). + +From `procmond/src/main.rs` (`lifecycle_wait_task`): + +```rust +// CORRECT — deadline is fixed once; cannot be reset by arriving messages +let now = tokio::time::Instant::now(); +let deadline = now + .checked_add(BEGIN_MONITORING_WAIT_TIMEOUT) + .unwrap_or(now); +loop { + match tokio::time::timeout_at(deadline, control_rx.recv()).await { + Ok(Some(msg)) if msg.topic == lifecycle_topic => { + /* transition to running */ + return; + } + Ok(Some(_)) => { /* non-lifecycle — keep waiting */ } + Ok(None) => { /* channel closed — fallback */ return; } + Err(_elapsed) => { /* timed out — fallback */ return; } + } +} +``` + +`checked_add` + `unwrap_or(now)` is required because the workspace enables `clippy::arithmetic_side_effects = "deny"`; the overflow case is treated as an immediate deadline (fails fast) rather than panicking on overflow. + +## Why This Matters + +- **Lock contention under load.** An async call holding a lock can take tens to hundreds of milliseconds. Every other task needing the lock — including read-only observers — blocks for that duration. Under high throughput the effect compounds. +- **Workspace clippy enforcement is a hard failure.** `await_holding_lock = "deny"` is workspace-wide, and the `--all-targets` pre-commit hook treats any offending shape as a build failure, not a warning. The `Arc`-clone pattern is the canonical fix the lint expects. +- **Shutdown-race survivability.** `Arc::into_inner` is the safe way to call a consuming `shutdown(self)` on a shared value. The fallback (log + drop) ensures shutdown never hangs on in-flight subscribers. +- **Deadline absoluteness matters.** `timeout_at` makes the wait window honest about the operator's intent. 
Per-iteration `timeout` is the most common accidental way to turn a hard SLO into an infinite wait. + +## When to Apply + +Apply this pattern when **all** of the following hold: + +- A shared resource is behind `Arc<Mutex<T>>` or `Arc<RwLock<T>>`. +- Callers need to invoke `async fn (&self, ...)` methods on the resource. +- The inner resource is either cheap to wrap in `Arc` or already uses interior mutability. + +**Do not apply when:** + +- The method requires `&mut self`. `Arc` only hands out `&T`; mutating operations must still happen inside the lock. Restructure the caller so the mutation is a short non-awaiting block, or move to a dedicated owner-task/actor pattern. +- The resource has exclusive single-owner semantics and is never shared. +- `shutdown` is a sync `&mut self` method — plain `Option::take()` + normal shutdown is simpler. + +## Examples + +Concrete references in this repo: + +- `procmond/src/event_bus_connector.rs` — the `client: Option<Arc<EventBusClient>>` field and `client_arc()` accessor; `Arc::into_inner` in the shutdown path. +- `procmond/src/main.rs` — the short-guard clone shape at the `subscribe_with_control` call site; `timeout_at` + absolute deadline in `lifecycle_wait_task`. +- Enforcement: `cargo clippy --workspace --all-targets -- -D warnings`. 
+ +### Before — lock guard held across `.await` + +```rust +let bus_guard = event_bus.read().await; +let result = bus_guard + .subscribe_with_control(subscription) + .await?; // guard alive — clippy denies +``` + +### After — clone `Arc`, drop guard, then await + +```rust +let client_arc = { + let bus_guard = event_bus.read().await; + bus_guard.client_arc() // Option<Arc<_>> +}; // guard dropped here +let result = match client_arc { + Some(client) => client.subscribe_with_control(subscription).await?, + None => return Err(anyhow::anyhow!("not connected")), +}; +``` + +### Before — per-iteration timeout (window resets on every message) + +```rust +loop { + match tokio::time::timeout(Duration::from_secs(60), rx.recv()).await { /* ... */ } +} +``` + +### After — single absolute deadline + +```rust +let deadline = tokio::time::Instant::now() + .checked_add(Duration::from_secs(60)) + .unwrap_or_else(tokio::time::Instant::now); +loop { + match tokio::time::timeout_at(deadline, rx.recv()).await { /* ... */ } +} +``` + +### Shutdown — consuming the client when the refcount allows + +```rust +if let Some(client_arc) = self.client.take() { + match Arc::into_inner(client_arc) { + Some(client) => { let _ = client.shutdown().await; } + None => warn!("outstanding Arc clones; background tasks will self-exit"), + } +} +``` + +## Related + +- [rust-security-batch-cleanup-patterns-2026-04-04.md](rust-security-batch-cleanup-patterns-2026-04-04.md) — companion best-practices doc covering `Arc` (over `Mutex` for counters), `Arc` zero-copy fan-out, crossbeam idle-loop blocking, Unix socket permissions, and UTF-8-safe truncation in the same eventbus/IPC domain. This doc is orthogonal — that one is about replacing locks with cheaper primitives; this one is about living well when a lock is genuinely needed. +- PR #178 on `EvilBit-Labs/DaemonEye`, commit `2eb4ba4` — the concrete instance that motivated this doc (END-297 closure pass).
diff --git a/docs/solutions/best-practices/rust-security-batch-cleanup-patterns-2026-04-04.md b/docs/solutions/best-practices/rust-security-batch-cleanup-patterns-2026-04-04.md index 227b10b1..7cb24aff 100644 --- a/docs/solutions/best-practices/rust-security-batch-cleanup-patterns-2026-04-04.md +++ b/docs/solutions/best-practices/rust-security-batch-cleanup-patterns-2026-04-04.md @@ -142,3 +142,4 @@ These findings affect a security monitoring tool where integrity, correctness, a - GitHub issue #42: Implement Tamper-Evident Audit Logging System with BLAKE3 - GitHub issue #50: Implement Comprehensive Security Testing Framework - `.kiro/steering/security.md`: Security validation guidelines (source of truth) +- [rust-async-arc-rwlock-await-holding-lock-pattern-2026-04-18.md](rust-async-arc-rwlock-await-holding-lock-pattern-2026-04-18.md) — companion doc covering the `Arc` wrapping pattern when a lock is genuinely needed (complements the "replace lock with cheaper primitive" guidance here) diff --git a/docs/src/technical/eventbus-architecture.md b/docs/src/technical/eventbus-architecture.md index 4b1107f7..f7fd1460 100644 --- a/docs/src/technical/eventbus-architecture.md +++ b/docs/src/technical/eventbus-architecture.md @@ -286,7 +286,7 @@ The broker tracks the following statistics: ### Latency -- **Local IPC**: Sub-millisecond latency +- **Local IPC**: \<1ms p99 for local IPC, validated in CI on Linux and macOS by a dedicated criterion benchmark. Windows and FreeBSD are informational only (not gated in CI). 
- **Message Routing**: < 100μs per message - **Correlation Lookup**: < 10μs per filter @@ -362,6 +362,7 @@ let subscription = EventSubscription { ), topic_patterns: Some(vec!["events.process.#".to_string()]), enable_wildcards: true, + include_control: false, // default: only receive Event messages }; let mut receiver = event_bus.subscribe(subscription).await?; @@ -372,6 +373,33 @@ while let Some(bus_event) = receiver.recv().await { } ``` +To also receive control messages (lifecycle commands, task assignments): + +```rust,ignore +let subscription = EventSubscription { + subscriber_id: "agent-aware-collector".to_string(), + // ... other fields + topic_patterns: Some(vec!["control.collector.lifecycle".to_string()]), + include_control: true, + ..Default::default() +}; + +let (mut event_rx, mut control_rx) = event_bus.subscribe_with_control(subscription).await?; + +// Receive events and control messages on parallel channels +loop { + tokio::select! { + Some(bus_event) = event_rx.recv() => { + println!("Received event: {:?}", bus_event); + } + Some(control_msg) = control_rx.recv() => { + println!("Received control message: {:?}", control_msg); + } + else => break, + } +} +``` + ## Troubleshooting ### Common Issues diff --git a/justfile b/justfile index a8ed7392..587b2e30 100644 --- a/justfile +++ b/justfile @@ -55,16 +55,15 @@ setup: # FORMATTING AND LINTING # ============================================================================= -format: fmt format-docs +alias format-rust := fmt +alias format-md := format-docs +alias format-just := fmt-justfile -[windows] -format-docs: - @if (Get-Command mdformat -ErrorAction SilentlyContinue) { Get-ChildItem -Recurse -Filter "*.md" | Where-Object { $_.FullName -notmatch "\\target\\" -and $_.FullName -notmatch "\\node_modules\\" } | ForEach-Object { mdformat $_.FullName } } else { Write-Host "mdformat not found. Run 'just mdformat-install' first." 
} +format: fmt format-docs fmt-justfile -[unix] +# mdformat walks the tree itself; exclusions live in .mdformat.toml so this recipe stays uniform across Windows and Unix. format-docs: - cd "{{ root }}" - @if command -v mdformat >/dev/null 2>&1; then find . -type f -name "*.md" -not -path "./target/*" -not -path "./node_modules/*" -exec mdformat {} + ; else echo "mdformat not found. Run 'just mdformat-install' first."; fi + @{{ mise_exec }} mdformat . fmt: @{{ mise_exec }} cargo fmt --all @@ -78,15 +77,24 @@ lint-rust: fmt-check lint-rust-min: @{{ mise_exec }} cargo clippy --workspace --all-targets --no-default-features -- -D warnings -# Check documentation compiles without warnings +# Check rustdoc comments compile without warnings [windows] -lint-docs: +lint-rustdoc: $env:RUSTDOCFLAGS='-D warnings'; @{{ mise_exec }} cargo doc --no-deps --document-private-items [unix] -lint-docs: +lint-rustdoc: RUSTDOCFLAGS='-D warnings' {{ mise_exec }} cargo doc --no-deps --document-private-items +# Lint markdown files: style rules (markdownlint) + link validation (lychee). Complements format-docs — that fixes formatting; this catches broken links and rule violations that formatting can't infer. +lint-md: + @{{ mise_exec }} markdownlint-cli2 + @{{ mise_exec }} lychee --no-progress --cache "**/*.md" + +# Lint GitHub Actions workflows. `release.yml` is generated by GoReleaser integration and skipped to avoid false positives on generated expressions. +lint-actions: + @{{ mise_exec }} actionlint -ignore 'release\.yml' .github/workflows/*.yml + # Format justfile fmt-justfile: @{{ mise_exec }} just --fmt --unstable @@ -95,7 +103,10 @@ fmt-justfile: lint-justfile: @{{ mise_exec }} just --fmt --check --unstable -lint: lint-rust lint-docs lint-justfile +alias lint-just := lint-justfile + +# `lint-md` is intentionally NOT in the default `lint` chain. 
The repo has pre-existing markdownlint violations across third-party skill docs (.claude/skills/**), draft specs (.kiro/upcoming-specs/**), and older mdbook content. Run `just lint-md` on-demand when doing a focused docs pass; a cleanup ticket will re-enable it here once the backlog is resolved. +lint: lint-rust lint-rustdoc lint-actions lint-justfile # Run clippy with fixes fix: @@ -104,6 +115,7 @@ fix: # Quick development check check: pre-commit-run lint test-check +[private] pre-commit-run: @{{ mise_exec }} pre-commit run -a @@ -186,43 +198,55 @@ test-security: # ============================================================================= # Run all benchmarks +[group('bench')] bench: @{{ mise_exec }} cargo bench --workspace # Run procmond benchmarks (WAL, EventBus, process collection, serialization) +[group('bench')] bench-procmond: @{{ mise_exec }} cargo bench -p procmond # Run specific benchmark suites +[group('bench')] bench-process: @{{ mise_exec }} cargo bench -p daemoneye-lib --bench process_collection +[group('bench')] bench-database: @{{ mise_exec }} cargo bench -p daemoneye-lib --bench database_operations +[group('bench')] bench-detection: @{{ mise_exec }} cargo bench -p daemoneye-lib --bench detection_engine +[group('bench')] bench-ipc: @{{ mise_exec }} cargo bench -p daemoneye-lib --bench ipc_communication +[group('bench')] bench-ipc-comprehensive: @{{ mise_exec }} cargo bench -p daemoneye-lib --bench ipc_performance_comprehensive +[group('bench')] bench-ipc-validation: @{{ mise_exec }} cargo bench -p daemoneye-lib --bench ipc_client_validation_benchmarks +[group('bench')] bench-alerts: @{{ mise_exec }} cargo bench -p daemoneye-lib --bench alert_processing +[group('bench')] bench-crypto: @{{ mise_exec }} cargo bench -p daemoneye-lib --bench cryptographic_operations # Run benchmarks with HTML output (Criterion generates HTML by default) +[group('bench')] bench-html: @{{ mise_exec }} cargo bench -p daemoneye-lib # Run benchmarks and save results to 
benchmark.json +[group('bench')] bench-save: @{{ mise_exec }} cargo bench -p daemoneye-lib -- --save-baseline baseline @@ -248,19 +272,61 @@ deny: deny-deps # ============================================================================= # CI AND QUALITY ASSURANCE # ============================================================================= +# Private coverage runner — one place to edit the cargo-llvm-cov command line. +# RUSTFLAGS='--cfg coverage' lets production code gate expensive paths with +# `#[cfg(coverage)]` when coverage runs get slow. Additional flags are passed -# Generate coverage report using nextest with coverage profile +# through via the variadic arg so each public recipe is a one-liner. +[private] +[unix] +_coverage +args='': + #!/usr/bin/env bash + set -euo pipefail + rm -rf target/llvm-cov-target + RUSTFLAGS="--cfg coverage" {{ mise_exec }} cargo llvm-cov nextest --workspace --profile coverage --lcov --output-path lcov.info {{ args }} + +[private] +[windows] +_coverage +args='': + Remove-Item -Recurse -Force target/llvm-cov-target -ErrorAction SilentlyContinue + $env:RUSTFLAGS = "--cfg coverage"; {{ mise_exec }} cargo llvm-cov nextest --workspace --profile coverage --lcov --output-path lcov.info {{ args }} + +# Generate LCOV coverage report (lcov.info) for upload to coverage services. coverage: - @{{ mise_exec }} cargo llvm-cov nextest --workspace --profile coverage --lcov --output-path lcov.info + @just _coverage # Alias for coverage generation test-coverage: coverage -# Check coverage thresholds - -# TODO: Raise threshold to 80% once test coverage reaches target (see TESTING.md) +# Check coverage thresholds. TODO: raise to 80% once coverage reaches target (see TESTING.md). coverage-check: - @{{ mise_exec }} cargo llvm-cov nextest --workspace --profile coverage --lcov --output-path lcov.info --fail-under-lines 9.7 + @just _coverage --fail-under-lines 9.7 + +# Generate HTML coverage report and open it locally. 
+[unix] +coverage-report: + #!/usr/bin/env bash + set -euo pipefail + rm -rf target/llvm-cov-target + RUSTFLAGS="--cfg coverage" {{ mise_exec }} cargo llvm-cov nextest --workspace --profile coverage --html --open + +[windows] +coverage-report: + Remove-Item -Recurse -Force target/llvm-cov-target -ErrorAction SilentlyContinue + $env:RUSTFLAGS = "--cfg coverage"; {{ mise_exec }} cargo llvm-cov nextest --workspace --profile coverage --html --open + +# Terminal-only coverage summary (no file artifact). +[unix] +coverage-summary: + #!/usr/bin/env bash + set -euo pipefail + rm -rf target/llvm-cov-target + RUSTFLAGS="--cfg coverage" {{ mise_exec }} cargo llvm-cov nextest --workspace --profile coverage + +[windows] +coverage-summary: + Remove-Item -Recurse -Force target/llvm-cov-target -ErrorAction SilentlyContinue + $env:RUSTFLAGS = "--cfg coverage"; {{ mise_exec }} cargo llvm-cov nextest --workspace --profile coverage # Full local CI parity check ci-check: check test-ci build-release security-scan coverage-check goreleaser-check @@ -290,14 +356,17 @@ install: # ============================================================================= # Test GoReleaser configuration +[group('goreleaser')] goreleaser-check: @{{ mise_exec }} goreleaser check # Build binaries locally with GoReleaser (test build process) +[group('goreleaser')] [windows] goreleaser-build: @{{ mise_exec }} goreleaser build --clean +[group('goreleaser')] [unix] goreleaser-build: #!/bin/bash @@ -315,10 +384,12 @@ goreleaser-build: @{{ mise_exec }} goreleaser build --clean # Run snapshot release (test full pipeline without publishing) +[group('goreleaser')] [windows] goreleaser-snapshot: @{{ mise_exec }} goreleaser release --snapshot --clean +[group('goreleaser')] [unix] goreleaser-snapshot: #!/bin/bash @@ -336,10 +407,12 @@ goreleaser-snapshot: @{{ mise_exec }} goreleaser release --snapshot --clean # Test GoReleaser with specific target +[group('goreleaser')] [windows] goreleaser-build-target target: - @{{ 
mise_exec }} goreleaser build --clean --single-target {{ target }} + @{{ mise_exec }} goreleaser build --clean --single-target {{ target }} +[group('goreleaser')] [unix] goreleaser-build-target target: #!/bin/bash @@ -357,6 +430,7 @@ goreleaser-build-target target: @{{ mise_exec }} goreleaser build --clean --single-target {{ target }} # Clean GoReleaser artifacts +[group('goreleaser')] goreleaser-clean: @just rmrf dist @@ -365,10 +439,12 @@ goreleaser-clean: # ============================================================================= # Test macOS release configuration +[group('goreleaser')] [windows] goreleaser-test-macos: @echo "⚠️ Skipping macOS test (not on macOS)" +[group('goreleaser')] [unix] goreleaser-test-macos: #!/bin/bash @@ -382,10 +458,12 @@ goreleaser-test-macos: fi # Test Linux release configuration +[group('goreleaser')] [windows] goreleaser-test-linux: @echo "⚠️ Skipping Linux test (not on Linux)" +[group('goreleaser')] [unix] goreleaser-test-linux: #!/bin/bash @@ -399,21 +477,25 @@ goreleaser-test-linux: fi # Test Windows release configuration +[group('goreleaser')] [windows] goreleaser-test-windows: @echo "🪟 Testing Windows configuration..." @{{ mise_exec }} goreleaser build --config .goreleaser-windows.yaml --snapshot --clean @echo "✅ Windows build successful" +[group('goreleaser')] [unix] goreleaser-test-windows: @echo "⚠️ Skipping Windows test (not on Windows)" # Test all platform configurations (skips incompatible platforms) +[group('goreleaser')] goreleaser-test-all: goreleaser-test-macos goreleaser-test-linux goreleaser-test-windows @echo "🎉 All platform tests completed!" 
# Test specific platform configuration +[group('goreleaser')] goreleaser-test-platform platform: @{{ mise_exec }} goreleaser build --config .goreleaser-{{ platform }}.yaml --snapshot --clean @echo "✅ {{ platform }} build successful" @@ -422,17 +504,22 @@ goreleaser-test-platform platform: # RELEASE MANAGEMENT # ============================================================================= +[group('release')] release: @{{ mise_exec }} cargo release +[group('release')] release-dry-run: @{{ mise_exec }} cargo release --dry-run +[group('release')] release-patch: @{{ mise_exec }} cargo release patch +[group('release')] release-minor: @{{ mise_exec }} cargo release minor +[group('release')] release-major: @{{ mise_exec }} cargo release major diff --git a/mise.lock b/mise.lock index 05679a26..04730511 100644 --- a/mise.lock +++ b/mise.lock @@ -7,142 +7,153 @@ backend = "aqua:rhysd/actionlint" [tools.actionlint."platforms.linux-arm64"] checksum = "sha256:325e971b6ba9bfa504672e29be93c24981eeb1c07576d730e9f7c8805afff0c6" url = "https://github.com/rhysd/actionlint/releases/download/v1.7.12/actionlint_1.7.12_linux_arm64.tar.gz" +provenance = "github-attestations" [tools.actionlint."platforms.linux-arm64-musl"] checksum = "sha256:325e971b6ba9bfa504672e29be93c24981eeb1c07576d730e9f7c8805afff0c6" url = "https://github.com/rhysd/actionlint/releases/download/v1.7.12/actionlint_1.7.12_linux_arm64.tar.gz" +provenance = "github-attestations" [tools.actionlint."platforms.linux-x64"] checksum = "sha256:8aca8db96f1b94770f1b0d72b6dddcb1ebb8123cb3712530b08cc387b349a3d8" url = "https://github.com/rhysd/actionlint/releases/download/v1.7.12/actionlint_1.7.12_linux_amd64.tar.gz" +provenance = "github-attestations" [tools.actionlint."platforms.linux-x64-baseline"] checksum = "sha256:8aca8db96f1b94770f1b0d72b6dddcb1ebb8123cb3712530b08cc387b349a3d8" url = "https://github.com/rhysd/actionlint/releases/download/v1.7.12/actionlint_1.7.12_linux_amd64.tar.gz" +provenance = "github-attestations" 
[tools.actionlint."platforms.linux-x64-musl"] checksum = "sha256:8aca8db96f1b94770f1b0d72b6dddcb1ebb8123cb3712530b08cc387b349a3d8" url = "https://github.com/rhysd/actionlint/releases/download/v1.7.12/actionlint_1.7.12_linux_amd64.tar.gz" +provenance = "github-attestations" [tools.actionlint."platforms.linux-x64-musl-baseline"] checksum = "sha256:8aca8db96f1b94770f1b0d72b6dddcb1ebb8123cb3712530b08cc387b349a3d8" url = "https://github.com/rhysd/actionlint/releases/download/v1.7.12/actionlint_1.7.12_linux_amd64.tar.gz" +provenance = "github-attestations" [tools.actionlint."platforms.macos-arm64"] checksum = "sha256:aba9ced2dee8d27fecca3dc7feb1a7f9a52caefa1eb46f3271ea66b6e0e6953f" url = "https://github.com/rhysd/actionlint/releases/download/v1.7.12/actionlint_1.7.12_darwin_arm64.tar.gz" +provenance = "github-attestations" [tools.actionlint."platforms.macos-x64"] checksum = "sha256:5b44c3bc2255115c9b69e30efc0fecdf498fdb63c5d58e17084fd5f16324c644" url = "https://github.com/rhysd/actionlint/releases/download/v1.7.12/actionlint_1.7.12_darwin_amd64.tar.gz" +provenance = "github-attestations" [tools.actionlint."platforms.macos-x64-baseline"] checksum = "sha256:5b44c3bc2255115c9b69e30efc0fecdf498fdb63c5d58e17084fd5f16324c644" url = "https://github.com/rhysd/actionlint/releases/download/v1.7.12/actionlint_1.7.12_darwin_amd64.tar.gz" +provenance = "github-attestations" [tools.actionlint."platforms.windows-x64"] checksum = "sha256:6e7241b51e6817ea6a047693d8e6fed13b31819c9a0dd6c5a726e1592d22f6e9" url = "https://github.com/rhysd/actionlint/releases/download/v1.7.12/actionlint_1.7.12_windows_amd64.zip" +provenance = "github-attestations" [tools.actionlint."platforms.windows-x64-baseline"] checksum = "sha256:6e7241b51e6817ea6a047693d8e6fed13b31819c9a0dd6c5a726e1592d22f6e9" url = "https://github.com/rhysd/actionlint/releases/download/v1.7.12/actionlint_1.7.12_windows_amd64.zip" +provenance = "github-attestations" [[tools.bun]] -version = "1.3.11" +version = "1.3.12" backend = 
"core:bun" [tools.bun."platforms.linux-arm64"] -checksum = "sha256:d13944da12a53ecc74bf6a720bd1d04c4555c038dfe422365356a7be47691fdf" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-linux-aarch64.zip" +checksum = "sha256:c40bc0ebca11bde7d75af497a654a874d0c7fd8d6a8d6031c173c10c9064297b" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.12/bun-linux-aarch64.zip" [tools.bun."platforms.linux-arm64-musl"] -checksum = "sha256:0f5bf5dc3f276053196274bb84f90a44e2fa40c9432bd6757e3247a8d9476a3d" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-linux-aarch64-musl.zip" +checksum = "sha256:731baab945bc471c17248ea375e66f71442879d2595c54045b3e861f4e8b9ab1" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.12/bun-linux-aarch64-musl.zip" [tools.bun."platforms.linux-x64"] -checksum = "sha256:8611ba935af886f05a6f38740a15160326c15e5d5d07adef966130b4493607ed" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-linux-x64.zip" +checksum = "sha256:11dc3ee11bc1695e149737c6ca3d5619302cf4346e6b8a6ec7988967ef01ddc5" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.12/bun-linux-x64.zip" [tools.bun."platforms.linux-x64-baseline"] -checksum = "sha256:abe346f63414547cdf6b35b7a649a490c728b93d006226156923918a84c0e59b" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-linux-x64-baseline.zip" +checksum = "sha256:f8bb377a9ae93d44697ff91a2611164d2aedc9263415d623b0c3af24a6f55dab" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.12/bun-linux-x64-baseline.zip" [tools.bun."platforms.linux-x64-musl"] -checksum = "sha256:b0fce3bc4fab52f26a1e0d8886dc07fd0c0eb2a274cb343b59c83a2d5997b5b1" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-linux-x64-musl.zip" +checksum = "sha256:5a9f9a2102d4bd0d5210b4f6bd345151d2310623947085177c1b306e8587dce6" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.12/bun-linux-x64-musl.zip" 
[tools.bun."platforms.linux-x64-musl-baseline"] -checksum = "sha256:2fa2b697f14ada86a28df771d3876ca7606d7453b2339454893b1937aa9c0c7e" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-linux-x64-musl-baseline.zip" +checksum = "sha256:a95e079aef96f1387b86e27b69f9a6babbd08154d9a59483f29d9de285b8e3ad" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.12/bun-linux-x64-musl-baseline.zip" [tools.bun."platforms.macos-arm64"] -checksum = "sha256:6f5a3467ed9caec4795bf78cd476507d9f870c7d57b86c945fcb338126772ffc" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-darwin-aarch64.zip" +checksum = "sha256:6c4bb87dd013ed1a8d6a16e357a3d094959fd5530b4d7061f7f3680c3c7cea1c" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.12/bun-darwin-aarch64.zip" [tools.bun."platforms.macos-x64"] -checksum = "sha256:c4fe2b9247218b0295f24e895aaec8fee62e74452679a9026b67eacbd611a286" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-darwin-x64.zip" +checksum = "sha256:0f58c53a3e7947f1e626d2f8d285f97c14b7cadcca9c09ebafc0ae9d35b58c3d" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.12/bun-darwin-x64.zip" [tools.bun."platforms.macos-x64-baseline"] -checksum = "sha256:fb6739b08bf54550edaa7c824cd5b2dca45b6a06afef408443087a63105f6f8d" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-darwin-x64-baseline.zip" +checksum = "sha256:cc4e22130c2bc2d944d3a286de08f2ed37fa74136e59760f3a4661e610246474" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.12/bun-darwin-x64-baseline.zip" [tools.bun."platforms.windows-x64"] -checksum = "sha256:066f8694f8b7d8df592452746d18f01710d4053e93030922dbc6e8c34a8c4b9f" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-windows-x64.zip" +checksum = "sha256:841ff9c5dffcaa3a2620d1e3f87ee500f32a4ca830b001cade7a3479609d4a89" +url = 
"https://github.com/oven-sh/bun/releases/download/bun-v1.3.12/bun-windows-x64.zip" [tools.bun."platforms.windows-x64-baseline"] -checksum = "sha256:9d0e0f923e9626f3bc6044fc32e0d3ab29039aea753f5678ef8801cf26f75288" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-windows-x64-baseline.zip" +checksum = "sha256:2d3d5f88a95943563f56f3643c8f4e2422261018f6d915329bb2fb33f7256ba2" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.12/bun-windows-x64-baseline.zip" [[tools.cargo-binstall]] -version = "1.17.9" +version = "1.18.1" backend = "aqua:cargo-bins/cargo-binstall" [tools.cargo-binstall."platforms.linux-arm64"] -checksum = "sha256:89df253cc00a307209daee7648272bba4d8a342fce084d5922f6b3f4e4db0e25" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.9/cargo-binstall-aarch64-unknown-linux-musl.tgz" +checksum = "sha256:c55962a0115f9716b709216de7f8bdd59d6ba8738779e60b051b4593f677717a" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.18.1/cargo-binstall-aarch64-unknown-linux-musl.tgz" [tools.cargo-binstall."platforms.linux-arm64-musl"] -checksum = "sha256:89df253cc00a307209daee7648272bba4d8a342fce084d5922f6b3f4e4db0e25" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.9/cargo-binstall-aarch64-unknown-linux-musl.tgz" +checksum = "sha256:c55962a0115f9716b709216de7f8bdd59d6ba8738779e60b051b4593f677717a" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.18.1/cargo-binstall-aarch64-unknown-linux-musl.tgz" [tools.cargo-binstall."platforms.linux-x64"] -checksum = "sha256:bdea3fd820b118576316bf69c8240f454857287717809e6ecef010faace901ff" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.9/cargo-binstall-x86_64-unknown-linux-musl.tgz" +checksum = "sha256:cf2a4b54494ea8555d6349685e9a301efc1051d9fba6308c76914b2486f8700f" +url = 
"https://github.com/cargo-bins/cargo-binstall/releases/download/v1.18.1/cargo-binstall-x86_64-unknown-linux-musl.tgz" [tools.cargo-binstall."platforms.linux-x64-baseline"] -checksum = "sha256:bdea3fd820b118576316bf69c8240f454857287717809e6ecef010faace901ff" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.9/cargo-binstall-x86_64-unknown-linux-musl.tgz" +checksum = "sha256:cf2a4b54494ea8555d6349685e9a301efc1051d9fba6308c76914b2486f8700f" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.18.1/cargo-binstall-x86_64-unknown-linux-musl.tgz" [tools.cargo-binstall."platforms.linux-x64-musl"] -checksum = "sha256:bdea3fd820b118576316bf69c8240f454857287717809e6ecef010faace901ff" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.9/cargo-binstall-x86_64-unknown-linux-musl.tgz" +checksum = "sha256:cf2a4b54494ea8555d6349685e9a301efc1051d9fba6308c76914b2486f8700f" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.18.1/cargo-binstall-x86_64-unknown-linux-musl.tgz" [tools.cargo-binstall."platforms.linux-x64-musl-baseline"] -checksum = "sha256:bdea3fd820b118576316bf69c8240f454857287717809e6ecef010faace901ff" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.9/cargo-binstall-x86_64-unknown-linux-musl.tgz" +checksum = "sha256:cf2a4b54494ea8555d6349685e9a301efc1051d9fba6308c76914b2486f8700f" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.18.1/cargo-binstall-x86_64-unknown-linux-musl.tgz" [tools.cargo-binstall."platforms.macos-arm64"] -checksum = "sha256:021d537caa2071bbcd8ec50b210ea45af68a6983cca7780918b3603ef68d2585" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.9/cargo-binstall-aarch64-apple-darwin.zip" +checksum = "sha256:955abf167994c90f3547e233edace4c0f794465dd4aa408249b38999aa5ca3cf" +url = 
"https://github.com/cargo-bins/cargo-binstall/releases/download/v1.18.1/cargo-binstall-aarch64-apple-darwin.zip" [tools.cargo-binstall."platforms.macos-x64"] -checksum = "sha256:1d4fbd74c15274a029dd92e19a0ead8e1831b6e6b60f0f5c3690ef517022c41c" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.9/cargo-binstall-x86_64-apple-darwin.zip" +checksum = "sha256:e06370bec7143668653bb7c09d0b8b689fc703dd4fa58ec5847c4b571d8a490d" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.18.1/cargo-binstall-x86_64-apple-darwin.zip" [tools.cargo-binstall."platforms.macos-x64-baseline"] -checksum = "sha256:1d4fbd74c15274a029dd92e19a0ead8e1831b6e6b60f0f5c3690ef517022c41c" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.9/cargo-binstall-x86_64-apple-darwin.zip" +checksum = "sha256:e06370bec7143668653bb7c09d0b8b689fc703dd4fa58ec5847c4b571d8a490d" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.18.1/cargo-binstall-x86_64-apple-darwin.zip" [tools.cargo-binstall."platforms.windows-x64"] -checksum = "sha256:d17b413a19592af2cf57f9f742d49bef8837099e1407d73e79de5bc7834c4fd6" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.9/cargo-binstall-x86_64-pc-windows-msvc.zip" +checksum = "sha256:89706aa5215c164d8d091597a470fee72308ac87e8553af395ea77db844a888c" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.18.1/cargo-binstall-x86_64-pc-windows-msvc.zip" [tools.cargo-binstall."platforms.windows-x64-baseline"] -checksum = "sha256:d17b413a19592af2cf57f9f742d49bef8837099e1407d73e79de5bc7834c4fd6" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.9/cargo-binstall-x86_64-pc-windows-msvc.zip" +checksum = "sha256:89706aa5215c164d8d091597a470fee72308ac87e8553af395ea77db844a888c" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.18.1/cargo-binstall-x86_64-pc-windows-msvc.zip" [[tools.cargo-insta]] version = 
"1.47.2" @@ -197,7 +208,7 @@ version = "0.5.9" backend = "cargo:cargo-cyclonedx" [[tools."cargo:cargo-deny"]] -version = "0.19.0" +version = "0.19.4" backend = "cargo:cargo-deny" [[tools."cargo:cargo-llvm-cov"]] @@ -205,11 +216,11 @@ version = "0.8.5" backend = "cargo:cargo-llvm-cov" [[tools."cargo:cargo-nextest"]] -version = "0.9.132" +version = "0.9.133" backend = "cargo:cargo-nextest" [[tools."cargo:cargo-outdated"]] -version = "0.17.0" +version = "0.19.0" backend = "cargo:cargo-outdated" [[tools."cargo:cargo-release"]] @@ -217,7 +228,7 @@ version = "1.1.2" backend = "cargo:cargo-release" [[tools."cargo:cargo-zigbuild"]] -version = "0.22.1" +version = "0.22.2" backend = "cargo:cargo-zigbuild" [[tools."cargo:mdbook"]] @@ -253,111 +264,111 @@ version = "0.15.3" backend = "cargo:mdbook-toc" [[tools.goreleaser]] -version = "2.15.2" +version = "2.15.3" backend = "aqua:goreleaser/goreleaser" [tools.goreleaser."platforms.linux-arm64"] -checksum = "sha256:5db66761a98f6693161e49e1a95d28d2673a892ba60cb4a5e16736cafd41c4c9" -url = "https://github.com/goreleaser/goreleaser/releases/download/v2.15.2/goreleaser_Linux_arm64.tar.gz" -provenance = "cosign" +checksum = "sha256:646b8f36329cf1ec02af18d40e7096973f62524bdef19c3690414e390a9f757d" +url = "https://github.com/goreleaser/goreleaser/releases/download/v2.15.3/goreleaser_Linux_arm64.tar.gz" +provenance = "github-attestations" [tools.goreleaser."platforms.linux-arm64-musl"] -checksum = "sha256:5db66761a98f6693161e49e1a95d28d2673a892ba60cb4a5e16736cafd41c4c9" -url = "https://github.com/goreleaser/goreleaser/releases/download/v2.15.2/goreleaser_Linux_arm64.tar.gz" -provenance = "cosign" +checksum = "sha256:646b8f36329cf1ec02af18d40e7096973f62524bdef19c3690414e390a9f757d" +url = "https://github.com/goreleaser/goreleaser/releases/download/v2.15.3/goreleaser_Linux_arm64.tar.gz" +provenance = "github-attestations" [tools.goreleaser."platforms.linux-x64"] -checksum = 
"sha256:0ebdbf0353aba566b969dde746cc4e4806f96c27aa2f3971b229a9df7611fedc" -url = "https://github.com/goreleaser/goreleaser/releases/download/v2.15.2/goreleaser_Linux_x86_64.tar.gz" -provenance = "cosign" +checksum = "sha256:3b24b3a1629be21a9527d2f46f08b9bbf012c52fe33395714fe2c70acee57e0f" +url = "https://github.com/goreleaser/goreleaser/releases/download/v2.15.3/goreleaser_Linux_x86_64.tar.gz" +provenance = "github-attestations" [tools.goreleaser."platforms.linux-x64-baseline"] -checksum = "sha256:0ebdbf0353aba566b969dde746cc4e4806f96c27aa2f3971b229a9df7611fedc" -url = "https://github.com/goreleaser/goreleaser/releases/download/v2.15.2/goreleaser_Linux_x86_64.tar.gz" -provenance = "cosign" +checksum = "sha256:3b24b3a1629be21a9527d2f46f08b9bbf012c52fe33395714fe2c70acee57e0f" +url = "https://github.com/goreleaser/goreleaser/releases/download/v2.15.3/goreleaser_Linux_x86_64.tar.gz" +provenance = "github-attestations" [tools.goreleaser."platforms.linux-x64-musl"] -checksum = "sha256:0ebdbf0353aba566b969dde746cc4e4806f96c27aa2f3971b229a9df7611fedc" -url = "https://github.com/goreleaser/goreleaser/releases/download/v2.15.2/goreleaser_Linux_x86_64.tar.gz" -provenance = "cosign" +checksum = "sha256:3b24b3a1629be21a9527d2f46f08b9bbf012c52fe33395714fe2c70acee57e0f" +url = "https://github.com/goreleaser/goreleaser/releases/download/v2.15.3/goreleaser_Linux_x86_64.tar.gz" +provenance = "github-attestations" [tools.goreleaser."platforms.linux-x64-musl-baseline"] -checksum = "sha256:0ebdbf0353aba566b969dde746cc4e4806f96c27aa2f3971b229a9df7611fedc" -url = "https://github.com/goreleaser/goreleaser/releases/download/v2.15.2/goreleaser_Linux_x86_64.tar.gz" -provenance = "cosign" +checksum = "sha256:3b24b3a1629be21a9527d2f46f08b9bbf012c52fe33395714fe2c70acee57e0f" +url = "https://github.com/goreleaser/goreleaser/releases/download/v2.15.3/goreleaser_Linux_x86_64.tar.gz" +provenance = "github-attestations" [tools.goreleaser."platforms.macos-arm64"] -checksum = 
"sha256:0e6bd67688ac949780bf1166813a91f89856898ef4c40d7d46c2c74ebaa4b9ee" -url = "https://github.com/goreleaser/goreleaser/releases/download/v2.15.2/goreleaser_Darwin_all.tar.gz" -provenance = "cosign" +checksum = "sha256:b8525b87b2cc722efd8dc5cd2af95d9f3a6e69dcb73990131e18e1d2372cf579" +url = "https://github.com/goreleaser/goreleaser/releases/download/v2.15.3/goreleaser_Darwin_all.tar.gz" +provenance = "github-attestations" [tools.goreleaser."platforms.macos-x64"] -checksum = "sha256:0e6bd67688ac949780bf1166813a91f89856898ef4c40d7d46c2c74ebaa4b9ee" -url = "https://github.com/goreleaser/goreleaser/releases/download/v2.15.2/goreleaser_Darwin_all.tar.gz" -provenance = "cosign" +checksum = "sha256:b8525b87b2cc722efd8dc5cd2af95d9f3a6e69dcb73990131e18e1d2372cf579" +url = "https://github.com/goreleaser/goreleaser/releases/download/v2.15.3/goreleaser_Darwin_all.tar.gz" +provenance = "github-attestations" [tools.goreleaser."platforms.macos-x64-baseline"] -checksum = "sha256:0e6bd67688ac949780bf1166813a91f89856898ef4c40d7d46c2c74ebaa4b9ee" -url = "https://github.com/goreleaser/goreleaser/releases/download/v2.15.2/goreleaser_Darwin_all.tar.gz" -provenance = "cosign" +checksum = "sha256:b8525b87b2cc722efd8dc5cd2af95d9f3a6e69dcb73990131e18e1d2372cf579" +url = "https://github.com/goreleaser/goreleaser/releases/download/v2.15.3/goreleaser_Darwin_all.tar.gz" +provenance = "github-attestations" [tools.goreleaser."platforms.windows-x64"] -checksum = "sha256:7459832946dbe122c144f8d7f87484d8572ca005b779310aa6bb03346e8de17a" -url = "https://github.com/goreleaser/goreleaser/releases/download/v2.15.2/goreleaser_Windows_x86_64.zip" -provenance = "cosign" +checksum = "sha256:8722fdebfc23e10a36356ddd59af0c1f52c4120ca847afd091bd34e09d8aae8c" +url = "https://github.com/goreleaser/goreleaser/releases/download/v2.15.3/goreleaser_Windows_x86_64.zip" +provenance = "github-attestations" [tools.goreleaser."platforms.windows-x64-baseline"] -checksum = 
"sha256:7459832946dbe122c144f8d7f87484d8572ca005b779310aa6bb03346e8de17a" -url = "https://github.com/goreleaser/goreleaser/releases/download/v2.15.2/goreleaser_Windows_x86_64.zip" -provenance = "cosign" +checksum = "sha256:8722fdebfc23e10a36356ddd59af0c1f52c4120ca847afd091bd34e09d8aae8c" +url = "https://github.com/goreleaser/goreleaser/releases/download/v2.15.3/goreleaser_Windows_x86_64.zip" +provenance = "github-attestations" [[tools.just]] -version = "1.48.1" +version = "1.49.0" backend = "aqua:casey/just" [tools.just."platforms.linux-arm64"] -checksum = "sha256:3308721b991cf88cf2b9bbb3b31ac40550ec61a0c9b6fc011564e25e87964030" -url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-aarch64-unknown-linux-musl.tar.gz" +checksum = "sha256:993b78f51004248114af22368f69715541542b3c9941c80e02f8ae10eb404ae0" +url = "https://github.com/casey/just/releases/download/1.49.0/just-1.49.0-aarch64-unknown-linux-musl.tar.gz" [tools.just."platforms.linux-arm64-musl"] -checksum = "sha256:3308721b991cf88cf2b9bbb3b31ac40550ec61a0c9b6fc011564e25e87964030" -url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-aarch64-unknown-linux-musl.tar.gz" +checksum = "sha256:993b78f51004248114af22368f69715541542b3c9941c80e02f8ae10eb404ae0" +url = "https://github.com/casey/just/releases/download/1.49.0/just-1.49.0-aarch64-unknown-linux-musl.tar.gz" [tools.just."platforms.linux-x64"] -checksum = "sha256:9293e553ce401d1b524bf4e104918f72f268e3f9c6827e0055fe98d84a1b2522" -url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-x86_64-unknown-linux-musl.tar.gz" +checksum = "sha256:05eb2f068b641b06e5b318796c2e27d4dcca608e65b34329a08c1b9f582611bd" +url = "https://github.com/casey/just/releases/download/1.49.0/just-1.49.0-x86_64-unknown-linux-musl.tar.gz" [tools.just."platforms.linux-x64-baseline"] -checksum = "sha256:9293e553ce401d1b524bf4e104918f72f268e3f9c6827e0055fe98d84a1b2522" -url = 
"https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-x86_64-unknown-linux-musl.tar.gz" +checksum = "sha256:05eb2f068b641b06e5b318796c2e27d4dcca608e65b34329a08c1b9f582611bd" +url = "https://github.com/casey/just/releases/download/1.49.0/just-1.49.0-x86_64-unknown-linux-musl.tar.gz" [tools.just."platforms.linux-x64-musl"] -checksum = "sha256:9293e553ce401d1b524bf4e104918f72f268e3f9c6827e0055fe98d84a1b2522" -url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-x86_64-unknown-linux-musl.tar.gz" +checksum = "sha256:05eb2f068b641b06e5b318796c2e27d4dcca608e65b34329a08c1b9f582611bd" +url = "https://github.com/casey/just/releases/download/1.49.0/just-1.49.0-x86_64-unknown-linux-musl.tar.gz" [tools.just."platforms.linux-x64-musl-baseline"] -checksum = "sha256:9293e553ce401d1b524bf4e104918f72f268e3f9c6827e0055fe98d84a1b2522" -url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-x86_64-unknown-linux-musl.tar.gz" +checksum = "sha256:05eb2f068b641b06e5b318796c2e27d4dcca608e65b34329a08c1b9f582611bd" +url = "https://github.com/casey/just/releases/download/1.49.0/just-1.49.0-x86_64-unknown-linux-musl.tar.gz" [tools.just."platforms.macos-arm64"] -checksum = "sha256:03a73339ff55bcf7411a3c940cdcb0a726d98134b87203c83a9008575434e2a8" -url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-aarch64-apple-darwin.tar.gz" +checksum = "sha256:d21b20df01ec9b9762b0ef08e56ae8dccf3738770edeafa8d2b3a750aee06d78" +url = "https://github.com/casey/just/releases/download/1.49.0/just-1.49.0-aarch64-apple-darwin.tar.gz" [tools.just."platforms.macos-x64"] -checksum = "sha256:4c3e9c880b8fc93d7fc24abfde3c36b0cc59f6e9f8b31f7175095700f64125a7" -url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-x86_64-apple-darwin.tar.gz" +checksum = "sha256:e0b83a9352952ab25e5cf13f6cb03dd1872416e5d89388b56d6ca58f11b0a3a8" +url = "https://github.com/casey/just/releases/download/1.49.0/just-1.49.0-x86_64-apple-darwin.tar.gz" 
[tools.just."platforms.macos-x64-baseline"] -checksum = "sha256:4c3e9c880b8fc93d7fc24abfde3c36b0cc59f6e9f8b31f7175095700f64125a7" -url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-x86_64-apple-darwin.tar.gz" +checksum = "sha256:e0b83a9352952ab25e5cf13f6cb03dd1872416e5d89388b56d6ca58f11b0a3a8" +url = "https://github.com/casey/just/releases/download/1.49.0/just-1.49.0-x86_64-apple-darwin.tar.gz" [tools.just."platforms.windows-x64"] -checksum = "sha256:368cd9ca827cba04d9e6fc00f7ad840773c4605b6f64b9f87bdb00325d351029" -url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-x86_64-pc-windows-msvc.zip" +checksum = "sha256:657338772efd17a31d67285bb5ed691da87741e44311c0366273c6cb7d913b15" +url = "https://github.com/casey/just/releases/download/1.49.0/just-1.49.0-x86_64-pc-windows-msvc.zip" [tools.just."platforms.windows-x64-baseline"] -checksum = "sha256:368cd9ca827cba04d9e6fc00f7ad840773c4605b6f64b9f87bdb00325d351029" -url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-x86_64-pc-windows-msvc.zip" +checksum = "sha256:657338772efd17a31d67285bb5ed691da87741e44311c0366273c6cb7d913b15" +url = "https://github.com/casey/just/releases/download/1.49.0/just-1.49.0-x86_64-pc-windows-msvc.zip" [[tools.lychee]] version = "0.23.0" @@ -415,7 +426,7 @@ version = "4.5.1" backend = "pipx:pre-commit" [[tools.prettier]] -version = "3.8.1" +version = "3.8.3" backend = "npm:prettier" [[tools.protobuf]] @@ -515,112 +526,113 @@ checksum = "sha256:76ddeb5ae7a31c8f9f7759d3b843a4cadda2150ac037ad0c1794665d6cf31 url = "https://github.com/protocolbuffers/protobuf/releases/download/v34.0/protoc-34.0-win64.zip" [[tools.python]] -version = "3.14.3" +version = "3.14.4" backend = "core:python" [tools.python."platforms.linux-arm64"] -checksum = "sha256:53700338695e402a1a1fe22be4a41fbdacc70e22bb308a48eca8ed67cb7992be" -url = 
"https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-aarch64-unknown-linux-gnu-install_only_stripped.tar.gz" +checksum = "sha256:b8b597fdb2f8dccdc502c11947b60a4b65eb6bce79cfa60c7ccf9b6e8352c60a" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260414/cpython-3.14.4+20260414-aarch64-unknown-linux-gnu-install_only_stripped.tar.gz" provenance = "github-attestations" [tools.python."platforms.linux-arm64-musl"] -checksum = "sha256:53700338695e402a1a1fe22be4a41fbdacc70e22bb308a48eca8ed67cb7992be" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-aarch64-unknown-linux-gnu-install_only_stripped.tar.gz" +checksum = "sha256:b8b597fdb2f8dccdc502c11947b60a4b65eb6bce79cfa60c7ccf9b6e8352c60a" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260414/cpython-3.14.4+20260414-aarch64-unknown-linux-gnu-install_only_stripped.tar.gz" provenance = "github-attestations" [tools.python."platforms.linux-x64"] -checksum = "sha256:d7a9f970914bb4c88756fe3bdcc186d4feb90e9500e54f1db47dae4dc9687e39" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" +checksum = "sha256:fe9a9c32d13870af632cbac3dfc7528ae53597e94472aa4c7d6a42e8166136cd" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260414/cpython-3.14.4+20260414-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" provenance = "github-attestations" [tools.python."platforms.linux-x64-baseline"] -checksum = "sha256:d7a9f970914bb4c88756fe3bdcc186d4feb90e9500e54f1db47dae4dc9687e39" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" +checksum = "sha256:fe9a9c32d13870af632cbac3dfc7528ae53597e94472aa4c7d6a42e8166136cd" 
+url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260414/cpython-3.14.4+20260414-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" provenance = "github-attestations" [tools.python."platforms.linux-x64-musl"] -checksum = "sha256:d7a9f970914bb4c88756fe3bdcc186d4feb90e9500e54f1db47dae4dc9687e39" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" +checksum = "sha256:fe9a9c32d13870af632cbac3dfc7528ae53597e94472aa4c7d6a42e8166136cd" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260414/cpython-3.14.4+20260414-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" provenance = "github-attestations" [tools.python."platforms.linux-x64-musl-baseline"] -checksum = "sha256:d7a9f970914bb4c88756fe3bdcc186d4feb90e9500e54f1db47dae4dc9687e39" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" +checksum = "sha256:fe9a9c32d13870af632cbac3dfc7528ae53597e94472aa4c7d6a42e8166136cd" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260414/cpython-3.14.4+20260414-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" provenance = "github-attestations" [tools.python."platforms.macos-arm64"] -checksum = "sha256:c43aecde4a663aebff99b9b83da0efec506479f1c3f98331442f33d2c43501f9" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-aarch64-apple-darwin-install_only_stripped.tar.gz" +checksum = "sha256:6f304f4ec30854611f23316578302235fb517cd970519ecdd11a8c4db87fd843" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260414/cpython-3.14.4+20260414-aarch64-apple-darwin-install_only_stripped.tar.gz" provenance = "github-attestations" [tools.python."platforms.macos-x64"] -checksum = 
"sha256:9ab41dbc2f100a2a45d1833b9c11165f51051c558b5213eda9a9731d5948a0c0" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-apple-darwin-install_only_stripped.tar.gz" +checksum = "sha256:d51250a32fa5d9f0799c7bcb71720c27b10a3afd4a7de288120f96085d508a5a" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260414/cpython-3.14.4+20260414-x86_64-apple-darwin-install_only_stripped.tar.gz" provenance = "github-attestations" [tools.python."platforms.macos-x64-baseline"] -checksum = "sha256:9ab41dbc2f100a2a45d1833b9c11165f51051c558b5213eda9a9731d5948a0c0" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-apple-darwin-install_only_stripped.tar.gz" +checksum = "sha256:d51250a32fa5d9f0799c7bcb71720c27b10a3afd4a7de288120f96085d508a5a" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260414/cpython-3.14.4+20260414-x86_64-apple-darwin-install_only_stripped.tar.gz" provenance = "github-attestations" [tools.python."platforms.windows-x64"] -checksum = "sha256:bbe19034b35b0267176a7442575ae7dc6343480fd4d35598cb7700173d431e09" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-pc-windows-msvc-install_only_stripped.tar.gz" +checksum = "sha256:a976991dcd085c1bb5d9a8084823a6bc8b7f9b079d8c432574a6ddd68c3a6fe1" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260414/cpython-3.14.4+20260414-x86_64-pc-windows-msvc-install_only_stripped.tar.gz" provenance = "github-attestations" [tools.python."platforms.windows-x64-baseline"] -checksum = "sha256:bbe19034b35b0267176a7442575ae7dc6343480fd4d35598cb7700173d431e09" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-pc-windows-msvc-install_only_stripped.tar.gz" +checksum = 
"sha256:a976991dcd085c1bb5d9a8084823a6bc8b7f9b079d8c432574a6ddd68c3a6fe1" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260414/cpython-3.14.4+20260414-x86_64-pc-windows-msvc-install_only_stripped.tar.gz" provenance = "github-attestations" [[tools.rust]] -version = "1.94.1" +version = "1.95.0" backend = "core:rust" [[tools.zig]] -version = "0.15.2" +version = "0.16.0" backend = "core:zig" [tools.zig."platforms.linux-arm64"] -checksum = "sha256:958ed7d1e00d0ea76590d27666efbf7a932281b3d7ba0c6b01b0ff26498f667f" -url = "https://ziglang.org/download/0.15.2/zig-aarch64-linux-0.15.2.tar.xz" +checksum = "sha256:ea4b09bfb22ec6f6c6ceac57ab63efb6b46e17ab08d21f69f3a48b38e1534f17" +url = "https://ziglang.org/download/0.16.0/zig-aarch64-linux-0.16.0.tar.xz" [tools.zig."platforms.linux-arm64-musl"] -checksum = "sha256:958ed7d1e00d0ea76590d27666efbf7a932281b3d7ba0c6b01b0ff26498f667f" -url = "https://ziglang.org/download/0.15.2/zig-aarch64-linux-0.15.2.tar.xz" +checksum = "sha256:ea4b09bfb22ec6f6c6ceac57ab63efb6b46e17ab08d21f69f3a48b38e1534f17" +url = "https://ziglang.org/download/0.16.0/zig-aarch64-linux-0.16.0.tar.xz" [tools.zig."platforms.linux-x64"] -checksum = "sha256:02aa270f183da276e5b5920b1dac44a63f1a49e55050ebde3aecc9eb82f93239" -url = "https://ziglang.org/download/0.15.2/zig-x86_64-linux-0.15.2.tar.xz" +checksum = "sha256:70e49664a74374b48b51e6f3fdfbf437f6395d42509050588bd49abe52ba3d00" +url = "https://ziglang.org/download/0.16.0/zig-x86_64-linux-0.16.0.tar.xz" [tools.zig."platforms.linux-x64-baseline"] -checksum = "sha256:02aa270f183da276e5b5920b1dac44a63f1a49e55050ebde3aecc9eb82f93239" -url = "https://ziglang.org/download/0.15.2/zig-x86_64-linux-0.15.2.tar.xz" +checksum = "sha256:70e49664a74374b48b51e6f3fdfbf437f6395d42509050588bd49abe52ba3d00" +url = "https://ziglang.org/download/0.16.0/zig-x86_64-linux-0.16.0.tar.xz" [tools.zig."platforms.linux-x64-musl"] -checksum = 
"sha256:02aa270f183da276e5b5920b1dac44a63f1a49e55050ebde3aecc9eb82f93239" -url = "https://ziglang.org/download/0.15.2/zig-x86_64-linux-0.15.2.tar.xz" +checksum = "sha256:70e49664a74374b48b51e6f3fdfbf437f6395d42509050588bd49abe52ba3d00" +url = "https://ziglang.org/download/0.16.0/zig-x86_64-linux-0.16.0.tar.xz" [tools.zig."platforms.linux-x64-musl-baseline"] -checksum = "sha256:02aa270f183da276e5b5920b1dac44a63f1a49e55050ebde3aecc9eb82f93239" -url = "https://ziglang.org/download/0.15.2/zig-x86_64-linux-0.15.2.tar.xz" +checksum = "sha256:70e49664a74374b48b51e6f3fdfbf437f6395d42509050588bd49abe52ba3d00" +url = "https://ziglang.org/download/0.16.0/zig-x86_64-linux-0.16.0.tar.xz" [tools.zig."platforms.macos-arm64"] -checksum = "sha256:3cc2bab367e185cdfb27501c4b30b1b0653c28d9f73df8dc91488e66ece5fa6b" -url = "https://ziglang.org/download/0.15.2/zig-aarch64-macos-0.15.2.tar.xz" +checksum = "sha256:b23d70deaa879b5c2d486ed3316f7eaa53e84acf6fc9cc747de152450d401489" +url = "https://ziglang.org/download/0.16.0/zig-aarch64-macos-0.16.0.tar.xz" +provenance = "minisign" [tools.zig."platforms.macos-x64"] -checksum = "sha256:375b6909fc1495d16fc2c7db9538f707456bfc3373b14ee83fdd3e22b3d43f7f" -url = "https://ziglang.org/download/0.15.2/zig-x86_64-macos-0.15.2.tar.xz" +checksum = "sha256:0387557ed1877bc6a2e1802c8391953baddba76081876301c522f52977b52ba7" +url = "https://ziglang.org/download/0.16.0/zig-x86_64-macos-0.16.0.tar.xz" [tools.zig."platforms.macos-x64-baseline"] -checksum = "sha256:375b6909fc1495d16fc2c7db9538f707456bfc3373b14ee83fdd3e22b3d43f7f" -url = "https://ziglang.org/download/0.15.2/zig-x86_64-macos-0.15.2.tar.xz" +checksum = "sha256:0387557ed1877bc6a2e1802c8391953baddba76081876301c522f52977b52ba7" +url = "https://ziglang.org/download/0.16.0/zig-x86_64-macos-0.16.0.tar.xz" [tools.zig."platforms.windows-x64"] -checksum = "sha256:3a0ed1e8799a2f8ce2a6e6290a9ff22e6906f8227865911fb7ddedc3cc14cb0c" -url = "https://ziglang.org/download/0.15.2/zig-x86_64-windows-0.15.2.zip" 
+checksum = "sha256:68659eb5f1e4eb1437a722f1dd889c5a322c9954607f5edcf337bc3684a75a7e" +url = "https://ziglang.org/download/0.16.0/zig-x86_64-windows-0.16.0.zip" [tools.zig."platforms.windows-x64-baseline"] -checksum = "sha256:3a0ed1e8799a2f8ce2a6e6290a9ff22e6906f8227865911fb7ddedc3cc14cb0c" -url = "https://ziglang.org/download/0.15.2/zig-x86_64-windows-0.15.2.zip" +checksum = "sha256:68659eb5f1e4eb1437a722f1dd889c5a322c9954607f5edcf337bc3684a75a7e" +url = "https://ziglang.org/download/0.16.0/zig-x86_64-windows-0.16.0.zip" diff --git a/mise.toml b/mise.toml index 31476dd5..49e8905b 100644 --- a/mise.toml +++ b/mise.toml @@ -1,11 +1,11 @@ # Several tools are set to "latest" to ensure they are using the idiomatic version file. [tools] -cargo-binstall = "1.17.9" +cargo-binstall = "latest" cargo-insta = "1.47.2" "cargo:cargo-audit" = "0.22.1" -"cargo:cargo-deny" = "0.19.0" +"cargo:cargo-deny" = "0.19.4" "cargo:cargo-llvm-cov" = "0.8.5" -"cargo:cargo-nextest" = "0.9.132" +"cargo:cargo-nextest" = "0.9.133" "cargo:mdbook" = "0.5.2" "cargo:mdbook-linkcheck" = "0.7.7" "cargo:mdbook-tabs" = "0.3.4" @@ -16,13 +16,13 @@ cargo-insta = "1.47.2" "cargo:mdbook-i18n-helpers" = "0.4.0" just = "latest" python = "latest" -rust = { version = "latest", components = "llvm-tools,cargo,rustfmt,clippy", profile = "default" } -"cargo:cargo-outdated" = "0.17.0" +rust = { version = "1.95.0", components = "llvm-tools,cargo,rustfmt,clippy", profile = "default" } +"cargo:cargo-outdated" = "0.19.0" "cargo:cargo-release" = "1.1.2" "cargo:cargo-auditable" = "0.7.4" "cargo:cargo-cyclonedx" = "0.5.9" "pipx:mdformat" = { version = "1.0.0", uvx_args = "--with mdformat-gfm --with mdformat-config --with mdformat-footnote --with mdformat-front-matters --with mdformat-simple-breaks --with mdformat-web --with mdformat-gfm-alerts --with mdformat-toc" } -prettier = "3.8.1" +prettier = "3.8.3" actionlint = "1.7.12" lychee = "0.23.0" markdownlint-cli2 = "0.22.0" @@ -31,7 +31,7 @@ protoc = "34.0" zig = "latest" 
"cargo:cargo-zigbuild" = "latest" bun = "latest" -goreleaser = "latest" +goreleaser = "2.15.3" "pipx:pre-commit" = "latest" # Many of these settings are defaults, but we are explicit in case they change in the future and to make it clear to users what is enabled. diff --git a/procmond/src/event_bus_connector.rs b/procmond/src/event_bus_connector.rs index 1f9549a7..86b0818f 100644 --- a/procmond/src/event_bus_connector.rs +++ b/procmond/src/event_bus_connector.rs @@ -77,6 +77,7 @@ use daemoneye_eventbus::{ }; use std::collections::VecDeque; use std::path::PathBuf; +use std::sync::Arc; use thiserror::Error; use tokio::sync::mpsc; use tracing::{debug, error, info, warn}; @@ -274,7 +275,12 @@ pub struct EventBusConnector { wal: WriteAheadLog, /// EventBus client for broker communication (None when disconnected). - client: Option, + /// + /// Wrapped in `Arc` so callers holding the connector behind a `RwLock` can + /// clone the client handle out of a brief read-lock acquisition and do + /// async work (subscribe, publish) without holding the lock across `.await`. + /// See [`client_arc`](Self::client_arc). + client: Option>, /// In-memory buffer for events when disconnected. 
buffer: VecDeque, @@ -430,9 +436,11 @@ impl EventBusConnector { }; // Attempt to connect - let client = EventBusClient::new(self.client_id.clone(), socket_config, client_config) - .await - .map_err(|e| EventBusConnectorError::Connection(e.to_string()))?; + let client = Arc::new( + EventBusClient::new(self.client_id.clone(), socket_config, client_config) + .await + .map_err(|e| EventBusConnectorError::Connection(e.to_string()))?, + ); self.client = Some(client); self.connected = true; @@ -520,7 +528,7 @@ impl EventBusConnector { match EventBusClient::new(self.client_id.clone(), socket_config, client_config).await { Ok(client) => { - self.client = Some(client); + self.client = Some(Arc::new(client)); self.connected = true; self.reconnect_attempts = 0; self.last_reconnect_attempt = None; @@ -930,11 +938,35 @@ impl EventBusConnector { debug!(flushed = flushed, "Flushed buffer before shutdown"); } - // Close the client connection - if let Some(client) = self.client.take() - && let Err(e) = client.shutdown().await - { - error!(error = %e, "Error during client shutdown"); + // Close the client connection. The client is held behind `Arc` so other + // tasks can briefly clone it for async work without holding our lock. + // + // Strategy: always fire the non-consuming `shutdown_signal()` first so + // background tasks exit immediately regardless of outstanding clones. + // Then try `Arc::into_inner` to reclaim ownership for the full + // consuming `shutdown().await` (which also awaits task JoinHandles and + // closes the transport). If clones remain, the signal has already + // stopped the background tasks; the struct itself is dropped when the + // last clone drops. 
+ if let Some(client_arc) = self.client.take() { + let signaled = client_arc.shutdown_signal(); + if !signaled { + debug!( + "EventBusClient shutdown signal not delivered - no active receivers (background tasks already exited)" + ); + } + match Arc::into_inner(client_arc) { + Some(client) => { + if let Err(e) = client.shutdown().await { + error!(error = %e, "Error during client shutdown"); + } + } + None => { + warn!( + "EventBusClient has outstanding Arc references; skipped consuming shutdown, but background tasks received the shutdown signal and will exit" + ); + } + } } self.connected = false; @@ -994,6 +1026,19 @@ impl EventBusConnector { self.buffer.len() } + /// Clone the current broker client handle without holding the connector lock. + /// + /// Callers that hold the connector behind `Arc>` should acquire + /// the read guard, call this to obtain an owned `Arc`, and + /// drop the guard before issuing any async broker calls. This satisfies the + /// workspace `clippy::await_holding_lock = "deny"` rule. + /// + /// Returns `None` when disconnected. + #[must_use] + pub fn client_arc(&self) -> Option> { + self.client.clone() + } + /// Subscribe to topic patterns on the broker. /// /// Returns a receiver for bus events matching the given topic patterns. @@ -1028,6 +1073,7 @@ impl EventBusConnector { correlation_filter: None, topic_patterns: Some(topic_patterns), enable_wildcards: true, + include_control: false, }; client @@ -1036,6 +1082,58 @@ impl EventBusConnector { .map_err(|e| EventBusConnectorError::EventBus(e.to_string())) } + /// Subscribe to topic patterns on the broker with opt-in Control delivery. + /// + /// Returns a `(event_rx, control_rx)` tuple of parallel receivers. + /// `control_rx` carries raw [`daemoneye_eventbus::Message`] envelopes for + /// `MessageType::Control` messages on matching topics — used by procmond + /// to receive lifecycle signals (`BeginMonitoring`) and per-collector RPC + /// requests from the agent. 
+ /// + /// See [`EventSubscription::include_control`](daemoneye_eventbus::EventSubscription::include_control) + /// for the wire-level semantics. + /// + /// # Arguments + /// + /// * `subscriber_id` - Unique identifier for this subscription + /// * `topic_patterns` - Topic patterns to subscribe to (supports wildcards) + /// + /// # Errors + /// + /// - `EventBusConnectorError::Connection` if not connected + /// - `EventBusConnectorError::EventBus` if subscription fails + pub async fn subscribe_with_control( + &self, + subscriber_id: &str, + topic_patterns: Vec, + ) -> EventBusConnectorResult<( + tokio::sync::mpsc::Receiver, + tokio::sync::mpsc::Receiver, + )> { + let client = self.client.as_ref().ok_or_else(|| { + EventBusConnectorError::Connection("Not connected to broker".to_owned()) + })?; + + let subscription = daemoneye_eventbus::EventSubscription { + subscriber_id: subscriber_id.to_owned(), + capabilities: daemoneye_eventbus::SourceCaps { + event_types: vec!["control".to_owned()], + collectors: vec![], + max_priority: 0, + }, + event_filter: None, + correlation_filter: None, + topic_patterns: Some(topic_patterns), + enable_wildcards: true, + include_control: true, + }; + + client + .subscribe_with_control(subscription) + .await + .map_err(|e| EventBusConnectorError::EventBus(e.to_string())) + } + /// Publish raw bytes to a topic on the broker. 
/// /// This is used for control messages (RPC responses, heartbeats) that are diff --git a/procmond/src/event_source.rs b/procmond/src/event_source.rs index b00e5b8a..2f611606 100644 --- a/procmond/src/event_source.rs +++ b/procmond/src/event_source.rs @@ -1006,7 +1006,7 @@ impl EventSource for ProcessEventSource { // Wait longer before next attempt #[allow(clippy::arithmetic_side_effects)] // Safe: consecutive_failures is bounded let backoff_duration = FAILURE_BACKOFF_BASE * consecutive_failures; - let max_backoff = Duration::from_secs(60); + let max_backoff = Duration::from_mins(1); let actual_backoff = std::cmp::min(backoff_duration, max_backoff); warn!( @@ -1494,7 +1494,7 @@ mod tests { // Slow collection should not include REALTIME capability let slow_config = ProcessSourceConfig { - collection_interval: Duration::from_secs(60), + collection_interval: Duration::from_mins(1), ..Default::default() }; let slow_source = ProcessEventSource::with_config(db_manager, slow_config); @@ -1683,7 +1683,7 @@ mod tests { // Should complete reasonably quickly even with timeout // Allow more time on slower systems or under load assert!( - shutdown_duration < Duration::from_secs(60), + shutdown_duration < Duration::from_mins(1), "Shutdown should be fast, took {:?}", shutdown_duration ); diff --git a/procmond/src/lib.rs b/procmond/src/lib.rs index 8efded08..27d76b7c 100644 --- a/procmond/src/lib.rs +++ b/procmond/src/lib.rs @@ -345,7 +345,7 @@ impl ProcessMessageHandler { // already stamped (partial coverage is fine — downstream // handles missing hashes) and log the truncation. if let Some(hasher) = self.hasher.as_ref() { - const HASH_PASS_OVERALL_DEADLINE: Duration = Duration::from_secs(60); + const HASH_PASS_OVERALL_DEADLINE: Duration = Duration::from_mins(1); match tokio::time::timeout( HASH_PASS_OVERALL_DEADLINE, hash_pass::populate_hashes(&mut process_events, hasher), @@ -641,7 +641,7 @@ mod tests { /// Creates a new mock collector with test process data. 
pub fn new() -> Self { let now = SystemTime::now(); - let start_time = now - std::time::Duration::from_secs(3600); + let start_time = now - std::time::Duration::from_hours(1); let processes = vec![ ProcessEvent { @@ -920,7 +920,7 @@ mod tests { // Create a test ProcessEvent let now = SystemTime::now(); - let start_time = now - std::time::Duration::from_secs(3600); // 1 hour ago + let start_time = now - std::time::Duration::from_hours(1); let event = ProcessEvent { pid: 1234, diff --git a/procmond/src/lifecycle.rs b/procmond/src/lifecycle.rs index bb37c5d3..b31d900f 100644 --- a/procmond/src/lifecycle.rs +++ b/procmond/src/lifecycle.rs @@ -237,7 +237,7 @@ pub struct LifecycleTrackingConfig { impl Default for LifecycleTrackingConfig { fn default() -> Self { Self { - max_snapshot_age: Duration::from_secs(300), // 5 minutes + max_snapshot_age: Duration::from_mins(5), min_process_lifetime: Duration::from_millis(100), detect_pid_reuse: true, track_command_line_changes: true, @@ -734,7 +734,7 @@ impl ProcessLifecycleTracker { )?; // This is a simple heuristic - in practice, you might want to check actual system uptime - if age > Duration::from_secs(365 * 24 * 3600) { + if age > Duration::from_hours(365 * 24) { // More than a year return Ok(Some(ProcessLifecycleEvent::Suspicious { process: Box::new(snapshot.clone()), @@ -839,7 +839,7 @@ mod tests { name: name.to_string(), executable_path: Some(format!("/usr/bin/{}", name)), command_line: vec![name.to_string()], - start_time: Some(SystemTime::now() - Duration::from_secs(60)), + start_time: Some(SystemTime::now() - Duration::from_mins(1)), cpu_usage: Some(1.0), memory_usage: Some(1024 * 1024), executable_hash: Some("abc123".to_string()), diff --git a/procmond/src/main.rs b/procmond/src/main.rs index b3b5caeb..c6c89f27 100644 --- a/procmond/src/main.rs +++ b/procmond/src/main.rs @@ -23,6 +23,97 @@ use std::time::Duration; use tokio::sync::{Mutex, RwLock, mpsc}; use tracing::{debug, error, info, warn}; +/// Maximum time 
procmond waits for `BeginMonitoring` after subscribing +/// to control topics before falling back to standalone collection. +/// Chosen empirically: 60s is long enough for a slow agent boot on +/// contested hardware, short enough that operators notice when the +/// agent is genuinely unreachable (END-297 review REL-001 / ADV-004). +const BEGIN_MONITORING_WAIT_TIMEOUT: Duration = Duration::from_mins(1); + +/// Typed reason for each call to [`begin_monitoring_or_exit`]. Used purely as +/// a log label so error output is consistent and misspelling-proof across the +/// five call sites; the helper itself no longer branches on the variant — all +/// paths are treated as fatal on any actor error (see the function docstring). +#[derive(Debug, Clone, Copy)] +enum BeginMonitoringReason { + /// Operator-opted standalone mode (CLI flag or `PROCMOND_STANDALONE=1`). + StandaloneMode, + /// Agent's `BeginMonitoring` broadcast arrived on + /// `control.collector.lifecycle`. + LifecycleSignal, + /// Subscribe-with-control failed; falling back to immediate collection. + BrokerUnreachable, + /// Control channel closed before a lifecycle message arrived; falling + /// back to standalone collection. + ChannelClosed, + /// Wait deadline elapsed before `BeginMonitoring` arrived; falling back + /// to standalone collection. + WaitTimeout, +} + +impl BeginMonitoringReason { + const fn as_str(self) -> &'static str { + match self { + Self::StandaloneMode => "standalone-mode", + Self::LifecycleSignal => "lifecycle-signal", + Self::BrokerUnreachable => "broker-unreachable", + Self::ChannelClosed => "channel-closed", + Self::WaitTimeout => "wait-timeout", + } + } +} + +impl std::fmt::Display for BeginMonitoringReason { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(self.as_str()) + } +} + +/// Send `BeginMonitoring` to the actor, exiting the process on *any* error. 
+/// +/// `begin_monitoring()` performs a one-time state transition from +/// `WaitingForAgent` -> `Running`. It is called exactly once per procmond +/// lifetime from each of the five call sites below, and none of those sites +/// has a retry loop — once this function returns without exiting, the actor +/// has either transitioned or it never will. +/// +/// That makes the error-handling policy uniformly fatal: if the actor cannot +/// receive this signal for any reason (`ChannelFull`, `ChannelClosed`, +/// `ResponseDropped`, or a future `#[non_exhaustive]` variant), procmond +/// would hang in `WaitingForAgent` forever while the binary stays alive and +/// the supervisor (systemd, kubelet, etc.) reports it as healthy — the exact +/// zombie failure mode `ShadowHunt` is designed to catch on monitored hosts. +/// Exiting non-zero hands the problem to the supervisor, which restarts us. +/// +/// The typed `reason` parameter is retained purely for log labeling so the +/// five call sites produce consistent, misspelling-proof log output. +#[allow( + clippy::exit, + reason = "Three of the five call sites execute inside a tokio::spawn task \ + (the lifecycle_wait_task), where returning Err propagates nowhere and \ + has no effect on process exit status. Using std::process::exit gives a \ + uniform failure shape across both main-task and spawned-task call \ + sites; supervisors observe a non-zero exit and restart procmond. \ + std::process::exit skips destructor runs, but WAL writes are committed \ + before events leave the actor (see event_bus_connector.rs) so buffered \ + events are recoverable on next startup via replay_wal()." 
+)] +fn begin_monitoring_or_exit(handle: &procmond::ActorHandle, reason: BeginMonitoringReason) { + let err = match handle.begin_monitoring() { + Ok(()) => return, + Err(e) => e, + }; + + error!( + error = %err, + reason = %reason, + "Fatal actor error after {reason} - begin_monitoring() is a one-shot \ + transition with no retry, so any failure (including ChannelFull) leaves \ + procmond hung in WaitingForAgent. Exiting so the supervisor can restart." + ); + std::process::exit(1); +} + /// Parse and validate the collection interval argument. /// /// Ensures the interval is within acceptable bounds (5-3600 seconds). @@ -73,6 +164,10 @@ struct Cli { /// Enable executable hashing #[arg(long)] compute_hashes: bool, + + /// Start monitoring immediately without waiting for an agent `BeginMonitoring` signal (also: `PROCMOND_STANDALONE=1`) + #[arg(long)] + standalone: bool, } #[tokio::main] @@ -135,10 +230,10 @@ pub async fn main() -> anyhow::Result<()> { // (actor-mode `ProcmondMonitorCollector` and standalone-mode // `ProcessEventSource`). Sharing the `Arc` guarantees a single // policy (one concurrency cap, one algorithm list, one size - // budget) no matter which path the process runs through. See - // `daemoneye-lib::integrity::MultiAlgorithmHasher` rustdoc for the - // statelessness invariant that protects the shared `Arc` across - // trust domains. + // budget) no matter which path the process runs through. The + // hasher is `Send + Sync` and holds no per-request mutable state + // (see `daemoneye-lib::integrity::MultiAlgorithmHasher` rustdoc), + // so a single `Arc` is safe to share across worker tasks. // // If engine construction fails when `--compute-hashes` is set, the // error propagates immediately via `?` — there is no silent fallback. 
@@ -269,6 +364,7 @@ pub async fn main() -> anyhow::Result<()> { // Perform registration with daemoneye-agent info!("Registering with daemoneye-agent"); + let mut registration_failed = false; match registration_manager.register().await { Ok(response) => { info!( @@ -279,19 +375,23 @@ pub async fn main() -> anyhow::Result<()> { ); } Err(e) => { - // Log warning but continue - procmond can operate without registration - // in standalone/development scenarios + // Log warning and force standalone mode - an unregistered collector + // won't receive an agent BeginMonitoring broadcast, so subscribe-then-wait + // would block forever (END-297 review COR-003). warn!( error = %e, - "Registration failed, continuing in standalone mode" + "Registration failed, forcing standalone mode" ); + registration_failed = true; } } - // Start heartbeat task (only publishes when registered) - let heartbeat_task = - RegistrationManager::spawn_heartbeat_task(Arc::clone(&registration_manager)); - info!("Heartbeat task started"); + // Heartbeat task is spawned AFTER the control subscription is established + // below. Tokio's `interval::tick()` fires immediately on first poll, so + // if the heartbeat were spawned here it could publish a ready signal + // before our `control.collector.lifecycle` subscription is registered + // with the broker, causing an agent `BeginMonitoring` broadcast to be + // delivered to zero subscribers (END-297 PR #178 review, copilot). // ======================================================================== // Initialize RPC Service Handler @@ -394,23 +494,219 @@ pub async fn main() -> anyhow::Result<()> { } }); - // Begin monitoring immediately on startup. + // Subscribe-then-wait flow: subscribe to the lifecycle control topic + // and wait for the agent's BeginMonitoring signal before transitioning + // to Running state.
Falls back to immediate-start if the `--standalone` + // CLI flag is set, the `PROCMOND_STANDALONE=1` environment variable is + // set, or if the subscription to the broker fails. + // + // Startup-ordering invariant (END-297 plan + // `docs/plans/2026-04-18-001-feat-close-end-297-message-broker-plan.md`): + // the lifecycle subscription must be active when the agent publishes + // `BeginMonitoring`, otherwise the message is lost (eventbus README: + // at-most-once delivery). The agent triggers + // `broadcast_begin_monitoring` from `transition_to_steady_state`, + // which fires after all expected collectors register. // - // The collector does not wait for an explicit "begin monitoring" command - // from the agent. This makes procmond usable in isolation and in test - // environments without requiring the full agent/broker stack. + // Heartbeats don't directly trigger the broadcast today — no code path + // in `daemoneye-agent/src/broker_manager.rs` routes heartbeat arrival + // into `broadcast_begin_monitoring`; the broadcast is only fired from + // `transition_to_steady_state` after `wait_for_collectors_ready` + // returns. The spawn ordering below is therefore **defensive only**: + // the subscribe/spawn order could be reversed today without breaking + // correctness. It's maintained as a safeguard against future agent + // changes that might route heartbeats into the broadcast trigger. + // The heartbeat task is spawned AFTER the subscribe block completes — + // see the `spawn_heartbeat_task` call below. + let standalone_mode = cli.standalone + || std::env::var("PROCMOND_STANDALONE").ok().as_deref() == Some("1") + || registration_failed; + + // Build the set of control topics procmond must receive on in this + // subscription. Only `control.collector.lifecycle` is wired here — + // it carries the agent-broadcast BeginMonitoring signal that this + // task is waiting for. 
// - // TODO: Once daemoneye-eventbus supports control message subscriptions - // (MessageType::Control delivery to subscribers), add a control transport - // task here that subscribes to `control.collector.lifecycle` for - // BeginMonitoring signals and `control.collector.procmond` for RPC - // requests (HealthCheck, UpdateConfig, etc.). The current EventBusClient - // subscription mechanism only delivers MessageType::Event, not Control. - info!("Starting collection immediately on startup"); - if let Err(e) = actor_handle.begin_monitoring() { - error!(error = %e, "Failed to send BeginMonitoring command"); + // The per-collector RPC topic `control.collector.{id}` (HealthCheck, + // UpdateConfig, etc.) is intentionally NOT subscribed here. Adding + // it to this receiver would cause RPC messages to be consumed and + // silently discarded by the lifecycle wait task (END-297 review + // COR-002 / COR-004). RPC-over-bus delivery is a follow-up and will + // live on its own subscription owned by `RpcServiceHandler`. + let lifecycle_topic = "control.collector.lifecycle".to_owned(); + let control_topics = vec![lifecycle_topic.clone()]; + + let mut control_rx_opt: Option> = + None; + + if standalone_mode { + info!( + standalone = true, + "Standalone escape hatch active - starting collection immediately without waiting for agent" + ); + begin_monitoring_or_exit(&actor_handle, BeginMonitoringReason::StandaloneMode); + } else { + // Subscribe to control topics. If the broker is unreachable, log + // loudly and fall back to standalone so we never silently start + // collecting without coordination (see END-297 plan, + // "System-Wide Impact -> Error propagation"). + // + // Acquire the read guard only long enough to clone an owned + // `Arc`, then drop the guard before the async + // subscribe call — this satisfies the workspace + // `clippy::await_holding_lock = "deny"` rule. 
+ let subscriber_id = format!("procmond-{}", registration_manager.collector_id()); + let client_arc = { + let bus_guard = event_bus.read().await; + bus_guard.client_arc() + }; + let subscribe_result = match client_arc { + Some(client) => { + let subscription = daemoneye_eventbus::EventSubscription { + subscriber_id: subscriber_id.clone(), + capabilities: daemoneye_eventbus::SourceCaps { + event_types: vec!["control".to_owned()], + collectors: vec![], + max_priority: 0, + }, + event_filter: None, + correlation_filter: None, + topic_patterns: Some(control_topics.clone()), + enable_wildcards: true, + include_control: true, + }; + client + .subscribe_with_control(subscription) + .await + .map_err(|e| { + anyhow::anyhow!("subscribe_with_control failed on {subscriber_id}: {e}") + }) + } + None => Err(anyhow::anyhow!( + "EventBusConnector is not connected to broker; cannot subscribe to control topics for {subscriber_id}" + )), + }; + + match subscribe_result { + Ok((mut events_rx, control_rx)) => { + // debug! (not info!): subscription topology with per-collector IDs + // should stay out of default-level logs that may ship to less-trusted + // SIEM pipelines (END-297 review SEC-004). + debug!( + subscriber_id = %subscriber_id, + topics = ?control_topics, + "Subscribed to control topics; waiting for BeginMonitoring" + ); + info!("Subscribed to control topics; waiting for BeginMonitoring"); + // Drain task: subscribe_with_control returns (events_rx, control_rx) + // but procmond only uses the control channel. Consume and discard + // anything that arrives on events_rx so the channel doesn't backpressure + // or produce warn! spam for closed-sender (END-297 review COR-001). + tokio::spawn(async move { + while events_rx.recv().await.is_some() { + // Discard: procmond's subscription exists solely for control + // delivery; event traffic on these topics is agent-owned. 
+ } + }); + control_rx_opt = Some(control_rx); + } + Err(sub_err) => { + warn!( + error = %sub_err, + topics = ?control_topics, + "Failed to subscribe to control topics - falling back to standalone escape hatch" + ); + // Loud fallback: start monitoring immediately, but record + // the reason so operators see it in logs. + begin_monitoring_or_exit( + &actor_handle, + BeginMonitoringReason::BrokerUnreachable, + ); + } + } } + // Now that the control subscription is registered (or we have + // deliberately fallen back to standalone), it is safe to start the + // heartbeat task. Starting it earlier could cause the first tick to + // publish a ready-state heartbeat before the subscription landed with + // the broker, losing the agent's `BeginMonitoring` broadcast. + let heartbeat_task = + RegistrationManager::spawn_heartbeat_task(Arc::clone(&registration_manager)); + info!("Heartbeat task started"); + + // Spawn the lifecycle wait task (only if we have a control receiver). + // It waits for any control message on `control.collector.lifecycle` + // — any message on that topic is interpreted as BeginMonitoring for + // now (the current agent broadcasts a single BeginMonitoring JSON + // payload). Future message types on this topic should be filtered + // by inspecting the payload, which is intentionally left permissive + // here for forward compatibility. + // + // Defensive guard: because this subscription only requests + // `control.collector.lifecycle`, we should never observe any other + // topic on this receiver. If we do, it indicates a broker routing + // bug and is logged at `warn!` rather than silently discarded.
+ let wait_actor_handle = actor_handle.clone(); + let lifecycle_topic_task = lifecycle_topic.clone(); + let lifecycle_wait_task = control_rx_opt.map(|mut control_rx| { + tokio::spawn(async move { + // Single absolute deadline for the whole wait — a chatty control + // stream cannot reset the window by delivering non-lifecycle + // messages (END-297 PR #178 review, coderabbitai). + // `checked_add` and then `unwrap_or(now)` guards against the + // pathological far-future overflow case required by clippy's + // `arithmetic_side_effects = deny`. + let now = tokio::time::Instant::now(); + let deadline = now + .checked_add(BEGIN_MONITORING_WAIT_TIMEOUT) + .unwrap_or(now); + loop { + match tokio::time::timeout_at(deadline, control_rx.recv()).await { + Ok(Some(msg)) => { + if msg.topic == lifecycle_topic_task { + info!( + topic = %msg.topic, + "Received lifecycle control message - transitioning to Running" + ); + begin_monitoring_or_exit( + &wait_actor_handle, + BeginMonitoringReason::LifecycleSignal, + ); + return; + } + warn!( + topic = %msg.topic, + expected_topic = %lifecycle_topic_task, + "Received unexpected control topic on lifecycle subscription - ignoring (broker routing bug?)" + ); + } + Ok(None) => { + error!( + "Control channel closed before BeginMonitoring received - falling back to standalone collection" + ); + begin_monitoring_or_exit( + &wait_actor_handle, + BeginMonitoringReason::ChannelClosed, + ); + return; + } + Err(_elapsed) => { + error!( + timeout_secs = BEGIN_MONITORING_WAIT_TIMEOUT.as_secs(), + "Timed out waiting for BeginMonitoring - falling back to standalone collection" + ); + begin_monitoring_or_exit( + &wait_actor_handle, + BeginMonitoringReason::WaitTimeout, + ); + return; + } + } + } + }) + }); + // Keep RPC service reference alive for future control transport integration let _rpc_service = rpc_service; let _control_event_bus = Arc::clone(&event_bus); @@ -471,6 +767,13 @@ pub async fn main() -> anyhow::Result<()> { heartbeat_task.abort(); 
info!("Heartbeat task aborted"); + // Clean up lifecycle wait task (if it is still running — e.g. shutdown + // fired before BeginMonitoring arrived). + if let Some(wait_task) = lifecycle_wait_task { + wait_task.abort(); + info!("Lifecycle wait task aborted"); + } + // Wait for event consumer to exit naturally (channel sender is dropped) // Use a timeout to avoid hanging indefinitely match tokio::time::timeout(Duration::from_secs(5), event_consumer_task).await { @@ -495,7 +798,7 @@ pub async fn main() -> anyhow::Result<()> { .with_max_event_sources(1) .with_event_buffer_size(1000) .with_shutdown_timeout(Duration::from_secs(30)) - .with_health_check_interval(Duration::from_secs(60)) + .with_health_check_interval(Duration::from_mins(1)) .with_telemetry(true) .with_debug_logging(cli.log_level == "debug"); diff --git a/procmond/src/monitor_collector.rs b/procmond/src/monitor_collector.rs index e744f798..8c59c983 100644 --- a/procmond/src/monitor_collector.rs +++ b/procmond/src/monitor_collector.rs @@ -1351,7 +1351,7 @@ mod tests { // Test without real-time capability with slow collection interval let slow_config = ProcmondMonitorConfig { base_config: MonitorCollectorConfig { - collection_interval: Duration::from_secs(60), + collection_interval: Duration::from_mins(1), ..Default::default() }, ..Default::default() @@ -1481,7 +1481,7 @@ mod tests { // Create updated config with new interval let new_config = ProcmondMonitorConfig { base_config: MonitorCollectorConfig { - collection_interval: Duration::from_secs(60), + collection_interval: Duration::from_mins(1), ..Default::default() }, ..Default::default() @@ -1498,7 +1498,7 @@ mod tests { // Verify config was applied assert_eq!( collector.config.base_config.collection_interval, - Duration::from_secs(60) + Duration::from_mins(1) ); drop(handle); @@ -1539,7 +1539,7 @@ mod tests { // Test maximum reasonable interval let max_config = ProcmondMonitorConfig { base_config: MonitorCollectorConfig { - collection_interval: 
Duration::from_secs(3600), + collection_interval: Duration::from_hours(1), ..Default::default() }, ..Default::default() @@ -1677,14 +1677,14 @@ mod tests { let handle = ActorHandle::new(tx); // Send adjust interval message - let result = handle.adjust_interval(Duration::from_secs(60)); + let result = handle.adjust_interval(Duration::from_mins(1)); assert!(result.is_ok()); // Verify the message was received let msg = rx.recv().await.unwrap(); match msg { ActorMessage::AdjustInterval { new_interval } => { - assert_eq!(new_interval, Duration::from_secs(60)); + assert_eq!(new_interval, Duration::from_mins(1)); } other => panic!("Expected AdjustInterval message, got: {other:?}"), } @@ -1700,7 +1700,7 @@ mod tests { let _ = handle.adjust_interval(Duration::from_secs(30)); // Next call should fail - let result = handle.adjust_interval(Duration::from_secs(60)); + let result = handle.adjust_interval(Duration::from_mins(1)); assert!(result.is_err()); } @@ -1945,7 +1945,7 @@ mod tests { let new_config = ProcmondMonitorConfig { base_config: MonitorCollectorConfig { - collection_interval: Duration::from_secs(60), + collection_interval: Duration::from_mins(1), ..Default::default() }, ..Default::default() @@ -2052,12 +2052,12 @@ mod tests { let (mut collector, _handle) = create_collector_with_channel(db_manager, config).unwrap(); let msg = ActorMessage::AdjustInterval { - new_interval: Duration::from_secs(60), + new_interval: Duration::from_mins(1), }; let should_exit = collector.handle_message(msg); assert!(!should_exit); - assert_eq!(collector.pending_interval, Some(Duration::from_secs(60))); + assert_eq!(collector.pending_interval, Some(Duration::from_mins(1))); } // ============================================================================ @@ -2572,7 +2572,7 @@ mod tests { // Test UpdateConfig variant with valid config let new_config = ProcmondMonitorConfig { base_config: MonitorCollectorConfig { - collection_interval: Duration::from_secs(60), + collection_interval: 
Duration::from_mins(1), ..Default::default() }, ..Default::default() diff --git a/procmond/src/registration.rs b/procmond/src/registration.rs index 68184ef8..6c231d9f 100644 --- a/procmond/src/registration.rs +++ b/procmond/src/registration.rs @@ -908,9 +908,9 @@ mod tests { ); // After setting assigned interval, uses that instead - *manager.assigned_heartbeat_interval.write().await = Some(Duration::from_secs(60)); + *manager.assigned_heartbeat_interval.write().await = Some(Duration::from_mins(1)); let updated_interval = manager.effective_heartbeat_interval().await; - assert_eq!(updated_interval, Duration::from_secs(60)); + assert_eq!(updated_interval, Duration::from_mins(1)); } #[tokio::test] @@ -1799,7 +1799,7 @@ mod tests { collector_type: "custom-type".to_owned(), version: "2.0.0".to_owned(), capabilities: vec!["cap1".to_owned(), "cap2".to_owned()], - heartbeat_interval: Duration::from_secs(60), + heartbeat_interval: Duration::from_mins(1), registration_timeout: Duration::from_secs(20), max_retries: 5, attributes: HashMap::new(), @@ -1809,7 +1809,7 @@ mod tests { assert_eq!(config.collector_type, "custom-type"); assert_eq!(config.version, "2.0.0"); assert_eq!(config.capabilities.len(), 2); - assert_eq!(config.heartbeat_interval, Duration::from_secs(60)); + assert_eq!(config.heartbeat_interval, Duration::from_mins(1)); assert_eq!(config.registration_timeout, Duration::from_secs(20)); assert_eq!(config.max_retries, 5); } diff --git a/procmond/src/rpc_service.rs b/procmond/src/rpc_service.rs index ef95f13d..9400f323 100644 --- a/procmond/src/rpc_service.rs +++ b/procmond/src/rpc_service.rs @@ -1024,7 +1024,7 @@ mod tests { assert_eq!( config.base_config.collection_interval, - Duration::from_secs(60) + Duration::from_mins(1) ); assert_eq!(config.process_config.max_processes, 500); assert!(config.process_config.compute_executable_hashes); @@ -1092,7 +1092,7 @@ mod tests { target: "control.collector.procmond".to_string(), operation: CollectorOperation::HealthCheck, 
payload: RpcPayload::Empty, - timestamp: SystemTime::now() - Duration::from_secs(60), + timestamp: SystemTime::now() - Duration::from_mins(1), deadline: SystemTime::now() - Duration::from_secs(30), // Past deadline correlation_metadata: RpcCorrelationMetadata::new("corr-expired".to_string()), }; @@ -1256,7 +1256,7 @@ mod tests { collector_id: "my-collector".to_owned(), control_topic: "custom.topic".to_owned(), response_topic_prefix: "custom.response".to_owned(), - default_timeout: Duration::from_secs(60), + default_timeout: Duration::from_mins(1), max_concurrent_requests: 20, }; let handler = RpcServiceHandler::new(actor_handle, event_bus, config.clone()); @@ -1265,7 +1265,7 @@ mod tests { assert_eq!(retrieved_config.collector_id, "my-collector"); assert_eq!(retrieved_config.control_topic, "custom.topic"); assert_eq!(retrieved_config.response_topic_prefix, "custom.response"); - assert_eq!(retrieved_config.default_timeout, Duration::from_secs(60)); + assert_eq!(retrieved_config.default_timeout, Duration::from_mins(1)); assert_eq!(retrieved_config.max_concurrent_requests, 20); } @@ -1510,7 +1510,7 @@ mod tests { // Verify the config was built correctly assert_eq!( config.base_config.collection_interval, - Duration::from_secs(60) + Duration::from_mins(1) ); assert_eq!(config.process_config.max_processes, 500); assert!(config.process_config.collect_enhanced_metadata); @@ -2113,7 +2113,7 @@ mod tests { target: "control.collector.procmond".to_string(), operation: CollectorOperation::HealthCheck, payload: RpcPayload::Empty, - timestamp: SystemTime::now() - Duration::from_secs(60), + timestamp: SystemTime::now() - Duration::from_mins(1), deadline: SystemTime::now() - Duration::from_secs(30), // Already expired correlation_metadata: RpcCorrelationMetadata::new("corr-stats-timeout".to_string()), }; @@ -2319,7 +2319,7 @@ mod tests { // Long default timeout let config = RpcServiceConfig { - default_timeout: Duration::from_secs(60), + default_timeout: Duration::from_mins(1), 
..RpcServiceConfig::default() }; let handler = RpcServiceHandler::new(actor_handle, event_bus, config); diff --git a/procmond/tests/actor_mode_integration_tests.rs b/procmond/tests/actor_mode_integration_tests.rs index ec47ed72..a9dc4f42 100644 --- a/procmond/tests/actor_mode_integration_tests.rs +++ b/procmond/tests/actor_mode_integration_tests.rs @@ -38,7 +38,9 @@ use collector_core::event::ProcessEvent; use daemoneye_eventbus::rpc::{ CollectorOperation, RpcCorrelationMetadata, RpcPayload, RpcRequest, RpcStatus, }; -use procmond::event_bus_connector::{BackpressureSignal, EventBusConnector, ProcessEventType}; +use procmond::event_bus_connector::{ + BackpressureSignal, EventBusConnector, EventBusConnectorError, ProcessEventType, +}; use procmond::monitor_collector::{ACTOR_CHANNEL_CAPACITY, ActorHandle, ActorMessage}; use procmond::registration::{RegistrationConfig, RegistrationManager, RegistrationState}; use procmond::rpc_service::{RpcServiceConfig, RpcServiceHandler}; @@ -346,7 +348,7 @@ async fn test_deadline_exceeded_returns_immediately() { target: "control.collector.procmond".to_string(), operation: CollectorOperation::HealthCheck, payload: RpcPayload::Empty, - timestamp: SystemTime::now() - Duration::from_secs(60), + timestamp: SystemTime::now() - Duration::from_mins(1), deadline: SystemTime::now() - Duration::from_secs(30), // Past deadline correlation_metadata: RpcCorrelationMetadata::new("test-expired".to_string()), }; @@ -410,6 +412,29 @@ async fn test_heartbeat_skipped_when_not_registered() { } /// Verifies that RegistrationManager and RpcServiceHandler share the same EventBusConnector. +/// Error path (Unit 1 / END-297): `subscribe_with_control` fails with a +/// connection error when the EventBusConnector is not connected to a broker. +/// This is the path procmond uses in main.rs to detect unreachable brokers +/// and fall back to the standalone escape hatch rather than silently +/// starting collection without coordination. 
+#[tokio::test] +async fn test_subscribe_with_control_fails_when_not_connected() { + let (connector, _temp_dir) = create_isolated_connector().await; + + // Connector is created but never connected — simulates broker unreachable. + let result = connector + .subscribe_with_control( + "procmond-test", + vec!["control.collector.lifecycle".to_string()], + ) + .await; + + assert!( + matches!(result, Err(EventBusConnectorError::Connection(_))), + "expected Connection error variant, got: {result:?}" + ); +} + #[tokio::test] async fn test_shared_event_bus_between_components() { let (actor_handle, _rx) = create_test_actor(); diff --git a/procmond/tests/chaos_tests.rs b/procmond/tests/chaos_tests.rs index b09da428..842a55ce 100644 --- a/procmond/tests/chaos_tests.rs +++ b/procmond/tests/chaos_tests.rs @@ -773,7 +773,7 @@ async fn test_concurrent_interval_adjustments() { let intervals = vec![ Duration::from_secs(30), Duration::from_secs(45), - Duration::from_secs(60), + Duration::from_mins(1), Duration::from_secs(30), // Back to original ]; diff --git a/procmond/tests/integration_tests.rs b/procmond/tests/integration_tests.rs index 49344e46..c6b8b7dd 100644 --- a/procmond/tests/integration_tests.rs +++ b/procmond/tests/integration_tests.rs @@ -71,14 +71,14 @@ impl TestDatabase { /// Creates a test configuration optimized for integration testing. 
fn create_test_config() -> ProcessSourceConfig { ProcessSourceConfig { - collection_interval: Duration::from_millis(1000), // More realistic interval - collect_enhanced_metadata: false, // Disabled for speed in tests - max_processes_per_cycle: 10, // Very small limit for testing - compute_executable_hashes: false, // Disabled for speed + collection_interval: Duration::from_secs(1), // More realistic interval + collect_enhanced_metadata: false, // Disabled for speed in tests + max_processes_per_cycle: 10, // Very small limit for testing + compute_executable_hashes: false, // Disabled for speed max_events_in_flight: 50, collection_timeout: Duration::from_secs(10), // Longer timeout shutdown_timeout: Duration::from_secs(5), - max_backpressure_wait: Duration::from_millis(1000), + max_backpressure_wait: Duration::from_secs(1), event_batch_size: 5, batch_timeout: Duration::from_millis(500), } diff --git a/procmond/tests/lifecycle_integration_tests.rs b/procmond/tests/lifecycle_integration_tests.rs index 1363e38b..b52fb654 100644 --- a/procmond/tests/lifecycle_integration_tests.rs +++ b/procmond/tests/lifecycle_integration_tests.rs @@ -40,7 +40,7 @@ fn create_test_process_event( name: name.to_string(), executable_path: executable_path.map(|s| s.to_string()), command_line: command_line.iter().map(|s| s.to_string()).collect(), - start_time: Some(SystemTime::now() - Duration::from_secs(60)), + start_time: Some(SystemTime::now() - Duration::from_mins(1)), cpu_usage: Some(1.0), memory_usage: Some(1024 * 1024), executable_hash: Some("abc123".to_string()), diff --git a/procmond/tests/lifecycle_tracking_tests.rs b/procmond/tests/lifecycle_tracking_tests.rs index 97d21211..f12f3312 100644 --- a/procmond/tests/lifecycle_tracking_tests.rs +++ b/procmond/tests/lifecycle_tracking_tests.rs @@ -70,7 +70,7 @@ fn create_test_process_event( name: name.to_string(), executable_path: executable_path.map(|s| s.to_string()), command_line: command_line.iter().map(|s| s.to_string()).collect(), - 
start_time: Some(SystemTime::now() - Duration::from_secs(60)), + start_time: Some(SystemTime::now() - Duration::from_mins(1)), cpu_usage: Some(1.0), memory_usage: Some(1024 * 1024), executable_hash: Some("abc123".to_string()), @@ -468,7 +468,7 @@ fn test_stop_detection_runtime_duration_calculated() { let mut tracker = ProcessLifecycleTracker::new(config); // Create process with known start time - let process_start_time = SystemTime::now() - Duration::from_secs(120); // Started 2 minutes ago + let process_start_time = SystemTime::now() - Duration::from_mins(2); let initial_processes = vec![create_process_event_with_start_time( 500, "long_running_process", @@ -1206,7 +1206,7 @@ fn test_snapshot_conversion_roundtrip() { "--arg1".to_string(), "--arg2=value".to_string(), ], - start_time: Some(SystemTime::now() - Duration::from_secs(3600)), + start_time: Some(SystemTime::now() - Duration::from_hours(1)), cpu_usage: Some(25.5), memory_usage: Some(256 * 1024 * 1024), executable_hash: Some("fedcba987654321".to_string()), diff --git a/procmond/tests/rpc_integration_tests.rs b/procmond/tests/rpc_integration_tests.rs index 5b711c38..b8abd011 100644 --- a/procmond/tests/rpc_integration_tests.rs +++ b/procmond/tests/rpc_integration_tests.rs @@ -420,7 +420,7 @@ async fn test_config_update_applies_changes() { ActorMessage::UpdateConfig { config, respond_to } => { assert_eq!( config.base_config.collection_interval, - Duration::from_secs(60) + Duration::from_mins(1) ); assert_eq!(config.process_config.max_processes, 500); assert!(config.process_config.collect_enhanced_metadata); @@ -825,7 +825,7 @@ async fn test_expired_deadline_returns_timeout() { target: "control.collector.procmond".to_string(), operation: CollectorOperation::HealthCheck, payload: RpcPayload::Empty, - timestamp: SystemTime::now() - Duration::from_secs(60), + timestamp: SystemTime::now() - Duration::from_mins(1), deadline: SystemTime::now() - Duration::from_secs(30), // Past deadline correlation_metadata: 
RpcCorrelationMetadata::new("corr-expired".to_string()), }; @@ -1114,7 +1114,7 @@ async fn test_custom_handler_configuration() { collector_id: "custom-collector".to_string(), control_topic: "custom.control.topic".to_string(), response_topic_prefix: "custom.response".to_string(), - default_timeout: Duration::from_secs(60), + default_timeout: Duration::from_mins(1), max_concurrent_requests: 20, }; diff --git a/procmond/tests/snapshots/cli__procmond_help.snap b/procmond/tests/snapshots/cli__procmond_help.snap index b31c525f..86626c10 100644 --- a/procmond/tests/snapshots/cli__procmond_help.snap +++ b/procmond/tests/snapshots/cli__procmond_help.snap @@ -13,5 +13,6 @@ Options: --max-processes Maximum processes to collect per cycle (0 = unlimited) [default: 0] --enhanced-metadata Enable enhanced metadata collection --compute-hashes Enable executable hashing + --standalone Start monitoring immediately without waiting for an agent `BeginMonitoring` signal (also: `PROCMOND_STANDALONE=1`) -h, --help Print help -V, --version Print version