From a4a3e8c0d0aa355d5b487b315018409020c38053 Mon Sep 17 00:00:00 2001 From: diego Date: Mon, 23 Mar 2026 13:12:01 -0300 Subject: [PATCH 1/6] message layer firt checkpoint --- .../2026-03-20-iteration-orchestration.md | 1217 ++++++++++++ .../2026-03-21-void-box-runtime-alignment.md | 327 ++++ .../plans/2026-03-22-message-box-v0.md | 175 ++ .../plans/2026-03-22-persistent-dispatcher.md | 367 ++++ .../plans/2026-03-22-search-strategy.md | 302 +++ ...ox-orchestration-runtime-readiness-v0.1.md | 374 ++++ spec/void-control-iteration-spec-v0.1.md | 568 ++++++ spec/void-control-iteration-spec-v0.2.md | 1653 +++++++++++++++++ spec/void-control-message-box-spec-v0.1.md | 679 +++++++ spec/void-control-runtime-spec-v0.2.md | 20 +- src/bin/voidctl.rs | 381 +++- src/bridge.rs | 1476 ++++++++++++--- src/contract/api.rs | 1 + src/contract/compat.rs | 7 +- src/contract/error.rs | 6 + src/lib.rs | 1 + src/orchestration/events.rs | 166 ++ src/orchestration/message_box.rs | 193 ++ src/orchestration/mod.rs | 42 + src/orchestration/policy.rs | 162 ++ src/orchestration/reconcile.rs | 42 + src/orchestration/scheduler.rs | 169 ++ src/orchestration/scoring.rs | 105 ++ src/orchestration/service.rs | 1210 ++++++++++++ src/orchestration/spec.rs | 87 + src/orchestration/store.rs | 13 + src/orchestration/store/fs.rs | 912 +++++++++ src/orchestration/strategy.rs | 383 ++++ src/orchestration/types.rs | 256 +++ src/orchestration/variation.rs | 108 ++ src/runtime/mock.rs | 67 +- src/runtime/mod.rs | 70 + src/runtime/void_box.rs | 524 +++++- tests/execution_artifact_collection.rs | 203 ++ tests/execution_bridge.rs | 491 +++++ tests/execution_bridge_live.rs | 515 +++++ tests/execution_dry_run.rs | 117 ++ tests/execution_event_replay.rs | 145 ++ tests/execution_message_box.rs | 580 ++++++ tests/execution_reconciliation.rs | 157 ++ tests/execution_scheduler.rs | 335 ++++ tests/execution_search_strategy.rs | 200 ++ tests/execution_spec_validation.rs | 130 ++ tests/execution_strategy_acceptance.rs | 549 
++++++ tests/execution_swarm_strategy.rs | 268 +++ tests/execution_worker.rs | 878 +++++++++ tests/strategy_scenarios.rs | 673 +++++++ tests/void_box_contract.rs | 320 +++- 48 files changed, 17388 insertions(+), 236 deletions(-) create mode 100644 docs/superpowers/plans/2026-03-20-iteration-orchestration.md create mode 100644 docs/superpowers/plans/2026-03-21-void-box-runtime-alignment.md create mode 100644 docs/superpowers/plans/2026-03-22-message-box-v0.md create mode 100644 docs/superpowers/plans/2026-03-22-persistent-dispatcher.md create mode 100644 docs/superpowers/plans/2026-03-22-search-strategy.md create mode 100644 spec/void-box-orchestration-runtime-readiness-v0.1.md create mode 100644 spec/void-control-iteration-spec-v0.1.md create mode 100644 spec/void-control-iteration-spec-v0.2.md create mode 100644 spec/void-control-message-box-spec-v0.1.md create mode 100644 src/orchestration/events.rs create mode 100644 src/orchestration/message_box.rs create mode 100644 src/orchestration/mod.rs create mode 100644 src/orchestration/policy.rs create mode 100644 src/orchestration/reconcile.rs create mode 100644 src/orchestration/scheduler.rs create mode 100644 src/orchestration/scoring.rs create mode 100644 src/orchestration/service.rs create mode 100644 src/orchestration/spec.rs create mode 100644 src/orchestration/store.rs create mode 100644 src/orchestration/store/fs.rs create mode 100644 src/orchestration/strategy.rs create mode 100644 src/orchestration/types.rs create mode 100644 src/orchestration/variation.rs create mode 100644 tests/execution_artifact_collection.rs create mode 100644 tests/execution_bridge.rs create mode 100644 tests/execution_bridge_live.rs create mode 100644 tests/execution_dry_run.rs create mode 100644 tests/execution_event_replay.rs create mode 100644 tests/execution_message_box.rs create mode 100644 tests/execution_reconciliation.rs create mode 100644 tests/execution_scheduler.rs create mode 100644 tests/execution_search_strategy.rs 
create mode 100644 tests/execution_spec_validation.rs create mode 100644 tests/execution_strategy_acceptance.rs create mode 100644 tests/execution_swarm_strategy.rs create mode 100644 tests/execution_worker.rs create mode 100644 tests/strategy_scenarios.rs diff --git a/docs/superpowers/plans/2026-03-20-iteration-orchestration.md b/docs/superpowers/plans/2026-03-20-iteration-orchestration.md new file mode 100644 index 0000000..b10c7cf --- /dev/null +++ b/docs/superpowers/plans/2026-03-20-iteration-orchestration.md @@ -0,0 +1,1217 @@ +# Iteration Orchestration Implementation Plan + +> **For agentic workers:** REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Implement the v0.2 iteration control plane so `void-control` can create and manage multi-iteration `Execution`s above the existing run-level `void-box` client. + +**Architecture:** Add a new orchestration layer under `src/orchestration/` that is explicitly separate from the existing `src/contract/` and `src/runtime/` run-level boundary. The orchestration layer owns `ExecutionSpec` validation, durable execution state, event reduction, scheduling, candidate dispatch, artifact collection, scoring, checkpointing, and policy updates, while continuing to consume `VoidBoxRuntimeClient` only through the current runtime contract. + +**Tech Stack:** Rust 2021, existing `serde` feature gates, current TCP/HTTP runtime client, filesystem-backed persistence for the first cut, and Cargo test integration. + +--- + +## Scope Check + +The spec spans multiple subsystems: + +- orchestration domain model and persistence +- control-loop execution engine and scheduler +- strategy/evaluation/variation logic +- API and bridge exposure +- observability, pause/resume, reconciliation, and dry-run + +This is large enough that it could be split into separate plans. 
To keep momentum and preserve cross-cutting invariants, this document keeps one plan but breaks it into independently shippable chunks. Each chunk should end in passing tests and a focused commit. + +## File Map + +### Existing files to keep as-is conceptually + +- Modify: `src/lib.rs` +- Modify: `src/bin/voidctl.rs` +- Modify: `src/bridge.rs` +- Modify: `src/runtime/mock.rs` +- Modify: `src/runtime/void_box.rs` +- Modify: `Cargo.toml` +- Modify: `README.md` +- Modify: `tests/void_box_contract.rs` + +### New orchestration module tree + +- Create: `src/orchestration/mod.rs` + Responsibility: public exports for execution orchestration. +- Create: `src/orchestration/types.rs` + Responsibility: `Execution`, `Iteration`, `Candidate`, `ExecutionResult`, `ExecutionStatus`, accumulator, artifact refs. +- Create: `src/orchestration/spec.rs` + Responsibility: `ExecutionSpec`, mode-specific config, validation, dry-run input parsing. +- Create: `src/orchestration/policy.rs` + Responsibility: orchestration policy model from spec section 14 and mutable/immutable policy checks. +- Create: `src/orchestration/events.rs` + Responsibility: control-plane event envelope, event types, payload structs, event reduction helpers. +- Create: `src/orchestration/store.rs` + Responsibility: persistence traits for executions, events, queues, accumulator, and reconciliation snapshots. +- Create: `src/orchestration/store/fs.rs` + Responsibility: filesystem-backed store for the first implementation. +- Create: `src/orchestration/strategy.rs` + Responsibility: `IterationStrategy` trait and registry. +- Create: `src/orchestration/strategy/swarm.rs` + Responsibility: `SwarmStrategy`, inbox materialization, candidate planning hooks. +- Create: `src/orchestration/scoring.rs` + Responsibility: deterministic scoring, ranking, tie-breaking. +- Create: `src/orchestration/variation.rs` + Responsibility: parameter-space, explicit, and leader-directed candidate override generation. 
+- Create: `src/orchestration/artifacts.rs` + Responsibility: artifact retrieval, parsing, and mode-aware validation. +- Create: `src/orchestration/scheduler.rs` + Responsibility: global pool, per-execution limits, queue ordering, dispatch admission. +- Create: `src/orchestration/loop.rs` + Responsibility: shared control loop, dispatch/collect/evaluate/reduce/stop sequencing. +- Create: `src/orchestration/reconcile.rs` + Responsibility: restart recovery, replay, orphaned handle handling. +- Create: `src/orchestration/service.rs` + Responsibility: high-level service API for create/list/inspect/pause/resume/cancel/patch-policy/dry-run. +- Create: `src/orchestration/http.rs` + Responsibility: HTTP request/response models for execution endpoints if bridge continues using `tiny_http`. + +### New tests + +- Create: `tests/execution_spec_validation.rs` +- Create: `tests/execution_dry_run.rs` +- Create: `tests/execution_event_replay.rs` +- Create: `tests/execution_scheduler.rs` +- Create: `tests/execution_pause_resume.rs` +- Create: `tests/execution_policy_patch.rs` +- Create: `tests/execution_swarm_strategy.rs` +- Create: `tests/execution_artifact_collection.rs` +- Create: `tests/execution_reconciliation.rs` + +### Optional later UI follow-up + +- Defer from this plan unless explicitly requested: + `web/void-control-ux/*` + +The spec includes UI visibility, but the backend orchestration contract should land first so the UI can bind to stable execution endpoints rather than internal scaffolding. + +## Delivery Strategy + +Implement in this order: + +1. domain model, persistence shape, and validation +2. pure strategy/evaluation logic +3. scheduler and control loop using `MockRuntime` +4. artifact retrieval and reconciliation +5. HTTP/API exposure +6. operational features: pause/resume, policy patch, dry-run, observability + +That order keeps early milestones testable without requiring a full live daemon. 
+ +## Chunk 1: Domain Model, Validation, and Persistence Skeleton + +### Task 1: Add orchestration module exports + +**Files:** +- Modify: `src/lib.rs` +- Create: `src/orchestration/mod.rs` + +- [ ] **Step 1: Write the failing compile test by referencing the future module** + +Add a smoke test in `src/lib.rs` or `tests/execution_spec_validation.rs` that imports: + +```rust +use void_control::orchestration::ExecutionSpec; +``` + +Expected: compile failure because `orchestration` does not exist. + +- [ ] **Step 2: Run test to verify it fails** + +Run: `cargo test execution_spec_validation -- --nocapture` +Expected: compile error mentioning missing `orchestration`. + +- [ ] **Step 3: Add minimal module exports** + +Create: + +```rust +// src/orchestration/mod.rs +pub mod spec; + +pub use spec::ExecutionSpec; +``` + +Modify `src/lib.rs`: + +```rust +pub mod orchestration; +``` + +- [ ] **Step 4: Run test to verify the module resolves** + +Run: `cargo test execution_spec_validation -- --nocapture` +Expected: next failure moves to missing `ExecutionSpec` details. + +- [ ] **Step 5: Commit** + +```bash +git add src/lib.rs src/orchestration/mod.rs tests/execution_spec_validation.rs +git commit -m "orchestration: add module scaffold" +``` + +### Task 2: Implement `ExecutionSpec` and policy validation + +**Files:** +- Create: `src/orchestration/spec.rs` +- Create: `src/orchestration/policy.rs` +- Test: `tests/execution_spec_validation.rs` + +- [ ] **Step 1: Write failing validation tests from spec section 14 and 25** + +Cover at least: + +```rust +#[test] +fn rejects_unbounded_execution() {} + +#[test] +fn rejects_concurrency_above_global_pool() {} + +#[test] +fn rejects_threshold_without_min_score() {} + +#[test] +fn accepts_exhaustive_with_max_iterations() {} + +#[test] +fn rejects_unknown_mode() {} +``` + +- [ ] **Step 2: Run targeted tests** + +Run: `cargo test --test execution_spec_validation -- --nocapture` +Expected: failures for missing types and `validate()`. 
+ +- [ ] **Step 3: Implement minimal validation** + +Define: + +```rust +pub struct ExecutionSpec { + pub mode: String, + pub goal: String, + pub workflow: WorkflowTemplateRef, + pub policy: OrchestrationPolicy, + pub evaluation: EvaluationConfig, + pub variation: VariationConfig, + pub swarm: Option<SwarmConfig>, +} + +impl ExecutionSpec { + pub fn validate(&self, global: &GlobalConfig) -> Result<(), SpecValidationError> { + // enforce section 14.6 and section 25.3 + } +} +``` + +Implement only rules already stated in the spec. Do not infer extra policy semantics. + +- [ ] **Step 4: Re-run tests** + +Run: `cargo test --test execution_spec_validation -- --nocapture` +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +git add src/orchestration/spec.rs src/orchestration/policy.rs tests/execution_spec_validation.rs +git commit -m "orchestration: validate execution specs" +``` + +### Task 3: Add execution state and control-plane events + +**Files:** +- Create: `src/orchestration/types.rs` +- Create: `src/orchestration/events.rs` +- Test: `tests/execution_event_replay.rs` + +- [ ] **Step 1: Write failing tests for event-sourced state reduction** + +Cover: + +```rust +#[test] +fn execution_state_advances_from_control_plane_events() {} + +#[test] +fn warning_events_do_not_advance_execution_state() {} + +#[test] +fn accumulator_is_reconstructible_from_event_log() {} +``` + +- [ ] **Step 2: Run tests to verify failures** + +Run: `cargo test --test execution_event_replay -- --nocapture` +Expected: missing reducer/types. + +- [ ] **Step 3: Implement event and reducer types** + +Include: + +- `ExecutionCreated` +- `IterationPlanned` +- `IterationStarted` +- `CandidateScheduled` +- `CandidateQueued` +- `CandidateDispatched` +- `CandidateCompleted` +- `CandidateScored` +- `IterationCompleted` +- `ExecutionCompleted` +- `ExecutionFailed` +- `ExecutionCanceled` +- operational side-channel events from section 22 + +Keep warning events persisted but excluded from state transitions. 
+ +- [ ] **Step 4: Re-run tests** + +Run: `cargo test --test execution_event_replay -- --nocapture` +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +git add src/orchestration/types.rs src/orchestration/events.rs tests/execution_event_replay.rs +git commit -m "orchestration: add execution state and events" +``` + +### Task 4: Add a filesystem-backed orchestration store + +**Files:** +- Create: `src/orchestration/store.rs` +- Create: `src/orchestration/store/fs.rs` +- Test: `tests/execution_event_replay.rs` + +- [ ] **Step 1: Write failing persistence tests** + +Cover: + +```rust +#[test] +fn store_round_trips_execution_and_events() {} + +#[test] +fn store_can_reload_accumulator_after_restart() {} +``` + +- [ ] **Step 2: Run targeted tests** + +Run: `cargo test --test execution_event_replay store_ -- --nocapture` +Expected: missing store trait/implementation. + +- [ ] **Step 3: Implement a narrow persistence interface** + +Start with: + +```rust +pub trait ExecutionStore { + fn create_execution(&self, execution: &Execution) -> Result<()>; + fn append_event(&self, event: &ControlEventEnvelope) -> Result<()>; + fn load_execution(&self, execution_id: &str) -> Result<Execution>; + fn list_active_executions(&self) -> Result<Vec<Execution>>; + fn save_accumulator(&self, execution_id: &str, acc: &ExecutionAccumulator) -> Result<()>; +} +``` + +Use one directory per execution under a configured root such as `target/tmp/executions/`. + +- [ ] **Step 4: Re-run tests** + +Run: `cargo test --test execution_event_replay -- --nocapture` +Expected: PASS. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/orchestration/store.rs src/orchestration/store/fs.rs tests/execution_event_replay.rs +git commit -m "orchestration: add filesystem execution store" +``` + +## Chunk 2: Pure Strategy, Evaluation, and Variation + +### Task 5: Implement deterministic scoring and ranking + +**Files:** +- Create: `src/orchestration/scoring.rs` +- Test: `tests/execution_swarm_strategy.rs` + +- [ ] **Step 1: Write failing tests for weighted metrics and tie-breaking** + +Cover: + +```rust +#[test] +fn weighted_metrics_normalizes_within_iteration() {} + +#[test] +fn failed_candidate_scores_zero() {} + +#[test] +fn best_result_uses_tie_breaking_after_score() {} +``` + +- [ ] **Step 2: Run tests** + +Run: `cargo test --test execution_swarm_strategy scoring -- --nocapture` +Expected: failures for missing scorer. + +- [ ] **Step 3: Implement scoring exactly per section 15** + +Provide a scorer API: + +```rust +pub trait ScoringFunction { + fn score_iteration(&self, outputs: &[CandidateOutput]) -> Vec<CandidateScore>; +} +``` + +Do not compare normalized scores across iterations for `best_result`; use the raw metric comparison rule from section 15.5 plus section 19.3. + +- [ ] **Step 4: Re-run tests** + +Run: `cargo test --test execution_swarm_strategy scoring -- --nocapture` +Expected: PASS. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/orchestration/scoring.rs tests/execution_swarm_strategy.rs +git commit -m "orchestration: add deterministic scoring" +``` + +### Task 6: Implement candidate variation generators + +**Files:** +- Create: `src/orchestration/variation.rs` +- Test: `tests/execution_swarm_strategy.rs` + +- [ ] **Step 1: Write failing tests for variation sources** + +Cover: + +```rust +#[test] +fn parameter_space_random_respects_candidates_per_iteration() {} + +#[test] +fn parameter_space_sequential_preserves_order() {} + +#[test] +fn explicit_variation_cycles_through_overrides() {} + +#[test] +fn leader_directed_proposals_are_validated_before_use() {} +``` + +- [ ] **Step 2: Run tests** + +Run: `cargo test --test execution_swarm_strategy variation -- --nocapture` +Expected: failures for missing generator. + +- [ ] **Step 3: Implement minimal generators** + +Important rules: + +- `parameter_space` supports `random` and `sequential` +- `explicit` cycles through provided sets +- `leader_directed` only consumes validated proposals from `intents.json` +- override application is shallow replacement using dot-paths + +- [ ] **Step 4: Re-run tests** + +Run: `cargo test --test execution_swarm_strategy variation -- --nocapture` +Expected: PASS. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/orchestration/variation.rs tests/execution_swarm_strategy.rs +git commit -m "orchestration: add candidate variation generators" +``` + +### Task 7: Add `IterationStrategy` and `SwarmStrategy` + +**Files:** +- Create: `src/orchestration/strategy.rs` +- Create: `src/orchestration/strategy/swarm.rs` +- Test: `tests/execution_swarm_strategy.rs` + +- [ ] **Step 1: Write failing tests for pure swarm behavior** + +Cover: + +```rust +#[test] +fn swarm_materializes_inboxes_from_message_backlog() {} + +#[test] +fn swarm_plans_candidates_from_variation_source() {} + +#[test] +fn swarm_should_stop_on_threshold() {} + +#[test] +fn swarm_should_stop_on_plateau() {} + +#[test] +fn swarm_reduce_updates_best_result_and_failure_counts() {} +``` + +- [ ] **Step 2: Run tests** + +Run: `cargo test --test execution_swarm_strategy -- --nocapture` +Expected: failures for missing trait/strategy. + +- [ ] **Step 3: Implement trait and registry** + +Base trait: + +```rust +pub trait IterationStrategy { + fn materialize_inboxes(&self, accumulator: &ExecutionAccumulator) -> Vec<CandidateInbox>; + fn plan_candidates(&self, accumulator: &ExecutionAccumulator, inboxes: &[CandidateInbox]) -> Vec<PlannedCandidate>; + fn evaluate(&self, accumulator: &ExecutionAccumulator, outputs: &[CandidateOutput]) -> IterationEvaluation; + fn should_stop(&self, accumulator: &ExecutionAccumulator, evaluation: &IterationEvaluation) -> Option<StopReason>; + fn reduce(&self, accumulator: ExecutionAccumulator, evaluation: IterationEvaluation) -> ExecutionAccumulator; +} +``` + +Register only `swarm` in the first implementation. Reject `search` and `tournament` during validation with a clear “named but not implemented” error until those modes exist. + +- [ ] **Step 4: Re-run tests** + +Run: `cargo test --test execution_swarm_strategy -- --nocapture` +Expected: PASS. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/orchestration/strategy.rs src/orchestration/strategy/swarm.rs tests/execution_swarm_strategy.rs +git commit -m "orchestration: add swarm iteration strategy" +``` + +## Chunk 3: Scheduler and Control Loop + +### Task 8: Extend `MockRuntime` to support orchestrator tests + +**Files:** +- Modify: `src/runtime/mock.rs` +- Test: `tests/execution_scheduler.rs` + +- [ ] **Step 1: Write failing orchestrator-facing tests** + +Cover: + +```rust +#[test] +fn mock_runtime_can_complete_runs_with_structured_outputs() {} + +#[test] +fn mock_runtime_can_simulate_failure_timeout_and_missing_output() {} +``` + +- [ ] **Step 2: Run tests** + +Run: `cargo test --test execution_scheduler mock_runtime -- --nocapture` +Expected: existing mock is too shallow. + +- [ ] **Step 3: Add deterministic test hooks** + +Add helper APIs for tests only, for example: + +```rust +#[cfg(test)] +impl MockRuntime { + pub fn seed_run_outcome(&mut self, run_id: &str, outcome: SeededOutcome) { /* ... */ } +} +``` + +Do not complicate the production runtime contract. Keep this support behind `#[cfg(test)]` or an orchestration-test-only constructor. + +- [ ] **Step 4: Re-run tests** + +Run: `cargo test --test execution_scheduler mock_runtime -- --nocapture` +Expected: PASS. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/runtime/mock.rs tests/execution_scheduler.rs +git commit -m "runtime: extend mock for execution orchestration tests" +``` + +### Task 9: Implement the two-level scheduler + +**Files:** +- Create: `src/orchestration/scheduler.rs` +- Test: `tests/execution_scheduler.rs` + +- [ ] **Step 1: Write failing scheduler tests from section 21** + +Cover: + +```rust +#[test] +fn preserves_plan_candidates_order_within_execution() {} + +#[test] +fn dispatches_across_executions_fifo_by_candidate_creation_time() {} + +#[test] +fn releases_slots_immediately_on_completion() {} + +#[test] +fn paused_execution_keeps_queue_but_releases_slots() {} + +#[test] +fn exhausted_budget_prevents_queue_entry() {} +``` + +- [ ] **Step 2: Run tests** + +Run: `cargo test --test execution_scheduler scheduler -- --nocapture` +Expected: failures for missing scheduler. + +- [ ] **Step 3: Implement scheduler primitives** + +Suggested structure: + +```rust +pub struct GlobalScheduler { + max_concurrent_child_runs: usize, + queues: BTreeMap<String, ExecutionQueue>, +} + +impl GlobalScheduler { + pub fn enqueue(&mut self, execution_id: &str, candidate: QueuedCandidate) -> QueueDecision; + pub fn dispatchable(&self) -> Vec<DispatchableCandidate>; + pub fn release(&mut self, execution_id: &str, candidate_id: &str); +} +``` + +Persist enough queue metadata to recover after restart. + +- [ ] **Step 4: Re-run tests** + +Run: `cargo test --test execution_scheduler scheduler -- --nocapture` +Expected: PASS. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/orchestration/scheduler.rs tests/execution_scheduler.rs +git commit -m "orchestration: add execution scheduler" +``` + +### Task 10: Implement the shared execution control loop + +**Files:** +- Create: `src/orchestration/loop.rs` +- Create: `src/orchestration/service.rs` +- Test: `tests/execution_scheduler.rs` + +- [ ] **Step 1: Write failing end-to-end loop tests with `MockRuntime`** + +Cover: + +```rust +#[test] +fn runs_single_iteration_and_completes_with_best_result() {} + +#[test] +fn runs_multiple_iterations_until_threshold() {} + +#[test] +fn short_circuits_iteration_after_failure_limit() {} + +#[test] +fn marks_execution_failed_when_all_candidates_fail_and_policy_says_fail() {} +``` + +- [ ] **Step 2: Run tests** + +Run: `cargo test --test execution_scheduler -- --nocapture` +Expected: failures for missing service/loop. + +- [ ] **Step 3: Implement infrastructure methods** + +Required orchestration flow: + +```rust +create_execution() +-> persist ExecutionCreated +-> plan iteration +-> queue candidates +-> scheduler grants dispatch slots +-> runtime.start(...) +-> collect terminal events and artifacts +-> strategy.evaluate(...) +-> strategy.should_stop(...) +-> strategy.reduce(...) +-> persist accumulator and next state +``` + +Keep `dispatch_candidates()` and `collect_outputs()` as shared infrastructure, not strategy methods. + +- [ ] **Step 4: Re-run tests** + +Run: `cargo test --test execution_scheduler -- --nocapture` +Expected: PASS. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/orchestration/loop.rs src/orchestration/service.rs tests/execution_scheduler.rs +git commit -m "orchestration: add execution control loop" +``` + +## Chunk 4: Artifact Retrieval, Failure Semantics, and Reconciliation + +### Task 11: Add artifact retrieval and candidate completion mapping + +**Files:** +- Create: `src/orchestration/artifacts.rs` +- Modify: `src/runtime/void_box.rs` +- Test: `tests/execution_artifact_collection.rs` + +- [ ] **Step 1: Write failing tests for section 18 behavior** + +Cover: + +```rust +#[test] +fn waits_for_terminal_event_before_fetching_result() {} + +#[test] +fn parses_result_json_metrics_and_artifact_refs() {} + +#[test] +fn emits_output_error_for_missing_or_malformed_result() {} + +#[test] +fn leader_directed_intents_are_read_from_output_contract() {} +``` + +- [ ] **Step 2: Run tests** + +Run: `cargo test --test execution_artifact_collection -- --nocapture` +Expected: failures for missing retrieval method. + +- [ ] **Step 3: Extend runtime client with artifact fetch support** + +Add a narrow method on `VoidBoxRuntimeClient`: + +```rust +pub fn fetch_stage_output_file(&self, run_id: &str, stage: &str) -> Result<Vec<u8>, ContractError>; +``` + +Map response errors into orchestrator-visible output diagnostics. + +- [ ] **Step 4: Implement artifact parsing** + +Represent: + +- `result.json` +- embedded or staged `intents.json` +- retrieval timeout handling +- reference-only persistence, not full artifact duplication + +- [ ] **Step 5: Re-run tests** + +Run: `cargo test --test execution_artifact_collection -- --nocapture` +Expected: PASS. 
+ +- [ ] **Step 6: Commit** + +```bash +git add src/orchestration/artifacts.rs src/runtime/void_box.rs tests/execution_artifact_collection.rs +git commit -m "orchestration: collect structured candidate outputs" +``` + +### Task 12: Implement failure semantics and timeout handling + +**Files:** +- Modify: `src/orchestration/loop.rs` +- Test: `tests/execution_artifact_collection.rs` +- Test: `tests/execution_scheduler.rs` + +- [ ] **Step 1: Write failing tests for section 17 decision paths** + +Cover: + +```rust +#[test] +fn missing_output_can_mark_failed() {} + +#[test] +fn missing_output_can_mark_incomplete_without_failure_count() {} + +#[test] +fn candidate_timeout_cancels_run() {} + +#[test] +fn iteration_failure_policy_continue_advances_despite_all_failures() {} + +#[test] +fn iteration_failure_policy_retry_retries_once() {} +``` + +- [ ] **Step 2: Run tests** + +Run: `cargo test execution_ -- --nocapture` +Expected: failures in new policy paths. + +- [ ] **Step 3: Implement only the spec-defined decisions** + +Important limits: + +- `retry_iteration` is hardcoded to one retry in v0.2 +- timeout defaults from workflow template if policy field is unset +- all failures must emit explicit control-plane diagnostics + +- [ ] **Step 4: Re-run tests** + +Run: `cargo test execution_ -- --nocapture` +Expected: PASS. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/orchestration/loop.rs tests/execution_artifact_collection.rs tests/execution_scheduler.rs +git commit -m "orchestration: enforce execution failure semantics" +``` + +### Task 13: Implement restart reconciliation + +**Files:** +- Create: `src/orchestration/reconcile.rs` +- Modify: `src/orchestration/service.rs` +- Test: `tests/execution_reconciliation.rs` + +- [ ] **Step 1: Write failing reconciliation tests** + +Cover: + +```rust +#[test] +fn reloads_non_terminal_executions_after_restart() {} + +#[test] +fn resumes_event_stream_from_last_seen_id() {} + +#[test] +fn marks_unknown_handles_as_orphaned() {} + +#[test] +fn paused_execution_remains_paused_after_restart() {} +``` + +- [ ] **Step 2: Run tests** + +Run: `cargo test --test execution_reconciliation -- --nocapture` +Expected: failures for missing reconciliation service. + +- [ ] **Step 3: Implement reconciliation using store plus runtime inspection** + +Rules to enforce: + +- control-plane events remain source of truth +- direct runtime inspection is for repair/re-sync only +- replay should rebuild accumulator and queue state + +- [ ] **Step 4: Re-run tests** + +Run: `cargo test --test execution_reconciliation -- --nocapture` +Expected: PASS. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/orchestration/reconcile.rs src/orchestration/service.rs tests/execution_reconciliation.rs +git commit -m "orchestration: add execution reconciliation" +``` + +## Chunk 5: API Surface and Operational Controls + +### Task 14: Add dry-run service and validation endpoint + +**Files:** +- Modify: `src/orchestration/service.rs` +- Create: `src/orchestration/http.rs` +- Modify: `src/bridge.rs` +- Test: `tests/execution_dry_run.rs` + +- [ ] **Step 1: Write failing dry-run tests** + +Cover: + +```rust +#[test] +fn dry_run_validates_without_creating_execution() {} + +#[test] +fn dry_run_returns_plan_warnings_and_errors() {} + +#[test] +fn dry_run_reports_parameter_space_cardinality() {} +``` + +- [ ] **Step 2: Run tests** + +Run: `cargo test --test execution_dry_run -- --nocapture` +Expected: failures for missing endpoint/service. + +- [ ] **Step 3: Implement dry-run response model** + +Include: + +- `valid` +- `mode` +- computed plan summary +- warnings +- errors + +Do not contact `void-box` during dry-run. + +- [ ] **Step 4: Expose endpoint in bridge** + +Add: + +- `POST /v1/executions/dry-run` + +Keep the existing `/v1/launch` path intact for run-level workflows. + +- [ ] **Step 5: Re-run tests** + +Run: `cargo test --features serde --test execution_dry_run -- --nocapture` +Expected: PASS. 
+ +- [ ] **Step 6: Commit** + +```bash +git add src/orchestration/service.rs src/orchestration/http.rs src/bridge.rs tests/execution_dry_run.rs +git commit -m "bridge: add execution dry-run endpoint" +``` + +### Task 15: Add execution lifecycle endpoints + +**Files:** +- Modify: `src/bridge.rs` +- Modify: `src/bin/voidctl.rs` +- Test: `tests/execution_pause_resume.rs` + +- [ ] **Step 1: Write failing API tests** + +Cover: + +```rust +#[test] +fn can_create_and_inspect_execution() {} + +#[test] +fn can_pause_and_resume_execution() {} + +#[test] +fn can_cancel_running_or_paused_execution() {} + +#[test] +fn inspect_exposes_queue_depth_and_wait_time() {} +``` + +- [ ] **Step 2: Run tests** + +Run: `cargo test --features serde --test execution_pause_resume -- --nocapture` +Expected: missing endpoints. + +- [ ] **Step 3: Implement HTTP endpoints** + +Add: + +- `POST /v1/executions` +- `GET /v1/executions/{id}` +- `GET /v1/executions` +- `POST /v1/executions/{id}/pause` +- `POST /v1/executions/{id}/resume` +- `POST /v1/executions/{id}/cancel` + +Do not overload the run-level endpoints with execution semantics. + +- [ ] **Step 4: Add `voidctl` commands** + +Add CLI support for: + +- `/execution create <spec-file>` +- `/execution status <execution-id>` +- `/execution pause <execution-id>` +- `/execution resume <execution-id>` +- `/execution cancel <execution-id>` + +Avoid breaking current run-oriented commands. + +- [ ] **Step 5: Re-run tests** + +Run: `cargo test --features serde --test execution_pause_resume -- --nocapture` +Expected: PASS. 
+ +- [ ] **Step 6: Commit** + +```bash +git add src/bridge.rs src/bin/voidctl.rs tests/execution_pause_resume.rs +git commit -m "bridge: add execution lifecycle endpoints" +``` + +### Task 16: Add policy patching and observability events + +**Files:** +- Modify: `src/orchestration/service.rs` +- Modify: `src/bridge.rs` +- Test: `tests/execution_policy_patch.rs` + +- [ ] **Step 1: Write failing tests for mutable and immutable policy fields** + +Cover: + +```rust +#[test] +fn patches_budget_and_concurrency_for_running_execution() {} + +#[test] +fn rejects_mutation_of_convergence_and_evaluation() {} + +#[test] +fn rejects_new_limits_below_consumed_values() {} + +#[test] +fn emits_policy_updated_event() {} + +#[test] +fn emits_budget_warning_and_stall_events() {} +``` + +- [ ] **Step 2: Run tests** + +Run: `cargo test --features serde --test execution_policy_patch -- --nocapture` +Expected: failures for missing patch service. + +- [ ] **Step 3: Implement policy patch path** + +Add: + +- `PATCH /v1/executions/{id}/policy` + +Emit `PolicyUpdated`, `IterationBudgetWarning`, and `ExecutionStalled` through the control-plane event log. + +- [ ] **Step 4: Re-run tests** + +Run: `cargo test --features serde --test execution_policy_patch -- --nocapture` +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +git add src/orchestration/service.rs src/bridge.rs tests/execution_policy_patch.rs +git commit -m "orchestration: add policy patching and observability" +``` + +## Chunk 6: Finish, Integrate, and Document + +### Task 17: Wire exports and feature gates cleanly + +**Files:** +- Modify: `Cargo.toml` +- Modify: `src/lib.rs` + +- [ ] **Step 1: Write failing compile/test command matrix note** + +Record the intended matrix: + +- `cargo test` +- `cargo test --features serde` + +Expected: orchestration core should compile without requiring bridge-only serde HTTP code when feasible. 
+ +- [ ] **Step 2: Implement feature gating carefully** + +Keep: + +- pure orchestration logic available without HTTP server dependencies where possible +- filesystem store and JSON parsing under `serde` only if strictly required + +Avoid making the entire library impossible to test without `serde` unless unavoidable. + +- [ ] **Step 3: Run full unit suite** + +Run: `cargo test` +Expected: PASS. + +- [ ] **Step 4: Run serde suite** + +Run: `cargo test --features serde` +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +git add Cargo.toml src/lib.rs +git commit -m "build: wire orchestration modules and features" +``` + +### Task 18: Add integration coverage for execution contract behavior + +**Files:** +- Modify: `tests/void_box_contract.rs` +- Optionally create: `tests/execution_contract.rs` + +- [ ] **Step 1: Write failing contract-style tests against bridge** + +Cover: + +- dry-run side-effect freedom +- execution create/inspect/pause/resume/cancel +- policy patch validation +- final result provenance fields + +- [ ] **Step 2: Run tests with `serde`** + +Run: `cargo test --features serde execution_contract -- --nocapture` +Expected: failures for any missing bridge contract behavior. + +- [ ] **Step 3: Implement missing glue only** + +Do not move orchestration logic into the bridge. Keep bridge thin. + +- [ ] **Step 4: Re-run tests** + +Run: `cargo test --features serde execution_contract -- --nocapture` +Expected: PASS. 
+ +- [ ] **Step 5: Commit** + +```bash +git add tests/void_box_contract.rs tests/execution_contract.rs +git commit -m "test: cover execution bridge contract" +``` + +### Task 19: Update docs and developer guidance + +**Files:** +- Modify: `README.md` +- Optionally create: `docs/execution-orchestration.md` + +- [ ] **Step 1: Document the new execution API and storage model** + +Include: + +- difference between `Run` and `Execution` +- current supported mode: `swarm` +- unsupported named modes: `search`, `tournament` +- persistence location and reconciliation behavior +- command examples + +- [ ] **Step 2: Document verification commands** + +Include exactly: + +```bash +cargo test +cargo test --features serde +``` + +And any new targeted commands for execution tests. + +- [ ] **Step 3: Re-read for consistency with spec** + +Check that docs do not claim: + +- distributed scheduling +- non-swarm strategy implementation +- LLM-based scoring + +- [ ] **Step 4: Commit** + +```bash +git add README.md docs/execution-orchestration.md +git commit -m "docs: describe execution orchestration" +``` + +## Cross-Cutting Design Constraints + +- Keep the run-level runtime contract separate from the execution-level orchestration contract. +- Treat control-plane events as the primary execution truth; runtime inspection is reconciliation-only. +- Keep strategy methods pure and side-effect free. +- Avoid deep coupling between HTTP handlers and orchestration internals. +- Implement only `SwarmStrategy` initially even though the spec names future modes. +- Persist references to artifacts, not full artifact bodies. +- Do not infer execution success from logs alone. + +## Risks and Open Decisions + +### 1. Persistence format + +The spec requires durable execution state and replayability, but the repo has no storage layer yet. Start with a filesystem store and wrap it in a trait so SQLite or another backend can replace it later without rewriting the control loop. + +### 2. 
Background execution model + +The spec implies long-running orchestration workers. If the first implementation runs everything synchronously inside request handlers, pause/resume and queue fairness will become fragile. Prefer a service object with an explicit worker thread or polling loop, even if single-process only. + +### 3. Artifact contract mismatch + +The current void-box API retrieves a single stage output file. The plan must preserve the v0.2 constraint that `result.json` is the stage’s structured output and treat richer artifact manifests as future work. + +### 4. Feature-gate sprawl + +The repo currently gates HTTP/JSON-heavy code behind `serde`. Keep the new orchestration layer as independent as possible so pure logic remains easy to unit test. + +### 5. UI scope creep + +The spec mentions visibility, but wiring the React app before backend contracts settle will create churn. Keep UI work out of the first implementation branch unless the backend is already stable. + +## Verification Checklist + +Run this full matrix before calling the implementation complete: + +```bash +cargo test +cargo test --features serde +cargo test --features serde --test execution_spec_validation -- --nocapture +cargo test --features serde --test execution_dry_run -- --nocapture +cargo test --features serde --test execution_scheduler -- --nocapture +cargo test --features serde --test execution_pause_resume -- --nocapture +cargo test --features serde --test execution_policy_patch -- --nocapture +cargo test --features serde --test execution_reconciliation -- --nocapture +``` + +If a live daemon contract gate is added for executions later, keep it separate from the pure orchestration suite so implementation work does not block on an external service. + +## Recommended Commit Sequence + +1. `orchestration: add module scaffold` +2. `orchestration: validate execution specs` +3. `orchestration: add execution state and events` +4. `orchestration: add filesystem execution store` +5. 
`orchestration: add deterministic scoring` +6. `orchestration: add candidate variation generators` +7. `orchestration: add swarm iteration strategy` +8. `runtime: extend mock for execution orchestration tests` +9. `orchestration: add execution scheduler` +10. `orchestration: add execution control loop` +11. `orchestration: collect structured candidate outputs` +12. `orchestration: enforce execution failure semantics` +13. `orchestration: add execution reconciliation` +14. `bridge: add execution dry-run endpoint` +15. `bridge: add execution lifecycle endpoints` +16. `orchestration: add policy patching and observability` +17. `build: wire orchestration modules and features` +18. `test: cover execution bridge contract` +19. `docs: describe execution orchestration` + +Plan complete and saved to `docs/superpowers/plans/2026-03-20-iteration-orchestration.md`. Ready to execute? diff --git a/docs/superpowers/plans/2026-03-21-void-box-runtime-alignment.md b/docs/superpowers/plans/2026-03-21-void-box-runtime-alignment.md new file mode 100644 index 0000000..17865f8 --- /dev/null +++ b/docs/superpowers/plans/2026-03-21-void-box-runtime-alignment.md @@ -0,0 +1,327 @@ +# Void-Box Runtime Alignment Implementation Plan + +> **For agentic workers:** REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Align `void-control` with the forward-looking `void-box` runtime contract by adding live execution API tests, manifest-aware runtime client behavior, and only the minimum bridge/service adjustments required by the richer runtime surface. + +**Architecture:** Keep the work focused on the `void-control` to `void-box` boundary. 
`tests/void_box_contract.rs` becomes the live ignored contract gate for execution and artifact behavior, `src/runtime/void_box.rs` becomes the single place that understands manifests, named artifact retrieval, and typed runtime output failures, and `src/bridge.rs` plus `src/orchestration/service.rs` should change only where the runtime client’s richer semantics force controller-side behavior changes. + +**Tech Stack:** Rust 2021, existing `serde` feature-gated bridge/runtime code, live ignored Cargo tests against a running `void-box` daemon, filesystem-backed execution store, and the current TCP/HTTP runtime client. + +--- + +## Scope Check + +This plan intentionally excludes broader hardening work: +- no worker locking redesign, +- no execution lifecycle event expansion, +- no UI changes, +- no new orchestration strategies. + +The focus is only: +1. live daemon contract coverage, +2. runtime client alignment to the new `void-box` artifact contract, +3. minimal bridge/service changes needed to consume that contract cleanly. + +## File Map + +### Primary files + +- Modify: `tests/void_box_contract.rs` + Responsibility: ignored live daemon contract tests for runtime behavior, artifact publication, and reconciliation-facing endpoints. +- Modify: `src/runtime/void_box.rs` + Responsibility: runtime HTTP client, manifest-aware structured output retrieval, named artifact retrieval, and typed runtime error mapping. +- Modify: `src/runtime/mod.rs` + Responsibility: `ExecutionRuntime` glue for the richer `VoidBoxRuntimeClient` behavior. +- Modify: `src/orchestration/service.rs` + Responsibility: consume runtime output/error semantics without guessing from `None`. +- Modify: `src/bridge.rs` + Responsibility: only the minimum response and processing adjustments needed once runtime errors and artifact metadata become explicit. 
+ +### Possible supporting files + +- Modify: `src/contract/mod.rs` + Responsibility: add or extend contract error mapping if the new runtime error codes need first-class representation. +- Modify: `tests/execution_bridge.rs` + Responsibility: bridge-level regression tests if runtime error mapping changes user-facing execution responses. +- Modify: `tests/execution_worker.rs` + Responsibility: worker-side regression tests if artifact collection semantics change. + +## Delivery Strategy + +Implement in this order: + +1. write ignored live tests that describe the new daemon contract, +2. adapt `VoidBoxRuntimeClient` until those tests can pass against the new daemon, +3. tighten orchestration handling of explicit runtime output failures, +4. run the verification matrix locally against both unit tests and live daemon gates. + +This keeps the contract authoritative and prevents the client from baking in assumptions that the daemon does not actually satisfy. + +## Chunk 1: Live Daemon Contract Tests + +### Task 1: Add execution-and-artifact live tests + +**Files:** +- Modify: `tests/void_box_contract.rs` + +- [ ] **Step 1: Add ignored failing tests for the new runtime contract** + +Add live ignored tests covering: + +```rust +#[test] +#[ignore] +fn structured_output_result_json_is_retrievable() {} + +#[test] +#[ignore] +fn missing_result_json_is_typed_failure() {} + +#[test] +#[ignore] +fn malformed_result_json_is_typed_failure() {} + +#[test] +#[ignore] +fn manifest_lists_named_artifacts() {} + +#[test] +#[ignore] +fn named_artifact_endpoint_serves_manifested_file() {} + +#[test] +#[ignore] +fn active_run_listing_supports_reconciliation() {} +``` + +- [ ] **Step 2: Add fixture generation helpers only if needed** + +Extend the fallback spec generation in `tests/void_box_contract.rs` with the minimum new cases: +- success case that emits valid `result.json`, +- success case that emits `result.json` plus one named artifact, +- terminal case with missing `result.json`, +- 
terminal case with malformed `result.json`. + +Do not add broad fixture abstractions. Keep them in the existing test file. + +- [ ] **Step 3: Run targeted compile-only validation** + +Run: `cargo test --features serde --test void_box_contract -- --ignored --list` +Expected: the new ignored tests appear in the list and compile. + +- [ ] **Step 4: Run live tests against the daemon once Claude’s `void-box` branch exposes the new endpoints** + +Run: + +```bash +TMPDIR=/tmp CARGO_TARGET_DIR=/home/diego/github/void-control/target \ +VOID_BOX_BASE_URL=http://127.0.0.1:43100 \ +cargo test --features serde --test void_box_contract -- --ignored --nocapture +``` + +Expected: +- current failures identify real contract gaps, +- no failures come from malformed test harness assumptions. + +- [ ] **Step 5: Commit** + +```bash +git add tests/void_box_contract.rs +git commit -m "tests: add live void-box artifact contract coverage" +``` + +## Chunk 2: Runtime Client Contract Alignment + +### Task 2: Make `VoidBoxRuntimeClient` manifest-aware + +**Files:** +- Modify: `src/runtime/void_box.rs` +- Modify: `src/runtime/mod.rs` + +- [ ] **Step 1: Write focused unit tests in `src/runtime/void_box.rs` for the new retrieval paths** + +Cover at least: + +```rust +#[test] +fn fetch_structured_output_prefers_manifested_result_json() {} + +#[test] +fn fetch_structured_output_maps_missing_output_error() {} + +#[test] +fn fetch_structured_output_maps_malformed_output_error() {} + +#[test] +fn fetch_named_artifact_uses_manifest_retrieval_path() {} + +#[test] +fn inspect_reads_artifact_publication_metadata_when_present() {} +``` + +- [ ] **Step 2: Run targeted unit tests and verify they fail** + +Run: + +```bash +cargo test --features serde runtime::void_box:: -- --nocapture +``` + +Expected: failures for missing manifest parsing, missing typed error handling, or missing named artifact helpers. 
+ +- [ ] **Step 3: Implement minimal client additions** + +In `src/runtime/void_box.rs`, add: +- manifest parsing from inspect payloads when present, +- a helper to retrieve named artifacts from manifest entries, +- typed handling for runtime error codes such as: + - `STRUCTURED_OUTPUT_MISSING` + - `STRUCTURED_OUTPUT_MALFORMED` + - `ARTIFACT_NOT_FOUND` + - `ARTIFACT_PUBLICATION_INCOMPLETE` + - `ARTIFACT_STORE_UNAVAILABLE` + - `RETRIEVAL_TIMEOUT` +- structured output retrieval that prefers normalized manifest/inspection metadata and only falls back to the current `output-file` path when needed for compatibility. + +Do not redesign the transport layer. + +- [ ] **Step 4: Update `src/runtime/mod.rs` glue only if the runtime trait needs richer return semantics** + +If `Option` is no longer expressive enough, add the smallest trait/API change needed to distinguish: +- missing structured output, +- malformed structured output, +- retrieval temporary failure, +- successful output. + +Keep the change local and update only the call sites this plan covers. + +- [ ] **Step 5: Re-run unit coverage** + +Run: + +```bash +cargo test --features serde runtime::void_box:: -- --nocapture +``` + +Expected: PASS. 
+ +- [ ] **Step 6: Commit** + +```bash +git add src/runtime/void_box.rs src/runtime/mod.rs +git commit -m "runtime: align void-box client with artifact contract" +``` + +## Chunk 3: Minimal Orchestration and Bridge Adjustments + +### Task 3: Consume explicit runtime output failures + +**Files:** +- Modify: `src/orchestration/service.rs` +- Modify: `src/bridge.rs` +- Test: `tests/execution_worker.rs` +- Test: `tests/execution_bridge.rs` + +- [ ] **Step 1: Add failing regression tests for runtime output failure mapping** + +Cover at least: + +```rust +#[test] +fn worker_marks_candidate_failed_on_structured_output_missing() {} + +#[test] +fn worker_marks_candidate_failed_on_structured_output_malformed() {} + +#[test] +fn bridge_preserves_retryable_runtime_error_information_when_execution_fails() {} +``` + +- [ ] **Step 2: Run targeted tests and verify they fail** + +Run: + +```bash +cargo test --features serde --test execution_worker --test execution_bridge -- --nocapture +``` + +Expected: current code collapses too many runtime cases into `None` or generic failure. + +- [ ] **Step 3: Implement the smallest controller-side change** + +Update `src/orchestration/service.rs` so artifact collection distinguishes: +- no structured output because the candidate genuinely did not produce one, +- malformed output, +- temporary retrieval/publishing failure. + +Update `src/bridge.rs` only if the richer failure information should surface in execution inspection or HTTP error bodies. Avoid changing route shapes unless required. + +- [ ] **Step 4: Re-run targeted regression tests** + +Run: + +```bash +cargo test --features serde --test execution_worker --test execution_bridge -- --nocapture +``` + +Expected: PASS. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/orchestration/service.rs src/bridge.rs tests/execution_worker.rs tests/execution_bridge.rs +git commit -m "orchestration: preserve runtime output failure semantics" +``` + +## Chunk 4: Verification and Rollout + +### Task 4: Run the full relevant verification matrix + +**Files:** +- No source changes expected unless failures reveal drift + +- [ ] **Step 1: Run fast local regression coverage** + +Run: + +```bash +cargo test --features serde --test execution_bridge --test execution_worker --test void_box_contract -- --nocapture +``` + +Expected: +- non-ignored tests pass, +- ignored live tests compile. + +- [ ] **Step 2: Run runtime unit coverage** + +Run: + +```bash +cargo test --features serde runtime::void_box:: -- --nocapture +``` + +Expected: PASS. + +- [ ] **Step 3: Run live daemon gate** + +Run: + +```bash +TMPDIR=/tmp CARGO_TARGET_DIR=/home/diego/github/void-control/target \ +VOID_BOX_BASE_URL=http://127.0.0.1:43100 \ +cargo test --features serde --test void_box_contract -- --ignored --nocapture +``` + +Expected: PASS once the paired `void-box` changes are live. + +- [ ] **Step 4: If the live gate exposes daemon/client drift, fix only the boundary mismatch** + +Do not expand scope into worker locking, lifecycle events, or UI work during this pass. 
+ +- [ ] **Step 5: Commit any final boundary fixes** + +```bash +git add src/runtime/void_box.rs src/runtime/mod.rs src/orchestration/service.rs src/bridge.rs tests/void_box_contract.rs tests/execution_worker.rs tests/execution_bridge.rs +git commit -m "runtime: finalize void-box boundary alignment" +``` diff --git a/docs/superpowers/plans/2026-03-22-message-box-v0.md b/docs/superpowers/plans/2026-03-22-message-box-v0.md new file mode 100644 index 0000000..8dc4ea1 --- /dev/null +++ b/docs/superpowers/plans/2026-03-22-message-box-v0.md @@ -0,0 +1,175 @@ +# Message Box V0 Implementation Plan + +> **For agentic workers:** REQUIRED: Use superpowers:executing-plans or +> superpowers:subagent-driven-development to implement this plan. Steps +> use checkbox syntax for tracking. + +**Goal:** Implement the first real `void-control` message box: +- structured communication intents in orchestration-facing results, +- persisted `intents.log`, `messages.log`, and inbox snapshots, +- deterministic next-iteration delivery, +- provider-adapter launch injection as the required v0 delivery mode. + +**Architecture:** Keep collaboration semantics owned by `void-control`. +Extend the orchestration model with message-box records and a provider +adapter abstraction. Do not move routing into `void-box`. Do not require +live vendor channels in v0. + +**Tech Stack:** Rust 2021, existing orchestration/runtime/store modules, +filesystem-backed execution persistence, current `MockRuntime`, +integration tests under `tests/`. + +--- + +## Scope Check + +This plan includes: +1. message-box domain types and persistence, +2. structured intent extraction from candidate output, +3. routing for `leader` and `broadcast`, +4. inbox snapshot materialization, +5. provider adapter abstraction with launch injection, +6. integration acceptance tests for emission, routing, delivery, replay. 
+ +This plan intentionally excludes: +- direct `candidate:` addressing, +- same-iteration delivery, +- semantic consumed-tracking, +- provider-required live delivery, +- UI message-thread rendering. + +## File Map + +### Primary files + +- Modify: `src/orchestration/types.rs` + Responsibility: add message-box domain types and persisted records. +- Modify: `src/orchestration/events.rs` + Responsibility: add collaboration event types. +- Modify: `src/orchestration/service.rs` + Responsibility: extract intents, route messages, persist inboxes, and + invoke provider adapter launch injection. +- Modify: `src/orchestration/store/fs.rs` + Responsibility: persist `intents.log`, `messages.log`, and inbox + snapshots. +- Modify: `src/orchestration/mod.rs` + Responsibility: export new message-box types. +- Create or modify: `src/orchestration/message_box.rs` + Responsibility: routing, TTL, dedup, and inbox materialization logic. +- Create or modify: `src/runtime/mod.rs` + Responsibility: provider adapter abstraction boundary. + +### Tests + +- Create: `tests/execution_message_box.rs` + Responsibility: focused integration coverage for emission, routing, + delivery, TTL expiry, dedup, and replay. +- Modify: `tests/strategy_scenarios.rs` + Responsibility: upgrade swarm/search scenarios to use real routed + message records instead of only backlog shaping. +- Modify: `tests/execution_strategy_acceptance.rs` + Responsibility: require provider launch-injection delivery path. + +## Delivery Strategy + +Implement in this order: + +1. define domain types and persistence, +2. add provider adapter abstraction with launch injection, +3. extract intents from structured output, +4. route and persist messages, +5. persist inbox snapshots, +6. add replay/integration tests, +7. then upgrade scenario coverage. + +This keeps the control-plane truth stable while allowing provider +delivery to remain a thin adapter. 
+ +## Chunk 1: Domain Model and Persistence + +### Task 1: Add message-box records + +**Files:** +- Modify: `src/orchestration/types.rs` +- Modify: `src/orchestration/store/fs.rs` +- Modify: `src/orchestration/mod.rs` +- Test: `tests/execution_message_box.rs` + +- [ ] Step 1: add failing persistence tests for intents/messages/inboxes +- [ ] Step 2: add `CommunicationIntent`, `RoutedMessage`, + `InboxEntry`, `InboxSnapshot` +- [ ] Step 3: persist `intents.log` and `messages.log` as NDJSON +- [ ] Step 4: persist inbox snapshots as JSON files +- [ ] Step 5: run focused persistence tests + +## Chunk 2: Provider Adapter V0 + +### Task 2: Add launch-injection adapter boundary + +**Files:** +- Modify: `src/runtime/mod.rs` +- Modify: `src/orchestration/service.rs` +- Test: `tests/execution_message_box.rs` + +- [ ] Step 1: add failing test for provider launch injection +- [ ] Step 2: add provider adapter trait with required launch injection +- [ ] Step 3: add default adapter that renders inbox snapshot into launch input +- [ ] Step 4: wire service launch path through the adapter +- [ ] Step 5: run focused adapter test + +## Chunk 3: Intent Extraction and Routing + +### Task 3: Extract intents from candidate output and route them + +**Files:** +- Modify: `src/orchestration/service.rs` +- Create or modify: `src/orchestration/message_box.rs` +- Modify: `src/orchestration/events.rs` +- Test: `tests/execution_message_box.rs` + +- [ ] Step 1: add failing test for valid intent emission +- [ ] Step 2: parse `intents` from structured candidate output +- [ ] Step 3: validate kind/audience/limits +- [ ] Step 4: persist valid intents and rejection diagnostics +- [ ] Step 5: route `leader` and `broadcast` messages +- [ ] Step 6: append collaboration events +- [ ] Step 7: run focused routing tests + +## Chunk 4: Inbox Materialization and Replay + +### Task 4: Deliver inbox snapshots deterministically + +**Files:** +- Modify: `src/orchestration/message_box.rs` +- Modify: 
`src/orchestration/service.rs` +- Modify: `src/orchestration/store/fs.rs` +- Test: `tests/execution_message_box.rs` + +- [ ] Step 1: add failing test for next-iteration inbox delivery +- [ ] Step 2: materialize inbox snapshots from routed messages +- [ ] Step 3: enforce TTL, dedup, and fan-out limits +- [ ] Step 4: persist immutable inbox snapshots +- [ ] Step 5: replay from persisted logs plus snapshots after restart +- [ ] Step 6: run delivery/replay tests + +## Chunk 5: Scenario Upgrade and Acceptance + +### Task 5: Use real message-box flow in strategy scenarios + +**Files:** +- Modify: `tests/strategy_scenarios.rs` +- Modify: `tests/execution_strategy_acceptance.rs` + +- [ ] Step 1: upgrade swarm scenario to assert real routed message records +- [ ] Step 2: upgrade search scenario to assert `caused_by` lineage +- [ ] Step 3: ensure supported-strategy acceptance uses provider launch injection +- [ ] Step 4: run: + +```bash +cargo test --features serde --test execution_message_box -- --nocapture +cargo test --features serde --test strategy_scenarios -- --nocapture +cargo test --features serde --test execution_strategy_acceptance -- --nocapture +cargo test --features serde +``` + +Expected: PASS. diff --git a/docs/superpowers/plans/2026-03-22-persistent-dispatcher.md b/docs/superpowers/plans/2026-03-22-persistent-dispatcher.md new file mode 100644 index 0000000..e5ed737 --- /dev/null +++ b/docs/superpowers/plans/2026-03-22-persistent-dispatcher.md @@ -0,0 +1,367 @@ +# Persistent Dispatcher Implementation Plan + +> **For agentic workers:** REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** Replace the current “scan pending executions and process them to completion” worker loop with a persistent dispatcher that stores queued and running candidate state, enforces execution-local and global concurrency rules, survives restart, and exposes queue/dispatch state through persisted events and bridge inspection. + +**Architecture:** Keep execution planning, scheduling, and candidate dispatch separate. `ExecutionService` should plan and persist candidate work, `scheduler.rs` should decide which persisted candidates are runnable under global and per-execution limits, `store/fs.rs` should become the source of truth for queued/running candidate records, and `bridge.rs` should drive a dispatcher tick that advances work incrementally instead of executing an entire orchestration loop in one pass. + +**Tech Stack:** Rust 2021, filesystem-backed execution store, existing `serde`-gated bridge and orchestration modules, current runtime trait abstraction, persisted control-plane events, and the existing `GlobalScheduler` primitives as the starting point. + +--- + +## Scope Check + +This plan includes: +1. persisted candidate queue/running state, +2. dispatcher ticks with global and per-execution slot enforcement, +3. restart reconciliation of queued/running candidates, +4. bridge/inspection updates for queue and dispatch observability, +5. focused restart and fairness tests. + +This plan intentionally excludes: +- additional orchestration strategies beyond `swarm`, +- UI work, +- richer candidate artifact history beyond what dispatching needs, +- database-backed persistence, +- distributed locking beyond the current filesystem claim model. + +## File Map + +### Primary files + +- Modify: `src/orchestration/types.rs` + Responsibility: add persisted candidate record types and candidate lifecycle status. +- Modify: `src/orchestration/store/fs.rs` + Responsibility: persist candidate queue/running/completed state and reload it on restart. 
+- Modify: `src/orchestration/store.rs` + Responsibility: expose candidate-oriented store operations through the store abstraction. +- Modify: `src/orchestration/service.rs` + Responsibility: split planning from dispatch and process one persisted candidate at a time. +- Modify: `src/orchestration/scheduler.rs` + Responsibility: decide runnable candidates from persisted state under spec rules. +- Modify: `src/orchestration/events.rs` + Responsibility: add queue/dispatched/released candidate lifecycle events required by the dispatcher. +- Modify: `src/orchestration/reconcile.rs` + Responsibility: rebuild dispatcher state from persisted executions and candidates after restart. +- Modify: `src/bridge.rs` + Responsibility: replace the simple pending-execution scan with a dispatcher tick and expose queue state via inspection. + +### Supporting tests + +- Modify: `tests/execution_scheduler.rs` + Responsibility: fairness, ordering, slot release, and candidate lifecycle coverage. +- Modify: `tests/execution_reconciliation.rs` + Responsibility: restart rebuild of queued/running candidate state. +- Modify: `tests/execution_worker.rs` + Responsibility: worker dispatch of persisted candidates and restart-safe progression. +- Modify: `tests/execution_bridge.rs` + Responsibility: bridge inspection surface for queued/running candidate summaries. + +## Delivery Strategy + +Implement in this order: + +1. define persisted candidate records and store support, +2. split orchestration into planning and candidate dispatch, +3. wire a dispatcher tick around the persisted queue, +4. add restart reconciliation and queue observability, +5. run the full verification sweep. + +This order keeps the store model authoritative and avoids building more behavior on top of the current monolithic `process_execution()` loop. 
 + +## Chunk 1: Persisted Candidate Records + +### Task 1: Add candidate lifecycle types + +**Files:** +- Modify: `src/orchestration/types.rs` + +- [ ] **Step 1: Add a candidate record model** + +Add types such as: + +```rust +pub enum CandidateStatus { + Queued, + Running, + Completed, + Failed, + Canceled, +} + +pub struct ExecutionCandidate { + pub execution_id: String, + pub candidate_id: String, + pub created_seq: u64, + pub iteration: u32, + pub status: CandidateStatus, + pub runtime_run_id: Option<String>, +} +``` + +Do not add candidate metric history yet. Keep the type limited to what dispatching and restart recovery need. + +- [ ] **Step 2: Add store support in `src/orchestration/store/fs.rs`** + +Persist candidate records separately from `execution.txt`, for example under: +- `candidates/<candidate_id>.txt`, or +- a single `candidates.log` plus a reload parser. + +Prefer a simple per-candidate file because it makes reload and patch updates easier. + +- [ ] **Step 3: Extend `src/orchestration/store.rs`** + +Expose store methods for: +- save candidate, +- load candidates for one execution, +- list runnable/active candidates across executions. + +- [ ] **Step 4: Add focused store tests** + +Add tests that round-trip queued and running candidate records through the filesystem store. + +- [ ] **Step 5: Verify** + +Run: + +```bash +cargo test --features serde --test execution_worker -- --nocapture +cargo test --features serde --test execution_scheduler -- --nocapture +``` + +Expected: compilation and any new store tests pass. 
 + +## Chunk 2: Split Planning From Dispatch + +### Task 2: Persist candidate queue state instead of dispatching immediately + +**Files:** +- Modify: `src/orchestration/service.rs` +- Modify: `src/orchestration/events.rs` + +- [ ] **Step 1: Introduce a planning-only phase** + +Refactor `process_execution()` so the iteration loop can: +- materialize inboxes, +- plan candidates, +- persist them as `Queued`, +- emit `CandidateQueued`, +- stop before calling `runtime.start_run(...)` inline. + +- [ ] **Step 2: Add a one-candidate dispatch path** + +Introduce a method with a shape like: + +```rust +fn dispatch_candidate( + &mut self, + execution: &mut Execution, + candidate: &ExecutionCandidate, + spec: &ExecutionSpec, + worker_id: &str, +) -> io::Result<CandidateStatus> +``` + +This method should: +- mark the candidate `Running`, +- start the runtime run, +- wait/poll to terminal, +- collect structured output, +- mark the candidate terminal, +- update accumulator/execution state incrementally. + +- [ ] **Step 3: Add/extend events** + +Add or use events for: +- `CandidateQueued` +- `CandidateDispatched` +- `CandidateOutputCollected` +- candidate terminal release/failure if needed for observability + +Do not remove the existing execution lifecycle events. + +- [ ] **Step 4: Keep the current single-process semantics temporarily** + +Within this chunk, it is acceptable if one dispatcher tick still drains all runnable candidates. The main requirement is that candidates are persisted and lifecycle transitions are explicit. + +- [ ] **Step 5: Verify** + +Run: + +```bash +cargo test --features serde --test execution_worker -- --nocapture +cargo test --features serde --test execution_event_replay -- --nocapture +``` + +Expected: worker tests still pass and event replay remains consistent. 
+ +## Chunk 3: Dispatcher Tick and Slot Enforcement + +### Task 3: Enforce scheduler rules from persisted state + +**Files:** +- Modify: `src/orchestration/scheduler.rs` +- Modify: `src/bridge.rs` +- Modify: `src/orchestration/service.rs` + +- [ ] **Step 1: Build a scheduler view from persisted candidates** + +The dispatcher tick should reconstruct runnable work from: +- queued candidates, +- running candidates, +- execution pause/cancel state, +- global child-run limit, +- per-execution max concurrent candidates. + +- [ ] **Step 2: Enforce spec ordering** + +Ensure: +- within one execution, dispatch order matches persisted candidate creation sequence, +- across executions, dispatch is FIFO by candidate creation time. + +- [ ] **Step 3: Release slots on candidate completion** + +Candidate completion should immediately make capacity available on the next tick. + +- [ ] **Step 4: Replace the simple bridge worker scan** + +Update `process_pending_executions_once()` in `src/bridge.rs` to: +- queue work for executions that need planning, +- dispatch runnable persisted candidates, +- avoid treating one execution as a monolithic unit. + +- [ ] **Step 5: Add scheduler tests** + +Extend `tests/execution_scheduler.rs` to cover: +- persisted FIFO across executions, +- execution-local order preservation after restart, +- slot release on completion, +- paused execution not dispatching queued candidates. + +- [ ] **Step 6: Verify** + +Run: + +```bash +cargo test --features serde --test execution_scheduler -- --nocapture +cargo test --features serde --test execution_worker -- --nocapture +``` + +Expected: PASS. 
+ +## Chunk 4: Restart Reconciliation + +### Task 4: Rebuild dispatcher state after restart + +**Files:** +- Modify: `src/orchestration/reconcile.rs` +- Modify: `src/orchestration/store/fs.rs` +- Modify: `tests/execution_reconciliation.rs` + +- [ ] **Step 1: Reload queued and running candidates from disk** + +Reconciliation should restore: +- active executions, +- queued candidates, +- running candidates, +- execution-level pause/cancel state, +- accumulator state. + +- [ ] **Step 2: Define restart behavior for running candidates** + +At first cut, choose one explicit behavior and encode it in tests: +- either mark previously running candidates back to `Queued`, +- or mark them failed/stalled and allow replan. + +Recommendation: +- move previously `Running` candidates back to `Queued` on restart unless the runtime can be proven terminal from reconciliation data. + +- [ ] **Step 3: Add restart tests** + +Cover: +- queued candidates remain queued after restart, +- running candidates are recovered into a safe resumable state, +- completed candidates are not re-dispatched. + +- [ ] **Step 4: Verify** + +Run: + +```bash +cargo test --features serde --test execution_reconciliation -- --nocapture +``` + +Expected: PASS. + +## Chunk 5: Bridge Observability + +### Task 5: Expose queue and dispatch state through execution inspection + +**Files:** +- Modify: `src/bridge.rs` +- Modify: `tests/execution_bridge.rs` + +- [ ] **Step 1: Extend execution detail response** + +Add queue-oriented fields such as: +- queued candidate count, +- running candidate count, +- completed candidate count, +- maybe the next queued candidate id. + +- [ ] **Step 2: Keep event history as the source of truth** + +Do not invent a second in-memory scheduler status model in the bridge. Derive summaries from persisted candidate and event state. 
+ +- [ ] **Step 3: Add bridge tests** + +Add tests that validate: +- execution detail includes queued/running/completed counts, +- event stream stays stable, +- paused executions show queued candidates without dispatch progress. + +- [ ] **Step 4: Verify** + +Run: + +```bash +cargo test --features serde --test execution_bridge -- --nocapture +``` + +Expected: PASS. + +## Final Verification + +- [ ] Run the full suite: + +```bash +cargo test --features serde +``` + +- [ ] If a live daemon is available, rerun live bridge tests serially: + +```bash +TMPDIR=/tmp CARGO_TARGET_DIR=/home/diego/github/void-control/target \ +VOID_BOX_BASE_URL=http://127.0.0.1:43100 \ +cargo test --features serde --test execution_bridge_live -- --ignored --nocapture --test-threads=1 +``` + +- [ ] Review bridge inspection output for one completed and one paused execution. + +## Risks and Notes + +- The existing `process_execution()` logic currently couples planning and evaluation tightly. The refactor must preserve current scoring behavior while changing when dispatch happens. +- Persisting candidate records will expose backward-compatibility questions for existing temp stores. Treat old stores as best-effort and prefer forward correctness. +- Restart behavior for previously running candidates must be explicit and test-backed. Hidden assumptions here will cause duplicate work or dropped work. +- Keep the first cut filesystem format simple. A later migration to SQLite or a more structured store is easier if the behavior is already correct and well-tested. + +## Definition of Done + +The plan is complete when: +- executions persist queued/running candidate records, +- dispatcher ticks enforce global and per-execution slot rules from persisted state, +- restart reconciliation rebuilds runnable work safely, +- bridge inspection exposes queue/dispatch progress, +- all `cargo test --features serde` tests pass, +- live bridge tests still pass when run serially against the real daemon. 
diff --git a/docs/superpowers/plans/2026-03-22-search-strategy.md b/docs/superpowers/plans/2026-03-22-search-strategy.md new file mode 100644 index 0000000..738afde --- /dev/null +++ b/docs/superpowers/plans/2026-03-22-search-strategy.md @@ -0,0 +1,302 @@ +# Search Strategy Implementation Plan + +> **For agentic workers:** REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add `search` as a second supported orchestration strategy using incumbent-centered refinement with optional bootstrap, and add an acceptance suite that exercises every supported strategy end to end. + +**Architecture:** Keep the execution loop, runtime contract, scheduler, and bridge unchanged. Implement `SearchStrategy` inside the existing `IterationStrategy` boundary, extend the accumulator with only the state search needs, and route mode selection through the existing service/strategy layer. Add an integration acceptance suite that runs `swarm` and `search` through the same orchestration path so supported-strategy coverage is explicit. + +**Tech Stack:** Rust 2021, existing orchestration modules under `src/orchestration/`, filesystem-backed execution store, `serde`-gated bridge/integration tests, current `MockRuntime` and live bridge test infrastructure. + +--- + +## Scope Check + +This plan includes: +1. `search` mode validation and strategy selection, +2. incumbent-centered refinement with optional bootstrap, +3. minimal accumulator/state additions for explored signatures and search phase, +4. strategy-focused unit coverage, +5. a strategy acceptance suite that runs all supported strategies. 
+ +This plan intentionally excludes: +- adaptive `swarm -> search` mode switching, +- new evaluation models such as pairwise/tournament scoring, +- UI changes, +- non-`swarm` / non-`search` strategy implementations, +- broader runtime or scheduler refactors unrelated to search semantics. + +## File Map + +### Primary files + +- Modify: `src/orchestration/spec.rs` + Responsibility: allow `search` as a valid execution mode and validate any mode-specific constraints added for the first cut. +- Modify: `src/orchestration/types.rs` + Responsibility: add minimal accumulator fields needed by search, such as explored signatures and optional search phase. +- Modify: `src/orchestration/strategy.rs` + Responsibility: add `SearchStrategy`, keep `SwarmStrategy` intact, and expose a shared strategy-selection boundary. +- Modify: `src/orchestration/mod.rs` + Responsibility: export `SearchStrategy` and any new search-specific types. +- Modify: `src/orchestration/service.rs` + Responsibility: select `SwarmStrategy` vs `SearchStrategy` by mode without changing execution/runtime behavior. +- Modify: `src/orchestration/variation.rs` + Responsibility: add small helper logic if search needs reusable mutation/signature helpers. + +### Tests + +- Modify: `tests/execution_spec_validation.rs` + Responsibility: validate that `search` is accepted and bad unknown modes still reject. +- Modify: `tests/execution_swarm_strategy.rs` + Responsibility: rename or broaden where useful so strategy-specific unit tests cover both swarm and search. +- Create: `tests/execution_search_strategy.rs` + Responsibility: focused unit tests for bootstrap, incumbent refinement, explored-signature avoidance, and reduce behavior. +- Create: `tests/execution_strategy_acceptance.rs` + Responsibility: integration acceptance suite that runs every supported strategy (`swarm`, `search`) through the same orchestration execution path. 
+- Modify: `tests/execution_bridge.rs` + Responsibility: ensure bridge create/get surfaces work with `search` specs as well as `swarm`. + +## Delivery Strategy + +Implement in this order: + +1. add validation and accumulator support for `search`, +2. add `SearchStrategy` with bootstrap and refinement behavior, +3. wire mode selection in the service, +4. add focused unit tests, +5. add the supported-strategy acceptance suite. + +This keeps strategy behavior isolated from the dispatcher and bridge machinery already stabilized on the branch. + +## Chunk 1: Mode and State Support + +### Task 1: Add `search` as a supported mode with minimal accumulator extensions + +**Files:** +- Modify: `src/orchestration/spec.rs` +- Modify: `src/orchestration/types.rs` +- Modify: `src/orchestration/mod.rs` +- Test: `tests/execution_spec_validation.rs` + +- [ ] **Step 1: Write the failing validation test** + +Add a test in `tests/execution_spec_validation.rs` that submits a `search` mode spec and expects validation success. + +- [ ] **Step 2: Run the validation test to verify it fails** + +Run: `cargo test --features serde --test execution_spec_validation accepts_search_mode -- --exact --nocapture` +Expected: FAIL because `search` is not yet an accepted mode. + +- [ ] **Step 3: Allow `search` in spec validation** + +Update `src/orchestration/spec.rs` so `search` is accepted alongside `swarm`. Keep unknown modes rejected. + +- [ ] **Step 4: Add minimal search state to the accumulator** + +Update `src/orchestration/types.rs`: +- add `search_phase: Option<String>` or a small enum-like string field, +- add `explored_signatures: Vec<String>` or another minimal persisted representation. + +Do not add adaptive mode-switching state yet. + +- [ ] **Step 5: Export any new state types** + +Update `src/orchestration/mod.rs` so new search-related types are available to tests. 
+ +- [ ] **Step 6: Run the validation file** + +Run: `cargo test --features serde --test execution_spec_validation -- --nocapture` +Expected: PASS. + +## Chunk 2: Search Strategy Core + +### Task 2: Implement `SearchStrategy` + +**Files:** +- Modify: `src/orchestration/strategy.rs` +- Modify: `src/orchestration/variation.rs` +- Create: `tests/execution_search_strategy.rs` + +- [ ] **Step 1: Write the failing bootstrap test** + +Add a test in `tests/execution_search_strategy.rs` for: +- no seed / no incumbent, +- bootstrap round returns a small non-empty candidate batch, +- bootstrap is smaller than unconstrained broad swarm behavior for the same variation source. + +- [ ] **Step 2: Run the bootstrap test to verify it fails** + +Run: `cargo test --features serde --test execution_search_strategy search_bootstraps_when_no_seed_exists -- --exact --nocapture` +Expected: FAIL because `SearchStrategy` does not exist yet. + +- [ ] **Step 3: Add `SearchStrategy` type** + +In `src/orchestration/strategy.rs`, add `SearchStrategy` implementing the same trait surface as `SwarmStrategy`: +- `materialize_inboxes()` +- `plan_candidates()` +- `evaluate()` +- `should_stop()` +- `reduce()` + +- [ ] **Step 4: Implement bootstrap planning** + +For iteration 0 when no incumbent/seed exists: +- generate a constrained bootstrap batch, +- reuse existing variation helpers where possible, +- keep candidate count intentionally small. + +- [ ] **Step 5: Implement refinement planning** + +For iterations after bootstrap or when a seed/incumbent exists: +- generate candidates by mutating/refining around the incumbent, +- avoid signatures already in `explored_signatures`, +- keep the first cut simple and deterministic. 
+ +- [ ] **Step 6: Implement reduce behavior** + +`reduce()` should: +- preserve/update incumbent best, +- append signatures for completed candidates, +- update `search_phase` from bootstrap to refine once an incumbent exists, +- keep existing scoring-history and failure-count behavior aligned with swarm. + +- [ ] **Step 7: Implement stop behavior** + +`should_stop()` should reuse existing threshold/plateau/budget logic and additionally allow stop when no unexplored refinement candidates remain. + +- [ ] **Step 8: Add unit tests** + +In `tests/execution_search_strategy.rs`, add tests for: +- bootstrap with no seed, +- refine around incumbent, +- explored-signature avoidance, +- reduce updates incumbent and phase, +- stop when no new neighbors remain. + +- [ ] **Step 9: Run the search strategy tests** + +Run: `cargo test --features serde --test execution_search_strategy -- --nocapture` +Expected: PASS. + +## Chunk 3: Service Wiring + +### Task 3: Select strategy by execution mode + +**Files:** +- Modify: `src/orchestration/service.rs` +- Test: `tests/execution_strategy_acceptance.rs` + +- [ ] **Step 1: Write the failing acceptance test for `search`** + +Create `tests/execution_strategy_acceptance.rs` with a test that: +- builds a valid `search` spec, +- runs it through `ExecutionService::run_to_completion(...)`, +- expects a valid terminal execution result. + +- [ ] **Step 2: Run the new acceptance test to verify it fails** + +Run: `cargo test --features serde --test execution_strategy_acceptance search_strategy_runs_end_to_end -- --exact --nocapture` +Expected: FAIL because the service still hardcodes `SwarmStrategy`. + +- [ ] **Step 3: Add strategy selection in `ExecutionService`** + +Update `src/orchestration/service.rs` so mode dispatch selects: +- `SwarmStrategy` for `swarm` +- `SearchStrategy` for `search` + +Do not fork the execution loop. Keep only strategy creation mode-specific. 
+ +- [ ] **Step 4: Keep runtime/scheduler behavior unchanged** + +Confirm no changes are needed to: +- candidate dispatch, +- artifact retrieval, +- bridge routes, +- scheduler rebuild, +- reconciliation. + +- [ ] **Step 5: Run the targeted acceptance test** + +Run: `cargo test --features serde --test execution_strategy_acceptance search_strategy_runs_end_to_end -- --exact --nocapture` +Expected: PASS. + +## Chunk 4: Supported-Strategy Acceptance Suite + +### Task 4: Add acceptance coverage for all supported strategies + +**Files:** +- Modify: `tests/execution_strategy_acceptance.rs` (created in Chunk 3) +- Modify: `tests/execution_bridge.rs` + +- [ ] **Step 1: Add one shared acceptance helper** + +In `tests/execution_strategy_acceptance.rs`, create a helper that: +- constructs a spec for a named mode, +- seeds the mock runtime, +- executes through the same orchestration path, +- asserts terminal success plus a non-empty result shape. + +- [ ] **Step 2: Add one acceptance test per supported strategy** + +Add tests for: +- `swarm_strategy_runs_end_to_end` +- `search_strategy_runs_end_to_end` + +The point is not strategy-specific internals; the point is that every supported strategy runs successfully through the shared execution path. + +- [ ] **Step 3: Add a bridge-level `search` route test** + +In `tests/execution_bridge.rs`, add one test that: +- submits a `search` spec through `POST /v1/executions`, +- verifies the execution resource is created normally. + +- [ ] **Step 4: Run the acceptance suite** + +Run: + +```bash +cargo test --features serde --test execution_strategy_acceptance -- --nocapture +cargo test --features serde --test execution_bridge -- --nocapture +``` + +Expected: PASS. 
+ +## Chunk 5: Final Verification + +### Task 5: Verify the branch-wide strategy surface + +**Files:** +- No new files + +- [ ] **Step 1: Run focused strategy files** + +Run: + +```bash +cargo test --features serde --test execution_search_strategy -- --nocapture +cargo test --features serde --test execution_swarm_strategy -- --nocapture +cargo test --features serde --test execution_strategy_acceptance -- --nocapture +``` + +Expected: PASS. + +- [ ] **Step 2: Run branch-wide verification** + +Run: + +```bash +cargo test --features serde +``` + +Expected: PASS. + +- [ ] **Step 3: Optional live sanity** + +If the local daemon is available, rerun: + +```bash +TMPDIR=/tmp CARGO_TARGET_DIR=/home/diego/github/void-control/target VOID_BOX_BASE_URL=http://127.0.0.1:43100 cargo test --features serde --test execution_bridge_live -- --ignored --nocapture --test-threads=1 +``` + +Expected: PASS. This is a regression check only; no search-specific live daemon fixture is required for the first cut. + diff --git a/spec/void-box-orchestration-runtime-readiness-v0.1.md b/spec/void-box-orchestration-runtime-readiness-v0.1.md new file mode 100644 index 0000000..67df111 --- /dev/null +++ b/spec/void-box-orchestration-runtime-readiness-v0.1.md @@ -0,0 +1,374 @@ +# Void-Box Orchestration Runtime Readiness + +## Version: v0.1 + +## Scope +This spec defines the `void-box` runtime changes required to support +forward-looking orchestration by `void-control`. 
+ +It extends the existing controller/runtime boundary described in: +- `spec/void-control-runtime-spec-v0.2.md` +- `spec/void-control-iteration-spec-v0.2.md` +- `spec/void-box-orchestration-integration-changes-v0.1.md` +- `spec/void-box-orchestration-fixes-v0.1.md` + +This document is intentionally contract-first: +- it defines the runtime guarantees `void-control` depends on, +- it adds internal `void-box` guidance only where needed to make those + guarantees implementable and testable, +- it does not move orchestration ownership from `void-control` into + `void-box`. + +--- + +# 1. Ownership Boundary + +`void-control` owns: +- execution-level orchestration, +- iterative strategies such as `swarm`, +- cross-run scheduling and admission control, +- execution persistence and control-plane events, +- execution pause/resume/cancel policy, +- scoring, convergence, and reduction. + +`void-box` owns: +- single-run workflow/stage execution, +- microVM isolation and runtime policy enforcement, +- durable publication of stage output artifacts, +- run inspection data, +- typed runtime failure reporting. + +`void-box` MUST NOT: +- make cross-run scheduling decisions, +- infer execution-level strategy semantics, +- compute orchestration scores, +- replace control-plane execution state with runtime-local guesses. + +--- + +# 2. Problem Summary + +`void-control` can already launch and inspect child runs, but orchestration +readiness still depends on stronger runtime guarantees. 
+ +Current gaps: +- structured outputs are retrievable, but not yet modeled as a stable + first-class artifact contract, +- additional artifacts are not exposed through a durable manifest contract, +- inspection data is not yet specified as the normalized source for + reconciliation support, +- artifact publication and retrieval failure modes are not fully typed, +- internal publication/storage guidance is missing, which risks daemon + implementations that technically expose files but do not make them + durable, discoverable, or testable. + +--- + +# 3. Required External Contract Changes + +## 3.1 Structured stage output is a first-class runtime contract + +For orchestration-facing stages, `void-box` MUST treat `result.json` as the +canonical structured output artifact. + +`result.json` MUST be machine-readable JSON and MUST support this shape: + +```json +{ + "status": "ok", + "summary": "short human-readable summary", + "metrics": { + "latency_p99_ms": 98, + "cost_usd": 0.02 + }, + "artifacts": [ + { + "name": "report.md", + "path": "report.md", + "media_type": "text/markdown" + } + ] +} +``` + +Rules: +- `status` is required. +- `summary` is optional but recommended. +- `metrics` is a flat string-to-number map. +- `artifacts` is a list of references to additional outputs produced by the + same stage. +- unknown fields are allowed for forward compatibility. + +## 3.2 Artifact retrieval contract + +The current endpoint remains valid: +- `GET /v1/runs/{run_id}/stages/{stage}/output-file` + +For orchestration readiness, `void-box` MUST guarantee that this endpoint +returns the canonical structured output for the stage when `result.json` +exists. 
+ +Forward-looking support MUST also be added for named artifact retrieval: +- `GET /v1/runs/{run_id}/stages/{stage}/artifacts/{name}` + +Response behavior: +- `200` with artifact content when found, +- `404` with typed error when the named artifact does not exist, +- `409` or `424` style typed error when artifact publication is incomplete, +- `5xx` only for true internal failures. + +## 3.3 Artifact manifest contract + +Each completed stage that publishes structured output MUST have a stable +artifact manifest available through run inspection or stage inspection. + +Minimum manifest entry shape: + +```json +{ + "name": "report.md", + "stage": "main", + "media_type": "text/markdown", + "size_bytes": 1824, + "retrieval_path": "/v1/runs/run_123/stages/main/artifacts/report.md" +} +``` + +Rules: +- `name` is stable within a stage. +- `retrieval_path` is the canonical retrieval URI suffix exposed by the + daemon. +- `size_bytes` MAY be omitted if not known cheaply, but SHOULD be present. +- the manifest MUST include the canonical structured output artifact even if + it is also retrievable via `output-file`. + +## 3.4 Run inspection and reconciliation support + +`GET /v1/runs/{id}` MUST expose normalized fields sufficient for +`void-control` reconciliation after restart. 
+ +Required fields: +- `run_id` +- `attempt_id` +- `state` +- `started_at` +- `updated_at` +- `finished_at` when terminal +- `terminal_reason` when terminal +- `active_stage_count` +- `active_microvm_count` +- `stage_states` +- `artifact_publication` + +Recommended `artifact_publication` shape: + +```json +{ + "status": "published", + "published_at": "2026-03-20T18:20:00Z", + "manifest": [ + { + "name": "result.json", + "stage": "main", + "media_type": "application/json", + "retrieval_path": "/v1/runs/run_123/stages/main/output-file" + } + ] +} +``` + +`artifact_publication.status` MUST distinguish at least: +- `not_started` +- `publishing` +- `published` +- `failed` + +## 3.5 Active-run listing for reconciliation + +`void-box` MUST expose one of: +- `GET /v1/runs?state=active` +- `GET /v1/runs/active` + +This endpoint MUST be safe after daemon restart and MUST return enough +inspection data for `void-control` to resume runtime tracking of non-terminal +runs. + +--- + +# 4. Failure and Error Semantics + +`void-box` MUST surface typed conditions for output and publication failures. + +Minimum conditions: +- `STRUCTURED_OUTPUT_MISSING` +- `STRUCTURED_OUTPUT_MALFORMED` +- `ARTIFACT_NOT_FOUND` +- `ARTIFACT_PUBLICATION_INCOMPLETE` +- `ARTIFACT_STORE_UNAVAILABLE` +- `RETRIEVAL_TIMEOUT` + +These conditions MAY appear: +- in non-2xx HTTP error payloads, +- in run inspection terminal metadata, +- in event payloads when such events exist. + +Minimum error envelope: + +```json +{ + "code": "STRUCTURED_OUTPUT_MISSING", + "message": "main stage completed without result.json", + "retryable": false +} +``` + +Rules: +- missing structured output is distinct from malformed structured output, +- artifact lookup failure is distinct from publication-not-yet-complete, +- retrieval timeout is distinct from daemon internal failure. + +--- + +# 5. 
Artifact and Output Semantics + +## 5.1 Publication durability + +Published artifacts MUST remain retrievable for at least the configured +retention window after run completion. + +`void-box` MUST NOT report artifacts as published before their retrieval path +is actually readable. + +## 5.2 Publication atomicity + +Artifact publication SHOULD behave atomically from the perspective of +inspection: +- before publication completes, manifest state is `publishing`, +- after publication completes, manifest state becomes `published` and listed + artifacts are retrievable, +- partial publication MUST surface as `failed` or `publishing`, never as a + silently incomplete `published` manifest. + +## 5.3 Backward compatibility + +During rollout: +- `output-file` remains supported, +- `result.json` remains the orchestration default, +- manifest support is additive, +- existing non-orchestration uses of `void-box` MUST continue working. + +--- + +# 6. Internal Implementation Guidance + +This section is guidance, not a required source-level design, but daemon +implementations SHOULD follow it closely. + +## 6.1 Separate metadata from raw artifact bytes + +`void-box` SHOULD persist artifact metadata separately from the artifact +contents so inspection can answer quickly without directory scans. + +Suggested persisted metadata: +- run id +- attempt id +- stage +- artifact name +- media type +- size +- publication status +- retrieval path +- publication timestamp + +## 6.2 Treat artifact publication as an explicit runtime step + +Artifact publication SHOULD be modeled as a distinct post-stage step: +- stage execution produces local outputs, +- publication validates and registers structured outputs, +- inspection reads the persisted publication result. + +This avoids mixing "stage exited successfully" with "artifact contract is +durably published". 
+ +## 6.3 Normalize per-run inspection state + +`void-box` SHOULD maintain a normalized per-run summary record containing: +- lifecycle state, +- terminal reason, +- stage terminal states, +- active stage count, +- active microVM count, +- artifact publication status, +- artifact manifest, +- timestamps. + +Inspection endpoints SHOULD read from this normalized record rather than +recomputing state from logs on demand. + +## 6.4 Keep execution and publication responsibilities separate + +Recommended split: +- execution worker: runs stages and records runtime facts, +- publication step: validates `result.json`, registers artifacts, updates + manifest status, +- inspection layer: serves normalized state and retrieval metadata. + +This split is guidance, not a requirement for separate processes. + +## 6.5 Retention and cleanup coordination + +Retention logic SHOULD ensure: +- manifests do not outlive the referenced artifact bytes, +- artifact bytes do not remain indefinitely without manifest metadata, +- terminal inspection remains useful until retention expiry. + +Cleanup SHOULD update publication metadata consistently rather than leaving +stale retrieval paths behind. + +--- + +# 7. Compatibility and Migration + +Recommended rollout order: + +1. Add additive inspection fields and typed error payloads. +2. Make `result.json` publication rules explicit and contract-tested. +3. Add manifest support and named artifact retrieval. +4. Add reconciliation-ready active-run listing. +5. Deprecate any ad hoc artifact discovery assumptions in controller code. + +Non-goals for this version: +- redesigning stage orchestration inside `void-box`, +- moving execution-level pause/resume logic into `void-box`, +- defining strategy-specific artifact schemas beyond the base `result.json` + contract, +- specifying a particular storage backend. + +--- + +# 8. 
Acceptance Criteria + +`void-box` is orchestration-ready for this spec when all of the following are +true against a live daemon: + +1. `void-control` can submit a child run, inspect it, and reconcile its + lifecycle after restart using only runtime APIs and published artifacts. +2. A successful orchestration-facing stage publishes a valid `result.json` + retrievable through `GET /v1/runs/{run_id}/stages/{stage}/output-file`. +3. Additional artifacts referenced from `result.json` are discoverable via a + stable manifest and retrievable via named artifact endpoints. +4. A run that completes without `result.json` returns a typed + `STRUCTURED_OUTPUT_MISSING` condition. +5. A run with malformed `result.json` returns a typed + `STRUCTURED_OUTPUT_MALFORMED` condition. +6. Run inspection exposes artifact publication status without requiring log + scraping. +7. Active-run listing after daemon restart is sufficient for controller + reconciliation. +8. Contract tests cover: + - structured output retrieval, + - missing output classification, + - malformed output classification, + - manifest publication, + - named artifact retrieval, + - active-run reconciliation support. diff --git a/spec/void-control-iteration-spec-v0.1.md b/spec/void-control-iteration-spec-v0.1.md new file mode 100644 index 0000000..a0d074d --- /dev/null +++ b/spec/void-control-iteration-spec-v0.1.md @@ -0,0 +1,568 @@ +# Void Control Iteration Specification + +## Version: v0.1 + +## Scope +Define the control-plane iteration model for future `void-control` +execution modes, with `swarm` as the first motivating example. + +This specification establishes: +- the control-plane object model, +- iteration and candidate lifecycle, +- event-mediated communication, +- how `void-control` consumes `void-box` completion information, +- strict boundaries between `void-control` and `void-box`. + +This is a specification only. It does not require immediate +implementation of all modes described here. + +--- + +# 1. 
Core Idea + +`void-control` is the control plane. + +`void-box` is the execution runtime. + +`void-control` owns high-level execution modes that may require one or +many `void-box` child runs. Iterative modes, such as `swarm`, are +therefore a control-plane concern, not an internal `void-box` scheduler +mode. + +The first-class resource is: + +- `Execution` + +The concrete runtime unit remains: + +- `Run` + +An `Execution` may create one or many child `Run`s across one or many +iterations. + +--- + +# 2. Architectural Boundaries + +## 2.1 `void-control` Responsibilities + +- Accept and validate `ExecutionSpec`. +- Persist durable execution state. +- Own iteration state and candidate registry. +- Decide when to create, stop, or replace child runs. +- Consume runtime events and outputs from `void-box`. +- Derive control-plane events and execution status. +- Apply convergence, budget, and policy rules. + +## 2.2 `void-box` Responsibilities + +- Execute one concrete child `Run`. +- Isolate work inside microVM-backed stage execution. +- Emit runtime lifecycle and stage events. +- Persist stage output artifacts. +- Expose run completion status and stage-level output retrieval. + +## 2.3 Strict Boundary Rules + +`void-control` MUST NOT: +- depend on direct candidate-to-candidate transport, +- infer semantic execution state from raw logs alone, +- treat `void-box` as the owner of iteration state. + +`void-box` MUST NOT: +- own swarm memory or iteration memory, +- decide convergence for an `Execution`, +- directly route messages between candidates, +- persist cross-run control-plane state. + +--- + +# 3. Control-Plane Object Model + +## 3.1 Execution + +`Execution` is the top-level control-plane object. 
+ +Suggested shape: + +```json +{ + "execution_id": "exec_123", + "mode": "swarm", + "status": "running", + "goal": "optimize latency", + "current_iteration": 2, + "result": null +} +``` + +Required properties: +- `execution_id` +- `mode` +- `status` +- `goal` +- `policy` +- `created_at` +- `updated_at` + +## 3.2 Iteration + +An `Iteration` is one control-plane decision round inside an `Execution`. + +An iteration owns: +- the candidate set launched in that round, +- the delivery window for messages visible to those candidates, +- evaluation/scoring results, +- iteration completion status. + +## 3.3 Candidate + +A `Candidate` is one evaluated alternative within an iteration. + +A candidate may map to: +- exactly one child `Run` in the simple case, or +- multiple child `Run`s in future modes if needed. + +For v0.1, the default mapping is: + +`candidate -> one child run` + +## 3.4 Child Run + +A child `Run` is the concrete `void-box` execution backing a candidate. + +Users should interact primarily with `Execution`. + +Child runs are drill-down details for: +- logs, +- runtime events, +- stage graph, +- stage artifacts, +- failure debugging. + +--- + +# 4. Execution Spec Model + +`void-control` should accept a single `ExecutionSpec` envelope with +common fields and mode-specific sections. + +Example: + +```json +{ + "mode": "swarm", + "goal": "optimize latency under load", + "inputs": {}, + "policy": {}, + "workflow": { + "template": {} + }, + "swarm": { + "max_iterations": 10 + } +} +``` + +Rules: +- common fields live at the top level, +- `workflow` is an execution primitive, not itself the control-plane mode, +- mode-specific sections are optional unless required by the selected mode, +- validation is mode-aware. + +## 4.1 Mode Taxonomy + +`Execution` modes should be treated as control-plane orchestration +strategies, not as aliases for workflow shape. 
+ +Suggested families: + +- static modes + - `single_run` +- delegated modes + - `one_shot_agent` + - `planner_executor` +- iterative modes + - `swarm` + - `search` + - `tournament` + +`swarm` is therefore not the only future mode. + +For example, a one-shot delegated coding agent flow similar to Stripe's +Minions pattern is better modeled as a delegated mode than as `swarm`: + +- one task is assigned, +- one execution owns the end-to-end lifecycle, +- the control plane tracks progress and artifacts, +- iteration across parallel candidates is not the primary abstraction. + +This distinction keeps: +- iterative comparison logic in iterative modes, +- end-to-end autonomous task delegation in delegated modes, +- concrete workflow execution in child runs. + +--- + +# 5. Iteration Semantics + +## 5.1 Iteration Lifecycle + +Each iteration proceeds through: + +`Planned -> Dispatching -> Running -> Evaluating -> Completed` + +Terminal iteration states: +- `Completed` +- `Failed` +- `Canceled` + +## 5.2 Candidate Lifecycle + +Each candidate proceeds through: + +`Pending -> Scheduled -> Running -> {Succeeded | Failed | Canceled}` + +Candidate completion is driven by the terminal state of its child run +plus any required structured outputs. + +## 5.3 Control Loop + +Iterative modes should follow this model: + +```rust +loop { + let inboxes = materialize_candidate_inboxes(execution_state); + let candidates = plan_next_candidates(execution_state, inboxes); + let child_runs = dispatch_candidates(candidates); + let runtime_updates = collect_runtime_events(child_runs); + let outputs = collect_candidate_outputs(child_runs); + let derived = evaluate_iteration(runtime_updates, outputs); + execution_state = reduce(execution_state, derived); + + if should_stop(execution_state) { + break; + } +} +``` + +This loop lives in `void-control`, not in `void-box`. + +--- + +# 6. Event Model + +## 6.1 Two Event Layers + +The system uses two distinct event layers. 
+ +### Runtime Events + +Produced by `void-box` child runs. + +Examples: +- `RunStarted` +- `StageStarted` +- `StageCompleted` +- `StageFailed` +- `RunCompleted` +- `RunFailed` +- `RunCanceled` + +These are low-level execution facts. + +### Control-Plane Events + +Produced by `void-control`. + +Examples: +- `ExecutionCreated` +- `IterationPlanned` +- `IterationStarted` +- `CandidateScheduled` +- `CandidateMessageProduced` +- `CandidateMessageDelivered` +- `CandidateCompleted` +- `CandidateScored` +- `IterationCompleted` +- `ExecutionCompleted` +- `ExecutionFailed` +- `ExecutionCanceled` + +These are orchestration facts. + +## 6.2 Event Ownership Rule + +Execution state in `void-control` MUST advance from persisted events and +reduced outputs. + +Direct inspection of child runs may be used for reconciliation and repair, +but not as the primary source of orchestration truth. + +## 6.3 Replayability Rule + +Every orchestration decision that changes execution state MUST be +reconstructible from the control-plane event log plus referenced child-run +artifacts. + +--- + +# 7. Candidate Communication + +## 7.1 Communication Model + +Candidates do not communicate directly with each other. + +All candidate communication is mediated by `void-control`. + +A candidate may express an intent to communicate, but delivery is always +a control-plane decision. + +## 7.2 `@` Mentions + +A candidate output such as `@candidate-b` or `@leader` is interpreted as +a communication intent, not direct transport. 
+ +Flow: + +`child run output -> control-plane event -> routing decision -> next inbox` + +## 7.3 Canonical Message Shape + +Suggested control-plane message event: + +```json +{ + "type": "candidate.message", + "execution_id": "exec_123", + "iteration": 2, + "from_candidate_id": "cand_a", + "mentions": ["cand_b"], + "message": "Try the lower-concurrency variant", + "visibility": "swarm" +} +``` + +## 7.4 Delivery Rule + +For v0.1, messages should be delivered to future candidate inboxes, not +to already-running child runs. + +This avoids mid-run coupling and keeps replay semantics simple. + +## 7.5 Mailbox Rule + +The canonical mailbox lives in control-plane state as persisted message +events plus derived delivery state. + +`mailbox.json` is allowed only as: +- a generated inbox snapshot injected into a child run at launch time, +- a debug artifact, +- a convenience input format for agent code. + +`mailbox.json` MUST NOT be the system of record. + +--- + +# 8. Leader and Roles + +## 8.1 Role Assignment + +Leader semantics, when used, are assigned by `void-control`. + +A leader is a role, not an autonomous authority. + +The control plane may mark a candidate as: +- `leader` +- `reviewer` +- `researcher` +- other future logical roles + +## 8.2 Authority Rule + +The leader may produce intents such as: +- propose next candidates, +- summarize results, +- recommend a direction, +- address other candidates by logical role. + +Those intents are advisory until `void-control` accepts and realizes +them. + +## 8.3 Initial Support + +For early versions, `void-control` should support: +- `leaderless` +- `fixed_leader` + +Dynamic leader election may be added later. + +--- + +# 9. 
State Ownership + +## 9.1 Durable State + +The following state MUST live in `void-control`: +- `ExecutionSpec` +- execution status +- iteration state +- candidate registry +- role assignments +- message history +- scoring history +- child run mapping +- convergence and stop reason +- artifact references + +## 9.2 Ephemeral State + +The following state may live only inside a child `void-box` run: +- local filesystem data for that run, +- task input files, +- generated mailbox snapshot, +- temporary artifacts, +- process-local execution context. + +## 9.3 Restart Rule + +If a restart requires the state to continue or reconstruct the execution, +that state belongs in `void-control`. + +--- + +# 10. Completion Information from `void-box` + +## 10.1 Required Runtime Completion Sources + +`void-control` should use the following existing `void-box` completion +surfaces: + +- child run terminal status, +- stable terminal event id, +- resumable run event stream, +- stage snapshot endpoint, +- persisted stage output artifacts, +- runtime run report when available. + +## 10.2 Completion Mapping + +For each child run, `void-control` should collect: + +### Lifecycle Completion + +From runtime events and run inspection: +- terminal state, +- terminal event id, +- failure/cancel reason, +- timestamps, +- attempt id. + +### Stage Completion + +From stage snapshots: +- per-stage terminal status, +- timing, +- exit code, +- dependency shape, +- stage grouping. + +### Semantic Completion + +From stage artifacts and/or run output: +- candidate result summary, +- candidate metrics, +- communication intents, +- referenced artifacts. + +## 10.3 Structured Output Rule + +Logs alone are insufficient for control-plane iteration decisions. + +Iterative modes SHOULD define a structured artifact contract for child +runs, such as: +- `result.json` +- `intents.json` +- `artifacts.json` + +These names are illustrative in v0.1; exact filenames may be finalized +later. 
The core requirement is stable structured output, not filename +choice. + +## 10.4 Candidate Completion Rule + +A candidate is only fully complete when: +- its child run is terminal, and +- all required structured outputs for the mode have been collected or + explicitly marked absent. + +--- + +# 11. Reconciliation + +On `void-control` restart: +- reload non-terminal executions, +- reload candidate-to-run mapping, +- inspect known child runs, +- resume runtime event consumption from the last seen event id, +- rebuild derived inboxes and iteration status from control-plane events. + +Reconciliation may use runtime inspection and runtime event replay, but +the rebuilt execution state must still be reduced into the control-plane +model. + +--- + +# 12. UI and API Visibility + +## 12.1 Primary View + +Users should primarily see: +- execution status, +- current iteration, +- candidate counts, +- scores, +- current best result, +- orchestration timeline. + +## 12.2 Drill-Down View + +Users may drill into child runs for: +- per-run events, +- logs, +- stage graph, +- output artifacts, +- detailed failure diagnosis. + +The UI and API should not force users to reason about all child runs by +default. + +--- + +# 13. Non-Goals for v0.1 + +- Direct candidate-to-candidate transport. +- Shared mutable mailbox files as canonical state. +- Mid-run message injection into already-running child runs. +- Leader election semantics. +- Multi-node distributed runtime scheduling. +- A final stable schema for all mode-specific artifacts. + +--- + +# 14. Acceptance Criteria + +This specification is satisfied when a future implementation can: + +1. Create one top-level `Execution` that spans multiple iterations. +2. Launch multiple child `void-box` runs in parallel for one iteration. +3. Track candidate completion using existing `void-box` terminal events + and run state. +4. Collect semantic candidate outputs from structured artifacts rather + than logs alone. +5. 
Convert candidate communication intents into persisted control-plane + message events. +6. Materialize candidate inbox snapshots for future iterations without + making those snapshots the system of record. +7. Reconstruct execution state after restart from persisted control-plane + state plus replayed child-run information. diff --git a/spec/void-control-iteration-spec-v0.2.md b/spec/void-control-iteration-spec-v0.2.md new file mode 100644 index 0000000..dd82055 --- /dev/null +++ b/spec/void-control-iteration-spec-v0.2.md @@ -0,0 +1,1653 @@ +# Void Control Iteration Specification + +## Version: v0.2 + +## Changelog + +- v0.2: Added policy model, evaluation contract, candidate variation, + failure semantics, artifact retrieval, iteration state threading, + iteration strategy trait, backpressure/concurrency, observability + events, execution checkpointing, mid-execution policy adjustment, + dry-run mode, and result provenance. Updated acceptance criteria. +- v0.1: Initial specification. + +## Scope + +Define the control-plane iteration model for future `void-control` +execution modes, with `swarm` as the first motivating example. + +This specification establishes: +- the control-plane object model, +- iteration and candidate lifecycle, +- event-mediated communication, +- how `void-control` consumes `void-box` completion information, +- strict boundaries between `void-control` and `void-box`, +- the policy, evaluation, and variation models, +- failure semantics and operational controls. + +This is a specification only. It does not require immediate +implementation of all modes described here. + +--- + +# 1. Core Idea + +`void-control` is the control plane. + +`void-box` is the execution runtime. + +`void-control` owns high-level execution modes that may require one or +many `void-box` child runs. Iterative modes, such as `swarm`, are +therefore a control-plane concern, not an internal `void-box` scheduler +mode. 
+ +The first-class resource is: + +- `Execution` + +The concrete runtime unit remains: + +- `Run` + +An `Execution` may create one or many child `Run`s across one or many +iterations. + +--- + +# 2. Layered Architecture + +## 2.1 Two Layers Within `void-control` + +`void-control` is internally organized into two layers: + +### Runtime Integration Layer (existing) + +The contract and runtime modules (`src/contract/`, `src/runtime/`) +provide the integration surface with `void-box`. This layer: + +- defines canonical types for individual runs: `RunState`, + `EventEnvelope`, `EventType`, `EventSequenceTracker`, + `ExecutionPolicy`. +- defines the runtime interaction API: `StartRequest`, `StartResult`, + `StopRequest`, `StopResult`, `RuntimeInspection`, + `SubscribeEventsRequest`. +- provides concrete clients: `VoidBoxRuntimeClient` (HTTP transport) + and `MockRuntime` (testing). +- handles compatibility mapping from void-box wire format to canonical + types (compat layer). + +This layer knows about one run at a time. It has no concept of +executions, iterations, candidates, or scoring. + +### Orchestration Layer (this spec) + +The iteration model defined in this specification sits above the +runtime integration layer. This layer: + +- defines the multi-run object model: `Execution`, `Iteration`, + `Candidate`, `ExecutionAccumulator`. +- owns the control loop that creates, monitors, and evaluates multiple + child runs across iterations. +- manages cross-run concerns: scoring, variation, convergence, + communication, concurrency, and budget. +- produces control-plane events that are distinct from runtime events. + +### How the Layers Connect + +The orchestration layer consumes the runtime layer — it never bypasses +it to talk to void-box directly. 
+ +| Orchestration action | Runtime layer call | +|----------------------|-------------------| +| Dispatch a candidate | `VoidBoxRuntimeClient::start(StartRequest)` with the resolved candidate spec | +| Cancel a child run | `VoidBoxRuntimeClient::stop(StopRequest)` | +| Check child run status | `VoidBoxRuntimeClient::inspect()` → `RuntimeInspection` | +| Consume child run events | `VoidBoxRuntimeClient::subscribe_events(SubscribeEventsRequest)` → stream of `EventEnvelope` | +| Retrieve stage artifacts | `GET /v1/runs/{id}/stages/{stage}/output-file` (via HTTP transport) | + +The shared infrastructure functions in the control loop map to these +calls: + +- `dispatch_candidates()` → iterates candidate specs, calls `start()` + for each, records the `child_run_id` from `StartResult`, emits + `CandidateDispatched` control-plane event. +- `collect_outputs()` → subscribes to events for each child run via + `subscribe_events()`, waits for terminal `EventEnvelope`, then + fetches artifacts. Maps `RuntimeInspection` fields (terminal state, + exit code, timestamps) into `CandidateOutput`. +- Failure handling → calls `stop()` for timeout or cancellation, reads + `RuntimeInspection.terminal_reason` for diagnostics. + +### Policy Mapping + +The existing `ExecutionPolicy` in the contract layer +(`max_parallel_microvms_per_run`, `max_stage_retries`, +`stage_timeout_secs`, `cancel_grace_period_secs`) controls per-run +behavior inside void-box. + +The orchestration-layer `policy` defined in this spec (Section 14) +controls cross-run behavior: budget, concurrency across candidates, +convergence, and failure escalation. + +Both policies coexist. When dispatching a candidate, the orchestration +layer passes the contract-level `ExecutionPolicy` through to +`StartRequest` for the child run, while applying its own policy to +decide whether to dispatch at all. + +## 2.2 Responsibility Boundaries + +### `void-control` Responsibilities + +- Accept and validate `ExecutionSpec`. 
+- Persist durable execution state. +- Own iteration state and candidate registry. +- Decide when to create, stop, or replace child runs. +- Consume runtime events and outputs from `void-box`. +- Derive control-plane events and execution status. +- Apply convergence, budget, and policy rules. +- Score candidates and track evaluation history. +- Manage concurrency across executions. + +### `void-box` Responsibilities + +- Execute one concrete child `Run`. +- Isolate work inside microVM-backed stage execution. +- Emit runtime lifecycle and stage events. +- Persist stage output artifacts. +- Expose run completion status and stage-level output retrieval. + +## 2.3 Strict Boundary Rules + +`void-control` MUST NOT: +- depend on direct candidate-to-candidate transport, +- infer semantic execution state from raw logs alone, +- treat `void-box` as the owner of iteration state. + +`void-box` MUST NOT: +- own swarm memory or iteration memory, +- decide convergence for an `Execution`, +- directly route messages between candidates, +- persist cross-run control-plane state. + +--- + +# 3. Control-Plane Object Model + +## 3.1 Execution + +`Execution` is the top-level control-plane object. 
+ +Suggested shape: + +```json +{ + "execution_id": "exec_123", + "mode": "swarm", + "status": "running", + "goal": "optimize latency", + "current_iteration": 2, + "policy": {}, + "result": null, + "created_at": "2026-03-18T10:00:00Z", + "updated_at": "2026-03-18T10:05:00Z" +} +``` + +Required properties: +- `execution_id` +- `mode` +- `status` +- `goal` +- `policy` +- `created_at` +- `updated_at` + +Execution status enum: + +`Pending | Running | Paused | Completed | Failed | Canceled` + +Valid transitions: + +``` +Pending -> Running +Running -> Paused +Running -> Completed +Running -> Failed +Running -> Canceled +Paused -> Running +Paused -> Canceled +``` + +Note: `evaluation` and `variation` are part of the `ExecutionSpec` +(submission-time configuration) but are not runtime fields on the +`Execution` object. They are referenced from the persisted +`ExecutionSpec`. + +## 3.2 Iteration + +An `Iteration` is one control-plane decision round inside an `Execution`. + +An iteration owns: +- the candidate set launched in that round, +- the delivery window for messages visible to those candidates, +- evaluation/scoring results, +- iteration completion status. + +## 3.3 Candidate + +A `Candidate` is one evaluated alternative within an iteration. + +A candidate may map to: +- exactly one child `Run` in the simple case, or +- multiple child `Run`s in future modes if needed. + +For v0.2, the default mapping is: + +`candidate -> one child run` + +## 3.4 Child Run + +A child `Run` is the concrete `void-box` execution backing a candidate. + +Users should interact primarily with `Execution`. + +Child runs are drill-down details for: +- logs, +- runtime events, +- stage graph, +- stage artifacts, +- failure debugging. + +--- + +# 4. Execution Spec Model + +`void-control` should accept a single `ExecutionSpec` envelope with +common fields and mode-specific sections. 
+ +Example: + +```json +{ + "mode": "swarm", + "goal": "optimize latency under load", + "inputs": {}, + "policy": { + "budget": { + "max_iterations": 10, + "max_child_runs": 50, + "max_wall_clock_secs": 3600, + "max_cost_usd": 25.00 + }, + "concurrency": { + "max_concurrent_candidates": 4 + }, + "convergence": { + "strategy": "threshold", + "min_score": 0.85, + "max_iterations_without_improvement": 3 + }, + "failure": { + "max_candidate_failures_per_iteration": 2, + "iteration_failure_policy": "fail_execution", + "missing_output_policy": "mark_failed" + } + }, + "evaluation": { + "scoring": { + "type": "weighted_metrics", + "weights": { + "latency_p99_ms": { "weight": 0.6, "direction": "minimize" }, + "cost_usd": { "weight": 0.4, "direction": "minimize" } + }, + "pass_threshold": 0.7 + }, + "ranking": "highest_score", + "tie_breaking": "lowest_cost" + }, + "variation": { + "source": "parameter_space", + "parameter_space": { + "sandbox.memory_mb": [512, 1024, 2048], + "sandbox.env.CONCURRENCY": ["4", "8", "16"] + }, + "candidates_per_iteration": 3, + "selection": "random" + }, + "workflow": { + "template": {} + }, + "swarm": {} +} +``` + +Rules: +- common fields live at the top level, +- `workflow` is an execution primitive, not itself the control-plane mode, +- mode-specific sections are optional unless required by the selected mode, +- validation is mode-aware. + +## 4.1 Mode Taxonomy + +`Execution` modes should be treated as control-plane orchestration +strategies, not as aliases for workflow shape. + +Suggested families: + +- static modes + - `single_run` +- delegated modes + - `one_shot_agent` + - `planner_executor` +- iterative modes + - `swarm` + - `search` + - `tournament` + +`swarm` is therefore not the only future mode. 
+ +For example, a one-shot delegated coding agent flow similar to Stripe's +Minions pattern is better modeled as a delegated mode than as `swarm`: + +- one task is assigned, +- one execution owns the end-to-end lifecycle, +- the control plane tracks progress and artifacts, +- iteration across parallel candidates is not the primary abstraction. + +This distinction keeps: +- iterative comparison logic in iterative modes, +- end-to-end autonomous task delegation in delegated modes, +- concrete workflow execution in child runs. + +--- + +# 5. Iteration Semantics + +## 5.1 Iteration Lifecycle + +Each iteration proceeds through: + +`Planned -> Dispatching -> Running -> Evaluating -> Completed` + +Terminal iteration states: +- `Completed` +- `Failed` +- `Canceled` + +## 5.2 Candidate Lifecycle + +Each candidate proceeds through: + +`Pending -> Queued -> Dispatching -> Running -> {Succeeded | Failed | Canceled}` + +`Queued` indicates the candidate is waiting for a concurrency slot (see +Section 21). + +Candidate completion is driven by the terminal state of its child run +plus any required structured outputs. + +## 5.3 Control Loop + +Iterative modes should follow this model: + +```rust +let strategy = strategy_for_mode(execution.mode); + +loop { + let inboxes = strategy.materialize_inboxes(&accumulator); + let candidates = strategy.plan_candidates(&accumulator, &inboxes); + let child_runs = dispatch_candidates(candidates); + let outputs = collect_outputs(child_runs); + let evaluation = strategy.evaluate(&accumulator, &outputs); + + if let Some(reason) = strategy.should_stop(&accumulator, &evaluation) { + finalize_execution(accumulator, reason); + break; + } + + accumulator = strategy.reduce(accumulator, evaluation); +} +``` + +This loop lives in `void-control`, not in `void-box`. + +`dispatch_candidates()` and `collect_outputs()` are shared +infrastructure that handle void-box interaction, concurrency, +artifact retrieval, and failure handling. They are not mode-specific. 
+ +--- + +# 6. Event Model + +## 6.1 Two Event Layers + +The system uses two distinct event layers. + +### Runtime Events + +Produced by `void-box` child runs. + +Examples: +- `RunStarted` +- `StageStarted` +- `StageSucceeded` +- `StageFailed` +- `RunCompleted` +- `RunFailed` +- `RunCancelled` + +These are low-level execution facts. + +Note: `void-box` uses British spelling (`RunCancelled`, `StageSucceeded`) +for some event types. `void-control` normalizes these via the compatibility +layer (e.g., `RunCancelled` → `RunCanceled`). This spec uses the +`void-control` canonical names throughout. The compat layer handles the +mapping. + +### Control-Plane Events + +Produced by `void-control`. + +Lifecycle events: +- `ExecutionCreated` +- `IterationPlanned` +- `IterationStarted` +- `CandidateScheduled` +- `CandidateMessageProduced` +- `CandidateMessageDelivered` +- `CandidateCompleted` +- `CandidateScored` +- `IterationCompleted` +- `ExecutionCompleted` +- `ExecutionFailed` +- `ExecutionCanceled` + +Operational events (see Section 22): +- `CandidateQueued` +- `CandidateDispatched` +- `CandidateOutputCollected` +- `CandidateOutputError` +- `CandidateTimeout` +- `IterationBudgetWarning` +- `ExecutionBudgetExhausted` +- `ExecutionStalled` +- `ExecutionPaused` +- `ExecutionResumed` +- `PolicyUpdated` + +## 6.2 Event Ownership Rule + +Execution state in `void-control` MUST advance from persisted events and +reduced outputs. + +Direct inspection of child runs may be used for reconciliation and repair, +but not as the primary source of orchestration truth. + +## 6.3 Replayability Rule + +Every orchestration decision that changes execution state MUST be +reconstructible from the control-plane event log plus referenced child-run +artifacts. + +--- + +# 7. Candidate Communication + +## 7.1 Communication Model + +Candidates do not communicate directly with each other. + +All candidate communication is mediated by `void-control`. 
+ +A candidate may express an intent to communicate, but delivery is always +a control-plane decision. + +## 7.2 `@` Mentions + +A candidate output such as `@candidate-b` or `@leader` is interpreted as +a communication intent, not direct transport. + +Flow: + +`child run output -> control-plane event -> routing decision -> next inbox` + +## 7.3 Canonical Message Shape + +Suggested control-plane message event: + +```json +{ + "type": "candidate.message", + "execution_id": "exec_123", + "iteration": 2, + "from_candidate_id": "cand_a", + "mentions": ["cand_b"], + "message": "Try the lower-concurrency variant", + "visibility": "swarm" +} +``` + +## 7.4 Delivery Rule + +For v0.2, messages should be delivered to future candidate inboxes, not +to already-running child runs. + +This avoids mid-run coupling and keeps replay semantics simple. + +## 7.5 Mailbox Rule + +The canonical mailbox lives in control-plane state as persisted message +events plus derived delivery state. + +`mailbox.json` is allowed only as: +- a generated inbox snapshot injected into a child run at launch time, +- a debug artifact, +- a convenience input format for agent code. + +`mailbox.json` MUST NOT be the system of record. + +--- + +# 8. Leader and Roles + +## 8.1 Role Assignment + +Leader semantics, when used, are assigned by `void-control`. + +A leader is a role, not an autonomous authority. + +The control plane may mark a candidate as: +- `leader` +- `reviewer` +- `researcher` +- other future logical roles + +## 8.2 Authority Rule + +The leader may produce intents such as: +- propose next candidates, +- summarize results, +- recommend a direction, +- address other candidates by logical role. + +Those intents are advisory until `void-control` accepts and realizes +them. + +## 8.3 Initial Support + +For early versions, `void-control` should support: +- `leaderless` +- `fixed_leader` + +Dynamic leader election may be added later. + +--- + +# 9. 
State Ownership + +## 9.1 Durable State + +The following state MUST live in `void-control`: +- `ExecutionSpec` +- execution status +- iteration state +- candidate registry +- role assignments +- message history +- scoring history +- child run mapping +- convergence and stop reason +- artifact references +- execution accumulator + +## 9.2 Ephemeral State + +The following state may live only inside a child `void-box` run: +- local filesystem data for that run, +- task input files, +- generated mailbox snapshot, +- temporary artifacts, +- process-local execution context. + +## 9.3 Restart Rule + +If a restart requires the state to continue or reconstruct the execution, +that state belongs in `void-control`. + +--- + +# 10. Completion Information from `void-box` + +## 10.1 Required Runtime Completion Sources + +`void-control` should use the following existing `void-box` completion +surfaces: + +- child run terminal status, +- stable terminal event id, +- resumable run event stream, +- stage snapshot endpoint, +- persisted stage output artifacts, +- runtime run report when available. + +## 10.2 Completion Mapping + +For each child run, `void-control` should collect: + +### Lifecycle Completion + +From runtime events and run inspection: +- terminal state, +- terminal event id, +- failure/cancel reason, +- timestamps, +- attempt id. + +### Stage Completion + +From stage snapshots: +- per-stage terminal status, +- timing, +- exit code, +- dependency shape, +- stage grouping. + +### Semantic Completion + +From stage artifacts and/or run output: +- candidate result summary, +- candidate metrics, +- communication intents, +- referenced artifacts. + +## 10.3 Structured Output Rule + +Logs alone are insufficient for control-plane iteration decisions. + +Iterative modes SHOULD define a structured artifact contract for child +runs, such as: +- `result.json` +- `intents.json` +- `artifacts.json` + +These names are illustrative in v0.2; exact filenames may be finalized +later. 
The core requirement is stable structured output, not filename +choice. + +## 10.4 Candidate Completion Rule + +A candidate is only fully complete when: +- its child run is terminal, and +- all required structured outputs for the mode have been collected or + explicitly marked absent. + +--- + +# 11. Reconciliation + +On `void-control` restart: +- reload non-terminal executions, +- reload candidate-to-run mapping, +- inspect known child runs, +- resume runtime event consumption from the last seen event id, +- rebuild derived inboxes and iteration status from control-plane events, +- reconstruct execution accumulator from event log. + +Reconciliation may use runtime inspection and runtime event replay, but +the rebuilt execution state must still be reduced into the control-plane +model. + +Paused executions remain paused after reconciliation. + +--- + +# 12. UI and API Visibility + +## 12.1 Primary View + +Users should primarily see: +- execution status, +- current iteration, +- candidate counts, +- scores, +- current best result, +- orchestration timeline, +- budget consumption. + +## 12.2 Drill-Down View + +Users may drill into child runs for: +- per-run events, +- logs, +- stage graph, +- output artifacts, +- detailed failure diagnosis. + +The UI and API should not force users to reason about all child runs by +default. + +--- + +# 13. Non-Goals for v0.2 + +- Direct candidate-to-candidate transport. +- Shared mutable mailbox files as canonical state. +- Mid-run message injection into already-running child runs. +- Leader election semantics. +- Multi-node distributed runtime scheduling. +- A final stable schema for all mode-specific artifacts. +- LLM-in-the-loop evaluation (leader-as-scorer). +- Execution priority across competing executions. +- Artifact push via event payload (see Section 18.6 future notes). + +--- + +# 14. 
Policy Model + +## 14.1 Policy Shape + +The `policy` field in `ExecutionSpec` controls budget, concurrency, +convergence, and failure behavior. + +```json +{ + "policy": { + "budget": { + "max_iterations": 10, + "max_child_runs": 50, + "max_wall_clock_secs": 3600, + "max_cost_usd": 25.00 + }, + "concurrency": { + "max_concurrent_candidates": 4 + }, + "convergence": { + "strategy": "threshold", + "min_score": 0.85, + "max_iterations_without_improvement": 3 + }, + "failure": { + "max_candidate_failures_per_iteration": 2, + "iteration_failure_policy": "fail_execution", + "missing_output_policy": "mark_failed" + } + } +} +``` + +## 14.2 Budget + +Budget fields are hard limits. Exceeding any one stops the execution. + +All budget fields are individually optional, but at least one of +`max_iterations` or `max_wall_clock_secs` MUST be set. No unbounded +executions are allowed. + +| Field | Type | Description | +|-------|------|-------------| +| `max_iterations` | integer | Maximum number of iterations | +| `max_child_runs` | integer | Maximum total child runs across all iterations | +| `max_wall_clock_secs` | integer | Maximum wall-clock time (paused time excluded) | +| `max_cost_usd` | float | Maximum total cost across all child runs | + +## 14.3 Concurrency + +| Field | Type | Description | +|-------|------|-------------| +| `max_concurrent_candidates` | integer | Maximum in-flight candidates for this execution | + +Cannot exceed the global pool size. Validated at submission time. + +## 14.4 Convergence + +| Field | Type | Description | +|-------|------|-------------| +| `strategy` | enum | `threshold`, `plateau`, or `exhaustive` | +| `min_score` | float | Stop when best score >= this value (`threshold` strategy) | +| `max_iterations_without_improvement` | integer | Stop after N iterations with no score improvement (`plateau` strategy) | + +- `threshold`: stop when a candidate scores >= `min_score`. Requires + `min_score`. 
+- `plateau`: stop after `max_iterations_without_improvement` consecutive
+  iterations where `best_result` does not improve. Requires
+  `max_iterations_without_improvement`.
+- `exhaustive`: run all `max_iterations` iterations regardless of scores.
+  Requires `policy.budget.max_iterations` to be set (otherwise there is
+  no bound).
+
+Fields not relevant to the selected strategy (e.g., `min_score` supplied
+with `exhaustive`) are ignored — not an error. This allows changing the
+strategy without removing unrelated fields.
+
+## 14.5 Failure
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `max_candidate_failures_per_iteration` | integer | Short-circuit iteration after this many candidate failures |
+| `iteration_failure_policy` | enum | `fail_execution`, `retry_iteration`, or `continue` |
+| `missing_output_policy` | enum | `mark_failed` or `mark_incomplete` |
+| `candidate_timeout_secs` | integer | Cancel a child run if it exceeds this duration. Default: inherited from the workflow template's `timeout_secs`. |
+
+## 14.6 Validation
+
+Policy validation happens at `ExecutionSpec` submission time:
+- At least one of `max_iterations` or `max_wall_clock_secs` must be set.
+- All numeric fields must be positive.
+- `max_concurrent_candidates` must not exceed the global pool size.
+- Convergence strategy must be consistent with provided fields (e.g.,
+  `threshold` requires `min_score`).
+
+---
+
+# 15. Evaluation Contract
+
+## 15.1 Scoring Model
+
+The control plane runs a deterministic scoring function against
+structured candidate outputs. No LLM participates in evaluation.
+
+## 15.2 Scoring Input
+
+For each candidate, void-control collects:
+- the structured output artifact (`result.json`),
+- child run terminal status,
+- child run metrics (duration, cost, token usage).
+ +## 15.3 Scoring Function + +```rust +trait ScoringFunction { + fn score(&self, candidate_output: &CandidateOutput) -> ScoringResult; +} +``` + +`ScoringResult` shape: + +```json +{ + "candidate_id": "cand_a", + "score": 0.82, + "metrics": { + "latency_p99_ms": 142, + "cost_usd": 0.03, + "duration_ms": 45000 + }, + "pass": true, + "reason": "meets latency target, under cost cap" +} +``` + +## 15.4 Scoring Configuration + +```json +{ + "evaluation": { + "scoring": { + "type": "weighted_metrics", + "weights": { + "latency_p99_ms": { "weight": 0.6, "direction": "minimize" }, + "cost_usd": { "weight": 0.4, "direction": "minimize" } + }, + "pass_threshold": 0.7 + }, + "ranking": "highest_score", + "tie_breaking": "lowest_cost" + } +} +``` + +## 15.5 Scoring Types + +For v0.2: + +- `weighted_metrics`: weighted combination of numeric fields from + `result.json` metrics. Each weight specifies a direction (`minimize` + or `maximize`). Values are normalized using min-max normalization + across all candidates within the current iteration (0.0 = worst, + 1.0 = best, direction-aware). For `minimize` metrics, lower raw + values produce higher normalized scores. When only one candidate + exists, normalized values default to 1.0. Note: because + normalization is per-iteration, raw scores are not directly + comparable across iterations. The `best_result` comparison in the + accumulator uses raw metric values, not normalized scores. +- `pass_fail`: binary scoring based on the presence and validity of + required fields in the candidate output. Score is `1.0` for pass, + `0.0` for fail. + +Future: +- `custom`: user-provided function reference. + +## 15.6 Scoring Rules + +- Every candidate gets a score. Failed candidates get score `0.0` with + `pass: false`. +- Iteration best is determined by the `ranking` strategy. +- Execution best is the best score across all iterations. +- Scoring results are persisted as `CandidateScored` control-plane + events. + +--- + +# 16. 
Candidate Variation Model + +## 16.1 Two-Layer Design + +Candidate variation separates mechanism from strategy: +- **Mechanism:** template + overrides (how candidates are expressed). +- **Strategy:** variation source (how differences are decided). + +## 16.2 Mechanism: Template + Overrides + +Every candidate is expressed as a base workflow template plus a set of +overrides. + +```json +{ + "candidate_id": "cand_a", + "iteration": 2, + "base_template": "workflow.template", + "overrides": { + "agent.prompt": "Try a streaming approach with chunked responses", + "sandbox.memory_mb": 1024, + "sandbox.env": { + "CONCURRENCY": "8" + } + } +} +``` + +Rules: +- The base template comes from `ExecutionSpec.workflow.template`. +- Overrides use dot-path notation to target specific fields. +- Overrides are shallow-merged — they replace the target value, not + deep-merge. +- The resolved candidate spec is a pure function of + `template + overrides` (reproducible). +- The resolved spec is persisted with the `CandidateScheduled` event + for replay. + +## 16.3 Strategy: Variation Source + +The `variation` section in the `ExecutionSpec` defines how overrides are +generated. + +```json +{ + "variation": { + "source": "parameter_space", + "parameter_space": { + "sandbox.memory_mb": [512, 1024, 2048], + "sandbox.env.CONCURRENCY": ["4", "8", "16"] + }, + "candidates_per_iteration": 3, + "selection": "random" + } +} +``` + +## 16.4 Variation Sources + +For v0.2: + +- `parameter_space`: enumerate or sample from a defined space of + override values. + - `selection`: `random` (sample randomly), `sequential` (enumerate in + order). Future: `latin_hypercube` (space-filling sample). +- `explicit`: user provides a fixed list of override sets. Each + iteration cycles through the list. +- `leader_directed`: overrides come from the leader candidate's + structured output (`intents.json`). 
+ +## 16.5 Leader-Directed Variation + +When `variation.source` is `leader_directed`, the leader candidate's +`intents.json` includes: + +```json +{ + "proposed_candidates": [ + { + "rationale": "lower concurrency showed promise, try even lower", + "overrides": { + "sandbox.env.CONCURRENCY": "2" + } + } + ] +} +``` + +These proposals are advisory. `void-control` validates and may reject +or modify them before scheduling. + +--- + +# 17. Failure Decision Tree + +## 17.1 Candidate-Level Failures + +| Scenario | Action | +|----------|--------| +| Child run fails (non-zero exit, RunFailed) | Candidate marked `Failed`, score `0.0`, `pass: false`. Counts toward `max_candidate_failures_per_iteration`. | +| Child run succeeds but structured output missing | Governed by `policy.failure.missing_output_policy`: `mark_failed` (default) treats as candidate failure; `mark_incomplete` scores `0.0` but does not count as failure. | +| Child run succeeds but structured output malformed | Same as missing — policy decides. Control plane emits `CandidateOutputError` event with diagnostic details. | +| Child run times out | void-control cancels the child run via void-box cancel API. Candidate marked `Failed` with `terminal_reason: "timeout"`. | + +## 17.2 Iteration-Level Failures + +| Scenario | Action | +|----------|--------| +| Some candidates fail, others succeed | Iteration completes normally. Failed candidates are scored but excluded from ranking. | +| All candidates fail | Governed by `policy.failure.iteration_failure_policy`: `fail_execution` (default) terminates the execution; `retry_iteration` re-runs the iteration (hardcoded limit of 1 retry in v0.2 — a configurable retry count may be added later); `continue` advances to next iteration with empty results. | +| Candidate failure count exceeds `max_candidate_failures_per_iteration` | Iteration is short-circuited. Remaining in-flight candidates are allowed to finish but no new candidates are dispatched. 
Iteration status is `Failed`. | + +## 17.3 Execution-Level Failures + +| Scenario | Action | +|----------|--------| +| Budget exhausted (any limit) | Current iteration completes (in-flight candidates finish). Execution terminates with `stop_reason: "budget_exhausted"` and the specific limit that was hit. Best result so far is the execution result. | +| void-box unreachable | void-control retries with exponential backoff (3 attempts, 1s/2s/4s). If still unreachable, in-flight candidates for that run are marked `Failed` with `terminal_reason: "runtime_unavailable"`. Normal failure cascading applies. | +| Unrecoverable control-plane error | Execution marked `Failed` with `error` field. All in-flight child runs are cancelled. | + +## 17.4 Failure Visibility + +Failures are always explicit — no silent drops. Every failure path +produces a control-plane event with enough context to diagnose. + +--- + +# 18. Artifact Retrieval Protocol + +## 18.1 v0.2: Pull After Terminal Event + +When void-control observes a child run reach terminal state (via +`RunCompleted`, `RunFailed`, or `RunCanceled` event), it fetches +artifacts: + +``` +1. Observe terminal event for child run. +2. GET /v1/runs/{run_id}/stages/{stage_name}/output-file + for each required artifact (result.json, intents.json). +3. Parse and validate artifact against mode's schema. +4. Emit CandidateOutputCollected or CandidateOutputError event. +5. Candidate is now evaluable. +``` + +## 18.2 Required Artifacts Per Mode + +| Mode | Required | Optional | +|------|----------|----------| +| `swarm` | `result.json` | `intents.json` | +| `search` | `result.json` | — | +| `tournament` | `result.json` | — | + +## 18.3 `result.json` Minimal Schema + +```json +{ + "status": "success", + "summary": "human-readable result description", + "metrics": {}, + "artifacts": [] +} +``` + +- `metrics` is a flat key-value map of numeric values. These are the + inputs to the scoring function. 
+- `artifacts` is a list of references to additional output files (paths + within the stage output). + +## 18.4 void-box API Note + +The current void-box endpoint `GET /v1/runs/{run_id}/stages/{stage}/output-file` +returns a single file per stage. For v0.2, the structured output contract +requires that the child run's output stage produces a single JSON file +containing all required fields (`status`, `summary`, `metrics`, +`artifacts`). This is the `result.json` content, retrieved as the stage's +sole output file. + +If `intents.json` is required (e.g., for `leader_directed` variation), it +should be embedded as a field within the same output file, or a separate +stage should produce it. A future void-box enhancement may add named +artifact retrieval (e.g., `?name=intents.json`), but v0.2 works within +the existing single-file-per-stage constraint. + +## 18.5 Retrieval Rules + +- void-control MUST wait for the terminal event before fetching — no + speculative reads. +- Retrieval has a timeout (default 30s). If the artifact is not + available within the timeout, it is treated as missing. +- Artifacts are fetched once and cached in control-plane state by + reference (not copied in full). +- Artifact content is NOT stored in control-plane events — only + references and parsed metrics. + +## 18.6 Future: Pull With Manifest + +In a future version, the terminal event may include an artifact +manifest: + +```json +{ + "event_type": "RunCompleted", + "payload": { + "artifact_manifest": [ + { "name": "result.json", "size_bytes": 1240, "stage": "main" }, + { "name": "intents.json", "size_bytes": 580, "stage": "main" } + ] + } +} +``` + +This lets void-control decide what to fetch without blind requests. +Requires a void-box enhancement — deferred. + +--- + +# 19. Iteration State Threading + +## 19.1 Execution Accumulator + +The control loop's `reduce()` produces an `ExecutionAccumulator` that +carries forward between iterations. 
+ +```json +{ + "best_result": { + "candidate_id": "cand_b", + "iteration": 2, + "score": 0.91, + "metrics": { "latency_p99_ms": 98, "cost_usd": 0.02 }, + "artifact_ref": "exec_123/iter_2/cand_b/result.json" + }, + "scoring_history": [ + { + "iteration": 1, + "candidates": [ + { "candidate_id": "cand_a", "score": 0.72, "pass": true }, + { "candidate_id": "cand_b", "score": 0.65, "pass": false } + ], + "best_candidate_id": "cand_a" + } + ], + "message_backlog": [], + "budget_consumed": { + "iterations": 2, + "child_runs": 6, + "wall_clock_secs": 340, + "cost_usd": 0.15 + }, + "iterations_without_improvement": 0, + "failure_counts": { + "total_candidate_failures": 3, + "iteration_retries_used": 0 + } +} +``` + +## 19.2 Accumulator Fields + +| Field | Purpose | Consumers | +|-------|---------|-----------| +| `best_result` | Global best across all iterations | `should_stop()` (convergence check), final execution result | +| `scoring_history` | Per-iteration scores for all candidates | `plan_candidates()` (inform variation strategy), UI | +| `message_backlog` | Undelivered candidate communication intents | `materialize_inboxes()` | +| `budget_consumed` | Running totals against policy limits | `should_stop()` (budget check) | +| `iterations_without_improvement` | Counter reset when `best_result` improves | `should_stop()` (plateau convergence) | +| `failure_counts` | Running totals of candidate failures and iteration retries | Failure decision tree, UI | + +## 19.3 Accumulator Rules + +- The accumulator is the only cross-iteration state — no side channels. +- The accumulator is persisted after each iteration completes (crash + recovery). +- `best_result` updates when a new candidate is ranked higher than the + current best using the full ranking function (score comparison first, + then tie-breaking). A candidate that ties on score but wins on the + tie-breaking metric (e.g., `lowest_cost`) does update `best_result`. +- `scoring_history` is append-only. 
+- `budget_consumed` is derived from events but persisted for fast + access. +- The accumulator is reconstructible from the control-plane event log + (it is a projection, not the source of truth). + +--- + +# 20. Iteration Strategy Trait + +## 20.1 Trait Definition + +Each execution mode implements the `IterationStrategy` trait. + +```rust +trait IterationStrategy { + /// Produce inbox snapshots for candidates in the next iteration. + fn materialize_inboxes( + &self, + accumulator: &ExecutionAccumulator, + ) -> Vec<CandidateInbox>; + + /// Decide which candidates to launch in the next iteration. + fn plan_candidates( + &self, + accumulator: &ExecutionAccumulator, + inboxes: &[CandidateInbox], + ) -> Vec<CandidateSpec>; + + /// Score completed candidates and rank them. + fn evaluate( + &self, + accumulator: &ExecutionAccumulator, + outputs: &[CandidateOutput], + ) -> IterationEvaluation; + + /// Decide whether to stop iterating. + fn should_stop( + &self, + accumulator: &ExecutionAccumulator, + evaluation: &IterationEvaluation, + ) -> Option<StopReason>; + + /// Produce the next accumulator state. + fn reduce( + &self, + accumulator: ExecutionAccumulator, + evaluation: IterationEvaluation, + ) -> ExecutionAccumulator; +} +``` + +## 20.2 Design Rules + +- Trait methods are pure functions of their inputs. No side effects, no + I/O. This keeps them testable and replayable. +- `dispatch_candidates()` and `collect_outputs()` are shared + infrastructure — not part of the trait. They handle void-box + interaction, concurrency, artifact retrieval, and failure handling. +- Each mode registers a strategy at startup. Unknown modes are rejected + at `ExecutionSpec` validation time. + +## 20.3 Mode Implementations + +v0.2 ships with `SwarmStrategy`. `SearchStrategy` and +`TournamentStrategy` are named but not implemented. + +Mode-specific behavior lives in the trait, not in the loop: + +- `SwarmStrategy.plan_candidates()` uses `leader_directed` or + `parameter_space` variation. 
+- A future `TournamentStrategy.plan_candidates()` would pair candidates + for head-to-head comparison. +- `SearchStrategy.plan_candidates()` would use scoring history to narrow + a parameter space. + +--- + +# 21. Backpressure and Concurrency + +## 21.1 Two-Level Concurrency Model + +### Global Pool + +Configured at void-control startup (config file or CLI flag). + +```json +{ + "global": { + "max_concurrent_child_runs": 20 + } +} +``` + +Shared across all active executions. Acts as an admission gate — +candidates queue until a slot opens. + +### Per-Execution Limit + +Lives in `policy.concurrency`: + +```json +{ + "concurrency": { + "max_concurrent_candidates": 4 + } +} +``` + +Cannot exceed the global pool size. Validated at submission time. + +## 21.2 Scheduling Model + +``` +CandidateSpec created by plan_candidates() + -> enters execution-local queue + -> waits for execution concurrency slot (max_concurrent_candidates) + -> waits for global concurrency slot (max_concurrent_child_runs) + -> dispatched to void-box +``` + +## 21.3 Scheduling Rules + +- Within an execution, candidates are dispatched in the order + `plan_candidates()` returns them. +- Across executions, scheduling is FIFO by candidate creation time (no + execution priority in v0.2). +- When a child run completes, the slot is released immediately. The + next queued candidate is dispatched. +- If an execution is paused (checkpointed), its queued candidates remain + queued but are not dispatched. Its slots are released back to the + global pool. +- Budget checks happen before queuing, not at dispatch time. A candidate + is never queued if the budget is already exhausted. + +## 21.4 Queue Observability + +- `CandidateQueued` event emitted when a candidate enters the queue. +- `CandidateDispatched` event emitted when it gets a slot. +- Queue depth and wait time are available via execution inspection. + +--- + +# 22. 
Observability Events + +## 22.1 Operational Events + +These extend the control-plane event model from Section 6. + +| Event | Trigger | Payload | +|-------|---------|---------| +| `CandidateQueued` | Candidate enters concurrency queue | `candidate_id`, `queue_position`, `execution_id` | +| `CandidateDispatched` | Candidate gets a concurrency slot | `candidate_id`, `child_run_id`, `queue_wait_ms` | +| `CandidateOutputCollected` | Structured artifact successfully retrieved | `candidate_id`, `artifact_name`, `size_bytes` | +| `CandidateOutputError` | Artifact missing, malformed, or retrieval timeout | `candidate_id`, `artifact_name`, `error`, `policy_action` | +| `IterationBudgetWarning` | Consumed budget crosses 80% of any limit | `limit_name`, `consumed`, `max`, `percent` | +| `ExecutionBudgetExhausted` | Any budget limit hit | `limit_name`, `consumed`, `max`, `stop_reason` | +| `CandidateTimeout` | Child run exceeded expected duration | `candidate_id`, `child_run_id`, `elapsed_secs`, `timeout_secs` | +| `ExecutionStalled` | No progress for configurable duration | `last_progress_at`, `stall_duration_secs` | +| `ExecutionPaused` | User-initiated checkpoint | `iteration`, `reason` | +| `ExecutionResumed` | Execution resumed from checkpoint | `iteration`, `paused_duration_secs` | +| `PolicyUpdated` | Mid-execution policy adjustment | `changed_fields`, `old_values`, `new_values` | + +## 22.2 Stall Detection + +void-control tracks `last_progress_at`, updated when any of: +- a candidate completes (success or failure), +- an iteration completes, +- a new candidate is dispatched. + +If `now - last_progress_at` exceeds `stall_detection_secs`, emit +`ExecutionStalled`. This is informational — it does not stop the +execution. + +`stall_detection_secs` is configured in void-control's global config +(not per-execution). Default: 300. + +## 22.3 Event Rules + +- All new events follow the existing `EventEnvelope` schema + (`event_id`, `event_type`, `timestamp`, `seq`, `payload`). 
+- Warning and operational events do not advance execution state — they + are side-channel observability. +- All events are persisted in the control-plane event log and + participate in replay. + +--- + +# 23. Execution Checkpointing + +## 23.1 Pause Flow + +``` +User sends: POST /v1/executions/{id}/pause + -> void-control sets execution.pending_pause = true + -> no new candidates are dispatched from the queue + (including candidates for the current iteration awaiting + concurrency slots) + -> already in-flight candidates (child run started) run to completion + -> after all in-flight candidates finish, execution transitions + to status: "paused" + -> queued but not-yet-dispatched candidates remain queued + -> global concurrency slots released + -> ExecutionPaused event emitted + -> accumulator persisted (already happens at iteration boundary) +``` + +Note: if some candidates from the current iteration were queued but not +dispatched when pause was requested, the iteration is considered +incomplete. On resume, those candidates will be dispatched and the +iteration will complete before advancing to the next one. + +## 23.2 Resume Flow + +``` +User sends: POST /v1/executions/{id}/resume + -> execution transitions from "paused" to "running" + -> ExecutionResumed event emitted + -> control loop picks up from next iteration + -> queued candidates become dispatchable again + -> global concurrency slots re-acquired +``` + +## 23.3 Execution Status + +The execution status set includes `Paused`: + +`Pending -> Running -> {Paused -> Running} -> {Completed | Failed | Canceled}` + +## 23.4 Checkpointing Rules + +- Pause is only valid when status is `Running`. +- Resume is only valid when status is `Paused`. +- Cancel is valid in both `Running` and `Paused` states. Cancel from + paused skips to terminal immediately. +- Budget wall-clock timer is paused while the execution is paused + (paused time does not count toward `max_wall_clock_secs`). 
+- Paused executions still hold their candidate queue — they do not + lose their place. +- If void-control restarts while an execution is paused, it remains + paused after reconciliation. + +--- + +# 24. Mid-Execution Policy Adjustment + +## 24.1 Mutable Fields + +| Field | Effect | +|-------|--------| +| `policy.budget.max_iterations` | New limit checked at next `should_stop()` | +| `policy.budget.max_child_runs` | New limit checked before next candidate queuing | +| `policy.budget.max_wall_clock_secs` | New limit checked at next `should_stop()` | +| `policy.budget.max_cost_usd` | New limit checked at next `should_stop()` | +| `policy.concurrency.max_concurrent_candidates` | Takes effect immediately. May release queued candidates or stop dispatching. | +| `policy.failure.max_candidate_failures_per_iteration` | New limit applied to future iterations. Does not affect the current in-progress iteration. | + +## 24.2 Immutable Fields + +| Field | Reason | +|-------|--------| +| `policy.convergence.*` | Changing scoring criteria mid-execution makes iteration history inconsistent. | +| `policy.failure.iteration_failure_policy` | Changing what happens when all candidates fail mid-execution makes failure handling inconsistent across iterations. | +| `policy.failure.missing_output_policy` | Changing how missing outputs are classified mid-execution makes scoring history inconsistent. | +| `evaluation.*` | Scoring function must be stable across iterations. | +| `variation.source` | Changing variation strategy breaks the relationship between iteration history and candidate planning. | + +## 24.3 API + +``` +PATCH /v1/executions/{id}/policy +Body: { + "budget": { "max_iterations": 15 }, + "concurrency": { "max_concurrent_candidates": 6 } +} +``` + +## 24.4 Adjustment Rules + +- Only valid when execution status is `Running` or `Paused`. +- Attempting to mutate an immutable field returns a validation error + (no partial apply). 
+- New budget limits must be >= current consumed values. Cannot set + `max_iterations` to 3 if 5 iterations have already completed. +- Changes emit a `PolicyUpdated` event with old and new values. +- Changes are reflected in the persisted execution state immediately. +- Concurrency increases may cause queued candidates to dispatch + immediately. + +--- + +# 25. Execution Dry-Run Mode + +## 25.1 API + +``` +POST /v1/executions/dry-run +Body: { <ExecutionSpec> } +``` + +## 25.2 Response + +```json +{ + "valid": true, + "mode": "swarm", + "plan": { + "candidates_per_iteration": 3, + "max_iterations": 10, + "max_child_runs": 30, + "estimated_concurrent_peak": 3, + "variation_source": "parameter_space", + "parameter_space_size": 27 + }, + "warnings": [ + "max_cost_usd not set — execution has no cost cap" + ], + "errors": [] +} +``` + +## 25.3 Validation + +Dry-run validates: +- `ExecutionSpec` schema and required fields. +- Policy validation (non-zero limits, consistent constraints). +- Mode-specific section present and valid (e.g., `swarm` section for + mode `swarm`). +- Evaluation config is well-formed (valid scoring type, weights sum + correctly, tie-breaking field exists). +- Workflow template parses as a valid void-box `RunSpec`. +- Concurrency fits within global pool (warning if + `max_concurrent_candidates` > 50% of global pool). + +## 25.4 Computed Plan + +Dry-run computes: +- Maximum possible child runs + (`candidates_per_iteration * max_iterations`). +- Peak concurrency. +- Parameter space cardinality (if `parameter_space` variation). + +## 25.5 Dry-Run Rules + +- Dry-run does not create an `Execution`, emit events, or contact + void-box. +- Dry-run is idempotent and side-effect free. +- Warnings are informational — they do not make `valid: false`. +- Errors make `valid: false` — the spec would be rejected by + `POST /v1/executions`. + +--- + +# 26. 
Result Provenance + +## 26.1 Provenance in Execution Result + +When an execution completes, its `result` field includes full +provenance: + +```json +{ + "execution_id": "exec_123", + "status": "completed", + "stop_reason": "convergence_threshold", + "result": { + "candidate_id": "cand_f", + "iteration": 4, + "child_run_id": "run-1700004000", + "score": 0.93, + "metrics": { + "latency_p99_ms": 87, + "cost_usd": 0.018 + }, + "artifact_refs": [ + { + "name": "result.json", + "stage": "main", + "retrieval": "GET /v1/runs/run-1700004000/stages/main/output-file" + } + ], + "variation": { + "overrides": { + "sandbox.env.CONCURRENCY": "2", + "sandbox.memory_mb": 1024 + } + } + } +} +``` + +## 26.2 What Provenance Answers + +| Question | Field | +|----------|-------| +| Which candidate produced the best result? | `candidate_id` | +| Which iteration? | `iteration` | +| What void-box run backs it? | `child_run_id` (drill down to logs, events, stages) | +| How did it score? | `score` + `metrics` | +| Where are the artifacts? | `artifact_refs` with retrieval paths | +| What made this candidate different? | `variation.overrides` | + +## 26.3 Provenance Rules + +- Provenance is set when `best_result` is finalized at execution + completion. +- If the execution fails with no successful candidates, `result` is + `null` and `stop_reason` explains why. +- Provenance is a snapshot — it references but does not duplicate + artifacts. +- The `child_run_id` is the authoritative link for deep inspection + (logs, stage graph, telemetry). + +--- + +# 27. Acceptance Criteria + +This specification is satisfied when a future implementation can: + +1. Create one top-level `Execution` that spans multiple iterations. +2. Launch multiple child `void-box` runs in parallel for one iteration. +3. Track candidate completion using existing `void-box` terminal events + and run state. +4. Collect semantic candidate outputs from structured artifacts rather + than logs alone. +5. 
Convert candidate communication intents into persisted control-plane + message events. +6. Materialize candidate inbox snapshots for future iterations without + making those snapshots the system of record. +7. Reconstruct execution state after restart from persisted control-plane + state plus replayed child-run information. +8. Validate an `ExecutionSpec` via dry-run without creating an execution. +9. Score candidates using a deterministic scoring function configured in + the `ExecutionSpec`. +10. Generate candidate variation from a parameter space, explicit list, + or leader-directed proposals. +11. Enforce budget limits and stop execution when any limit is exhausted. +12. Manage concurrency across executions via a global pool and + per-execution limits. +13. Pause and resume an execution, with in-flight candidates completing + before the pause takes effect. +14. Adjust budget and concurrency policy on a running execution. +15. Emit operational observability events for queue depth, stalls, + budget warnings, and policy changes. +16. Trace the final execution result back to its originating candidate, + iteration, child run, and variation overrides. diff --git a/spec/void-control-message-box-spec-v0.1.md b/spec/void-control-message-box-spec-v0.1.md new file mode 100644 index 0000000..c97aa2d --- /dev/null +++ b/spec/void-control-message-box-spec-v0.1.md @@ -0,0 +1,679 @@ +# Void Control Message Box Specification + +## Version: v0.1 + +## Scope + +This spec defines the first real control-plane message box for +collaboration between iterative candidates in `void-control`. + +It extends: +- `spec/void-control-iteration-spec-v0.2.md` +- `spec/void-control-runtime-spec-v0.2.md` + +It does not move collaboration ownership into `void-box`. 
+ +This specification is intentionally control-plane first: +- candidate runs may emit structured communication intents, +- `void-control` persists, routes, and delivers those intents, +- future inboxes are reconstructed from persisted routing state, +- delivery is deterministic and replayable. + +This is not a free-form chat protocol. It is decision propagation +infrastructure for iterative execution modes such as `swarm` and +`search`. + +--- + +# 1. Ownership Boundary + +`void-control` owns: +- communication intent extraction from child run structured output, +- message validation and routing, +- persistence of intents, routed messages, and inbox snapshots, +- next-iteration inbox delivery, +- replay and restart reconstruction of collaboration state, +- safety limits such as TTL, deduplication, and fan-out caps. + +`void-box` owns: +- execution of a single child run, +- durable publication of structured stage output artifacts, +- retrieval of `result.json` and any additional artifacts, +- runtime lifecycle and artifact publication status. + +`void-box` MUST NOT: +- route candidate-to-candidate messages, +- persist execution-level collaboration state, +- infer control-plane audiences such as `leader` or `broadcast`, +- decide inbox delivery timing. + +--- + +# 2. Core Idea + +The system does not pass chat messages. It propagates decisions. + +The control-plane collaboration flow is: + +1. a candidate run emits structured communication intents, +2. `void-control` validates and persists those intents, +3. `void-control` routes intents into delivery messages, +4. `void-control` materializes inbox snapshots for a later iteration, +5. future candidates receive those inbox snapshots as launch input. + +V0 delivery is strictly delayed: + +`run -> reduce -> route -> next inbox` + +Messages emitted in iteration `N` MUST NOT be delivered in iteration `N`. +They MAY be delivered in iteration `N + 1` or later if still valid. + +--- + +# 3. 
Object Model + +## 3.1 CommunicationIntent + +`CommunicationIntent` is the canonical record of what a candidate tried +to communicate. + +Suggested shape: + +```json +{ + "intent_id": "intent_17", + "from_candidate_id": "candidate-2", + "iteration": 0, + "kind": "proposal", + "audience": "leader", + "payload": { + "summary_text": "Rate limit plus cache fallback reduced latency", + "strategy_hint": "rate_limit_cache", + "metric_deltas": { + "latency_p99_ms": -30.0, + "error_rate": -0.02 + } + }, + "priority": "normal", + "ttl_iterations": 1, + "caused_by": null, + "context": { + "family_hint": "incident-mitigation" + } +} +``` + +Required fields: +- `intent_id` +- `from_candidate_id` +- `iteration` +- `kind` +- `audience` +- `payload` +- `priority` +- `ttl_iterations` + +Optional fields: +- `caused_by` +- `context` + +Rules: +- `intent_id` MUST be unique within an `Execution`. +- `from_candidate_id` references the emitting candidate. +- `iteration` is the iteration in which the source run completed. +- `caused_by` references another `intent_id` when an intent is a direct + refinement or response. +- `context` is advisory and strategy-defined. It is optional in v0. + +## 3.2 RoutedMessage + +`RoutedMessage` is the canonical record of a validated intent after +control-plane routing. + +Suggested shape: + +```json +{ + "message_id": "msg_44", + "intent_id": "intent_17", + "to": "leader", + "delivery_iteration": 1, + "routing_reason": "leader_feedback_channel", + "status": "Routed" +} +``` + +Required fields: +- `message_id` +- `intent_id` +- `to` +- `delivery_iteration` +- `routing_reason` +- `status` + +Rules: +- one intent MAY produce zero, one, or many routed messages, +- `delivery_iteration` MUST be greater than the source `iteration`, +- `status` lifecycle is defined in Section 5. + +## 3.3 InboxEntry + +`InboxEntry` is the unit delivered to a future candidate. 
+ +Suggested shape: + +```json +{ + "message_id": "msg_44", + "intent_id": "intent_17", + "from_candidate_id": "candidate-2", + "kind": "proposal", + "payload": { + "summary_text": "Rate limit plus cache fallback reduced latency", + "strategy_hint": "rate_limit_cache" + } +} +``` + +Required fields: +- `message_id` +- `intent_id` +- `from_candidate_id` +- `kind` +- `payload` + +## 3.4 InboxSnapshot + +`InboxSnapshot` is the persisted record of what a candidate actually +received at launch time. + +Suggested shape: + +```json +{ + "execution_id": "exec_1", + "candidate_id": "candidate-3", + "iteration": 1, + "entries": [ + { + "message_id": "msg_44", + "intent_id": "intent_17", + "from_candidate_id": "candidate-2", + "kind": "proposal", + "payload": { + "summary_text": "Rate limit plus cache fallback reduced latency" + } + } + ] +} +``` + +Rules: +- this is the source of truth for delivered inbox content, +- replay MUST prefer persisted inbox snapshots over re-deriving launch + input from current routing rules, +- a candidate with no messages MAY still have an empty inbox snapshot. + +--- + +# 4. Intent Semantics + +## 4.1 Intent kinds + +V0 supports exactly three intent kinds: +- `proposal` +- `signal` +- `evaluation` + +### `proposal` + +Use when a candidate recommends a change, mitigation, or next step. + +Examples: +- try `rate_limit_cache` +- lower prompt verbosity +- switch transform to streaming mode + +### `signal` + +Use when a candidate reports a condition that should influence routing or +planning. + +Examples: +- anomaly detected +- retry strategy caused instability +- current family appears saturated + +### `evaluation` + +Use when a candidate reports structured feedback about a prior proposal or +family of proposals. + +Examples: +- rate limiting improved latency but hurt success rate +- candidate family `friendly_structured` scored highest + +## 4.2 Payload rules + +Payloads MUST remain structured. 
+ +V0 payload shape: + +```json +{ + "summary_text": "short advisory summary", + "strategy_hint": "optional short stable hint", + "metric_deltas": { + "latency_p99_ms": -30.0 + }, + "recommendation": "optional concise action string" +} +``` + +Rules: +- `summary_text` is required, +- `strategy_hint` is optional but recommended, +- `metric_deltas` is optional, +- unknown fields MAY be allowed for forward compatibility, +- large free-form prose blobs SHOULD be rejected or truncated. + +## 4.3 Audience rules + +V0 supported audiences: +- `leader` +- `broadcast` + +`candidate:<id>` addressing is deferred. + +Rules: +- `leader` means the intent should be routed only to the leader inbox or + equivalent strategy-specific supervisory role, +- `broadcast` means the router MAY fan the message out to multiple future + inboxes subject to safety limits. + +--- + +# 5. Message Lifecycle + +`RoutedMessage.status` enum: + +`Routed | Delivered | Expired | Dropped` + +Definitions: +- `Routed`: message has been created by the router but not yet included in + a persisted inbox snapshot. +- `Delivered`: message was included in at least one inbox snapshot. +- `Expired`: message was valid when routed but exceeded its TTL before + delivery. +- `Dropped`: message was rejected by routing or delivery rules such as + deduplication, policy cap, or invalid audience. + +`Consumed` is intentionally deferred in v0 because the control plane cannot +reliably prove semantic use by a child run. + +--- + +# 6. Delivery Rules + +## 6.1 Timing + +V0 delivery MUST be next-iteration or later only. + +For an intent emitted in iteration `N`: +- `delivery_iteration` MUST be `>= N + 1` +- same-iteration delivery is forbidden + +## 6.2 TTL + +`ttl_iterations` is measured in control-plane iterations. 
+ +Rules: +- default TTL SHOULD be `1`, +- a message expires when `current_iteration > source_iteration + ttl`, +- expired messages MUST transition to `Expired`, +- expired messages MUST NOT appear in new inbox snapshots. + +## 6.3 Deduplication + +The router MUST support deduplication. + +Recommended dedup key: +- normalized payload +- audience +- source iteration + +Rules: +- identical messages SHOULD NOT fan out repeatedly within the same + iteration, +- deduped messages SHOULD produce `Dropped` routed message records or + equivalent traceable diagnostics. + +## 6.4 Fan-out limits + +The router MUST bound amplification. + +V0 recommended defaults: +- max `3` intents per candidate per iteration, +- max `1` broadcast intent per candidate per iteration, +- bounded recipient count for a single broadcast, +- bounded payload size. + +## 6.5 Default retention and disk limits + +V0 SHOULD define conservative defaults to prevent unbounded disk growth. + +Recommended default policy: + +```json +{ + "message_box": { + "retention": { + "completed_days": 7, + "failed_days": 14, + "canceled_days": 3 + }, + "limits": { + "max_intent_payload_bytes": 4096, + "max_inbox_snapshot_bytes": 65536, + "max_intents_per_candidate_per_iteration": 3, + "max_broadcast_intents_per_candidate_per_iteration": 1, + "max_execution_message_box_bytes": 10485760 + } + } +} +``` + +Rules: +- active executions MUST NOT be cleaned up, +- terminal executions MAY be cleaned up only after their retention window, +- if message-box artifacts for a single execution exceed + `max_execution_message_box_bytes`, the control plane SHOULD reject or + drop further intents for that execution and emit a warning event, +- failed executions retain logs longer than completed executions because + they have higher debugging value. + +--- + +# 7. Persistence Model + +The control plane MUST persist three distinct records: + +1. `intents.log` +- append-only raw emitted intents + +2. 
`messages.log`
+- append-only routed messages and status changes
+
+3. `inboxes/<iteration>/<candidate_id>.json`
+- exact delivered inbox snapshots
+
+This separation is required so the system can answer:
+- what was emitted,
+- what was routed,
+- what was actually delivered.
+
+---
+
+# 8. Event Model
+
+The current control-plane event enum is not sufficient to represent
+message-box semantics on its own.
+
+V0 SHOULD introduce additional collaboration events:
+- `CommunicationIntentEmitted`
+- `CommunicationIntentRejected`
+- `MessageRouted`
+- `MessageDelivered`
+- `MessageExpired`
+- `MessageDropped`
+
+Rules:
+- event logs MAY stay lightweight and refer to IDs rather than carrying
+  full payload bodies,
+- full payload data MUST remain available in the persisted intent/message
+  records,
+- replay MAY combine event logs with persisted message-box records.
+
+---
+
+# 9. Execution Output Contract
+
+Candidate runs emit intents through structured output consumed by
+`void-control`.
+
+Suggested extension to orchestration-facing `result.json`:
+
+```json
+{
+  "status": "ok",
+  "summary": "candidate finished successfully",
+  "metrics": {
+    "latency_p99_ms": 72
+  },
+  "intents": [
+    {
+      "kind": "proposal",
+      "audience": "leader",
+      "payload": {
+        "summary_text": "Rate limit plus cache fallback reduced latency",
+        "strategy_hint": "rate_limit_cache"
+      },
+      "priority": "normal",
+      "ttl_iterations": 1
+    }
+  ]
+}
+```
+
+Rules:
+- `intents` is optional,
+- invalid intents MUST NOT make a successful candidate output
+  unrecoverable if metrics are otherwise valid,
+- invalid intents SHOULD produce rejection diagnostics and
+  `CommunicationIntentRejected`.
+
+---
+
+# 10. Strategy Interaction
+
+The message box is generic transport. Strategies still own collaboration
+semantics.
+ +## 10.1 `swarm` + +Expected v0 usage: +- candidates emit `proposal` and `signal` intents, +- `broadcast` and `leader` audiences are common, +- router fans out a bounded set of messages into later inboxes, +- inboxes influence future broad exploration. + +## 10.2 `search` + +Expected v0 usage: +- candidates emit `evaluation` and `proposal` intents, +- most messages route to `leader` or the next refiner role, +- `caused_by` is especially useful to express refinement lineage. + +--- + +# 11. Provider Adapter Boundary + +The collaboration protocol is owned by `void-control`, but message +delivery into a concrete model runtime is provider-specific. + +V0 therefore introduces a provider adapter abstraction. + +## 11.1 Responsibilities + +`void-control` owns: +- canonical `CommunicationIntent`, `RoutedMessage`, and `InboxSnapshot` + semantics, +- routing and delivery timing, +- persistence and replay, +- selection of which inbox snapshot belongs to a launched candidate. + +The provider adapter owns: +- translation of an `InboxSnapshot` into provider-specific launch input, +- any provider-specific formatting or prompt shaping, +- optional future live-delivery optimization when supported by a + provider runtime. + +## 11.2 Adapter contract + +Suggested conceptual interface: + +```rust +trait CollaborationAdapter { + fn prepare_launch_input( + &self, + candidate: &CandidateSpec, + inbox: &InboxSnapshot, + ) -> ProviderLaunchInput; +} +``` + +Optional future capability: + +```rust +trait LiveCollaborationAdapter { + fn supports_live_delivery(&self) -> bool; + fn deliver_live_message( + &self, + runtime_handle: &str, + message: &RoutedMessage, + ) -> Result<(), AdapterError>; +} +``` + +V0 MUST NOT require live delivery. + +## 11.3 Delivery modes + +V0 supported conceptual delivery modes: + +### LaunchInjection + +The inbox snapshot is rendered into the initial candidate prompt/input. 
+ +Properties: +- universal fallback, +- deterministic, +- replay-friendly, +- provider-neutral. + +This is the required v0 mode. + +### StructuredContext + +The inbox snapshot is translated into provider-native structured context +or resource attachments when supported. + +Properties: +- cleaner than raw prompt injection, +- still launch-time only, +- optional in v0. + +### LiveChannel + +The provider may support live side-channel delivery after launch. + +Examples: +- broker/channel injection, +- MCP-backed session message delivery, +- provider-local streaming collaboration APIs. + +Properties: +- optional optimization only, +- MUST preserve the same canonical `InboxSnapshot` and routing semantics, +- MUST NOT replace the control-plane source of truth. + +## 11.4 Fallback rule + +All provider adapters MUST support deterministic launch-time delivery. + +If a provider-specific live or structured delivery mode is unavailable, +`void-control` MUST fall back to `LaunchInjection` without changing the +collaboration semantics. + +## 11.5 Canonical truth + +The canonical collaboration truth is: +- routed messages in the control plane, +- persisted inbox snapshots, +- the launch-time candidate input derived from those snapshots. + +Provider-specific delivery channels are implementation details. They are +not the primary record of collaboration state. + +--- + +# 12. Replay and Restart Semantics + +The message box MUST be replay-safe. + +Required guarantees: +- persisted inbox snapshots are immutable historical facts, +- routed message status is reconstructible after restart, +- delivery decisions are deterministic given persisted state, +- replay does not require re-reading raw runtime logs once intents have + already been extracted and persisted. + +--- + +# 13. 
Out of Scope for v0 + +Deferred items: +- same-iteration delivery, +- direct candidate-to-candidate addressing, +- thread-like conversations, +- arbitrary free-form chat, +- semantic `Consumed` tracking, +- UI conversation views, +- runtime-side message routing inside `void-box`, +- provider-required live delivery. + +--- + +# 14. Acceptance Criteria + +V0 is acceptable when all of the following are true: + +1. a candidate can emit one or more valid structured intents in + `result.json` +2. `void-control` persists those intents in `intents.log` +3. `void-control` routes valid intents into `messages.log` +4. `void-control` materializes persisted inbox snapshots for a later + iteration +5. delivered inbox snapshots are reconstructible after restart +6. duplicate or oversized intents are rejected or dropped deterministically +7. expired intents do not appear in new inbox snapshots +8. a `swarm` acceptance test proves backlog/inbox delivery semantics using + real routed message records +9. a `search` acceptance test proves refinement lineage using + `caused_by`-linked intents +10. at least one provider adapter delivers inbox snapshots through + deterministic launch-time injection + +--- + +# 15. Recommended Implementation Order + +1. extend the structured output contract to allow `intents` +2. add `CommunicationIntent` parsing and validation +3. persist `intents.log` +4. add routing for `leader` and `broadcast` +5. persist `messages.log` +6. add provider adapter abstraction with required `LaunchInjection` + support +7. persist `InboxSnapshot` +8. add control-plane collaboration events +9. add integration tests for: + - valid intent emission and delivery + - TTL expiry + - deduplication + - restart replay + - launch-time provider delivery + +This order keeps the boundary stable: +- `void-box` only returns structured output, +- `void-control` owns the message bus. 
diff --git a/spec/void-control-runtime-spec-v0.2.md b/spec/void-control-runtime-spec-v0.2.md index 2a6c4be..ff2ee31 100644 --- a/spec/void-control-runtime-spec-v0.2.md +++ b/spec/void-control-runtime-spec-v0.2.md @@ -227,11 +227,23 @@ Runtime MUST NOT: # 9. Mental Model -`Run` = atomic orchestration unit (control plane scope) +This specification defines the run-level contract between +`void-control` and `void-box`. -`Stage` = atomic isolation unit (runtime scope) +At this boundary: + +`Run` = atomic runtime dispatch and tracking unit + +`Stage` = atomic isolation unit inside one run `microVM` = execution isolation boundary -Controller orchestrates runs. -Runtime orchestrates stages. +Controller orchestrates runs at the runtime boundary. +Runtime orchestrates stages inside a run. + +This does not prevent `void-control` from introducing a higher-level +control-plane resource above `Run`, such as `Execution`, as long as: + +- `void-box` continues to accept and execute one run per runtime request, +- stage orchestration remains inside `void-box`, +- cross-run composition remains a control-plane concern. 
diff --git a/src/bin/voidctl.rs b/src/bin/voidctl.rs index 090333d..dad051a 100644 --- a/src/bin/voidctl.rs +++ b/src/bin/voidctl.rs @@ -85,7 +85,7 @@ fn run() -> Result<(), String> { let command_candidates = [ "/run", "/status", "/events", "/logs", "/cancel", "/list", "/watch", "/resume", - "/help", "/exit", + "/execution", "/help", "/exit", ]; let mut out = Vec::new(); @@ -118,6 +118,7 @@ fn run() -> Result<(), String> { options.extend(["fast", "balanced", "safe"]); } } + "/execution" => options.extend(["create", "dry-run", "list", "status", "pause", "resume", "cancel", "patch"]), "/events" => options.push("--from"), "/logs" => options.push("--follow"), "/cancel" => options.push("--reason"), @@ -174,6 +175,30 @@ fn run() -> Result<(), String> { Resume { run_id: String, }, + ExecutionCreate { + spec: String, + }, + ExecutionDryRun { + spec: String, + }, + ExecutionList, + ExecutionStatus { + execution_id: String, + }, + ExecutionPause { + execution_id: String, + }, + ExecutionResume { + execution_id: String, + }, + ExecutionCancel { + execution_id: String, + }, + ExecutionPatch { + execution_id: String, + max_iterations: Option, + max_concurrent_candidates: Option, + }, Help, Exit, Empty, @@ -375,6 +400,114 @@ fn run() -> Result<(), String> { .ok_or_else(|| "usage: /resume ".to_string())? .to_string(), }), + "/execution" => { + let action = tokens.next().ok_or_else(|| { + "usage: /execution [args]".to_string() + })?; + match action { + "create" => Ok(Command::ExecutionCreate { + spec: tokens + .next() + .ok_or_else(|| "usage: /execution create ".to_string())? + .to_string(), + }), + "dry-run" => Ok(Command::ExecutionDryRun { + spec: tokens + .next() + .ok_or_else(|| { + "usage: /execution dry-run ".to_string() + })? + .to_string(), + }), + "list" => Ok(Command::ExecutionList), + "status" => Ok(Command::ExecutionStatus { + execution_id: tokens + .next() + .ok_or_else(|| { + "usage: /execution status ".to_string() + })? 
+ .to_string(), + }), + "pause" => Ok(Command::ExecutionPause { + execution_id: tokens + .next() + .ok_or_else(|| { + "usage: /execution pause ".to_string() + })? + .to_string(), + }), + "resume" => Ok(Command::ExecutionResume { + execution_id: tokens + .next() + .ok_or_else(|| { + "usage: /execution resume ".to_string() + })? + .to_string(), + }), + "cancel" => Ok(Command::ExecutionCancel { + execution_id: tokens + .next() + .ok_or_else(|| { + "usage: /execution cancel ".to_string() + })? + .to_string(), + }), + "patch" => { + let execution_id = tokens + .next() + .ok_or_else(|| { + "usage: /execution patch [--max-iterations N] [--max-concurrent-candidates N]".to_string() + })? + .to_string(); + let rest = tokens.collect::>(); + let mut idx = 0usize; + let mut max_iterations = None; + let mut max_concurrent_candidates = None; + while idx < rest.len() { + match rest[idx] { + "--max-iterations" => { + idx += 1; + if idx >= rest.len() { + return Err("missing value for --max-iterations".to_string()); + } + max_iterations = Some( + rest[idx] + .parse::() + .map_err(|_| "invalid integer for --max-iterations".to_string())?, + ); + } + "--max-concurrent-candidates" => { + idx += 1; + if idx >= rest.len() { + return Err("missing value for --max-concurrent-candidates".to_string()); + } + max_concurrent_candidates = Some( + rest[idx] + .parse::() + .map_err(|_| "invalid integer for --max-concurrent-candidates".to_string())?, + ); + } + other => { + return Err(format!("unknown /execution patch option '{other}'")); + } + } + idx += 1; + } + if max_iterations.is_none() && max_concurrent_candidates.is_none() { + return Err( + "usage: /execution patch [--max-iterations N] [--max-concurrent-candidates N]" + .to_string(), + ); + } + Ok(Command::ExecutionPatch { + execution_id, + max_iterations, + max_concurrent_candidates, + }) + } + other => Err(format!("unknown /execution action '{other}'")), + } + } "/help" => Ok(Command::Help), "/exit" | "/quit" => Ok(Command::Exit), other => 
Err(format!("unknown command '{other}'")), @@ -391,6 +524,14 @@ fn run() -> Result<(), String> { /list [--state active|terminal] /watch /resume + /execution create + /execution dry-run + /execution list + /execution status + /execution pause + /execution resume + /execution cancel + /execution patch [--max-iterations N] [--max-concurrent-candidates N] /help /exit @@ -601,8 +742,59 @@ Policy presets: fast | balanced | safe" let _ = io::stdout().flush(); } + fn parse_host_port(base_url: &str) -> Result<(String, u16), String> { + let stripped = base_url + .strip_prefix("http://") + .ok_or_else(|| format!("bridge URL must start with http://, got '{base_url}'"))?; + let host_port = stripped.split('/').next().unwrap_or(stripped); + match host_port.split_once(':') { + Some((host, port)) => port + .parse::() + .map(|port| (host.to_string(), port)) + .map_err(|_| format!("invalid port in bridge URL '{base_url}'")), + None => Ok((host_port.to_string(), 80)), + } + } + + fn bridge_request( + base_url: &str, + method: &str, + path: &str, + body: Option<&str>, + ) -> Result { + use std::io::{Read, Write}; + use std::net::TcpStream; + + let (host, port) = parse_host_port(base_url)?; + let mut stream = + TcpStream::connect(format!("{host}:{port}")).map_err(|e| format!("connect failed: {e}"))?; + let body = body.unwrap_or(""); + let request = format!( + "{method} {path} HTTP/1.1\r\nHost: {host}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", + body.len(), + body + ); + stream + .write_all(request.as_bytes()) + .map_err(|e| format!("request write failed: {e}"))?; + let mut response = String::new(); + stream + .read_to_string(&mut response) + .map_err(|e| format!("response read failed: {e}"))?; + let (_, body) = response + .split_once("\r\n\r\n") + .ok_or_else(|| "invalid HTTP response".to_string())?; + serde_json::from_str(body).map_err(|e| format!("invalid JSON response: {e}")) + } + + fn load_execution_spec_file(path: &str) -> Result { + 
fs::read_to_string(path).map_err(|e| format!("read execution spec failed: {e}")) + } + let base_url = env::var("VOID_BOX_BASE_URL").unwrap_or_else(|_| "http://127.0.0.1:43100".to_string()); + let bridge_base_url = env::var("VOID_CONTROL_BRIDGE_BASE_URL") + .unwrap_or_else(|_| "http://127.0.0.1:43210".to_string()); let client = VoidBoxRuntimeClient::new(base_url.clone(), 250); let session_file = session_path(); let mut session = load_session(&session_file); @@ -672,6 +864,7 @@ Policy presets: fast | balanced | safe" match client.start(StartRequest { run_id: run_id.clone(), workflow_spec: spec, + launch_context: None, policy, }) { Ok(started) => { @@ -800,6 +993,192 @@ Policy presets: fast | balanced | safe" session.last_selected_run = Some(run_id.clone()); stream_run(&client, &mut session, &run_id, false, true); } + Command::ExecutionCreate { spec } => { + match load_execution_spec_file(&spec).and_then(|spec_text| { + bridge_request( + &bridge_base_url, + "POST", + "/v1/executions", + Some(&spec_text), + ) + }) { + Ok(json) => println!( + "execution_id={} status={} iterations={} best_candidate={}", + json.get("execution_id").and_then(|v| v.as_str()).unwrap_or("-"), + json.get("status") + .and_then(|v| v.as_str()) + .unwrap_or("unknown"), + json.get("completed_iterations") + .and_then(|v| v.as_u64()) + .unwrap_or(0), + json.get("result_best_candidate_id") + .and_then(|v| v.as_str()) + .unwrap_or("-") + ), + Err(err) => println!("error: {err}"), + } + } + Command::ExecutionDryRun { spec } => { + match load_execution_spec_file(&spec).and_then(|spec_text| { + bridge_request( + &bridge_base_url, + "POST", + "/v1/executions/dry-run", + Some(&spec_text), + ) + }) { + Ok(json) => println!( + "valid={} candidates_per_iteration={} max_iterations={} max_child_runs={}", + json.get("valid").and_then(|v| v.as_bool()).unwrap_or(false), + json.get("plan") + .and_then(|v| v.get("candidates_per_iteration")) + .and_then(|v| v.as_u64()) + .unwrap_or(0), + json.get("plan") + 
.and_then(|v| v.get("max_iterations")) + .and_then(|v| v.as_u64()) + .unwrap_or(0), + json.get("plan") + .and_then(|v| v.get("max_child_runs")) + .and_then(|v| v.as_u64()) + .unwrap_or(0) + ), + Err(err) => println!("error: {err}"), + } + } + Command::ExecutionList => match bridge_request( + &bridge_base_url, + "GET", + "/v1/executions", + None, + ) { + Ok(json) => { + let executions = json + .get("executions") + .and_then(|v| v.as_array()) + .cloned() + .unwrap_or_default(); + if executions.is_empty() { + println!("no executions"); + } else { + for execution in executions { + println!( + "execution_id={} status={} iterations={} best_candidate={}", + execution + .get("execution_id") + .and_then(|v| v.as_str()) + .unwrap_or("-"), + execution + .get("status") + .and_then(|v| v.as_str()) + .unwrap_or("unknown"), + execution + .get("completed_iterations") + .and_then(|v| v.as_u64()) + .unwrap_or(0), + execution + .get("result_best_candidate_id") + .and_then(|v| v.as_str()) + .unwrap_or("-") + ); + } + } + } + Err(err) => println!("error: {err}"), + }, + Command::ExecutionStatus { execution_id } => match bridge_request( + &bridge_base_url, + "GET", + &format!("/v1/executions/{execution_id}"), + None, + ) { + Ok(json) => println!( + "execution_id={} status={} iterations={} best_candidate={}", + json.get("execution_id").and_then(|v| v.as_str()).unwrap_or("-"), + json.get("status") + .and_then(|v| v.as_str()) + .unwrap_or("unknown"), + json.get("completed_iterations") + .and_then(|v| v.as_u64()) + .unwrap_or(0), + json.get("result_best_candidate_id") + .and_then(|v| v.as_str()) + .unwrap_or("-") + ), + Err(err) => println!("error: {err}"), + }, + Command::ExecutionPause { execution_id } => match bridge_request( + &bridge_base_url, + "POST", + &format!("/v1/executions/{execution_id}/pause"), + None, + ) { + Ok(json) => println!( + "execution_id={} status={}", + json.get("execution_id").and_then(|v| v.as_str()).unwrap_or("-"), + json.get("status").and_then(|v| 
v.as_str()).unwrap_or("unknown"), + ), + Err(err) => println!("error: {err}"), + }, + Command::ExecutionResume { execution_id } => match bridge_request( + &bridge_base_url, + "POST", + &format!("/v1/executions/{execution_id}/resume"), + None, + ) { + Ok(json) => println!( + "execution_id={} status={}", + json.get("execution_id").and_then(|v| v.as_str()).unwrap_or("-"), + json.get("status").and_then(|v| v.as_str()).unwrap_or("unknown"), + ), + Err(err) => println!("error: {err}"), + }, + Command::ExecutionCancel { execution_id } => match bridge_request( + &bridge_base_url, + "POST", + &format!("/v1/executions/{execution_id}/cancel"), + None, + ) { + Ok(json) => println!( + "execution_id={} status={}", + json.get("execution_id").and_then(|v| v.as_str()).unwrap_or("-"), + json.get("status").and_then(|v| v.as_str()).unwrap_or("unknown"), + ), + Err(err) => println!("error: {err}"), + }, + Command::ExecutionPatch { + execution_id, + max_iterations, + max_concurrent_candidates, + } => { + let body = serde_json::json!({ + "budget": { + "max_iterations": max_iterations + }, + "concurrency": { + "max_concurrent_candidates": max_concurrent_candidates + } + }) + .to_string(); + match bridge_request( + &bridge_base_url, + "PATCH", + &format!("/v1/executions/{execution_id}/policy"), + Some(&body), + ) { + Ok(json) => println!( + "execution_id={} max_iterations={} max_concurrent_candidates={}", + json.get("execution_id").and_then(|v| v.as_str()).unwrap_or("-"), + json.get("max_iterations") + .and_then(|v| v.as_u64()) + .unwrap_or(0), + json.get("max_concurrent_candidates") + .and_then(|v| v.as_u64()) + .unwrap_or(0) + ), + Err(err) => println!("error: {err}"), + } + } } if let Err(e) = save_session(&session_file, &session) { diff --git a/src/bridge.rs b/src/bridge.rs index bb38263..610133e 100644 --- a/src/bridge.rs +++ b/src/bridge.rs @@ -1,268 +1,1294 @@ +#[cfg(feature = "serde")] +use std::fs::{self, OpenOptions}; +#[cfg(feature = "serde")] +use std::io::Write; +#[cfg(feature 
= "serde")] +use std::path::{Path, PathBuf}; +#[cfg(feature = "serde")] +use std::thread; +#[cfg(feature = "serde")] +use std::time::{SystemTime, UNIX_EPOCH}; + +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; +#[cfg(feature = "serde")] +use serde_json::{json, Value}; + +#[cfg(feature = "serde")] +use crate::contract::{ExecutionPolicy, RunState, StartRequest}; +#[cfg(feature = "serde")] +use crate::orchestration::{ + BudgetPolicy, ConcurrencyPolicy, ConvergencePolicy, EvaluationConfig, ExecutionAction, + ExecutionRuntime, ExecutionService, ExecutionSpec, FsExecutionStore, GlobalConfig, + GlobalScheduler, OrchestrationPolicy, PolicyPatch, QueuedCandidate, + VariationConfig, VariationProposal, VariationSelection, WorkflowTemplateRef, +}; +#[cfg(feature = "serde")] +use crate::runtime::{MockRuntime, VoidBoxRuntimeClient}; + +#[cfg(feature = "serde")] +#[derive(Debug, Serialize)] +struct ExecutionProgressResponse { + completed_iterations: u32, + scoring_history_len: u32, + event_count: usize, + last_event: Option, + candidate_queue_count: u32, + candidate_dispatch_count: u32, + candidate_output_count: u32, + queued_candidate_count: u32, + running_candidate_count: u32, + completed_candidate_count: u32, + failed_candidate_count: u32, + canceled_candidate_count: u32, + event_type_counts: std::collections::BTreeMap, +} + +#[cfg(feature = "serde")] +#[derive(Debug, Serialize)] +struct ExecutionDetailResponse { + execution: crate::orchestration::Execution, + progress: ExecutionProgressResponse, + result: ExecutionResultResponse, +} + +#[cfg(feature = "serde")] +#[derive(Debug, Serialize)] +struct ExecutionResultResponse { + best_candidate_id: Option, + completed_iterations: u32, + total_candidate_failures: u32, +} + +#[cfg(feature = "serde")] +#[derive(Debug, Deserialize)] +struct LaunchRequest { + run_id: Option, + file: Option, + spec_text: Option, + spec_format: Option, + policy: Option, +} + +#[cfg(feature = "serde")] +#[derive(Debug, Deserialize)] 
+struct RunPolicyJson { + max_parallel_microvms_per_run: Option, + max_stage_retries: Option, + stage_timeout_secs: Option, + cancel_grace_period_secs: Option, +} + +#[cfg(feature = "serde")] +#[derive(Debug, Deserialize)] +struct ExecutionSpecRequest { + mode: String, + goal: String, + workflow: WorkflowTemplateRequest, + policy: ExecutionPolicyRequest, + evaluation: EvaluationRequest, + variation: VariationRequest, + swarm: bool, +} + +#[cfg(feature = "serde")] +#[derive(Debug, Deserialize)] +struct WorkflowTemplateRequest { + template: String, +} + +#[cfg(feature = "serde")] +#[derive(Debug, Deserialize)] +struct ExecutionPolicyRequest { + budget: BudgetPolicyRequest, + concurrency: ConcurrencyPolicyRequest, + convergence: ConvergencePolicyRequest, + max_candidate_failures_per_iteration: u32, + missing_output_policy: String, + iteration_failure_policy: String, +} + +#[cfg(feature = "serde")] +#[derive(Debug, Deserialize)] +struct BudgetPolicyRequest { + max_iterations: Option, + max_child_runs: Option, + max_wall_clock_secs: Option, + max_cost_usd_millis: Option, +} + +#[cfg(feature = "serde")] +#[derive(Debug, Deserialize)] +struct ConcurrencyPolicyRequest { + max_concurrent_candidates: u32, +} + +#[cfg(feature = "serde")] +#[derive(Debug, Deserialize)] +struct ConvergencePolicyRequest { + strategy: String, + min_score: Option, + max_iterations_without_improvement: Option, +} + +#[cfg(feature = "serde")] +#[derive(Debug, Deserialize)] +struct EvaluationRequest { + scoring_type: String, + weights: std::collections::BTreeMap, + pass_threshold: Option, + ranking: String, + tie_breaking: String, +} + +#[cfg(feature = "serde")] +#[derive(Debug, Deserialize)] +struct VariationRequest { + source: String, + candidates_per_iteration: u32, + selection: Option, + parameter_space: Option>>, + explicit: Option>, +} + +#[cfg(feature = "serde")] +#[derive(Debug, Deserialize)] +struct VariationProposalRequest { + overrides: std::collections::BTreeMap, +} + +#[cfg(feature = 
"serde")] +#[derive(Debug, Deserialize)] +struct PolicyPatchRequest { + budget: Option, + concurrency: Option, + convergence: Option, + evaluation: Option, + variation: Option, +} + +#[cfg(feature = "serde")] +#[derive(Debug, Deserialize)] +struct PolicyPatchBudgetRequest { + max_iterations: Option, +} + +#[cfg(feature = "serde")] +#[derive(Debug, Deserialize)] +struct PolicyPatchConcurrencyRequest { + max_concurrent_candidates: Option, +} + +#[cfg(feature = "serde")] +#[derive(Debug, Serialize)] +struct LaunchResponse { + run_id: String, + attempt_id: u32, + state: String, + file: String, +} + +#[cfg(feature = "serde")] +#[derive(Debug, Serialize)] +struct ApiError { + code: &'static str, + message: String, + retryable: bool, +} + +#[cfg(feature = "serde")] +#[derive(Debug)] +pub struct TestBridgeResponse { + pub status: u16, + pub json: Value, +} + +#[cfg(feature = "serde")] +pub fn handle_bridge_request_for_test( + method: &str, + path: &str, + body: Option<&str>, +) -> Result { + let root = std::env::temp_dir().join(format!("void-control-bridge-test-{}", now_ms())); + handle_bridge_request_with_dirs_for_test( + method, + path, + body, + &root.join("specs"), + &root.join("executions"), + ) +} + +#[cfg(feature = "serde")] +pub fn handle_bridge_request_with_dirs_for_test( + method: &str, + path: &str, + body: Option<&str>, + spec_dir: &Path, + execution_dir: &Path, +) -> Result { + let response = handle_bridge_request( + method, + path, + body.unwrap_or(""), + &BridgeConfig { + listen: "127.0.0.1:0".to_string(), + base_url: "http://127.0.0.1:43100".to_string(), + spec_dir: spec_dir.to_path_buf(), + execution_dir: execution_dir.to_path_buf(), + }, + None, + ); + let json = serde_json::from_slice::(&response.body) + .unwrap_or_else(|_| json!({"invalid_json": true})); + Ok(TestBridgeResponse { + status: response.status, + json, + }) +} + #[cfg(feature = "serde")] pub fn run_bridge() -> Result<(), String> { - use std::env; - use std::fs::{self, OpenOptions}; - use 
std::io::Write; - use std::path::{Path, PathBuf}; - use std::time::{SystemTime, UNIX_EPOCH}; - - use serde::{Deserialize, Serialize}; - use tiny_http::{Header, Method, Response, Server, StatusCode}; - - use crate::contract::{ExecutionPolicy, RunState, StartRequest}; - use crate::runtime::VoidBoxRuntimeClient; - - #[derive(Debug, Deserialize)] - struct LaunchRequest { - run_id: Option, - file: Option, - spec_text: Option, - spec_format: Option, - policy: Option, - } - - #[derive(Debug, Deserialize)] - struct PolicyJson { - max_parallel_microvms_per_run: Option, - max_stage_retries: Option, - stage_timeout_secs: Option, - cancel_grace_period_secs: Option, - } - - #[derive(Debug, Serialize)] - struct LaunchResponse { - run_id: String, - attempt_id: u32, - state: String, - file: String, - } - - #[derive(Debug, Serialize)] - struct ApiError { - code: &'static str, - message: String, - retryable: bool, - } - - fn default_policy() -> ExecutionPolicy { - ExecutionPolicy { - max_parallel_microvms_per_run: 2, - max_stage_retries: 1, - stage_timeout_secs: 300, - cancel_grace_period_secs: 10, + use tiny_http::{Method, Response, Server, StatusCode}; + + let config = BridgeConfig::from_env(); + let worker_config = config.clone(); + thread::spawn(move || { + loop { + let runtime = VoidBoxRuntimeClient::new(worker_config.base_url.clone(), 250); + let _ = process_pending_executions_once( + GlobalConfig { + max_concurrent_child_runs: 20, + }, + runtime, + worker_config.execution_dir.clone(), + ); + std::thread::sleep(std::time::Duration::from_millis(500)); + } + }); + let server = + Server::http(&config.listen).map_err(|e| format!("listen {} failed: {e}", config.listen))?; + let client = VoidBoxRuntimeClient::new(config.base_url.clone(), 250); + println!( + "voidctl bridge listening on http://{} -> {}", + config.listen, config.base_url + ); + + for mut req in server.incoming_requests() { + let method = req.method().as_str().to_string(); + let path = req.url().to_string(); + + if 
req.method() == &Method::Options { + let _ = req.respond( + Response::empty(204) + .with_header(make_header("Access-Control-Allow-Origin", "*")) + .with_header(make_header("Access-Control-Allow-Methods", "GET,POST,OPTIONS")) + .with_header(make_header("Access-Control-Allow-Headers", "Content-Type")), + ); + continue; + } + + let mut body = String::new(); + if let Err(e) = req.as_reader().read_to_string(&mut body) { + let _ = req.respond(to_tiny_response(json_response( + 400, + &ApiError { + code: "INVALID_SPEC", + message: format!("failed to read request body: {e}"), + retryable: false, + }, + ))); + continue; } + + let response = handle_bridge_request(&method, &path, &body, &config, Some(&client)); + let _ = req.respond( + Response::from_data(response.body) + .with_status_code(StatusCode(response.status)) + .with_header(make_header("Content-Type", "application/json")) + .with_header(make_header("Access-Control-Allow-Origin", "*")) + .with_header(make_header("Access-Control-Allow-Methods", "GET,POST,OPTIONS")) + .with_header(make_header("Access-Control-Allow-Headers", "Content-Type")), + ); } - fn policy_from_json(raw: Option) -> ExecutionPolicy { - let defaults = default_policy(); - let Some(raw) = raw else { - return defaults; - }; - ExecutionPolicy { - max_parallel_microvms_per_run: raw - .max_parallel_microvms_per_run - .unwrap_or(defaults.max_parallel_microvms_per_run), - max_stage_retries: raw - .max_stage_retries - .unwrap_or(defaults.max_stage_retries), - stage_timeout_secs: raw.stage_timeout_secs.unwrap_or(defaults.stage_timeout_secs), - cancel_grace_period_secs: raw - .cancel_grace_period_secs - .unwrap_or(defaults.cancel_grace_period_secs), + Ok(()) +} + +#[cfg(feature = "serde")] +struct BridgeConfig { + listen: String, + base_url: String, + spec_dir: PathBuf, + execution_dir: PathBuf, +} + +#[cfg(feature = "serde")] +impl Clone for BridgeConfig { + fn clone(&self) -> Self { + Self { + listen: self.listen.clone(), + base_url: self.base_url.clone(), + 
spec_dir: self.spec_dir.clone(), + execution_dir: self.execution_dir.clone(), } } +} + +#[cfg(feature = "serde")] +impl BridgeConfig { + fn from_env() -> Self { + let listen = std::env::var("VOID_CONTROL_BRIDGE_LISTEN") + .unwrap_or_else(|_| "127.0.0.1:43210".to_string()); + let base_url = std::env::var("VOID_BOX_BASE_URL") + .unwrap_or_else(|_| "http://127.0.0.1:43100".to_string()); + let spec_dir = std::env::var("VOID_CONTROL_SPEC_DIR") + .unwrap_or_else(|_| "/tmp/void-control/specs".to_string()); + let execution_dir = std::env::var("VOID_CONTROL_EXECUTION_DIR") + .unwrap_or_else(|_| "/tmp/void-control/executions".to_string()); + Self { + listen, + base_url, + spec_dir: PathBuf::from(spec_dir), + execution_dir: PathBuf::from(execution_dir), + } + } +} + +#[cfg(feature = "serde")] +struct JsonHttpResponse { + status: u16, + body: Vec, +} + +#[cfg(feature = "serde")] +fn handle_bridge_request( + method: &str, + path: &str, + body: &str, + config: &BridgeConfig, + client: Option<&VoidBoxRuntimeClient>, +) -> JsonHttpResponse { + if method == "GET" && path == "/v1/health" { + return json_response(200, &json!({"status":"ok","service":"voidctl-bridge"})); + } + + if method == "POST" && path == "/v1/executions/dry-run" { + return handle_execution_dry_run(body); + } - fn now_ms() -> u128 { - SystemTime::now() - .duration_since(UNIX_EPOCH) - .map(|d| d.as_millis()) - .unwrap_or(0) + if method == "POST" && path == "/v1/executions" { + return handle_execution_create(body, config, client.is_some()); } - fn next_run_id() -> String { - format!("ui-{}", now_ms()) + if method == "GET" && path == "/v1/executions" { + return handle_execution_list(config); } - fn run_id_from_handle(handle: &str) -> String { - handle - .strip_prefix("void-box:") - .or_else(|| handle.strip_prefix("vb:")) - .unwrap_or(handle) - .to_string() + if method == "GET" && path.starts_with("/v1/executions/") && path.ends_with("/events") { + return handle_execution_events(path, config); } - fn 
state_to_str(state: RunState) -> &'static str { - match state { - RunState::Pending => "pending", - RunState::Starting => "starting", - RunState::Running => "running", - RunState::Succeeded => "succeeded", - RunState::Failed => "failed", - RunState::Canceled => "cancelled", + if method == "GET" && path.starts_with("/v1/executions/") { + return handle_execution_get(path, config); + } + + if method == "PATCH" && path.starts_with("/v1/executions/") && path.ends_with("/policy") { + return handle_execution_policy_patch(path, body, config); + } + + if method == "POST" && path.starts_with("/v1/executions/") && path.ends_with("/pause") { + return handle_execution_action(path, config, ExecutionAction::Pause); + } + + if method == "POST" && path.starts_with("/v1/executions/") && path.ends_with("/resume") { + return handle_execution_action(path, config, ExecutionAction::Resume); + } + + if method == "POST" && path.starts_with("/v1/executions/") && path.ends_with("/cancel") { + return handle_execution_action(path, config, ExecutionAction::Cancel); + } + + if method == "POST" && path == "/v1/launch" { + return handle_launch(body, config, client); + } + + json_response( + 404, + &ApiError { + code: "NOT_FOUND", + message: format!("no route for {} {}", method, path), + retryable: false, + }, + ) +} + +#[cfg(feature = "serde")] +fn handle_execution_dry_run(body: &str) -> JsonHttpResponse { + let spec_request: ExecutionSpecRequest = match serde_json::from_str(body) { + Ok(value) => value, + Err(err) => { + return json_response( + 400, + &ApiError { + code: "INVALID_SPEC", + message: format!("invalid JSON body: {err}"), + retryable: false, + }, + ) + } + }; + + let spec = match spec_request.try_into_spec() { + Ok(spec) => spec, + Err(err) => { + return json_response( + 400, + &json!({ + "valid": false, + "plan": { + "candidates_per_iteration": 0, + "max_iterations": Value::Null, + "max_child_runs": Value::Null, + "estimated_concurrent_peak": 0, + "variation_source": "invalid", + 
"parameter_space_size": Value::Null + }, + "warnings": [], + "errors": [err] + }), + ) + } + }; + + let temp_root = std::env::temp_dir().join(format!("void-control-dry-run-{}", now_ms())); + let service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 20, + }, + MockRuntime::new(), + FsExecutionStore::new(temp_root), + ); + let result = match service.dry_run(&spec) { + Ok(result) => result, + Err(err) => { + return json_response( + 500, + &ApiError { + code: "INTERNAL_ERROR", + message: err.to_string(), + retryable: true, + }, + ) + } + }; + let status = if result.valid { 200 } else { 400 }; + json_response(status, &result) +} + +#[cfg(feature = "serde")] +fn handle_execution_create( + body: &str, + config: &BridgeConfig, + _use_live_runtime: bool, +) -> JsonHttpResponse { + let spec_request: ExecutionSpecRequest = match serde_json::from_str(body) { + Ok(value) => value, + Err(err) => { + return json_response( + 400, + &ApiError { + code: "INVALID_SPEC", + message: format!("invalid JSON body: {err}"), + retryable: false, + }, + ) + } + }; + let spec = match spec_request.try_into_spec() { + Ok(spec) => spec, + Err(err) => { + return json_response( + 400, + &ApiError { + code: "INVALID_SPEC", + message: err, + retryable: false, + }, + ) } + }; + + let store = FsExecutionStore::new(config.execution_dir.clone()); + let execution_id = format!("exec-{}", now_ms()); + match ExecutionService::::submit_execution(&store, &execution_id, &spec) { + Ok(execution) => json_response(200, &execution), + Err(err) => json_response( + 500, + &ApiError { + code: "INTERNAL_ERROR", + message: err.to_string(), + retryable: true, + }, + ), } +} - fn infer_ext(spec_format: Option<&str>, spec_text: &str) -> &'static str { - if let Some(fmt) = spec_format { - let f = fmt.to_ascii_lowercase(); - if f.contains("json") { - return "json"; - } - if f.contains("yaml") || f.contains("yml") { - return "yaml"; - } +#[cfg(feature = "serde")] +fn handle_execution_list(config: 
&BridgeConfig) -> JsonHttpResponse { + let store = FsExecutionStore::new(config.execution_dir.clone()); + match store.list_execution_ids() { + Ok(ids) => { + let executions: Vec<_> = ids + .into_iter() + .filter_map(|execution_id| store.load_execution(&execution_id).ok()) + .map(|snapshot| snapshot.execution) + .collect(); + json_response(200, &json!({ "executions": executions })) } - if spec_text.trim_start().starts_with('{') || spec_text.trim_start().starts_with('[') { - "json" - } else { - "yaml" + Err(err) => json_response( + 500, + &ApiError { + code: "INTERNAL_ERROR", + message: err.to_string(), + retryable: true, + }, + ), + } +} + +#[cfg(feature = "serde")] +fn handle_execution_get(path: &str, config: &BridgeConfig) -> JsonHttpResponse { + let Some(execution_id) = path.strip_prefix("/v1/executions/") else { + return json_response( + 404, + &ApiError { + code: "NOT_FOUND", + message: format!("no route for GET {path}"), + retryable: false, + }, + ); + }; + let store = FsExecutionStore::new(config.execution_dir.clone()); + match store.load_execution(execution_id) { + Ok(snapshot) => { + let progress = summarize_progress(&snapshot); + let result = ExecutionResultResponse { + best_candidate_id: snapshot.execution.result_best_candidate_id.clone(), + completed_iterations: snapshot.execution.completed_iterations, + total_candidate_failures: snapshot.execution.failure_counts.total_candidate_failures, + }; + json_response( + 200, + &ExecutionDetailResponse { + execution: snapshot.execution, + progress, + result, + }, + ) } + Err(err) if err.kind() == std::io::ErrorKind::NotFound => json_response( + 404, + &ApiError { + code: "NOT_FOUND", + message: format!("execution '{execution_id}' not found"), + retryable: false, + }, + ), + Err(err) => json_response( + 500, + &ApiError { + code: "INTERNAL_ERROR", + message: err.to_string(), + retryable: true, + }, + ), + } +} + +#[cfg(feature = "serde")] +fn summarize_progress( + snapshot: 
&crate::orchestration::ExecutionSnapshot, +) -> ExecutionProgressResponse { + let mut event_type_counts = std::collections::BTreeMap::new(); + for event in &snapshot.events { + *event_type_counts + .entry(event.event_type.as_str().to_string()) + .or_insert(0) += 1; + } + let queued_candidate_count = snapshot + .candidates + .iter() + .filter(|candidate| candidate.status == crate::orchestration::CandidateStatus::Queued) + .count() as u32; + let running_candidate_count = snapshot + .candidates + .iter() + .filter(|candidate| candidate.status == crate::orchestration::CandidateStatus::Running) + .count() as u32; + let completed_candidate_count = snapshot + .candidates + .iter() + .filter(|candidate| candidate.status == crate::orchestration::CandidateStatus::Completed) + .count() as u32; + let failed_candidate_count = snapshot + .candidates + .iter() + .filter(|candidate| candidate.status == crate::orchestration::CandidateStatus::Failed) + .count() as u32; + let canceled_candidate_count = snapshot + .candidates + .iter() + .filter(|candidate| candidate.status == crate::orchestration::CandidateStatus::Canceled) + .count() as u32; + + ExecutionProgressResponse { + completed_iterations: snapshot.accumulator.completed_iterations, + scoring_history_len: snapshot.accumulator.scoring_history_len, + event_count: snapshot.events.len(), + last_event: snapshot + .events + .last() + .map(|event| event.event_type.as_str().to_string()), + candidate_queue_count: snapshot + .events + .iter() + .filter(|event| { + event.event_type == crate::orchestration::ControlEventType::CandidateQueued + }) + .count() as u32, + candidate_dispatch_count: snapshot + .events + .iter() + .filter(|event| { + event.event_type == crate::orchestration::ControlEventType::CandidateDispatched + }) + .count() as u32, + candidate_output_count: snapshot + .events + .iter() + .filter(|event| { + event.event_type + == crate::orchestration::ControlEventType::CandidateOutputCollected + }) + .count() as u32, + 
queued_candidate_count, + running_candidate_count, + completed_candidate_count, + failed_candidate_count, + canceled_candidate_count, + event_type_counts, } +} - fn write_spec_file(spec_dir: &Path, spec_text: &str, spec_format: Option<&str>) -> Result { - fs::create_dir_all(spec_dir) - .map_err(|e| format!("failed to create spec dir {}: {e}", spec_dir.display()))?; - let ext = infer_ext(spec_format, spec_text); - let filename = format!("spec-{}-{}.{}", now_ms(), std::process::id(), ext); - let path = spec_dir.join(filename); - let mut file = OpenOptions::new() - .create_new(true) - .write(true) - .open(&path) - .map_err(|e| format!("failed to create spec file {}: {e}", path.display()))?; - file.write_all(spec_text.as_bytes()) - .and_then(|_| file.flush()) - .map_err(|e| format!("failed to write spec file {}: {e}", path.display()))?; - Ok(path.display().to_string()) - } - - fn make_header(name: &str, value: &str) -> Header { - Header::from_bytes(name.as_bytes(), value.as_bytes()).expect("valid header") - } - - fn json_response(status: u16, body: &T) -> Response>> { - let payload = serde_json::to_vec(body).unwrap_or_else(|_| b"{\"code\":\"INTERNAL_ERROR\",\"message\":\"serialization failed\",\"retryable\":true}".to_vec()); - Response::from_data(payload) - .with_status_code(StatusCode(status)) - .with_header(make_header("Content-Type", "application/json")) - .with_header(make_header("Access-Control-Allow-Origin", "*")) - .with_header(make_header("Access-Control-Allow-Methods", "GET,POST,OPTIONS")) - .with_header(make_header("Access-Control-Allow-Headers", "Content-Type")) - } - - fn json_error(status: u16, code: &'static str, message: String, retryable: bool) -> Response>> { - json_response( - status, +#[cfg(feature = "serde")] +fn handle_execution_events(path: &str, config: &BridgeConfig) -> JsonHttpResponse { + let Some(execution_id) = path + .strip_prefix("/v1/executions/") + .and_then(|rest| rest.strip_suffix("/events")) + else { + return json_response( + 404, + 
&ApiError { + code: "NOT_FOUND", + message: format!("no route for GET {path}"), + retryable: false, + }, + ); + }; + let store = FsExecutionStore::new(config.execution_dir.clone()); + match store.load_execution(execution_id) { + Ok(snapshot) => json_response( + 200, + &json!({ + "execution_id": execution_id, + "events": snapshot.events + .into_iter() + .map(|event| json!({ + "seq": event.seq, + "event_type": event.event_type.as_str(), + })) + .collect::>() + }), + ), + Err(err) if err.kind() == std::io::ErrorKind::NotFound => json_response( + 404, + &ApiError { + code: "NOT_FOUND", + message: format!("execution '{execution_id}' not found"), + retryable: false, + }, + ), + Err(err) => json_response( + 500, + &ApiError { + code: "INTERNAL_ERROR", + message: err.to_string(), + retryable: true, + }, + ), + } +} + +#[cfg(feature = "serde")] +fn handle_execution_action( + path: &str, + config: &BridgeConfig, + action: ExecutionAction, +) -> JsonHttpResponse { + let suffix = match action { + ExecutionAction::Pause => "/pause", + ExecutionAction::Resume => "/resume", + ExecutionAction::Cancel => "/cancel", + }; + let Some(execution_id) = path + .strip_prefix("/v1/executions/") + .and_then(|rest| rest.strip_suffix(suffix)) + else { + return json_response( + 404, + &ApiError { + code: "NOT_FOUND", + message: format!("no route for POST {path}"), + retryable: false, + }, + ); + }; + let store = FsExecutionStore::new(config.execution_dir.clone()); + match ExecutionService::::update_execution_status(&store, execution_id, action) { + Ok(execution) => json_response(200, &execution), + Err(err) if err.kind() == std::io::ErrorKind::NotFound => json_response( + 404, + &ApiError { + code: "NOT_FOUND", + message: format!("execution '{execution_id}' not found"), + retryable: false, + }, + ), + Err(err) if err.kind() == std::io::ErrorKind::InvalidInput => json_response( + 400, + &ApiError { + code: "INVALID_STATE", + message: err.to_string(), + retryable: false, + }, + ), + Err(err) => 
json_response( + 500, &ApiError { - code, - message, - retryable, + code: "INTERNAL_ERROR", + message: err.to_string(), + retryable: true, }, - ) + ), } +} - let listen = env::var("VOID_CONTROL_BRIDGE_LISTEN").unwrap_or_else(|_| "127.0.0.1:43210".to_string()); - let base_url = env::var("VOID_BOX_BASE_URL").unwrap_or_else(|_| "http://127.0.0.1:43100".to_string()); - let spec_dir = env::var("VOID_CONTROL_SPEC_DIR").unwrap_or_else(|_| "/tmp/void-control/specs".to_string()); - let spec_dir_path = PathBuf::from(spec_dir); +#[cfg(feature = "serde")] +fn handle_execution_policy_patch( + path: &str, + body: &str, + config: &BridgeConfig, +) -> JsonHttpResponse { + let Some(execution_id) = path + .strip_prefix("/v1/executions/") + .and_then(|rest| rest.strip_suffix("/policy")) + else { + return json_response( + 404, + &ApiError { + code: "NOT_FOUND", + message: format!("no route for PATCH {path}"), + retryable: false, + }, + ); + }; - let server = Server::http(&listen).map_err(|e| format!("listen {listen} failed: {e}"))?; - let client = VoidBoxRuntimeClient::new(base_url.clone(), 250); - println!("voidctl bridge listening on http://{listen} -> {base_url}"); + let request: PolicyPatchRequest = match serde_json::from_str(body) { + Ok(value) => value, + Err(err) => { + return json_response( + 400, + &ApiError { + code: "INVALID_POLICY", + message: format!("invalid JSON body: {err}"), + retryable: false, + }, + ) + } + }; - for mut req in server.incoming_requests() { - let method = req.method().clone(); - let path = req.url().to_string(); + if request.convergence.is_some() || request.evaluation.is_some() || request.variation.is_some() + { + return json_response( + 400, + &ApiError { + code: "INVALID_POLICY", + message: "convergence, evaluation, and variation fields are immutable".to_string(), + retryable: false, + }, + ); + } - if method == Method::Options { - let _ = req.respond( - Response::empty(204) - .with_header(make_header("Access-Control-Allow-Origin", "*")) - 
.with_header(make_header("Access-Control-Allow-Methods", "GET,POST,OPTIONS")) - .with_header(make_header("Access-Control-Allow-Headers", "Content-Type")), - ); - continue; + let patch = PolicyPatch { + max_iterations: request.budget.and_then(|budget| budget.max_iterations), + max_concurrent_candidates: request + .concurrency + .and_then(|concurrency| concurrency.max_concurrent_candidates), + }; + let store = FsExecutionStore::new(config.execution_dir.clone()); + match ExecutionService::::patch_execution_policy( + &store, + execution_id, + patch, + &GlobalConfig { + max_concurrent_child_runs: 20, + }, + ) { + Ok(spec) => json_response( + 200, + &json!({ + "execution_id": execution_id, + "max_iterations": spec.policy.budget.max_iterations, + "max_concurrent_candidates": spec.policy.concurrency.max_concurrent_candidates + }), + ), + Err(err) if err.kind() == std::io::ErrorKind::NotFound => json_response( + 404, + &ApiError { + code: "NOT_FOUND", + message: format!("execution '{execution_id}' not found"), + retryable: false, + }, + ), + Err(err) if err.kind() == std::io::ErrorKind::InvalidInput => json_response( + 400, + &ApiError { + code: "INVALID_POLICY", + message: err.to_string(), + retryable: false, + }, + ), + Err(err) => json_response( + 500, + &ApiError { + code: "INTERNAL_ERROR", + message: err.to_string(), + retryable: true, + }, + ), + } +} + +#[cfg(feature = "serde")] +fn process_pending_executions_once( + global: GlobalConfig, + runtime: R, + execution_dir: PathBuf, +) -> std::io::Result<()> { + let store = FsExecutionStore::new(execution_dir); + let mut service = ExecutionService::new(global.clone(), runtime, store.clone()); + + let ids = store.list_execution_ids()?; + for execution_id in &ids { + let snapshot = store.load_execution(execution_id)?; + if matches!( + snapshot.execution.status, + crate::orchestration::ExecutionStatus::Pending + | crate::orchestration::ExecutionStatus::Running + ) { + match service.plan_execution(execution_id) { + Ok(_) => 
{} + Err(err) + if matches!( + err.kind(), + std::io::ErrorKind::WouldBlock | std::io::ErrorKind::InvalidInput + ) => {} + Err(err) => return Err(err), + } } + } - if method == Method::Get && path == "/v1/health" { - let _ = req.respond(json_response(200, &serde_json::json!({"status":"ok","service":"voidctl-bridge"}))); + let mut scheduler = GlobalScheduler::new(global.max_concurrent_child_runs as usize); + for execution_id in ids { + let snapshot = store.load_execution(&execution_id)?; + if !matches!( + snapshot.execution.status, + crate::orchestration::ExecutionStatus::Running + | crate::orchestration::ExecutionStatus::Paused + ) { continue; } + let spec = match store.load_spec(&execution_id) { + Ok(spec) => spec, + Err(err) if err.kind() == std::io::ErrorKind::NotFound => continue, + Err(err) => return Err(err), + }; + let paused = snapshot.execution.status == crate::orchestration::ExecutionStatus::Paused; + let running = snapshot + .candidates + .iter() + .filter(|candidate| { + candidate.status == crate::orchestration::CandidateStatus::Running + }) + .count(); + scheduler.register_execution( + &execution_id, + paused, + running, + spec.policy.concurrency.max_concurrent_candidates as usize, + ); + if let Some(candidate) = snapshot + .candidates + .iter() + .filter(|candidate| candidate.status == crate::orchestration::CandidateStatus::Queued) + .min_by_key(|candidate| candidate.created_seq) + { + scheduler.enqueue(QueuedCandidate::new( + &execution_id, + &candidate.candidate_id, + candidate.created_seq, + )); + } + } - if method == Method::Post && path == "/v1/launch" { - let mut body = String::new(); - if let Err(e) = req.as_reader().read_to_string(&mut body) { - let _ = req.respond(json_error(400, "INVALID_SPEC", format!("failed to read request body: {e}"), false)); - continue; - } - let launch: LaunchRequest = match serde_json::from_str(&body) { - Ok(v) => v, - Err(e) => { - let _ = req.respond(json_error(400, "INVALID_SPEC", format!("invalid JSON body: 
{e}"), false)); - continue; - } - }; + while let Some(grant) = scheduler.next_dispatch() { + match service.dispatch_execution_once(&grant.execution_id) { + Ok(_) => {} + Err(err) + if matches!( + err.kind(), + std::io::ErrorKind::WouldBlock | std::io::ErrorKind::InvalidInput + ) => {} + Err(err) => return Err(err), + } + } + Ok(()) +} - let file = if let Some(spec_text) = launch.spec_text.as_ref().map(|s| s.trim()).filter(|s| !s.is_empty()) { - match write_spec_file(&spec_dir_path, spec_text, launch.spec_format.as_deref()) { - Ok(path) => path, - Err(e) => { - let _ = req.respond(json_error(500, "INTERNAL_ERROR", e, true)); - continue; - } - } - } else if let Some(file) = launch.file.as_ref().map(|s| s.trim().to_string()).filter(|s| !s.is_empty()) { - file - } else { - let _ = req.respond(json_error( - 400, - "INVALID_SPEC", - "provide either `spec_text` or `file`".to_string(), - false, - )); - continue; - }; +#[cfg(feature = "serde")] +pub fn process_pending_executions_once_for_test( + global: GlobalConfig, + runtime: R, + execution_dir: PathBuf, +) -> std::io::Result<()> { + process_pending_executions_once(global, runtime, execution_dir) +} - let run_id = launch.run_id.filter(|s| !s.trim().is_empty()).unwrap_or_else(next_run_id); - let policy = policy_from_json(launch.policy); - if let Err(msg) = policy.validate() { - let _ = req.respond(json_error(400, "INVALID_POLICY", msg.to_string(), false)); - continue; +#[cfg(feature = "serde")] +fn handle_launch( + body: &str, + config: &BridgeConfig, + client: Option<&VoidBoxRuntimeClient>, +) -> JsonHttpResponse { + let launch: LaunchRequest = match serde_json::from_str(body) { + Ok(v) => v, + Err(e) => { + return json_response( + 400, + &ApiError { + code: "INVALID_SPEC", + message: format!("invalid JSON body: {e}"), + retryable: false, + }, + ) + } + }; + + let file = if let Some(spec_text) = launch + .spec_text + .as_ref() + .map(|s| s.trim()) + .filter(|s| !s.is_empty()) + { + match write_spec_file(&config.spec_dir, 
spec_text, launch.spec_format.as_deref()) { + Ok(path) => path, + Err(e) => { + return json_response( + 500, + &ApiError { + code: "INTERNAL_ERROR", + message: e, + retryable: true, + }, + ) } + } + } else if let Some(file) = launch + .file + .as_ref() + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + { + file + } else { + return json_response( + 400, + &ApiError { + code: "INVALID_SPEC", + message: "provide either `spec_text` or `file`".to_string(), + retryable: false, + }, + ); + }; + + let run_id = launch + .run_id + .filter(|s| !s.trim().is_empty()) + .unwrap_or_else(next_run_id); + let policy = policy_from_json(launch.policy); + if let Err(msg) = policy.validate() { + return json_response( + 400, + &ApiError { + code: "INVALID_POLICY", + message: msg.to_string(), + retryable: false, + }, + ); + } - match client.start(StartRequest { - run_id: run_id.clone(), - workflow_spec: file.clone(), - policy, - }) { - Ok(started) => { - let response = LaunchResponse { - run_id: run_id_from_handle(&started.handle), - attempt_id: started.attempt_id, - state: state_to_str(started.state).to_string(), - file, - }; - let _ = req.respond(json_response(200, &response)); - } - Err(e) => { - let _ = req.respond(json_error( - 500, - "INTERNAL_ERROR", - e.message, - e.retryable, - )); - } + let Some(client) = client else { + return json_response( + 500, + &ApiError { + code: "INTERNAL_ERROR", + message: "launch route requires runtime client".to_string(), + retryable: false, + }, + ); + }; + + match client.start(StartRequest { + run_id: run_id.clone(), + workflow_spec: file.clone(), + launch_context: None, + policy, + }) { + Ok(started) => json_response( + 200, + &LaunchResponse { + run_id: run_id_from_handle(&started.handle), + attempt_id: started.attempt_id, + state: state_to_str(started.state).to_string(), + file, + }, + ), + Err(e) => json_response( + 500, + &ApiError { + code: "INTERNAL_ERROR", + message: e.message, + retryable: e.retryable, + }, + ), + } +} + 
+#[cfg(feature = "serde")] +impl ExecutionSpecRequest { + fn try_into_spec(self) -> Result { + let variation = match self.variation.source.as_str() { + "parameter_space" => VariationConfig::parameter_space( + self.variation.candidates_per_iteration, + match self + .variation + .selection + .as_deref() + .unwrap_or("sequential") + { + "random" => VariationSelection::Random, + _ => VariationSelection::Sequential, + }, + self.variation.parameter_space.unwrap_or_default(), + ), + "explicit" => VariationConfig::explicit( + self.variation.candidates_per_iteration, + self.variation + .explicit + .unwrap_or_default() + .into_iter() + .map(|proposal| VariationProposal { + overrides: proposal.overrides, + }) + .collect(), + ), + "leader_directed" => { + VariationConfig::leader_directed(self.variation.candidates_per_iteration) } - continue; + other => return Err(format!("unsupported variation source '{other}'")), + }; + + Ok(ExecutionSpec { + mode: self.mode, + goal: self.goal, + workflow: WorkflowTemplateRef { + template: self.workflow.template, + }, + policy: OrchestrationPolicy { + budget: BudgetPolicy { + max_iterations: self.policy.budget.max_iterations, + max_child_runs: self.policy.budget.max_child_runs, + max_wall_clock_secs: self.policy.budget.max_wall_clock_secs, + max_cost_usd_millis: self.policy.budget.max_cost_usd_millis, + }, + concurrency: ConcurrencyPolicy { + max_concurrent_candidates: self.policy.concurrency.max_concurrent_candidates, + }, + convergence: ConvergencePolicy { + strategy: self.policy.convergence.strategy, + min_score: self.policy.convergence.min_score, + max_iterations_without_improvement: self + .policy + .convergence + .max_iterations_without_improvement, + }, + max_candidate_failures_per_iteration: self + .policy + .max_candidate_failures_per_iteration, + missing_output_policy: self.policy.missing_output_policy, + iteration_failure_policy: self.policy.iteration_failure_policy, + }, + evaluation: EvaluationConfig { + scoring_type: 
self.evaluation.scoring_type, + weights: self.evaluation.weights, + pass_threshold: self.evaluation.pass_threshold, + ranking: self.evaluation.ranking, + tie_breaking: self.evaluation.tie_breaking, + }, + variation, + swarm: self.swarm, + }) + } +} + +#[cfg(feature = "serde")] +fn default_policy() -> ExecutionPolicy { + ExecutionPolicy { + max_parallel_microvms_per_run: 2, + max_stage_retries: 1, + stage_timeout_secs: 300, + cancel_grace_period_secs: 10, + } +} + +#[cfg(feature = "serde")] +fn policy_from_json(raw: Option) -> ExecutionPolicy { + let defaults = default_policy(); + let Some(raw) = raw else { + return defaults; + }; + ExecutionPolicy { + max_parallel_microvms_per_run: raw + .max_parallel_microvms_per_run + .unwrap_or(defaults.max_parallel_microvms_per_run), + max_stage_retries: raw + .max_stage_retries + .unwrap_or(defaults.max_stage_retries), + stage_timeout_secs: raw.stage_timeout_secs.unwrap_or(defaults.stage_timeout_secs), + cancel_grace_period_secs: raw + .cancel_grace_period_secs + .unwrap_or(defaults.cancel_grace_period_secs), + } +} + +#[cfg(feature = "serde")] +fn now_ms() -> u128 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_millis()) + .unwrap_or(0) +} + +#[cfg(feature = "serde")] +fn next_run_id() -> String { + format!("ui-{}", now_ms()) +} + +#[cfg(feature = "serde")] +fn run_id_from_handle(handle: &str) -> String { + handle + .strip_prefix("void-box:") + .or_else(|| handle.strip_prefix("vb:")) + .unwrap_or(handle) + .to_string() +} + +#[cfg(feature = "serde")] +fn state_to_str(state: RunState) -> &'static str { + match state { + RunState::Pending => "pending", + RunState::Starting => "starting", + RunState::Running => "running", + RunState::Succeeded => "succeeded", + RunState::Failed => "failed", + RunState::Canceled => "cancelled", + } +} + +#[cfg(feature = "serde")] +fn infer_ext(spec_format: Option<&str>, spec_text: &str) -> &'static str { + if let Some(fmt) = spec_format { + let f = fmt.to_ascii_lowercase(); + 
if f.contains("json") { + return "json"; } + if f.contains("yaml") || f.contains("yml") { + return "yaml"; + } + } + if spec_text.trim_start().starts_with('{') || spec_text.trim_start().starts_with('[') { + "json" + } else { + "yaml" + } +} - let _ = req.respond(json_error( - 404, - "NOT_FOUND", - format!("no route for {} {}", method.as_str(), path), - false, - )); +#[cfg(feature = "serde")] +fn write_spec_file( + spec_dir: &Path, + spec_text: &str, + spec_format: Option<&str>, +) -> Result { + fs::create_dir_all(spec_dir) + .map_err(|e| format!("failed to create spec dir {}: {e}", spec_dir.display()))?; + let ext = infer_ext(spec_format, spec_text); + let filename = format!("spec-{}-{}.{}", now_ms(), std::process::id(), ext); + let path = spec_dir.join(filename); + let mut file = OpenOptions::new() + .create_new(true) + .write(true) + .open(&path) + .map_err(|e| format!("failed to create spec file {}: {e}", path.display()))?; + file.write_all(spec_text.as_bytes()) + .and_then(|_| file.flush()) + .map_err(|e| format!("failed to write spec file {}: {e}", path.display()))?; + Ok(path.display().to_string()) +} + +#[cfg(feature = "serde")] +fn json_response(status: u16, body: &T) -> JsonHttpResponse { + let payload = serde_json::to_vec(body).unwrap_or_else(|_| { + b"{\"code\":\"INTERNAL_ERROR\",\"message\":\"serialization failed\",\"retryable\":true}" + .to_vec() + }); + JsonHttpResponse { + status, + body: payload, } +} - Ok(()) +#[cfg(feature = "serde")] +fn make_header(name: &str, value: &str) -> tiny_http::Header { + tiny_http::Header::from_bytes(name.as_bytes(), value.as_bytes()).expect("valid header") +} + +#[cfg(feature = "serde")] +fn to_tiny_response(response: JsonHttpResponse) -> tiny_http::Response>> { + tiny_http::Response::from_data(response.body) + .with_status_code(tiny_http::StatusCode(response.status)) + .with_header(make_header("Content-Type", "application/json")) + .with_header(make_header("Access-Control-Allow-Origin", "*")) + 
.with_header(make_header("Access-Control-Allow-Methods", "GET,POST,OPTIONS")) + .with_header(make_header("Access-Control-Allow-Headers", "Content-Type")) } diff --git a/src/contract/api.rs b/src/contract/api.rs index b744305..dff0a9c 100644 --- a/src/contract/api.rs +++ b/src/contract/api.rs @@ -4,6 +4,7 @@ use crate::contract::{ExecutionPolicy, RunState}; pub struct StartRequest { pub run_id: String, pub workflow_spec: String, + pub launch_context: Option, pub policy: ExecutionPolicy, } diff --git a/src/contract/compat.rs b/src/contract/compat.rs index a616a1d..a754bac 100644 --- a/src/contract/compat.rs +++ b/src/contract/compat.rs @@ -49,8 +49,10 @@ pub struct ConvertedRunView { pub fn map_void_box_status(status: &str) -> Option { match status.to_ascii_lowercase().as_str() { + "pending" => Some(RunState::Pending), + "starting" => Some(RunState::Starting), "running" => Some(RunState::Running), - "completed" => Some(RunState::Succeeded), + "completed" | "succeeded" | "success" => Some(RunState::Succeeded), "failed" => Some(RunState::Failed), "cancelled" | "canceled" => Some(RunState::Canceled), _ => None, @@ -222,8 +224,11 @@ mod tests { #[test] fn maps_void_box_status_values() { + assert_eq!(map_void_box_status("Pending"), Some(RunState::Pending)); + assert_eq!(map_void_box_status("Starting"), Some(RunState::Starting)); assert_eq!(map_void_box_status("Running"), Some(RunState::Running)); assert_eq!(map_void_box_status("Completed"), Some(RunState::Succeeded)); + assert_eq!(map_void_box_status("Succeeded"), Some(RunState::Succeeded)); assert_eq!(map_void_box_status("Failed"), Some(RunState::Failed)); assert_eq!(map_void_box_status("Cancelled"), Some(RunState::Canceled)); } diff --git a/src/contract/error.rs b/src/contract/error.rs index 3cde186..a74ed92 100644 --- a/src/contract/error.rs +++ b/src/contract/error.rs @@ -5,6 +5,12 @@ pub enum ContractErrorCode { NotFound, AlreadyTerminal, ResourceLimitExceeded, + StructuredOutputMissing, + StructuredOutputMalformed, 
+ ArtifactNotFound, + ArtifactPublicationIncomplete, + ArtifactStoreUnavailable, + RetrievalTimeout, InternalError, } diff --git a/src/lib.rs b/src/lib.rs index fcef45c..6109500 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,5 @@ pub mod contract; +pub mod orchestration; pub mod runtime; #[cfg(feature = "serde")] pub mod bridge; diff --git a/src/orchestration/events.rs b/src/orchestration/events.rs new file mode 100644 index 0000000..b5f0e5c --- /dev/null +++ b/src/orchestration/events.rs @@ -0,0 +1,166 @@ +use super::types::{Execution, ExecutionAccumulator, ExecutionSnapshot, ExecutionStatus}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ControlEventType { + ExecutionCreated, + ExecutionSubmitted, + ExecutionStarted, + IterationStarted, + CandidateQueued, + CandidateDispatched, + CandidateOutputCollected, + CandidateScored, + IterationCompleted, + ExecutionCompleted, + ExecutionFailed, + ExecutionPaused, + ExecutionResumed, + ExecutionCanceled, + ExecutionStalled, + CommunicationIntentEmitted, + CommunicationIntentRejected, + MessageRouted, + MessageDelivered, + MessageExpired, +} + +impl ControlEventType { + pub fn as_str(self) -> &'static str { + match self { + Self::ExecutionCreated => "ExecutionCreated", + Self::ExecutionSubmitted => "ExecutionSubmitted", + Self::ExecutionStarted => "ExecutionStarted", + Self::IterationStarted => "IterationStarted", + Self::CandidateQueued => "CandidateQueued", + Self::CandidateDispatched => "CandidateDispatched", + Self::CandidateOutputCollected => "CandidateOutputCollected", + Self::CandidateScored => "CandidateScored", + Self::IterationCompleted => "IterationCompleted", + Self::ExecutionCompleted => "ExecutionCompleted", + Self::ExecutionFailed => "ExecutionFailed", + Self::ExecutionPaused => "ExecutionPaused", + Self::ExecutionResumed => "ExecutionResumed", + Self::ExecutionCanceled => "ExecutionCanceled", + Self::ExecutionStalled => "ExecutionStalled", + Self::CommunicationIntentEmitted => 
"CommunicationIntentEmitted", + Self::CommunicationIntentRejected => "CommunicationIntentRejected", + Self::MessageRouted => "MessageRouted", + Self::MessageDelivered => "MessageDelivered", + Self::MessageExpired => "MessageExpired", + } + } + + pub fn from_str(value: &str) -> Option { + match value { + "ExecutionCreated" => Some(Self::ExecutionCreated), + "ExecutionSubmitted" => Some(Self::ExecutionSubmitted), + "ExecutionStarted" => Some(Self::ExecutionStarted), + "IterationStarted" => Some(Self::IterationStarted), + "CandidateQueued" => Some(Self::CandidateQueued), + "CandidateDispatched" => Some(Self::CandidateDispatched), + "CandidateOutputCollected" => Some(Self::CandidateOutputCollected), + "CandidateScored" => Some(Self::CandidateScored), + "IterationCompleted" => Some(Self::IterationCompleted), + "ExecutionCompleted" => Some(Self::ExecutionCompleted), + "ExecutionFailed" => Some(Self::ExecutionFailed), + "ExecutionPaused" => Some(Self::ExecutionPaused), + "ExecutionResumed" => Some(Self::ExecutionResumed), + "ExecutionCanceled" => Some(Self::ExecutionCanceled), + "ExecutionStalled" => Some(Self::ExecutionStalled), + "CommunicationIntentEmitted" => Some(Self::CommunicationIntentEmitted), + "CommunicationIntentRejected" => Some(Self::CommunicationIntentRejected), + "MessageRouted" => Some(Self::MessageRouted), + "MessageDelivered" => Some(Self::MessageDelivered), + "MessageExpired" => Some(Self::MessageExpired), + _ => None, + } + } + + pub fn advances_state(self) -> bool { + !matches!( + self, + Self::ExecutionSubmitted + | Self::CandidateQueued + | Self::CandidateDispatched + | Self::CandidateOutputCollected + | Self::ExecutionStalled + | Self::CommunicationIntentEmitted + | Self::CommunicationIntentRejected + | Self::MessageRouted + | Self::MessageDelivered + | Self::MessageExpired + ) + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ControlEventEnvelope { + pub execution_id: String, + pub seq: u64, + pub event_type: ControlEventType, +} + 
+impl ControlEventEnvelope { + pub fn new(execution_id: &str, seq: u64, event_type: ControlEventType) -> Self { + Self { + execution_id: execution_id.to_string(), + seq, + event_type, + } + } +} + +impl ExecutionSnapshot { + pub fn replay( + mut execution: Execution, + events: &[ControlEventEnvelope], + ) -> ExecutionSnapshot { + let mut accumulator = ExecutionAccumulator::default(); + + for event in events { + match event.event_type { + ControlEventType::ExecutionCreated | ControlEventType::ExecutionSubmitted => {} + ControlEventType::ExecutionStarted | ControlEventType::IterationStarted => { + execution.status = ExecutionStatus::Running; + } + ControlEventType::CandidateQueued + | ControlEventType::CandidateDispatched + | ControlEventType::CandidateOutputCollected => {} + ControlEventType::CandidateScored => { + accumulator.scoring_history_len += 1; + } + ControlEventType::IterationCompleted => { + accumulator.completed_iterations += 1; + } + ControlEventType::ExecutionCompleted => { + execution.status = ExecutionStatus::Completed; + } + ControlEventType::ExecutionFailed => { + execution.status = ExecutionStatus::Failed; + } + ControlEventType::ExecutionPaused => { + execution.status = ExecutionStatus::Paused; + } + ControlEventType::ExecutionResumed => { + execution.status = ExecutionStatus::Running; + } + ControlEventType::ExecutionCanceled => { + execution.status = ExecutionStatus::Canceled; + } + ControlEventType::ExecutionStalled + | ControlEventType::CommunicationIntentEmitted + | ControlEventType::CommunicationIntentRejected + | ControlEventType::MessageRouted + | ControlEventType::MessageDelivered + | ControlEventType::MessageExpired => {} + } + } + + ExecutionSnapshot { + execution, + events: events.to_vec(), + accumulator, + candidates: Vec::new(), + } + } +} diff --git a/src/orchestration/message_box.rs b/src/orchestration/message_box.rs new file mode 100644 index 0000000..7dedd27 --- /dev/null +++ b/src/orchestration/message_box.rs @@ -0,0 +1,193 @@ 
+#[cfg(feature = "serde")] +use std::collections::{BTreeMap, HashMap}; + +#[cfg(feature = "serde")] +use serde_json::Value; + +#[cfg(feature = "serde")] +use super::types::{ + CandidateInbox, CommunicationIntent, CommunicationIntentAudience, InboxEntry, InboxSnapshot, + RoutedMessage, RoutedMessageStatus, +}; + +#[cfg(feature = "serde")] +pub fn normalize_intents( + candidate_id: &str, + iteration: u32, + intents: &[CommunicationIntent], +) -> (Vec, usize) { + let mut valid = Vec::new(); + let mut rejected = 0usize; + let mut broadcast_count = 0usize; + + for intent in intents { + if valid.len() >= 3 { + rejected += 1; + continue; + } + if intent.intent_id.trim().is_empty() || intent.ttl_iterations == 0 { + rejected += 1; + continue; + } + if !payload_has_summary_text(&intent.payload) { + rejected += 1; + continue; + } + if matches!(intent.audience, CommunicationIntentAudience::Broadcast) { + broadcast_count += 1; + if broadcast_count > 1 { + rejected += 1; + continue; + } + } + + let mut normalized = intent.clone(); + normalized.from_candidate_id = candidate_id.to_string(); + normalized.iteration = iteration; + valid.push(normalized); + } + + (valid, rejected) +} + +#[cfg(feature = "serde")] +pub fn route_intents(intents: &[CommunicationIntent]) -> Vec { + intents + .iter() + .map(|intent| { + let (to, routing_reason) = match intent.audience { + CommunicationIntentAudience::Leader => { + ("leader".to_string(), "leader_feedback_channel".to_string()) + } + CommunicationIntentAudience::Broadcast => { + ("broadcast".to_string(), "broadcast_fanout".to_string()) + } + }; + RoutedMessage { + message_id: format!("msg-{}-{}", intent.intent_id, to), + intent_id: intent.intent_id.clone(), + to, + delivery_iteration: intent.iteration + 1, + routing_reason, + status: RoutedMessageStatus::Routed, + } + }) + .collect() +} + +#[cfg(feature = "serde")] +pub fn pending_delivery_messages( + intents: &[CommunicationIntent], + messages: &[RoutedMessage], + delivery_iteration: u32, +) 
-> Vec<(CommunicationIntent, RoutedMessage)> { + let intents_by_id: HashMap<_, _> = intents + .iter() + .cloned() + .map(|intent| (intent.intent_id.clone(), intent)) + .collect(); + let mut latest_by_message = BTreeMap::new(); + for message in messages { + latest_by_message.insert(message.message_id.clone(), message.clone()); + } + + latest_by_message + .into_values() + .filter(|message| { + message.delivery_iteration == delivery_iteration + && message.status == RoutedMessageStatus::Routed + }) + .filter_map(|message| { + let intent = intents_by_id.get(&message.intent_id)?.clone(); + if intent.iteration + intent.ttl_iterations < delivery_iteration { + return None; + } + Some((intent, message)) + }) + .collect() +} + +#[cfg(feature = "serde")] +pub fn backlog_from_pending_messages( + intents: &[CommunicationIntent], + messages: &[RoutedMessage], + delivery_iteration: u32, +) -> Vec { + pending_delivery_messages(intents, messages, delivery_iteration) + .into_iter() + .map(|(intent, _)| summary_text(&intent.payload)) + .collect() +} + +#[cfg(feature = "serde")] +pub fn materialize_inbox_snapshots( + execution_id: &str, + delivery_iteration: u32, + candidate_inboxes: &[CandidateInbox], + intents: &[CommunicationIntent], + messages: &[RoutedMessage], +) -> Vec<(InboxSnapshot, Vec)> { + let pending = pending_delivery_messages(intents, messages, delivery_iteration); + if candidate_inboxes.is_empty() { + return Vec::new(); + } + + let mut snapshots: Vec<_> = candidate_inboxes + .iter() + .map(|inbox| InboxSnapshot { + execution_id: execution_id.to_string(), + candidate_id: inbox.candidate_id.clone(), + iteration: delivery_iteration, + entries: Vec::new(), + }) + .collect(); + let mut delivered_records = vec![Vec::new(); snapshots.len()]; + + for (intent, message) in pending { + let entry = InboxEntry { + message_id: message.message_id.clone(), + intent_id: intent.intent_id.clone(), + from_candidate_id: intent.from_candidate_id.clone(), + kind: intent.kind.clone(), + 
payload: intent.payload.clone(), + }; + match message.to.as_str() { + "broadcast" => { + for (idx, snapshot) in snapshots.iter_mut().enumerate() { + snapshot.entries.push(entry.clone()); + delivered_records[idx].push(RoutedMessage { + status: RoutedMessageStatus::Delivered, + ..message.clone() + }); + } + } + _ => { + snapshots[0].entries.push(entry); + delivered_records[0].push(RoutedMessage { + status: RoutedMessageStatus::Delivered, + ..message + }); + } + } + } + + snapshots.into_iter().zip(delivered_records).collect() +} + +#[cfg(feature = "serde")] +fn payload_has_summary_text(payload: &Value) -> bool { + payload + .get("summary_text") + .and_then(Value::as_str) + .map(|value| !value.trim().is_empty()) + .unwrap_or(false) +} + +#[cfg(feature = "serde")] +fn summary_text(payload: &Value) -> String { + payload + .get("summary_text") + .and_then(Value::as_str) + .unwrap_or_default() + .to_string() +} diff --git a/src/orchestration/mod.rs b/src/orchestration/mod.rs new file mode 100644 index 0000000..c512281 --- /dev/null +++ b/src/orchestration/mod.rs @@ -0,0 +1,42 @@ +pub mod events; +pub mod message_box; +pub mod policy; +pub mod reconcile; +pub mod scoring; +pub mod scheduler; +pub mod spec; +pub mod service; +pub mod store; +pub mod strategy; +pub mod types; +pub mod variation; + +pub use events::{ControlEventEnvelope, ControlEventType}; +pub use policy::{ + BudgetPolicy, ConcurrencyPolicy, ConvergencePolicy, GlobalConfig, OrchestrationPolicy, +}; +pub use scoring::{ + score_iteration, MetricDirection, RankedCandidate, ScoringConfig, WeightedMetric, +}; +pub use reconcile::ReconciliationService; +pub use scheduler::{DispatchGrant, GlobalScheduler, QueuedCandidate, SchedulerDecision}; +pub use service::{ + DryRunPlan, DryRunResult, ExecutionAction, ExecutionRuntime, ExecutionService, + StructuredOutputResult, +}; +#[cfg(feature = "serde")] +pub use service::PolicyPatch; +pub use spec::ExecutionSpec; +pub use spec::{EvaluationConfig, WorkflowTemplateRef}; +pub 
use store::{ExecutionStore, FsExecutionStore}; +pub use strategy::{IterationEvaluation, SearchStrategy, StopReason, SwarmStrategy}; +pub use types::{ + CandidateInbox, CandidateOutput, CandidateSpec, CandidateStatus, Execution, + ExecutionAccumulator, ExecutionCandidate, ExecutionSnapshot, ExecutionStatus, FailureCounts, +}; +#[cfg(feature = "serde")] +pub use types::{ + CommunicationIntent, CommunicationIntentAudience, CommunicationIntentKind, + CommunicationIntentPriority, InboxEntry, InboxSnapshot, RoutedMessage, RoutedMessageStatus, +}; +pub use variation::{VariationConfig, VariationProposal, VariationSelection}; diff --git a/src/orchestration/policy.rs b/src/orchestration/policy.rs new file mode 100644 index 0000000..f464365 --- /dev/null +++ b/src/orchestration/policy.rs @@ -0,0 +1,162 @@ +use std::error::Error; +use std::fmt::{Display, Formatter}; + +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct GlobalConfig { + pub max_concurrent_child_runs: u32, +} + +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct BudgetPolicy { + pub max_iterations: Option, + pub max_child_runs: Option, + pub max_wall_clock_secs: Option, + pub max_cost_usd_millis: Option, +} + +impl Default for BudgetPolicy { + fn default() -> Self { + Self { + max_iterations: Some(10), + max_child_runs: None, + max_wall_clock_secs: Some(600), + max_cost_usd_millis: None, + } + } +} + +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ConcurrencyPolicy { + pub max_concurrent_candidates: u32, +} + +impl Default for ConcurrencyPolicy { + fn default() -> Self { + Self { + max_concurrent_candidates: 1, + } + } +} + +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Debug, Clone, PartialEq)] +pub struct ConvergencePolicy { 
+ pub strategy: String, + pub min_score: Option, + pub max_iterations_without_improvement: Option, +} + +impl Default for ConvergencePolicy { + fn default() -> Self { + Self { + strategy: "plateau".to_string(), + min_score: None, + max_iterations_without_improvement: Some(2), + } + } +} + +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Debug, Clone, PartialEq)] +pub struct OrchestrationPolicy { + pub budget: BudgetPolicy, + pub concurrency: ConcurrencyPolicy, + pub convergence: ConvergencePolicy, + pub max_candidate_failures_per_iteration: u32, + pub missing_output_policy: String, + pub iteration_failure_policy: String, +} + +impl Default for OrchestrationPolicy { + fn default() -> Self { + Self { + budget: BudgetPolicy::default(), + concurrency: ConcurrencyPolicy::default(), + convergence: ConvergencePolicy::default(), + max_candidate_failures_per_iteration: u32::MAX, + missing_output_policy: "mark_failed".to_string(), + iteration_failure_policy: "fail_execution".to_string(), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct PolicyValidationError(String); + +impl PolicyValidationError { + fn new(message: impl Into) -> Self { + Self(message.into()) + } +} + +impl Display for PolicyValidationError { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.write_str(&self.0) + } +} + +impl Error for PolicyValidationError {} + +impl OrchestrationPolicy { + pub fn validate(&self, global: &GlobalConfig) -> Result<(), PolicyValidationError> { + if self.budget.max_iterations.is_none() && self.budget.max_wall_clock_secs.is_none() { + return Err(PolicyValidationError::new( + "at least one of policy.budget.max_iterations or policy.budget.max_wall_clock_secs must be set", + )); + } + + if self.concurrency.max_concurrent_candidates == 0 { + return Err(PolicyValidationError::new( + "policy.concurrency.max_concurrent_candidates must be positive", + )); + } + + if self.concurrency.max_concurrent_candidates > 
global.max_concurrent_child_runs { + return Err(PolicyValidationError::new( + "policy.concurrency.max_concurrent_candidates cannot exceed global.max_concurrent_child_runs", + )); + } + + match self.convergence.strategy.as_str() { + "threshold" => { + if self.convergence.min_score.is_none() { + return Err(PolicyValidationError::new( + "policy.convergence.min_score is required for threshold strategy", + )); + } + } + "plateau" => { + if self + .convergence + .max_iterations_without_improvement + .is_none() + { + return Err(PolicyValidationError::new( + "policy.convergence.max_iterations_without_improvement is required for plateau strategy", + )); + } + } + "exhaustive" => { + if self.budget.max_iterations.is_none() { + return Err(PolicyValidationError::new( + "policy.budget.max_iterations is required for exhaustive strategy", + )); + } + } + other => { + return Err(PolicyValidationError::new(format!( + "unknown convergence strategy '{}'", + other + ))); + } + } + + Ok(()) + } +} diff --git a/src/orchestration/reconcile.rs b/src/orchestration/reconcile.rs new file mode 100644 index 0000000..a5f27f4 --- /dev/null +++ b/src/orchestration/reconcile.rs @@ -0,0 +1,42 @@ +use std::io; + +use super::store::ExecutionStore; +use super::types::{CandidateStatus, ExecutionCandidate, ExecutionSnapshot, ExecutionStatus}; + +pub struct ReconciliationService { + store: S, +} + +impl ReconciliationService +where + S: ExecutionStore, +{ + pub fn new(store: S) -> Self { + Self { store } + } + + pub fn reload_active_executions(&self) -> io::Result> { + self.store + .list_active_execution_ids()? + .into_iter() + .map(|execution_id| self.store.load_execution(&execution_id)) + .collect() + } + + pub fn reload_queued_candidates(&self) -> io::Result> { + let mut queued = Vec::new(); + for snapshot in self.reload_active_executions()? 
{
                if snapshot.execution.status == ExecutionStatus::Paused {
                    continue;
                }
                queued.extend(
                    snapshot
                        .candidates
                        .into_iter()
                        .filter(|candidate| candidate.status == CandidateStatus::Queued),
                );
            }
        queued.sort_by_key(|candidate| candidate.created_seq);
        Ok(queued)
    }
}
diff --git a/src/orchestration/scheduler.rs b/src/orchestration/scheduler.rs
new file mode 100644
index 0000000..3d6494a
--- /dev/null
+++ b/src/orchestration/scheduler.rs
@@ -0,0 +1,169 @@
use std::collections::{BTreeMap, VecDeque};

use super::types::ExecutionAccumulator;

/// A candidate waiting for a global dispatch slot; ordered by `created_seq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct QueuedCandidate {
    pub execution_id: String,
    pub candidate_id: String,
    pub created_seq: u64,
}

impl QueuedCandidate {
    pub fn new(execution_id: &str, candidate_id: &str, created_seq: u64) -> Self {
        Self {
            execution_id: execution_id.to_string(),
            candidate_id: candidate_id.to_string(),
            created_seq,
        }
    }
}

/// Outcome of an admission attempt (see `enqueue_if_budget_allows`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SchedulerDecision {
    Enqueued,
    RejectedBudgetExceeded,
}

/// Permission to dispatch one specific candidate of one execution.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DispatchGrant {
    pub execution_id: String,
    pub candidate_id: String,
}

/// Per-execution scheduling state: FIFO queue plus concurrency bookkeeping.
#[derive(Debug, Default)]
struct ExecutionQueue {
    paused: bool,
    queued: VecDeque<QueuedCandidate>,
    running: usize,
    max_concurrent: usize,
}

/// Fair, capacity-bounded dispatcher over all executions.
///
/// Global capacity is `max_concurrent_child_runs`; each execution is further
/// limited by its own `max_concurrent`. Dispatch order is oldest
/// `created_seq` first across all non-paused executions.
pub struct GlobalScheduler {
    max_concurrent_child_runs: usize,
    active_slots: usize,
    executions: BTreeMap<String, ExecutionQueue>,
}

impl GlobalScheduler {
    pub fn new(max_concurrent_child_runs: usize) -> Self {
        Self {
            max_concurrent_child_runs,
            active_slots: 0,
            executions: BTreeMap::new(),
        }
    }

    /// Appends a candidate to its execution's FIFO queue.
    ///
    /// An execution seen for the first time gets an effectively unlimited
    /// per-execution cap (`usize::MAX`) so that enqueueing before
    /// `register_execution` does not silently block dispatch.
    pub fn enqueue(&mut self, candidate: QueuedCandidate) {
        self.executions
            .entry(candidate.execution_id.clone())
            .or_insert_with(|| ExecutionQueue {
                paused: false,
                queued: VecDeque::new(),
                running: 0,
                max_concurrent: usize::MAX,
            })
            .queued
            .push_back(candidate);
    }

    /// Installs (or refreshes) an execution's pause flag, running count, and
    /// per-execution concurrency cap; any queued candidates are kept.
    pub fn register_execution(
        &mut self,
        execution_id: &str,
        paused: bool,
        running: usize,
        max_concurrent: usize,
    ) {
        let queue = self
            .executions
            .entry(execution_id.to_string())
            .or_insert_with(|| ExecutionQueue {
                paused,
                queued: VecDeque::new(),
                running,
                max_concurrent,
            });
        queue.paused = paused;
        queue.running = running;
        queue.max_concurrent = max_concurrent;
    }

    /// Enqueues only when the execution's iteration budget is not exhausted.
    pub fn enqueue_if_budget_allows(
        &mut self,
        candidate: QueuedCandidate,
        accumulator: &ExecutionAccumulator,
        max_iterations: u32,
    ) -> SchedulerDecision {
        if accumulator.completed_iterations >= max_iterations {
            return SchedulerDecision::RejectedBudgetExceeded;
        }
        self.enqueue(candidate);
        SchedulerDecision::Enqueued
    }

    /// Picks the next dispatchable candidate, if any.
    ///
    /// Considers only the head of each non-paused execution's queue (FIFO
    /// within an execution) whose execution is under its own cap, and takes
    /// the globally oldest by `created_seq`. Returns `None` when global
    /// capacity is exhausted. The caller must follow up with `mark_running`.
    pub fn next_dispatch(&mut self) -> Option<DispatchGrant> {
        if self.active_slots >= self.max_concurrent_child_runs {
            return None;
        }

        let next = self
            .executions
            .iter()
            .filter(|(_, queue)| !queue.paused)
            .filter(|(_, queue)| queue.running < queue.max_concurrent)
            .filter_map(|(execution_id, queue)| {
                queue.queued.front().map(|candidate| {
                    (
                        candidate.created_seq,
                        execution_id.clone(),
                        candidate.candidate_id.clone(),
                    )
                })
            })
            .min_by_key(|(created_seq, _, _)| *created_seq)?;

        let queue = self.executions.get_mut(&next.1)?;
        queue.queued.pop_front();
        Some(DispatchGrant {
            execution_id: next.1,
            candidate_id: next.2,
        })
    }

    /// Records that a granted candidate actually started, consuming one
    /// per-execution and one global slot.
    pub fn mark_running(&mut self, grant: &DispatchGrant) {
        if let Some(queue) = self.executions.get_mut(&grant.execution_id) {
            queue.running += 1;
            self.active_slots += 1;
        }
    }

    /// Returns the slots held by a finished candidate.
    pub fn release(&mut self, execution_id: &str, _candidate_id: &str) {
        if let Some(queue) = self.executions.get_mut(execution_id) {
            if queue.running > 0 {
                queue.running -= 1;
            }
        }
        if self.active_slots > 0 {
            self.active_slots -= 1;
        }
    }

    /// Stops further dispatches for `execution_id`.
    ///
    /// Fix: the previous implementation zeroed `queue.running` and reclaimed
    /// those slots from `active_slots` immediately. The paused execution's
    /// child runs are still executing, so that both over-admitted new
    /// dispatches and caused the eventual `release` calls to decrement
    /// `active_slots` a second time, corrupting the accounting for other
    /// executions. In-flight candidates now keep their slots until they
    /// finish and are `release`d; pausing only prevents new dispatches
    /// (`next_dispatch` already skips paused executions).
    pub fn pause_execution(&mut self, execution_id: &str) {
        if let Some(queue) = self.executions.get_mut(execution_id) {
            queue.paused = true;
        }
    }

    /// Number of candidates still queued (not running) for an execution.
    pub fn execution_queue_depth(&self, execution_id: &str) -> usize {
        self.executions
            .get(execution_id)
            .map(|queue| queue.queued.len())
            .unwrap_or(0)
    }

    /// Globally occupied dispatch slots.
    pub fn active_slots(&self) -> usize {
        self.active_slots
    }
}
diff --git a/src/orchestration/scoring.rs b/src/orchestration/scoring.rs
new file mode 100644
index 0000000..0e5c755
--- /dev/null
+++ b/src/orchestration/scoring.rs
@@ -0,0 +1,105 @@
use std::cmp::Ordering;
use std::collections::BTreeMap;

use super::types::CandidateOutput;

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MetricDirection {
    Minimize,
    Maximize,
}

#[derive(Debug, Clone, PartialEq)]
pub struct WeightedMetric {
    pub name: String,
    pub weight: f64,
    pub direction: MetricDirection,
}

#[derive(Debug, Clone, PartialEq)]
pub struct ScoringConfig {
    pub metrics: Vec<WeightedMetric>,
    pub pass_threshold: f64,
    pub tie_break_metric: String,
}

#[derive(Debug, Clone, PartialEq)]
pub struct RankedCandidate {
    pub candidate_id: String,
    pub score: f64,
    pub pass: bool,
    pub metrics: BTreeMap<String, f64>,
}

pub fn score_iteration(config: &ScoringConfig, outputs: &[CandidateOutput]) -> Vec<RankedCandidate> {
    let mut ranked: Vec<RankedCandidate> = outputs
        .iter()
        .map(|output| {
            if !output.succeeded {
                return RankedCandidate {
                    candidate_id: output.candidate_id.clone(),
                    score: 0.0,
                    pass: false,
                    metrics: output.metrics.clone(),
                };
            }

            let score = config
                .metrics
                .iter()
                .map(|metric| metric.weight * normalized_value(metric, outputs, output))
                .sum::<f64>();

            RankedCandidate {
                candidate_id: output.candidate_id.clone(),
                score,
                pass: score >= config.pass_threshold,
                metrics: output.metrics.clone(),
            }
        })
        .collect();

    ranked.sort_by(|left, right| compare_ranked(config, left, right));
    ranked
}

fn normalized_value(
    metric: &WeightedMetric,
    outputs: &[CandidateOutput],
    output: &CandidateOutput,
) -> f64 {
    let values: Vec<f64> = outputs
        .iter()
        .filter(|candidate| candidate.succeeded)
.filter_map(|candidate| candidate.metrics.get(&metric.name).copied()) + .collect(); + let Some(current) = output.metrics.get(&metric.name).copied() else { + return 0.0; + }; + if values.len() <= 1 { + return 1.0; + } + let min = values.iter().copied().fold(f64::INFINITY, f64::min); + let max = values.iter().copied().fold(f64::NEG_INFINITY, f64::max); + if (max - min).abs() < f64::EPSILON { + return 1.0; + } + match metric.direction { + MetricDirection::Minimize => (max - current) / (max - min), + MetricDirection::Maximize => (current - min) / (max - min), + } +} + +fn compare_ranked(config: &ScoringConfig, left: &RankedCandidate, right: &RankedCandidate) -> Ordering { + right + .score + .partial_cmp(&left.score) + .unwrap_or(Ordering::Equal) + .then_with(|| { + let left_metric = left.metrics.get(&config.tie_break_metric).copied().unwrap_or(f64::INFINITY); + let right_metric = right.metrics.get(&config.tie_break_metric).copied().unwrap_or(f64::INFINITY); + left_metric + .partial_cmp(&right_metric) + .unwrap_or(Ordering::Equal) + }) +} diff --git a/src/orchestration/service.rs b/src/orchestration/service.rs new file mode 100644 index 0000000..f61a464 --- /dev/null +++ b/src/orchestration/service.rs @@ -0,0 +1,1210 @@ +use std::io; + +use crate::contract::{ContractError, ExecutionPolicy, RuntimeInspection, StartRequest, StartResult}; + +use super::events::{ControlEventEnvelope, ControlEventType}; +#[cfg(feature = "serde")] +use super::message_box; +use super::policy::GlobalConfig; +use super::scoring::{MetricDirection, ScoringConfig, WeightedMetric}; +use super::spec::ExecutionSpec; +use super::store::FsExecutionStore; +use super::strategy::{IterationEvaluation, SearchStrategy, StopReason, SwarmStrategy}; +use super::types::{ + CandidateOutput, CandidateSpec, CandidateStatus, Execution, ExecutionAccumulator, + ExecutionCandidate, ExecutionStatus, +}; + +#[cfg(feature = "serde")] +use crate::runtime::{LaunchInjectionAdapter, ProviderLaunchAdapter}; +#[cfg(feature = 
"serde")] +use serde::Serialize; + +pub trait ExecutionRuntime { + fn start_run(&mut self, request: StartRequest) -> Result; + fn inspect_run(&self, handle: &str) -> Result; + fn take_structured_output(&mut self, run_id: &str) -> StructuredOutputResult; +} + +#[derive(Debug, Clone)] +pub enum StructuredOutputResult { + Found(CandidateOutput), + Missing, + Error(ContractError), +} + +pub struct ExecutionService { + global: GlobalConfig, + runtime: R, + store: FsExecutionStore, + #[cfg(feature = "serde")] + launch_adapter: Box, + next_execution_id: u64, + next_candidate_id: u64, +} + +enum ExecutionControl { + Continue, + Paused, + Canceled, +} + +enum DispatchOutcome { + Output { + output: CandidateOutput, + failed: bool, + }, + Paused(io::Error), + Retryable(io::Error), + Canceled, +} + +enum SelectedStrategy { + Swarm(SwarmStrategy), + Search(SearchStrategy), +} + +impl SelectedStrategy { + fn new(spec: &ExecutionSpec) -> Self { + let scoring = scoring_from_spec(spec); + match spec.mode.as_str() { + "search" => Self::Search(SearchStrategy::new( + spec.variation.clone(), + scoring, + spec.policy.convergence.clone(), + )), + _ => Self::Swarm(SwarmStrategy::new( + spec.variation.clone(), + scoring, + spec.policy.convergence.clone(), + )), + } + } + + fn materialize_inboxes( + &self, + accumulator: &ExecutionAccumulator, + ) -> Vec { + match self { + Self::Swarm(strategy) => strategy.materialize_inboxes(accumulator), + Self::Search(strategy) => strategy.materialize_inboxes(accumulator), + } + } + + fn plan_candidates( + &self, + accumulator: &ExecutionAccumulator, + inboxes: &[super::types::CandidateInbox], + ) -> Vec { + match self { + Self::Swarm(strategy) => strategy.plan_candidates(accumulator, inboxes), + Self::Search(strategy) => strategy.plan_candidates(accumulator, inboxes), + } + } + + fn evaluate( + &self, + accumulator: &ExecutionAccumulator, + outputs: &[CandidateOutput], + ) -> IterationEvaluation { + match self { + Self::Swarm(strategy) => 
strategy.evaluate(accumulator, outputs), + Self::Search(strategy) => strategy.evaluate(accumulator, outputs), + } + } + + fn reduce( + &self, + accumulator: ExecutionAccumulator, + evaluation: IterationEvaluation, + ) -> ExecutionAccumulator { + match self { + Self::Swarm(strategy) => strategy.reduce(accumulator, evaluation), + Self::Search(strategy) => strategy.reduce(accumulator, evaluation), + } + } + + fn should_stop( + &self, + accumulator: &ExecutionAccumulator, + evaluation: &IterationEvaluation, + ) -> Option { + match self { + Self::Swarm(strategy) => strategy.should_stop(accumulator, evaluation), + Self::Search(strategy) => strategy.should_stop(accumulator, evaluation), + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ExecutionAction { + Pause, + Resume, + Cancel, +} + +#[cfg(feature = "serde")] +#[derive(Debug, Clone, Default)] +pub struct PolicyPatch { + pub max_iterations: Option, + pub max_concurrent_candidates: Option, +} + +#[cfg_attr(feature = "serde", derive(Serialize))] +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct DryRunPlan { + pub candidates_per_iteration: u32, + pub max_iterations: Option, + pub max_child_runs: Option, + pub estimated_concurrent_peak: u32, + pub variation_source: String, + pub parameter_space_size: Option, +} + +#[cfg_attr(feature = "serde", derive(Serialize))] +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct DryRunResult { + pub valid: bool, + pub plan: DryRunPlan, + pub warnings: Vec, + pub errors: Vec, +} + +impl ExecutionService +where + R: ExecutionRuntime, +{ + fn with_claimed_execution( + &mut self, + execution_id: &str, + operation: impl FnOnce(&mut Self, &str) -> io::Result, + ) -> io::Result { + let worker_id = Self::worker_id(); + if !self.store.claim_execution(execution_id, &worker_id)? 
{ + return Err(io::Error::new( + io::ErrorKind::WouldBlock, + "execution is already claimed", + )); + } + + let result = operation(self, &worker_id); + let release_result = self.store.release_claim(execution_id); + match (result, release_result) { + (Ok(value), Ok(())) => Ok(value), + (Err(err), Ok(())) => Err(err), + (Ok(_), Err(err)) | (Err(_), Err(err)) => Err(err), + } + } + + #[cfg(feature = "serde")] + fn load_workable_execution( + &mut self, + execution_id: &str, + invalid_message: &str, + ) -> io::Result<(super::types::ExecutionSnapshot, ExecutionSpec)> { + let snapshot = self.store.load_execution(execution_id)?; + if !matches!( + snapshot.execution.status, + ExecutionStatus::Pending | ExecutionStatus::Running + ) { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + invalid_message, + )); + } + let spec = self.store.load_spec(execution_id)?; + Ok((snapshot, spec)) + } + + fn check_execution_control( + &self, + execution_id: &str, + worker_id: &str, + ) -> io::Result { + let _ = self.store.refresh_claim(execution_id, worker_id); + let snapshot = self.store.load_execution(execution_id)?; + Ok(match snapshot.execution.status { + ExecutionStatus::Pending | ExecutionStatus::Running => ExecutionControl::Continue, + ExecutionStatus::Paused => ExecutionControl::Paused, + ExecutionStatus::Canceled => ExecutionControl::Canceled, + ExecutionStatus::Completed | ExecutionStatus::Failed => ExecutionControl::Canceled, + }) + } + + fn wait_for_terminal_run( + &self, + execution_id: &str, + worker_id: &str, + handle: &str, + ) -> io::Result> { + const MAX_POLLS: usize = 40; + const POLL_SLEEP_MS: u64 = 100; + + let mut last = None; + for _ in 0..MAX_POLLS { + match self.check_execution_control(execution_id, worker_id)? 
{ + ExecutionControl::Continue => {} + ExecutionControl::Paused => { + return Err(io::Error::new(io::ErrorKind::WouldBlock, "execution paused")); + } + ExecutionControl::Canceled => return Ok(None), + } + let inspection = self + .runtime + .inspect_run(handle) + .map_err(|err| io::Error::new(io::ErrorKind::Other, err.message.clone()))?; + if inspection.state.is_terminal() { + return Ok(Some(inspection)); + } + last = Some(inspection); + std::thread::sleep(std::time::Duration::from_millis(POLL_SLEEP_MS)); + } + + let message = match last { + Some(inspection) => format!( + "run '{}' did not reach terminal state, last state was {:?}", + inspection.run_id, inspection.state + ), + None => "run did not reach terminal state".to_string(), + }; + Err(io::Error::new(io::ErrorKind::WouldBlock, message)) + } + + fn worker_id() -> String { + format!("pid-{}", std::process::id()) + } + + pub fn new(global: GlobalConfig, runtime: R, store: FsExecutionStore) -> Self { + Self { + global, + runtime, + store, + #[cfg(feature = "serde")] + launch_adapter: Box::new(LaunchInjectionAdapter), + next_execution_id: 1, + next_candidate_id: 1, + } + } + + #[cfg(feature = "serde")] + pub fn with_launch_adapter( + global: GlobalConfig, + runtime: R, + store: FsExecutionStore, + launch_adapter: Box, + ) -> Self { + Self { + global, + runtime, + store, + launch_adapter, + next_execution_id: 1, + next_candidate_id: 1, + } + } + + pub fn run_to_completion(&mut self, spec: ExecutionSpec) -> io::Result { + spec.validate(&self.global) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidInput, err.to_string()))?; + + let execution_id = format!("exec-{}", self.next_execution_id); + self.next_execution_id += 1; + let mut execution = Execution::new(&execution_id, &spec.mode, &spec.goal); + let worker_id = Self::worker_id(); + self.store.create_execution(&execution)?; + self.append_event(&execution.execution_id, ControlEventType::ExecutionCreated)?; + self.append_event(&execution.execution_id, 
ControlEventType::ExecutionSubmitted)?; + execution.status = ExecutionStatus::Running; + self.store.save_execution(&execution)?; + self.append_event(&execution.execution_id, ControlEventType::ExecutionStarted)?; + self.execute_execution( + &mut execution, + &spec, + &ExecutionAccumulator::default(), + &worker_id, + None, + ) + } + + pub fn dry_run(&self, spec: &ExecutionSpec) -> io::Result { + let mut warnings = Vec::new(); + let mut errors = Vec::new(); + + if let Err(err) = spec.validate(&self.global) { + errors.push(err.to_string()); + } + + if spec.policy.budget.max_cost_usd_millis.is_none() { + warnings.push("max_cost_usd not set".to_string()); + } + + let parameter_space_size = if spec.variation.source == "parameter_space" { + Some( + spec.variation + .parameter_space + .values() + .map(|values| values.len() as u64) + .product(), + ) + } else { + None + }; + + let max_child_runs = spec + .policy + .budget + .max_iterations + .map(|iterations| iterations * spec.variation.candidates_per_iteration); + + Ok(DryRunResult { + valid: errors.is_empty(), + plan: DryRunPlan { + candidates_per_iteration: spec.variation.candidates_per_iteration, + max_iterations: spec.policy.budget.max_iterations, + max_child_runs, + estimated_concurrent_peak: spec.policy.concurrency.max_concurrent_candidates, + variation_source: spec.variation.source.clone(), + parameter_space_size, + }, + warnings, + errors, + }) + } + + pub fn submit_execution( + store: &FsExecutionStore, + execution_id: &str, + spec: &ExecutionSpec, + ) -> io::Result { + let execution = Execution::new(execution_id, &spec.mode, &spec.goal); + store.create_execution(&execution)?; + store.append_event( + execution_id, + &ControlEventEnvelope::new(execution_id, 1, ControlEventType::ExecutionCreated), + )?; + store.append_event( + execution_id, + &ControlEventEnvelope::new(execution_id, 2, ControlEventType::ExecutionSubmitted), + )?; + #[cfg(feature = "serde")] + store.save_spec(execution_id, spec)?; + Ok(execution) + } + 
+ #[cfg(feature = "serde")] + pub fn process_execution(&mut self, execution_id: &str) -> io::Result { + self.with_claimed_execution(execution_id, |service, worker_id| { + service.process_execution_claimed(execution_id, worker_id) + }) + } + + #[cfg(feature = "serde")] + pub fn dispatch_execution_once(&mut self, execution_id: &str) -> io::Result { + self.with_claimed_execution(execution_id, |service, worker_id| { + service.dispatch_execution_once_claimed(execution_id, worker_id) + }) + } + + #[cfg(feature = "serde")] + pub fn plan_execution(&mut self, execution_id: &str) -> io::Result { + self.with_claimed_execution(execution_id, |service, _worker_id| { + service.plan_execution_claimed(execution_id) + }) + } + + #[cfg(feature = "serde")] + fn process_execution_claimed( + &mut self, + execution_id: &str, + worker_id: &str, + ) -> io::Result { + let (snapshot, spec) = self.load_workable_execution( + execution_id, + "only pending or running executions can be processed", + )?; + let mut execution = snapshot.execution; + execution.status = ExecutionStatus::Running; + self.store.save_execution(&execution)?; + self.append_event(execution_id, ControlEventType::ExecutionStarted)?; + let accumulator = snapshot.accumulator; + self.execute_execution(&mut execution, &spec, &accumulator, worker_id, None) + } + + #[cfg(feature = "serde")] + fn dispatch_execution_once_claimed( + &mut self, + execution_id: &str, + worker_id: &str, + ) -> io::Result { + let (snapshot, spec) = self.load_workable_execution( + execution_id, + "only pending or running executions can be dispatched", + )?; + let mut execution = snapshot.execution; + if execution.status == ExecutionStatus::Pending { + execution.status = ExecutionStatus::Running; + self.store.save_execution(&execution)?; + self.append_event(execution_id, ControlEventType::ExecutionStarted)?; + } + let accumulator = snapshot.accumulator; + self.execute_execution(&mut execution, &spec, &accumulator, worker_id, Some(1)) + } + + #[cfg(feature = 
"serde")] + fn plan_execution_claimed(&mut self, execution_id: &str) -> io::Result { + let (snapshot, spec) = self.load_workable_execution( + execution_id, + "only pending or running executions can be planned", + )?; + let mut execution = snapshot.execution; + if execution.status == ExecutionStatus::Pending { + execution.status = ExecutionStatus::Running; + self.store.save_execution(&execution)?; + self.append_event(execution_id, ControlEventType::ExecutionStarted)?; + } + + let accumulator = snapshot.accumulator; + let iteration = accumulator.completed_iterations; + let already_planned = snapshot + .candidates + .iter() + .any(|candidate| candidate.iteration == iteration); + if already_planned { + return Ok(execution); + } + + self.plan_iteration_candidates(&execution, &spec, &accumulator, iteration)?; + Ok(execution) + } + + fn append_event(&self, execution_id: &str, event_type: ControlEventType) -> io::Result<()> { + let seq = self.store.load_execution(execution_id)?.events.len() as u64 + 1; + self.store + .append_event(execution_id, &ControlEventEnvelope::new(execution_id, seq, event_type)) + } + + fn save_candidate_state( + &self, + execution_id: &str, + candidate_id: &str, + created_seq: u64, + iteration: u32, + status: CandidateStatus, + runtime_run_id: Option, + overrides: &std::collections::BTreeMap, + succeeded: Option, + metrics: &std::collections::BTreeMap, + ) -> io::Result<()> { + let mut record = + ExecutionCandidate::new(execution_id, candidate_id, created_seq, iteration, status); + record.runtime_run_id = runtime_run_id; + record.overrides = overrides.clone(); + record.succeeded = succeeded; + record.metrics = metrics.clone(); + self.store.save_candidate(&record) + } + + #[cfg(feature = "serde")] + fn load_launch_inbox_snapshot( + &self, + execution_id: &str, + iteration: u32, + candidate_id: &str, + ) -> io::Result { + match self + .store + .load_inbox_snapshot(execution_id, iteration, candidate_id) + { + Ok(snapshot) => Ok(snapshot), + Err(err) 
if err.kind() == io::ErrorKind::NotFound => { + Ok(crate::orchestration::InboxSnapshot { + execution_id: execution_id.to_string(), + candidate_id: candidate_id.to_string(), + iteration, + entries: Vec::new(), + }) + } + Err(err) => Err(err), + } + } + + #[cfg(feature = "serde")] + fn persist_candidate_intents( + &self, + execution_id: &str, + iteration: u32, + candidate_id: &str, + output: &CandidateOutput, + ) -> io::Result<()> { + let (valid, rejected) = + message_box::normalize_intents(candidate_id, iteration, &output.intents); + for _ in 0..rejected { + self.append_event(execution_id, ControlEventType::CommunicationIntentRejected)?; + } + for intent in &valid { + self.store.append_intent(execution_id, intent)?; + self.append_event(execution_id, ControlEventType::CommunicationIntentEmitted)?; + } + for message in message_box::route_intents(&valid) { + self.store.append_routed_message(execution_id, &message)?; + self.append_event(execution_id, ControlEventType::MessageRouted)?; + } + Ok(()) + } + + #[cfg(feature = "serde")] + fn materialize_iteration_inboxes( + &self, + execution_id: &str, + iteration: u32, + inboxes: &[super::types::CandidateInbox], + ) -> io::Result<()> { + let intents = self.store.load_intents(execution_id)?; + let messages = self.store.load_routed_messages(execution_id)?; + for (snapshot, delivered) in message_box::materialize_inbox_snapshots( + execution_id, + iteration, + inboxes, + &intents, + &messages, + ) { + self.store.save_inbox_snapshot(&snapshot)?; + for delivered_message in delivered { + self.store + .append_routed_message(execution_id, &delivered_message)?; + self.append_event(execution_id, ControlEventType::MessageDelivered)?; + } + } + Ok(()) + } + + fn plan_iteration_candidates( + &mut self, + execution: &Execution, + spec: &ExecutionSpec, + accumulator: &ExecutionAccumulator, + iteration: u32, + ) -> io::Result> { + let strategy = SelectedStrategy::new(spec); + self.append_event(&execution.execution_id, 
ControlEventType::IterationStarted)?; + #[cfg(feature = "serde")] + let effective_accumulator = { + let mut effective = accumulator.clone(); + let intents = self.store.load_intents(&execution.execution_id)?; + let messages = self.store.load_routed_messages(&execution.execution_id)?; + let message_backlog = + message_box::backlog_from_pending_messages(&intents, &messages, iteration); + if !message_backlog.is_empty() { + effective.message_backlog = message_backlog; + } + effective + }; + #[cfg(not(feature = "serde"))] + let effective_accumulator = accumulator.clone(); + + let inboxes = strategy.materialize_inboxes(&effective_accumulator); + #[cfg(feature = "serde")] + self.materialize_iteration_inboxes(&execution.execution_id, iteration, &inboxes)?; + let candidates = strategy.plan_candidates(&effective_accumulator, &inboxes); + for candidate in &candidates { + let candidate_seq = self.next_candidate_id; + self.save_candidate_state( + &execution.execution_id, + &candidate.candidate_id, + candidate_seq, + iteration, + CandidateStatus::Queued, + None, + &candidate.overrides, + None, + &Default::default(), + )?; + self.append_event(&execution.execution_id, ControlEventType::CandidateQueued)?; + self.next_candidate_id += 1; + } + Ok(candidates) + } + + fn load_or_plan_iteration_candidates( + &mut self, + execution: &Execution, + spec: &ExecutionSpec, + accumulator: &ExecutionAccumulator, + iteration: u32, + ) -> io::Result> { + let persisted: Vec<_> = self + .store + .load_candidates(&execution.execution_id)? 
+ .into_iter() + .filter(|candidate| candidate.iteration == iteration) + .collect(); + if persisted.is_empty() { + return self.plan_iteration_candidates(execution, spec, accumulator, iteration); + } + + let mut persisted = persisted; + persisted.sort_by_key(|candidate| candidate.created_seq); + Ok(persisted + .into_iter() + .map(|candidate| super::types::CandidateSpec { + candidate_id: candidate.candidate_id, + overrides: candidate.overrides, + }) + .collect()) + } + + fn dispatch_candidate( + &mut self, + execution: &mut Execution, + spec: &ExecutionSpec, + worker_id: &str, + candidate: &CandidateSpec, + iteration: u32, + created_seq: u64, + ) -> io::Result { + let run_id = format!("exec-run-candidate-{created_seq}"); + self.append_event(&execution.execution_id, ControlEventType::CandidateDispatched)?; + #[cfg(feature = "serde")] + let launch_inbox = self.load_launch_inbox_snapshot( + &execution.execution_id, + iteration, + &candidate.candidate_id, + )?; + #[cfg(feature = "serde")] + let launch_request = self.launch_adapter.prepare_launch_request( + StartRequest { + run_id: run_id.clone(), + workflow_spec: spec.workflow.template.clone(), + launch_context: None, + policy: default_runtime_policy(), + }, + candidate, + &launch_inbox, + ); + #[cfg(feature = "serde")] + let started = self + .runtime + .start_run(launch_request) + .map_err(|err| io::Error::new(io::ErrorKind::Other, err.message))?; + #[cfg(not(feature = "serde"))] + let started = self + .runtime + .start_run(StartRequest { + run_id: run_id.clone(), + workflow_spec: spec.workflow.template.clone(), + launch_context: None, + policy: default_runtime_policy(), + }) + .map_err(|err| io::Error::new(io::ErrorKind::Other, err.message))?; + self.save_candidate_state( + &execution.execution_id, + &candidate.candidate_id, + created_seq, + iteration, + CandidateStatus::Running, + Some(run_id.clone()), + &candidate.overrides, + None, + &Default::default(), + )?; + + let inspection = match 
self.wait_for_terminal_run(&execution.execution_id, worker_id, &started.handle) + { + Ok(Some(inspection)) => inspection, + Ok(None) => { + self.save_candidate_state( + &execution.execution_id, + &candidate.candidate_id, + created_seq, + iteration, + CandidateStatus::Canceled, + Some(run_id), + &candidate.overrides, + None, + &Default::default(), + )?; + return Ok(DispatchOutcome::Canceled); + } + Err(err) if err.kind() == io::ErrorKind::WouldBlock => { + return Ok(DispatchOutcome::Paused(err)); + } + Err(err) => return Err(err), + }; + + if inspection.state == crate::contract::RunState::Failed { + self.save_candidate_state( + &execution.execution_id, + &candidate.candidate_id, + created_seq, + iteration, + CandidateStatus::Failed, + Some(inspection.run_id.clone()), + &candidate.overrides, + Some(false), + &Default::default(), + )?; + self.append_event( + &execution.execution_id, + ControlEventType::CandidateOutputCollected, + )?; + return Ok(DispatchOutcome::Output { + output: CandidateOutput::new( + candidate.candidate_id.clone(), + false, + Default::default(), + ), + failed: true, + }); + } + + match self.runtime.take_structured_output(&inspection.run_id) { + StructuredOutputResult::Found(mut output) => { + self.save_candidate_state( + &execution.execution_id, + &candidate.candidate_id, + created_seq, + iteration, + CandidateStatus::Completed, + Some(inspection.run_id.clone()), + &candidate.overrides, + Some(output.succeeded), + &output.metrics, + )?; + output.candidate_id = candidate.candidate_id.clone(); + self.append_event( + &execution.execution_id, + ControlEventType::CandidateOutputCollected, + )?; + #[cfg(feature = "serde")] + self.persist_candidate_intents( + &execution.execution_id, + iteration, + &candidate.candidate_id, + &output, + )?; + Ok(DispatchOutcome::Output { + output, + failed: false, + }) + } + StructuredOutputResult::Missing => { + let failed = spec.policy.missing_output_policy == "mark_failed"; + self.save_candidate_state( + 
&execution.execution_id, + &candidate.candidate_id, + created_seq, + iteration, + if failed { + CandidateStatus::Failed + } else { + CandidateStatus::Completed + }, + Some(inspection.run_id.clone()), + &candidate.overrides, + Some(!failed), + &Default::default(), + )?; + self.append_event( + &execution.execution_id, + ControlEventType::CandidateOutputCollected, + )?; + Ok(DispatchOutcome::Output { + output: CandidateOutput::new( + candidate.candidate_id.clone(), + !failed, + Default::default(), + ), + failed, + }) + } + StructuredOutputResult::Error(err) => match err.code { + crate::contract::ContractErrorCode::StructuredOutputMissing => { + let failed = spec.policy.missing_output_policy == "mark_failed"; + self.save_candidate_state( + &execution.execution_id, + &candidate.candidate_id, + created_seq, + iteration, + if failed { + CandidateStatus::Failed + } else { + CandidateStatus::Completed + }, + Some(inspection.run_id.clone()), + &candidate.overrides, + Some(!failed), + &Default::default(), + )?; + self.append_event( + &execution.execution_id, + ControlEventType::CandidateOutputCollected, + )?; + Ok(DispatchOutcome::Output { + output: CandidateOutput::new( + candidate.candidate_id.clone(), + !failed, + Default::default(), + ), + failed, + }) + } + crate::contract::ContractErrorCode::ArtifactPublicationIncomplete + | crate::contract::ContractErrorCode::ArtifactStoreUnavailable + | crate::contract::ContractErrorCode::RetrievalTimeout + if err.retryable => + { + Ok(DispatchOutcome::Retryable(io::Error::new( + io::ErrorKind::WouldBlock, + err.message, + ))) + } + _ => { + self.save_candidate_state( + &execution.execution_id, + &candidate.candidate_id, + created_seq, + iteration, + CandidateStatus::Failed, + Some(inspection.run_id.clone()), + &candidate.overrides, + Some(false), + &Default::default(), + )?; + self.append_event( + &execution.execution_id, + ControlEventType::CandidateOutputCollected, + )?; + Ok(DispatchOutcome::Output { + output: 
CandidateOutput::new( + candidate.candidate_id.clone(), + false, + Default::default(), + ), + failed: true, + }) + } + }, + } + } + + fn execute_execution( + &mut self, + execution: &mut Execution, + spec: &ExecutionSpec, + starting_accumulator: &ExecutionAccumulator, + worker_id: &str, + dispatch_limit: Option, + ) -> io::Result { + let strategy = SelectedStrategy::new(spec); + let mut accumulator = starting_accumulator.clone(); + let mut iteration = accumulator.completed_iterations; + let mut retry_used = false; + let mut dispatches_used = 0usize; + + while iteration < spec.policy.budget.max_iterations.unwrap_or(0) { + match self.check_execution_control(&execution.execution_id, worker_id)? { + ExecutionControl::Continue => {} + ExecutionControl::Paused => { + execution.status = ExecutionStatus::Paused; + self.store.save_execution(execution)?; + return Err(io::Error::new(io::ErrorKind::WouldBlock, "execution paused")); + } + ExecutionControl::Canceled => { + execution.status = ExecutionStatus::Canceled; + self.store.save_execution(execution)?; + self.append_event(&execution.execution_id, ControlEventType::ExecutionCanceled)?; + return Ok(execution.clone()); + } + } + let candidates = + self.load_or_plan_iteration_candidates(execution, spec, &accumulator, iteration)?; + + for candidate in candidates { + let candidate_record = self + .store + .load_candidates(&execution.execution_id)? + .into_iter() + .find(|saved| { + saved.iteration == iteration && saved.candidate_id == candidate.candidate_id + }) + .ok_or_else(|| { + io::Error::new( + io::ErrorKind::NotFound, + format!("missing persisted candidate '{}'", candidate.candidate_id), + ) + })?; + if candidate_record.status != CandidateStatus::Queued { + continue; + } + let candidate_seq = candidate_record.created_seq; + match self.check_execution_control(&execution.execution_id, worker_id)? 
{ + ExecutionControl::Continue => {} + ExecutionControl::Paused => { + execution.status = ExecutionStatus::Paused; + self.store.save_execution(execution)?; + return Err(io::Error::new(io::ErrorKind::WouldBlock, "execution paused")); + } + ExecutionControl::Canceled => { + execution.status = ExecutionStatus::Canceled; + self.store.save_execution(execution)?; + self.append_event(&execution.execution_id, ControlEventType::ExecutionCanceled)?; + return Ok(execution.clone()); + } + } + if let Some(limit) = dispatch_limit { + if dispatches_used >= limit { + self.store.save_execution(execution)?; + return Ok(execution.clone()); + } + } + match self.dispatch_candidate( + execution, + spec, + worker_id, + &candidate, + iteration, + candidate_seq, + )? { + DispatchOutcome::Output { output, failed } => { + let _ = (output, failed); + dispatches_used += 1; + } + DispatchOutcome::Paused(err) => { + execution.status = ExecutionStatus::Paused; + self.store.save_execution(execution)?; + return Err(err); + } + DispatchOutcome::Retryable(err) => { + execution.status = ExecutionStatus::Pending; + self.store.save_execution(execution)?; + return Err(err); + } + DispatchOutcome::Canceled => { + execution.status = ExecutionStatus::Canceled; + self.store.save_execution(execution)?; + self.append_event(&execution.execution_id, ControlEventType::ExecutionCanceled)?; + return Ok(execution.clone()); + } + } + } + + let persisted_iteration_candidates: Vec<_> = self + .store + .load_candidates(&execution.execution_id)? 
+ .into_iter() + .filter(|candidate| candidate.iteration == iteration) + .collect(); + let has_pending_candidates = persisted_iteration_candidates.iter().any(|candidate| { + matches!(candidate.status, CandidateStatus::Queued | CandidateStatus::Running) + }); + if has_pending_candidates { + self.store.save_execution(execution)?; + return Ok(execution.clone()); + } + + let outputs: Vec<_> = persisted_iteration_candidates + .iter() + .map(|candidate| { + CandidateOutput::new( + candidate.candidate_id.clone(), + candidate.succeeded.unwrap_or(false), + candidate.metrics.clone(), + ) + }) + .collect(); + let iteration_failures = persisted_iteration_candidates + .iter() + .filter(|candidate| candidate.succeeded == Some(false)) + .count() as u32; + let evaluation = strategy.evaluate(&accumulator, &outputs); + self.append_event(&execution.execution_id, ControlEventType::CandidateScored)?; + accumulator = strategy.reduce(accumulator, evaluation.clone()); + accumulator.failure_counts.total_candidate_failures = accumulator + .failure_counts + .total_candidate_failures + .saturating_sub( + evaluation + .ranked_candidates + .iter() + .filter(|candidate| !candidate.pass) + .count() as u32, + ) + + iteration_failures; + execution.completed_iterations = accumulator.completed_iterations; + execution.failure_counts = accumulator.failure_counts.clone(); + execution.result_best_candidate_id = accumulator.best_candidate_id.clone(); + self.store.save_accumulator(&execution.execution_id, &accumulator)?; + + let all_failed = outputs.iter().all(|output| !output.succeeded); + if all_failed { + match spec.policy.iteration_failure_policy.as_str() { + "continue" => { + iteration += 1; + continue; + } + "retry_iteration" if !retry_used => { + retry_used = true; + self.store + .clear_iteration_candidates(&execution.execution_id, iteration)?; + accumulator.completed_iterations = + accumulator.completed_iterations.saturating_sub(1); + execution.completed_iterations = 
accumulator.completed_iterations; + continue; + } + _ => { + execution.status = ExecutionStatus::Failed; + self.store.save_execution(execution)?; + self.append_event(&execution.execution_id, ControlEventType::ExecutionFailed)?; + return Ok(execution.clone()); + } + } + } + + if iteration_failures >= spec.policy.max_candidate_failures_per_iteration { + execution.status = ExecutionStatus::Failed; + self.store.save_execution(execution)?; + self.append_event(&execution.execution_id, ControlEventType::ExecutionFailed)?; + return Ok(execution.clone()); + } + + if strategy.should_stop(&accumulator, &evaluation).is_some() { + execution.status = ExecutionStatus::Completed; + self.store.save_execution(execution)?; + self.append_event(&execution.execution_id, ControlEventType::IterationCompleted)?; + self.append_event(&execution.execution_id, ControlEventType::ExecutionCompleted)?; + return Ok(execution.clone()); + } + + self.append_event(&execution.execution_id, ControlEventType::IterationCompleted)?; + iteration += 1; + } + + execution.status = if execution.result_best_candidate_id.is_some() { + ExecutionStatus::Completed + } else { + ExecutionStatus::Failed + }; + self.store.save_execution(execution)?; + self.append_event( + &execution.execution_id, + if execution.status == ExecutionStatus::Completed { + ControlEventType::ExecutionCompleted + } else { + ControlEventType::ExecutionFailed + }, + )?; + Ok(execution.clone()) + } +} + +impl ExecutionService { + pub fn update_execution_status( + store: &FsExecutionStore, + execution_id: &str, + action: ExecutionAction, + ) -> io::Result { + let mut snapshot = store.load_execution(execution_id)?; + let next_status = match (action, &snapshot.execution.status) { + (ExecutionAction::Pause, ExecutionStatus::Running) => ExecutionStatus::Paused, + (ExecutionAction::Resume, ExecutionStatus::Paused) => ExecutionStatus::Running, + (ExecutionAction::Cancel, ExecutionStatus::Running | ExecutionStatus::Paused) => { + 
ExecutionStatus::Canceled + } + (ExecutionAction::Cancel, ExecutionStatus::Pending) => ExecutionStatus::Canceled, + (ExecutionAction::Pause, _) => { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "pause is only valid for running executions", + )) + } + (ExecutionAction::Resume, _) => { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "resume is only valid for paused executions", + )) + } + (ExecutionAction::Cancel, _) => { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "cancel is only valid for pending, running, or paused executions", + )) + } + }; + snapshot.execution.status = next_status; + store.save_execution(&snapshot.execution)?; + let event_type = match action { + ExecutionAction::Pause => ControlEventType::ExecutionPaused, + ExecutionAction::Resume => ControlEventType::ExecutionResumed, + ExecutionAction::Cancel => ControlEventType::ExecutionCanceled, + }; + store.append_event( + execution_id, + &ControlEventEnvelope::new( + execution_id, + snapshot.events.len() as u64 + 1, + event_type, + ), + )?; + Ok(snapshot.execution) + } + + #[cfg(feature = "serde")] + pub fn patch_execution_policy( + store: &FsExecutionStore, + execution_id: &str, + patch: PolicyPatch, + global: &GlobalConfig, + ) -> io::Result { + let snapshot = store.load_execution(execution_id)?; + if !matches!( + snapshot.execution.status, + ExecutionStatus::Pending | ExecutionStatus::Running | ExecutionStatus::Paused + ) { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "policy updates are only valid for pending, running, or paused executions", + )); + } + let mut spec = store.load_spec(execution_id)?; + if let Some(max_iterations) = patch.max_iterations { + spec.policy.budget.max_iterations = Some(max_iterations); + } + if let Some(max_concurrent_candidates) = patch.max_concurrent_candidates { + spec.policy.concurrency.max_concurrent_candidates = max_concurrent_candidates; + } + spec.validate(global) + .map_err(|err| 
io::Error::new(io::ErrorKind::InvalidInput, err.to_string()))?; + store.save_spec(execution_id, &spec)?; + Ok(spec) + } +} + +fn default_runtime_policy() -> ExecutionPolicy { + ExecutionPolicy { + max_parallel_microvms_per_run: 1, + max_stage_retries: 1, + stage_timeout_secs: 60, + cancel_grace_period_secs: 5, + } +} + +fn scoring_from_spec(spec: &ExecutionSpec) -> ScoringConfig { + ScoringConfig { + metrics: spec + .evaluation + .weights + .iter() + .map(|(name, weight)| WeightedMetric { + name: name.clone(), + weight: weight.abs(), + direction: if *weight < 0.0 { + MetricDirection::Minimize + } else { + MetricDirection::Maximize + }, + }) + .collect(), + pass_threshold: spec.evaluation.pass_threshold.unwrap_or(0.0), + tie_break_metric: spec.evaluation.tie_breaking.clone(), + } +} diff --git a/src/orchestration/spec.rs b/src/orchestration/spec.rs new file mode 100644 index 0000000..7cd5ffa --- /dev/null +++ b/src/orchestration/spec.rs @@ -0,0 +1,87 @@ +use std::collections::BTreeMap; +use std::error::Error; +use std::fmt::{Display, Formatter}; + +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + +use super::policy::{GlobalConfig, OrchestrationPolicy}; +use super::variation::VariationConfig; + +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct WorkflowTemplateRef { + pub template: String, +} + +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Debug, Clone, PartialEq)] +pub struct EvaluationConfig { + pub scoring_type: String, + pub weights: BTreeMap, + pub pass_threshold: Option, + pub ranking: String, + pub tie_breaking: String, +} + +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Debug, Clone, PartialEq)] +pub struct ExecutionSpec { + pub mode: String, + pub goal: String, + pub workflow: WorkflowTemplateRef, + pub policy: OrchestrationPolicy, + pub evaluation: EvaluationConfig, + pub variation: VariationConfig, + pub swarm: bool, +} + 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub struct SpecValidationError(String); + +impl SpecValidationError { + fn new(message: impl Into) -> Self { + Self(message.into()) + } +} + +impl Display for SpecValidationError { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.write_str(&self.0) + } +} + +impl Error for SpecValidationError {} + +impl ExecutionSpec { + pub fn validate(&self, global: &GlobalConfig) -> Result<(), SpecValidationError> { + if !matches!(self.mode.as_str(), "swarm" | "search") { + return Err(SpecValidationError::new(format!( + "unknown mode '{}'", + self.mode + ))); + } + + if self.mode == "swarm" && !self.swarm { + return Err(SpecValidationError::new( + "swarm mode requires the swarm section", + )); + } + + self.policy + .validate(global) + .map_err(|err| SpecValidationError::new(err.to_string()))?; + + if self.variation.candidates_per_iteration == 0 { + return Err(SpecValidationError::new( + "variation.candidates_per_iteration must be positive", + )); + } + + if self.workflow.template.trim().is_empty() { + return Err(SpecValidationError::new("workflow.template is required")); + } + + Ok(()) + } +} diff --git a/src/orchestration/store.rs b/src/orchestration/store.rs new file mode 100644 index 0000000..ab6b63e --- /dev/null +++ b/src/orchestration/store.rs @@ -0,0 +1,13 @@ +mod fs; + +use std::io; + +use super::types::{ExecutionCandidate, ExecutionSnapshot}; + +pub use fs::FsExecutionStore; + +pub trait ExecutionStore { + fn load_execution(&self, execution_id: &str) -> io::Result; + fn list_active_execution_ids(&self) -> io::Result>; + fn load_candidates(&self, execution_id: &str) -> io::Result>; +} diff --git a/src/orchestration/store/fs.rs b/src/orchestration/store/fs.rs new file mode 100644 index 0000000..e988b59 --- /dev/null +++ b/src/orchestration/store/fs.rs @@ -0,0 +1,912 @@ +use std::fs; +use std::io; +use std::path::PathBuf; +#[cfg(feature = "serde")] +use std::path::{Component, Path}; + +use super::ExecutionStore; +use 
crate::orchestration::events::{ControlEventEnvelope, ControlEventType}; +#[cfg(feature = "serde")] +use crate::orchestration::spec::ExecutionSpec; +use crate::orchestration::types::{ + CandidateStatus, Execution, ExecutionAccumulator, ExecutionCandidate, ExecutionSnapshot, + ExecutionStatus, +}; +#[cfg(feature = "serde")] +use crate::orchestration::types::{ + CommunicationIntent, InboxSnapshot, RoutedMessage, +}; + +#[cfg(not(feature = "serde"))] +mod serde_json { + use std::collections::BTreeMap; + use std::fmt; + + #[derive(Debug, Clone)] + pub struct Error(String); + + impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(&self.0) + } + } + + impl std::error::Error for Error {} + + pub trait LocalJson: Sized { + fn to_json_string(&self) -> Result; + fn from_json_str(value: &str) -> Result; + } + + pub fn to_string(value: &T) -> Result { + value.to_json_string() + } + + pub fn from_str(value: &str) -> Result { + T::from_json_str(value) + } + + impl LocalJson for BTreeMap { + fn to_json_string(&self) -> Result { + Ok(encode_map_string(self)) + } + + fn from_json_str(value: &str) -> Result { + decode_map_string(value) + } + } + + impl LocalJson for BTreeMap { + fn to_json_string(&self) -> Result { + Ok(encode_map_f64(self)) + } + + fn from_json_str(value: &str) -> Result { + decode_map_f64(value) + } + } + + impl LocalJson for Vec { + fn to_json_string(&self) -> Result { + Ok(encode_list(self)) + } + + fn from_json_str(value: &str) -> Result { + decode_list(value) + } + } + + fn encode_map_string(value: &BTreeMap) -> String { + value + .iter() + .map(|(key, value)| format!("{}={}", escape(key), escape(value))) + .collect::>() + .join(";") + } + + fn encode_map_f64(value: &BTreeMap) -> String { + value + .iter() + .map(|(key, value)| format!("{}={}", escape(key), value)) + .collect::>() + .join(";") + } + + fn encode_list(value: &[String]) -> String { + value.iter().map(|item| escape(item)).collect::>().join(";") + 
} + + fn decode_map_string(value: &str) -> Result, Error> { + let mut map = BTreeMap::new(); + if value.is_empty() { + return Ok(map); + } + for pair in split_escaped(value, ';') { + let Some((key, value)) = split_once_escaped(&pair, '=') else { + return Err(Error(format!("invalid encoded map entry '{pair}'"))); + }; + map.insert(unescape(&key)?, unescape(&value)?); + } + Ok(map) + } + + fn decode_map_f64(value: &str) -> Result, Error> { + let mut map = BTreeMap::new(); + if value.is_empty() { + return Ok(map); + } + for pair in split_escaped(value, ';') { + let Some((key, value)) = split_once_escaped(&pair, '=') else { + return Err(Error(format!("invalid encoded map entry '{pair}'"))); + }; + let parsed = unescape(&value)? + .parse::() + .map_err(|err| Error(err.to_string()))?; + map.insert(unescape(&key)?, parsed); + } + Ok(map) + } + + fn decode_list(value: &str) -> Result, Error> { + if value.is_empty() { + return Ok(Vec::new()); + } + split_escaped(value, ';') + .into_iter() + .map(|item| unescape(&item)) + .collect() + } + + fn split_escaped(value: &str, separator: char) -> Vec { + let mut parts = Vec::new(); + let mut current = String::new(); + let mut escaped = false; + for ch in value.chars() { + if escaped { + current.push('\\'); + current.push(ch); + escaped = false; + } else if ch == '\\' { + escaped = true; + } else if ch == separator { + parts.push(current); + current = String::new(); + } else { + current.push(ch); + } + } + if escaped { + current.push('\\'); + } + parts.push(current); + parts + } + + fn split_once_escaped(value: &str, separator: char) -> Option<(String, String)> { + let mut left = String::new(); + let mut right = String::new(); + let mut escaped = false; + let mut seen_separator = false; + for ch in value.chars() { + if escaped { + let target = if seen_separator { &mut right } else { &mut left }; + target.push('\\'); + target.push(ch); + escaped = false; + } else if ch == '\\' { + escaped = true; + } else if ch == separator && 
!seen_separator { + seen_separator = true; + } else if seen_separator { + right.push(ch); + } else { + left.push(ch); + } + } + if seen_separator { + Some((left, right)) + } else { + None + } + } + + fn escape(value: &str) -> String { + let mut escaped = String::new(); + for ch in value.chars() { + match ch { + '\\' => escaped.push_str("\\\\"), + ';' => escaped.push_str("\\;"), + '=' => escaped.push_str("\\="), + '\n' => escaped.push_str("\\n"), + '\r' => escaped.push_str("\\r"), + '\t' => escaped.push_str("\\t"), + other => escaped.push(other), + } + } + escaped + } + + fn unescape(value: &str) -> Result { + let mut output = String::new(); + let mut escaped = false; + for ch in value.chars() { + if escaped { + output.push(match ch { + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + '\\' => '\\', + ';' => ';', + '=' => '=', + other => other, + }); + escaped = false; + } else if ch == '\\' { + escaped = true; + } else { + output.push(ch); + } + } + if escaped { + return Err(Error("dangling escape sequence".to_string())); + } + Ok(output) + } +} + +#[derive(Debug, Clone)] +pub struct FsExecutionStore { + root: PathBuf, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct ExecutionClaim { + worker_id: String, + claimed_at_ms: u64, +} + +impl FsExecutionStore { + const CLAIM_TTL_MS: u64 = 30_000; + + pub fn new(root: PathBuf) -> Self { + Self { root } + } + + pub fn create_execution(&self, execution: &Execution) -> io::Result<()> { + let dir = self.execution_dir(&execution.execution_id); + fs::create_dir_all(&dir)?; + self.save_execution(execution) + } + + pub fn save_execution(&self, execution: &Execution) -> io::Result<()> { + let dir = self.execution_dir(&execution.execution_id); + fs::create_dir_all(&dir)?; + fs::write( + dir.join("execution.txt"), + format!( + "{}\n{}\n{}\n{}\n{}\n{}\n{}", + execution.execution_id, + execution.mode, + execution.goal, + status_to_str(&execution.status), + execution.result_best_candidate_id.as_deref().unwrap_or(""), + 
execution.completed_iterations, + execution.failure_counts.total_candidate_failures + ), + ) + } + + pub fn claim_execution(&self, execution_id: &str, worker_id: &str) -> io::Result { + let dir = self.execution_dir(execution_id); + fs::create_dir_all(&dir)?; + let claim_path = dir.join("claim.txt"); + let claim = ExecutionClaim { + worker_id: worker_id.to_string(), + claimed_at_ms: now_ms(), + }; + match fs::OpenOptions::new() + .write(true) + .create_new(true) + .open(&claim_path) + { + Ok(mut file) => { + use std::io::Write; + file.write_all(serialize_claim(&claim).as_bytes())?; + Ok(true) + } + Err(err) if err.kind() == io::ErrorKind::AlreadyExists => { + let existing = match self.load_claim_record(execution_id)? { + Some(existing) => existing, + None => { + fs::remove_file(&claim_path)?; + return self.claim_execution(execution_id, worker_id); + } + }; + if claim_is_stale(&existing) { + fs::remove_file(&claim_path)?; + return self.claim_execution(execution_id, worker_id); + } + Ok(false) + } + Err(err) => Err(err), + } + } + + pub fn release_claim(&self, execution_id: &str) -> io::Result<()> { + let claim_path = self.execution_dir(execution_id).join("claim.txt"); + match fs::remove_file(claim_path) { + Ok(()) => Ok(()), + Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(()), + Err(err) => Err(err), + } + } + + pub fn refresh_claim(&self, execution_id: &str, worker_id: &str) -> io::Result<()> { + let claim_path = self.execution_dir(execution_id).join("claim.txt"); + let Some(existing) = self.load_claim_record(execution_id)? 
else { + return Err(io::Error::new( + io::ErrorKind::NotFound, + "claim not found", + )); + }; + if existing.worker_id != worker_id { + return Err(io::Error::new( + io::ErrorKind::PermissionDenied, + "claim owned by another worker", + )); + } + fs::write( + claim_path, + serialize_claim(&ExecutionClaim { + worker_id: worker_id.to_string(), + claimed_at_ms: now_ms(), + }), + ) + } + + pub fn load_claim(&self, execution_id: &str) -> io::Result> { + Ok(self + .load_claim_record(execution_id)? + .map(|claim| claim.worker_id)) + } + + fn load_claim_record(&self, execution_id: &str) -> io::Result> { + let claim_path = self.execution_dir(execution_id).join("claim.txt"); + match fs::read_to_string(claim_path) { + Ok(contents) => Ok(parse_claim(&contents)), + Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(None), + Err(err) => Err(err), + } + } + + pub fn append_event( + &self, + execution_id: &str, + event: &ControlEventEnvelope, + ) -> io::Result<()> { + let path = self.execution_dir(execution_id).join("events.log"); + let existing = fs::read_to_string(&path).unwrap_or_default(); + let next = format!( + "{}{}|{}|{}\n", + existing, + event.execution_id, + event.seq, + event.event_type.as_str() + ); + fs::write(path, next) + } + + pub fn save_accumulator( + &self, + execution_id: &str, + accumulator: &ExecutionAccumulator, + ) -> io::Result<()> { + let best_candidate_overrides = serde_json::to_string(&accumulator.best_candidate_overrides) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err.to_string()))?; + let explored_signatures = serde_json::to_string(&accumulator.explored_signatures) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err.to_string()))?; + let message_backlog = serde_json::to_string(&accumulator.message_backlog) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err.to_string()))?; + fs::write( + self.execution_dir(execution_id).join("accumulator.txt"), + format!( + "{}\n{}\n{}\n{}\n{}\n{}\n{}", + 
accumulator.scoring_history_len, + accumulator.completed_iterations, + accumulator.best_candidate_id.as_deref().unwrap_or(""), + best_candidate_overrides, + accumulator.search_phase.as_deref().unwrap_or(""), + explored_signatures, + message_backlog, + ), + ) + } + + pub fn save_candidate(&self, candidate: &ExecutionCandidate) -> io::Result<()> { + let dir = self.execution_dir(&candidate.execution_id).join("candidates"); + fs::create_dir_all(&dir)?; + let overrides = serde_json::to_string(&candidate.overrides) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err.to_string()))?; + let metrics = serde_json::to_string(&candidate.metrics) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err.to_string()))?; + fs::write( + dir.join(format!("{}-{}.txt", candidate.created_seq, candidate.candidate_id)), + format!( + "{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}", + candidate.execution_id, + candidate.candidate_id, + candidate.created_seq, + candidate.iteration, + candidate_status_to_str(&candidate.status), + candidate.runtime_run_id.as_deref().unwrap_or(""), + overrides, + candidate + .succeeded + .map(|value| if value { "true" } else { "false" }) + .unwrap_or(""), + metrics, + ), + ) + } + + pub fn load_candidates(&self, execution_id: &str) -> io::Result> { + let dir = self.execution_dir(execution_id).join("candidates"); + match fs::read_dir(&dir) { + Ok(entries) => { + let mut candidates = Vec::new(); + for entry in entries { + let entry = entry?; + if entry.file_type()?.is_file() { + let body = fs::read_to_string(entry.path())?; + candidates.push(parse_candidate(body)?); + } + } + candidates.sort_by_key(|candidate| candidate.created_seq); + Ok(candidates) + } + Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(Vec::new()), + Err(err) => Err(err), + } + } + + pub fn clear_iteration_candidates(&self, execution_id: &str, iteration: u32) -> io::Result<()> { + let dir = self.execution_dir(execution_id).join("candidates"); + match fs::read_dir(&dir) { + 
Ok(entries) => { + for entry in entries { + let entry = entry?; + if !entry.file_type()?.is_file() { + continue; + } + let body = fs::read_to_string(entry.path())?; + let candidate = parse_candidate(body)?; + if candidate.iteration == iteration { + fs::remove_file(entry.path())?; + } + } + Ok(()) + } + Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(()), + Err(err) => Err(err), + } + } + + #[cfg(feature = "serde")] + pub fn append_intent( + &self, + execution_id: &str, + intent: &CommunicationIntent, + ) -> io::Result<()> { + append_ndjson_record(self.execution_dir(execution_id).join("intents.log"), intent) + } + + #[cfg(feature = "serde")] + pub fn load_intents(&self, execution_id: &str) -> io::Result> { + load_ndjson_records(self.execution_dir(execution_id).join("intents.log")) + } + + #[cfg(feature = "serde")] + pub fn append_routed_message( + &self, + execution_id: &str, + message: &RoutedMessage, + ) -> io::Result<()> { + append_ndjson_record(self.execution_dir(execution_id).join("messages.log"), message) + } + + #[cfg(feature = "serde")] + pub fn load_routed_messages(&self, execution_id: &str) -> io::Result> { + load_ndjson_records(self.execution_dir(execution_id).join("messages.log")) + } + + #[cfg(feature = "serde")] + pub fn save_inbox_snapshot(&self, snapshot: &InboxSnapshot) -> io::Result<()> { + let path = self.inbox_snapshot_path( + &snapshot.execution_id, + snapshot.iteration, + &snapshot.candidate_id, + )?; + if let Some(parent) = path.parent() { + fs::create_dir_all(parent)?; + } + let payload = serde_json::to_vec_pretty(snapshot) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err.to_string()))?; + fs::write(path, payload) + } + + #[cfg(feature = "serde")] + pub fn load_inbox_snapshot( + &self, + execution_id: &str, + iteration: u32, + candidate_id: &str, + ) -> io::Result { + let path = self.inbox_snapshot_path(execution_id, iteration, candidate_id)?; + let body = fs::read(path)?; + serde_json::from_slice(&body) + 
.map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err.to_string())) + } + + #[cfg(feature = "serde")] + pub fn save_spec(&self, execution_id: &str, spec: &ExecutionSpec) -> io::Result<()> { + let dir = self.execution_dir(execution_id); + fs::create_dir_all(&dir)?; + let payload = + serde_json::to_vec_pretty(spec).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e.to_string()))?; + fs::write(dir.join("spec.json"), payload) + } + + #[cfg(feature = "serde")] + pub fn load_spec(&self, execution_id: &str) -> io::Result { + let path = self.execution_dir(execution_id).join("spec.json"); + let body = fs::read(path)?; + serde_json::from_slice(&body) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e.to_string())) + } + + pub fn load_execution(&self, execution_id: &str) -> io::Result { + let dir = self.execution_dir(execution_id); + let execution = parse_execution(fs::read_to_string(dir.join("execution.txt"))?)?; + let events = match fs::read_to_string(dir.join("events.log")) { + Ok(contents) => parse_events(&contents), + Err(err) if err.kind() == io::ErrorKind::NotFound => Vec::new(), + Err(err) => return Err(err), + }; + let accumulator = match fs::read_to_string(dir.join("accumulator.txt")) { + Ok(contents) => parse_accumulator(&contents)?, + Err(err) if err.kind() == io::ErrorKind::NotFound => ExecutionAccumulator::default(), + Err(err) => return Err(err), + }; + let candidates = self.load_candidates(execution_id)?; + + Ok(ExecutionSnapshot { + execution, + events, + accumulator, + candidates, + }) + } + + pub fn list_execution_ids(&self) -> io::Result> { + let mut ids = Vec::new(); + for entry in fs::read_dir(&self.root)? 
{ + let entry = entry?; + if entry.file_type()?.is_dir() { + ids.push(entry.file_name().to_string_lossy().to_string()); + } + } + ids.sort(); + Ok(ids) + } + + fn execution_dir(&self, execution_id: &str) -> PathBuf { + self.root.join(execution_id) + } + + #[cfg(feature = "serde")] + fn inbox_snapshot_path( + &self, + execution_id: &str, + iteration: u32, + candidate_id: &str, + ) -> io::Result { + validate_inbox_candidate_id(candidate_id)?; + Ok(self + .execution_dir(execution_id) + .join("inboxes") + .join(iteration.to_string()) + .join(format!("{}.json", candidate_id))) + } +} + +impl ExecutionStore for FsExecutionStore { + fn load_execution(&self, execution_id: &str) -> io::Result { + FsExecutionStore::load_execution(self, execution_id) + } + + fn list_active_execution_ids(&self) -> io::Result> { + let mut ids = Vec::new(); + for execution_id in self.list_execution_ids()? { + let snapshot = self.load_execution(&execution_id)?; + if matches!( + snapshot.execution.status, + ExecutionStatus::Pending | ExecutionStatus::Running | ExecutionStatus::Paused + ) { + ids.push(execution_id); + } + } + Ok(ids) + } + + fn load_candidates(&self, execution_id: &str) -> io::Result> { + FsExecutionStore::load_candidates(self, execution_id) + } +} + +#[cfg(feature = "serde")] +fn append_ndjson_record( + path: PathBuf, + record: &T, +) -> io::Result<()> { + if let Some(parent) = path.parent() { + fs::create_dir_all(parent)?; + } + let mut file = fs::OpenOptions::new() + .create(true) + .append(true) + .open(path)?; + serde_json::to_writer(&mut file, record) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err.to_string()))?; + use std::io::Write; + file.write_all(b"\n") +} + +#[cfg(feature = "serde")] +fn load_ndjson_records(path: PathBuf) -> io::Result> { + match fs::read_to_string(path) { + Ok(contents) => { + let mut records = Vec::new(); + let mut lines = contents.lines().peekable(); + while let Some(line) = lines.next() { + if line.trim().is_empty() { + continue; + 
} + match serde_json::from_str(line) { + Ok(record) => records.push(record), + Err(err) if lines.peek().is_none() => break, + Err(err) => { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + err.to_string(), + )) + } + } + } + Ok(records) + } + Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(Vec::new()), + Err(err) => Err(err), + } +} + +fn parse_execution(contents: String) -> io::Result { + let mut lines = contents.lines(); + let execution_id = required_line(&mut lines, "execution_id")?; + let mode = required_line(&mut lines, "mode")?; + let goal = required_line(&mut lines, "goal")?; + let status = required_line(&mut lines, "status")?; + let result_best_candidate_id = optional_line(&mut lines) + .filter(|value| !value.is_empty()); + let completed_iterations = optional_line(&mut lines) + .map(|value| value.parse().map_err(invalid_data)) + .transpose()? + .unwrap_or(0); + let total_candidate_failures = optional_line(&mut lines) + .map(|value| value.parse().map_err(invalid_data)) + .transpose()? + .unwrap_or(0); + Ok(Execution { + execution_id, + mode, + goal, + status: str_to_status(&status)?, + result_best_candidate_id, + completed_iterations, + failure_counts: crate::orchestration::FailureCounts { + total_candidate_failures, + }, + }) +} + +fn parse_events(contents: &str) -> Vec { + contents + .lines() + .filter_map(|line| { + let mut parts = line.split('|'); + let execution_id = parts.next()?; + let seq = parts.next()?.parse().ok()?; + let event_type = ControlEventType::from_str(parts.next()?)?; + Some(ControlEventEnvelope::new(execution_id, seq, event_type)) + }) + .collect() +} + +fn parse_accumulator(contents: &str) -> io::Result { + let mut lines = contents.lines(); + let scoring_history_len = required_line(&mut lines, "scoring_history_len")? + .parse() + .map_err(invalid_data)?; + let completed_iterations = required_line(&mut lines, "completed_iterations")? 
+ .parse() + .map_err(invalid_data)?; + let best_candidate_id = optional_line(&mut lines).filter(|value| !value.is_empty()); + let best_candidate_overrides = optional_line(&mut lines) + .filter(|value| !value.is_empty()) + .map(|value| serde_json::from_str(&value).map_err(invalid_data)) + .transpose()? + .unwrap_or_default(); + let search_phase = optional_line(&mut lines).filter(|value| !value.is_empty()); + let explored_signatures = optional_line(&mut lines) + .filter(|value| !value.is_empty()) + .map(|value| serde_json::from_str(&value).map_err(invalid_data)) + .transpose()? + .unwrap_or_default(); + let message_backlog = optional_line(&mut lines) + .filter(|value| !value.is_empty()) + .map(|value| serde_json::from_str(&value).map_err(invalid_data)) + .transpose()? + .unwrap_or_default(); + Ok(ExecutionAccumulator { + scoring_history_len, + completed_iterations, + best_candidate_id, + best_candidate_overrides, + search_phase, + explored_signatures, + message_backlog, + ..ExecutionAccumulator::default() + }) +} + +fn parse_candidate(contents: String) -> io::Result { + let mut lines = contents.lines(); + let execution_id = required_line(&mut lines, "execution_id")?; + let candidate_id = required_line(&mut lines, "candidate_id")?; + let created_seq = required_line(&mut lines, "created_seq")? + .parse() + .map_err(invalid_data)?; + let iteration = required_line(&mut lines, "iteration")? + .parse() + .map_err(invalid_data)?; + let status = required_line(&mut lines, "status")?; + let runtime_run_id = optional_line(&mut lines).filter(|value| !value.is_empty()); + let overrides = optional_line(&mut lines) + .filter(|value| !value.is_empty()) + .map(|value| serde_json::from_str(&value).map_err(invalid_data)) + .transpose()? 
+ .unwrap_or_default(); + let succeeded = optional_line(&mut lines) + .filter(|value| !value.is_empty()) + .map(|value| match value.as_str() { + "true" => Ok(true), + "false" => Ok(false), + _ => Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("invalid candidate success flag '{value}'"), + )), + }) + .transpose()?; + let metrics = optional_line(&mut lines) + .filter(|value| !value.is_empty()) + .map(|value| serde_json::from_str(&value).map_err(invalid_data)) + .transpose()? + .unwrap_or_default(); + Ok(ExecutionCandidate { + execution_id, + candidate_id, + created_seq, + iteration, + status: str_to_candidate_status(&status)?, + runtime_run_id, + overrides, + succeeded, + metrics, + }) +} + +fn required_line<'a>( + lines: &mut impl Iterator, + name: &str, +) -> io::Result { + lines + .next() + .map(|line| line.to_string()) + .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, format!("missing {name}"))) +} + +fn optional_line<'a>(lines: &mut impl Iterator) -> Option { + lines.next().map(|line| line.to_string()) +} + +fn status_to_str(status: &ExecutionStatus) -> &'static str { + match status { + ExecutionStatus::Pending => "Pending", + ExecutionStatus::Running => "Running", + ExecutionStatus::Paused => "Paused", + ExecutionStatus::Completed => "Completed", + ExecutionStatus::Failed => "Failed", + ExecutionStatus::Canceled => "Canceled", + } +} + +fn candidate_status_to_str(status: &CandidateStatus) -> &'static str { + match status { + CandidateStatus::Queued => "Queued", + CandidateStatus::Running => "Running", + CandidateStatus::Completed => "Completed", + CandidateStatus::Failed => "Failed", + CandidateStatus::Canceled => "Canceled", + } +} + +fn str_to_status(value: &str) -> io::Result { + match value { + "Pending" => Ok(ExecutionStatus::Pending), + "Running" => Ok(ExecutionStatus::Running), + "Paused" => Ok(ExecutionStatus::Paused), + "Completed" => Ok(ExecutionStatus::Completed), + "Failed" => Ok(ExecutionStatus::Failed), + "Canceled" => 
Ok(ExecutionStatus::Canceled), + _ => Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("unknown execution status '{value}'"), + )), + } +} + +fn str_to_candidate_status(value: &str) -> io::Result { + match value { + "Queued" => Ok(CandidateStatus::Queued), + "Running" => Ok(CandidateStatus::Running), + "Completed" => Ok(CandidateStatus::Completed), + "Failed" => Ok(CandidateStatus::Failed), + "Canceled" => Ok(CandidateStatus::Canceled), + _ => Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("unknown candidate status '{value}'"), + )), + } +} + +fn invalid_data(err: impl std::fmt::Display) -> io::Error { + io::Error::new(io::ErrorKind::InvalidData, err.to_string()) +} + +#[cfg(feature = "serde")] +fn validate_inbox_candidate_id(candidate_id: &str) -> io::Result<()> { + if candidate_id.is_empty() { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "candidate_id cannot be empty", + )); + } + let path = Path::new(candidate_id); + let mut components = path.components(); + match (components.next(), components.next()) { + (Some(Component::Normal(_)), None) => {} + _ => { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + format!("unsafe candidate_id '{candidate_id}'"), + )) + } + } + Ok(()) +} + +fn now_ms() -> u64 { + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|duration| duration.as_millis() as u64) + .unwrap_or(0) +} + +fn serialize_claim(claim: &ExecutionClaim) -> String { + format!("{}|{}", claim.worker_id, claim.claimed_at_ms) +} + +fn parse_claim(contents: &str) -> Option { + let trimmed = contents.trim(); + if trimmed.is_empty() { + return None; + } + let (worker_id, claimed_at_ms) = trimmed.split_once('|')?; + Some(ExecutionClaim { + worker_id: worker_id.to_string(), + claimed_at_ms: claimed_at_ms.parse().ok()?, + }) +} + +fn claim_is_stale(claim: &ExecutionClaim) -> bool { + now_ms().saturating_sub(claim.claimed_at_ms) > FsExecutionStore::CLAIM_TTL_MS +} diff --git 
a/src/orchestration/strategy.rs b/src/orchestration/strategy.rs new file mode 100644 index 0000000..4e1ab22 --- /dev/null +++ b/src/orchestration/strategy.rs @@ -0,0 +1,383 @@ +use super::policy::ConvergencePolicy; +use super::scoring::{score_iteration, RankedCandidate, ScoringConfig}; +use super::types::{CandidateInbox, CandidateOutput, CandidateSpec, ExecutionAccumulator}; +use super::variation::VariationConfig; +use std::collections::BTreeMap; + +#[derive(Debug, Clone, PartialEq)] +pub struct IterationEvaluation { + pub ranked_candidates: Vec, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum StopReason { + ConvergenceThreshold, + ConvergencePlateau, +} + +#[derive(Debug, Clone)] +pub struct SwarmStrategy { + variation: VariationConfig, + scoring: ScoringConfig, + convergence: ConvergencePolicy, +} + +#[derive(Debug, Clone)] +pub struct SearchStrategy { + variation: VariationConfig, + scoring: ScoringConfig, + convergence: ConvergencePolicy, +} + +impl Default for SwarmStrategy { + fn default() -> Self { + Self { + variation: VariationConfig::explicit(1, Vec::new()), + scoring: ScoringConfig { + metrics: Vec::new(), + pass_threshold: 0.0, + tie_break_metric: String::new(), + }, + convergence: ConvergencePolicy::default(), + } + } +} + +impl SwarmStrategy { + pub fn new( + variation: VariationConfig, + scoring: ScoringConfig, + convergence: ConvergencePolicy, + ) -> Self { + Self { + variation, + scoring, + convergence, + } + } + + pub fn materialize_inboxes( + &self, + accumulator: &ExecutionAccumulator, + ) -> Vec { + if accumulator.message_backlog.is_empty() { + return vec![CandidateInbox::new("candidate-1")]; + } + + accumulator + .message_backlog + .iter() + .enumerate() + .map(|(idx, message)| CandidateInbox { + candidate_id: format!("candidate-{}", idx + 1), + messages: vec![message.clone()], + }) + .collect() + } + + pub fn plan_candidates( + &self, + accumulator: &ExecutionAccumulator, + inboxes: &[CandidateInbox], + ) -> Vec { + 
self.variation + .generate(accumulator) + .into_iter() + .enumerate() + .map(|(idx, proposal)| CandidateSpec { + candidate_id: inboxes + .get(idx) + .map(|inbox| inbox.candidate_id.clone()) + .unwrap_or_else(|| format!("candidate-{}", idx + 1)), + overrides: proposal.overrides, + }) + .collect() + } + + pub fn evaluate( + &self, + _accumulator: &ExecutionAccumulator, + outputs: &[CandidateOutput], + ) -> IterationEvaluation { + IterationEvaluation { + ranked_candidates: score_iteration(&self.scoring, outputs), + } + } + + pub fn should_stop( + &self, + accumulator: &ExecutionAccumulator, + evaluation: &IterationEvaluation, + ) -> Option { + match self.convergence.strategy.as_str() { + "threshold" => { + let best = evaluation.ranked_candidates.first()?; + if best.score >= self.convergence.min_score.unwrap_or(f64::INFINITY) { + Some(StopReason::ConvergenceThreshold) + } else { + None + } + } + "plateau" => { + if accumulator.iterations_without_improvement + >= self + .convergence + .max_iterations_without_improvement + .unwrap_or(u32::MAX) + { + Some(StopReason::ConvergencePlateau) + } else { + None + } + } + _ => None, + } + } + + pub fn reduce( + &self, + mut accumulator: ExecutionAccumulator, + evaluation: IterationEvaluation, + ) -> ExecutionAccumulator { + accumulator.scoring_history_len += 1; + accumulator.completed_iterations += 1; + accumulator.failure_counts.total_candidate_failures += evaluation + .ranked_candidates + .iter() + .filter(|candidate| !candidate.pass) + .count() as u32; + if let Some(best) = evaluation.ranked_candidates.first() { + accumulator.best_candidate_id = Some(best.candidate_id.clone()); + } + accumulator + } +} + +impl SearchStrategy { + pub fn new( + variation: VariationConfig, + scoring: ScoringConfig, + convergence: ConvergencePolicy, + ) -> Self { + Self { + variation, + scoring, + convergence, + } + } + + pub fn materialize_inboxes( + &self, + accumulator: &ExecutionAccumulator, + ) -> Vec { + 
SwarmStrategy::default().materialize_inboxes(accumulator) + } + + pub fn plan_candidates( + &self, + accumulator: &ExecutionAccumulator, + inboxes: &[CandidateInbox], + ) -> Vec { + let proposals = if accumulator.best_candidate_overrides.is_empty() { + self.bootstrap_proposals(accumulator) + } else { + self.refinement_proposals(accumulator) + }; + + proposals + .into_iter() + .enumerate() + .map(|(idx, proposal)| CandidateSpec { + candidate_id: inboxes + .get(idx) + .map(|inbox| inbox.candidate_id.clone()) + .unwrap_or_else(|| format!("candidate-{}", idx + 1)), + overrides: proposal.overrides, + }) + .collect() + } + + pub fn evaluate( + &self, + _accumulator: &ExecutionAccumulator, + outputs: &[CandidateOutput], + ) -> IterationEvaluation { + IterationEvaluation { + ranked_candidates: score_iteration(&self.scoring, outputs), + } + } + + pub fn should_stop( + &self, + accumulator: &ExecutionAccumulator, + evaluation: &IterationEvaluation, + ) -> Option { + match self.convergence.strategy.as_str() { + "threshold" => { + let best = evaluation.ranked_candidates.first()?; + if best.score >= self.convergence.min_score.unwrap_or(f64::INFINITY) { + return Some(StopReason::ConvergenceThreshold); + } + } + "plateau" => { + if accumulator.iterations_without_improvement + >= self + .convergence + .max_iterations_without_improvement + .unwrap_or(u32::MAX) + { + return Some(StopReason::ConvergencePlateau); + } + } + _ => {} + } + + if !accumulator.best_candidate_overrides.is_empty() + && self.refinement_proposals(accumulator).is_empty() + { + return Some(StopReason::ConvergencePlateau); + } + None + } + + pub fn reduce( + &self, + mut accumulator: ExecutionAccumulator, + evaluation: IterationEvaluation, + ) -> ExecutionAccumulator { + let planned_candidates = self.plan_candidates( + &accumulator, + &self.materialize_inboxes(&accumulator), + ); + + accumulator.scoring_history_len += 1; + accumulator.completed_iterations += 1; + accumulator.failure_counts.total_candidate_failures 
+= evaluation + .ranked_candidates + .iter() + .filter(|candidate| !candidate.pass) + .count() as u32; + if let Some(best) = evaluation.ranked_candidates.first() { + accumulator.best_candidate_id = Some(best.candidate_id.clone()); + if let Some(spec) = planned_candidates + .iter() + .find(|candidate| candidate.candidate_id == best.candidate_id) + { + accumulator.best_candidate_overrides = spec.overrides.clone(); + let signature = candidate_signature(&spec.overrides); + if !signature.is_empty() && !accumulator.explored_signatures.contains(&signature) { + accumulator.explored_signatures.push(signature); + } + } + } + for candidate in &planned_candidates { + let signature = candidate_signature(&candidate.overrides); + if !signature.is_empty() && !accumulator.explored_signatures.contains(&signature) { + accumulator.explored_signatures.push(signature); + } + } + accumulator.search_phase = Some(if accumulator.best_candidate_overrides.is_empty() { + "bootstrap".to_string() + } else { + "refine".to_string() + }); + accumulator + } + + fn bootstrap_proposals( + &self, + accumulator: &ExecutionAccumulator, + ) -> Vec { + let mut generated = self.variation.generate(accumulator); + let bootstrap_size = self + .variation + .candidates_per_iteration + .min(2) + .max(1) as usize; + generated.truncate(bootstrap_size); + generated + } + + fn refinement_proposals( + &self, + accumulator: &ExecutionAccumulator, + ) -> Vec { + match self.variation.source.as_str() { + "explicit" => self.refine_explicit(accumulator), + "parameter_space" => self.refine_parameter_space(accumulator), + _ => Vec::new(), + } + .into_iter() + .filter(|proposal| { + let signature = candidate_signature(&proposal.overrides); + !accumulator.explored_signatures.contains(&signature) + }) + .take(self.variation.candidates_per_iteration as usize) + .collect() + } + + fn refine_explicit( + &self, + accumulator: &ExecutionAccumulator, + ) -> Vec { + if self.variation.explicit.is_empty() { + return Vec::new(); + } + let 
incumbent_index = self + .variation + .explicit + .iter() + .position(|proposal| proposal.overrides == accumulator.best_candidate_overrides) + .unwrap_or(0); + + let mut indices = Vec::new(); + if incumbent_index > 0 { + indices.push(incumbent_index - 1); + } + if incumbent_index + 1 < self.variation.explicit.len() { + indices.push(incumbent_index + 1); + } + for idx in 0..self.variation.explicit.len() { + if idx != incumbent_index && !indices.contains(&idx) { + indices.push(idx); + } + } + indices + .into_iter() + .map(|idx| self.variation.explicit[idx].clone()) + .collect() + } + + fn refine_parameter_space( + &self, + accumulator: &ExecutionAccumulator, + ) -> Vec { + let mut proposals = Vec::new(); + let incumbent = &accumulator.best_candidate_overrides; + for (path, values) in &self.variation.parameter_space { + let current = incumbent.get(path); + let Some(current_idx) = current.and_then(|value| values.iter().position(|candidate| candidate == value)) else { + continue; + }; + for neighbor_idx in [current_idx.checked_sub(1), Some(current_idx + 1)] + .into_iter() + .flatten() + { + if let Some(value) = values.get(neighbor_idx) { + let mut overrides = incumbent.clone(); + overrides.insert(path.clone(), value.clone()); + proposals.push(super::variation::VariationProposal { overrides }); + } + } + } + proposals + } +} + +fn candidate_signature(overrides: &BTreeMap) -> String { + overrides + .iter() + .map(|(key, value)| format!("{key}={value}")) + .collect::>() + .join("|") +} diff --git a/src/orchestration/types.rs b/src/orchestration/types.rs new file mode 100644 index 0000000..833ef25 --- /dev/null +++ b/src/orchestration/types.rs @@ -0,0 +1,256 @@ +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; +#[cfg(feature = "serde")] +use serde_json::Value; + +#[cfg_attr(feature = "serde", derive(Serialize))] +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ExecutionStatus { + Pending, + Running, + Paused, + Completed, + Failed, + Canceled, +} + 
+#[cfg_attr(feature = "serde", derive(Serialize))] +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Execution { + pub execution_id: String, + pub mode: String, + pub goal: String, + pub status: ExecutionStatus, + pub result_best_candidate_id: Option, + pub completed_iterations: u32, + pub failure_counts: FailureCounts, +} + +impl Execution { + pub fn new(execution_id: &str, mode: &str, goal: &str) -> Self { + Self { + execution_id: execution_id.to_string(), + mode: mode.to_string(), + goal: goal.to_string(), + status: ExecutionStatus::Pending, + result_best_candidate_id: None, + completed_iterations: 0, + failure_counts: FailureCounts::default(), + } + } +} + +#[cfg_attr(feature = "serde", derive(Serialize))] +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum CandidateStatus { + Queued, + Running, + Completed, + Failed, + Canceled, +} + +#[cfg_attr(feature = "serde", derive(Serialize))] +#[derive(Debug, Clone, PartialEq)] +pub struct ExecutionCandidate { + pub execution_id: String, + pub candidate_id: String, + pub created_seq: u64, + pub iteration: u32, + pub status: CandidateStatus, + pub runtime_run_id: Option, + pub overrides: std::collections::BTreeMap, + pub succeeded: Option, + pub metrics: std::collections::BTreeMap, +} + +impl ExecutionCandidate { + pub fn new( + execution_id: &str, + candidate_id: &str, + created_seq: u64, + iteration: u32, + status: CandidateStatus, + ) -> Self { + Self { + execution_id: execution_id.to_string(), + candidate_id: candidate_id.to_string(), + created_seq, + iteration, + status, + runtime_run_id: None, + overrides: std::collections::BTreeMap::new(), + succeeded: None, + metrics: std::collections::BTreeMap::new(), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Default)] +pub struct ExecutionAccumulator { + pub scoring_history_len: u32, + pub completed_iterations: u32, + pub message_backlog: Vec, + pub leader_proposals: Vec, + pub iterations_without_improvement: u32, + pub best_candidate_id: Option, + pub 
best_candidate_overrides: std::collections::BTreeMap, + pub search_phase: Option, + pub explored_signatures: Vec, + pub failure_counts: FailureCounts, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ExecutionSnapshot { + pub execution: Execution, + pub events: Vec, + pub accumulator: ExecutionAccumulator, + pub candidates: Vec, +} + +#[cfg_attr(feature = "serde", derive(Serialize))] +#[derive(Debug, Clone, PartialEq, Eq, Default)] +pub struct FailureCounts { + pub total_candidate_failures: u32, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct CandidateInbox { + pub candidate_id: String, + pub messages: Vec, +} + +impl CandidateInbox { + pub fn new(candidate_id: &str) -> Self { + Self { + candidate_id: candidate_id.to_string(), + messages: Vec::new(), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct CandidateSpec { + pub candidate_id: String, + pub overrides: std::collections::BTreeMap, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct CandidateOutput { + pub candidate_id: String, + pub succeeded: bool, + pub metrics: std::collections::BTreeMap, + #[cfg(feature = "serde")] + pub intents: Vec, +} + +impl CandidateOutput { + pub fn new( + candidate_id: impl Into, + succeeded: bool, + metrics: std::collections::BTreeMap, + ) -> Self { + Self { + candidate_id: candidate_id.into(), + succeeded, + metrics, + #[cfg(feature = "serde")] + intents: Vec::new(), + } + } + + #[cfg(feature = "serde")] + pub fn with_intents(mut self, intents: Vec) -> Self { + self.intents = intents; + self + } +} + +#[cfg(feature = "serde")] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Debug, Clone, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum CommunicationIntentKind { + Proposal, + Signal, + Evaluation, +} + +#[cfg(feature = "serde")] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Debug, Clone, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum CommunicationIntentAudience { + 
Leader, + Broadcast, +} + +#[cfg(feature = "serde")] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Debug, Clone, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum CommunicationIntentPriority { + Low, + Normal, + High, +} + +#[cfg(feature = "serde")] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Debug, Clone, PartialEq)] +pub struct CommunicationIntent { + pub intent_id: String, + pub from_candidate_id: String, + pub iteration: u32, + pub kind: CommunicationIntentKind, + pub audience: CommunicationIntentAudience, + pub payload: Value, + pub priority: CommunicationIntentPriority, + pub ttl_iterations: u32, + pub caused_by: Option, + pub context: Option, +} + +#[cfg(feature = "serde")] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Debug, Clone, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum RoutedMessageStatus { + Routed, + Delivered, + Expired, + Dropped, +} + +#[cfg(feature = "serde")] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Debug, Clone, PartialEq)] +pub struct RoutedMessage { + pub message_id: String, + pub intent_id: String, + pub to: String, + pub delivery_iteration: u32, + pub routing_reason: String, + pub status: RoutedMessageStatus, +} + +#[cfg(feature = "serde")] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Debug, Clone, PartialEq)] +pub struct InboxEntry { + pub message_id: String, + pub intent_id: String, + pub from_candidate_id: String, + pub kind: CommunicationIntentKind, + pub payload: Value, +} + +#[cfg(feature = "serde")] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Debug, Clone, PartialEq)] +pub struct InboxSnapshot { + pub execution_id: String, + pub candidate_id: String, + pub iteration: u32, + pub entries: Vec, +} diff --git a/src/orchestration/variation.rs b/src/orchestration/variation.rs new file mode 100644 index 0000000..fbca2b0 --- /dev/null 
+++ b/src/orchestration/variation.rs @@ -0,0 +1,108 @@ +use std::collections::BTreeMap; + +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + +use super::types::ExecutionAccumulator; + +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum VariationSelection { + Random, + Sequential, +} + +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct VariationProposal { + pub overrides: BTreeMap, +} + +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct VariationConfig { + pub source: String, + pub candidates_per_iteration: u32, + pub selection: Option, + pub parameter_space: BTreeMap>, + pub explicit: Vec, +} + +impl VariationConfig { + pub fn parameter_space( + candidates_per_iteration: u32, + selection: VariationSelection, + parameter_space: BTreeMap>, + ) -> Self { + Self { + source: "parameter_space".to_string(), + candidates_per_iteration, + selection: Some(selection), + parameter_space, + explicit: Vec::new(), + } + } + + pub fn explicit(candidates_per_iteration: u32, explicit: Vec) -> Self { + Self { + source: "explicit".to_string(), + candidates_per_iteration, + selection: None, + parameter_space: BTreeMap::new(), + explicit, + } + } + + pub fn leader_directed(candidates_per_iteration: u32) -> Self { + Self { + source: "leader_directed".to_string(), + candidates_per_iteration, + selection: None, + parameter_space: BTreeMap::new(), + explicit: Vec::new(), + } + } + + pub fn generate(&self, accumulator: &ExecutionAccumulator) -> Vec { + match self.source.as_str() { + "parameter_space" => self.generate_parameter_space(), + "explicit" => self.generate_explicit(accumulator), + "leader_directed" => accumulator + .leader_proposals + .iter() + .filter(|proposal| !proposal.overrides.is_empty()) + .take(self.candidates_per_iteration as usize) + .cloned() + .collect(), + _ 
=> Vec::new(), + } + } + + fn generate_parameter_space(&self) -> Vec { + let Some((path, values)) = self.parameter_space.iter().next() else { + return Vec::new(); + }; + + let iter: Box> = match self.selection.unwrap_or(VariationSelection::Sequential) { + VariationSelection::Sequential => Box::new(values.iter().cloned()), + VariationSelection::Random => Box::new(values.iter().rev().cloned()), + }; + + iter.take(self.candidates_per_iteration as usize) + .map(|value| VariationProposal { + overrides: BTreeMap::from([(path.clone(), value)]), + }) + .collect() + } + + fn generate_explicit(&self, accumulator: &ExecutionAccumulator) -> Vec { + if self.explicit.is_empty() { + return Vec::new(); + } + + let start = accumulator.scoring_history_len as usize % self.explicit.len(); + (0..self.candidates_per_iteration as usize) + .map(|offset| self.explicit[(start + offset) % self.explicit.len()].clone()) + .collect() + } +} diff --git a/src/runtime/mock.rs b/src/runtime/mock.rs index 81ea723..9a703cb 100644 --- a/src/runtime/mock.rs +++ b/src/runtime/mock.rs @@ -5,6 +5,7 @@ use crate::contract::{ ContractError, ContractErrorCode, EventEnvelope, EventType, RunState, RuntimeInspection, StartRequest, StartResult, StopRequest, StopResult, SubscribeEventsRequest, }; +use crate::orchestration::{CandidateOutput, StructuredOutputResult}; #[derive(Debug, Clone)] struct RunRecord { @@ -20,6 +21,14 @@ struct RunRecord { next_seq: u64, } +#[derive(Debug, Clone)] +enum SeededOutcome { + Success(CandidateOutput), + Failure, + MissingOutput, + MalformedOutput, +} + impl RunRecord { fn new(run_id: String) -> Self { let now = now_rfc3339_like(); @@ -58,6 +67,7 @@ impl RunRecord { #[derive(Debug, Default)] pub struct MockRuntime { runs: Vec, + seeded: BTreeMap, } impl MockRuntime { @@ -88,6 +98,25 @@ impl MockRuntime { let mut record = RunRecord::new(request.run_id); record.push_event(EventType::RunStarted, BTreeMap::new()); + match self.seeded.get(&record.run_id) { + 
Some(SeededOutcome::Success(_)) | Some(SeededOutcome::MissingOutput) => { + record.state = RunState::Succeeded; + record.push_event(EventType::RunCompleted, BTreeMap::new()); + } + Some(SeededOutcome::MalformedOutput) => { + record.state = RunState::Failed; + record.exit_code = Some(1); + record.terminal_reason = Some("malformed structured output".to_string()); + record.push_event(EventType::RunFailed, BTreeMap::new()); + } + Some(SeededOutcome::Failure) => { + record.state = RunState::Failed; + record.exit_code = Some(1); + record.terminal_reason = Some("seeded failure".to_string()); + record.push_event(EventType::RunFailed, BTreeMap::new()); + } + None => {} + } let result = StartResult { handle: record.handle.clone(), attempt_id: record.attempt_id, @@ -176,6 +205,38 @@ impl MockRuntime { Ok(record.events.clone()) } + + pub fn seed_success(&mut self, run_id: &str, output: CandidateOutput) { + self.seeded + .insert(run_id.to_string(), SeededOutcome::Success(output)); + } + + pub fn seed_failure(&mut self, run_id: &str) { + self.seeded + .insert(run_id.to_string(), SeededOutcome::Failure); + } + + pub fn seed_missing_output(&mut self, run_id: &str) { + self.seeded + .insert(run_id.to_string(), SeededOutcome::MissingOutput); + } + + pub fn seed_malformed_output(&mut self, run_id: &str) { + self.seeded + .insert(run_id.to_string(), SeededOutcome::MalformedOutput); + } + + pub fn take_structured_output(&mut self, run_id: &str) -> StructuredOutputResult { + match self.seeded.get(run_id) { + Some(SeededOutcome::Success(output)) => StructuredOutputResult::Found(output.clone()), + Some(SeededOutcome::MalformedOutput) => StructuredOutputResult::Error(ContractError::new( + ContractErrorCode::StructuredOutputMalformed, + format!("run '{run_id}' emitted malformed structured output"), + false, + )), + _ => StructuredOutputResult::Missing, + } + } } fn now_rfc3339_like() -> String { @@ -207,6 +268,7 @@ mod tests { let req = StartRequest { run_id: "run-1".to_string(), 
workflow_spec: "workflow".to_string(), + launch_context: None, policy: policy(), }; @@ -223,6 +285,7 @@ mod tests { let req = StartRequest { run_id: "run-2".to_string(), workflow_spec: "workflow".to_string(), + launch_context: None, policy: policy(), }; let started = runtime.start(req).expect("start"); @@ -244,6 +307,7 @@ mod tests { let req = StartRequest { run_id: "run-3".to_string(), workflow_spec: "workflow".to_string(), + launch_context: None, policy: policy(), }; let started = runtime.start(req).expect("start"); @@ -271,6 +335,7 @@ mod tests { .start(StartRequest { run_id: "run-4".to_string(), workflow_spec: "workflow".to_string(), + launch_context: None, policy: ExecutionPolicy { max_parallel_microvms_per_run: 0, max_stage_retries: 1, @@ -290,6 +355,7 @@ mod tests { .start(StartRequest { run_id: "run-5".to_string(), workflow_spec: "workflow".to_string(), + launch_context: None, policy: policy(), }) .expect("start"); @@ -310,4 +376,3 @@ mod tests { assert!(events.iter().any(|e| e.event_type == EventType::RunCanceled)); } } - diff --git a/src/runtime/mod.rs b/src/runtime/mod.rs index 48fa928..fc3515c 100644 --- a/src/runtime/mod.rs +++ b/src/runtime/mod.rs @@ -2,6 +2,76 @@ mod mock; #[cfg(feature = "serde")] mod void_box; +use crate::contract::{ContractError, RuntimeInspection, StartRequest, StartResult}; +use crate::orchestration::{ExecutionRuntime, StructuredOutputResult}; + pub use mock::MockRuntime; #[cfg(feature = "serde")] pub use void_box::VoidBoxRuntimeClient; + +#[cfg(feature = "serde")] +use crate::orchestration::{CandidateSpec, InboxSnapshot}; + +#[cfg(feature = "serde")] +pub trait ProviderLaunchAdapter { + fn prepare_launch_request( + &self, + request: StartRequest, + candidate: &CandidateSpec, + inbox: &InboxSnapshot, + ) -> StartRequest; +} + +#[cfg(feature = "serde")] +#[derive(Debug, Default, Clone, Copy)] +pub struct LaunchInjectionAdapter; + +#[cfg(feature = "serde")] +impl ProviderLaunchAdapter for LaunchInjectionAdapter { + fn 
prepare_launch_request( + &self, + request: StartRequest, + candidate: &CandidateSpec, + inbox: &InboxSnapshot, + ) -> StartRequest { + debug_assert_eq!(candidate.candidate_id, inbox.candidate_id); + let launch_context = serde_json::to_string(inbox).expect("serialize inbox snapshot"); + StartRequest { + launch_context: Some(launch_context), + ..request + } + } +} + +impl ExecutionRuntime for MockRuntime { + fn start_run(&mut self, request: StartRequest) -> Result { + self.start(request) + } + + fn inspect_run(&self, handle: &str) -> Result { + self.inspect(handle) + } + + fn take_structured_output(&mut self, run_id: &str) -> StructuredOutputResult { + self.take_structured_output(run_id) + } +} + +#[cfg(feature = "serde")] +impl ExecutionRuntime for VoidBoxRuntimeClient { + fn start_run(&mut self, request: StartRequest) -> Result { + self.start(request) + } + + fn inspect_run(&self, handle: &str) -> Result { + self.inspect(handle) + } + + fn take_structured_output(&mut self, run_id: &str) -> StructuredOutputResult { + match self.fetch_structured_output(run_id) { + Ok(Some(output)) => StructuredOutputResult::Found(output), + Ok(None) => StructuredOutputResult::Missing, + Err(err) => StructuredOutputResult::Error(err), + } + } +} diff --git a/src/runtime/void_box.rs b/src/runtime/void_box.rs index 80fea23..00ec891 100644 --- a/src/runtime/void_box.rs +++ b/src/runtime/void_box.rs @@ -6,6 +6,7 @@ use crate::contract::{ map_void_box_status, ConvertedRunView, EventEnvelope, EventType, RunState, RuntimeInspection, StartRequest, StartResult, StopRequest, StopResult, SubscribeEventsRequest, }; +use crate::orchestration::CandidateOutput; pub struct VoidBoxRuntimeClient { base_url: String, @@ -44,9 +45,19 @@ impl VoidBoxRuntimeClient { ContractError::new(ContractErrorCode::InvalidPolicy, msg, false) })?; + let input = match request.launch_context.as_deref() { + Some(context) => Some(serde_json::from_str(context).map_err(|e| { + ContractError::new( + 
ContractErrorCode::InvalidSpec, + format!("invalid launch_context JSON: {e}"), + false, + ) + })?), + None => None, + }; let payload = serde_json::json!({ "file": request.workflow_spec, - "input": serde_json::Value::Null + "input": input.unwrap_or(serde_json::Value::Null) }) .to_string(); @@ -268,6 +279,64 @@ impl VoidBoxRuntimeClient { )) } + pub fn fetch_structured_output( + &self, + run_id: &str, + ) -> Result, ContractError> { + if let Some(retrieval_path) = self.find_manifest_artifact_path(run_id, None, "result.json")? { + let response = self.http_get(&retrieval_path)?; + return match parse_artifact_response(&response, ContractErrorCode::StructuredOutputMissing)? { + Some(body) => parse_structured_output(run_id, &body).map(Some), + None => Ok(None), + }; + } + + let mut last_missing_error = None; + for stage in ["main", "output"] { + let path = format!("/v1/runs/{run_id}/stages/{stage}/output-file"); + let response = self.http_get(&path)?; + if response.status == 404 { + if let Some(err) = parse_api_error(&response.body) { + match err.code { + ContractErrorCode::StructuredOutputMissing + | ContractErrorCode::ArtifactNotFound + | ContractErrorCode::NotFound => { + last_missing_error = Some(err); + continue; + } + _ => return Err(err), + } + } + continue; + } + if response.status >= 400 { + return Err(map_http_error( + response.status, + &response.body, + "structured output fetch failed", + )); + } + return parse_structured_output(run_id, &response.body).map(Some); + } + if let Some(err) = last_missing_error { + return Err(err); + } + Ok(None) + } + + pub fn fetch_named_artifact( + &self, + run_id: &str, + stage: &str, + name: &str, + ) -> Result, ContractError> { + let path = self + .find_manifest_artifact_path(run_id, Some(stage), name)? 
+ .unwrap_or_else(|| format!("/v1/runs/{run_id}/stages/{stage}/artifacts/{name}")); + let response = self.http_get(&path)?; + parse_artifact_response(&response, ContractErrorCode::ArtifactNotFound) + } + fn fetch_converted_run(&self, run_id: &str) -> Result { let run_path = format!("/v1/runs/{run_id}"); let events_path = format!("/v1/runs/{run_id}/events"); @@ -304,6 +373,28 @@ impl VoidBoxRuntimeClient { fn http_post(&self, path: &str, body: &str) -> Result { self.transport.request(&self.base_url, "POST", path, body) } + + fn find_manifest_artifact_path( + &self, + run_id: &str, + stage: Option<&str>, + name: &str, + ) -> Result, ContractError> { + let run_path = format!("/v1/runs/{run_id}"); + let run_resp = self.http_get(&run_path)?; + if run_resp.status == 404 { + return Ok(None); + } + if run_resp.status >= 400 { + return Err(map_http_error( + run_resp.status, + &run_resp.body, + "inspect failed while resolving artifact manifest", + )); + } + + manifest_retrieval_path(&run_resp.body, stage, name) + } } trait HttpTransport { @@ -447,6 +538,187 @@ fn filter_events_from_id(events: Vec, from_event_id: Option<&str> events } +fn manifest_retrieval_path( + run_body: &str, + stage: Option<&str>, + name: &str, +) -> Result, ContractError> { + let value: serde_json::Value = serde_json::from_str(run_body).map_err(|e| { + ContractError::new( + ContractErrorCode::InvalidSpec, + format!("invalid run JSON: {e}"), + false, + ) + })?; + let Some(manifest) = value + .get("artifact_publication") + .and_then(|value| value.get("manifest")) + .and_then(serde_json::Value::as_array) + else { + return Ok(None); + }; + + for entry in manifest { + let entry_name = entry.get("name").and_then(serde_json::Value::as_str); + let entry_stage = entry.get("stage").and_then(serde_json::Value::as_str); + let retrieval_path = entry + .get("retrieval_path") + .and_then(serde_json::Value::as_str); + if entry_name == Some(name) + && retrieval_path.is_some() + && stage.map(|wanted| Some(wanted) == 
entry_stage).unwrap_or(true) + { + return Ok(retrieval_path.map(normalize_retrieval_path)); + } + } + + Ok(None) +} + +fn parse_artifact_response( + response: &HttpResponse, + default_not_found: ContractErrorCode, +) -> Result, ContractError> { + if response.status == 404 { + if let Some(err) = parse_api_error(&response.body) { + return match err.code { + ContractErrorCode::ArtifactNotFound | ContractErrorCode::NotFound + if default_not_found == ContractErrorCode::ArtifactNotFound => + { + Ok(None) + } + ContractErrorCode::StructuredOutputMissing => Err(err), + _ => Err(err), + }; + } + return Ok(None); + } + if response.status >= 400 { + return Err(map_http_error( + response.status, + &response.body, + "artifact retrieval failed", + )); + } + if response.body.trim().is_empty() { + return Err(ContractError::new( + default_not_found, + "artifact body was empty", + false, + )); + } + Ok(Some(response.body.clone())) +} + +fn map_http_error(status: u16, body: &str, fallback: &str) -> ContractError { + parse_api_error(body).unwrap_or_else(|| { + ContractError::new( + ContractErrorCode::InternalError, + format!("{fallback}: HTTP {status}"), + status >= 500, + ) + }) +} + +fn normalize_retrieval_path(path: &str) -> String { + if path.starts_with('/') { + path.to_string() + } else { + format!("/{path}") + } +} + +fn parse_api_error(body: &str) -> Option { + let value: serde_json::Value = serde_json::from_str(body).ok()?; + let code = value.get("code")?.as_str()?; + let message = value + .get("message") + .and_then(serde_json::Value::as_str) + .unwrap_or(code) + .to_string(); + let retryable = value + .get("retryable") + .and_then(serde_json::Value::as_bool) + .unwrap_or(false); + Some(ContractError::new( + map_error_code(code), + message, + retryable, + )) +} + +fn map_error_code(code: &str) -> ContractErrorCode { + match code { + "INVALID_SPEC" => ContractErrorCode::InvalidSpec, + "INVALID_POLICY" => ContractErrorCode::InvalidPolicy, + "NOT_FOUND" => 
ContractErrorCode::NotFound, + "ALREADY_TERMINAL" => ContractErrorCode::AlreadyTerminal, + "RESOURCE_LIMIT_EXCEEDED" => ContractErrorCode::ResourceLimitExceeded, + "STRUCTURED_OUTPUT_MISSING" => ContractErrorCode::StructuredOutputMissing, + "STRUCTURED_OUTPUT_MALFORMED" => ContractErrorCode::StructuredOutputMalformed, + "ARTIFACT_NOT_FOUND" => ContractErrorCode::ArtifactNotFound, + "ARTIFACT_PUBLICATION_INCOMPLETE" => ContractErrorCode::ArtifactPublicationIncomplete, + "ARTIFACT_STORE_UNAVAILABLE" => ContractErrorCode::ArtifactStoreUnavailable, + "RETRIEVAL_TIMEOUT" => ContractErrorCode::RetrievalTimeout, + _ => ContractErrorCode::InternalError, + } +} + +fn parse_structured_output( + run_id: &str, + body: &str, +) -> Result { + let value: serde_json::Value = serde_json::from_str(body).map_err(|e| { + ContractError::new( + ContractErrorCode::StructuredOutputMalformed, + format!("invalid structured output JSON: {e}"), + false, + ) + })?; + let metrics = value + .get("metrics") + .and_then(serde_json::Value::as_object) + .ok_or_else(|| { + ContractError::new( + ContractErrorCode::StructuredOutputMalformed, + "structured output missing metrics object", + false, + ) + })?; + + let parsed_metrics = metrics + .iter() + .filter_map(|(key, value)| value.as_f64().map(|number| (key.clone(), number))) + .collect(); + + let mut output = CandidateOutput::new( + run_id.to_string(), + value + .get("status") + .and_then(serde_json::Value::as_str) + .map(|status| status.eq_ignore_ascii_case("success")) + .unwrap_or(true), + parsed_metrics, + ); + #[cfg(feature = "serde")] + if let Some(intents) = value.get("intents").and_then(serde_json::Value::as_array) { + output.intents = intents + .iter() + .cloned() + .map(serde_json::from_value) + .collect::, _>>() + .map_err(|e| { + ContractError::new( + ContractErrorCode::StructuredOutputMalformed, + format!("invalid structured output intents: {e}"), + false, + ) + })?; + } + + Ok(output) +} + #[cfg(test)] mod tests { use 
super::{filter_events_from_id, HttpResponse, HttpTransport, VoidBoxRuntimeClient}; @@ -455,7 +727,7 @@ mod tests { StopRequest, SubscribeEventsRequest, }; use std::collections::{BTreeMap, HashMap}; - use std::sync::Mutex; + use std::sync::{Arc, Mutex}; struct MockTransport { routes: Mutex>, @@ -500,6 +772,38 @@ mod tests { } } + #[derive(Clone)] + struct CaptureTransport { + response: HttpResponse, + requests: Arc>>, + } + + impl CaptureTransport { + fn new(response: HttpResponse) -> Self { + Self { + response, + requests: Arc::new(Mutex::new(Vec::new())), + } + } + } + + impl HttpTransport for CaptureTransport { + fn request( + &self, + _base_url: &str, + method: &str, + path: &str, + body: &str, + ) -> Result { + self.requests.lock().expect("lock").push(( + method.to_string(), + path.to_string(), + body.to_string(), + )); + Ok(self.response.clone()) + } + } + fn client(routes: Vec<(&str, &str, u16, &str)>) -> VoidBoxRuntimeClient { VoidBoxRuntimeClient::with_transport( "http://mock:3000".to_string(), @@ -517,6 +821,170 @@ mod tests { } } + #[test] + fn fetches_structured_output_from_stage_output_file() { + let client = client(vec![( + "GET", + "/v1/runs/run-123/stages/main/output-file", + 200, + r#"{"status":"success","summary":"ok","metrics":{"latency_p99_ms":87,"cost_usd":0.018},"artifacts":[]}"#, + )]); + + let output = client + .fetch_structured_output("run-123") + .expect("fetch") + .expect("output"); + + assert_eq!(output.candidate_id, "run-123"); + assert!(output.succeeded); + assert_eq!(output.metrics.get("latency_p99_ms"), Some(&87.0)); + assert_eq!(output.metrics.get("cost_usd"), Some(&0.018)); + } + + #[test] + fn returns_none_when_structured_output_file_missing() { + let client = client(vec![]); + + let output = client.fetch_structured_output("run-missing").expect("fetch"); + + assert!(output.is_none()); + } + + #[test] + fn fetch_structured_output_prefers_manifested_result_json() { + let client = client(vec![ + ( + "GET", + "/v1/runs/run-123", + 200, 
+ r#"{ + "artifact_publication": { + "manifest": [ + { + "name": "result.json", + "stage": "main", + "retrieval_path": "/v1/runs/run-123/stages/main/artifacts/result.json" + } + ] + } + }"#, + ), + ( + "GET", + "/v1/runs/run-123/stages/main/artifacts/result.json", + 200, + r#"{"status":"success","summary":"ok","metrics":{"latency_p99_ms":77},"artifacts":[]}"#, + ), + ]); + + let output = client + .fetch_structured_output("run-123") + .expect("fetch") + .expect("output"); + + assert_eq!(output.metrics.get("latency_p99_ms"), Some(&77.0)); + } + + #[test] + fn fetch_structured_output_maps_missing_output_error() { + let client = client(vec![ + ("GET", "/v1/runs/run-missing-output", 200, r#"{"id":"run-missing-output","status":"Completed"}"#), + ( + "GET", + "/v1/runs/run-missing-output/stages/main/output-file", + 404, + r#"{"code":"STRUCTURED_OUTPUT_MISSING","message":"missing result.json","retryable":false}"#, + ), + ]); + + let err = client + .fetch_structured_output("run-missing-output") + .expect_err("expected missing-output error"); + + assert_eq!(err.code, ContractErrorCode::StructuredOutputMissing); + assert!(!err.retryable); + } + + #[test] + fn fetch_structured_output_falls_back_to_output_stage_after_main_404() { + let client = client(vec![ + ("GET", "/v1/runs/run-output-stage", 200, r#"{"id":"run-output-stage","status":"Completed"}"#), + ( + "GET", + "/v1/runs/run-output-stage/stages/main/output-file", + 404, + r#"{"code":"STRUCTURED_OUTPUT_MISSING","message":"main missing result.json","retryable":false}"#, + ), + ( + "GET", + "/v1/runs/run-output-stage/stages/output/output-file", + 200, + r#"{"status":"success","summary":"ok","metrics":{"latency_p99_ms":66},"artifacts":[]}"#, + ), + ]); + + let output = client + .fetch_structured_output("run-output-stage") + .expect("fetch") + .expect("output"); + + assert_eq!(output.metrics.get("latency_p99_ms"), Some(&66.0)); + } + + #[test] + fn fetch_structured_output_maps_malformed_output_error() { + let client = 
client(vec![ + ("GET", "/v1/runs/run-malformed", 200, r#"{"id":"run-malformed","status":"Completed"}"#), + ( + "GET", + "/v1/runs/run-malformed/stages/main/output-file", + 200, + r#"{"status":"success","metrics":not-json}"#, + ), + ]); + + let err = client + .fetch_structured_output("run-malformed") + .expect_err("expected malformed-output error"); + + assert_eq!(err.code, ContractErrorCode::StructuredOutputMalformed); + } + + #[test] + fn fetch_named_artifact_uses_manifest_retrieval_path() { + let client = client(vec![ + ( + "GET", + "/v1/runs/run-123", + 200, + r#"{ + "artifact_publication": { + "manifest": [ + { + "name": "report.md", + "stage": "main", + "retrieval_path": "v1/runs/run-123/stages/main/artifacts/report.md" + } + ] + } + }"#, + ), + ( + "GET", + "/v1/runs/run-123/stages/main/artifacts/report.md", + 200, + "# report\nartifact body", + ), + ]); + + let artifact = client + .fetch_named_artifact("run-123", "main", "report.md") + .expect("fetch") + .expect("artifact"); + + assert!(artifact.contains("artifact body")); + } + #[test] fn start_returns_handle_and_running_state() { let c = client(vec![("POST", "/v1/runs", 200, r#"{"run_id":"run-123"}"#)]); @@ -524,6 +992,7 @@ mod tests { .start(StartRequest { run_id: "controller-run-1".to_string(), workflow_spec: "fixtures/sample.vbrun".to_string(), + launch_context: None, policy: policy(), }) .expect("start"); @@ -533,6 +1002,57 @@ mod tests { assert_eq!(c.poll_interval_ms(), 250); } + #[test] + fn start_serializes_launch_context_into_input_payload() { + let transport = CaptureTransport::new(HttpResponse { + status: 200, + body: r#"{"run_id":"run-123"}"#.to_string(), + }); + let requests = transport.requests.clone(); + let c = VoidBoxRuntimeClient::with_transport( + "http://mock:3000".to_string(), + 250, + Box::new(transport), + ); + + let snapshot = serde_json::json!({ + "execution_id": "exec-message-box", + "candidate_id": "candidate-1", + "iteration": 1, + "entries": [ + { + "message_id": "message-1", + 
"intent_id": "intent-1", + "from_candidate_id": "candidate-source", + "kind": "proposal", + "payload": { + "summary_text": "summary-one", + "strategy_hint": "hint-one" + } + } + ] + }); + + let started = c + .start(StartRequest { + run_id: "controller-run-1".to_string(), + workflow_spec: "fixtures/sample.vbrun".to_string(), + launch_context: Some(snapshot.to_string()), + policy: policy(), + }) + .expect("start"); + + assert_eq!(started.handle, "vb:run-123"); + let recorded = requests.lock().expect("lock"); + assert_eq!(recorded.len(), 1); + assert_eq!(recorded[0].0, "POST"); + assert_eq!(recorded[0].1, "/v1/runs"); + let body: serde_json::Value = + serde_json::from_str(&recorded[0].2).expect("parse request body"); + assert_eq!(body.get("file").and_then(serde_json::Value::as_str), Some("fixtures/sample.vbrun")); + assert_eq!(body.get("input"), Some(&snapshot)); + } + #[test] fn inspect_maps_daemon_run_state() { let c = client(vec![( diff --git a/tests/execution_artifact_collection.rs b/tests/execution_artifact_collection.rs new file mode 100644 index 0000000..c040edd --- /dev/null +++ b/tests/execution_artifact_collection.rs @@ -0,0 +1,203 @@ +use std::collections::BTreeMap; + +use void_control::orchestration::{ + CandidateOutput, ExecutionService, ExecutionSpec, ExecutionStatus, FsExecutionStore, + GlobalConfig, OrchestrationPolicy, VariationConfig, VariationProposal, +}; +use void_control::runtime::MockRuntime; + +#[test] +fn missing_output_can_mark_failed() { + let mut runtime = MockRuntime::new(); + runtime.seed_missing_output("exec-run-candidate-1"); + runtime.seed_success( + "exec-run-candidate-2", + output("candidate-2", &[("latency_p99_ms", 80.0), ("cost_usd", 0.03)]), + ); + + let store = FsExecutionStore::new(temp_store_dir("missing-failed")); + let mut service = ExecutionService::new(GlobalConfig { max_concurrent_child_runs: 2 }, runtime, store); + let execution = service + .run_to_completion(spec_with_missing_output_policy("mark_failed")) + .expect("run 
execution"); + + assert_eq!(execution.status, ExecutionStatus::Failed); + assert_eq!(execution.failure_counts.total_candidate_failures, 1); +} + +#[test] +fn missing_output_can_mark_incomplete_without_failure_count() { + let mut runtime = MockRuntime::new(); + runtime.seed_missing_output("exec-run-candidate-1"); + runtime.seed_success( + "exec-run-candidate-2", + output("candidate-2", &[("latency_p99_ms", 80.0), ("cost_usd", 0.03)]), + ); + + let store = FsExecutionStore::new(temp_store_dir("missing-incomplete")); + let mut service = ExecutionService::new(GlobalConfig { max_concurrent_child_runs: 2 }, runtime, store); + let execution = service + .run_to_completion(spec_with_continue_missing_output()) + .expect("run execution"); + + assert_eq!(execution.status, ExecutionStatus::Completed); + assert_eq!(execution.failure_counts.total_candidate_failures, 0); + assert_eq!(execution.result_best_candidate_id.as_deref(), Some("candidate-2")); +} + +#[test] +fn iteration_failure_policy_continue_advances_despite_all_failures() { + let mut runtime = MockRuntime::new(); + runtime.seed_failure("exec-run-candidate-1"); + runtime.seed_failure("exec-run-candidate-2"); + runtime.seed_success( + "exec-run-candidate-3", + output("candidate-3", &[("latency_p99_ms", 75.0), ("cost_usd", 0.02)]), + ); + runtime.seed_success( + "exec-run-candidate-4", + output("candidate-4", &[("latency_p99_ms", 78.0), ("cost_usd", 0.02)]), + ); + + let store = FsExecutionStore::new(temp_store_dir("continue")); + let mut service = ExecutionService::new(GlobalConfig { max_concurrent_child_runs: 2 }, runtime, store); + let execution = service + .run_to_completion(spec_with_iteration_failure_policy("continue", 2)) + .expect("run execution"); + + assert_eq!(execution.status, ExecutionStatus::Completed); + assert_eq!(execution.completed_iterations, 2); +} + +#[test] +fn iteration_failure_policy_retry_retries_once() { + let mut runtime = MockRuntime::new(); + runtime.seed_failure("exec-run-candidate-1"); + 
runtime.seed_failure("exec-run-candidate-2"); + runtime.seed_success( + "exec-run-candidate-3", + output("candidate-3", &[("latency_p99_ms", 74.0), ("cost_usd", 0.02)]), + ); + runtime.seed_success( + "exec-run-candidate-4", + output("candidate-4", &[("latency_p99_ms", 76.0), ("cost_usd", 0.02)]), + ); + + let store = FsExecutionStore::new(temp_store_dir("retry")); + let mut service = ExecutionService::new(GlobalConfig { max_concurrent_child_runs: 2 }, runtime, store); + let execution = service + .run_to_completion(spec_with_iteration_failure_policy("retry_iteration", 1)) + .expect("run execution"); + + assert_eq!(execution.status, ExecutionStatus::Completed); + assert_eq!(execution.completed_iterations, 1); +} + +#[test] +fn malformed_output_is_counted_as_candidate_failure() { + let mut runtime = MockRuntime::new(); + runtime.seed_malformed_output("exec-run-candidate-1"); + runtime.seed_success( + "exec-run-candidate-2", + output("candidate-2", &[("latency_p99_ms", 80.0), ("cost_usd", 0.03)]), + ); + + let store = FsExecutionStore::new(temp_store_dir("malformed-output")); + let mut service = ExecutionService::new(GlobalConfig { max_concurrent_child_runs: 2 }, runtime, store); + let execution = service + .run_to_completion(spec_with_missing_output_policy("mark_failed")) + .expect("run execution"); + + assert_eq!(execution.status, ExecutionStatus::Failed); + assert_eq!(execution.failure_counts.total_candidate_failures, 1); +} + +fn spec_with_missing_output_policy(policy_name: &str) -> ExecutionSpec { + let mut spec = base_spec(1); + spec.policy.max_candidate_failures_per_iteration = 1; + spec.policy.missing_output_policy = policy_name.to_string(); + spec +} + +fn spec_with_continue_missing_output() -> ExecutionSpec { + let mut spec = base_spec(1); + spec.policy.missing_output_policy = "mark_incomplete".to_string(); + spec.policy.max_candidate_failures_per_iteration = 10; + spec +} + +fn spec_with_iteration_failure_policy(policy_name: &str, max_iterations: u32) -> 
ExecutionSpec { + let mut spec = base_spec(max_iterations); + spec.policy.iteration_failure_policy = policy_name.to_string(); + spec.policy.max_candidate_failures_per_iteration = 10; + spec +} + +fn base_spec(max_iterations: u32) -> ExecutionSpec { + ExecutionSpec { + mode: "swarm".to_string(), + goal: "optimize latency".to_string(), + workflow: void_control::orchestration::WorkflowTemplateRef { + template: "fixtures/sample.vbrun".to_string(), + }, + policy: OrchestrationPolicy { + budget: void_control::orchestration::BudgetPolicy { + max_iterations: Some(max_iterations), + max_child_runs: None, + max_wall_clock_secs: Some(60), + max_cost_usd_millis: None, + }, + concurrency: void_control::orchestration::ConcurrencyPolicy { + max_concurrent_candidates: 2, + }, + convergence: void_control::orchestration::ConvergencePolicy { + strategy: "exhaustive".to_string(), + min_score: None, + max_iterations_without_improvement: None, + }, + max_candidate_failures_per_iteration: 10, + missing_output_policy: "mark_failed".to_string(), + iteration_failure_policy: "fail_execution".to_string(), + }, + evaluation: void_control::orchestration::EvaluationConfig { + scoring_type: "weighted_metrics".to_string(), + weights: BTreeMap::from([ + ("latency_p99_ms".to_string(), -0.6), + ("cost_usd".to_string(), -0.4), + ]), + pass_threshold: Some(0.7), + ranking: "highest_score".to_string(), + tie_breaking: "cost_usd".to_string(), + }, + variation: VariationConfig::explicit( + 2, + vec![ + VariationProposal { + overrides: BTreeMap::from([("agent.prompt".to_string(), "a".to_string())]), + }, + VariationProposal { + overrides: BTreeMap::from([("agent.prompt".to_string(), "b".to_string())]), + }, + ], + ), + swarm: true, + } +} + +fn output(candidate_id: &str, metrics: &[(&str, f64)]) -> CandidateOutput { + CandidateOutput::new( + candidate_id.to_string(), + true, + metrics.iter().map(|(k, v)| (k.to_string(), *v)).collect(), + ) +} + +fn temp_store_dir(label: &str) -> std::path::PathBuf { + let 
nanos = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("clock") + .as_nanos(); + let dir = std::env::temp_dir().join(format!("void-control-artifacts-{label}-{nanos}")); + std::fs::create_dir_all(&dir).expect("create temp dir"); + dir +} diff --git a/tests/execution_bridge.rs b/tests/execution_bridge.rs new file mode 100644 index 0000000..17131b3 --- /dev/null +++ b/tests/execution_bridge.rs @@ -0,0 +1,491 @@ +#![cfg(feature = "serde")] + +use serde_json::json; + +#[test] +fn dry_run_endpoint_returns_plan_without_creating_execution() { + let body = json!({ + "mode": "swarm", + "goal": "optimize latency", + "workflow": { "template": "fixtures/sample.vbrun" }, + "policy": { + "budget": { + "max_iterations": 3, + "max_wall_clock_secs": 60 + }, + "concurrency": { + "max_concurrent_candidates": 2 + }, + "convergence": { + "strategy": "exhaustive" + }, + "max_candidate_failures_per_iteration": 10, + "missing_output_policy": "mark_failed", + "iteration_failure_policy": "fail_execution" + }, + "evaluation": { + "scoring_type": "weighted_metrics", + "weights": { + "latency_p99_ms": -0.6, + "cost_usd": -0.4 + }, + "pass_threshold": 0.7, + "ranking": "highest_score", + "tie_breaking": "cost_usd" + }, + "variation": { + "source": "explicit", + "candidates_per_iteration": 2, + "explicit": [ + { "overrides": { "agent.prompt": "a" } }, + { "overrides": { "agent.prompt": "b" } } + ] + }, + "swarm": true + }) + .to_string(); + + let response = void_control::bridge::handle_bridge_request_for_test( + "POST", + "/v1/executions/dry-run", + Some(&body), + ) + .expect("response"); + + assert_eq!(response.status, 200); + assert_eq!(response.json["valid"], true); + assert_eq!(response.json["plan"]["max_child_runs"], 6); +} + +#[test] +fn dry_run_endpoint_returns_validation_errors() { + let body = json!({ + "mode": "swarm", + "goal": "optimize latency", + "workflow": { "template": "fixtures/sample.vbrun" }, + "policy": { + "budget": {}, + "concurrency": { + 
"max_concurrent_candidates": 2 + }, + "convergence": { + "strategy": "threshold" + }, + "max_candidate_failures_per_iteration": 10, + "missing_output_policy": "mark_failed", + "iteration_failure_policy": "fail_execution" + }, + "evaluation": { + "scoring_type": "weighted_metrics", + "weights": { + "latency_p99_ms": -0.6, + "cost_usd": -0.4 + }, + "pass_threshold": 0.7, + "ranking": "highest_score", + "tie_breaking": "cost_usd" + }, + "variation": { + "source": "explicit", + "candidates_per_iteration": 2, + "explicit": [ + { "overrides": { "agent.prompt": "a" } } + ] + }, + "swarm": true + }) + .to_string(); + + let response = void_control::bridge::handle_bridge_request_for_test( + "POST", + "/v1/executions/dry-run", + Some(&body), + ) + .expect("response"); + + assert_eq!(response.status, 400); + assert!(response.json["errors"].as_array().is_some()); +} + +#[test] +fn create_list_and_get_execution_routes_round_trip() { + let root = temp_root("create-round-trip"); + let spec_dir = root.join("specs"); + let execution_dir = root.join("executions"); + let body = valid_spec_body(); + + let created = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "POST", + "/v1/executions", + Some(&body), + &spec_dir, + &execution_dir, + ) + .expect("create"); + + assert_eq!(created.status, 200); + assert_eq!(created.json["status"], "Pending"); + let execution_id = created.json["execution_id"] + .as_str() + .expect("execution_id") + .to_string(); + + let listed = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "GET", + "/v1/executions", + None, + &spec_dir, + &execution_dir, + ) + .expect("list"); + assert_eq!(listed.status, 200); + assert_eq!( + listed.json["executions"].as_array().map(|items| items.len()), + Some(1) + ); + + let fetched = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "GET", + &format!("/v1/executions/{execution_id}"), + None, + &spec_dir, + &execution_dir, + ) + .expect("get"); + assert_eq!(fetched.status, 200); 
+ assert_eq!(fetched.json["execution"]["execution_id"], execution_id); + assert_eq!(fetched.json["progress"]["event_count"], 2); + assert_eq!(fetched.json["progress"]["event_type_counts"]["ExecutionCreated"], 1); + assert_eq!(fetched.json["progress"]["event_type_counts"]["ExecutionSubmitted"], 1); + assert_eq!(fetched.json["progress"]["candidate_queue_count"], 0); + assert_eq!(fetched.json["result"]["best_candidate_id"], serde_json::Value::Null); + assert_eq!(fetched.json["result"]["completed_iterations"], 0); + assert_eq!(fetched.json["result"]["total_candidate_failures"], 0); +} + +#[test] +fn create_execution_route_accepts_search_specs() { + let root = temp_root("create-search"); + let spec_dir = root.join("specs"); + let execution_dir = root.join("executions"); + let body = valid_spec_body_for_mode("search"); + + let created = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "POST", + "/v1/executions", + Some(&body), + &spec_dir, + &execution_dir, + ) + .expect("create"); + + assert_eq!(created.status, 200); + assert_eq!(created.json["status"], "Pending"); + assert_eq!(created.json["mode"], "search"); +} + +#[test] +fn get_execution_events_route_returns_persisted_event_stream() { + let root = temp_root("execution-events"); + let spec_dir = root.join("specs"); + let execution_dir = root.join("executions"); + let body = valid_spec_body(); + + let created = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "POST", + "/v1/executions", + Some(&body), + &spec_dir, + &execution_dir, + ) + .expect("create"); + let execution_id = created.json["execution_id"] + .as_str() + .expect("execution_id") + .to_string(); + + let events = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "GET", + &format!("/v1/executions/{execution_id}/events"), + None, + &spec_dir, + &execution_dir, + ) + .expect("events"); + + assert_eq!(events.status, 200); + let items = events.json["events"].as_array().expect("events array"); + 
assert_eq!(items.len(), 2); + assert_eq!(items[0]["event_type"], "ExecutionCreated"); + assert_eq!(items[1]["event_type"], "ExecutionSubmitted"); +} + +#[test] +fn get_execution_route_reports_current_candidate_status_counts() { + let root = temp_root("execution-progress-counts"); + let spec_dir = root.join("specs"); + let execution_dir = root.join("executions"); + let body = valid_spec_body(); + + let created = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "POST", + "/v1/executions", + Some(&body), + &spec_dir, + &execution_dir, + ) + .expect("create"); + let execution_id = created.json["execution_id"] + .as_str() + .expect("execution_id") + .to_string(); + + let store = void_control::orchestration::FsExecutionStore::new(execution_dir.clone()); + let mut planner = void_control::orchestration::ExecutionService::new( + void_control::orchestration::GlobalConfig { + max_concurrent_child_runs: 2, + }, + void_control::runtime::MockRuntime::new(), + store, + ); + planner.plan_execution(&execution_id).expect("plan"); + + let fetched = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "GET", + &format!("/v1/executions/{execution_id}"), + None, + &spec_dir, + &execution_dir, + ) + .expect("get"); + + assert_eq!(fetched.status, 200); + assert_eq!(fetched.json["progress"]["queued_candidate_count"], 2); + assert_eq!(fetched.json["progress"]["running_candidate_count"], 0); + assert_eq!(fetched.json["progress"]["completed_candidate_count"], 0); + assert_eq!(fetched.json["progress"]["failed_candidate_count"], 0); + assert_eq!(fetched.json["progress"]["canceled_candidate_count"], 0); +} + +#[test] +fn get_execution_route_returns_not_found_for_missing_execution() { + let root = temp_root("missing-execution"); + let response = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "GET", + "/v1/executions/does-not-exist", + None, + &root.join("specs"), + &root.join("executions"), + ) + .expect("response"); + + assert_eq!(response.status, 
404); + assert_eq!(response.json["code"], "NOT_FOUND"); +} + +#[test] +fn pause_resume_and_cancel_execution_routes_update_persisted_status() { + let root = temp_root("status-transitions"); + let spec_dir = root.join("specs"); + let execution_dir = root.join("executions"); + seed_execution(&execution_dir, "exec-running", "Running"); + + let paused = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "POST", + "/v1/executions/exec-running/pause", + None, + &spec_dir, + &execution_dir, + ) + .expect("pause"); + assert_eq!(paused.status, 200); + assert_eq!(paused.json["status"], "Paused"); + + let resumed = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "POST", + "/v1/executions/exec-running/resume", + None, + &spec_dir, + &execution_dir, + ) + .expect("resume"); + assert_eq!(resumed.status, 200); + assert_eq!(resumed.json["status"], "Running"); + + let canceled = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "POST", + "/v1/executions/exec-running/cancel", + None, + &spec_dir, + &execution_dir, + ) + .expect("cancel"); + assert_eq!(canceled.status, 200); + assert_eq!(canceled.json["status"], "Canceled"); +} + +#[test] +fn pause_route_rejects_invalid_transition() { + let root = temp_root("invalid-transition"); + let spec_dir = root.join("specs"); + let execution_dir = root.join("executions"); + seed_execution(&execution_dir, "exec-complete", "Completed"); + + let response = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "POST", + "/v1/executions/exec-complete/pause", + None, + &spec_dir, + &execution_dir, + ) + .expect("pause"); + + assert_eq!(response.status, 400); + assert_eq!(response.json["code"], "INVALID_STATE"); +} + +#[test] +fn patch_policy_updates_mutable_budget_and_concurrency_fields() { + let root = temp_root("policy-patch"); + let spec_dir = root.join("specs"); + let execution_dir = root.join("executions"); + let body = valid_spec_body(); + + let created = 
void_control::bridge::handle_bridge_request_with_dirs_for_test( + "POST", + "/v1/executions", + Some(&body), + &spec_dir, + &execution_dir, + ) + .expect("create"); + let execution_id = created.json["execution_id"].as_str().expect("execution_id"); + + let patch = serde_json::json!({ + "budget": { + "max_iterations": 5 + }, + "concurrency": { + "max_concurrent_candidates": 4 + } + }) + .to_string(); + + let patched = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "PATCH", + &format!("/v1/executions/{execution_id}/policy"), + Some(&patch), + &spec_dir, + &execution_dir, + ) + .expect("patch"); + + assert_eq!(patched.status, 200); + assert_eq!(patched.json["max_iterations"], 5); + assert_eq!(patched.json["max_concurrent_candidates"], 4); +} + +#[test] +fn patch_policy_rejects_immutable_convergence_fields() { + let root = temp_root("policy-immutable"); + let spec_dir = root.join("specs"); + let execution_dir = root.join("executions"); + let body = valid_spec_body(); + + let created = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "POST", + "/v1/executions", + Some(&body), + &spec_dir, + &execution_dir, + ) + .expect("create"); + let execution_id = created.json["execution_id"].as_str().expect("execution_id"); + + let patch = serde_json::json!({ + "convergence": { + "strategy": "threshold", + "min_score": 0.9 + } + }) + .to_string(); + + let patched = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "PATCH", + &format!("/v1/executions/{execution_id}/policy"), + Some(&patch), + &spec_dir, + &execution_dir, + ) + .expect("patch"); + + assert_eq!(patched.status, 400); + assert_eq!(patched.json["code"], "INVALID_POLICY"); +} + +fn valid_spec_body() -> String { + valid_spec_body_for_mode("swarm") +} + +fn valid_spec_body_for_mode(mode: &str) -> String { + json!({ + "mode": mode, + "goal": "optimize latency", + "workflow": { "template": "fixtures/sample.vbrun" }, + "policy": { + "budget": { + "max_iterations": 1, + 
"max_wall_clock_secs": 60 + }, + "concurrency": { + "max_concurrent_candidates": 2 + }, + "convergence": { + "strategy": "exhaustive" + }, + "max_candidate_failures_per_iteration": 10, + "missing_output_policy": "mark_incomplete", + "iteration_failure_policy": "continue" + }, + "evaluation": { + "scoring_type": "weighted_metrics", + "weights": { + "latency_p99_ms": -0.6, + "cost_usd": -0.4 + }, + "pass_threshold": 0.7, + "ranking": "highest_score", + "tie_breaking": "cost_usd" + }, + "variation": { + "source": "explicit", + "candidates_per_iteration": 2, + "explicit": [ + { "overrides": { "agent.prompt": "a" } }, + { "overrides": { "agent.prompt": "b" } } + ] + }, + "swarm": true + }) + .to_string() +} + +fn temp_root(label: &str) -> std::path::PathBuf { + let nanos = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("clock") + .as_nanos(); + let dir = std::env::temp_dir().join(format!("void-control-bridge-{label}-{nanos}")); + std::fs::create_dir_all(&dir).expect("create temp dir"); + dir +} + +fn seed_execution(root: &std::path::Path, execution_id: &str, status: &str) { + let dir = root.join(execution_id); + std::fs::create_dir_all(&dir).expect("execution dir"); + std::fs::write( + dir.join("execution.txt"), + format!("{execution_id}\nswarm\ngoal\n{status}"), + ) + .expect("write execution"); +} diff --git a/tests/execution_bridge_live.rs b/tests/execution_bridge_live.rs new file mode 100644 index 0000000..1ac729c --- /dev/null +++ b/tests/execution_bridge_live.rs @@ -0,0 +1,515 @@ +#![cfg(feature = "serde")] + +use std::collections::BTreeMap; +use std::fs; +use std::path::PathBuf; +use std::time::{SystemTime, UNIX_EPOCH}; + +use serde_json::json; + +use void_control::orchestration::{ + BudgetPolicy, ConcurrencyPolicy, ConvergencePolicy, EvaluationConfig, ExecutionSpec, + GlobalConfig, OrchestrationPolicy, VariationConfig, VariationProposal, + WorkflowTemplateRef, +}; +use void_control::runtime::VoidBoxRuntimeClient; + +#[test] 
+#[ignore = "requires live void-box daemon"] +fn bridge_submission_and_worker_loop_complete_execution_against_live_daemon() { + let root = temp_root("bridge-live"); + let spec_dir = root.join("specs"); + let execution_dir = root.join("executions"); + let spec = structured_output_spec(); + let body = execution_request_json(&spec); + + let created = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "POST", + "/v1/executions", + Some(&body), + &spec_dir, + &execution_dir, + ) + .expect("create execution"); + assert_eq!(created.status, 200); + + let execution_id = created.json["execution_id"] + .as_str() + .expect("execution_id") + .to_string(); + + let base_url = std::env::var("VOID_BOX_BASE_URL") + .unwrap_or_else(|_| "http://127.0.0.1:43100".to_string()); + + let mut attempts = 0; + loop { + attempts += 1; + void_control::bridge::process_pending_executions_once_for_test( + GlobalConfig { + max_concurrent_child_runs: 20, + }, + VoidBoxRuntimeClient::new(base_url.clone(), 250), + execution_dir.clone(), + ) + .expect("process pending"); + + let fetched = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "GET", + &format!("/v1/executions/{execution_id}"), + None, + &spec_dir, + &execution_dir, + ) + .expect("get execution"); + + let status = fetched.json["execution"]["status"] + .as_str() + .expect("status"); + if matches!(status, "Completed" | "Failed" | "Canceled") { + assert_eq!(status, "Completed", "execution payload={}", fetched.json); + assert!( + fetched.json["progress"]["event_count"] + .as_u64() + .unwrap_or_default() + >= 6 + ); + + let events = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "GET", + &format!("/v1/executions/{execution_id}/events"), + None, + &spec_dir, + &execution_dir, + ) + .expect("get execution events"); + assert_eq!(events.status, 200); + let items = events.json["events"].as_array().expect("events array"); + assert!(items.iter().any(|event| event["event_type"] == "ExecutionStarted")); + 
assert!(items + .iter() + .any(|event| event["event_type"] == "CandidateOutputCollected")); + assert!(items + .iter() + .any(|event| event["event_type"] == "ExecutionCompleted")); + break; + } + + assert!(attempts < 20, "execution did not reach terminal state"); + std::thread::sleep(std::time::Duration::from_millis(250)); + } +} + +#[test] +#[ignore = "requires live void-box daemon"] +fn bridge_multiple_executions_complete_against_live_daemon() { + let root = temp_root("bridge-live-multi"); + let spec_dir = root.join("specs"); + let execution_dir = root.join("executions"); + let spec = structured_output_spec(); + let body = execution_request_json(&spec); + let base_url = std::env::var("VOID_BOX_BASE_URL") + .unwrap_or_else(|_| "http://127.0.0.1:43100".to_string()); + + let first = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "POST", + "/v1/executions", + Some(&body), + &spec_dir, + &execution_dir, + ) + .expect("create first execution"); + let second = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "POST", + "/v1/executions", + Some(&body), + &spec_dir, + &execution_dir, + ) + .expect("create second execution"); + + let first_id = first.json["execution_id"] + .as_str() + .expect("first execution_id") + .to_string(); + let second_id = second.json["execution_id"] + .as_str() + .expect("second execution_id") + .to_string(); + + let mut first_done = false; + let mut second_done = false; + for _ in 0..20 { + void_control::bridge::process_pending_executions_once_for_test( + GlobalConfig { + max_concurrent_child_runs: 20, + }, + VoidBoxRuntimeClient::new(base_url.clone(), 250), + execution_dir.clone(), + ) + .expect("process pending"); + + for (execution_id, done) in [(&first_id, &mut first_done), (&second_id, &mut second_done)] { + if *done { + continue; + } + let fetched = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "GET", + &format!("/v1/executions/{execution_id}"), + None, + &spec_dir, + &execution_dir, + ) 
+ .expect("get execution"); + let status = fetched.json["execution"]["status"] + .as_str() + .expect("status"); + if status == "Completed" { + *done = true; + } else { + assert_eq!(status, "Running", "execution payload={}", fetched.json); + } + } + + if first_done && second_done { + break; + } + std::thread::sleep(std::time::Duration::from_millis(150)); + } + + assert!(first_done, "first execution did not complete"); + assert!(second_done, "second execution did not complete"); +} + +#[test] +#[ignore = "requires live void-box daemon"] +fn bridge_pause_resume_and_cancel_work_against_live_daemon() { + let root = temp_root("bridge-live-control"); + let spec_dir = root.join("specs"); + let execution_dir = root.join("executions"); + let spec = long_running_spec(); + let body = execution_request_json(&spec); + let base_url = std::env::var("VOID_BOX_BASE_URL") + .unwrap_or_else(|_| "http://127.0.0.1:43100".to_string()); + + let created = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "POST", + "/v1/executions", + Some(&body), + &spec_dir, + &execution_dir, + ) + .expect("create execution"); + let execution_id = created.json["execution_id"] + .as_str() + .expect("execution_id") + .to_string(); + + let pause_execution_dir = execution_dir.clone(); + let pause_spec_dir = spec_dir.clone(); + let pause_execution_id = execution_id.clone(); + std::thread::spawn(move || { + std::thread::sleep(std::time::Duration::from_millis(250)); + let _ = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "POST", + &format!("/v1/executions/{pause_execution_id}/pause"), + None, + &pause_spec_dir, + &pause_execution_dir, + ); + }); + + void_control::bridge::process_pending_executions_once_for_test( + GlobalConfig { + max_concurrent_child_runs: 20, + }, + VoidBoxRuntimeClient::new(base_url.clone(), 250), + execution_dir.clone(), + ) + .expect("pause processing pass"); + + let paused = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "GET", + 
&format!("/v1/executions/{execution_id}"), + None, + &spec_dir, + &execution_dir, + ) + .expect("get paused execution"); + assert_eq!(paused.json["execution"]["status"], "Paused"); + + let resumed = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "POST", + &format!("/v1/executions/{execution_id}/resume"), + None, + &spec_dir, + &execution_dir, + ) + .expect("resume"); + assert_eq!(resumed.json["status"], "Running"); + + let cancel_execution_dir = execution_dir.clone(); + let cancel_spec_dir = spec_dir.clone(); + let cancel_execution_id = execution_id.clone(); + std::thread::spawn(move || { + std::thread::sleep(std::time::Duration::from_millis(250)); + let _ = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "POST", + &format!("/v1/executions/{cancel_execution_id}/cancel"), + None, + &cancel_spec_dir, + &cancel_execution_dir, + ); + }); + + let mut canceled = None; + for _ in 0..10 { + void_control::bridge::process_pending_executions_once_for_test( + GlobalConfig { + max_concurrent_child_runs: 20, + }, + VoidBoxRuntimeClient::new(base_url.clone(), 250), + execution_dir.clone(), + ) + .expect("cancel processing pass"); + + let fetched = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "GET", + &format!("/v1/executions/{execution_id}"), + None, + &spec_dir, + &execution_dir, + ) + .expect("get canceled execution"); + if fetched.json["execution"]["status"] == "Canceled" { + canceled = Some(fetched); + break; + } + std::thread::sleep(std::time::Duration::from_millis(100)); + } + let canceled = canceled.expect("execution should reach canceled state"); + assert_eq!(canceled.json["execution"]["status"], "Canceled"); + + let events = void_control::bridge::handle_bridge_request_with_dirs_for_test( + "GET", + &format!("/v1/executions/{execution_id}/events"), + None, + &spec_dir, + &execution_dir, + ) + .expect("get events"); + let items = events.json["events"].as_array().expect("events array"); + 
assert!(items.iter().any(|event| event["event_type"] == "ExecutionPaused")); + assert!(items.iter().any(|event| event["event_type"] == "ExecutionResumed")); + assert!(items.iter().any(|event| event["event_type"] == "ExecutionCanceled")); +} + +fn structured_output_spec() -> ExecutionSpec { + let path = fallback_structured_output_spec_path(); + fs::write( + &path, + r#"api_version: v1 +kind: workflow +name: structured-output-success + +sandbox: + mode: mock + network: false + +workflow: + steps: + - name: produce + run: + program: sh + args: + - -lc + - | + cat > result.json <<'JSON' + {"status":"success","summary":"ok","metrics":{"latency_p99_ms":87,"cost_usd":0.018},"artifacts":[]} + JSON + output_step: produce +"#, + ) + .expect("write fixture"); + + ExecutionSpec { + mode: "swarm".to_string(), + goal: "optimize latency".to_string(), + workflow: WorkflowTemplateRef { + template: path.to_string_lossy().to_string(), + }, + policy: OrchestrationPolicy { + budget: BudgetPolicy { + max_iterations: Some(1), + max_child_runs: None, + max_wall_clock_secs: Some(60), + max_cost_usd_millis: None, + }, + concurrency: ConcurrencyPolicy { + max_concurrent_candidates: 1, + }, + convergence: ConvergencePolicy { + strategy: "exhaustive".to_string(), + min_score: None, + max_iterations_without_improvement: None, + }, + max_candidate_failures_per_iteration: 1, + missing_output_policy: "mark_failed".to_string(), + iteration_failure_policy: "fail_execution".to_string(), + }, + evaluation: EvaluationConfig { + scoring_type: "weighted_metrics".to_string(), + weights: BTreeMap::from([ + ("latency_p99_ms".to_string(), -0.6), + ("cost_usd".to_string(), -0.4), + ]), + pass_threshold: Some(0.7), + ranking: "highest_score".to_string(), + tie_breaking: "cost_usd".to_string(), + }, + variation: VariationConfig::explicit( + 1, + vec![VariationProposal { + overrides: BTreeMap::from([("agent.prompt".to_string(), "a".to_string())]), + }], + ), + swarm: true, + } +} + +fn long_running_spec() -> 
ExecutionSpec { + let path = fallback_long_running_spec_path(); + fs::write( + &path, + r#"api_version: v1 +kind: workflow +name: long-running + +sandbox: + mode: local + network: false + +workflow: + steps: + - name: wait + run: + program: sleep + args: ["5"] + - name: produce + depends_on: [wait] + run: + program: sh + args: + - -lc + - | + cat > result.json <<'JSON' + {"status":"success","summary":"ok","metrics":{"duration":5},"artifacts":[]} + JSON + output_step: produce +"#, + ) + .expect("write fixture"); + + ExecutionSpec { + mode: "swarm".to_string(), + goal: "exercise pause cancel".to_string(), + workflow: WorkflowTemplateRef { + template: path.to_string_lossy().to_string(), + }, + policy: OrchestrationPolicy { + budget: BudgetPolicy { + max_iterations: Some(1), + max_child_runs: None, + max_wall_clock_secs: Some(60), + max_cost_usd_millis: None, + }, + concurrency: ConcurrencyPolicy { + max_concurrent_candidates: 1, + }, + convergence: ConvergencePolicy { + strategy: "exhaustive".to_string(), + min_score: None, + max_iterations_without_improvement: None, + }, + max_candidate_failures_per_iteration: 1, + missing_output_policy: "mark_failed".to_string(), + iteration_failure_policy: "fail_execution".to_string(), + }, + evaluation: EvaluationConfig { + scoring_type: "weighted_metrics".to_string(), + weights: BTreeMap::from([("duration".to_string(), -1.0)]), + pass_threshold: Some(0.0), + ranking: "highest_score".to_string(), + tie_breaking: "duration".to_string(), + }, + variation: VariationConfig::explicit( + 1, + vec![VariationProposal { + overrides: BTreeMap::from([("agent.prompt".to_string(), "a".to_string())]), + }], + ), + swarm: true, + } +} + +fn execution_request_json(spec: &ExecutionSpec) -> String { + serde_json::to_string(&json!({ + "mode": spec.mode, + "goal": spec.goal, + "workflow": { "template": spec.workflow.template }, + "policy": { + "budget": { + "max_iterations": spec.policy.budget.max_iterations, + "max_wall_clock_secs": 
spec.policy.budget.max_wall_clock_secs + }, + "concurrency": { + "max_concurrent_candidates": spec.policy.concurrency.max_concurrent_candidates + }, + "convergence": { + "strategy": spec.policy.convergence.strategy + }, + "max_candidate_failures_per_iteration": spec.policy.max_candidate_failures_per_iteration, + "missing_output_policy": spec.policy.missing_output_policy, + "iteration_failure_policy": spec.policy.iteration_failure_policy + }, + "evaluation": { + "scoring_type": spec.evaluation.scoring_type, + "weights": spec.evaluation.weights, + "pass_threshold": spec.evaluation.pass_threshold, + "ranking": spec.evaluation.ranking, + "tie_breaking": spec.evaluation.tie_breaking + }, + "variation": { + "source": "explicit", + "candidates_per_iteration": spec.variation.candidates_per_iteration, + "explicit": spec.variation.explicit.iter().map(|proposal| json!({"overrides": proposal.overrides})).collect::<Vec<_>>() + }, + "swarm": spec.swarm + })) + .expect("serialize spec") +} + +fn fallback_structured_output_spec_path() -> PathBuf { + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("clock") + .as_nanos(); + std::env::temp_dir().join(format!("void-control-bridge-live-structured-output-{nanos}.yaml")) +} + +fn fallback_long_running_spec_path() -> PathBuf { + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("clock") + .as_nanos(); + std::env::temp_dir().join(format!("void-control-bridge-live-long-running-{nanos}.yaml")) +} + +fn temp_root(label: &str) -> PathBuf { + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("clock") + .as_nanos(); + let dir = std::env::temp_dir().join(format!("void-control-bridge-{label}-{nanos}")); + fs::create_dir_all(&dir).expect("create temp dir"); + dir +} diff --git a/tests/execution_dry_run.rs b/tests/execution_dry_run.rs new file mode 100644 index 0000000..24f65bb --- /dev/null +++ b/tests/execution_dry_run.rs @@ -0,0 +1,117 @@ +use std::collections::BTreeMap; + +use 
void_control::orchestration::{ + ExecutionService, ExecutionSpec, FsExecutionStore, GlobalConfig, OrchestrationPolicy, + VariationConfig, VariationProposal, +}; +use void_control::runtime::MockRuntime; + +#[test] +fn dry_run_validates_without_creating_execution() { + let store_dir = temp_store_dir("dry-run-valid"); + let store = FsExecutionStore::new(store_dir.clone()); + let service = ExecutionService::new(GlobalConfig { max_concurrent_child_runs: 4 }, MockRuntime::new(), store); + + let result = service.dry_run(&spec(3)).expect("dry run"); + + assert!(result.valid); + assert!(std::fs::read_dir(store_dir).expect("read dir").next().is_none()); +} + +#[test] +fn dry_run_returns_plan_warnings_and_errors() { + let store = FsExecutionStore::new(temp_store_dir("dry-run-errors")); + let service = ExecutionService::new(GlobalConfig { max_concurrent_child_runs: 4 }, MockRuntime::new(), store); + let mut spec = spec(3); + spec.policy.budget.max_wall_clock_secs = None; + spec.policy.budget.max_iterations = None; + spec.policy.budget.max_cost_usd_millis = None; + + let result = service.dry_run(&spec).expect("dry run"); + + assert!(!result.valid); + assert!(!result.errors.is_empty()); +} + +#[test] +fn dry_run_reports_parameter_space_cardinality() { + let store = FsExecutionStore::new(temp_store_dir("dry-run-cardinality")); + let service = ExecutionService::new(GlobalConfig { max_concurrent_child_runs: 4 }, MockRuntime::new(), store); + let spec = ExecutionSpec { + variation: VariationConfig::parameter_space( + 2, + void_control::orchestration::VariationSelection::Sequential, + BTreeMap::from([ + ("sandbox.env.CONCURRENCY".to_string(), vec!["2".to_string(), "4".to_string()]), + ("sandbox.memory_mb".to_string(), vec!["512".to_string(), "1024".to_string()]), + ]), + ), + ..spec(3) + }; + + let result = service.dry_run(&spec).expect("dry run"); + + assert_eq!(result.plan.parameter_space_size, Some(4)); + assert_eq!(result.plan.max_child_runs, Some(6)); +} + +fn 
spec(max_iterations: u32) -> ExecutionSpec { + ExecutionSpec { + mode: "swarm".to_string(), + goal: "optimize latency".to_string(), + workflow: void_control::orchestration::WorkflowTemplateRef { + template: "fixtures/sample.vbrun".to_string(), + }, + policy: OrchestrationPolicy { + budget: void_control::orchestration::BudgetPolicy { + max_iterations: Some(max_iterations), + max_child_runs: None, + max_wall_clock_secs: Some(60), + max_cost_usd_millis: None, + }, + concurrency: void_control::orchestration::ConcurrencyPolicy { + max_concurrent_candidates: 2, + }, + convergence: void_control::orchestration::ConvergencePolicy { + strategy: "exhaustive".to_string(), + min_score: None, + max_iterations_without_improvement: None, + }, + max_candidate_failures_per_iteration: 10, + missing_output_policy: "mark_failed".to_string(), + iteration_failure_policy: "fail_execution".to_string(), + }, + evaluation: void_control::orchestration::EvaluationConfig { + scoring_type: "weighted_metrics".to_string(), + weights: BTreeMap::from([ + ("latency_p99_ms".to_string(), -0.6), + ("cost_usd".to_string(), -0.4), + ]), + pass_threshold: Some(0.7), + ranking: "highest_score".to_string(), + tie_breaking: "cost_usd".to_string(), + }, + variation: VariationConfig::explicit( + 2, + vec![ + VariationProposal { + overrides: BTreeMap::from([("agent.prompt".to_string(), "a".to_string())]), + }, + VariationProposal { + overrides: BTreeMap::from([("agent.prompt".to_string(), "b".to_string())]), + }, + ], + ), + swarm: true, + } +} + +fn temp_store_dir(label: &str) -> std::path::PathBuf { + let nanos = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("clock") + .as_nanos(); + let dir = std::env::temp_dir().join(format!("void-control-dry-run-{label}-{nanos}")); + std::fs::create_dir_all(&dir).expect("create temp dir"); + dir +} diff --git a/tests/execution_event_replay.rs b/tests/execution_event_replay.rs new file mode 100644 index 0000000..18bd27a --- /dev/null +++ 
b/tests/execution_event_replay.rs @@ -0,0 +1,145 @@ +use std::env; +use std::fs; +use std::path::PathBuf; +use std::time::{SystemTime, UNIX_EPOCH}; + +use void_control::orchestration::{ + ControlEventEnvelope, ControlEventType, Execution, ExecutionAccumulator, ExecutionSnapshot, + ExecutionStatus, FsExecutionStore, +}; + +#[test] +fn execution_state_advances_from_control_plane_events() { + let execution = Execution::new("exec-1", "swarm", "optimize latency"); + let events = vec![ + event(ControlEventType::ExecutionCreated), + event(ControlEventType::ExecutionSubmitted), + event(ControlEventType::ExecutionStarted), + event(ControlEventType::ExecutionCompleted), + ]; + + let snapshot = ExecutionSnapshot::replay(execution, &events); + + assert_eq!(snapshot.execution.status, ExecutionStatus::Completed); +} + +#[test] +fn warning_events_do_not_advance_execution_state() { + let execution = Execution::new("exec-2", "swarm", "optimize latency"); + let events = vec![ + event(ControlEventType::ExecutionCreated), + event(ControlEventType::ExecutionSubmitted), + event(ControlEventType::CandidateQueued), + event(ControlEventType::CandidateDispatched), + event(ControlEventType::CandidateOutputCollected), + event(ControlEventType::ExecutionStalled), + ]; + + let snapshot = ExecutionSnapshot::replay(execution, &events); + + assert_eq!(snapshot.execution.status, ExecutionStatus::Pending); +} + +#[test] +fn accumulator_is_reconstructible_from_event_log() { + let execution = Execution::new("exec-3", "swarm", "optimize latency"); + let events = vec![ + event(ControlEventType::ExecutionCreated), + event(ControlEventType::CandidateScored), + event(ControlEventType::IterationCompleted), + ]; + + let snapshot = ExecutionSnapshot::replay(execution, &events); + + assert_eq!(snapshot.accumulator.scoring_history_len, 1); + assert_eq!(snapshot.accumulator.completed_iterations, 1); +} + +#[test] +fn execution_started_event_advances_state_to_running() { + let execution = Execution::new("exec-4", 
#[test]
fn store_round_trips_execution_and_events() {
    // Persist an execution, two events, and an accumulator, then reload the
    // combined snapshot and confirm every piece survived the round trip.
    let store = FsExecutionStore::new(temp_store_root("round_trip"));

    store
        .create_execution(&Execution::new("exec-store-1", "swarm", "persist state"))
        .expect("create execution");

    for event_type in [
        ControlEventType::ExecutionCreated,
        ControlEventType::IterationStarted,
    ] {
        store
            .append_event("exec-store-1", &event(event_type))
            .expect("append event");
    }

    let accumulator = ExecutionAccumulator {
        scoring_history_len: 2,
        completed_iterations: 1,
        ..ExecutionAccumulator::default()
    };
    store
        .save_accumulator("exec-store-1", &accumulator)
        .expect("save accumulator");

    let snapshot = store.load_execution("exec-store-1").expect("load snapshot");

    assert_eq!(snapshot.execution.execution_id, "exec-store-1");
    assert_eq!(snapshot.events.len(), 2);
    assert_eq!(snapshot.accumulator.scoring_history_len, 2);
}
assert_eq!(snapshot.accumulator.scoring_history_len, 3); + assert_eq!(snapshot.accumulator.completed_iterations, 2); +} + +fn event(event_type: ControlEventType) -> ControlEventEnvelope { + ControlEventEnvelope::new("exec-test", 1, event_type) +} + +fn temp_store_root(label: &str) -> PathBuf { + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("clock") + .as_nanos(); + let dir = env::temp_dir().join(format!("void-control-{label}-{nanos}")); + fs::create_dir_all(&dir).expect("create temp dir"); + dir +} diff --git a/tests/execution_message_box.rs b/tests/execution_message_box.rs new file mode 100644 index 0000000..fa5abb1 --- /dev/null +++ b/tests/execution_message_box.rs @@ -0,0 +1,580 @@ +#![cfg(feature = "serde")] + +use std::env; +use std::fs; +use std::path::PathBuf; +use std::cell::RefCell; +use std::collections::BTreeMap; +use std::rc::Rc; +use std::time::{SystemTime, UNIX_EPOCH}; + +use void_control::contract::{ + ContractError, RuntimeInspection, StartRequest, StartResult, RunState, +}; +use void_control::orchestration::{ + CandidateOutput, CandidateSpec, CandidateStatus, CommunicationIntent, CommunicationIntentAudience, + CommunicationIntentKind, CommunicationIntentPriority, ExecutionCandidate, ExecutionService, ExecutionSpec, + FsExecutionStore, GlobalConfig, InboxEntry, InboxSnapshot, OrchestrationPolicy, RoutedMessage, RoutedMessageStatus, + StructuredOutputResult, VariationConfig, VariationProposal, WorkflowTemplateRef, +}; +use void_control::orchestration::service::ExecutionRuntime; +use void_control::runtime::MockRuntime; +use void_control::runtime::{LaunchInjectionAdapter, ProviderLaunchAdapter}; + +#[test] +fn fs_store_round_trips_message_box_logs() { + let root = temp_store_root("message-box-logs"); + let store = FsExecutionStore::new(root.clone()); + + let intent_one = CommunicationIntent { + intent_id: "intent-1".to_string(), + from_candidate_id: "candidate-1".to_string(), + iteration: 0, + kind: 
CommunicationIntentKind::Proposal, + audience: CommunicationIntentAudience::Leader, + payload: json_payload("summary-one", "hint-one"), + priority: CommunicationIntentPriority::Normal, + ttl_iterations: 1, + caused_by: None, + context: None, + }; + let intent_two = CommunicationIntent { + intent_id: "intent-2".to_string(), + from_candidate_id: "candidate-2".to_string(), + iteration: 1, + kind: CommunicationIntentKind::Signal, + audience: CommunicationIntentAudience::Broadcast, + payload: json_payload("summary-two", "hint-two"), + priority: CommunicationIntentPriority::High, + ttl_iterations: 2, + caused_by: Some("intent-1".to_string()), + context: Some(json_context("family-a")), + }; + let message_one = RoutedMessage { + message_id: "message-1".to_string(), + intent_id: "intent-1".to_string(), + to: "leader".to_string(), + delivery_iteration: 1, + routing_reason: "leader_feedback_channel".to_string(), + status: RoutedMessageStatus::Routed, + }; + let message_two = RoutedMessage { + message_id: "message-2".to_string(), + intent_id: "intent-2".to_string(), + to: "broadcast".to_string(), + delivery_iteration: 2, + routing_reason: "broadcast_fanout".to_string(), + status: RoutedMessageStatus::Delivered, + }; + + store + .append_intent("exec-message-box", &intent_one) + .expect("append first intent"); + store + .append_intent("exec-message-box", &intent_two) + .expect("append second intent"); + store + .append_routed_message("exec-message-box", &message_one) + .expect("append first message"); + store + .append_routed_message("exec-message-box", &message_two) + .expect("append second message"); + + let loaded_intents = store + .load_intents("exec-message-box") + .expect("load intents"); + let loaded_messages = store + .load_routed_messages("exec-message-box") + .expect("load messages"); + + assert_eq!(loaded_intents, vec![intent_one, intent_two]); + assert_eq!(loaded_messages, vec![message_one, message_two]); + + let intent_log = 
#[test]
fn fs_store_round_trips_inbox_snapshot() {
    // Persist one inbox snapshot, reload it by (execution, iteration,
    // candidate), and check both the value and the on-disk layout.
    let root = temp_store_root("message-box-inbox");
    let store = FsExecutionStore::new(root.clone());

    let entries = vec![
        InboxEntry {
            message_id: "message-1".to_string(),
            intent_id: "intent-1".to_string(),
            from_candidate_id: "candidate-1".to_string(),
            kind: CommunicationIntentKind::Proposal,
            payload: json_payload("summary-one", "hint-one"),
        },
        InboxEntry {
            message_id: "message-2".to_string(),
            intent_id: "intent-2".to_string(),
            from_candidate_id: "candidate-2".to_string(),
            kind: CommunicationIntentKind::Evaluation,
            payload: json_payload("summary-two", "hint-two"),
        },
    ];
    let snapshot = InboxSnapshot {
        execution_id: "exec-message-box".to_string(),
        candidate_id: "candidate-3".to_string(),
        iteration: 1,
        entries,
    };

    store.save_inbox_snapshot(&snapshot).expect("save inbox snapshot");

    let loaded = store
        .load_inbox_snapshot("exec-message-box", 1, "candidate-3")
        .expect("load inbox snapshot");
    assert_eq!(loaded, snapshot);

    // Layout: <root>/<execution>/inboxes/<iteration>/<candidate>.json.
    let snapshot_path = root
        .join("exec-message-box")
        .join("inboxes")
        .join("1")
        .join("candidate-3.json");
    let raw = fs::read_to_string(snapshot_path).expect("read snapshot file");
    assert!(raw.contains("\"candidate_id\": \"candidate-3\""));
}
#[test]
fn fs_store_ignores_truncated_ndjson_tail_when_loading_intents() {
    // A crash mid-append can leave a partial JSON record at the end of the
    // NDJSON log; loading must return the valid prefix instead of erroring.
    let root = temp_store_root("message-box-ndjson");
    let store = FsExecutionStore::new(root.clone());

    let intent = CommunicationIntent {
        intent_id: "intent-1".to_string(),
        from_candidate_id: "candidate-1".to_string(),
        iteration: 0,
        kind: CommunicationIntentKind::Proposal,
        audience: CommunicationIntentAudience::Leader,
        payload: json_payload("summary-one", "hint-one"),
        priority: CommunicationIntentPriority::Normal,
        ttl_iterations: 1,
        caused_by: None,
        context: None,
    };
    store
        .append_intent("exec-message-box", &intent)
        .expect("append valid intent");

    // Rewrite the log so a truncated record follows the valid first line.
    let serialized = serde_json::to_string(&intent).expect("serialize intent");
    let log_path = root.join("exec-message-box").join("intents.log");
    fs::write(&log_path, format!("{}\n{{\"intent_id\":", serialized)).expect("truncate tail");

    let loaded = store
        .load_intents("exec-message-box")
        .expect("load with truncated tail");

    assert_eq!(loaded, vec![intent]);
}
+ let root = temp_store_root("message-box-launch-adapter"); + let store = FsExecutionStore::new(root); + let spec = launch_spec(); + let snapshot = InboxSnapshot { + execution_id: "exec-message-box".to_string(), + candidate_id: "candidate-1".to_string(), + iteration: 0, + entries: vec![InboxEntry { + message_id: "message-1".to_string(), + intent_id: "intent-1".to_string(), + from_candidate_id: "candidate-source".to_string(), + kind: CommunicationIntentKind::Proposal, + payload: json_payload("summary-one", "hint-one"), + }], + }; + + ExecutionService::::submit_execution(&store, "exec-message-box", &spec) + .expect("submit execution"); + store + .save_candidate(&ExecutionCandidate::new( + "exec-message-box", + "candidate-1", + 1, + 0, + CandidateStatus::Queued, + )) + .expect("seed queued candidate"); + store + .save_inbox_snapshot(&snapshot) + .expect("seed inbox snapshot"); + + let mut service = ExecutionService::with_launch_adapter( + GlobalConfig { + max_concurrent_child_runs: 1, + }, + runtime, + store, + Box::new(adapter), + ); + + let _ = service + .dispatch_execution_once("exec-message-box") + .expect("dispatch once"); + + assert_eq!(adapter_calls.borrow().len(), 1); + assert_eq!(adapter_calls.borrow()[0].0, "candidate-1"); + assert_eq!(adapter_calls.borrow()[0].1, snapshot); + + let requests = runtime_requests.borrow(); + assert_eq!(requests.len(), 1); + assert_eq!(requests[0].workflow_spec, "workflow-template"); + let launch_context = requests[0] + .launch_context + .as_ref() + .expect("launch context"); + let decoded: InboxSnapshot = serde_json::from_str(launch_context).expect("decode launch context"); + assert_eq!(decoded, snapshot); +} + +#[test] +fn service_persists_routes_and_delivers_message_box_artifacts_across_iterations() { + let root = temp_store_root("message-box-routing"); + let store = FsExecutionStore::new(root.clone()); + let mut runtime = MockRuntime::new(); + runtime.seed_success( + "exec-run-candidate-1", + CandidateOutput::new( + 
"candidate-1", + true, + BTreeMap::from([("latency_p99_ms".to_string(), 95.0)]), + ) + .with_intents(vec![CommunicationIntent { + intent_id: "intent-1".to_string(), + from_candidate_id: "placeholder".to_string(), + iteration: 0, + kind: CommunicationIntentKind::Proposal, + audience: CommunicationIntentAudience::Leader, + payload: json_payload("try cache fallback", "cache"), + priority: CommunicationIntentPriority::Normal, + ttl_iterations: 1, + caused_by: None, + context: None, + }]), + ); + runtime.seed_success( + "exec-run-candidate-2", + CandidateOutput::new( + "candidate-2", + true, + BTreeMap::from([("latency_p99_ms".to_string(), 80.0)]), + ) + .with_intents(vec![CommunicationIntent { + intent_id: "intent-2".to_string(), + from_candidate_id: "placeholder".to_string(), + iteration: 0, + kind: CommunicationIntentKind::Signal, + audience: CommunicationIntentAudience::Broadcast, + payload: json_payload("jitter reduced spikes", "jitter"), + priority: CommunicationIntentPriority::High, + ttl_iterations: 1, + caused_by: Some("intent-1".to_string()), + context: None, + }]), + ); + runtime.seed_success( + "exec-run-candidate-3", + CandidateOutput::new( + "candidate-3", + true, + BTreeMap::from([("latency_p99_ms".to_string(), 70.0)]), + ), + ); + runtime.seed_success( + "exec-run-candidate-4", + CandidateOutput::new( + "candidate-4", + true, + BTreeMap::from([("latency_p99_ms".to_string(), 72.0)]), + ), + ); + + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + store, + ); + let execution = service + .run_to_completion(two_iteration_swarm_spec()) + .expect("run execution"); + + let store = FsExecutionStore::new(root); + let snapshot = store + .load_execution(&execution.execution_id) + .expect("load execution"); + let intents = store + .load_intents(&execution.execution_id) + .expect("load intents"); + let messages = store + .load_routed_messages(&execution.execution_id) + .expect("load messages"); + let inbox_one 
= store + .load_inbox_snapshot(&execution.execution_id, 1, "candidate-1") + .expect("load candidate-1 inbox"); + let inbox_two = store + .load_inbox_snapshot(&execution.execution_id, 1, "candidate-2") + .expect("load candidate-2 inbox"); + + assert_eq!(intents.len(), 2); + assert_eq!( + messages + .iter() + .filter(|message| message.status == RoutedMessageStatus::Routed) + .count(), + 2 + ); + assert_eq!( + messages + .iter() + .filter(|message| message.status == RoutedMessageStatus::Delivered) + .count(), + 3 + ); + assert_eq!(inbox_one.entries.len(), 2); + assert_eq!(inbox_two.entries.len(), 1); + assert_event_count( + &snapshot.events, + void_control::orchestration::ControlEventType::CommunicationIntentEmitted, + 2, + ); + assert_event_count( + &snapshot.events, + void_control::orchestration::ControlEventType::MessageRouted, + 2, + ); + assert_event_count( + &snapshot.events, + void_control::orchestration::ControlEventType::MessageDelivered, + 3, + ); +} + +struct RecordingRuntime { + starts: Rc>>, +} + +impl RecordingRuntime { + fn new(starts: Rc>>) -> Self { + Self { starts } + } +} + +impl ExecutionRuntime for RecordingRuntime { + fn start_run(&mut self, request: StartRequest) -> Result { + self.starts.borrow_mut().push(request.clone()); + Ok(StartResult { + handle: format!("run-handle:{}", request.run_id), + attempt_id: 1, + state: RunState::Running, + }) + } + + fn inspect_run(&self, handle: &str) -> Result { + Ok(RuntimeInspection { + run_id: handle + .strip_prefix("run-handle:") + .unwrap_or(handle) + .to_string(), + attempt_id: 1, + state: RunState::Succeeded, + active_stage_count: 0, + active_microvm_count: 0, + started_at: "now".to_string(), + updated_at: "now".to_string(), + terminal_reason: None, + exit_code: None, + }) + } + + fn take_structured_output(&mut self, _run_id: &str) -> StructuredOutputResult { + StructuredOutputResult::Missing + } +} + +struct RecordingLaunchAdapter { + calls: Rc>>, +} + +impl RecordingLaunchAdapter { + fn new(calls: 
Rc>>) -> Self { + Self { calls } + } +} + +impl ProviderLaunchAdapter for RecordingLaunchAdapter { + fn prepare_launch_request( + &self, + request: StartRequest, + candidate: &CandidateSpec, + inbox: &InboxSnapshot, + ) -> StartRequest { + self.calls + .borrow_mut() + .push((candidate.candidate_id.clone(), inbox.clone())); + LaunchInjectionAdapter.prepare_launch_request(request, candidate, inbox) + } +} + +fn launch_spec() -> ExecutionSpec { + ExecutionSpec { + mode: "swarm".to_string(), + goal: "launch adapter test".to_string(), + workflow: WorkflowTemplateRef { + template: "workflow-template".to_string(), + }, + policy: OrchestrationPolicy::default(), + evaluation: void_control::orchestration::EvaluationConfig { + scoring_type: "weighted".to_string(), + weights: Default::default(), + pass_threshold: None, + ranking: "descending".to_string(), + tie_breaking: "lexicographic".to_string(), + }, + variation: VariationConfig::explicit( + 1, + vec![VariationProposal { + overrides: BTreeMap::new(), + }], + ), + swarm: true, + } +} + +fn two_iteration_swarm_spec() -> ExecutionSpec { + ExecutionSpec { + mode: "swarm".to_string(), + goal: "message routing".to_string(), + workflow: WorkflowTemplateRef { + template: "workflow-template".to_string(), + }, + policy: OrchestrationPolicy { + budget: void_control::orchestration::BudgetPolicy { + max_iterations: Some(2), + max_child_runs: None, + max_wall_clock_secs: Some(60), + max_cost_usd_millis: None, + }, + concurrency: void_control::orchestration::ConcurrencyPolicy { + max_concurrent_candidates: 2, + }, + convergence: void_control::orchestration::ConvergencePolicy { + strategy: "exhaustive".to_string(), + min_score: None, + max_iterations_without_improvement: None, + }, + max_candidate_failures_per_iteration: 10, + missing_output_policy: "mark_failed".to_string(), + iteration_failure_policy: "fail_execution".to_string(), + }, + evaluation: void_control::orchestration::EvaluationConfig { + scoring_type: "weighted".to_string(), 
+ weights: BTreeMap::from([("latency_p99_ms".to_string(), -1.0)]), + pass_threshold: None, + ranking: "descending".to_string(), + tie_breaking: "lexicographic".to_string(), + }, + variation: VariationConfig::explicit( + 2, + vec![ + VariationProposal { + overrides: BTreeMap::from([("agent.prompt".to_string(), "baseline".to_string())]), + }, + VariationProposal { + overrides: BTreeMap::from([("agent.prompt".to_string(), "v1".to_string())]), + }, + VariationProposal { + overrides: BTreeMap::from([("agent.prompt".to_string(), "v2".to_string())]), + }, + VariationProposal { + overrides: BTreeMap::from([("agent.prompt".to_string(), "v3".to_string())]), + }, + ], + ), + swarm: true, + } +} + +fn json_payload(summary_text: &str, strategy_hint: &str) -> serde_json::Value { + serde_json::json!({ + "summary_text": summary_text, + "strategy_hint": strategy_hint, + }) +} + +fn json_context(family_hint: &str) -> serde_json::Value { + serde_json::json!({ + "family_hint": family_hint, + }) +} + +fn temp_store_root(label: &str) -> PathBuf { + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("clock") + .as_nanos(); + let dir = env::temp_dir().join(format!("void-control-{label}-{nanos}")); + fs::create_dir_all(&dir).expect("create temp dir"); + dir +} + +fn assert_event_count( + events: &[void_control::orchestration::ControlEventEnvelope], + event_type: void_control::orchestration::ControlEventType, + expected: usize, +) { + let actual = events + .iter() + .filter(|event| event.event_type == event_type) + .count(); + assert_eq!(actual, expected, "{event_type:?}"); +} diff --git a/tests/execution_reconciliation.rs b/tests/execution_reconciliation.rs new file mode 100644 index 0000000..b515740 --- /dev/null +++ b/tests/execution_reconciliation.rs @@ -0,0 +1,157 @@ +use std::env; +use std::fs; +use std::path::PathBuf; +use std::time::{SystemTime, UNIX_EPOCH}; + +use void_control::orchestration::{ + CandidateStatus, ControlEventEnvelope, ControlEventType, Execution, 
#[test]
fn reloads_non_terminal_executions_after_restart() {
    // A Running execution with events and an accumulator must come back as
    // active when a fresh reconciler scans the same store root.
    let root = temp_store_root("reload-active");
    let store = FsExecutionStore::new(root.clone());

    let mut execution = Execution::new("exec-reload", "swarm", "reload state");
    execution.status = ExecutionStatus::Running;
    store.create_execution(&execution).expect("create execution");

    let created =
        ControlEventEnvelope::new("exec-reload", 1, ControlEventType::ExecutionCreated);
    store.append_event("exec-reload", &created).expect("append created");
    let running =
        ControlEventEnvelope::new("exec-reload", 2, ControlEventType::IterationStarted);
    store.append_event("exec-reload", &running).expect("append running");

    let accumulator = ExecutionAccumulator {
        scoring_history_len: 1,
        completed_iterations: 1,
        ..ExecutionAccumulator::default()
    };
    store
        .save_accumulator("exec-reload", &accumulator)
        .expect("save accumulator");

    let reconciler = ReconciliationService::new(FsExecutionStore::new(root));
    let active = reconciler.reload_active_executions().expect("reload");

    assert_eq!(active.len(), 1);
    assert_eq!(active[0].execution.execution_id, "exec-reload");
    assert_eq!(active[0].accumulator.completed_iterations, 1);
}

#[test]
fn paused_execution_remains_paused_after_restart() {
    // Paused is non-terminal: it is reloaded as active, still Paused.
    let root = temp_store_root("paused");
    let store = FsExecutionStore::new(root.clone());

    let mut execution = Execution::new("exec-paused", "swarm", "stay paused");
    execution.status = ExecutionStatus::Paused;
    store.create_execution(&execution).expect("create execution");

    let reconciler = ReconciliationService::new(FsExecutionStore::new(root));
    let active = reconciler.reload_active_executions().expect("reload");

    assert_eq!(active.len(), 1);
    assert_eq!(active[0].execution.status, ExecutionStatus::Paused);
}
#[test]
fn reloads_queued_candidates_fifo_across_active_executions() {
    // Queued candidates from all running executions come back ordered by
    // creation stamp: "cand-b1" (stamp 1) precedes "cand-a1" (stamp 2) even
    // though execution A was created first.
    let root = temp_store_root("queued-candidates");
    let store = FsExecutionStore::new(root.clone());

    let mut exec_a = Execution::new("exec-a", "swarm", "a");
    exec_a.status = ExecutionStatus::Running;
    store.create_execution(&exec_a).expect("create a");
    let mut cand_a = ExecutionCandidate::new("exec-a", "cand-a1", 2, 0, CandidateStatus::Queued);
    cand_a.overrides.insert("agent.prompt".to_string(), "a1".to_string());
    store.save_candidate(&cand_a).expect("save a1");

    let mut exec_b = Execution::new("exec-b", "swarm", "b");
    exec_b.status = ExecutionStatus::Running;
    store.create_execution(&exec_b).expect("create b");
    let mut cand_b = ExecutionCandidate::new("exec-b", "cand-b1", 1, 0, CandidateStatus::Queued);
    cand_b.overrides.insert("agent.prompt".to_string(), "b1".to_string());
    store.save_candidate(&cand_b).expect("save b1");

    let reconciler = ReconciliationService::new(FsExecutionStore::new(root));
    let queued = reconciler.reload_queued_candidates().expect("reload queued");

    assert_eq!(queued.len(), 2);
    assert_eq!(queued[0].execution_id, "exec-b");
    assert_eq!(queued[0].candidate_id, "cand-b1");
    assert_eq!(queued[1].execution_id, "exec-a");
    assert_eq!(queued[1].candidate_id, "cand-a1");
}
/// Create a fresh, uniquely named reconciliation-test store root under the
/// system temp directory. A nanosecond timestamp in the directory name keeps
/// concurrent test runs from colliding.
fn temp_store_root(label: &str) -> PathBuf {
    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("clock")
        .as_nanos();
    let root: PathBuf = env::temp_dir().join(format!("void-control-reconcile-{label}-{nanos}"));
    fs::create_dir_all(&root).expect("create temp dir");
    root
}
#[test]
fn mock_runtime_can_simulate_failure_timeout_and_missing_output() {
    let mut runtime = MockRuntime::new();
    runtime.seed_failure("run-fail");
    runtime.seed_missing_output("run-missing");

    let fail = runtime
        .start(test_start_request("run-fail"))
        .expect("start fail");
    let missing = runtime
        .start(test_start_request("run-missing"))
        .expect("start missing");

    // A seeded failure surfaces as a Failed run on inspection.
    let fail_state = runtime.inspect(&fail.handle).expect("inspect fail").state;
    assert_eq!(fail_state, void_control::contract::RunState::Failed);

    // A missing-output seed still succeeds but yields no structured output.
    let missing_state = runtime
        .inspect(&missing.handle)
        .expect("inspect missing")
        .state;
    assert_eq!(missing_state, void_control::contract::RunState::Succeeded);
    assert!(matches!(
        runtime.take_structured_output("run-missing"),
        StructuredOutputResult::Missing
    ));
}

#[test]
fn preserves_plan_candidates_order_within_execution() {
    let mut scheduler = void_control::orchestration::GlobalScheduler::new(2);
    scheduler.enqueue(QueuedCandidate::new("exec-1", "cand-1", 1));
    scheduler.enqueue(QueuedCandidate::new("exec-1", "cand-2", 2));

    // Dispatch, run, and release the first candidate before taking the next.
    let first = scheduler.next_dispatch().expect("first dispatch");
    scheduler.mark_running(&first);
    scheduler.release(&first.execution_id, &first.candidate_id);
    let second = scheduler.next_dispatch().expect("second dispatch");

    assert_eq!(first.candidate_id, "cand-1");
    assert_eq!(second.candidate_id, "cand-2");
}
#[test]
fn releases_slots_immediately_on_completion() {
    let mut scheduler = void_control::orchestration::GlobalScheduler::new(1);
    scheduler.enqueue(QueuedCandidate::new("exec-1", "cand-1", 1));
    scheduler.enqueue(QueuedCandidate::new("exec-2", "cand-2", 2));

    let first = scheduler.next_dispatch().expect("first dispatch");
    scheduler.mark_running(&first);
    // The single slot is occupied, so nothing else may dispatch yet.
    assert!(scheduler.next_dispatch().is_none());

    // Releasing the slot immediately unblocks the next queued candidate.
    scheduler.release(&first.execution_id, &first.candidate_id);
    let second = scheduler.next_dispatch().expect("second dispatch");
    assert_eq!(second.candidate_id, "cand-2");
}

#[test]
fn paused_execution_keeps_queue_but_releases_slots() {
    let mut scheduler = void_control::orchestration::GlobalScheduler::new(2);
    scheduler.enqueue(QueuedCandidate::new("exec-1", "cand-1", 1));
    scheduler.enqueue(QueuedCandidate::new("exec-1", "cand-2", 2));

    let grant = scheduler.next_dispatch().expect("dispatch");
    scheduler.mark_running(&grant);
    scheduler.pause_execution("exec-1");

    // Pausing frees the active slot but keeps the remaining queue intact,
    // and no further candidates dispatch while the execution is paused.
    assert_eq!(scheduler.execution_queue_depth("exec-1"), 1);
    assert_eq!(scheduler.active_slots(), 0);
    assert!(scheduler.next_dispatch().is_none());
}
#[test]
fn exhausted_budget_prevents_queue_entry() {
    let mut scheduler = void_control::orchestration::GlobalScheduler::new(1);
    // One iteration already completed against a max-iterations budget of one.
    let accumulator = ExecutionAccumulator {
        completed_iterations: 1,
        ..ExecutionAccumulator::default()
    };

    let decision = scheduler.enqueue_if_budget_allows(
        QueuedCandidate::new("exec-1", "cand-1", 1),
        &accumulator,
        1,
    );

    assert_eq!(decision, SchedulerDecision::RejectedBudgetExceeded);
}

#[test]
fn runs_single_iteration_and_completes_with_best_result() {
    let mut runtime = MockRuntime::new();
    runtime.seed_success(
        "exec-run-candidate-1",
        output("candidate-1", &[("latency_p99_ms", 120.0), ("cost_usd", 0.04)]),
    );
    runtime.seed_success(
        "exec-run-candidate-2",
        output("candidate-2", &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)]),
    );

    let store = FsExecutionStore::new(temp_store_dir("single"));
    let config = GlobalConfig {
        max_concurrent_child_runs: 2,
    };
    let mut service = ExecutionService::new(config, runtime, store);

    let execution = service.run_to_completion(test_spec(1)).expect("run execution");

    // candidate-2 has lower latency and cost, so it wins under the
    // negative-weight scoring of the test spec.
    assert_eq!(execution.status, ExecutionStatus::Completed);
    assert_eq!(
        execution.result_best_candidate_id.as_deref(),
        Some("candidate-2")
    );
}
#[test]
fn short_circuits_iteration_after_failure_limit() {
    let mut runtime = MockRuntime::new();
    runtime.seed_failure("exec-run-candidate-1");
    runtime.seed_success(
        "exec-run-candidate-2",
        output("candidate-2", &[("latency_p99_ms", 95.0), ("cost_usd", 0.03)]),
    );

    let store = FsExecutionStore::new(temp_store_dir("fail-limit"));
    let config = GlobalConfig {
        max_concurrent_child_runs: 2,
    };
    let mut service = ExecutionService::new(config, runtime, store);

    // With a failure limit of one, the first candidate failure is enough to
    // abort the iteration and fail the execution.
    let execution = service
        .run_to_completion(test_spec_with_failure_limit(1))
        .expect("run execution");

    assert_eq!(execution.status, ExecutionStatus::Failed);
    assert_eq!(execution.failure_counts.total_candidate_failures, 1);
}

#[test]
fn marks_execution_failed_when_all_candidates_fail_and_policy_says_fail() {
    let mut runtime = MockRuntime::new();
    runtime.seed_failure("exec-run-candidate-1");
    runtime.seed_failure("exec-run-candidate-2");

    let store = FsExecutionStore::new(temp_store_dir("all-fail"));
    let config = GlobalConfig {
        max_concurrent_child_runs: 2,
    };
    let mut service = ExecutionService::new(config, runtime, store);

    let execution = service
        .run_to_completion(test_spec_with_failure_limit(2))
        .expect("run execution");

    assert_eq!(execution.status, ExecutionStatus::Failed);
}
cancel_grace_period_secs: 5, + }, + } +} + +fn test_spec(max_iterations: u32) -> ExecutionSpec { + test_spec_inner(max_iterations, None, 10) +} + +fn test_spec_with_threshold(min_score: f64, max_iterations: u32) -> ExecutionSpec { + let mut spec = test_spec_inner(max_iterations, Some(min_score), 10); + spec.policy.convergence.strategy = "threshold".to_string(); + spec.policy.convergence.min_score = Some(min_score); + spec +} + +fn test_spec_with_failure_limit(limit: u32) -> ExecutionSpec { + let mut spec = test_spec_inner(1, None, limit); + spec.policy.max_candidate_failures_per_iteration = limit; + spec +} + +fn test_spec_inner( + max_iterations: u32, + min_score: Option, + max_candidate_failures_per_iteration: u32, +) -> ExecutionSpec { + ExecutionSpec { + mode: "swarm".to_string(), + goal: "optimize latency".to_string(), + workflow: void_control::orchestration::WorkflowTemplateRef { + template: "fixtures/sample.vbrun".to_string(), + }, + policy: OrchestrationPolicy { + budget: void_control::orchestration::BudgetPolicy { + max_iterations: Some(max_iterations), + max_child_runs: None, + max_wall_clock_secs: Some(60), + max_cost_usd_millis: None, + }, + concurrency: void_control::orchestration::ConcurrencyPolicy { + max_concurrent_candidates: 2, + }, + convergence: void_control::orchestration::ConvergencePolicy { + strategy: if min_score.is_some() { + "threshold".to_string() + } else { + "exhaustive".to_string() + }, + min_score, + max_iterations_without_improvement: None, + }, + max_candidate_failures_per_iteration, + missing_output_policy: "mark_failed".to_string(), + iteration_failure_policy: "fail_execution".to_string(), + }, + evaluation: void_control::orchestration::EvaluationConfig { + scoring_type: "weighted_metrics".to_string(), + weights: BTreeMap::from([ + ("latency_p99_ms".to_string(), -0.6), + ("cost_usd".to_string(), -0.4), + ]), + pass_threshold: Some(0.7), + ranking: "highest_score".to_string(), + tie_breaking: "cost_usd".to_string(), + }, + 
variation: VariationConfig::explicit( + 2, + vec![ + VariationProposal { + overrides: BTreeMap::from([("agent.prompt".to_string(), "a".to_string())]), + }, + VariationProposal { + overrides: BTreeMap::from([("agent.prompt".to_string(), "b".to_string())]), + }, + ], + ), + swarm: true, + } +} + +fn output(candidate_id: &str, metrics: &[(&str, f64)]) -> CandidateOutput { + CandidateOutput::new( + candidate_id.to_string(), + true, + metrics.iter().map(|(k, v)| (k.to_string(), *v)).collect(), + ) +} + +fn temp_store_dir(label: &str) -> std::path::PathBuf { + let nanos = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("clock") + .as_nanos(); + let dir = std::env::temp_dir().join(format!("void-control-scheduler-{label}-{nanos}")); + std::fs::create_dir_all(&dir).expect("create temp dir"); + dir +} diff --git a/tests/execution_search_strategy.rs b/tests/execution_search_strategy.rs new file mode 100644 index 0000000..f944602 --- /dev/null +++ b/tests/execution_search_strategy.rs @@ -0,0 +1,200 @@ +use std::collections::BTreeMap; + +use void_control::orchestration::{ + CandidateInbox, CandidateOutput, ConvergencePolicy, ExecutionAccumulator, IterationEvaluation, + MetricDirection, SearchStrategy, ScoringConfig, StopReason, VariationConfig, + VariationProposal, VariationSelection, WeightedMetric, +}; + +#[test] +fn search_bootstraps_when_no_seed_exists() { + let strategy = SearchStrategy::new( + VariationConfig::parameter_space( + 4, + VariationSelection::Sequential, + BTreeMap::from([( + "sandbox.env.CONCURRENCY".to_string(), + vec!["2".to_string(), "4".to_string(), "8".to_string(), "16".to_string()], + )]), + ), + scoring_config(), + ConvergencePolicy::default(), + ); + + let candidates = strategy.plan_candidates( + &ExecutionAccumulator::default(), + &[ + CandidateInbox::new("candidate-1"), + CandidateInbox::new("candidate-2"), + CandidateInbox::new("candidate-3"), + CandidateInbox::new("candidate-4"), + ], + ); + + 
assert!(!candidates.is_empty()); + assert!(candidates.len() < 4); +} + +#[test] +fn search_refines_around_explicit_incumbent() { + let strategy = SearchStrategy::new( + VariationConfig::explicit( + 2, + vec![ + proposal(&[("agent.prompt", "baseline")]), + proposal(&[("agent.prompt", "v1")]), + proposal(&[("agent.prompt", "v2")]), + ], + ), + scoring_config(), + ConvergencePolicy::default(), + ); + let mut accumulator = ExecutionAccumulator::default(); + accumulator.best_candidate_overrides = BTreeMap::from([( + "agent.prompt".to_string(), + "v1".to_string(), + )]); + + let candidates = strategy.plan_candidates( + &accumulator, + &[CandidateInbox::new("candidate-1"), CandidateInbox::new("candidate-2")], + ); + + assert_eq!(candidates.len(), 2); + assert_eq!(candidates[0].overrides["agent.prompt"], "baseline"); + assert_eq!(candidates[1].overrides["agent.prompt"], "v2"); +} + +#[test] +fn search_avoids_explored_signatures() { + let strategy = SearchStrategy::new( + VariationConfig::explicit( + 2, + vec![ + proposal(&[("agent.prompt", "baseline")]), + proposal(&[("agent.prompt", "v1")]), + proposal(&[("agent.prompt", "v2")]), + ], + ), + scoring_config(), + ConvergencePolicy::default(), + ); + let mut accumulator = ExecutionAccumulator::default(); + accumulator.best_candidate_overrides = BTreeMap::from([( + "agent.prompt".to_string(), + "v1".to_string(), + )]); + accumulator.explored_signatures = vec!["agent.prompt=baseline".to_string()]; + + let candidates = strategy.plan_candidates( + &accumulator, + &[CandidateInbox::new("candidate-1"), CandidateInbox::new("candidate-2")], + ); + + assert_eq!(candidates.len(), 1); + assert_eq!(candidates[0].overrides["agent.prompt"], "v2"); +} + +#[test] +fn search_reduce_updates_incumbent_phase_and_signatures() { + let strategy = SearchStrategy::new( + VariationConfig::explicit( + 2, + vec![ + proposal(&[("agent.prompt", "baseline")]), + proposal(&[("agent.prompt", "v1")]), + ], + ), + scoring_config(), + 
ConvergencePolicy::default(), + ); + + let next = strategy.reduce( + ExecutionAccumulator::default(), + IterationEvaluation { + ranked_candidates: void_control::orchestration::score_iteration( + &scoring_config(), + &[ + candidate_output("candidate-1", true, &[("latency_p99_ms", 100.0)]), + candidate_output("candidate-2", true, &[("latency_p99_ms", 80.0)]), + ], + ), + }, + ); + + assert_eq!(next.best_candidate_id.as_deref(), Some("candidate-2")); + assert_eq!( + next.best_candidate_overrides.get("agent.prompt").map(String::as_str), + Some("v1") + ); + assert_eq!(next.search_phase.as_deref(), Some("refine")); + assert!(next.explored_signatures.contains(&"agent.prompt=baseline".to_string())); + assert!(next.explored_signatures.contains(&"agent.prompt=v1".to_string())); +} + +#[test] +fn search_stops_when_no_new_neighbors_remain() { + let strategy = SearchStrategy::new( + VariationConfig::explicit( + 2, + vec![ + proposal(&[("agent.prompt", "baseline")]), + proposal(&[("agent.prompt", "v1")]), + ], + ), + scoring_config(), + ConvergencePolicy::default(), + ); + let mut accumulator = ExecutionAccumulator::default(); + accumulator.best_candidate_overrides = BTreeMap::from([( + "agent.prompt".to_string(), + "baseline".to_string(), + )]); + accumulator.explored_signatures = + vec!["agent.prompt=v1".to_string(), "agent.prompt=baseline".to_string()]; + + let stop = strategy.should_stop( + &accumulator, + &IterationEvaluation { + ranked_candidates: vec![], + }, + ); + + assert_eq!(stop, Some(StopReason::ConvergencePlateau)); +} + +fn scoring_config() -> ScoringConfig { + ScoringConfig { + metrics: vec![WeightedMetric { + name: "latency_p99_ms".to_string(), + weight: 1.0, + direction: MetricDirection::Minimize, + }], + pass_threshold: 0.7, + tie_break_metric: "latency_p99_ms".to_string(), + } +} + +fn candidate_output( + candidate_id: &str, + succeeded: bool, + metrics: &[(&str, f64)], +) -> CandidateOutput { + CandidateOutput::new( + candidate_id.to_string(), + succeeded, + 
metrics + .iter() + .map(|(name, value)| (name.to_string(), *value)) + .collect(), + ) +} + +fn proposal(items: &[(&str, &str)]) -> VariationProposal { + VariationProposal { + overrides: items + .iter() + .map(|(key, value)| ((*key).to_string(), (*value).to_string())) + .collect(), + } +} diff --git a/tests/execution_spec_validation.rs b/tests/execution_spec_validation.rs new file mode 100644 index 0000000..d3ae083 --- /dev/null +++ b/tests/execution_spec_validation.rs @@ -0,0 +1,130 @@ +use std::collections::BTreeMap; + +use void_control::orchestration::{ + ConvergencePolicy, EvaluationConfig, ExecutionSpec, GlobalConfig, OrchestrationPolicy, + VariationConfig, VariationSelection, WorkflowTemplateRef, +}; + +#[test] +fn orchestration_module_exports_execution_spec() { + let _ = std::any::type_name::(); +} + +#[test] +fn rejects_unbounded_execution() { + let err = spec_with(|policy| { + policy.budget.max_iterations = None; + policy.budget.max_wall_clock_secs = None; + }) + .validate(&global_config()) + .expect_err("expected unbounded execution to be rejected"); + + assert!(err.to_string().contains("max_iterations")); +} + +#[test] +fn rejects_concurrency_above_global_pool() { + let err = spec_with(|policy| { + policy.concurrency.max_concurrent_candidates = 3; + }) + .validate(&GlobalConfig { + max_concurrent_child_runs: 2, + }) + .expect_err("expected concurrency validation error"); + + assert!(err + .to_string() + .contains("max_concurrent_candidates")); +} + +#[test] +fn rejects_threshold_without_min_score() { + let err = spec_with(|policy| { + policy.convergence = ConvergencePolicy { + strategy: "threshold".to_string(), + min_score: None, + max_iterations_without_improvement: None, + }; + }) + .validate(&global_config()) + .expect_err("expected threshold validation error"); + + assert!(err.to_string().contains("min_score")); +} + +#[test] +fn accepts_exhaustive_with_max_iterations() { + spec_with(|policy| { + policy.convergence = ConvergencePolicy { + strategy: 
"exhaustive".to_string(), + min_score: None, + max_iterations_without_improvement: None, + }; + policy.budget.max_iterations = Some(5); + }) + .validate(&global_config()) + .expect("expected exhaustive plan to validate"); +} + +#[test] +fn rejects_unknown_mode() { + let mut spec = base_spec(); + spec.mode = "unknown".to_string(); + + let err = spec + .validate(&global_config()) + .expect_err("expected unknown mode to fail"); + + assert!(err.to_string().contains("unknown mode")); +} + +#[test] +fn accepts_search_mode() { + let mut spec = base_spec(); + spec.mode = "search".to_string(); + + spec.validate(&global_config()) + .expect("expected search mode to validate"); +} + +fn global_config() -> GlobalConfig { + GlobalConfig { + max_concurrent_child_runs: 4, + } +} + +fn spec_with(edit: impl FnOnce(&mut OrchestrationPolicy)) -> ExecutionSpec { + let mut spec = base_spec(); + edit(&mut spec.policy); + spec +} + +fn base_spec() -> ExecutionSpec { + ExecutionSpec { + mode: "swarm".to_string(), + goal: "optimize latency".to_string(), + workflow: WorkflowTemplateRef { + template: "fixtures/sample.vbrun".to_string(), + }, + policy: OrchestrationPolicy::default(), + evaluation: EvaluationConfig { + scoring_type: "weighted_metrics".to_string(), + weights: BTreeMap::from([ + ("latency_p99_ms".to_string(), -0.6), + ("cost_usd".to_string(), -0.4), + ]), + pass_threshold: Some(0.7), + ranking: "highest_score".to_string(), + tie_breaking: "lowest_cost".to_string(), + }, + variation: VariationConfig::parameter_space( + 2, + VariationSelection::Sequential, + BTreeMap::from([( + "sandbox.env.CONCURRENCY".to_string(), + vec!["2".to_string(), "4".to_string()], + )]), + ), + swarm: true, + } +} diff --git a/tests/execution_strategy_acceptance.rs b/tests/execution_strategy_acceptance.rs new file mode 100644 index 0000000..1928616 --- /dev/null +++ b/tests/execution_strategy_acceptance.rs @@ -0,0 +1,549 @@ +use std::collections::BTreeMap; + +use void_control::orchestration::{ + 
CandidateOutput, CandidateStatus, ControlEventType, ExecutionService, ExecutionSpec, + ExecutionStatus, FsExecutionStore, GlobalConfig, OrchestrationPolicy, VariationConfig, VariationProposal, +}; +#[cfg(feature = "serde")] +use void_control::orchestration::{ + CommunicationIntent, CommunicationIntentAudience, CommunicationIntentKind, + CommunicationIntentPriority, +}; +use void_control::runtime::MockRuntime; + +#[test] +fn swarm_strategy_runs_end_to_end() { + let (execution, _, _) = run_mode_to_completion("swarm", temp_store_dir("swarm-acceptance")); + + assert_eq!(execution.status, ExecutionStatus::Completed); + assert!(execution.result_best_candidate_id.is_some()); +} + +#[test] +fn search_strategy_runs_end_to_end() { + let store_dir = temp_store_dir("search-acceptance"); + let (execution, store, _) = run_mode_to_completion("search", store_dir.clone()); + + assert_eq!(execution.status, ExecutionStatus::Completed); + assert!(execution.result_best_candidate_id.is_some()); + + let candidates = store + .load_candidates(&execution.execution_id) + .expect("load candidates"); + let mut refinement_prompts: Vec<_> = candidates + .iter() + .filter(|candidate| candidate.iteration == 1) + .map(|candidate| { + candidate + .overrides + .get("agent.prompt") + .expect("agent.prompt override") + .clone() + }) + .collect(); + refinement_prompts.sort(); + + assert_eq!(refinement_prompts, vec!["v2".to_string(), "v3".to_string()]); +} + +#[test] +fn supported_strategies_emit_expected_completion_events() { + for mode in ["swarm", "search"] { + let label = format!("{mode}-events"); + let (execution, _, snapshot) = run_mode_to_completion(mode, temp_store_dir(&label)); + + assert_eq!(execution.status, ExecutionStatus::Completed, "{mode}"); + assert_event_counts( + mode, + &snapshot.events, + &[ + (ControlEventType::ExecutionCreated, 1), + (ControlEventType::ExecutionSubmitted, 1), + (ControlEventType::ExecutionStarted, 1), + (ControlEventType::IterationStarted, 2), + 
(ControlEventType::CandidateQueued, 4), + (ControlEventType::CandidateDispatched, 4), + (ControlEventType::CandidateOutputCollected, 4), + (ControlEventType::CandidateScored, 2), + (ControlEventType::IterationCompleted, 2), + (ControlEventType::ExecutionCompleted, 1), + (ControlEventType::ExecutionFailed, 0), + ], + ); + } +} + +#[test] +fn supported_strategies_persist_terminal_candidate_records() { + for mode in ["swarm", "search"] { + let label = format!("{mode}-candidates"); + let (execution, store, snapshot) = run_mode_to_completion(mode, temp_store_dir(&label)); + let candidates = store + .load_candidates(&execution.execution_id) + .expect("load candidates"); + let queued_count = snapshot + .events + .iter() + .filter(|event| event.event_type == ControlEventType::CandidateQueued) + .count(); + + assert_eq!(candidates.len(), queued_count, "{mode}"); + assert!(!candidates.is_empty(), "{mode}"); + assert!(candidates.iter().all(|candidate| candidate.status == CandidateStatus::Completed), "{mode}"); + assert!(candidates.iter().all(|candidate| candidate.runtime_run_id.is_some()), "{mode}"); + assert!(candidates.iter().all(|candidate| candidate.succeeded == Some(true)), "{mode}"); + } +} + +#[test] +fn supported_strategies_emit_failed_terminal_events_on_all_failure() { + for mode in ["swarm", "search"] { + let label = format!("{mode}-failed"); + let (execution, _, snapshot) = run_mode_with_all_failures(mode, temp_store_dir(&label)); + + assert_eq!(execution.status, ExecutionStatus::Failed, "{mode}"); + assert_event_counts( + mode, + &snapshot.events, + &[ + (ControlEventType::ExecutionCreated, 1), + (ControlEventType::ExecutionSubmitted, 1), + (ControlEventType::ExecutionStarted, 1), + (ControlEventType::IterationStarted, 1), + (ControlEventType::CandidateQueued, 2), + (ControlEventType::CandidateDispatched, 2), + (ControlEventType::CandidateOutputCollected, 2), + (ControlEventType::CandidateScored, 1), + (ControlEventType::IterationCompleted, 0), + 
(ControlEventType::ExecutionCompleted, 0), + (ControlEventType::ExecutionFailed, 1), + ], + ); + } +} + +#[test] +fn search_strategy_refines_across_incremental_worker_ticks() { + let store_dir = temp_store_dir("search-incremental"); + let mut runtime = MockRuntime::new(); + runtime.seed_success( + "exec-run-candidate-1", + output("candidate-1", &[("latency_p99_ms", 95.0), ("cost_usd", 0.05)]), + ); + runtime.seed_success( + "exec-run-candidate-2", + output("candidate-2", &[("latency_p99_ms", 80.0), ("cost_usd", 0.03)]), + ); + runtime.seed_success( + "exec-run-candidate-3", + output("candidate-3", &[("latency_p99_ms", 70.0), ("cost_usd", 0.02)]), + ); + runtime.seed_success( + "exec-run-candidate-4", + output("candidate-4", &[("latency_p99_ms", 72.0), ("cost_usd", 0.025)]), + ); + + let store = FsExecutionStore::new(store_dir.clone()); + ExecutionService::::submit_execution(&store, "exec-search", &strategy_spec("search")) + .expect("submit execution"); + + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + store, + ); + service.plan_execution("exec-search").expect("plan execution"); + + for _ in 0..8 { + let execution = service + .dispatch_execution_once("exec-search") + .expect("dispatch execution"); + if matches!(execution.status, ExecutionStatus::Completed | ExecutionStatus::Failed) { + break; + } + } + + let store = FsExecutionStore::new(store_dir); + let snapshot = store.load_execution("exec-search").expect("load execution"); + assert_eq!(snapshot.execution.status, ExecutionStatus::Completed); + + let mut refinement_prompts: Vec<_> = snapshot + .candidates + .iter() + .filter(|candidate| candidate.iteration == 1) + .map(|candidate| candidate.overrides["agent.prompt"].clone()) + .collect(); + refinement_prompts.sort(); + assert_eq!(refinement_prompts, vec!["v2".to_string(), "v3".to_string()]); + assert_eq!(snapshot.accumulator.search_phase.as_deref(), Some("refine")); +} + +#[cfg(feature = "serde")] 
+#[test] +fn swarm_strategy_routes_intents_into_next_iteration_message_box_and_events() { + let store_dir = temp_store_dir("swarm-message-box"); + let mut runtime = MockRuntime::new(); + runtime.seed_success( + "exec-run-candidate-1", + output_with_intents( + "candidate-1", + &[("latency_p99_ms", 95.0), ("cost_usd", 0.05)], + vec![proposal_intent( + "intent-swarm-leader", + CommunicationIntentAudience::Leader, + "leader: favor cache fallback", + None, + )], + ), + ); + runtime.seed_success( + "exec-run-candidate-2", + output_with_intents( + "candidate-2", + &[("latency_p99_ms", 80.0), ("cost_usd", 0.03)], + vec![proposal_intent( + "intent-swarm-broadcast", + CommunicationIntentAudience::Broadcast, + "broadcast: jitter helps", + None, + )], + ), + ); + runtime.seed_success( + "exec-run-candidate-3", + output("candidate-3", &[("latency_p99_ms", 70.0), ("cost_usd", 0.02)]), + ); + runtime.seed_success( + "exec-run-candidate-4", + output("candidate-4", &[("latency_p99_ms", 72.0), ("cost_usd", 0.025)]), + ); + + let store = FsExecutionStore::new(store_dir.clone()); + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + store, + ); + let execution = service + .run_to_completion(strategy_spec("swarm")) + .expect("run execution"); + + let store = FsExecutionStore::new(store_dir); + let snapshot = store.load_execution(&execution.execution_id).expect("load execution"); + let intents = store.load_intents(&execution.execution_id).expect("load intents"); + let messages = store + .load_routed_messages(&execution.execution_id) + .expect("load routed messages"); + let inbox_one = store + .load_inbox_snapshot(&execution.execution_id, 1, "candidate-1") + .expect("load candidate-1 inbox"); + let inbox_two = store + .load_inbox_snapshot(&execution.execution_id, 1, "candidate-2") + .expect("load candidate-2 inbox"); + + assert_eq!(intents.len(), 2); + assert_eq!( + messages + .iter() + .filter(|message| message.status == 
void_control::orchestration::RoutedMessageStatus::Routed) + .count(), + 2 + ); + assert_eq!( + messages + .iter() + .filter(|message| message.status == void_control::orchestration::RoutedMessageStatus::Delivered) + .count(), + 3 + ); + assert_eq!(inbox_one.entries.len(), 2); + assert_eq!(inbox_two.entries.len(), 1); + assert_event_counts( + "swarm-message-box", + &snapshot.events, + &[ + (ControlEventType::CommunicationIntentEmitted, 2), + (ControlEventType::MessageRouted, 2), + (ControlEventType::MessageDelivered, 3), + (ControlEventType::ExecutionCompleted, 1), + ], + ); +} + +#[cfg(feature = "serde")] +#[test] +fn search_strategy_persists_lineage_and_delivers_parent_intent_to_refinement_iteration() { + let store_dir = temp_store_dir("search-message-box"); + let mut runtime = MockRuntime::new(); + runtime.seed_success( + "exec-run-candidate-1", + output_with_intents( + "candidate-1", + &[("latency_p99_ms", 95.0), ("cost_usd", 0.05)], + vec![proposal_intent( + "intent-search-parent", + CommunicationIntentAudience::Leader, + "start from rate limit baseline", + None, + )], + ), + ); + runtime.seed_success( + "exec-run-candidate-2", + output("candidate-2", &[("latency_p99_ms", 80.0), ("cost_usd", 0.03)]), + ); + runtime.seed_success( + "exec-run-candidate-3", + output_with_intents( + "candidate-3", + &[("latency_p99_ms", 70.0), ("cost_usd", 0.02)], + vec![proposal_intent( + "intent-search-child", + CommunicationIntentAudience::Leader, + "refine with jitter", + Some("intent-search-parent"), + )], + ), + ); + runtime.seed_success( + "exec-run-candidate-4", + output("candidate-4", &[("latency_p99_ms", 72.0), ("cost_usd", 0.025)]), + ); + + let store = FsExecutionStore::new(store_dir.clone()); + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + store, + ); + let execution = service + .run_to_completion(strategy_spec("search")) + .expect("run execution"); + + let store = FsExecutionStore::new(store_dir); + let 
intents = store.load_intents(&execution.execution_id).expect("load intents"); + let inbox = store + .load_inbox_snapshot(&execution.execution_id, 1, "candidate-1") + .expect("load iteration-1 inbox"); + let child = intents + .iter() + .find(|intent| intent.intent_id == "intent-search-child") + .expect("child intent"); + + assert_eq!(intents.len(), 2); + assert_eq!(child.caused_by.as_deref(), Some("intent-search-parent")); + assert!(inbox + .entries + .iter() + .any(|entry| entry.intent_id == "intent-search-parent")); +} + +fn run_mode_to_completion( + mode: &str, + store_dir: std::path::PathBuf, +) -> ( + void_control::orchestration::Execution, + FsExecutionStore, + void_control::orchestration::ExecutionSnapshot, +) { + let mut runtime = MockRuntime::new(); + runtime.seed_success( + "exec-run-candidate-1", + output("candidate-1", &[("latency_p99_ms", 95.0), ("cost_usd", 0.05)]), + ); + runtime.seed_success( + "exec-run-candidate-2", + output("candidate-2", &[("latency_p99_ms", 80.0), ("cost_usd", 0.03)]), + ); + runtime.seed_success( + "exec-run-candidate-3", + output("candidate-3", &[("latency_p99_ms", 70.0), ("cost_usd", 0.02)]), + ); + runtime.seed_success( + "exec-run-candidate-4", + output("candidate-4", &[("latency_p99_ms", 72.0), ("cost_usd", 0.025)]), + ); + + let store = FsExecutionStore::new(store_dir.clone()); + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + store, + ); + let execution = service + .run_to_completion(strategy_spec(mode)) + .expect("run execution"); + + let store = FsExecutionStore::new(store_dir); + let snapshot = store + .load_execution(&execution.execution_id) + .expect("load execution snapshot"); + (execution, store, snapshot) +} + +fn run_mode_with_all_failures( + mode: &str, + store_dir: std::path::PathBuf, +) -> ( + void_control::orchestration::Execution, + FsExecutionStore, + void_control::orchestration::ExecutionSnapshot, +) { + let mut runtime = MockRuntime::new(); + 
runtime.seed_failure("exec-run-candidate-1"); + runtime.seed_failure("exec-run-candidate-2"); + + let store = FsExecutionStore::new(store_dir.clone()); + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + store, + ); + let execution = service + .run_to_completion(failing_strategy_spec(mode)) + .expect("run execution"); + + let store = FsExecutionStore::new(store_dir); + let snapshot = store + .load_execution(&execution.execution_id) + .expect("load execution snapshot"); + (execution, store, snapshot) +} + +fn strategy_spec(mode: &str) -> ExecutionSpec { + ExecutionSpec { + mode: mode.to_string(), + goal: "optimize latency".to_string(), + workflow: void_control::orchestration::WorkflowTemplateRef { + template: "fixtures/sample.vbrun".to_string(), + }, + policy: OrchestrationPolicy { + budget: void_control::orchestration::BudgetPolicy { + max_iterations: Some(2), + max_child_runs: None, + max_wall_clock_secs: Some(60), + max_cost_usd_millis: None, + }, + concurrency: void_control::orchestration::ConcurrencyPolicy { + max_concurrent_candidates: 2, + }, + convergence: void_control::orchestration::ConvergencePolicy { + strategy: "exhaustive".to_string(), + min_score: None, + max_iterations_without_improvement: None, + }, + max_candidate_failures_per_iteration: 10, + missing_output_policy: "mark_failed".to_string(), + iteration_failure_policy: "fail_execution".to_string(), + }, + evaluation: void_control::orchestration::EvaluationConfig { + scoring_type: "weighted_metrics".to_string(), + weights: BTreeMap::from([ + ("latency_p99_ms".to_string(), -0.6), + ("cost_usd".to_string(), -0.4), + ]), + pass_threshold: Some(0.7), + ranking: "highest_score".to_string(), + tie_breaking: "cost_usd".to_string(), + }, + variation: VariationConfig::explicit( + 2, + vec![ + VariationProposal { + overrides: BTreeMap::from([("agent.prompt".to_string(), "baseline".to_string())]), + }, + VariationProposal { + overrides: 
BTreeMap::from([("agent.prompt".to_string(), "v1".to_string())]), + }, + VariationProposal { + overrides: BTreeMap::from([("agent.prompt".to_string(), "v2".to_string())]), + }, + VariationProposal { + overrides: BTreeMap::from([("agent.prompt".to_string(), "v3".to_string())]), + }, + ], + ), + swarm: true, + } +} + +fn failing_strategy_spec(mode: &str) -> ExecutionSpec { + let mut spec = strategy_spec(mode); + spec.policy.budget.max_iterations = Some(1); + spec +} + +fn assert_event_counts( + mode: &str, + events: &[void_control::orchestration::ControlEventEnvelope], + expected: &[(ControlEventType, usize)], +) { + for (event_type, count) in expected { + let actual = events + .iter() + .filter(|event| event.event_type == *event_type) + .count(); + assert_eq!(actual, *count, "{mode} {:?}", event_type); + } +} + +fn output(candidate_id: &str, metrics: &[(&str, f64)]) -> CandidateOutput { + CandidateOutput::new( + candidate_id.to_string(), + true, + metrics.iter().map(|(k, v)| (k.to_string(), *v)).collect(), + ) +} + +#[cfg(feature = "serde")] +fn output_with_intents( + candidate_id: &str, + metrics: &[(&str, f64)], + intents: Vec, +) -> CandidateOutput { + output(candidate_id, metrics).with_intents(intents) +} + +#[cfg(feature = "serde")] +fn proposal_intent( + intent_id: &str, + audience: CommunicationIntentAudience, + summary_text: &str, + caused_by: Option<&str>, +) -> CommunicationIntent { + CommunicationIntent { + intent_id: intent_id.to_string(), + from_candidate_id: "placeholder".to_string(), + iteration: 0, + kind: CommunicationIntentKind::Proposal, + audience, + payload: serde_json::json!({ + "summary_text": summary_text, + "strategy_hint": "message-box-test", + }), + priority: CommunicationIntentPriority::Normal, + ttl_iterations: 1, + caused_by: caused_by.map(str::to_string), + context: None, + } +} + +fn temp_store_dir(label: &str) -> std::path::PathBuf { + let nanos = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + 
.expect("clock") + .as_nanos(); + let dir = std::env::temp_dir().join(format!("void-control-strategy-{label}-{nanos}")); + std::fs::create_dir_all(&dir).expect("create temp dir"); + dir +} diff --git a/tests/execution_swarm_strategy.rs b/tests/execution_swarm_strategy.rs new file mode 100644 index 0000000..abf7beb --- /dev/null +++ b/tests/execution_swarm_strategy.rs @@ -0,0 +1,268 @@ +use std::collections::BTreeMap; + +use void_control::orchestration::{ + CandidateInbox, CandidateOutput, ConvergencePolicy, ExecutionAccumulator, + IterationEvaluation, MetricDirection, ScoringConfig, StopReason, SwarmStrategy, + VariationConfig, VariationProposal, VariationSelection, WeightedMetric, score_iteration, +}; + +#[test] +fn weighted_metrics_normalizes_within_iteration() { + let scores = score_iteration( + &scoring_config(), + &[ + candidate_output("cand-a", true, &[("latency_p99_ms", 100.0), ("cost_usd", 0.02)]), + candidate_output("cand-b", true, &[("latency_p99_ms", 200.0), ("cost_usd", 0.05)]), + ], + ); + + assert!(scores[0].score > scores[1].score); +} + +#[test] +fn failed_candidate_scores_zero() { + let scores = score_iteration( + &scoring_config(), + &[candidate_output("cand-fail", false, &[("latency_p99_ms", 100.0)])], + ); + + assert_eq!(scores[0].score, 0.0); + assert!(!scores[0].pass); +} + +#[test] +fn best_result_uses_tie_breaking_after_score() { + let scores = score_iteration( + &scoring_config(), + &[ + candidate_output("cand-a", true, &[("latency_p99_ms", 100.0), ("cost_usd", 0.05)]), + candidate_output("cand-b", true, &[("latency_p99_ms", 100.0), ("cost_usd", 0.03)]), + ], + ); + + assert_eq!(scores[0].candidate_id, "cand-b"); +} + +#[test] +fn parameter_space_random_respects_candidates_per_iteration() { + let proposals = VariationConfig::parameter_space( + 2, + VariationSelection::Random, + BTreeMap::from([( + "sandbox.env.CONCURRENCY".to_string(), + vec!["2".to_string(), "4".to_string(), "8".to_string()], + )]), + ) + 
.generate(&ExecutionAccumulator::default()); + + assert_eq!(proposals.len(), 2); +} + +#[test] +fn parameter_space_sequential_preserves_order() { + let proposals = VariationConfig::parameter_space( + 2, + VariationSelection::Sequential, + BTreeMap::from([( + "sandbox.env.CONCURRENCY".to_string(), + vec!["2".to_string(), "4".to_string(), "8".to_string()], + )]), + ) + .generate(&ExecutionAccumulator::default()); + + assert_eq!(proposals[0].overrides["sandbox.env.CONCURRENCY"], "2"); + assert_eq!(proposals[1].overrides["sandbox.env.CONCURRENCY"], "4"); +} + +#[test] +fn explicit_variation_cycles_through_overrides() { + let mut accumulator = ExecutionAccumulator::default(); + accumulator.scoring_history_len = 1; + let proposals = VariationConfig::explicit( + 2, + vec![ + proposal(&[("agent.prompt", "first")]), + proposal(&[("agent.prompt", "second")]), + proposal(&[("agent.prompt", "third")]), + ], + ) + .generate(&accumulator); + + assert_eq!(proposals[0].overrides["agent.prompt"], "second"); + assert_eq!(proposals[1].overrides["agent.prompt"], "third"); +} + +#[test] +fn leader_directed_proposals_are_validated_before_use() { + let mut accumulator = ExecutionAccumulator::default(); + accumulator.leader_proposals = vec![ + proposal(&[("sandbox.env.CONCURRENCY", "2")]), + VariationProposal { + overrides: BTreeMap::new(), + }, + ]; + + let proposals = VariationConfig::leader_directed(2).generate(&accumulator); + + assert_eq!(proposals.len(), 1); + assert_eq!(proposals[0].overrides["sandbox.env.CONCURRENCY"], "2"); +} + +#[test] +fn swarm_materializes_inboxes_from_message_backlog() { + let mut accumulator = ExecutionAccumulator::default(); + accumulator.message_backlog = vec!["hello".to_string(), "world".to_string()]; + + let inboxes = SwarmStrategy::default().materialize_inboxes(&accumulator); + + assert_eq!(inboxes.len(), 2); + assert_eq!(inboxes[0].messages[0], "hello"); +} + +#[test] +fn swarm_plans_candidates_from_variation_source() { + let strategy = 
SwarmStrategy::new( + VariationConfig::explicit( + 2, + vec![ + proposal(&[("agent.prompt", "first")]), + proposal(&[("agent.prompt", "second")]), + ], + ), + scoring_config(), + ConvergencePolicy::default(), + ); + + let candidates = strategy.plan_candidates( + &ExecutionAccumulator::default(), + &[CandidateInbox::new("candidate-1"), CandidateInbox::new("candidate-2")], + ); + + assert_eq!(candidates.len(), 2); + assert_eq!(candidates[0].overrides["agent.prompt"], "first"); +} + +#[test] +fn swarm_should_stop_on_threshold() { + let strategy = SwarmStrategy::new( + VariationConfig::explicit(1, vec![proposal(&[("agent.prompt", "only")])]), + scoring_config(), + ConvergencePolicy { + strategy: "threshold".to_string(), + min_score: Some(0.8), + max_iterations_without_improvement: None, + }, + ); + + let stop = strategy.should_stop( + &ExecutionAccumulator::default(), + &IterationEvaluation { + ranked_candidates: score_iteration( + &scoring_config(), + &[candidate_output( + "cand-a", + true, + &[("latency_p99_ms", 100.0), ("cost_usd", 0.02)], + )], + ), + }, + ); + + assert_eq!(stop, Some(StopReason::ConvergenceThreshold)); +} + +#[test] +fn swarm_should_stop_on_plateau() { + let strategy = SwarmStrategy::new( + VariationConfig::explicit(1, vec![proposal(&[("agent.prompt", "only")])]), + scoring_config(), + ConvergencePolicy { + strategy: "plateau".to_string(), + min_score: None, + max_iterations_without_improvement: Some(2), + }, + ); + let mut accumulator = ExecutionAccumulator::default(); + accumulator.iterations_without_improvement = 2; + + let stop = strategy.should_stop( + &accumulator, + &IterationEvaluation { + ranked_candidates: vec![], + }, + ); + + assert_eq!(stop, Some(StopReason::ConvergencePlateau)); +} + +#[test] +fn swarm_reduce_updates_best_result_and_failure_counts() { + let strategy = SwarmStrategy::new( + VariationConfig::explicit(1, vec![proposal(&[("agent.prompt", "only")])]), + scoring_config(), + ConvergencePolicy::default(), + ); + + let next = 
strategy.reduce( + ExecutionAccumulator::default(), + IterationEvaluation { + ranked_candidates: score_iteration( + &scoring_config(), + &[ + candidate_output( + "cand-a", + true, + &[("latency_p99_ms", 100.0), ("cost_usd", 0.02)], + ), + candidate_output("cand-b", false, &[("latency_p99_ms", 200.0)]), + ], + ), + }, + ); + + assert_eq!(next.best_candidate_id.as_deref(), Some("cand-a")); + assert_eq!(next.failure_counts.total_candidate_failures, 1); +} + +fn scoring_config() -> ScoringConfig { + ScoringConfig { + metrics: vec![ + WeightedMetric { + name: "latency_p99_ms".to_string(), + weight: 0.6, + direction: MetricDirection::Minimize, + }, + WeightedMetric { + name: "cost_usd".to_string(), + weight: 0.4, + direction: MetricDirection::Minimize, + }, + ], + pass_threshold: 0.7, + tie_break_metric: "cost_usd".to_string(), + } +} + +fn candidate_output( + candidate_id: &str, + succeeded: bool, + metrics: &[(&str, f64)], +) -> CandidateOutput { + CandidateOutput::new( + candidate_id.to_string(), + succeeded, + metrics + .iter() + .map(|(name, value)| (name.to_string(), *value)) + .collect(), + ) +} + +fn proposal(values: &[(&str, &str)]) -> VariationProposal { + VariationProposal { + overrides: values + .iter() + .map(|(key, value)| (key.to_string(), value.to_string())) + .collect(), + } +} diff --git a/tests/execution_worker.rs b/tests/execution_worker.rs new file mode 100644 index 0000000..3b61f97 --- /dev/null +++ b/tests/execution_worker.rs @@ -0,0 +1,878 @@ +#![cfg(feature = "serde")] + +use std::collections::BTreeMap; + +use void_control::orchestration::{ + CandidateOutput, CandidateStatus, ExecutionCandidate, ExecutionService, ExecutionSpec, + ExecutionStatus, FsExecutionStore, GlobalConfig, OrchestrationPolicy, VariationConfig, + VariationProposal, +}; +use void_control::runtime::MockRuntime; + +#[test] +fn submitted_pending_execution_can_be_processed_to_completion() { + let root = temp_store_dir("worker"); + let store = FsExecutionStore::new(root); + let 
spec = spec(1); + let execution = ExecutionService::::submit_execution(&store, "exec-worker", &spec) + .expect("submit"); + assert_eq!(execution.status, ExecutionStatus::Pending); + + let mut runtime = MockRuntime::new(); + runtime.seed_success( + "exec-run-candidate-1", + output("candidate-1", &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)]), + ); + runtime.seed_success( + "exec-run-candidate-2", + output("candidate-2", &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)]), + ); + + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + store.clone(), + ); + let processed = service.process_execution("exec-worker").expect("process"); + + assert_eq!(processed.status, ExecutionStatus::Completed); + assert_eq!(processed.result_best_candidate_id.as_deref(), Some("candidate-2")); + + let snapshot = store.load_execution("exec-worker").expect("reload"); + assert_eq!(snapshot.execution.status, ExecutionStatus::Completed); +} + +#[test] +fn bridge_worker_helper_processes_pending_executions() { + let root = temp_store_dir("bridge-worker"); + let store = FsExecutionStore::new(root.clone()); + let spec = spec(1); + ExecutionService::::submit_execution(&store, "exec-bridge-worker", &spec) + .expect("submit"); + + tick_bridge_worker_until_terminal(root.clone(), "exec-bridge-worker"); + + let snapshot = store.load_execution("exec-bridge-worker").expect("reload"); + assert_eq!(snapshot.execution.status, ExecutionStatus::Completed); +} + +#[test] +fn planning_execution_persists_queued_candidates_without_dispatching() { + let root = temp_store_dir("worker-plan-only"); + let store = FsExecutionStore::new(root); + let spec = spec(1); + ExecutionService::::submit_execution(&store, "exec-plan-only", &spec) + .expect("submit"); + + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + MockRuntime::new(), + store.clone(), + ); + let execution = 
service.plan_execution("exec-plan-only").expect("plan"); + + assert_eq!(execution.status, ExecutionStatus::Running); + let snapshot = store.load_execution("exec-plan-only").expect("reload"); + assert_eq!(snapshot.execution.status, ExecutionStatus::Running); + assert_eq!(snapshot.candidates.len(), 2); + assert_eq!(snapshot.candidates[0].status, CandidateStatus::Queued); + assert_eq!(snapshot.candidates[1].status, CandidateStatus::Queued); + assert_eq!(snapshot.candidates[0].runtime_run_id, None); + assert_eq!(snapshot.candidates[1].runtime_run_id, None); + let event_types: Vec<_> = snapshot.events.iter().map(|event| event.event_type).collect(); + assert!(event_types.contains(&void_control::orchestration::ControlEventType::ExecutionStarted)); + assert!(event_types.contains(&void_control::orchestration::ControlEventType::IterationStarted)); + assert_eq!( + event_types + .iter() + .filter(|&&event| event == void_control::orchestration::ControlEventType::CandidateQueued) + .count(), + 2 + ); +} + +#[test] +fn processing_reuses_preplanned_candidates_without_duplication() { + let root = temp_store_dir("worker-plan-then-process"); + let store = FsExecutionStore::new(root); + let spec = spec(1); + ExecutionService::::submit_execution(&store, "exec-plan-then-process", &spec) + .expect("submit"); + + let mut planner = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + MockRuntime::new(), + store.clone(), + ); + planner + .plan_execution("exec-plan-then-process") + .expect("plan"); + + let mut runtime = MockRuntime::new(); + runtime.seed_success( + "exec-run-candidate-1", + output("candidate-1", &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)]), + ); + runtime.seed_success( + "exec-run-candidate-2", + output("candidate-2", &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)]), + ); + let mut worker = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + store.clone(), + ); + let execution = worker + 
.process_execution("exec-plan-then-process") + .expect("process"); + + assert_eq!(execution.status, ExecutionStatus::Completed); + let snapshot = store + .load_execution("exec-plan-then-process") + .expect("reload"); + assert_eq!(snapshot.candidates.len(), 2); + assert_eq!(snapshot.candidates[0].created_seq, 1); + assert_eq!(snapshot.candidates[1].created_seq, 2); + assert_eq!(snapshot.candidates[0].status, CandidateStatus::Completed); + assert_eq!(snapshot.candidates[1].status, CandidateStatus::Completed); + assert_eq!( + snapshot + .events + .iter() + .filter(|event| { + event.event_type + == void_control::orchestration::ControlEventType::CandidateQueued + }) + .count(), + 2 + ); +} + +#[test] +fn dispatch_execution_once_runs_only_one_queued_candidate() { + let root = temp_store_dir("worker-dispatch-once"); + let store = FsExecutionStore::new(root); + let spec = spec(1); + ExecutionService::::submit_execution(&store, "exec-dispatch-once", &spec) + .expect("submit"); + + let mut planner = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + MockRuntime::new(), + store.clone(), + ); + planner.plan_execution("exec-dispatch-once").expect("plan"); + + let mut runtime = MockRuntime::new(); + runtime.seed_success( + "exec-run-candidate-1", + output("candidate-1", &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)]), + ); + runtime.seed_success( + "exec-run-candidate-2", + output("candidate-2", &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)]), + ); + let mut worker = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + store.clone(), + ); + let execution = worker + .dispatch_execution_once("exec-dispatch-once") + .expect("dispatch once"); + + assert_eq!(execution.status, ExecutionStatus::Running); + let snapshot = store.load_execution("exec-dispatch-once").expect("reload"); + assert_eq!(snapshot.execution.status, ExecutionStatus::Running); + assert_eq!(snapshot.candidates.len(), 2); + 
assert_eq!(snapshot.candidates[0].status, CandidateStatus::Completed); + assert_eq!(snapshot.candidates[1].status, CandidateStatus::Queued); + assert_eq!( + snapshot + .events + .iter() + .filter(|event| { + event.event_type + == void_control::orchestration::ControlEventType::CandidateDispatched + }) + .count(), + 1 + ); + assert_eq!( + snapshot + .events + .iter() + .filter(|event| { + event.event_type + == void_control::orchestration::ControlEventType::CandidateOutputCollected + }) + .count(), + 1 + ); +} + +#[test] +fn process_execution_skips_already_claimed_execution() { + let root = temp_store_dir("worker-claimed"); + let store = FsExecutionStore::new(root); + let spec = spec(1); + ExecutionService::::submit_execution(&store, "exec-claimed", &spec) + .expect("submit"); + assert!(store + .claim_execution("exec-claimed", "other-worker") + .expect("claim")); + + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + MockRuntime::new(), + store.clone(), + ); + let err = service + .process_execution("exec-claimed") + .expect_err("claimed execution should not process"); + assert_eq!(err.kind(), std::io::ErrorKind::WouldBlock); + + let snapshot = store.load_execution("exec-claimed").expect("reload"); + assert_eq!(snapshot.execution.status, ExecutionStatus::Pending); + assert_eq!( + store.load_claim("exec-claimed").expect("claim").as_deref(), + Some("other-worker") + ); +} + +#[test] +fn stale_claim_is_recovered_and_processing_can_proceed() { + let root = temp_store_dir("worker-stale-claim"); + let store = FsExecutionStore::new(root.clone()); + let spec = spec(1); + ExecutionService::::submit_execution(&store, "exec-stale-claim", &spec) + .expect("submit"); + + let execution_dir = root.join("exec-stale-claim"); + std::fs::write( + execution_dir.join("claim.txt"), + "dead-worker|1", + ) + .expect("seed stale claim"); + + let mut runtime = MockRuntime::new(); + runtime.seed_success( + "exec-run-candidate-1", + 
output("candidate-1", &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)]), + ); + runtime.seed_success( + "exec-run-candidate-2", + output("candidate-2", &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)]), + ); + + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + store.clone(), + ); + let processed = service + .process_execution("exec-stale-claim") + .expect("process"); + assert_eq!(processed.status, ExecutionStatus::Completed); + assert_eq!(store.load_claim("exec-stale-claim").expect("claim"), None); +} + +#[test] +fn refresh_claim_keeps_owned_claim_valid() { + let root = temp_store_dir("worker-refresh-claim"); + let store = FsExecutionStore::new(root.clone()); + let spec = spec(1); + ExecutionService::::submit_execution(&store, "exec-refresh-claim", &spec) + .expect("submit"); + + assert!(store + .claim_execution("exec-refresh-claim", "worker-a") + .expect("claim")); + store + .refresh_claim("exec-refresh-claim", "worker-a") + .expect("refresh"); + assert_eq!( + store.load_claim("exec-refresh-claim").expect("load claim").as_deref(), + Some("worker-a") + ); + store + .release_claim("exec-refresh-claim") + .expect("release claim"); +} + +#[test] +fn candidate_records_round_trip_through_store() { + let root = temp_store_dir("worker-candidates"); + let store = FsExecutionStore::new(root); + let spec = spec(1); + ExecutionService::::submit_execution(&store, "exec-candidates", &spec) + .expect("submit"); + + let queued = ExecutionCandidate::new( + "exec-candidates", + "candidate-1", + 1, + 0, + CandidateStatus::Queued, + ); + let mut queued = queued; + queued + .overrides + .insert("agent.prompt".to_string(), "a".to_string()); + let mut running = ExecutionCandidate::new( + "exec-candidates", + "candidate-2", + 2, + 0, + CandidateStatus::Running, + ); + running.runtime_run_id = Some("run-2".to_string()); + running + .overrides + .insert("agent.prompt".to_string(), "b".to_string()); + + 
store.save_candidate(&queued).expect("save queued"); + store.save_candidate(&running).expect("save running"); + + let snapshot = store.load_execution("exec-candidates").expect("reload"); + assert_eq!(snapshot.candidates.len(), 2); + assert_eq!(snapshot.candidates[0].candidate_id, "candidate-1"); + assert_eq!(snapshot.candidates[0].status, CandidateStatus::Queued); + assert_eq!( + snapshot.candidates[0] + .overrides + .get("agent.prompt") + .map(String::as_str), + Some("a") + ); + assert_eq!(snapshot.candidates[1].candidate_id, "candidate-2"); + assert_eq!(snapshot.candidates[1].status, CandidateStatus::Running); + assert_eq!(snapshot.candidates[1].runtime_run_id.as_deref(), Some("run-2")); + assert_eq!( + snapshot.candidates[1] + .overrides + .get("agent.prompt") + .map(String::as_str), + Some("b") + ); +} + +#[test] +fn process_execution_persists_terminal_candidate_records() { + let root = temp_store_dir("worker-candidate-lifecycle"); + let store = FsExecutionStore::new(root); + let spec = spec(1); + ExecutionService::::submit_execution( + &store, + "exec-candidate-lifecycle", + &spec, + ) + .expect("submit"); + + let mut runtime = MockRuntime::new(); + runtime.seed_success( + "exec-run-candidate-1", + output("candidate-1", &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)]), + ); + runtime.seed_success( + "exec-run-candidate-2", + output("candidate-2", &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)]), + ); + + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + store.clone(), + ); + service + .process_execution("exec-candidate-lifecycle") + .expect("process"); + + let snapshot = store.load_execution("exec-candidate-lifecycle").expect("reload"); + assert_eq!(snapshot.candidates.len(), 2); + assert_eq!(snapshot.candidates[0].candidate_id, "candidate-1"); + assert_eq!(snapshot.candidates[0].status, CandidateStatus::Completed); + assert_eq!( + snapshot.candidates[0].runtime_run_id.as_deref(), + 
Some("exec-run-candidate-1") + ); + assert_eq!( + snapshot.candidates[0] + .overrides + .get("agent.prompt") + .map(String::as_str), + Some("a") + ); + assert_eq!(snapshot.candidates[0].succeeded, Some(true)); + assert_eq!(snapshot.candidates[0].metrics.get("latency_p99_ms"), Some(&90.0)); + assert_eq!(snapshot.candidates[1].candidate_id, "candidate-2"); + assert_eq!(snapshot.candidates[1].status, CandidateStatus::Completed); + assert_eq!( + snapshot.candidates[1].runtime_run_id.as_deref(), + Some("exec-run-candidate-2") + ); + assert_eq!( + snapshot.candidates[1] + .overrides + .get("agent.prompt") + .map(String::as_str), + Some("b") + ); + assert_eq!(snapshot.candidates[1].succeeded, Some(true)); + assert_eq!(snapshot.candidates[1].metrics.get("latency_p99_ms"), Some(&85.0)); +} + +#[test] +fn process_execution_persists_mixed_candidate_terminal_states() { + let root = temp_store_dir("worker-candidate-mixed"); + let store = FsExecutionStore::new(root); + let spec = spec(1); + ExecutionService::::submit_execution(&store, "exec-candidate-mixed", &spec) + .expect("submit"); + + let mut runtime = MockRuntime::new(); + runtime.seed_failure("exec-run-candidate-1"); + runtime.seed_success( + "exec-run-candidate-2", + output("candidate-2", &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)]), + ); + + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + store.clone(), + ); + let execution = service + .process_execution("exec-candidate-mixed") + .expect("process"); + + assert_eq!(execution.status, ExecutionStatus::Completed); + let snapshot = store.load_execution("exec-candidate-mixed").expect("reload"); + assert_eq!(snapshot.candidates.len(), 2); + assert_eq!(snapshot.candidates[0].candidate_id, "candidate-1"); + assert_eq!(snapshot.candidates[0].status, CandidateStatus::Failed); + assert_eq!(snapshot.candidates[0].succeeded, Some(false)); + assert_eq!(snapshot.candidates[1].candidate_id, "candidate-2"); + 
assert_eq!(snapshot.candidates[1].status, CandidateStatus::Completed); + assert_eq!(snapshot.candidates[1].succeeded, Some(true)); + assert_eq!(snapshot.candidates[1].metrics.get("cost_usd"), Some(&0.02)); +} + +#[test] +fn process_execution_releases_claim_after_completion() { + let root = temp_store_dir("worker-release"); + let store = FsExecutionStore::new(root); + let spec = spec(1); + ExecutionService::::submit_execution(&store, "exec-release", &spec) + .expect("submit"); + + let mut runtime = MockRuntime::new(); + runtime.seed_success( + "exec-run-candidate-1", + output("candidate-1", &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)]), + ); + runtime.seed_success( + "exec-run-candidate-2", + output("candidate-2", &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)]), + ); + + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + store.clone(), + ); + let processed = service.process_execution("exec-release").expect("process"); + assert_eq!(processed.status, ExecutionStatus::Completed); + assert_eq!(store.load_claim("exec-release").expect("claim"), None); + + let snapshot = store.load_execution("exec-release").expect("reload"); + assert_eq!( + snapshot.execution.result_best_candidate_id.as_deref(), + Some("candidate-2") + ); + assert_eq!(snapshot.execution.completed_iterations, 1); + assert_eq!(snapshot.execution.failure_counts.total_candidate_failures, 0); +} + +#[test] +fn process_execution_persists_lifecycle_events() { + let root = temp_store_dir("worker-events"); + let store = FsExecutionStore::new(root); + let spec = spec(1); + ExecutionService::::submit_execution(&store, "exec-events", &spec) + .expect("submit"); + + let mut runtime = MockRuntime::new(); + runtime.seed_success( + "exec-run-candidate-1", + output("candidate-1", &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)]), + ); + runtime.seed_success( + "exec-run-candidate-2", + output("candidate-2", &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)]), + ); + 
+ let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + store.clone(), + ); + service.process_execution("exec-events").expect("process"); + + let snapshot = store.load_execution("exec-events").expect("reload"); + let event_types: Vec<_> = snapshot.events.iter().map(|event| event.event_type).collect(); + assert!(event_types.contains(&void_control::orchestration::ControlEventType::ExecutionSubmitted)); + assert!(event_types.contains(&void_control::orchestration::ControlEventType::ExecutionStarted)); + assert!(event_types.contains(&void_control::orchestration::ControlEventType::CandidateQueued)); + assert!(event_types.contains(&void_control::orchestration::ControlEventType::CandidateDispatched)); + assert!(event_types.contains( + &void_control::orchestration::ControlEventType::CandidateOutputCollected + )); + assert!(event_types.contains(&void_control::orchestration::ControlEventType::ExecutionCompleted)); +} + +#[test] +fn pause_interrupts_active_processing_and_persists_paused_status() { + let root = temp_store_dir("worker-pause-active"); + let store = FsExecutionStore::new(root); + let spec = spec(1); + ExecutionService::::submit_execution(&store, "exec-pause-active", &spec) + .expect("submit"); + + let pause_store = store.clone(); + std::thread::spawn(move || { + std::thread::sleep(std::time::Duration::from_millis(150)); + let _ = ExecutionService::::update_execution_status( + &pause_store, + "exec-pause-active", + void_control::orchestration::ExecutionAction::Pause, + ); + }); + + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + MockRuntime::new(), + store.clone(), + ); + let err = service + .process_execution("exec-pause-active") + .expect_err("pause should interrupt processing"); + assert_eq!(err.kind(), std::io::ErrorKind::WouldBlock); + + let snapshot = store.load_execution("exec-pause-active").expect("reload"); + assert_eq!(snapshot.execution.status, 
ExecutionStatus::Paused); + assert!(snapshot.events.iter().any(|event| { + event.event_type == void_control::orchestration::ControlEventType::ExecutionPaused + })); +} + +#[test] +fn cancel_interrupts_active_processing_and_returns_canceled_execution() { + let root = temp_store_dir("worker-cancel-active"); + let store = FsExecutionStore::new(root); + let spec = spec(1); + ExecutionService::::submit_execution(&store, "exec-cancel-active", &spec) + .expect("submit"); + + let cancel_store = store.clone(); + std::thread::spawn(move || { + std::thread::sleep(std::time::Duration::from_millis(150)); + let _ = ExecutionService::::update_execution_status( + &cancel_store, + "exec-cancel-active", + void_control::orchestration::ExecutionAction::Cancel, + ); + }); + + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + MockRuntime::new(), + store.clone(), + ); + let execution = service + .process_execution("exec-cancel-active") + .expect("cancel should return terminal execution"); + assert_eq!(execution.status, ExecutionStatus::Canceled); + + let snapshot = store.load_execution("exec-cancel-active").expect("reload"); + assert_eq!(snapshot.execution.status, ExecutionStatus::Canceled); + assert!(snapshot.events.iter().any(|event| { + event.event_type == void_control::orchestration::ControlEventType::ExecutionCanceled + })); +} + +#[test] +fn resumed_execution_can_be_processed_by_worker_loop() { + let root = temp_store_dir("worker-resume"); + let store = FsExecutionStore::new(root.clone()); + let spec = spec(1); + ExecutionService::::submit_execution(&store, "exec-resume", &spec) + .expect("submit"); + let mut paused = store.load_execution("exec-resume").expect("load").execution; + paused.status = ExecutionStatus::Paused; + store.save_execution(&paused).expect("save paused"); + + ExecutionService::::update_execution_status( + &store, + "exec-resume", + void_control::orchestration::ExecutionAction::Resume, + ) + .expect("resume"); + + 
tick_bridge_worker_until_terminal(root, "exec-resume"); + + let snapshot = store.load_execution("exec-resume").expect("reload"); + assert_eq!(snapshot.execution.status, ExecutionStatus::Completed); + assert!(snapshot.events.iter().any(|event| { + event.event_type == void_control::orchestration::ControlEventType::ExecutionResumed + })); +} + +#[test] +fn paused_execution_does_not_block_other_queued_work_in_bridge_scheduler() { + let root = temp_store_dir("worker-paused-fairness"); + let store = FsExecutionStore::new(root.clone()); + let spec = spec(1); + + ExecutionService::::submit_execution(&store, "exec-paused", &spec) + .expect("submit paused"); + ExecutionService::::submit_execution(&store, "exec-running", &spec) + .expect("submit running"); + + let mut planner = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + MockRuntime::new(), + store.clone(), + ); + planner.plan_execution("exec-paused").expect("plan paused"); + planner.plan_execution("exec-running").expect("plan running"); + + let mut paused = store.load_execution("exec-paused").expect("load paused").execution; + paused.status = ExecutionStatus::Paused; + store.save_execution(&paused).expect("save paused"); + + let mut runtime = MockRuntime::new(); + runtime.seed_success( + "exec-run-candidate-3", + output("candidate-1", &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)]), + ); + runtime.seed_success( + "exec-run-candidate-4", + output("candidate-2", &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)]), + ); + + void_control::bridge::process_pending_executions_once_for_test( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + root.clone(), + ) + .expect("process pending"); + + let paused_snapshot = store.load_execution("exec-paused").expect("reload paused"); + assert_eq!(paused_snapshot.execution.status, ExecutionStatus::Paused); + assert!(paused_snapshot + .candidates + .iter() + .all(|candidate| candidate.status == CandidateStatus::Queued)); + + let running_snapshot 
= store.load_execution("exec-running").expect("reload running"); + assert_eq!(running_snapshot.execution.status, ExecutionStatus::Running); + assert!(running_snapshot + .candidates + .iter() + .any(|candidate| candidate.status == CandidateStatus::Completed)); +} + +#[test] +fn bridge_scheduler_dispatches_earliest_queued_execution_first() { + let root = temp_store_dir("worker-bridge-fifo"); + let store = FsExecutionStore::new(root.clone()); + let spec = spec(1); + + ExecutionService::::submit_execution(&store, "exec-early", &spec) + .expect("submit early"); + ExecutionService::::submit_execution(&store, "exec-late", &spec) + .expect("submit late"); + + let mut planner = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + MockRuntime::new(), + store.clone(), + ); + planner.plan_execution("exec-early").expect("plan early"); + planner.plan_execution("exec-late").expect("plan late"); + + let mut runtime = MockRuntime::new(); + runtime.seed_success( + "exec-run-candidate-1", + output("candidate-1", &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)]), + ); + runtime.seed_success( + "exec-run-candidate-2", + output("candidate-2", &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)]), + ); + runtime.seed_success( + "exec-run-candidate-3", + output("candidate-1", &[("latency_p99_ms", 88.0), ("cost_usd", 0.03)]), + ); + runtime.seed_success( + "exec-run-candidate-4", + output("candidate-2", &[("latency_p99_ms", 84.0), ("cost_usd", 0.02)]), + ); + + void_control::bridge::process_pending_executions_once_for_test( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + root.clone(), + ) + .expect("process pending"); + + let early = store.load_execution("exec-early").expect("reload early"); + let late = store.load_execution("exec-late").expect("reload late"); + + assert_eq!(early.execution.status, ExecutionStatus::Running); + assert_eq!(late.execution.status, ExecutionStatus::Running); + assert_eq!(early.candidates[0].status, 
CandidateStatus::Completed); + assert_eq!(early.candidates[1].status, CandidateStatus::Queued); + assert_eq!(late.candidates[0].status, CandidateStatus::Completed); + assert_eq!(late.candidates[1].status, CandidateStatus::Queued); +} + +fn spec(max_iterations: u32) -> ExecutionSpec { + ExecutionSpec { + mode: "swarm".to_string(), + goal: "optimize latency".to_string(), + workflow: void_control::orchestration::WorkflowTemplateRef { + template: "fixtures/sample.vbrun".to_string(), + }, + policy: OrchestrationPolicy { + budget: void_control::orchestration::BudgetPolicy { + max_iterations: Some(max_iterations), + max_child_runs: None, + max_wall_clock_secs: Some(60), + max_cost_usd_millis: None, + }, + concurrency: void_control::orchestration::ConcurrencyPolicy { + max_concurrent_candidates: 2, + }, + convergence: void_control::orchestration::ConvergencePolicy { + strategy: "exhaustive".to_string(), + min_score: None, + max_iterations_without_improvement: None, + }, + max_candidate_failures_per_iteration: 10, + missing_output_policy: "mark_failed".to_string(), + iteration_failure_policy: "fail_execution".to_string(), + }, + evaluation: void_control::orchestration::EvaluationConfig { + scoring_type: "weighted_metrics".to_string(), + weights: BTreeMap::from([ + ("latency_p99_ms".to_string(), -0.6), + ("cost_usd".to_string(), -0.4), + ]), + pass_threshold: Some(0.7), + ranking: "highest_score".to_string(), + tie_breaking: "cost_usd".to_string(), + }, + variation: VariationConfig::explicit( + 2, + vec![ + VariationProposal { + overrides: BTreeMap::from([("agent.prompt".to_string(), "a".to_string())]), + }, + VariationProposal { + overrides: BTreeMap::from([("agent.prompt".to_string(), "b".to_string())]), + }, + ], + ), + swarm: true, + } +} + +fn output(candidate_id: &str, metrics: &[(&str, f64)]) -> CandidateOutput { + CandidateOutput::new( + candidate_id.to_string(), + true, + metrics.iter().map(|(k, v)| (k.to_string(), *v)).collect(), + ) +} + +fn temp_store_dir(label: 
&str) -> std::path::PathBuf { + let nanos = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("clock") + .as_nanos(); + let dir = std::env::temp_dir().join(format!("void-control-worker-{label}-{nanos}")); + std::fs::create_dir_all(&dir).expect("create temp dir"); + dir +} + +fn tick_bridge_worker_until_terminal( + root: std::path::PathBuf, + execution_id: &str, +) { + let store = FsExecutionStore::new(root.clone()); + for _ in 0..6 { + let mut runtime = MockRuntime::new(); + runtime.seed_success( + "exec-run-candidate-1", + output("candidate-1", &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)]), + ); + runtime.seed_success( + "exec-run-candidate-2", + output("candidate-2", &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)]), + ); + void_control::bridge::process_pending_executions_once_for_test( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + root.clone(), + ) + .expect("process pending"); + let snapshot = store.load_execution(execution_id).expect("reload"); + if matches!( + snapshot.execution.status, + ExecutionStatus::Completed | ExecutionStatus::Failed | ExecutionStatus::Canceled + ) { + return; + } + } + panic!("execution did not reach terminal state"); +} diff --git a/tests/strategy_scenarios.rs b/tests/strategy_scenarios.rs new file mode 100644 index 0000000..1f72926 --- /dev/null +++ b/tests/strategy_scenarios.rs @@ -0,0 +1,673 @@ +use std::collections::BTreeMap; + +use void_control::orchestration::{ + CandidateOutput, ControlEventType, ExecutionService, ExecutionSpec, ExecutionStatus, + FsExecutionStore, GlobalConfig, OrchestrationPolicy, VariationConfig, VariationProposal, + VariationSelection, +}; +#[cfg(feature = "serde")] +use void_control::orchestration::{ + CommunicationIntent, CommunicationIntentAudience, CommunicationIntentKind, + CommunicationIntentPriority, RoutedMessageStatus, +}; +use void_control::runtime::MockRuntime; + +#[test] +fn 
swarm_incident_mitigation_explores_distinct_hypotheses_and_finds_best_family() { + let store_dir = temp_store_dir("swarm-incident"); + let mut runtime = MockRuntime::new(); + runtime.seed_success( + "exec-run-candidate-1", + metrics_output_with_intents( + "candidate-1", + 115.0, + 0.08, + 0.91, + vec![scenario_intent( + "intent-incident-signal", + CommunicationIntentAudience::Leader, + "retry raised errors under peak load", + None, + )], + ), + ); + runtime.seed_success( + "exec-run-candidate-2", + metrics_output_with_intents( + "candidate-2", + 72.0, + 0.04, + 0.99, + vec![scenario_intent( + "intent-incident-broadcast", + CommunicationIntentAudience::Broadcast, + "rate limit plus cache fallback stabilized latency", + None, + )], + ), + ); + runtime.seed_success( + "exec-run-candidate-3", + metrics_output("candidate-3", 88.0, 0.05, 0.97), + ); + runtime.seed_success( + "exec-run-candidate-4", + metrics_output("candidate-4", 96.0, 0.06, 0.94), + ); + runtime.seed_success( + "exec-run-candidate-5", + metrics_output("candidate-5", 101.0, 0.05, 0.95), + ); + for idx in 6..=10 { + runtime.seed_success( + &format!("exec-run-candidate-{idx}"), + metrics_output( + &format!("candidate-{idx}"), + 90.0 + idx as f64, + 0.05, + 0.96, + ), + ); + } + + let store = FsExecutionStore::new(store_dir.clone()); + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 8, + }, + runtime, + store, + ); + let execution = service + .run_to_completion(swarm_incident_message_box_spec()) + .expect("run execution"); + + let store = FsExecutionStore::new(store_dir); + let snapshot = store.load_execution(&execution.execution_id).expect("load execution"); + let intents = store.load_intents(&execution.execution_id).expect("load intents"); + let messages = store + .load_routed_messages(&execution.execution_id) + .expect("load routed messages"); + let inbox_one = store + .load_inbox_snapshot(&execution.execution_id, 1, "candidate-1") + .expect("load candidate-1 
inbox"); + let inbox_two = store + .load_inbox_snapshot(&execution.execution_id, 1, "candidate-2") + .expect("load candidate-2 inbox"); + let best = snapshot + .candidates + .iter() + .filter(|candidate| Some(&candidate.candidate_id) == execution.result_best_candidate_id.as_ref()) + .max_by_key(|candidate| candidate.created_seq) + .expect("best candidate"); + + let explored: Vec<_> = snapshot + .candidates + .iter() + .map(|candidate| candidate.overrides["mitigation.strategy"].clone()) + .collect(); + + assert_eq!(execution.status, ExecutionStatus::Completed); + assert_eq!(execution.completed_iterations, 2); + assert_eq!( + best.overrides.get("mitigation.strategy").map(String::as_str), + Some("rate_limit_cache") + ); + assert!(explored.starts_with(&[ + "retry".to_string(), + "rate_limit_cache".to_string(), + "circuit_breaker".to_string(), + "queue_buffering".to_string(), + "reduce_concurrency".to_string(), + ])); + assert_eq!(intents.len(), 2); + assert_eq!( + messages + .iter() + .filter(|message| message.status == RoutedMessageStatus::Routed) + .count(), + 2 + ); + assert_eq!( + messages + .iter() + .filter(|message| message.status == RoutedMessageStatus::Delivered) + .count(), + 3 + ); + assert_eq!(inbox_one.entries.len(), 2); + assert_eq!(inbox_two.entries.len(), 1); + assert_event_counts( + &snapshot.events, + &[ + (ControlEventType::CandidateQueued, 10), + (ControlEventType::CandidateDispatched, 10), + (ControlEventType::CandidateOutputCollected, 10), + (ControlEventType::CandidateScored, 2), + (ControlEventType::CommunicationIntentEmitted, 2), + (ControlEventType::MessageRouted, 2), + (ControlEventType::MessageDelivered, 3), + (ControlEventType::ExecutionCompleted, 1), + ], + ); +} + +#[test] +fn swarm_prompt_optimization_finds_best_style_cluster() { + let store_dir = temp_store_dir("swarm-prompt"); + let mut runtime = MockRuntime::new(); + runtime.seed_success("exec-run-candidate-1", prompt_output("candidate-1", 0.74, 0.70)); + 
runtime.seed_success("exec-run-candidate-2", prompt_output("candidate-2", 0.89, 0.92)); + runtime.seed_success("exec-run-candidate-3", prompt_output("candidate-3", 0.78, 0.76)); + runtime.seed_success("exec-run-candidate-4", prompt_output("candidate-4", 0.69, 0.65)); + runtime.seed_success("exec-run-candidate-5", prompt_output("candidate-5", 0.81, 0.83)); + runtime.seed_success("exec-run-candidate-6", prompt_output("candidate-6", 0.76, 0.72)); + runtime.seed_success("exec-run-candidate-7", prompt_output("candidate-7", 0.72, 0.90)); + runtime.seed_success("exec-run-candidate-8", prompt_output("candidate-8", 0.96, 0.97)); + + let store = FsExecutionStore::new(store_dir.clone()); + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 8, + }, + runtime, + store, + ); + let execution = service + .run_to_completion(swarm_prompt_spec()) + .expect("run execution"); + + let snapshot = FsExecutionStore::new(store_dir) + .load_execution(&execution.execution_id) + .expect("load execution"); + let best = snapshot + .candidates + .iter() + .find(|candidate| Some(&candidate.candidate_id) == execution.result_best_candidate_id.as_ref()) + .expect("best candidate"); + + assert_eq!(execution.status, ExecutionStatus::Completed); + assert_eq!( + best.overrides.get("agent.prompt").map(String::as_str), + Some("hybrid_friendly_concise_structured") + ); + assert_eq!(snapshot.candidates.len(), 8); + assert_event_counts( + &snapshot.events, + &[ + (ControlEventType::CandidateQueued, 8), + (ControlEventType::CandidateDispatched, 8), + (ControlEventType::CandidateOutputCollected, 8), + (ControlEventType::ExecutionCompleted, 1), + ], + ); +} + +#[test] +fn search_rate_limit_tuning_refines_known_good_direction() { + let store_dir = temp_store_dir("search-rate-limit"); + let mut runtime = MockRuntime::new(); + runtime.seed_success( + "exec-run-candidate-1", + metrics_output_with_intents( + "candidate-1", + 95.0, + 0.06, + 0.96, + vec![scenario_intent( + 
"intent-search-parent", + CommunicationIntentAudience::Leader, + "baseline rate limit looks promising", + None, + )], + ), + ); + runtime.seed_success( + "exec-run-candidate-2", + metrics_output("candidate-2", 82.0, 0.05, 0.98), + ); + runtime.seed_success( + "exec-run-candidate-3", + metrics_output_with_intents( + "candidate-3", + 70.0, + 0.05, + 0.99, + vec![scenario_intent( + "intent-search-child", + CommunicationIntentAudience::Leader, + "refine with adaptive jitter", + Some("intent-search-parent"), + )], + ), + ); + + let store = FsExecutionStore::new(store_dir.clone()); + ExecutionService::::submit_execution( + &store, + "exec-search-rate-limit", + &search_rate_limit_spec(), + ) + .expect("submit"); + + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + store, + ); + service + .plan_execution("exec-search-rate-limit") + .expect("plan execution"); + + for _ in 0..6 { + let execution = service + .dispatch_execution_once("exec-search-rate-limit") + .expect("dispatch"); + if execution.status == ExecutionStatus::Completed { + break; + } + } + + let store = FsExecutionStore::new(store_dir); + let snapshot = store + .load_execution("exec-search-rate-limit") + .expect("load execution"); + let intents = store + .load_intents("exec-search-rate-limit") + .expect("load intents"); + let messages = store + .load_routed_messages("exec-search-rate-limit") + .expect("load routed messages"); + let inbox = store + .load_inbox_snapshot("exec-search-rate-limit", 1, "candidate-1") + .expect("load refinement inbox"); + let iter0_thresholds: Vec<_> = snapshot + .candidates + .iter() + .filter(|candidate| candidate.iteration == 0) + .map(|candidate| candidate.overrides["rate_limit.threshold"].clone()) + .collect(); + let iter1_thresholds: Vec<_> = snapshot + .candidates + .iter() + .filter(|candidate| candidate.iteration == 1) + .map(|candidate| candidate.overrides["rate_limit.threshold"].clone()) + .collect(); + + 
assert_eq!(snapshot.execution.status, ExecutionStatus::Completed); + assert_eq!(iter0_thresholds, vec!["80".to_string(), "100".to_string()]); + assert_eq!(iter1_thresholds, vec!["120".to_string()]); + assert_eq!(snapshot.accumulator.search_phase.as_deref(), Some("refine")); + assert_eq!(intents.len(), 2); + assert_eq!( + intents + .iter() + .find(|intent| intent.intent_id == "intent-search-child") + .and_then(|intent| intent.caused_by.as_deref()), + Some("intent-search-parent") + ); + assert!(messages.iter().any(|message| { + message.intent_id == "intent-search-parent" + && message.status == RoutedMessageStatus::Delivered + && message.delivery_iteration == 1 + })); + assert!(inbox + .entries + .iter() + .any(|entry| entry.intent_id == "intent-search-parent")); + assert_event_counts( + &snapshot.events, + &[ + (ControlEventType::CandidateQueued, 3), + (ControlEventType::CandidateDispatched, 3), + (ControlEventType::CandidateScored, 2), + (ControlEventType::CommunicationIntentEmitted, 2), + (ControlEventType::MessageRouted, 2), + (ControlEventType::MessageDelivered, 1), + (ControlEventType::ExecutionCompleted, 1), + ], + ); +} + +#[test] +fn search_pipeline_optimization_refines_known_bottleneck_config() { + let store_dir = temp_store_dir("search-pipeline"); + let mut runtime = MockRuntime::new(); + runtime.seed_success("exec-run-candidate-1", pipeline_output("candidate-1", 0.72, 0.78)); + runtime.seed_success("exec-run-candidate-2", pipeline_output("candidate-2", 0.84, 0.86)); + runtime.seed_success("exec-run-candidate-3", pipeline_output("candidate-3", 0.93, 0.95)); + runtime.seed_success("exec-run-candidate-4", pipeline_output("candidate-4", 0.80, 0.82)); + + let store = FsExecutionStore::new(store_dir.clone()); + ExecutionService::::submit_execution( + &store, + "exec-search-pipeline", + &search_pipeline_spec(), + ) + .expect("submit"); + + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + store, + ); + 
service + .plan_execution("exec-search-pipeline") + .expect("plan execution"); + + for _ in 0..6 { + let execution = service + .dispatch_execution_once("exec-search-pipeline") + .expect("dispatch"); + if execution.status == ExecutionStatus::Completed { + break; + } + } + + let snapshot = FsExecutionStore::new(store_dir) + .load_execution("exec-search-pipeline") + .expect("load execution"); + let mut iter1_prompts: Vec<_> = snapshot + .candidates + .iter() + .filter(|candidate| candidate.iteration == 1) + .map(|candidate| candidate.overrides["transform.config"].clone()) + .collect(); + iter1_prompts.sort(); + let best = snapshot + .candidates + .iter() + .filter(|candidate| Some(&candidate.candidate_id) == snapshot.execution.result_best_candidate_id.as_ref()) + .max_by_key(|candidate| candidate.created_seq) + .expect("best candidate"); + + assert_eq!(snapshot.execution.status, ExecutionStatus::Completed); + assert_eq!( + iter1_prompts, + vec![ + "batch1024_parallel2".to_string(), + "batch512_parallel4_streaming".to_string(), + ] + ); + assert_eq!( + best.overrides.get("transform.config").map(String::as_str), + Some("batch512_parallel4_streaming") + ); + assert_eq!(snapshot.accumulator.search_phase.as_deref(), Some("refine")); +} + +fn swarm_incident_spec() -> ExecutionSpec { + ExecutionSpec { + mode: "swarm".to_string(), + goal: "mitigate latency and errors".to_string(), + workflow: workflow(), + policy: swarm_policy(1), + evaluation: infra_evaluation(), + variation: VariationConfig::explicit( + 5, + vec![ + proposal(&[("mitigation.strategy", "retry")]), + proposal(&[("mitigation.strategy", "rate_limit_cache")]), + proposal(&[("mitigation.strategy", "circuit_breaker")]), + proposal(&[("mitigation.strategy", "queue_buffering")]), + proposal(&[("mitigation.strategy", "reduce_concurrency")]), + ], + ), + swarm: true, + } +} + +fn swarm_incident_message_box_spec() -> ExecutionSpec { + let mut spec = swarm_incident_spec(); + spec.policy = swarm_policy(2); + spec +} + +fn 
swarm_prompt_spec() -> ExecutionSpec { + ExecutionSpec { + mode: "swarm".to_string(), + goal: "improve support agent prompt quality".to_string(), + workflow: workflow(), + policy: swarm_policy(1), + evaluation: prompt_evaluation(), + variation: VariationConfig::explicit( + 8, + vec![ + proposal(&[("agent.prompt", "formal")]), + proposal(&[("agent.prompt", "friendly_concise_structured")]), + proposal(&[("agent.prompt", "concise")]), + proposal(&[("agent.prompt", "verbose")]), + proposal(&[("agent.prompt", "step_by_step")]), + proposal(&[("agent.prompt", "empathetic")]), + proposal(&[("agent.prompt", "strict_policy")]), + proposal(&[("agent.prompt", "hybrid_friendly_concise_structured")]), + ], + ), + swarm: true, + } +} + +fn search_rate_limit_spec() -> ExecutionSpec { + ExecutionSpec { + mode: "search".to_string(), + goal: "tune rate limiting".to_string(), + workflow: workflow(), + policy: search_policy(2), + evaluation: infra_evaluation(), + variation: VariationConfig::parameter_space( + 4, + VariationSelection::Sequential, + BTreeMap::from([( + "rate_limit.threshold".to_string(), + vec![ + "80".to_string(), + "100".to_string(), + "120".to_string(), + "140".to_string(), + ], + )]), + ), + swarm: true, + } +} + +fn search_pipeline_spec() -> ExecutionSpec { + ExecutionSpec { + mode: "search".to_string(), + goal: "tune transform bottleneck".to_string(), + workflow: workflow(), + policy: search_policy(2), + evaluation: pipeline_evaluation(), + variation: VariationConfig::explicit( + 3, + vec![ + proposal(&[("transform.config", "current_transform")]), + proposal(&[("transform.config", "batch256_parallel4")]), + proposal(&[("transform.config", "batch512_parallel4_streaming")]), + proposal(&[("transform.config", "batch1024_parallel2")]), + ], + ), + swarm: true, + } +} + +fn workflow() -> void_control::orchestration::WorkflowTemplateRef { + void_control::orchestration::WorkflowTemplateRef { + template: "fixtures/sample.vbrun".to_string(), + } +} + +fn 
swarm_policy(max_iterations: u32) -> OrchestrationPolicy { + base_policy(max_iterations, 10) +} + +fn search_policy(max_iterations: u32) -> OrchestrationPolicy { + base_policy(max_iterations, 10) +} + +fn base_policy(max_iterations: u32, max_failures: u32) -> OrchestrationPolicy { + OrchestrationPolicy { + budget: void_control::orchestration::BudgetPolicy { + max_iterations: Some(max_iterations), + max_child_runs: None, + max_wall_clock_secs: Some(60), + max_cost_usd_millis: None, + }, + concurrency: void_control::orchestration::ConcurrencyPolicy { + max_concurrent_candidates: 8, + }, + convergence: void_control::orchestration::ConvergencePolicy { + strategy: "exhaustive".to_string(), + min_score: None, + max_iterations_without_improvement: None, + }, + max_candidate_failures_per_iteration: max_failures, + missing_output_policy: "mark_failed".to_string(), + iteration_failure_policy: "fail_execution".to_string(), + } +} + +fn infra_evaluation() -> void_control::orchestration::EvaluationConfig { + void_control::orchestration::EvaluationConfig { + scoring_type: "weighted_metrics".to_string(), + weights: BTreeMap::from([ + ("latency_p99_ms".to_string(), -0.5), + ("cost_usd".to_string(), -0.1), + ("success_rate".to_string(), 0.4), + ]), + pass_threshold: Some(0.7), + ranking: "highest_score".to_string(), + tie_breaking: "cost_usd".to_string(), + } +} + +fn prompt_evaluation() -> void_control::orchestration::EvaluationConfig { + void_control::orchestration::EvaluationConfig { + scoring_type: "weighted_metrics".to_string(), + weights: BTreeMap::from([ + ("quality_score".to_string(), 0.6), + ("policy_score".to_string(), 0.4), + ]), + pass_threshold: Some(0.7), + ranking: "highest_score".to_string(), + tie_breaking: "quality_score".to_string(), + } +} + +fn pipeline_evaluation() -> void_control::orchestration::EvaluationConfig { + void_control::orchestration::EvaluationConfig { + scoring_type: "weighted_metrics".to_string(), + weights: BTreeMap::from([ + 
("throughput".to_string(), 0.6), + ("stability".to_string(), 0.4), + ]), + pass_threshold: Some(0.7), + ranking: "highest_score".to_string(), + tie_breaking: "throughput".to_string(), + } +} + +fn proposal(items: &[(&str, &str)]) -> VariationProposal { + VariationProposal { + overrides: items + .iter() + .map(|(key, value)| ((*key).to_string(), (*value).to_string())) + .collect(), + } +} + +fn metrics_output(candidate_id: &str, latency_p99_ms: f64, cost_usd: f64, success_rate: f64) -> CandidateOutput { + CandidateOutput::new( + candidate_id.to_string(), + true, + BTreeMap::from([ + ("latency_p99_ms".to_string(), latency_p99_ms), + ("cost_usd".to_string(), cost_usd), + ("success_rate".to_string(), success_rate), + ]), + ) +} + +#[cfg(feature = "serde")] +fn metrics_output_with_intents( + candidate_id: &str, + latency_p99_ms: f64, + cost_usd: f64, + success_rate: f64, + intents: Vec, +) -> CandidateOutput { + metrics_output(candidate_id, latency_p99_ms, cost_usd, success_rate).with_intents(intents) +} + +fn prompt_output(candidate_id: &str, quality_score: f64, policy_score: f64) -> CandidateOutput { + CandidateOutput::new( + candidate_id.to_string(), + true, + BTreeMap::from([ + ("quality_score".to_string(), quality_score), + ("policy_score".to_string(), policy_score), + ]), + ) +} + +fn pipeline_output(candidate_id: &str, throughput: f64, stability: f64) -> CandidateOutput { + CandidateOutput::new( + candidate_id.to_string(), + true, + BTreeMap::from([ + ("throughput".to_string(), throughput), + ("stability".to_string(), stability), + ]), + ) +} + +#[cfg(feature = "serde")] +fn scenario_intent( + intent_id: &str, + audience: CommunicationIntentAudience, + summary_text: &str, + caused_by: Option<&str>, +) -> CommunicationIntent { + CommunicationIntent { + intent_id: intent_id.to_string(), + from_candidate_id: "placeholder".to_string(), + iteration: 0, + kind: CommunicationIntentKind::Proposal, + audience, + payload: serde_json::json!({ + "summary_text": summary_text, 
+ "strategy_hint": "scenario", + }), + priority: CommunicationIntentPriority::Normal, + ttl_iterations: 1, + caused_by: caused_by.map(str::to_string), + context: None, + } +} + +fn assert_event_counts( + events: &[void_control::orchestration::ControlEventEnvelope], + expected: &[(ControlEventType, usize)], +) { + for (event_type, count) in expected { + let actual = events + .iter() + .filter(|event| event.event_type == *event_type) + .count(); + assert_eq!(actual, *count, "{event_type:?}"); + } +} + +fn temp_store_dir(label: &str) -> std::path::PathBuf { + let nanos = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("clock") + .as_nanos(); + let dir = std::env::temp_dir().join(format!("void-control-scenarios-{label}-{nanos}")); + std::fs::create_dir_all(&dir).expect("create temp dir"); + dir +} diff --git a/tests/void_box_contract.rs b/tests/void_box_contract.rs index baf5c89..0c092b4 100644 --- a/tests/void_box_contract.rs +++ b/tests/void_box_contract.rs @@ -21,6 +21,10 @@ enum DefaultSpecKind { LongRunning, Timeout, BaselineSuccess, + StructuredOutputSuccess, + StructuredOutputWithArtifact, + MissingStructuredOutput, + MalformedStructuredOutput, } static FALLBACK_COUNTER: AtomicU64 = AtomicU64::new(0); @@ -46,6 +50,10 @@ fn fallback_spec_path(kind: DefaultSpecKind) -> PathBuf { DefaultSpecKind::LongRunning => "long_running", DefaultSpecKind::Timeout => "timeout", DefaultSpecKind::BaselineSuccess => "baseline_success", + DefaultSpecKind::StructuredOutputSuccess => "structured_output_success", + DefaultSpecKind::StructuredOutputWithArtifact => "structured_output_with_artifact", + DefaultSpecKind::MissingStructuredOutput => "missing_structured_output", + DefaultSpecKind::MalformedStructuredOutput => "malformed_structured_output", }; let nonce = FALLBACK_COUNTER.fetch_add(1, Ordering::Relaxed); let nanos = SystemTime::now() @@ -123,6 +131,100 @@ workflow: args: ["a-z", "A-Z"] stdin_from: fetch output_step: transform +"# + } + 
DefaultSpecKind::StructuredOutputSuccess => { + r#"api_version: v1 +kind: workflow +name: structured-output-success + +sandbox: + mode: mock + network: false + +workflow: + steps: + - name: produce + run: + program: sh + args: + - -lc + - | + cat > result.json <<'JSON' + {"status":"success","summary":"ok","metrics":{"latency_p99_ms":87,"cost_usd":0.018},"artifacts":[]} + JSON + output_step: produce +"# + } + DefaultSpecKind::StructuredOutputWithArtifact => { + r#"api_version: v1 +kind: workflow +name: structured-output-with-artifact + +sandbox: + mode: mock + network: false + +workflow: + steps: + - name: produce + run: + program: sh + args: + - -lc + - | + cat > result.json <<'JSON' + {"status":"success","summary":"ok","metrics":{"latency_p99_ms":87,"cost_usd":0.018},"artifacts":[{"name":"report.md","stage":"main","media_type":"text/markdown"}]} + JSON + cat > report.md <<'MD' + # report + artifact content + MD + output_step: produce +"# + } + DefaultSpecKind::MissingStructuredOutput => { + r#"api_version: v1 +kind: workflow +name: missing-structured-output + +sandbox: + mode: mock + network: false + +workflow: + steps: + - name: produce + run: + program: sh + args: + - -lc + - | + echo "completed without result.json" + output_step: produce +"# + } + DefaultSpecKind::MalformedStructuredOutput => { + r#"api_version: v1 +kind: workflow +name: malformed-structured-output + +sandbox: + mode: mock + network: false + +workflow: + steps: + - name: produce + run: + program: sh + args: + - -lc + - | + cat > result.json <<'JSON' + {"status":"success","summary":"ok","metrics":not-json,"artifacts":[]} + JSON + output_step: produce "# } }; @@ -182,6 +284,10 @@ fn http_get_json(base_url: &str, path: &str) -> (u16, Value) { (status, json) } +fn http_get_text(base_url: &str, path: &str) -> (u16, String) { + http_request(base_url, "GET", path, None) +} + fn http_post_json(base_url: &str, path: &str, payload: &Value) -> (u16, Value) { let body = payload.to_string(); let (status, 
body) = http_request(base_url, "POST", path, Some(&body)); @@ -236,6 +342,37 @@ fn is_terminal_status(status: &str) -> bool { ) } +fn get_artifact_publication<'a>(run: &'a Value) -> &'a Value { + run.get("artifact_publication") + .unwrap_or_else(|| panic!("missing artifact_publication: {run}")) +} + +fn get_manifest_entries(run: &Value) -> &[Value] { + get_artifact_publication(run) + .get("manifest") + .and_then(Value::as_array) + .unwrap_or_else(|| panic!("missing artifact manifest: {run}")) +} + +fn find_manifest_entry<'a>(run: &'a Value, name: &str) -> &'a Value { + get_manifest_entries(run) + .iter() + .find(|entry| entry.get("name").and_then(Value::as_str) == Some(name)) + .unwrap_or_else(|| panic!("missing manifest entry '{name}': {run}")) +} + +fn manifest_retrieval_path(run: &Value, name: &str) -> String { + let path = find_manifest_entry(run, name) + .get("retrieval_path") + .and_then(Value::as_str) + .unwrap_or_else(|| panic!("manifest entry '{name}' missing retrieval_path: {run}")); + if path.starts_with('/') { + path.to_string() + } else { + format!("/{}", path) + } +} + fn wait_until_terminal(base: &str, run_id: &str, timeout_secs: u64) -> Value { let attempts = timeout_secs * 10; for _ in 0..attempts { @@ -539,11 +676,190 @@ fn structured_error_invalid_policy() { #[test] #[ignore = "requires live void-box daemon"] -fn list_runs_for_reconciliation() { +fn structured_output_result_json_is_retrievable() { + let base = require_env("VOID_BOX_BASE_URL"); + let spec = resolve_spec_path( + "VOID_BOX_STRUCTURED_OUTPUT_SPEC_FILE", + DefaultSpecKind::StructuredOutputSuccess, + ); + let run_id = unique_run_id("contract-structured-output"); + let (status_start, body_start) = http_post_json(&base, "/v1/runs", &start_payload(&run_id, &spec)); + assert_eq!(status_start, 200, "body={body_start}"); + + let terminal = wait_until_terminal(&base, &run_id, 30); + assert_eq!( + terminal.get("status").and_then(Value::as_str).map(|s| s.to_ascii_lowercase()), + 
Some("succeeded".to_string()), + "terminal={terminal}" + ); + + let (status, body) = http_get_text( + &base, + &format!("/v1/runs/{run_id}/stages/main/output-file"), + ); + assert_eq!(status, 200, "body={body}"); + let parsed = serde_json::from_str::(&body).unwrap_or_else(|e| { + panic!("structured output was not valid JSON: {e}; body={body}") + }); + assert!(parsed.get("metrics").and_then(Value::as_object).is_some()); +} + +#[test] +#[ignore = "requires live void-box daemon"] +fn missing_result_json_is_typed_failure() { let base = require_env("VOID_BOX_BASE_URL"); + let spec = resolve_spec_path( + "VOID_BOX_MISSING_STRUCTURED_OUTPUT_SPEC_FILE", + DefaultSpecKind::MissingStructuredOutput, + ); + let run_id = unique_run_id("contract-missing-structured-output"); + let (status_start, body_start) = http_post_json(&base, "/v1/runs", &start_payload(&run_id, &spec)); + assert_eq!(status_start, 200, "body={body_start}"); + + let terminal = wait_until_terminal(&base, &run_id, 30); + assert_eq!( + terminal.get("status").and_then(Value::as_str).map(|s| s.to_ascii_lowercase()), + Some("failed".to_string()), + "terminal={terminal}" + ); + + let (status, json) = http_get_json( + &base, + &format!("/v1/runs/{run_id}/stages/main/output-file"), + ); + assert!(status >= 400, "body={json}"); + assert_error_shape(&json); + assert_eq!( + json.get("code").and_then(Value::as_str), + Some("STRUCTURED_OUTPUT_MISSING") + ); +} + +#[test] +#[ignore = "requires live void-box daemon"] +fn malformed_result_json_is_typed_failure() { + let base = require_env("VOID_BOX_BASE_URL"); + let spec = resolve_spec_path( + "VOID_BOX_MALFORMED_STRUCTURED_OUTPUT_SPEC_FILE", + DefaultSpecKind::MalformedStructuredOutput, + ); + let run_id = unique_run_id("contract-malformed-structured-output"); + let (status_start, body_start) = http_post_json(&base, "/v1/runs", &start_payload(&run_id, &spec)); + assert_eq!(status_start, 200, "body={body_start}"); + + let terminal = wait_until_terminal(&base, &run_id, 30); + 
assert_eq!( + terminal.get("status").and_then(Value::as_str).map(|s| s.to_ascii_lowercase()), + Some("failed".to_string()), + "terminal={terminal}" + ); + + let (status, json) = http_get_json( + &base, + &format!("/v1/runs/{run_id}/stages/main/output-file"), + ); + assert!(status >= 400, "body={json}"); + assert_error_shape(&json); + assert_eq!( + json.get("code").and_then(Value::as_str), + Some("STRUCTURED_OUTPUT_MALFORMED") + ); +} + +#[test] +#[ignore = "requires live void-box daemon"] +fn manifest_lists_named_artifacts() { + let base = require_env("VOID_BOX_BASE_URL"); + let spec = resolve_spec_path( + "VOID_BOX_STRUCTURED_OUTPUT_ARTIFACT_SPEC_FILE", + DefaultSpecKind::StructuredOutputWithArtifact, + ); + let run_id = unique_run_id("contract-artifact-manifest"); + let (status_start, body_start) = http_post_json(&base, "/v1/runs", &start_payload(&run_id, &spec)); + assert_eq!(status_start, 200, "body={body_start}"); + + let terminal = wait_until_terminal(&base, &run_id, 30); + assert_eq!( + terminal.get("status").and_then(Value::as_str).map(|s| s.to_ascii_lowercase()), + Some("succeeded".to_string()), + "terminal={terminal}" + ); + + let (status, inspect) = http_get_json(&base, &format!("/v1/runs/{run_id}")); + assert_eq!(status, 200, "body={inspect}"); + assert!(inspect.get("artifact_publication").is_some()); + assert_eq!( + get_artifact_publication(&inspect) + .get("status") + .and_then(Value::as_str), + Some("published") + ); + let manifest = get_manifest_entries(&inspect); + assert!( + manifest + .iter() + .any(|entry| entry.get("name").and_then(Value::as_str) == Some("result.json")), + "manifest missing result.json: {inspect}" + ); + let artifact_entry = find_manifest_entry(&inspect, "report.md"); + assert_eq!( + artifact_entry.get("stage").and_then(Value::as_str), + Some("main") + ); + assert!( + artifact_entry + .get("retrieval_path") + .and_then(Value::as_str) + .is_some(), + "artifact entry missing retrieval_path: {artifact_entry}" + ); +} + +#[test] 
+#[ignore = "requires live void-box daemon"] +fn named_artifact_endpoint_serves_manifested_file() { + let base = require_env("VOID_BOX_BASE_URL"); + let spec = resolve_spec_path( + "VOID_BOX_STRUCTURED_OUTPUT_ARTIFACT_SPEC_FILE", + DefaultSpecKind::StructuredOutputWithArtifact, + ); + let run_id = unique_run_id("contract-named-artifact"); + let (status_start, body_start) = http_post_json(&base, "/v1/runs", &start_payload(&run_id, &spec)); + assert_eq!(status_start, 200, "body={body_start}"); + + let _ = wait_until_terminal(&base, &run_id, 30); + let (status_inspect, inspect) = http_get_json(&base, &format!("/v1/runs/{run_id}")); + assert_eq!(status_inspect, 200, "body={inspect}"); + + let path = manifest_retrieval_path(&inspect, "report.md"); + let (status, body) = http_get_text(&base, &path); + assert_eq!(status, 200, "body={body}"); + assert!(body.contains("artifact content"), "unexpected artifact body={body}"); +} + +#[test] +#[ignore = "requires live void-box daemon"] +fn active_run_listing_supports_reconciliation() { + let base = require_env("VOID_BOX_BASE_URL"); + let spec = resolve_spec_path("VOID_BOX_TEST_SPEC_FILE", DefaultSpecKind::LongRunning); + let run_id = unique_run_id("contract-active-reconciliation"); + let (status_start, body_start) = http_post_json(&base, "/v1/runs", &start_payload(&run_id, &spec)); + assert_eq!(status_start, 200, "body={body_start}"); + let (status_active, active) = http_get_json(&base, "/v1/runs?state=active"); assert_eq!(status_active, 200, "body={active}"); - assert!(active.get("runs").and_then(Value::as_array).is_some()); + let runs = active + .get("runs") + .and_then(Value::as_array) + .unwrap_or_else(|| panic!("active runs payload missing runs array: {active}")); + let matching = runs.iter().find(|run| { + run.get("run_id").and_then(Value::as_str) == Some(run_id.as_str()) + || run.get("id").and_then(Value::as_str) == Some(run_id.as_str()) + }); + let matching = matching.unwrap_or_else(|| panic!("started run not present in 
active listing: {active}")); + assert!(matching.get("attempt_id").and_then(Value::as_u64).is_some()); + assert!(matching.get("active_stage_count").and_then(Value::as_u64).is_some()); + assert!(matching.get("active_microvm_count").and_then(Value::as_u64).is_some()); let (status_terminal, terminal) = http_get_json(&base, "/v1/runs?state=terminal"); assert_eq!(status_terminal, 200, "body={terminal}"); From bfcf1f4c29247b4aa49efeed14317f3fc4b085b3 Mon Sep 17 00:00:00 2001 From: diego Date: Mon, 23 Mar 2026 16:34:00 -0300 Subject: [PATCH 2/6] message transport plumbing v1 done --- src/orchestration/message_box.rs | 34 ++++++++++++++++++++++++ src/orchestration/service.rs | 34 +++++++++--------------- src/orchestration/store/fs.rs | 12 ++------- src/orchestration/strategy.rs | 36 +++++++------------------- src/orchestration/types.rs | 1 - tests/execution_strategy_acceptance.rs | 2 ++ tests/execution_swarm_strategy.rs | 11 -------- tests/strategy_scenarios.rs | 6 +++-- 8 files changed, 63 insertions(+), 73 deletions(-) diff --git a/src/orchestration/message_box.rs b/src/orchestration/message_box.rs index 7dedd27..d00d590 100644 --- a/src/orchestration/message_box.rs +++ b/src/orchestration/message_box.rs @@ -174,6 +174,40 @@ pub fn materialize_inbox_snapshots( snapshots.into_iter().zip(delivered_records).collect() } +#[cfg(feature = "serde")] +pub fn build_candidate_inboxes( + delivery_iteration: u32, + candidate_count: usize, + intents: &[CommunicationIntent], + messages: &[RoutedMessage], +) -> Vec { + let mut inboxes: Vec<_> = (0..candidate_count) + .map(|idx| CandidateInbox::new(&format!("candidate-{}", idx + 1))) + .collect(); + let pending = pending_delivery_messages(intents, messages, delivery_iteration); + + for (intent, message) in pending { + let summary = summary_text(&intent.payload); + match message.to.as_str() { + "broadcast" => { + for inbox in &mut inboxes { + inbox.messages.push(summary.clone()); + } + } + _ => { + if let Some(first) = inboxes.first_mut() 
{ + first.messages.push(summary); + } + } + } + } + + if inboxes.is_empty() { + return vec![CandidateInbox::new("candidate-1")]; + } + inboxes +} + #[cfg(feature = "serde")] fn payload_has_summary_text(payload: &Value) -> bool { payload diff --git a/src/orchestration/service.rs b/src/orchestration/service.rs index f61a464..f2f5e17 100644 --- a/src/orchestration/service.rs +++ b/src/orchestration/service.rs @@ -81,16 +81,6 @@ impl SelectedStrategy { } } - fn materialize_inboxes( - &self, - accumulator: &ExecutionAccumulator, - ) -> Vec { - match self { - Self::Swarm(strategy) => strategy.materialize_inboxes(accumulator), - Self::Search(strategy) => strategy.materialize_inboxes(accumulator), - } - } - fn plan_candidates( &self, accumulator: &ExecutionAccumulator, @@ -174,6 +164,7 @@ impl ExecutionService where R: ExecutionRuntime, { + #[cfg(feature = "serde")] fn with_claimed_execution( &mut self, execution_id: &str, @@ -588,24 +579,23 @@ where let strategy = SelectedStrategy::new(spec); self.append_event(&execution.execution_id, ControlEventType::IterationStarted)?; #[cfg(feature = "serde")] - let effective_accumulator = { - let mut effective = accumulator.clone(); + let inboxes = { let intents = self.store.load_intents(&execution.execution_id)?; let messages = self.store.load_routed_messages(&execution.execution_id)?; - let message_backlog = - message_box::backlog_from_pending_messages(&intents, &messages, iteration); - if !message_backlog.is_empty() { - effective.message_backlog = message_backlog; - } - effective + message_box::build_candidate_inboxes( + iteration, + spec.variation.candidates_per_iteration as usize, + &intents, + &messages, + ) }; #[cfg(not(feature = "serde"))] - let effective_accumulator = accumulator.clone(); - - let inboxes = strategy.materialize_inboxes(&effective_accumulator); + let inboxes = (0..spec.variation.candidates_per_iteration.max(1) as usize) + .map(|idx| super::types::CandidateInbox::new(&format!("candidate-{}", idx + 1))) + 
.collect::>(); #[cfg(feature = "serde")] self.materialize_iteration_inboxes(&execution.execution_id, iteration, &inboxes)?; - let candidates = strategy.plan_candidates(&effective_accumulator, &inboxes); + let candidates = strategy.plan_candidates(accumulator, &inboxes); for candidate in &candidates { let candidate_seq = self.next_candidate_id; self.save_candidate_state( diff --git a/src/orchestration/store/fs.rs b/src/orchestration/store/fs.rs index e988b59..ffdf5d4 100644 --- a/src/orchestration/store/fs.rs +++ b/src/orchestration/store/fs.rs @@ -385,19 +385,16 @@ impl FsExecutionStore { .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err.to_string()))?; let explored_signatures = serde_json::to_string(&accumulator.explored_signatures) .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err.to_string()))?; - let message_backlog = serde_json::to_string(&accumulator.message_backlog) - .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err.to_string()))?; fs::write( self.execution_dir(execution_id).join("accumulator.txt"), format!( - "{}\n{}\n{}\n{}\n{}\n{}\n{}", + "{}\n{}\n{}\n{}\n{}\n{}", accumulator.scoring_history_len, accumulator.completed_iterations, accumulator.best_candidate_id.as_deref().unwrap_or(""), best_candidate_overrides, accumulator.search_phase.as_deref().unwrap_or(""), explored_signatures, - message_backlog, ), ) } @@ -731,11 +728,7 @@ fn parse_accumulator(contents: &str) -> io::Result { .map(|value| serde_json::from_str(&value).map_err(invalid_data)) .transpose()? .unwrap_or_default(); - let message_backlog = optional_line(&mut lines) - .filter(|value| !value.is_empty()) - .map(|value| serde_json::from_str(&value).map_err(invalid_data)) - .transpose()? 
- .unwrap_or_default(); + let _legacy_message_backlog = optional_line(&mut lines); Ok(ExecutionAccumulator { scoring_history_len, completed_iterations, @@ -743,7 +736,6 @@ fn parse_accumulator(contents: &str) -> io::Result { best_candidate_overrides, search_phase, explored_signatures, - message_backlog, ..ExecutionAccumulator::default() }) } diff --git a/src/orchestration/strategy.rs b/src/orchestration/strategy.rs index 4e1ab22..f4d720e 100644 --- a/src/orchestration/strategy.rs +++ b/src/orchestration/strategy.rs @@ -56,25 +56,6 @@ impl SwarmStrategy { } } - pub fn materialize_inboxes( - &self, - accumulator: &ExecutionAccumulator, - ) -> Vec { - if accumulator.message_backlog.is_empty() { - return vec![CandidateInbox::new("candidate-1")]; - } - - accumulator - .message_backlog - .iter() - .enumerate() - .map(|(idx, message)| CandidateInbox { - candidate_id: format!("candidate-{}", idx + 1), - messages: vec![message.clone()], - }) - .collect() - } - pub fn plan_candidates( &self, accumulator: &ExecutionAccumulator, @@ -166,13 +147,6 @@ impl SearchStrategy { } } - pub fn materialize_inboxes( - &self, - accumulator: &ExecutionAccumulator, - ) -> Vec { - SwarmStrategy::default().materialize_inboxes(accumulator) - } - pub fn plan_candidates( &self, accumulator: &ExecutionAccumulator, @@ -245,9 +219,10 @@ impl SearchStrategy { mut accumulator: ExecutionAccumulator, evaluation: IterationEvaluation, ) -> ExecutionAccumulator { + let candidate_slots = default_candidate_inboxes(self.variation.candidates_per_iteration as usize); let planned_candidates = self.plan_candidates( &accumulator, - &self.materialize_inboxes(&accumulator), + &candidate_slots, ); accumulator.scoring_history_len += 1; @@ -374,6 +349,13 @@ impl SearchStrategy { } } +fn default_candidate_inboxes(count: usize) -> Vec { + let count = count.max(1); + (0..count) + .map(|idx| CandidateInbox::new(&format!("candidate-{}", idx + 1))) + .collect() +} + fn candidate_signature(overrides: &BTreeMap) -> String { 
overrides .iter() diff --git a/src/orchestration/types.rs b/src/orchestration/types.rs index 833ef25..03f35c7 100644 --- a/src/orchestration/types.rs +++ b/src/orchestration/types.rs @@ -90,7 +90,6 @@ impl ExecutionCandidate { pub struct ExecutionAccumulator { pub scoring_history_len: u32, pub completed_iterations: u32, - pub message_backlog: Vec, pub leader_proposals: Vec, pub iterations_without_improvement: u32, pub best_candidate_id: Option, diff --git a/tests/execution_strategy_acceptance.rs b/tests/execution_strategy_acceptance.rs index 1928616..b8b2ed2 100644 --- a/tests/execution_strategy_acceptance.rs +++ b/tests/execution_strategy_acceptance.rs @@ -1,3 +1,5 @@ +#![cfg(feature = "serde")] + use std::collections::BTreeMap; use void_control::orchestration::{ diff --git a/tests/execution_swarm_strategy.rs b/tests/execution_swarm_strategy.rs index abf7beb..54382d5 100644 --- a/tests/execution_swarm_strategy.rs +++ b/tests/execution_swarm_strategy.rs @@ -108,17 +108,6 @@ fn leader_directed_proposals_are_validated_before_use() { assert_eq!(proposals[0].overrides["sandbox.env.CONCURRENCY"], "2"); } -#[test] -fn swarm_materializes_inboxes_from_message_backlog() { - let mut accumulator = ExecutionAccumulator::default(); - accumulator.message_backlog = vec!["hello".to_string(), "world".to_string()]; - - let inboxes = SwarmStrategy::default().materialize_inboxes(&accumulator); - - assert_eq!(inboxes.len(), 2); - assert_eq!(inboxes[0].messages[0], "hello"); -} - #[test] fn swarm_plans_candidates_from_variation_source() { let strategy = SwarmStrategy::new( diff --git a/tests/strategy_scenarios.rs b/tests/strategy_scenarios.rs index 1f72926..df7bd60 100644 --- a/tests/strategy_scenarios.rs +++ b/tests/strategy_scenarios.rs @@ -1,3 +1,5 @@ +#![cfg(feature = "serde")] + use std::collections::BTreeMap; use void_control::orchestration::{ @@ -133,7 +135,7 @@ fn swarm_incident_mitigation_explores_distinct_hypotheses_and_finds_best_family( .iter() .filter(|message| 
message.status == RoutedMessageStatus::Delivered) .count(), - 3 + 6 ); assert_eq!(inbox_one.entries.len(), 2); assert_eq!(inbox_two.entries.len(), 1); @@ -146,7 +148,7 @@ fn swarm_incident_mitigation_explores_distinct_hypotheses_and_finds_best_family( (ControlEventType::CandidateScored, 2), (ControlEventType::CommunicationIntentEmitted, 2), (ControlEventType::MessageRouted, 2), - (ControlEventType::MessageDelivered, 3), + (ControlEventType::MessageDelivered, 6), (ControlEventType::ExecutionCompleted, 1), ], ); From 65843201f33beb699bf2122933ec60585d18454a Mon Sep 17 00:00:00 2001 From: diego Date: Mon, 23 Mar 2026 18:06:31 -0300 Subject: [PATCH 3/6] orchestration: add signal reactive planning --- ...26-03-23-signal-reactive-implementation.md | 74 ++ .../void-control-signal-reactive-spec-v0.1.md | 722 ++++++++++++++++++ src/bridge.rs | 18 + src/orchestration/message_box.rs | 67 +- src/orchestration/mod.rs | 3 + src/orchestration/service.rs | 31 +- src/orchestration/spec.rs | 10 + src/orchestration/strategy.rs | 106 ++- src/orchestration/types.rs | 20 + src/orchestration/variation.rs | 25 + tests/execution_message_box.rs | 104 ++- tests/execution_search_strategy.rs | 143 +++- tests/execution_spec_validation.rs | 68 ++ tests/execution_strategy_acceptance.rs | 295 ++++++- tests/execution_swarm_strategy.rs | 162 +++- tests/strategy_scenarios.rs | 110 +++ 16 files changed, 1914 insertions(+), 44 deletions(-) create mode 100644 docs/superpowers/plans/2026-03-23-signal-reactive-implementation.md create mode 100644 spec/void-control-signal-reactive-spec-v0.1.md diff --git a/docs/superpowers/plans/2026-03-23-signal-reactive-implementation.md b/docs/superpowers/plans/2026-03-23-signal-reactive-implementation.md new file mode 100644 index 0000000..136e8ef --- /dev/null +++ b/docs/superpowers/plans/2026-03-23-signal-reactive-implementation.md @@ -0,0 +1,74 @@ +# Signal-Reactive Planning Implementation Plan + +> **For agentic workers:** REQUIRED: Use 
superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Implement the `signal_reactive` variation mode in orchestration code, keeping `leader_directed` as legacy behavior while adding metadata-driven planning inputs for `swarm` and `search`. + +**Architecture:** Extend orchestration types with a routed-message-based `MessageStats` summary, add deterministic extraction from persisted message-box state, teach `SwarmStrategy` and `SearchStrategy` to bias planning from those stats, and update variation/config parsing so `signal_reactive` is first-class without rewriting legacy `leader_directed` executions. + +**Tech Stack:** Rust, Cargo tests, existing orchestration/message-box/store modules, serde-gated execution artifacts. + +--- + +## Chunk 1: Variation Source And Public API + +### Task 1: Add `signal_reactive` variation support without breaking legacy `leader_directed` + +**Files:** +- Modify: `src/orchestration/variation.rs` +- Modify: `src/orchestration/spec.rs` +- Modify: `src/bridge.rs` +- Test: `tests/execution_swarm_strategy.rs` +- Test: `tests/execution_spec_validation.rs` + +- [ ] **Step 1: Write failing tests for `signal_reactive` parsing/validation** +- [ ] **Step 2: Run the targeted tests and verify they fail for the missing mode** +- [ ] **Step 3: Add `VariationConfig::signal_reactive()` and generation behavior that keeps proposals planner-generated, not leader-authored** +- [ ] **Step 4: Update spec validation and bridge/config parsing to accept `signal_reactive` while preserving `leader_directed` as legacy** +- [ ] **Step 5: Run the targeted variation/spec tests and verify they pass** + +## Chunk 2: MessageStats Extraction + +### Task 2: Add `MessageStats` types and deterministic extraction from message-box state + +**Files:** +- Modify: `src/orchestration/types.rs` +- Modify: `src/orchestration/message_box.rs` +- Modify: 
`src/orchestration/mod.rs` +- Test: `tests/execution_message_box.rs` + +- [ ] **Step 1: Write failing tests for routed-message-based `MessageStats` extraction** +- [ ] **Step 2: Run the targeted message-box tests and verify they fail** +- [ ] **Step 3: Add `MessageStats` type plus extraction logic joined by `intent_id`** +- [ ] **Step 4: Export the new type/helpers through orchestration public API** +- [ ] **Step 5: Run the targeted message-box tests and verify they pass** + +## Chunk 3: Strategy Consumption + +### Task 3: Teach `swarm` and `search` to react to `MessageStats` + +**Files:** +- Modify: `src/orchestration/strategy.rs` +- Modify: `src/orchestration/service.rs` +- Test: `tests/execution_swarm_strategy.rs` +- Test: `tests/execution_search_strategy.rs` +- Test: `tests/strategy_scenarios.rs` + +- [ ] **Step 1: Write failing strategy tests for signal-reactive planning biases** +- [ ] **Step 2: Run the targeted strategy tests and verify they fail** +- [ ] **Step 3: Thread `MessageStats` into planning and implement minimal biasing behavior for `swarm` and `search`** +- [ ] **Step 4: Ensure empty-intent `search` falls back to incumbent-centered planning** +- [ ] **Step 5: Run the targeted strategy tests and verify they pass** + +## Chunk 4: End-To-End Verification + +### Task 4: Verify integrated execution behavior and prevent regression of legacy `leader_directed` + +**Files:** +- Modify: `tests/execution_strategy_acceptance.rs` +- Modify: `tests/strategy_scenarios.rs` + +- [ ] **Step 1: Add an acceptance test for `signal_reactive` execution planning** +- [ ] **Step 2: Add a regression test showing persisted `leader_directed` behavior still works as legacy mode** +- [ ] **Step 3: Run the focused acceptance/scenario tests and verify they pass** +- [ ] **Step 4: Run `cargo test --features serde` and verify the full suite stays green** diff --git a/spec/void-control-signal-reactive-spec-v0.1.md b/spec/void-control-signal-reactive-spec-v0.1.md new file mode 
100644 index 0000000..dda457f --- /dev/null +++ b/spec/void-control-signal-reactive-spec-v0.1.md @@ -0,0 +1,722 @@ +# Void Control Signal-Reactive Planning Specification + +## Version: v0.1 + +## Scope + +This specification defines the first planning-semantics layer above the +message box in `void-control`. + +It extends: +- `spec/void-control-message-box-spec-v0.1.md` +- `spec/void-control-iteration-spec-v0.2.md` + +It introduces: +- a metadata-driven planning input called `MessageStats`, +- a dedicated signal-extraction layer between message routing and + strategy planning, +- a new planning mode called `signal_reactive`, +- v1 planning reactions for `swarm` and `search`. + +This specification does not introduce: +- free-text payload parsing by strategies, +- direct candidate-directed routing, +- full semantic `Signal` objects yet, +- direct execution mutation from messages. + +--- + +# 1. Core Idea + +Messages are transport. + +Signals are planning input. + +Strategies do not read raw messages. They react to normalized planning +inputs derived from message metadata. + +The control-plane flow becomes: + +1. candidates emit `CommunicationIntent`s, +2. the message box validates, routes, and persists them, +3. signal extraction derives `MessageStats` from intent and routing + metadata, +4. iterative strategies consume `MessageStats` when planning the next + candidates, +5. candidates still receive full inbox payloads for their own reasoning. + +This preserves: +- determinism, +- replayability, +- provider neutrality, +- content-blind strategy behavior. + +--- + +# 2. Ownership Boundary + +## `void-control` owns + +- signal extraction from persisted message-box state, +- normalization of message metadata into `MessageStats`, +- strategy-specific interpretation of `MessageStats`, +- planning bias derived from message patterns. + +## candidates own + +- interpretation of inbox payload content, +- free-text reasoning, +- optional emission of new intents. 
+ +## strict rule + +Strategies MUST NOT: +- parse free-text payload content, +- read raw inbox payloads directly, +- infer planning meaning from message text, +- treat intents as imperative commands. + +Strategies MAY: +- consume `MessageStats`, +- react to delivery counts, audience mix, priority mix, TTL expiry, + drops, and source diversity, +- adjust candidate generation heuristics based on those normalized + signals. + +--- + +# 3. New Mode: `signal_reactive` + +`signal_reactive` is a new planning mode for metadata-driven planning. + +It is not a semantic alias for `leader_directed`. + +Reason: +- `leader_directed` described leader-authored candidate override + proposals, +- `signal_reactive` describes planner reaction to message metadata + patterns, +- these are related collaboration mechanisms, but they are not the same + control-plane behavior. + +`signal_reactive` means: +- candidate generation may be biased by aggregated communication + patterns, +- no raw payload content is consumed, +- no direct override extraction occurs in v1. + +`leader_directed` remains the legacy name for the older +payload-authored override model described in the iteration +specification. + +New executions that use metadata-only planning SHOULD use +`signal_reactive`. + +--- + +# 4. Layered Model + +The collaboration stack becomes: + +| Layer | Consumes | Produces | +|-------|----------|----------| +| Message box | Raw intents | Routed messages, inbox snapshots | +| Signal extraction | Intent metadata, routed-message metadata | `MessageStats` | +| Strategy | `MessageStats` | Candidate specs | +| Candidate runtime | Inbox snapshots with full payload | New intents | + +Important rule: + +- message payload is for candidates, +- message metadata is for control-plane, +- `MessageStats` is the only v1 planning input produced by the signal + extraction layer. + +--- + +# 5. 
`MessageStats` + +## 5.1 Purpose + +`MessageStats` is the v1 normalized planning summary for one execution at +one planning step. + +It is intentionally small, deterministic, content-blind, and +routed-message based. + +## 5.2 Suggested shape + +```json +{ + "iteration": 1, + "total_messages": 6, + "leader_messages": 2, + "broadcast_messages": 4, + "proposal_count": 3, + "signal_count": 2, + "evaluation_count": 1, + "high_priority_count": 2, + "normal_priority_count": 4, + "low_priority_count": 0, + "delivered_count": 5, + "dropped_count": 1, + "expired_count": 0, + "unique_sources": 3, + "unique_intent_count": 5 +} +``` + +Illustrative Rust shape: + +```rust +struct MessageStats { + iteration: u32, + total_messages: usize, + leader_messages: usize, + broadcast_messages: usize, + proposal_count: usize, + signal_count: usize, + evaluation_count: usize, + high_priority_count: usize, + normal_priority_count: usize, + low_priority_count: usize, + delivered_count: usize, + dropped_count: usize, + expired_count: usize, + unique_sources: usize, + unique_intent_count: usize, +} +``` + +## 5.3 Required fields + +- `iteration` +- `total_messages` +- `leader_messages` +- `broadcast_messages` +- `proposal_count` +- `signal_count` +- `evaluation_count` +- `high_priority_count` +- `normal_priority_count` +- `low_priority_count` +- `delivered_count` +- `dropped_count` +- `expired_count` +- `unique_sources` +- `unique_intent_count` + +## 5.4 Derived ratios + +Implementations MAY derive ratios from `MessageStats`, for example: + +- `broadcast_ratio = broadcast_messages / max(total_messages, 1)` +- `proposal_ratio = proposal_count / max(total_messages, 1)` +- `priority_pressure = high_priority_count / max(total_messages, 1)` + +Ratios are derived convenience values. They do not need to be persisted +as first-class fields in v0.1. + +--- + +# 6. 
Signal Extraction Layer + +## 6.1 Required seam + +Signal extraction MUST exist as a dedicated control-plane step between +message routing and strategy planning. + +Suggested interface: + +```rust +fn extract_message_stats( + intents: &[CommunicationIntent], + routed_messages: &[RoutedMessage], + delivery_iteration: u32, +) -> MessageStats +``` + +Illustrative implementation skeleton: + +```rust +fn extract_message_stats( + intents: &[CommunicationIntent], + routed_messages: &[RoutedMessage], + delivery_iteration: u32, +) -> MessageStats { + // Join routed messages back to source intent metadata by intent_id. + // Count only routed-message outcomes for this planning iteration. + // Do not inspect payload text. + todo!() +} +``` + +This layer: +- reads message metadata only, +- does not parse payload content, +- may join routed messages to source intent metadata by `intent_id`, +- is deterministic from persisted state, +- is replayable after restart. + +## 6.2 Source material + +`MessageStats` may use: +- routed-message destination +- routed-message `status` +- routed-message delivery iteration +- routed-message source intent metadata joined via `intent_id` +- TTL outcome effects such as `Expired` +- delivery/dedup/drop outcomes from control-plane + +`MessageStats` MUST NOT use: +- `payload.summary_text` +- free-text content +- provider-specific delivery transport details + +--- + +# 7. Strategy Consumption Rules + +## 7.1 Shared rule + +All iterative strategies MUST treat `MessageStats` as advisory evidence, +not imperative commands. + +Messages do not directly mutate execution. + +They shape the search space. + +## 7.2 `swarm` + +`swarm` SHOULD use `MessageStats` to adjust breadth and convergence +pressure. 
+ +Examples: +- higher `proposal_count` and higher `unique_sources` MAY increase + exploration budget or preserve breadth, +- higher `broadcast_messages` MAY increase convergence bias, +- higher `dropped_count` or `expired_count` MAY reduce fan-out pressure + or exploration aggressiveness, +- higher `leader_messages` MAY shift some budget toward refinement-like + candidates while preserving diversity. + +`swarm` MUST NOT: +- derive exact override patches from message payload, +- collapse exploration solely from raw message count. + +## 7.3 `search` + +`search` SHOULD use `MessageStats` to adjust refinement aggressiveness. + +If a `search` execution emits no communication intents, `MessageStats` +for that iteration is simply zero-biased input and `search` falls back +to incumbent-centered planning. + +Examples: +- more `evaluation_count` than `proposal_count` MAY increase exploitation + pressure, +- higher `signal_count` MAY preserve a small exploration quota, +- higher `leader_messages` MAY allow one additional refinement iteration + before declaring plateau, +- higher `expired_count` or `dropped_count` MAY reduce planner confidence + and avoid over-committing to a refinement path. + +`search` MUST remain incumbent-centered. + +`MessageStats` may bias refinement, but MUST NOT replace incumbent-based +planning. + +--- + +# 8. TTL, Dedup, Drops, and Planning Semantics + +These are not only storage concerns. They are learning-dynamics controls. + +## TTL + +TTL controls memory horizon. + +- short TTL means reactive behavior, +- longer TTL means more persistent planning evidence. + +## Dedup + +Dedup controls signal compression. + +- repeated similar communication should not create unbounded planner + noise, +- dedup MAY still contribute to stronger aggregate counts or repeated + delivery evidence. + +## Drops + +Drops indicate overload or bounded suppression. 
+ +High drop counts MAY cause strategies to: +- reduce exploration pressure, +- reduce broadcast-heavy behavior, +- preserve budget for higher-priority patterns. + +## Expiry + +Expiry indicates stale information. + +High expiry counts MAY reduce confidence in long-lived communication +patterns. + +--- + +# 9. What V1 Does Not Do + +V1 does not include: +- arbitrary override extraction from payload, +- strategy parsing of message text, +- direct spawn/cancel/suppress commands from intents, +- candidate-targeted routing, +- full semantic `Signal` objects like `ProposalCluster` yet. + +Those may be added later in a future specification after the +`MessageStats` seam is stable. + +--- + +# 10. V1.5 / V2 Direction + +The future evolution path is: + +1. `MessageStats` in v1, +2. controller-derived structured `Signal` objects later, +3. typed candidate proposal objects or validated override hints if + needed. + +Important future rule: + +- strategies should react to patterns first, +- then to controller-derived structured meaning, +- never to raw messages. + +Illustrative later-stage semantic layer: + +```rust +enum Signal { + ProposalCluster { topic: String }, + ImprovementTrend { topic: String }, + RegressionTrend { topic: String }, +} + +fn plan_with_signals(signals: &[Signal]) { + for signal in signals { + match signal { + Signal::ProposalCluster { topic } => bias_topic(topic), + Signal::ImprovementTrend { topic } => reinforce(topic), + Signal::RegressionTrend { topic } => penalize(topic), + } + } +} +``` + +--- + +# 11. Acceptance Criteria + +An implementation conforms to this specification if: + +1. `signal_reactive` exists as a distinct metadata-driven planning mode, +2. a dedicated signal-extraction layer exists, +3. `MessageStats` is derived from persisted message metadata only, +4. `swarm` planning can react to `MessageStats`, +5. `search` planning can react to `MessageStats`, +6. replay can reconstruct `MessageStats` from persisted state, +7. 
no strategy consumes free-text payload directly, +8. message transport and candidate inbox delivery continue to work + independently of strategy planning semantics. + +--- + +# 12. Non-Goals + +This specification is not: +- a replacement for the message box, +- a replacement for scoring/evaluation, +- a direct-command protocol, +- a free-form chat coordination system. + +It is the first deterministic planning-semantics layer above the message +transport. + +--- + +# 13. Derivation Semantics + +## 13.1 Planning window + +`MessageStats` is derived for exactly one planning step. + +The planning window SHOULD be: +- all routed-message outcomes relevant to iteration `N` planning, +- primarily routed messages with `delivery_iteration = N`. + +This keeps signal extraction aligned with actual planner input instead +of raw historical message volume. + +## 13.2 Field interpretation + +Suggested v0.1 interpretation: + +- `iteration`: the planning iteration for which stats are derived, +- `total_messages`: count of routed messages considered in the planning + window, +- `leader_messages`: routed messages whose destination is `leader`, +- `broadcast_messages`: routed messages whose destination is + `broadcast`, +- `proposal_count`: routed messages whose source intent kind is + `proposal`, +- `signal_count`: routed messages whose source intent kind is `signal`, +- `evaluation_count`: routed messages whose source intent kind is + `evaluation`, +- `high_priority_count`: routed messages whose source intent priority is + `high`, +- `normal_priority_count`: routed messages whose source intent priority is + `normal`, +- `low_priority_count`: routed messages whose source intent priority is + `low`, +- `delivered_count`: routed messages with status `Delivered`, +- `dropped_count`: routed messages with status `Dropped`, +- `expired_count`: routed messages with status `Expired`, +- `unique_sources`: distinct `from_candidate_id` values represented in + the window, +- `unique_intent_count`: 
distinct `intent_id` values represented in the + window. + +The canonical counting unit in v0.1 is the routed message, not the raw +intent. Intent-oriented distinctness is captured only via +`unique_intent_count`. + +## 13.3 Invariants + +Implementations SHOULD maintain the following invariants: + +- `proposal_count + signal_count + evaluation_count = total_messages` +- `high_priority_count + normal_priority_count + low_priority_count = + total_messages` +- `leader_messages + broadcast_messages = total_messages` +- `delivered_count + dropped_count + expired_count <= total_messages` +- `unique_sources <= total_messages` +- `unique_intent_count <= total_messages` + +These are sanity rules for deterministic extraction, not additional +persisted state requirements. + +## 13.4 Dedup accounting + +When dedup suppresses repeated routed messages, implementations MUST +count only the persisted post-dedup routed outcomes. + +Dedup pressure may appear indirectly through lower delivered counts and +higher dropped counts when those outcomes are persisted. + +--- + +# 14. Persistence and Replay + +## 14.1 Source of truth + +The source of truth remains: +- persisted `CommunicationIntent` records, +- persisted `RoutedMessage` records, +- persisted inbox snapshots when materialized. + +`MessageStats` is a controller-derived view over that persisted state. + +## 14.2 Persistence options + +V0.1 permits either: +- recomputing `MessageStats` on demand from persisted message-box state, + or +- persisting a cached `MessageStats` snapshot per planning iteration. + +If persisted, the snapshot MUST be treated as derived data and SHOULD +include: +- `execution_id`, +- `iteration`, +- extractor version or schema version, +- the `MessageStats` payload. + +## 14.3 Replay rule + +After restart, the controller MUST be able to derive the same +`MessageStats` for the same execution state without consulting candidate +payload text. 
+ +If a cached `MessageStats` snapshot disagrees with recomputation, the +implementation SHOULD: +- prefer recomputation from canonical persisted message-box state, +- emit a diagnostic event, +- avoid silently feeding inconsistent planning input into strategies. + +--- + +# 15. Planner Integration Contract + +## 15.1 Execution-spec shape + +For iterative planning modes that use this behavior, the variation +source SHOULD be expressed as: + +- `signal_reactive` + +Suggested `ExecutionSpec` fragment: + +```json +{ + "variation": { + "source": "signal_reactive", + "candidates_per_iteration": 3 + } +} +``` + +This source means: +- candidate variation is planner-generated, +- planner bias may depend on `MessageStats`, +- raw message payload remains inaccessible to the planner. + +This specification amends the variation-source set from the iteration +specification for signal-reactive planning: +- `signal_reactive` is the valid planning mode name for new executions, +- `leader_directed` remains the legacy mode for payload-authored + candidate proposals. + +## 15.2 Planner seam + +Suggested strategy-facing interface: + +```rust +fn plan_candidates( + execution: &Execution, + iteration: &Iteration, + stats: &MessageStats, +) -> Vec +``` + +Illustrative planner hook: + +```rust +fn plan_with_message_stats(stats: &MessageStats) { + if stats.proposal_count > 3 && stats.unique_sources > 2 { + increase_exploration(); + } + + if stats.broadcast_messages > stats.leader_messages { + bias_convergence(); + } + + if stats.dropped_count > 0 || stats.expired_count > 0 { + reduce_fanout_pressure(); + } + + if stats.evaluation_count > stats.proposal_count { + bias_refinement(); + } +} +``` + +`MessageStats` joins existing planning inputs such as: +- execution policy, +- prior scores and rankings, +- iteration history, +- candidate provenance. + +It does not replace them. 
+ +## 15.3 Candidate input boundary + +The planner and the candidate runtime consume different views: + +- planner: `MessageStats`, +- candidate: inbox snapshot with full structured payload. + +This split MUST remain explicit in code structure and persisted data +flow. + +--- + +# 16. Compatibility and Migration + +## 16.1 Config migration + +Existing configurations that reference `leader_directed` SHOULD migrate +to `signal_reactive`. + +V0.1-compatible implementations MAY support a temporary compatibility +mapping: + +- rewrite new submission-time configuration from `leader_directed` to + `signal_reactive`, +- emit a deprecation warning, +- require that such rewritten executions use metadata-only planning and + do not parse payload-authored candidate override directives. + +This mapping applies only to new execution submission or config parsing. +It MUST NOT rewrite the persisted `variation.source` of an already +created execution. + +## 16.2 Iteration-spec alignment + +Any section of the iteration specification that describes +`leader_directed` as payload-authored variation remains applicable to +legacy executions whose persisted `variation.source` is +`leader_directed`. + +The key semantic shift is: +- before: leader output proposed concrete candidate overrides, +- now: controller derives metadata signals and the strategy plans + candidates from those signals plus normal execution history. + +This specification does not redefine `leader_directed`. +It introduces `signal_reactive` alongside it. + +The older iteration-spec description of `candidate.message`, `@` +mentions, and free-text message bodies should be read as legacy +pre-message-box transport language. The canonical communication model +for signal-reactive planning is the structured `CommunicationIntent` +defined by the message-box specification. 
+ +## 16.3 Backward-compatibility constraint + +Migration to `signal_reactive` MUST NOT break: +- message-box persistence, +- inbox delivery semantics, +- candidate ability to emit structured intents, +- replay of pre-existing executions under their originally persisted + `variation.source` and routing state. + +## 16.4 Future explicit override mechanism + +If explicit candidate proposal behavior is still needed, it SHOULD be +specified as a separate future mechanism rather than folded back into +`signal_reactive`. + +That future mechanism SHOULD: +- use typed candidate proposal objects, +- remain controller-validated, +- make override authority explicit, +- stay distinct from metadata-driven signaling. + +--- + +# 17. Observability + +Implementations SHOULD emit control-plane diagnostics that make signal +reactivity inspectable. + +Suggested event/data points: +- `MessageStatsDerived`, +- extractor version, +- iteration number, +- key counts and ratios, +- whether stats were recomputed or loaded from cache, +- any replay mismatch between cached and recomputed stats. + +This is important because strategy behavior will otherwise appear +opaque: the planner changes, but the underlying reason remains hidden. 
diff --git a/src/bridge.rs b/src/bridge.rs index 610133e..2aba9c2 100644 --- a/src/bridge.rs +++ b/src/bridge.rs @@ -1113,6 +1113,24 @@ impl ExecutionSpecRequest { }) .collect(), ), + "signal_reactive" => VariationConfig { + source: "signal_reactive".to_string(), + candidates_per_iteration: self.variation.candidates_per_iteration, + selection: match self.variation.selection.as_deref() { + Some("random") => Some(VariationSelection::Random), + Some("sequential") | None => Some(VariationSelection::Sequential), + Some(_) => Some(VariationSelection::Sequential), + }, + parameter_space: self.variation.parameter_space.unwrap_or_default(), + explicit: self.variation + .explicit + .unwrap_or_default() + .into_iter() + .map(|proposal| VariationProposal { + overrides: proposal.overrides, + }) + .collect(), + }, "leader_directed" => { VariationConfig::leader_directed(self.variation.candidates_per_iteration) } diff --git a/src/orchestration/message_box.rs b/src/orchestration/message_box.rs index d00d590..4bf0e96 100644 --- a/src/orchestration/message_box.rs +++ b/src/orchestration/message_box.rs @@ -1,13 +1,14 @@ #[cfg(feature = "serde")] -use std::collections::{BTreeMap, HashMap}; +use std::collections::{BTreeMap, BTreeSet, HashMap}; #[cfg(feature = "serde")] use serde_json::Value; #[cfg(feature = "serde")] use super::types::{ - CandidateInbox, CommunicationIntent, CommunicationIntentAudience, InboxEntry, InboxSnapshot, - RoutedMessage, RoutedMessageStatus, + CandidateInbox, CommunicationIntent, CommunicationIntentAudience, CommunicationIntentKind, + CommunicationIntentPriority, InboxEntry, InboxSnapshot, MessageStats, RoutedMessage, + RoutedMessageStatus, }; #[cfg(feature = "serde")] @@ -75,6 +76,66 @@ pub fn route_intents(intents: &[CommunicationIntent]) -> Vec { .collect() } +#[cfg(feature = "serde")] +pub fn extract_message_stats( + intents: &[CommunicationIntent], + routed_messages: &[RoutedMessage], + delivery_iteration: u32, +) -> MessageStats { + let intents_by_id: 
BTreeMap<_, _> = intents + .iter() + .map(|intent| (intent.intent_id.clone(), intent)) + .collect(); + let mut stats = MessageStats { + iteration: delivery_iteration, + ..MessageStats::default() + }; + let mut unique_sources = BTreeSet::new(); + let mut unique_intents = BTreeSet::new(); + + for message in routed_messages + .iter() + .filter(|message| message.delivery_iteration == delivery_iteration) + { + let Some(intent) = intents_by_id.get(&message.intent_id) else { + continue; + }; + + stats.total_messages += 1; + unique_intents.insert(intent.intent_id.clone()); + unique_sources.insert(intent.from_candidate_id.clone()); + + match message.to.as_str() { + "leader" => stats.leader_messages += 1, + "broadcast" => stats.broadcast_messages += 1, + _ => {} + } + + match intent.kind { + CommunicationIntentKind::Proposal => stats.proposal_count += 1, + CommunicationIntentKind::Signal => stats.signal_count += 1, + CommunicationIntentKind::Evaluation => stats.evaluation_count += 1, + } + + match intent.priority { + CommunicationIntentPriority::High => stats.high_priority_count += 1, + CommunicationIntentPriority::Normal => stats.normal_priority_count += 1, + CommunicationIntentPriority::Low => stats.low_priority_count += 1, + } + + match message.status { + RoutedMessageStatus::Delivered => stats.delivered_count += 1, + RoutedMessageStatus::Dropped => stats.dropped_count += 1, + RoutedMessageStatus::Expired => stats.expired_count += 1, + RoutedMessageStatus::Routed => {} + } + } + + stats.unique_sources = unique_sources.len(); + stats.unique_intent_count = unique_intents.len(); + stats +} + #[cfg(feature = "serde")] pub fn pending_delivery_messages( intents: &[CommunicationIntent], diff --git a/src/orchestration/mod.rs b/src/orchestration/mod.rs index c512281..96fa89a 100644 --- a/src/orchestration/mod.rs +++ b/src/orchestration/mod.rs @@ -12,6 +12,8 @@ pub mod types; pub mod variation; pub use events::{ControlEventEnvelope, ControlEventType}; +#[cfg(feature = "serde")] 
+pub use message_box::extract_message_stats; pub use policy::{ BudgetPolicy, ConcurrencyPolicy, ConvergencePolicy, GlobalConfig, OrchestrationPolicy, }; @@ -33,6 +35,7 @@ pub use strategy::{IterationEvaluation, SearchStrategy, StopReason, SwarmStrateg pub use types::{ CandidateInbox, CandidateOutput, CandidateSpec, CandidateStatus, Execution, ExecutionAccumulator, ExecutionCandidate, ExecutionSnapshot, ExecutionStatus, FailureCounts, + MessageStats, }; #[cfg(feature = "serde")] pub use types::{ diff --git a/src/orchestration/service.rs b/src/orchestration/service.rs index f2f5e17..b128713 100644 --- a/src/orchestration/service.rs +++ b/src/orchestration/service.rs @@ -12,7 +12,7 @@ use super::store::FsExecutionStore; use super::strategy::{IterationEvaluation, SearchStrategy, StopReason, SwarmStrategy}; use super::types::{ CandidateOutput, CandidateSpec, CandidateStatus, Execution, ExecutionAccumulator, - ExecutionCandidate, ExecutionStatus, + ExecutionCandidate, ExecutionStatus, MessageStats, }; #[cfg(feature = "serde")] @@ -85,10 +85,11 @@ impl SelectedStrategy { &self, accumulator: &ExecutionAccumulator, inboxes: &[super::types::CandidateInbox], + message_stats: Option<&MessageStats>, ) -> Vec { match self { - Self::Swarm(strategy) => strategy.plan_candidates(accumulator, inboxes), - Self::Search(strategy) => strategy.plan_candidates(accumulator, inboxes), + Self::Swarm(strategy) => strategy.plan_candidates(accumulator, inboxes, message_stats), + Self::Search(strategy) => strategy.plan_candidates(accumulator, inboxes, message_stats), } } @@ -106,11 +107,12 @@ impl SelectedStrategy { fn reduce( &self, accumulator: ExecutionAccumulator, + planned_candidates: &[CandidateSpec], evaluation: IterationEvaluation, ) -> ExecutionAccumulator { match self { Self::Swarm(strategy) => strategy.reduce(accumulator, evaluation), - Self::Search(strategy) => strategy.reduce(accumulator, evaluation), + Self::Search(strategy) => strategy.reduce(accumulator, planned_candidates, 
evaluation), } } @@ -595,7 +597,11 @@ where .collect::>(); #[cfg(feature = "serde")] self.materialize_iteration_inboxes(&execution.execution_id, iteration, &inboxes)?; - let candidates = strategy.plan_candidates(accumulator, &inboxes); + #[cfg(feature = "serde")] + let message_stats = Some(self.load_message_stats(&execution.execution_id, iteration)?); + #[cfg(not(feature = "serde"))] + let message_stats: Option = None; + let candidates = strategy.plan_candidates(accumulator, &inboxes, message_stats.as_ref()); for candidate in &candidates { let candidate_seq = self.next_candidate_id; self.save_candidate_state( @@ -615,6 +621,17 @@ where Ok(candidates) } + #[cfg(feature = "serde")] + fn load_message_stats( + &self, + execution_id: &str, + iteration: u32, + ) -> io::Result { + let intents = self.store.load_intents(execution_id)?; + let messages = self.store.load_routed_messages(execution_id)?; + Ok(message_box::extract_message_stats(&intents, &messages, iteration)) + } + fn load_or_plan_iteration_candidates( &mut self, execution: &Execution, @@ -909,7 +926,7 @@ where let candidates = self.load_or_plan_iteration_candidates(execution, spec, &accumulator, iteration)?; - for candidate in candidates { + for candidate in &candidates { let candidate_record = self .store .load_candidates(&execution.execution_id)? 
@@ -1008,7 +1025,7 @@ where .count() as u32; let evaluation = strategy.evaluate(&accumulator, &outputs); self.append_event(&execution.execution_id, ControlEventType::CandidateScored)?; - accumulator = strategy.reduce(accumulator, evaluation.clone()); + accumulator = strategy.reduce(accumulator, &candidates, evaluation.clone()); accumulator.failure_counts.total_candidate_failures = accumulator .failure_counts .total_candidate_failures diff --git a/src/orchestration/spec.rs b/src/orchestration/spec.rs index 7cd5ffa..c8c7cfe 100644 --- a/src/orchestration/spec.rs +++ b/src/orchestration/spec.rs @@ -78,6 +78,16 @@ impl ExecutionSpec { )); } + if !matches!( + self.variation.source.as_str(), + "parameter_space" | "explicit" | "leader_directed" | "signal_reactive" + ) { + return Err(SpecValidationError::new(format!( + "unknown variation source '{}'", + self.variation.source + ))); + } + if self.workflow.template.trim().is_empty() { return Err(SpecValidationError::new("workflow.template is required")); } diff --git a/src/orchestration/strategy.rs b/src/orchestration/strategy.rs index f4d720e..c375506 100644 --- a/src/orchestration/strategy.rs +++ b/src/orchestration/strategy.rs @@ -1,6 +1,8 @@ use super::policy::ConvergencePolicy; use super::scoring::{score_iteration, RankedCandidate, ScoringConfig}; -use super::types::{CandidateInbox, CandidateOutput, CandidateSpec, ExecutionAccumulator}; +use super::types::{ + CandidateInbox, CandidateOutput, CandidateSpec, ExecutionAccumulator, MessageStats, +}; use super::variation::VariationConfig; use std::collections::BTreeMap; @@ -60,8 +62,10 @@ impl SwarmStrategy { &self, accumulator: &ExecutionAccumulator, inboxes: &[CandidateInbox], + message_stats: Option<&MessageStats>, ) -> Vec { - self.variation + let mut candidates: Vec<_> = self + .variation .generate(accumulator) .into_iter() .enumerate() @@ -72,7 +76,21 @@ impl SwarmStrategy { .unwrap_or_else(|| format!("candidate-{}", idx + 1)), overrides: proposal.overrides, }) - 
.collect() + .collect(); + + if let Some(stats) = advisory_message_stats(&self.variation, message_stats) { + let exploration_pressure = stats.proposal_count + + stats.signal_count + + stats.unique_sources + + stats.leader_messages; + let convergence_pressure = + stats.broadcast_messages + stats.dropped_count + stats.expired_count; + if convergence_pressure > exploration_pressure && candidates.len() > 1 { + candidates.truncate((candidates.len() + 1) / 2); + } + } + + candidates } pub fn evaluate( @@ -151,11 +169,12 @@ impl SearchStrategy { &self, accumulator: &ExecutionAccumulator, inboxes: &[CandidateInbox], + message_stats: Option<&MessageStats>, ) -> Vec { let proposals = if accumulator.best_candidate_overrides.is_empty() { self.bootstrap_proposals(accumulator) } else { - self.refinement_proposals(accumulator) + self.refinement_proposals(accumulator, message_stats) }; proposals @@ -207,7 +226,7 @@ impl SearchStrategy { } if !accumulator.best_candidate_overrides.is_empty() - && self.refinement_proposals(accumulator).is_empty() + && self.refinement_proposals(accumulator, None).is_empty() { return Some(StopReason::ConvergencePlateau); } @@ -217,14 +236,9 @@ impl SearchStrategy { pub fn reduce( &self, mut accumulator: ExecutionAccumulator, + planned_candidates: &[CandidateSpec], evaluation: IterationEvaluation, ) -> ExecutionAccumulator { - let candidate_slots = default_candidate_inboxes(self.variation.candidates_per_iteration as usize); - let planned_candidates = self.plan_candidates( - &accumulator, - &candidate_slots, - ); - accumulator.scoring_history_len += 1; accumulator.completed_iterations += 1; accumulator.failure_counts.total_candidate_failures += evaluation @@ -245,7 +259,7 @@ impl SearchStrategy { } } } - for candidate in &planned_candidates { + for candidate in planned_candidates { let signature = candidate_signature(&candidate.overrides); if !signature.is_empty() && !accumulator.explored_signatures.contains(&signature) { 
accumulator.explored_signatures.push(signature); @@ -276,19 +290,38 @@ impl SearchStrategy { fn refinement_proposals( &self, accumulator: &ExecutionAccumulator, + message_stats: Option<&MessageStats>, ) -> Vec { - match self.variation.source.as_str() { - "explicit" => self.refine_explicit(accumulator), - "parameter_space" => self.refine_parameter_space(accumulator), - _ => Vec::new(), + let mut proposals: Vec<_> = match refinement_source(&self.variation) { + RefinementSource::Explicit => self.refine_explicit(accumulator), + RefinementSource::ParameterSpace => self.refine_parameter_space(accumulator), + RefinementSource::None => Vec::new(), } .into_iter() .filter(|proposal| { let signature = candidate_signature(&proposal.overrides); !accumulator.explored_signatures.contains(&signature) }) - .take(self.variation.candidates_per_iteration as usize) - .collect() + .collect(); + + if let Some(stats) = advisory_message_stats(&self.variation, message_stats) { + let exploration_pressure = stats.signal_count + stats.dropped_count + stats.expired_count; + let refinement_pressure = stats.evaluation_count + stats.leader_messages; + if exploration_pressure > refinement_pressure && proposals.len() > 2 { + let first = proposals.remove(0); + if let Some(last) = proposals.pop() { + proposals.insert(0, last); + proposals.insert(0, first); + } else { + proposals.insert(0, first); + } + } + } + + proposals + .into_iter() + .take(self.variation.candidates_per_iteration as usize) + .collect() } fn refine_explicit( @@ -349,13 +382,6 @@ impl SearchStrategy { } } -fn default_candidate_inboxes(count: usize) -> Vec { - let count = count.max(1); - (0..count) - .map(|idx| CandidateInbox::new(&format!("candidate-{}", idx + 1))) - .collect() -} - fn candidate_signature(overrides: &BTreeMap) -> String { overrides .iter() @@ -363,3 +389,33 @@ fn candidate_signature(overrides: &BTreeMap) -> String { .collect::>() .join("|") } + +fn advisory_message_stats<'a>( + variation: &VariationConfig, + 
message_stats: Option<&'a MessageStats>, +) -> Option<&'a MessageStats> { + let stats = message_stats?; + if variation.source == "leader_directed" || stats.total_messages == 0 { + return None; + } + Some(stats) +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum RefinementSource { + Explicit, + ParameterSpace, + None, +} + +fn refinement_source(variation: &VariationConfig) -> RefinementSource { + match variation.source.as_str() { + "explicit" => RefinementSource::Explicit, + "parameter_space" => RefinementSource::ParameterSpace, + "signal_reactive" if !variation.explicit.is_empty() => RefinementSource::Explicit, + "signal_reactive" if !variation.parameter_space.is_empty() => { + RefinementSource::ParameterSpace + } + _ => RefinementSource::None, + } +} diff --git a/src/orchestration/types.rs b/src/orchestration/types.rs index 03f35c7..94efe6f 100644 --- a/src/orchestration/types.rs +++ b/src/orchestration/types.rs @@ -113,6 +113,26 @@ pub struct FailureCounts { pub total_candidate_failures: u32, } +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Debug, Clone, PartialEq, Eq, Default)] +pub struct MessageStats { + pub iteration: u32, + pub total_messages: usize, + pub leader_messages: usize, + pub broadcast_messages: usize, + pub proposal_count: usize, + pub signal_count: usize, + pub evaluation_count: usize, + pub high_priority_count: usize, + pub normal_priority_count: usize, + pub low_priority_count: usize, + pub delivered_count: usize, + pub dropped_count: usize, + pub expired_count: usize, + pub unique_sources: usize, + pub unique_intent_count: usize, +} + #[derive(Debug, Clone, PartialEq, Eq)] pub struct CandidateInbox { pub candidate_id: String, diff --git a/src/orchestration/variation.rs b/src/orchestration/variation.rs index fbca2b0..4eff456 100644 --- a/src/orchestration/variation.rs +++ b/src/orchestration/variation.rs @@ -63,6 +63,16 @@ impl VariationConfig { } } + pub fn signal_reactive(candidates_per_iteration: u32) -> Self 
{ + Self { + source: "signal_reactive".to_string(), + candidates_per_iteration, + selection: None, + parameter_space: BTreeMap::new(), + explicit: Vec::new(), + } + } + pub fn generate(&self, accumulator: &ExecutionAccumulator) -> Vec { match self.source.as_str() { "parameter_space" => self.generate_parameter_space(), @@ -74,6 +84,21 @@ impl VariationConfig { .take(self.candidates_per_iteration as usize) .cloned() .collect(), + "signal_reactive" => { + if !self.explicit.is_empty() { + self.generate_explicit(accumulator) + } else if !self.parameter_space.is_empty() { + self.generate_parameter_space() + } else { + accumulator + .leader_proposals + .iter() + .filter(|proposal| !proposal.overrides.is_empty()) + .take(self.candidates_per_iteration as usize) + .cloned() + .collect() + } + } _ => Vec::new(), } } diff --git a/tests/execution_message_box.rs b/tests/execution_message_box.rs index fa5abb1..d636cb0 100644 --- a/tests/execution_message_box.rs +++ b/tests/execution_message_box.rs @@ -14,8 +14,9 @@ use void_control::contract::{ use void_control::orchestration::{ CandidateOutput, CandidateSpec, CandidateStatus, CommunicationIntent, CommunicationIntentAudience, CommunicationIntentKind, CommunicationIntentPriority, ExecutionCandidate, ExecutionService, ExecutionSpec, - FsExecutionStore, GlobalConfig, InboxEntry, InboxSnapshot, OrchestrationPolicy, RoutedMessage, RoutedMessageStatus, - StructuredOutputResult, VariationConfig, VariationProposal, WorkflowTemplateRef, + FsExecutionStore, GlobalConfig, InboxEntry, InboxSnapshot, MessageStats, OrchestrationPolicy, RoutedMessage, + RoutedMessageStatus, StructuredOutputResult, VariationConfig, VariationProposal, WorkflowTemplateRef, + extract_message_stats, }; use void_control::orchestration::service::ExecutionRuntime; use void_control::runtime::MockRuntime; @@ -98,6 +99,105 @@ fn fs_store_round_trips_message_box_logs() { assert_eq!(message_log.lines().count(), 2); } +#[test] +fn 
extract_message_stats_joins_intents_by_id_for_routed_messages() { + let intents = vec![ + CommunicationIntent { + intent_id: "intent-2".to_string(), + from_candidate_id: "candidate-2".to_string(), + iteration: 0, + kind: CommunicationIntentKind::Evaluation, + audience: CommunicationIntentAudience::Broadcast, + payload: json_payload("summary-two", "hint-two"), + priority: CommunicationIntentPriority::Low, + ttl_iterations: 2, + caused_by: None, + context: None, + }, + CommunicationIntent { + intent_id: "intent-1".to_string(), + from_candidate_id: "candidate-1".to_string(), + iteration: 0, + kind: CommunicationIntentKind::Proposal, + audience: CommunicationIntentAudience::Leader, + payload: json_payload("summary-one", "hint-one"), + priority: CommunicationIntentPriority::High, + ttl_iterations: 1, + caused_by: None, + context: None, + }, + CommunicationIntent { + intent_id: "intent-3".to_string(), + from_candidate_id: "candidate-3".to_string(), + iteration: 1, + kind: CommunicationIntentKind::Signal, + audience: CommunicationIntentAudience::Broadcast, + payload: json_payload("summary-three", "hint-three"), + priority: CommunicationIntentPriority::Normal, + ttl_iterations: 1, + caused_by: Some("intent-1".to_string()), + context: None, + }, + ]; + let routed_messages = vec![ + RoutedMessage { + message_id: "message-1".to_string(), + intent_id: "intent-1".to_string(), + to: "leader".to_string(), + delivery_iteration: 2, + routing_reason: "leader_feedback_channel".to_string(), + status: RoutedMessageStatus::Delivered, + }, + RoutedMessage { + message_id: "message-2".to_string(), + intent_id: "intent-2".to_string(), + to: "broadcast".to_string(), + delivery_iteration: 2, + routing_reason: "broadcast_fanout".to_string(), + status: RoutedMessageStatus::Dropped, + }, + RoutedMessage { + message_id: "message-3".to_string(), + intent_id: "intent-3".to_string(), + to: "broadcast".to_string(), + delivery_iteration: 2, + routing_reason: "broadcast_fanout".to_string(), + status: 
RoutedMessageStatus::Expired, + }, + RoutedMessage { + message_id: "message-4".to_string(), + intent_id: "intent-3".to_string(), + to: "broadcast".to_string(), + delivery_iteration: 3, + routing_reason: "broadcast_fanout".to_string(), + status: RoutedMessageStatus::Routed, + }, + ]; + + let stats = extract_message_stats(&intents, &routed_messages, 2); + + assert_eq!( + stats, + MessageStats { + iteration: 2, + total_messages: 3, + leader_messages: 1, + broadcast_messages: 2, + proposal_count: 1, + signal_count: 1, + evaluation_count: 1, + high_priority_count: 1, + normal_priority_count: 1, + low_priority_count: 1, + delivered_count: 1, + dropped_count: 1, + expired_count: 1, + unique_sources: 3, + unique_intent_count: 3, + } + ); +} + #[test] fn fs_store_round_trips_inbox_snapshot() { let root = temp_store_root("message-box-inbox"); diff --git a/tests/execution_search_strategy.rs b/tests/execution_search_strategy.rs index f944602..80ff6bc 100644 --- a/tests/execution_search_strategy.rs +++ b/tests/execution_search_strategy.rs @@ -1,9 +1,9 @@ use std::collections::BTreeMap; use void_control::orchestration::{ - CandidateInbox, CandidateOutput, ConvergencePolicy, ExecutionAccumulator, IterationEvaluation, - MetricDirection, SearchStrategy, ScoringConfig, StopReason, VariationConfig, - VariationProposal, VariationSelection, WeightedMetric, + CandidateInbox, CandidateOutput, CandidateSpec, ConvergencePolicy, ExecutionAccumulator, + IterationEvaluation, MessageStats, MetricDirection, SearchStrategy, ScoringConfig, + StopReason, VariationConfig, VariationProposal, VariationSelection, WeightedMetric, }; #[test] @@ -29,6 +29,7 @@ fn search_bootstraps_when_no_seed_exists() { CandidateInbox::new("candidate-3"), CandidateInbox::new("candidate-4"), ], + None, ); assert!(!candidates.is_empty()); @@ -58,6 +59,7 @@ fn search_refines_around_explicit_incumbent() { let candidates = strategy.plan_candidates( &accumulator, &[CandidateInbox::new("candidate-1"), 
CandidateInbox::new("candidate-2")], + None, ); assert_eq!(candidates.len(), 2); @@ -89,6 +91,7 @@ fn search_avoids_explored_signatures() { let candidates = strategy.plan_candidates( &accumulator, &[CandidateInbox::new("candidate-1"), CandidateInbox::new("candidate-2")], + None, ); assert_eq!(candidates.len(), 1); @@ -109,8 +112,20 @@ fn search_reduce_updates_incumbent_phase_and_signatures() { ConvergencePolicy::default(), ); + let planned_candidates = vec![ + CandidateSpec { + candidate_id: "candidate-1".to_string(), + overrides: BTreeMap::from([("agent.prompt".to_string(), "baseline".to_string())]), + }, + CandidateSpec { + candidate_id: "candidate-2".to_string(), + overrides: BTreeMap::from([("agent.prompt".to_string(), "v1".to_string())]), + }, + ]; + let next = strategy.reduce( ExecutionAccumulator::default(), + &planned_candidates, IterationEvaluation { ranked_candidates: void_control::orchestration::score_iteration( &scoring_config(), @@ -163,6 +178,128 @@ fn search_stops_when_no_new_neighbors_remain() { assert_eq!(stop, Some(StopReason::ConvergencePlateau)); } +#[test] +fn search_falls_back_to_incumbent_centered_planning_without_meaningful_stats() { + let strategy = SearchStrategy::new( + VariationConfig::explicit( + 2, + vec![ + proposal(&[("agent.prompt", "baseline")]), + proposal(&[("agent.prompt", "v1")]), + proposal(&[("agent.prompt", "v2")]), + proposal(&[("agent.prompt", "v3")]), + ], + ), + scoring_config(), + ConvergencePolicy::default(), + ); + let mut accumulator = ExecutionAccumulator::default(); + accumulator.best_candidate_overrides = + BTreeMap::from([("agent.prompt".to_string(), "v1".to_string())]); + + let candidates = strategy.plan_candidates( + &accumulator, + &[CandidateInbox::new("candidate-1"), CandidateInbox::new("candidate-2")], + Some(&MessageStats::default()), + ); + + assert_eq!(candidates.len(), 2); + assert_eq!(candidates[0].overrides["agent.prompt"], "baseline"); + assert_eq!(candidates[1].overrides["agent.prompt"], "v2"); +} + 
+#[test] +fn search_keeps_a_small_exploration_quota_when_signal_pressure_is_high() { + let strategy = SearchStrategy::new( + VariationConfig::explicit( + 2, + vec![ + proposal(&[("agent.prompt", "baseline")]), + proposal(&[("agent.prompt", "v1")]), + proposal(&[("agent.prompt", "v2")]), + proposal(&[("agent.prompt", "v3")]), + ], + ), + scoring_config(), + ConvergencePolicy::default(), + ); + let mut accumulator = ExecutionAccumulator::default(); + accumulator.best_candidate_overrides = + BTreeMap::from([("agent.prompt".to_string(), "v1".to_string())]); + + let candidates = strategy.plan_candidates( + &accumulator, + &[CandidateInbox::new("candidate-1"), CandidateInbox::new("candidate-2")], + Some(&MessageStats { + iteration: 1, + total_messages: 4, + leader_messages: 0, + broadcast_messages: 2, + proposal_count: 0, + signal_count: 3, + evaluation_count: 1, + high_priority_count: 1, + normal_priority_count: 3, + low_priority_count: 0, + delivered_count: 3, + dropped_count: 1, + expired_count: 0, + unique_sources: 2, + unique_intent_count: 4, + }), + ); + + assert_eq!(candidates.len(), 2); + assert_eq!(candidates[0].overrides["agent.prompt"], "baseline"); + assert_eq!(candidates[1].overrides["agent.prompt"], "v3"); +} + +#[test] +fn search_reduce_uses_the_actual_planned_candidates() { + let strategy = SearchStrategy::new( + VariationConfig::explicit( + 2, + vec![ + proposal(&[("agent.prompt", "baseline")]), + proposal(&[("agent.prompt", "v1")]), + proposal(&[("agent.prompt", "v2")]), + ], + ), + scoring_config(), + ConvergencePolicy::default(), + ); + let accumulator = ExecutionAccumulator::default(); + let planned_candidates = vec![ + CandidateSpec { + candidate_id: "candidate-1".to_string(), + overrides: BTreeMap::from([("agent.prompt".to_string(), "v2".to_string())]), + }, + CandidateSpec { + candidate_id: "candidate-2".to_string(), + overrides: BTreeMap::from([("agent.prompt".to_string(), "baseline".to_string())]), + }, + ]; + + let next = strategy.reduce( + 
accumulator, + &planned_candidates, + IterationEvaluation { + ranked_candidates: void_control::orchestration::score_iteration( + &scoring_config(), + &[ + candidate_output("candidate-1", true, &[("latency_p99_ms", 60.0)]), + candidate_output("candidate-2", true, &[("latency_p99_ms", 80.0)]), + ], + ), + }, + ); + + assert_eq!( + next.best_candidate_overrides.get("agent.prompt").map(String::as_str), + Some("v2") + ); +} + fn scoring_config() -> ScoringConfig { ScoringConfig { metrics: vec![WeightedMetric { diff --git a/tests/execution_spec_validation.rs b/tests/execution_spec_validation.rs index d3ae083..7e024ae 100644 --- a/tests/execution_spec_validation.rs +++ b/tests/execution_spec_validation.rs @@ -87,6 +87,74 @@ fn accepts_search_mode() { .expect("expected search mode to validate"); } +#[test] +fn rejects_unknown_variation_source() { + let mut spec = base_spec(); + spec.variation.source = "unsupported_mode".to_string(); + + let err = spec + .validate(&global_config()) + .expect_err("expected invalid variation source to fail"); + + assert!(err.to_string().contains("unsupported_mode")); +} + +#[cfg(feature = "serde")] +#[test] +fn bridge_accepts_signal_reactive_and_legacy_leader_directed_variations() { + use serde_json::json; + + for source in ["signal_reactive", "leader_directed"] { + let body = json!({ + "mode": "swarm", + "goal": "optimize latency", + "workflow": { "template": "fixtures/sample.vbrun" }, + "policy": { + "budget": { + "max_iterations": 3, + "max_wall_clock_secs": 60 + }, + "concurrency": { + "max_concurrent_candidates": 2 + }, + "convergence": { + "strategy": "exhaustive" + }, + "max_candidate_failures_per_iteration": 10, + "missing_output_policy": "mark_failed", + "iteration_failure_policy": "fail_execution" + }, + "evaluation": { + "scoring_type": "weighted_metrics", + "weights": { + "latency_p99_ms": -0.6, + "cost_usd": -0.4 + }, + "pass_threshold": 0.7, + "ranking": "highest_score", + "tie_breaking": "cost_usd" + }, + "variation": { + 
"source": source, + "candidates_per_iteration": 2 + }, + "swarm": true + }) + .to_string(); + + let response = void_control::bridge::handle_bridge_request_for_test( + "POST", + "/v1/executions/dry-run", + Some(&body), + ) + .expect("response"); + + assert_eq!(response.status, 200); + assert_eq!(response.json["valid"], true); + assert_eq!(response.json["plan"]["variation_source"], source); + } +} + fn global_config() -> GlobalConfig { GlobalConfig { max_concurrent_child_runs: 4, diff --git a/tests/execution_strategy_acceptance.rs b/tests/execution_strategy_acceptance.rs index b8b2ed2..ccba530 100644 --- a/tests/execution_strategy_acceptance.rs +++ b/tests/execution_strategy_acceptance.rs @@ -3,8 +3,9 @@ use std::collections::BTreeMap; use void_control::orchestration::{ - CandidateOutput, CandidateStatus, ControlEventType, ExecutionService, ExecutionSpec, - ExecutionStatus, FsExecutionStore, GlobalConfig, OrchestrationPolicy, VariationConfig, VariationProposal, + CandidateInbox, CandidateOutput, CandidateStatus, ControlEventType, ExecutionCandidate, + ExecutionService, ExecutionSpec, ExecutionStatus, FsExecutionStore, GlobalConfig, + OrchestrationPolicy, VariationConfig, VariationProposal, }; #[cfg(feature = "serde")] use void_control::orchestration::{ @@ -346,6 +347,201 @@ fn search_strategy_persists_lineage_and_delivers_parent_intent_to_refinement_ite .any(|entry| entry.intent_id == "intent-search-parent")); } +#[cfg(feature = "serde")] +#[test] +fn signal_reactive_search_runs_end_to_end() { + let store_dir = temp_store_dir("search-signal-reactive-acceptance"); + let store = FsExecutionStore::new(store_dir.clone()); + let mut runtime = MockRuntime::new(); + let signal_output = output_with_intents( + "candidate-1", + &[("latency_p99_ms", 95.0), ("cost_usd", 0.05)], + vec![signal_intent( + "intent-search-signal", + CommunicationIntentAudience::Broadcast, + "multiple candidates saw the same bottleneck", + )], + ); + runtime.seed_success( + "exec-run-candidate-1", + 
signal_output.clone(), + ); + runtime.seed_success( + "exec-run-candidate-2", + output("candidate-2", &[("latency_p99_ms", 80.0), ("cost_usd", 0.03)]), + ); + runtime.seed_success( + "exec-run-candidate-3", + output("candidate-3", &[("latency_p99_ms", 70.0), ("cost_usd", 0.02)]), + ); + runtime.seed_success( + "exec-run-candidate-4", + output("candidate-4", &[("latency_p99_ms", 72.0), ("cost_usd", 0.025)]), + ); + + let execution_id = "exec-search-signal-reactive"; + ExecutionService::::submit_execution( + &store, + execution_id, + &signal_reactive_strategy_spec("search"), + ) + .expect("submit execution"); + seed_planner_authored_candidates( + &store, + execution_id, + &[ + (1, 0, "candidate-1", "baseline"), + (2, 0, "candidate-2", "v1"), + (3, 1, "candidate-3", "v2"), + (4, 1, "candidate-4", "v4"), + ], + ); + seed_iteration_inboxes( + &store, + execution_id, + 1, + &["candidate-3", "candidate-4"], + &void_control::orchestration::message_box::normalize_intents( + "candidate-1", + 0, + &signal_output.intents, + ) + .0, + ); + + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + store, + ); + let execution = service + .process_execution(execution_id) + .expect("process execution"); + + let read_store = FsExecutionStore::new(store_dir); + let snapshot = read_store + .load_execution(execution_id) + .expect("load execution snapshot"); + let intents = read_store.load_intents(execution_id).expect("load intents"); + let messages = read_store + .load_routed_messages(execution_id) + .expect("load routed messages"); + let inbox_one = read_store + .load_inbox_snapshot(execution_id, 1, "candidate-3") + .expect("load candidate-3 inbox"); + let inbox_two = read_store + .load_inbox_snapshot(execution_id, 1, "candidate-4") + .expect("load candidate-4 inbox"); + let mut refinement_prompts: Vec<_> = snapshot + .candidates + .iter() + .filter(|candidate| candidate.iteration == 1) + .map(|candidate| 
candidate.overrides["agent.prompt"].clone()) + .collect(); + refinement_prompts.sort(); + + assert_eq!(execution.status, ExecutionStatus::Completed); + assert_eq!(snapshot.execution.status, ExecutionStatus::Completed); + assert_eq!(snapshot.execution.mode, "search"); + assert_eq!(refinement_prompts, vec!["v2".to_string(), "v4".to_string()]); + assert_eq!(snapshot.candidates.len(), 4); + assert_eq!(intents.len(), 1); + assert_eq!( + messages + .iter() + .filter(|message| message.status == void_control::orchestration::RoutedMessageStatus::Routed) + .count(), + 1 + ); + assert_eq!( + messages + .iter() + .filter(|message| message.status == void_control::orchestration::RoutedMessageStatus::Delivered) + .count(), + 2 + ); + assert_eq!(inbox_one.entries.len(), 1); + assert_eq!(inbox_two.entries.len(), 1); +} + +#[cfg(feature = "serde")] +#[test] +fn legacy_leader_directed_uses_persisted_planner_proposals() { + let store_dir = temp_store_dir("leader-directed-legacy-acceptance"); + let store = FsExecutionStore::new(store_dir.clone()); + let spec = legacy_leader_directed_strategy_spec(); + let mut runtime = MockRuntime::new(); + runtime.seed_success( + "exec-run-candidate-1", + output("candidate-1", &[("latency_p99_ms", 95.0), ("cost_usd", 0.05)]), + ); + runtime.seed_success( + "exec-run-candidate-2", + output("candidate-2", &[("latency_p99_ms", 80.0), ("cost_usd", 0.03)]), + ); + runtime.seed_success( + "exec-run-candidate-3", + output("candidate-3", &[("latency_p99_ms", 70.0), ("cost_usd", 0.02)]), + ); + runtime.seed_success( + "exec-run-candidate-4", + output("candidate-4", &[("latency_p99_ms", 72.0), ("cost_usd", 0.025)]), + ); + + ExecutionService::::submit_execution(&store, "exec-legacy-leader", &spec) + .expect("submit execution"); + seed_planner_authored_candidates( + &store, + "exec-legacy-leader", + &[ + (1, 0, "candidate-1", "legacy-a"), + (2, 0, "candidate-2", "legacy-b"), + (3, 1, "candidate-3", "legacy-c"), + (4, 1, "candidate-4", "legacy-d"), + ], + ); + + 
let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + store, + ); + let execution = service + .process_execution("exec-legacy-leader") + .expect("process execution"); + + let candidates = FsExecutionStore::new(store_dir.clone()) + .load_candidates("exec-legacy-leader") + .expect("load candidates"); + let prompts: Vec<_> = candidates + .iter() + .map(|candidate| candidate.overrides["agent.prompt"].clone()) + .collect(); + + assert_eq!(execution.status, ExecutionStatus::Completed); + assert_eq!( + prompts, + vec![ + "legacy-a".to_string(), + "legacy-b".to_string(), + "legacy-c".to_string(), + "legacy-d".to_string(), + ] + ); + assert_eq!( + FsExecutionStore::new(store_dir) + .load_spec("exec-legacy-leader") + .expect("load spec") + .variation + .source, + "leader_directed" + ); +} + fn run_mode_to_completion( mode: &str, store_dir: std::path::PathBuf, @@ -479,12 +675,95 @@ fn strategy_spec(mode: &str) -> ExecutionSpec { } } +fn signal_reactive_strategy_spec(mode: &str) -> ExecutionSpec { + let mut spec = strategy_spec(mode); + spec.variation = VariationConfig { + source: "signal_reactive".to_string(), + candidates_per_iteration: 2, + selection: None, + parameter_space: BTreeMap::new(), + explicit: vec![ + VariationProposal { + overrides: BTreeMap::from([("agent.prompt".to_string(), "baseline".to_string())]), + }, + VariationProposal { + overrides: BTreeMap::from([("agent.prompt".to_string(), "v1".to_string())]), + }, + VariationProposal { + overrides: BTreeMap::from([("agent.prompt".to_string(), "v2".to_string())]), + }, + VariationProposal { + overrides: BTreeMap::from([("agent.prompt".to_string(), "v3".to_string())]), + }, + VariationProposal { + overrides: BTreeMap::from([("agent.prompt".to_string(), "v4".to_string())]), + }, + ], + }; + spec +} + +fn legacy_leader_directed_strategy_spec() -> ExecutionSpec { + let mut spec = strategy_spec("swarm"); + spec.policy.budget.max_iterations = Some(2); + spec.variation 
= VariationConfig::leader_directed(2); + spec +} + fn failing_strategy_spec(mode: &str) -> ExecutionSpec { let mut spec = strategy_spec(mode); spec.policy.budget.max_iterations = Some(1); spec } +fn seed_planner_authored_candidates( + store: &FsExecutionStore, + execution_id: &str, + candidates: &[(u64, u32, &str, &str)], +) { + for (created_seq, iteration, candidate_id, prompt) in candidates { + let mut candidate = ExecutionCandidate::new( + execution_id, + candidate_id, + *created_seq, + *iteration, + CandidateStatus::Queued, + ); + candidate + .overrides + .insert("agent.prompt".to_string(), (*prompt).to_string()); + store.save_candidate(&candidate).expect("save candidate"); + } +} + +fn seed_iteration_inboxes( + store: &FsExecutionStore, + execution_id: &str, + iteration: u32, + candidate_ids: &[&str], + intents: &[CommunicationIntent], +) { + let inboxes = candidate_ids + .iter() + .map(|candidate_id| CandidateInbox::new(candidate_id)) + .collect::>(); + let routed = void_control::orchestration::message_box::route_intents(intents); + for (snapshot, delivered) in void_control::orchestration::message_box::materialize_inbox_snapshots( + execution_id, + iteration, + &inboxes, + intents, + &routed, + ) { + store.save_inbox_snapshot(&snapshot).expect("save inbox snapshot"); + for delivered in delivered { + store + .append_routed_message(execution_id, &delivered) + .expect("append delivered message"); + } + } +} + fn assert_event_counts( mode: &str, events: &[void_control::orchestration::ControlEventEnvelope], @@ -540,6 +819,18 @@ fn proposal_intent( } } +#[cfg(feature = "serde")] +fn signal_intent( + intent_id: &str, + audience: CommunicationIntentAudience, + summary_text: &str, +) -> CommunicationIntent { + CommunicationIntent { + kind: CommunicationIntentKind::Signal, + ..proposal_intent(intent_id, audience, summary_text, None) + } +} + fn temp_store_dir(label: &str) -> std::path::PathBuf { let nanos = std::time::SystemTime::now() 
.duration_since(std::time::UNIX_EPOCH) diff --git a/tests/execution_swarm_strategy.rs b/tests/execution_swarm_strategy.rs index 54382d5..f560d3b 100644 --- a/tests/execution_swarm_strategy.rs +++ b/tests/execution_swarm_strategy.rs @@ -2,8 +2,9 @@ use std::collections::BTreeMap; use void_control::orchestration::{ CandidateInbox, CandidateOutput, ConvergencePolicy, ExecutionAccumulator, - IterationEvaluation, MetricDirection, ScoringConfig, StopReason, SwarmStrategy, - VariationConfig, VariationProposal, VariationSelection, WeightedMetric, score_iteration, + IterationEvaluation, MessageStats, MetricDirection, ScoringConfig, StopReason, + SwarmStrategy, VariationConfig, VariationProposal, VariationSelection, WeightedMetric, + score_iteration, }; #[test] @@ -108,6 +109,22 @@ fn leader_directed_proposals_are_validated_before_use() { assert_eq!(proposals[0].overrides["sandbox.env.CONCURRENCY"], "2"); } +#[test] +fn signal_reactive_proposals_are_generated_from_planner_output() { + let mut accumulator = ExecutionAccumulator::default(); + accumulator.leader_proposals = vec![ + proposal(&[("sandbox.env.CONCURRENCY", "2")]), + VariationProposal { + overrides: BTreeMap::new(), + }, + ]; + + let proposals = VariationConfig::signal_reactive(2).generate(&accumulator); + + assert_eq!(proposals.len(), 1); + assert_eq!(proposals[0].overrides["sandbox.env.CONCURRENCY"], "2"); +} + #[test] fn swarm_plans_candidates_from_variation_source() { let strategy = SwarmStrategy::new( @@ -125,12 +142,153 @@ fn swarm_plans_candidates_from_variation_source() { let candidates = strategy.plan_candidates( &ExecutionAccumulator::default(), &[CandidateInbox::new("candidate-1"), CandidateInbox::new("candidate-2")], + None, ); assert_eq!(candidates.len(), 2); assert_eq!(candidates[0].overrides["agent.prompt"], "first"); } +#[test] +fn swarm_reduces_breadth_when_broadcast_and_delivery_failures_raise_convergence_pressure() { + let strategy = SwarmStrategy::new( + VariationConfig::explicit( + 4, + vec![ + 
proposal(&[("agent.prompt", "first")]), + proposal(&[("agent.prompt", "second")]), + proposal(&[("agent.prompt", "third")]), + proposal(&[("agent.prompt", "fourth")]), + ], + ), + scoring_config(), + ConvergencePolicy::default(), + ); + + let candidates = strategy.plan_candidates( + &ExecutionAccumulator::default(), + &[ + CandidateInbox::new("candidate-1"), + CandidateInbox::new("candidate-2"), + CandidateInbox::new("candidate-3"), + CandidateInbox::new("candidate-4"), + ], + Some(&MessageStats { + iteration: 1, + total_messages: 4, + leader_messages: 0, + broadcast_messages: 3, + proposal_count: 0, + signal_count: 1, + evaluation_count: 0, + high_priority_count: 0, + normal_priority_count: 4, + low_priority_count: 0, + delivered_count: 2, + dropped_count: 1, + expired_count: 1, + unique_sources: 1, + unique_intent_count: 4, + }), + ); + + assert_eq!(candidates.len(), 2); + assert_eq!(candidates[0].overrides["agent.prompt"], "first"); + assert_eq!(candidates[1].overrides["agent.prompt"], "second"); +} + +#[test] +fn swarm_preserves_full_breadth_when_proposals_arrive_from_multiple_sources() { + let strategy = SwarmStrategy::new( + VariationConfig::explicit( + 4, + vec![ + proposal(&[("agent.prompt", "first")]), + proposal(&[("agent.prompt", "second")]), + proposal(&[("agent.prompt", "third")]), + proposal(&[("agent.prompt", "fourth")]), + ], + ), + scoring_config(), + ConvergencePolicy::default(), + ); + + let candidates = strategy.plan_candidates( + &ExecutionAccumulator::default(), + &[ + CandidateInbox::new("candidate-1"), + CandidateInbox::new("candidate-2"), + CandidateInbox::new("candidate-3"), + CandidateInbox::new("candidate-4"), + ], + Some(&MessageStats { + iteration: 1, + total_messages: 4, + leader_messages: 1, + broadcast_messages: 1, + proposal_count: 3, + signal_count: 1, + evaluation_count: 0, + high_priority_count: 1, + normal_priority_count: 3, + low_priority_count: 0, + delivered_count: 4, + dropped_count: 0, + expired_count: 0, + unique_sources: 
3, + unique_intent_count: 4, + }), + ); + + assert_eq!(candidates.len(), 4); +} + +#[test] +fn swarm_keeps_legacy_leader_directed_planning_unbiased_by_message_stats() { + let strategy = SwarmStrategy::new( + VariationConfig::leader_directed(3), + scoring_config(), + ConvergencePolicy::default(), + ); + let mut accumulator = ExecutionAccumulator::default(); + accumulator.leader_proposals = vec![ + proposal(&[("agent.prompt", "first")]), + proposal(&[("agent.prompt", "second")]), + proposal(&[("agent.prompt", "third")]), + ]; + + let candidates = strategy.plan_candidates( + &accumulator, + &[ + CandidateInbox::new("candidate-1"), + CandidateInbox::new("candidate-2"), + CandidateInbox::new("candidate-3"), + ], + Some(&MessageStats { + iteration: 1, + total_messages: 3, + leader_messages: 0, + broadcast_messages: 3, + proposal_count: 0, + signal_count: 0, + evaluation_count: 0, + high_priority_count: 0, + normal_priority_count: 3, + low_priority_count: 0, + delivered_count: 1, + dropped_count: 1, + expired_count: 1, + unique_sources: 1, + unique_intent_count: 3, + }), + ); + + assert_eq!(candidates.len(), 3); + assert_eq!(candidates[0].overrides["agent.prompt"], "first"); + assert_eq!(candidates[1].overrides["agent.prompt"], "second"); + assert_eq!(candidates[2].overrides["agent.prompt"], "third"); +} + #[test] fn swarm_should_stop_on_threshold() { let strategy = SwarmStrategy::new( diff --git a/tests/strategy_scenarios.rs b/tests/strategy_scenarios.rs index df7bd60..67aefe6 100644 --- a/tests/strategy_scenarios.rs +++ b/tests/strategy_scenarios.rs @@ -402,6 +402,83 @@ fn search_pipeline_optimization_refines_known_bottleneck_config() { assert_eq!(snapshot.accumulator.search_phase.as_deref(), Some("refine")); } +#[test] +fn search_message_stats_can_preserve_a_small_exploration_quota() { + let store_dir = temp_store_dir("search-signal-reactive"); + let mut runtime = MockRuntime::new(); + runtime.seed_success( + "exec-run-candidate-1", + metrics_output_with_intents( + 
"candidate-1", + 95.0, + 0.06, + 0.95, + vec![scenario_signal_intent( + "intent-search-signal", + CommunicationIntentAudience::Broadcast, + "multiple candidates saw the same bottleneck", + )], + ), + ); + runtime.seed_success( + "exec-run-candidate-2", + metrics_output("candidate-2", 80.0, 0.05, 0.99), + ); + runtime.seed_success( + "exec-run-candidate-3", + metrics_output("candidate-3", 84.0, 0.05, 0.98), + ); + runtime.seed_success( + "exec-run-candidate-4", + metrics_output("candidate-4", 76.0, 0.05, 0.99), + ); + runtime.seed_success( + "exec-run-candidate-5", + metrics_output("candidate-5", 78.0, 0.05, 0.99), + ); + + let store = FsExecutionStore::new(store_dir.clone()); + ExecutionService::::submit_execution( + &store, + "exec-search-signal-reactive", + &search_signal_reactive_spec(), + ) + .expect("submit"); + + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + store, + ); + service + .plan_execution("exec-search-signal-reactive") + .expect("plan execution"); + + for _ in 0..6 { + let execution = service + .dispatch_execution_once("exec-search-signal-reactive") + .expect("dispatch"); + if execution.status == ExecutionStatus::Completed { + break; + } + } + + let snapshot = FsExecutionStore::new(store_dir) + .load_execution("exec-search-signal-reactive") + .expect("load execution"); + let iter1_prompts: Vec<_> = snapshot + .candidates + .iter() + .filter(|candidate| candidate.iteration == 1) + .map(|candidate| candidate.overrides["agent.prompt"].clone()) + .collect(); + + assert_eq!(snapshot.execution.status, ExecutionStatus::Completed); + assert_eq!(iter1_prompts, vec!["v2".to_string(), "v4".to_string()]); +} + fn swarm_incident_spec() -> ExecutionSpec { ExecutionSpec { mode: "swarm".to_string(), @@ -497,6 +574,27 @@ fn search_pipeline_spec() -> ExecutionSpec { } } +fn search_signal_reactive_spec() -> ExecutionSpec { + ExecutionSpec { + mode: "search".to_string(), + goal: "react to message stats 
without leaving incumbent-centered search".to_string(), + workflow: workflow(), + policy: search_policy(2), + evaluation: infra_evaluation(), + variation: VariationConfig::explicit( + 2, + vec![ + proposal(&[("agent.prompt", "baseline")]), + proposal(&[("agent.prompt", "v1")]), + proposal(&[("agent.prompt", "v2")]), + proposal(&[("agent.prompt", "v3")]), + proposal(&[("agent.prompt", "v4")]), + ], + ), + swarm: true, + } +} + fn workflow() -> void_control::orchestration::WorkflowTemplateRef { void_control::orchestration::WorkflowTemplateRef { template: "fixtures/sample.vbrun".to_string(), @@ -651,6 +749,18 @@ fn scenario_intent( } } +#[cfg(feature = "serde")] +fn scenario_signal_intent( + intent_id: &str, + audience: CommunicationIntentAudience, + summary_text: &str, +) -> CommunicationIntent { + CommunicationIntent { + kind: CommunicationIntentKind::Signal, + ..scenario_intent(intent_id, audience, summary_text, None) + } +} + fn assert_event_counts( events: &[void_control::orchestration::ControlEventEnvelope], expected: &[(ControlEventType, usize)], From f7acda19d17a91d431c817c9970026afb4f7a7bf Mon Sep 17 00:00:00 2001 From: diego Date: Mon, 23 Mar 2026 18:35:03 -0300 Subject: [PATCH 4/6] docs: add architecture and contributor guidance --- AGENTS.md | 222 +++++++------ README.md | 34 ++ docs/architecture.md | 291 ++++++++++++++++++ .../2026-03-23-repo-docs-ci-hardening.md | 148 +++++++++ .../specs/2026-03-23-repo-docs-ci-design.md | 140 +++++++++ 5 files changed, 748 insertions(+), 87 deletions(-) create mode 100644 docs/architecture.md create mode 100644 docs/superpowers/plans/2026-03-23-repo-docs-ci-hardening.md create mode 100644 docs/superpowers/specs/2026-03-23-repo-docs-ci-design.md diff --git a/AGENTS.md b/AGENTS.md index 33e1326..e4b8ac2 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,120 +1,168 @@ -# Repository Guidelines - -## Project Structure & Module Organization -This repository currently contains architecture and runtime-contract documentation for Void 
Control. - -- `spec/`: Canonical specifications (for example, `spec/void-control-runtime-spec-v0.1.md`). -- `LICENSE`: Project license. - -When adding implementation code, keep the same separation of concerns defined in the spec: -- Control-plane orchestration logic should be separate from runtime execution logic. -- Add new specs to `spec/` and version them in the filename (for example, `*-v0.2.md`). - -## Build, Test, and Development Commands -Use Cargo for local development and validation: - -- `cargo test`: Run core unit tests (no optional JSON compatibility feature). -- `cargo test --features serde`: Run JSON compatibility tests and fixture-based checks. -- `cargo test --features serde runtime::void_box::`: Run live-daemon client contract tests (mocked transport). -- `VOID_BOX_BASE_URL=http://127.0.0.1:3000 cargo test --features serde --test void_box_contract -- --ignored --nocapture`: Run live daemon contract gate tests (tests auto-generate fallback specs under `/tmp`). -- Optional spec overrides for policy behavior checks: - - `VOID_BOX_TIMEOUT_SPEC_FILE` - - `VOID_BOX_PARALLEL_SPEC_FILE` - - `VOID_BOX_RETRY_SPEC_FILE` - - `VOID_BOX_NO_POLICY_SPEC_FILE` -- `cargo run --example normalize_void_box_run`: Run the typed normalization example. -- `cargo run --bin normalize_fixture -- fixtures/sample.vbrun`: Normalize from local fixture format. - -### Void-Box Production Image (for UI/real Claude runs) - -When validating real pipeline execution from `void-control` UI, use the production -void-box rootfs from the sibling repository: +# AGENTS.md — void-control + +`void-control` is the control-plane side of the Void stack. It owns runtime +contract normalization, orchestration planning, persistence, bridge APIs, and +the operator UI. It does not implement VM isolation or guest execution; that +belongs to `void-box`. 
+ +## System boundary + +- `void-control`: + - normalizes `void-box` daemon responses into a stable contract + - plans and tracks multi-candidate executions + - persists execution state, events, candidate records, and message-box data + - exposes bridge APIs for launch, dry-run, and policy operations + - provides the graph-first web UI +- `void-box`: + - launches isolated runtime execution + - produces run, event, stage, and artifact data + - enforces sandbox/runtime behavior + +When changing code here, preserve that boundary. Control-plane orchestration and +runtime transport concerns should stay separate. + +## Repository layout + +- `spec/`: canonical specifications and design contracts +- `src/contract/`: runtime contract types, normalization, and compatibility logic +- `src/runtime/`: runtime adapter implementations (`MockRuntime`, `VoidBoxRuntimeClient`) +- `src/orchestration/`: planning, persistence, scheduling, reduction, strategies +- `src/bridge.rs`: HTTP bridge for launch, dry-run, execution inspection, and policy patching +- `src/bin/voidctl.rs`: CLI entrypoint and bridge server +- `tests/`: orchestration, bridge, runtime, and compatibility coverage +- `web/void-control-ux/`: React/Vite operator dashboard +- `docs/`: architecture notes, release process, and internal plans/specs + +## Module map + +### Rust library + +- `src/contract/` + - contract-facing API and normalization layer + - converts raw `void-box` payloads into stable `void-control` views +- `src/runtime/` + - execution runtime abstraction plus mock and live `void-box` client + - provider launch injection for message-box inbox delivery +- `src/orchestration/spec.rs` + - execution spec parsing and validation +- `src/orchestration/variation.rs` + - candidate-generation sources such as `parameter_space`, `explicit`, + `leader_directed`, and `signal_reactive` +- `src/orchestration/strategy.rs` + - swarm/search planning and reduction logic +- `src/orchestration/message_box.rs` + - communication 
intent routing, inbox snapshots, and `MessageStats` extraction +- `src/orchestration/store/` + - persisted execution, event, candidate, and message-box data +- `src/orchestration/service.rs` + - orchestration coordinator; plans, dispatches, reduces, and persists +- `src/orchestration/scheduler.rs` + - global execution/candidate dispatch ordering +- `src/orchestration/reconcile.rs` + - restart/reload of persisted active work +- `src/bridge.rs` + - serde-gated HTTP routes for UI/bridge workflows + +### Web UI + +- `web/void-control-ux/` + - graph-first operator dashboard + - reads daemon and bridge APIs + - build is the current validation gate for frontend changes + +## Core local commands + +Rust validation: ```bash -cd /home/diego/github/agent-infra/void-box -TMPDIR=$PWD/target/tmp scripts/build_claude_rootfs.sh +cargo fmt --all -- --check +cargo clippy --all-targets --all-features -- -D warnings +cargo test +cargo test --features serde +RUSTDOCFLAGS="-D warnings" cargo doc --no-deps --all-features ``` -Start daemon with production kernel/initramfs: +UI validation: ```bash -cd /home/diego/github/agent-infra/void-box -export ANTHROPIC_API_KEY=sk-ant-... 
-export VOID_BOX_KERNEL=/boot/vmlinuz-$(uname -r) -export VOID_BOX_INITRAMFS=$PWD/target/void-box-rootfs.cpio.gz -cargo run --bin voidbox -- serve --listen 127.0.0.1:43100 +cd web/void-control-ux +npm ci +npm run build ``` -Start bridge (required for Launch modal spec upload/content mode): +Bridge and UI local run: ```bash -cd /home/diego/github/void-control cargo run --features serde --bin voidctl -- serve +cd web/void-control-ux +npm run dev -- --host 127.0.0.1 --port 3000 ``` -Start UI: +## Runtime compatibility commands + +Live daemon contract gate: ```bash -cd /home/diego/github/void-control/web/void-control-ux -npm run dev -- --host 127.0.0.1 --port 3000 +VOID_BOX_BASE_URL=http://127.0.0.1:43100 \ +cargo test --features serde --test void_box_contract -- --ignored --nocapture ``` -Important: -- Do not use `/tmp/void-box-test-rootfs.cpio.gz` for production/runtime UI validation. -- `target/void-box-rootfs.cpio.gz` is the expected production image path. +Optional policy fixture overrides: -### UI Debugging Requirement +- `VOID_BOX_TIMEOUT_SPEC_FILE` +- `VOID_BOX_PARALLEL_SPEC_FILE` +- `VOID_BOX_RETRY_SPEC_FILE` +- `VOID_BOX_NO_POLICY_SPEC_FILE` -For UI work in `web/void-control-ux`, browser automation/inspection is required. -Do not rely on screenshot-only iteration when layout, DOM state, resize behavior, -or graph rendering need verification. +## UI workflow expectations + +For UI work in `web/void-control-ux`, use browser automation/inspection for DOM, +layout, resize, console, and network validation. Screenshots are fallback only. Preferred order: -- Use configured browser MCP first. -- If browser MCP is unavailable, install and use Playwright locally. -- Screenshots are a fallback only, not the primary workflow. 
+- configured browser MCP +- local Playwright if browser MCP is unavailable +- screenshots only when interactive inspection is impossible -Current local browser MCP: +## Documentation expectations -- `chrome-devtools` is already configured in `~/.codex/config.toml`. -- This should be the default tool for DOM inspection, layout debugging, console - errors, network checks, and viewport validation. +- add new specs under `spec/` with versioned filenames +- keep implementation-facing architecture notes in `docs/` +- update `README.md`, `AGENTS.md`, or `docs/architecture.md` when behavior or + workflows change materially -Playwright install fallback: +## Testing expectations -```bash -cd /home/diego/github/void-control/web/void-control-ux -npm install -D playwright -npx playwright install chromium -``` +- keep unit/contract tests close to the relevant Rust logic where practical +- use integration tests in `tests/` for orchestration, bridge, and acceptance flows +- before merging Rust changes, run: + - `cargo fmt --all -- --check` + - `cargo clippy --all-targets --all-features -- -D warnings` + - `cargo test` + - `cargo test --features serde` +- before merging UI changes, also run: + - `npm run build` in `web/void-control-ux` -If Playwright MCP is later added, prefer that over manual screenshots for UI -inspection. No dedicated local skill currently exists in this repo for -Playwright setup; use browser MCP or direct Playwright commands. +## Pre-commit -## Coding Style & Naming Conventions -For documentation and future code contributions: +This repo uses a checked-in `.pre-commit-config.yaml` for local validation. -- Use clear, boundary-focused naming aligned with the spec (`Run`, `Stage`, `Attempt`, `Runtime`, `Controller`). -- Keep Markdown headings hierarchical and concise. -- Prefer short sections and bullet lists over long prose blocks. -- Use ASCII unless a symbol is required for technical clarity. 
+Typical setup: -## Testing Guidelines -- Keep contract tests in module `#[cfg(test)]` blocks close to conversion/runtime logic. -- Add fixture-based tests for compatibility behavior under `--features serde`. -- Validate both paths before PRs: - - `cargo test` - - `cargo test --features serde` +```bash +pip install pre-commit +pre-commit install +pre-commit run --all-files +``` -## Commit & Pull Request Guidelines -Git history is minimal (`Initial commit`), so adopt a consistent imperative style now: +## Commit and PR guidance -- Commit format: `area: concise action` (example: `spec: clarify cancellation semantics`). -- Keep commits focused to one concern. +- commit format: `area: concise action` +- keep commits scoped to one concern - PRs should include: - - A short problem statement. - - A summary of what changed. - - Any spec sections affected (file paths + headings). - - Follow-up work, if intentionally deferred. + - problem statement + - summary of changes + - affected specs/docs + - verification commands run + - follow-up work, if intentionally deferred diff --git a/README.md b/README.md index 7b9aa06..26347f0 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,12 @@ Click the preview above for the full-quality MP4, or use the direct file link: [ - Provides terminal-first and graph-first operator UX. - Enforces runtime contract compatibility with `void-box`. +## Documentation + +- Architecture: [docs/architecture.md](docs/architecture.md) +- Contributor and agent guide: [AGENTS.md](AGENTS.md) +- Release and compatibility process: [docs/release-process.md](docs/release-process.md) + ## Project Components - `spec/`: Runtime and orchestration contracts. 
@@ -80,6 +86,34 @@ VITE_VOID_CONTROL_BASE_URL=http://127.0.0.1:43210 \ npm run dev ``` +## Development + +Rust validation: + +```bash +cargo fmt --all -- --check +cargo clippy --all-targets --all-features -- -D warnings +cargo test +cargo test --features serde +RUSTDOCFLAGS="-D warnings" cargo doc --no-deps --all-features +``` + +UI validation: + +```bash +cd web/void-control-ux +npm ci +npm run build +``` + +Optional local pre-commit setup: + +```bash +pip install pre-commit +pre-commit install +pre-commit run --all-files +``` + ## Terminal Console ```bash diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000..0d546ae --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,291 @@ +# Architecture + +## Overview + +`void-control` is the control-plane layer for `void-box` execution. It consumes +runtime data from `void-box`, normalizes it into stable contract types, plans +and tracks orchestration iterations, persists execution state, and exposes both +operator-facing and programmatic interfaces. + +At a high level: + +```text +void-control = contract normalization + orchestration + persistence + bridge/UI +``` + +## System boundary + +`void-control` does not launch or isolate workloads itself. That responsibility +belongs to `void-box`. `void-control` assumes a runtime provider that can: + +- start a run +- inspect a run +- return structured output or a typed failure + +The default live provider is `VoidBoxRuntimeClient`. Tests use `MockRuntime`. 
+ +## Component diagram + +```text +┌─────────────────────────────────────────────────────────────────┐ +│ Operator / CLI / UI │ +│ │ +│ ┌──────────────────────────────┐ ┌─────────────────────────┐ │ +│ │ web/void-control-ux │ │ voidctl / bridge │ │ +│ │ graph + inspector + launch │ │ launch, dry-run, query │ │ +│ └───────────────┬──────────────┘ └────────────┬────────────┘ │ +│ │ │ │ +└──────────────────┼───────────────────────────────┼──────────────┘ + │ │ + ▼ ▼ + ┌───────────────────────────────────────────────────┐ + │ Orchestration Service │ + │ │ + │ - validate execution spec │ + │ - plan candidates │ + │ - route communication intents │ + │ - dispatch runtime work │ + │ - collect artifacts and reduce iterations │ + │ - persist execution/event/candidate state │ + └───────────────┬───────────────────────┬───────────┘ + │ │ + ▼ ▼ + ┌───────────────────┐ ┌──────────────────────┐ + │ Store / Replay │ │ Planning Strategies │ + │ fs-backed data │ │ swarm / search │ + │ events / inboxes │ │ variation sources │ + └─────────┬─────────┘ └──────────┬───────────┘ + │ │ + └────────────┬───────────┘ + ▼ + ┌───────────────────┐ + │ Runtime Adapter │ + │ mock / void-box │ + └─────────┬─────────┘ + ▼ + `void-box` +``` + +## Main components + +### Contract layer + +`src/contract/` defines the stable types and normalization logic used to map +raw runtime payloads into `void-control`'s contract model. + +Responsibilities: + +- map daemon status/event values into stable enums +- reject malformed or incompatible payloads +- preserve diagnostics for compatibility analysis + +Key files: + +- `src/contract/api.rs` +- `src/contract/compat.rs` +- `src/contract/compat_json.rs` +- `src/contract/event.rs` +- `src/contract/state.rs` + +### Runtime adapter layer + +`src/runtime/` abstracts over the execution provider. 
+ +Responsibilities: + +- define the runtime interface used by orchestration +- provide the mock runtime used by tests +- provide the serde-gated `void-box` client used for live integrations +- inject launch context such as inbox snapshots into provider requests + +Key files: + +- `src/runtime/mod.rs` +- `src/runtime/mock.rs` +- `src/runtime/void_box.rs` + +### Orchestration core + +`src/orchestration/service.rs` coordinates the execution lifecycle. + +Responsibilities: + +- create and validate execution records +- plan iteration candidates from a chosen strategy and variation source +- persist queued candidates +- start candidate runs through the runtime adapter +- collect structured output and failure outcomes +- reduce iteration results into accumulator and execution state +- emit control-plane events for replay and UX + +Supporting modules: + +- `src/orchestration/spec.rs`: execution spec schema/validation +- `src/orchestration/variation.rs`: candidate source generation +- `src/orchestration/strategy.rs`: swarm/search planning and stopping +- `src/orchestration/scoring.rs`: weighted scoring and ranking +- `src/orchestration/policy.rs`: budgets, concurrency, convergence policies +- `src/orchestration/events.rs`: persisted control-plane event model +- `src/orchestration/scheduler.rs`: global dispatch fairness and queueing +- `src/orchestration/reconcile.rs`: restart/reload handling + +### Message box and signal-reactive planning + +The message-box model gives candidates a structured communication channel across +iterations. 
+ +Responsibilities: + +- persist `CommunicationIntent` records +- route intents into `RoutedMessage` records +- build per-candidate inbox snapshots +- derive `MessageStats` for planning iteration `N` + +Current signal-reactive behavior is metadata-driven: + +- planner reacts to routed-message counts and delivery outcomes +- planner does not inspect free-form payload text for candidate construction +- legacy `leader_directed` remains distinct from `signal_reactive` + +Key files: + +- `src/orchestration/message_box.rs` +- `src/orchestration/types.rs` +- `src/orchestration/variation.rs` +- `src/orchestration/strategy.rs` + +### Persistence and replay + +The filesystem-backed store persists enough state to reconstruct active +executions and replay control-plane history. + +Responsibilities: + +- execution metadata and snapshots +- queued and terminal candidate records +- control-plane events +- communication intents and routed messages +- inbox snapshots for provider launch injection + +Key files: + +- `src/orchestration/store.rs` +- `src/orchestration/store/fs.rs` + +## Core flows + +### 1. Execution submission + +```text +ExecutionSpec + -> validation + -> execution record + accumulator persisted + -> initial planning request + -> queued candidate records +``` + +Entry points: + +- CLI / bridge route +- test harness helpers + +### 2. Iteration planning + +```text +execution + accumulator + prior results + message stats + -> strategy.plan_candidates(...) + -> variation source selection + -> candidate specs persisted as queued +``` + +Planning inputs depend on strategy: + +- swarm: breadth-oriented candidate planning +- search: incumbent-centered neighborhood refinement + +### 3. Candidate dispatch + +```text +queued candidate + -> scheduler grant + -> inbox snapshot resolution + -> runtime.start_run(...) + -> candidate marked running +``` + +For serde-enabled live flows, launch injection can embed the inbox snapshot into +the runtime request. + +### 4. 
Artifact collection and reduction + +```text +runtime inspection / terminal result + -> structured output collection + -> candidate terminal record + -> iteration evaluation set + -> strategy.reduce(...) + -> accumulator + execution status update +``` + +Reduction decides whether to: + +- continue with another iteration +- stop due to threshold/plateau/exhaustion +- mark execution failed when policy requires it + +### 5. Signal-reactive planning path + +```text +CommunicationIntent[] + -> RoutedMessage[] + -> inbox snapshots for delivery iteration N + -> extract_message_stats(...) + -> advisory strategy bias for iteration N +``` + +The planner uses the stats as advisory metadata. It does not treat message +payloads as direct candidate-authoring commands. + +## Persistence and replay model + +The event log and persisted execution state must support restart and partial +reconstruction: + +- active executions are reloaded by reconciliation +- queued candidates are restored without duplication +- control events remain the replay spine for execution history +- message-box artifacts remain separate persisted data, not ad hoc in-memory state + +This separation matters because planning, dispatch, and operator views all +depend on deterministic persisted state rather than transient worker memory. 
+ +## Source file map + +### Operator and bridge + +- `src/bin/voidctl.rs` +- `src/bridge.rs` +- `web/void-control-ux/` + +### Contract and runtime + +- `src/contract/` +- `src/runtime/` + +### Orchestration + +- `src/orchestration/service.rs` +- `src/orchestration/spec.rs` +- `src/orchestration/strategy.rs` +- `src/orchestration/variation.rs` +- `src/orchestration/message_box.rs` +- `src/orchestration/store/fs.rs` +- `src/orchestration/scheduler.rs` +- `src/orchestration/reconcile.rs` + +## Related documents + +- `README.md` +- `AGENTS.md` +- `spec/` +- `docs/release-process.md` diff --git a/docs/superpowers/plans/2026-03-23-repo-docs-ci-hardening.md b/docs/superpowers/plans/2026-03-23-repo-docs-ci-hardening.md new file mode 100644 index 0000000..746db15 --- /dev/null +++ b/docs/superpowers/plans/2026-03-23-repo-docs-ci-hardening.md @@ -0,0 +1,148 @@ +# Repo Docs And CI Hardening Implementation Plan + +> **For agentic workers:** REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Improve `void-control` contributor documentation, architecture documentation, pre-commit checks, and CI coverage without changing runtime behavior. + +**Architecture:** This change strengthens repository metadata around the current codebase rather than introducing new product behavior. The work is split into documentation updates, local pre-commit automation, and CI workflow hardening so each area stays focused and independently reviewable. 
+ +**Tech Stack:** Markdown, GitHub Actions, pre-commit, Rust/Cargo, Node/Vite + +--- + +## File Map + +- Create: `docs/architecture.md` +- Create: `.pre-commit-config.yaml` +- Create: `docs/superpowers/specs/2026-03-23-repo-docs-ci-design.md` +- Create: `docs/superpowers/plans/2026-03-23-repo-docs-ci-hardening.md` +- Modify: `AGENTS.md` +- Modify: `README.md` +- Modify: `.github/workflows/ci.yml` + +## Chunk 1: Documentation + +### Task 1: Rewrite `AGENTS.md` + +**Files:** +- Modify: `AGENTS.md` + +- [ ] **Step 1: Rewrite the repository guide** + +Update `AGENTS.md` so it explains: +- system boundary between `void-control` and `void-box` +- repo/module layout +- recommended commands +- UI workflow expectations +- testing and PR expectations + +- [ ] **Step 2: Review for consistency** + +Check that commands, file paths, and expectations match the current repo. + +### Task 2: Add `docs/architecture.md` + +**Files:** +- Create: `docs/architecture.md` + +- [ ] **Step 1: Write the architecture document** + +Document the implemented architecture with: +- component overview +- data flows +- persistence and replay notes +- source file map + +- [ ] **Step 2: Review for accuracy** + +Verify the doc matches current module names and responsibilities. + +### Task 3: Improve `README.md` + +**Files:** +- Modify: `README.md` + +- [ ] **Step 1: Add contributor-oriented links and development commands** + +Keep quick start concise, but add: +- architecture link +- contributor guide link +- validation command list + +- [ ] **Step 2: Review for duplication** + +Ensure README stays high-level and delegates deeper detail to `AGENTS.md` and `docs/architecture.md`. 
+ +## Chunk 2: Local Validation + +### Task 4: Add pre-commit config + +**Files:** +- Create: `.pre-commit-config.yaml` + +- [ ] **Step 1: Add repo-managed hooks** + +Include local hooks for: +- `cargo fmt --all -- --check` +- `cargo clippy --all-targets --all-features -- -D warnings` +- `cargo test` +- `cargo test --features serde` +- `npm run build` in `web/void-control-ux` + +- [ ] **Step 2: Document hook usage** + +Reference installation and usage from `README.md` or `AGENTS.md`. + +## Chunk 3: CI + +### Task 5: Expand GitHub CI workflow + +**Files:** +- Modify: `.github/workflows/ci.yml` + +- [ ] **Step 1: Split CI into focused jobs** + +Add distinct jobs for: +- formatting +- clippy +- Rust tests +- serde tests +- docs +- UI build + +- [ ] **Step 2: Keep workflow aligned with local checks** + +Ensure the commands used in CI match the documented local validation flow where practical. + +## Chunk 4: Verification + +### Task 6: Run validation + +**Files:** +- No code changes + +- [ ] **Step 1: Validate Rust formatting** + +Run: `cargo fmt --all -- --check` + +- [ ] **Step 2: Validate clippy** + +Run: `cargo clippy --all-targets --all-features -- -D warnings` + +- [ ] **Step 3: Validate Rust tests** + +Run: `cargo test` + +- [ ] **Step 4: Validate serde tests** + +Run: `cargo test --features serde` + +- [ ] **Step 5: Validate UI build** + +Run: `npm run build` +Working directory: `web/void-control-ux` + +- [ ] **Step 6: Inspect final diff** + +Run: `git diff --stat` + diff --git a/docs/superpowers/specs/2026-03-23-repo-docs-ci-design.md b/docs/superpowers/specs/2026-03-23-repo-docs-ci-design.md new file mode 100644 index 0000000..f6fa5ab --- /dev/null +++ b/docs/superpowers/specs/2026-03-23-repo-docs-ci-design.md @@ -0,0 +1,140 @@ +# Void-Control Docs And CI Design + +## Goal + +Improve `void-control`'s contributor-facing repository documentation and local/CI validation so the repo is easier to navigate, easier to maintain, and less dependent on tribal knowledge. 
+ +## Scope + +This design covers: + +- rewriting `AGENTS.md` into a stronger repository guide +- adding `docs/architecture.md` for `void-control` +- tightening `README.md` to link contributor and architecture docs +- adding repository-managed pre-commit hooks +- expanding GitHub CI coverage for Rust and UI validation + +This design does not cover: + +- cross-platform CI matrices +- MSRV policy +- security audit jobs +- release workflow redesign +- changes to `void-box` + +## Current Problems + +### Documentation gaps + +- `AGENTS.md` is short and mostly procedural; it does not explain the current Rust module layout or the orchestration/runtime boundary in enough detail. +- There is no `docs/architecture.md`, so contributors have to infer architecture from specs and source files. +- `README.md` is useful for quick start but not for contributor orientation. + +### Validation gaps + +- CI currently runs only `cargo test`, `cargo test --features serde`, and the UI build. +- Formatting, clippy, and docs are not enforced in CI. +- There is no checked-in pre-commit configuration for local validation consistency. + +## Design + +### 1. `AGENTS.md` + +Rewrite `AGENTS.md` as the repo-local contributor guide for agents and humans. + +Sections: + +- project purpose and control-plane/runtime boundary +- repository layout with key directories +- module map for `src/contract`, `src/runtime`, `src/orchestration`, `src/bridge`, and `web/void-control-ux` +- required local validation commands +- guidance for UI work and browser-based inspection +- testing expectations +- commit and PR expectations + +Tone should remain concise and operational, but more informative than the current file. + +### 2. `docs/architecture.md` + +Add a contributor-focused architecture document for `void-control`. 
+ +Sections: + +- overview and system boundary +- main components and responsibilities +- component diagram in ASCII +- core data flows: + - execution submission + - planning and iteration + - candidate dispatch + - artifact collection and reduction + - signal-reactive planning path +- persistence and replay responsibilities +- source file map for quick navigation + +This document should describe the code as implemented today and avoid speculative future architecture beyond brief notes. + +### 3. `README.md` + +Improve contributor orientation without turning the README into a full architecture doc. + +Changes: + +- add links to `docs/architecture.md`, `AGENTS.md`, and release-process docs +- add a short "Development" section with the main validation commands +- keep quick-start instructions concise + +### 4. Pre-commit + +Add a repository-managed `.pre-commit-config.yaml`. + +Hooks: + +- `cargo fmt --all -- --check` +- `cargo clippy --all-targets --all-features -- -D warnings` +- `cargo test` +- `cargo test --features serde` +- `npm run build` in `web/void-control-ux` + +Rationale: + +- these are already meaningful repo checks +- they align with local development and CI +- they do not introduce speculative tooling not already used by the repo + +### 5. CI + +Expand `.github/workflows/ci.yml` into separate jobs for clearer failure modes. + +Jobs: + +- `fmt` +- `clippy` +- `rust-test` +- `rust-test-serde` +- `rust-doc` +- `ui-build` + +Details: + +- use stable Rust +- keep current Ubuntu-only baseline +- enable `RUSTDOCFLAGS=-D warnings` for docs +- keep the existing compatibility workflow separate + +## Trade-offs + +### Why not copy `void-box` CI exactly + +`void-box` has a broader platform matrix and stronger runtime-specific constraints. `void-control` does not yet need the same level of CI breadth, and copying it directly would add cost and noise without clear benefit. 
+ +### Why include `cargo test` and `cargo test --features serde` in pre-commit + +They are heavier than formatting checks, but this repo is still small enough that the stronger local gate is practical. The goal is to catch breakage before push, not optimize for very fast hooks. + +## Success Criteria + +- a new contributor can find the architecture and main module boundaries quickly +- local validation commands are documented once and consistent across docs, hooks, and CI +- CI failures clearly identify whether the issue is formatting, linting, docs, Rust tests, serde tests, or UI build +- the repo has a checked-in pre-commit configuration contributors can install locally From a26c065e83307a435f631e6a2d133a8a66284382 Mon Sep 17 00:00:00 2001 From: diego Date: Mon, 23 Mar 2026 18:35:04 -0300 Subject: [PATCH 5/6] ci: add repo validation gates --- .github/workflows/ci.yml | 67 ++++++++++++++++++++++++++++++++++++++-- .pre-commit-config.yaml | 32 +++++++++++++++++++ 2 files changed, 97 insertions(+), 2 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f7ce775..9485bae 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,8 +6,44 @@ on: - main pull_request: +env: + CARGO_TERM_COLOR: always + RUST_BACKTRACE: 1 + jobs: - rust: + fmt: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Rust + uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt + + - name: Cache Cargo + uses: Swatinem/rust-cache@v2 + + - name: Check formatting + run: cargo fmt --all -- --check + + clippy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Rust + uses: dtolnay/rust-toolchain@stable + with: + components: clippy + + - name: Cache Cargo + uses: Swatinem/rust-cache@v2 + + - name: Run clippy + run: cargo clippy --all-targets --all-features -- -D warnings + + rust-test: runs-on: ubuntu-latest steps: - uses: 
actions/checkout@v4 @@ -21,10 +57,37 @@ jobs: - name: Run Rust tests run: cargo test + rust-test-serde: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Rust + uses: dtolnay/rust-toolchain@stable + + - name: Cache Cargo + uses: Swatinem/rust-cache@v2 + - name: Run Rust serde tests run: cargo test --features serde - ui: + rust-doc: + runs-on: ubuntu-latest + env: + RUSTDOCFLAGS: -D warnings + steps: + - uses: actions/checkout@v4 + + - name: Set up Rust + uses: dtolnay/rust-toolchain@stable + + - name: Cache Cargo + uses: Swatinem/rust-cache@v2 + + - name: Build docs + run: cargo doc --no-deps --all-features + + ui-build: runs-on: ubuntu-latest defaults: run: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..b20af1e --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,32 @@ +repos: + - repo: local + hooks: + - id: cargo-fmt + name: cargo fmt + entry: cargo fmt --all -- --check + language: system + pass_filenames: false + + - id: cargo-clippy + name: cargo clippy + entry: cargo clippy --all-targets --all-features -- -D warnings + language: system + pass_filenames: false + + - id: cargo-test + name: cargo test + entry: cargo test + language: system + pass_filenames: false + + - id: cargo-test-serde + name: cargo test --features serde + entry: cargo test --features serde + language: system + pass_filenames: false + + - id: ui-build + name: ui build + entry: sh -c 'cd web/void-control-ux && npm run build' + language: system + pass_filenames: false From 1f1d82d24b67165eecfc9b2e3ff24f5c1791380d Mon Sep 17 00:00:00 2001 From: diego Date: Mon, 23 Mar 2026 18:35:14 -0300 Subject: [PATCH 6/6] rust: satisfy fmt and clippy gates --- examples/normalize_void_box_json.rs | 1 - examples/normalize_void_box_run.rs | 1 - src/bin/normalize_fixture.rs | 24 +- src/bin/voidctl.rs | 212 ++++++++-------- src/bridge.rs | 75 +++--- src/contract/compat.rs | 27 ++- src/contract/compat_json.rs | 5 +- 
src/contract/error.rs | 6 +- src/lib.rs | 4 +- src/orchestration/events.rs | 7 +- src/orchestration/mod.rs | 12 +- src/orchestration/scoring.rs | 23 +- src/orchestration/service.rs | 323 ++++++++++++++----------- src/orchestration/store/fs.rs | 73 +++--- src/orchestration/strategy.rs | 15 +- src/orchestration/variation.rs | 9 +- src/runtime/mock.rs | 35 ++- src/runtime/void_box.rs | 79 ++++-- tests/execution_artifact_collection.rs | 80 +++++- tests/execution_bridge.rs | 19 +- tests/execution_bridge_live.rs | 39 +-- tests/execution_dry_run.rs | 39 ++- tests/execution_event_replay.rs | 12 +- tests/execution_message_box.rs | 47 ++-- tests/execution_reconciliation.rs | 26 +- tests/execution_scheduler.rs | 102 ++++++-- tests/execution_search_strategy.rs | 99 +++++--- tests/execution_spec_validation.rs | 4 +- tests/execution_strategy_acceptance.rs | 187 ++++++++++---- tests/execution_swarm_strategy.rs | 100 +++++--- tests/execution_worker.rs | 221 ++++++++++++----- tests/strategy_scenarios.rs | 98 ++++++-- tests/void_box_contract.rs | 143 +++++++---- 33 files changed, 1401 insertions(+), 746 deletions(-) diff --git a/examples/normalize_void_box_json.rs b/examples/normalize_void_box_json.rs index 2f4b530..0b35bb5 100644 --- a/examples/normalize_void_box_json.rs +++ b/examples/normalize_void_box_json.rs @@ -20,4 +20,3 @@ fn main() { fn main() { eprintln!("re-run with: cargo run --features serde --example normalize_void_box_json"); } - diff --git a/examples/normalize_void_box_run.rs b/examples/normalize_void_box_run.rs index 86bbd87..9e60850 100644 --- a/examples/normalize_void_box_run.rs +++ b/examples/normalize_void_box_run.rs @@ -48,4 +48,3 @@ fn main() { } } } - diff --git a/src/bin/normalize_fixture.rs b/src/bin/normalize_fixture.rs index e30dd86..5536e4f 100644 --- a/src/bin/normalize_fixture.rs +++ b/src/bin/normalize_fixture.rs @@ -109,9 +109,10 @@ fn parse_event_line(value: &str, line_no: usize) -> Result { - ts_ms = Some(raw.parse::().map_err(|_| { - format!("line 
{}: invalid ts_ms '{}'", line_no, raw) - })?); + ts_ms = Some( + raw.parse::() + .map_err(|_| format!("line {}: invalid ts_ms '{}'", line_no, raw))?, + ); } "event_type" => event_type = Some(raw.to_string()), "run_id" => { @@ -121,9 +122,10 @@ fn parse_event_line(value: &str, line_no: usize) -> Result { if !raw.is_empty() { - seq = Some(raw.parse::().map_err(|_| { - format!("line {}: invalid seq '{}'", line_no, raw) - })?); + seq = Some( + raw.parse::() + .map_err(|_| format!("line {}: invalid seq '{}'", line_no, raw))?, + ); } } "payload" => { @@ -141,8 +143,7 @@ fn parse_event_line(value: &str, line_no: usize) -> Result Result, String> { let mut map = BTreeMap::new(); for pair in value.split(',') { - let (key, raw) = pair.split_once(':').ok_or_else(|| { - format!("line {}: invalid payload pair '{}'", line_no, pair) - })?; + let (key, raw) = pair + .split_once(':') + .ok_or_else(|| format!("line {}: invalid payload pair '{}'", line_no, pair))?; map.insert(key.to_string(), parse_payload_value(raw)); } Ok(map) @@ -186,4 +187,3 @@ fn parse_payload_value(raw: &str) -> VoidBoxPayloadValue { } VoidBoxPayloadValue::String(raw.to_string()) } - diff --git a/src/bin/voidctl.rs b/src/bin/voidctl.rs index dad051a..482baee 100644 --- a/src/bin/voidctl.rs +++ b/src/bin/voidctl.rs @@ -47,10 +47,7 @@ fn run() -> Result<(), String> { println!(" voidctl help # show this help"); return Ok(()); } - return Err(format!( - "unknown command '{}'. supported: serve, help", - cmd - )); + return Err(format!("unknown command '{}'. 
supported: serve, help", cmd)); } #[derive(Debug, Default, Serialize, Deserialize)] @@ -84,8 +81,17 @@ fn run() -> Result<(), String> { let tokens = head.split_whitespace().collect::>(); let command_candidates = [ - "/run", "/status", "/events", "/logs", "/cancel", "/list", "/watch", "/resume", - "/execution", "/help", "/exit", + "/run", + "/status", + "/events", + "/logs", + "/cancel", + "/list", + "/watch", + "/resume", + "/execution", + "/help", + "/exit", ]; let mut out = Vec::new(); @@ -114,17 +120,19 @@ fn run() -> Result<(), String> { match cmd { "/run" => { options.extend(["--run-id", "--policy"]); - if tokens.iter().any(|t| *t == "--policy") { + if tokens.contains(&"--policy") { options.extend(["fast", "balanced", "safe"]); } } - "/execution" => options.extend(["create", "dry-run", "list", "status", "pause", "resume", "cancel", "patch"]), + "/execution" => options.extend([ + "create", "dry-run", "list", "status", "pause", "resume", "cancel", "patch", + ]), "/events" => options.push("--from"), "/logs" => options.push("--follow"), "/cancel" => options.push("--reason"), "/list" => { options.push("--state"); - if tokens.iter().any(|t| *t == "--state") { + if tokens.contains(&"--state") { options.extend(["active", "terminal"]); } } @@ -276,7 +284,10 @@ fn run() -> Result<(), String> { "/run" => { let spec = tokens .next() - .ok_or_else(|| "usage: /run [--run-id ] [--policy ]".to_string())? + .ok_or_else(|| { + "usage: /run [--run-id ] [--policy ]" + .to_string() + })? .to_string(); let mut run_id = None; let mut policy = None; @@ -414,42 +425,32 @@ fn run() -> Result<(), String> { "dry-run" => Ok(Command::ExecutionDryRun { spec: tokens .next() - .ok_or_else(|| { - "usage: /execution dry-run ".to_string() - })? + .ok_or_else(|| "usage: /execution dry-run ".to_string())? .to_string(), }), "list" => Ok(Command::ExecutionList), "status" => Ok(Command::ExecutionStatus { execution_id: tokens .next() - .ok_or_else(|| { - "usage: /execution status ".to_string() - })? 
+ .ok_or_else(|| "usage: /execution status ".to_string())? .to_string(), }), "pause" => Ok(Command::ExecutionPause { execution_id: tokens .next() - .ok_or_else(|| { - "usage: /execution pause ".to_string() - })? + .ok_or_else(|| "usage: /execution pause ".to_string())? .to_string(), }), "resume" => Ok(Command::ExecutionResume { execution_id: tokens .next() - .ok_or_else(|| { - "usage: /execution resume ".to_string() - })? + .ok_or_else(|| "usage: /execution resume ".to_string())? .to_string(), }), "cancel" => Ok(Command::ExecutionCancel { execution_id: tokens .next() - .ok_or_else(|| { - "usage: /execution cancel ".to_string() - })? + .ok_or_else(|| "usage: /execution cancel ".to_string())? .to_string(), }), "patch" => { @@ -468,27 +469,33 @@ fn run() -> Result<(), String> { "--max-iterations" => { idx += 1; if idx >= rest.len() { - return Err("missing value for --max-iterations".to_string()); + return Err( + "missing value for --max-iterations".to_string() + ); } - max_iterations = Some( - rest[idx] - .parse::() - .map_err(|_| "invalid integer for --max-iterations".to_string())?, - ); + max_iterations = + Some(rest[idx].parse::().map_err(|_| { + "invalid integer for --max-iterations".to_string() + })?); } "--max-concurrent-candidates" => { idx += 1; if idx >= rest.len() { - return Err("missing value for --max-concurrent-candidates".to_string()); + return Err( + "missing value for --max-concurrent-candidates" + .to_string(), + ); } - max_concurrent_candidates = Some( - rest[idx] - .parse::() - .map_err(|_| "invalid integer for --max-concurrent-candidates".to_string())?, - ); + max_concurrent_candidates = + Some(rest[idx].parse::().map_err(|_| { + "invalid integer for --max-concurrent-candidates" + .to_string() + })?); } other => { - return Err(format!("unknown /execution patch option '{other}'")); + return Err(format!( + "unknown /execution patch option '{other}'" + )); } } idx += 1; @@ -628,8 +635,8 @@ Policy presets: fast | balanced | safe" if let Some(parent) = 
path.parent() { fs::create_dir_all(parent).map_err(|e| format!("create session dir failed: {e}"))?; } - let serialized = - serde_json::to_string_pretty(session).map_err(|e| format!("serialize session failed: {e}"))?; + let serialized = serde_json::to_string_pretty(session) + .map_err(|e| format!("serialize session failed: {e}"))?; fs::write(path, serialized).map_err(|e| format!("write session failed: {e}")) } @@ -766,8 +773,8 @@ Policy presets: fast | balanced | safe" use std::net::TcpStream; let (host, port) = parse_host_port(base_url)?; - let mut stream = - TcpStream::connect(format!("{host}:{port}")).map_err(|e| format!("connect failed: {e}"))?; + let mut stream = TcpStream::connect(format!("{host}:{port}")) + .map_err(|e| format!("connect failed: {e}"))?; let body = body.unwrap_or(""); let request = format!( "{method} {path} HTTP/1.1\r\nHost: {host}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", @@ -955,10 +962,9 @@ Policy presets: fast | balanced | safe" run_id, stopped.state, stopped.terminal_event_id ); session.last_selected_run = Some(run_id.clone()); - session.last_seen_event_id_by_run.insert( - run_id, - stopped.terminal_event_id, - ); + session + .last_seen_event_id_by_run + .insert(run_id, stopped.terminal_event_id); } Err(err) => print_contract_error(&err), } @@ -995,16 +1001,13 @@ Policy presets: fast | balanced | safe" } Command::ExecutionCreate { spec } => { match load_execution_spec_file(&spec).and_then(|spec_text| { - bridge_request( - &bridge_base_url, - "POST", - "/v1/executions", - Some(&spec_text), - ) + bridge_request(&bridge_base_url, "POST", "/v1/executions", Some(&spec_text)) }) { Ok(json) => println!( "execution_id={} status={} iterations={} best_candidate={}", - json.get("execution_id").and_then(|v| v.as_str()).unwrap_or("-"), + json.get("execution_id") + .and_then(|v| v.as_str()) + .unwrap_or("-"), json.get("status") .and_then(|v| v.as_str()) .unwrap_or("unknown"), @@ -1046,46 +1049,43 @@ Policy 
presets: fast | balanced | safe" Err(err) => println!("error: {err}"), } } - Command::ExecutionList => match bridge_request( - &bridge_base_url, - "GET", - "/v1/executions", - None, - ) { - Ok(json) => { - let executions = json - .get("executions") - .and_then(|v| v.as_array()) - .cloned() - .unwrap_or_default(); - if executions.is_empty() { - println!("no executions"); - } else { - for execution in executions { - println!( - "execution_id={} status={} iterations={} best_candidate={}", - execution - .get("execution_id") - .and_then(|v| v.as_str()) - .unwrap_or("-"), - execution - .get("status") - .and_then(|v| v.as_str()) - .unwrap_or("unknown"), - execution - .get("completed_iterations") - .and_then(|v| v.as_u64()) - .unwrap_or(0), - execution - .get("result_best_candidate_id") - .and_then(|v| v.as_str()) - .unwrap_or("-") - ); + Command::ExecutionList => { + match bridge_request(&bridge_base_url, "GET", "/v1/executions", None) { + Ok(json) => { + let executions = json + .get("executions") + .and_then(|v| v.as_array()) + .cloned() + .unwrap_or_default(); + if executions.is_empty() { + println!("no executions"); + } else { + for execution in executions { + println!( + "execution_id={} status={} iterations={} best_candidate={}", + execution + .get("execution_id") + .and_then(|v| v.as_str()) + .unwrap_or("-"), + execution + .get("status") + .and_then(|v| v.as_str()) + .unwrap_or("unknown"), + execution + .get("completed_iterations") + .and_then(|v| v.as_u64()) + .unwrap_or(0), + execution + .get("result_best_candidate_id") + .and_then(|v| v.as_str()) + .unwrap_or("-") + ); + } } } + Err(err) => println!("error: {err}"), } - Err(err) => println!("error: {err}"), - }, + } Command::ExecutionStatus { execution_id } => match bridge_request( &bridge_base_url, "GET", @@ -1094,7 +1094,9 @@ Policy presets: fast | balanced | safe" ) { Ok(json) => println!( "execution_id={} status={} iterations={} best_candidate={}", - json.get("execution_id").and_then(|v| 
v.as_str()).unwrap_or("-"), + json.get("execution_id") + .and_then(|v| v.as_str()) + .unwrap_or("-"), json.get("status") .and_then(|v| v.as_str()) .unwrap_or("unknown"), @@ -1115,8 +1117,12 @@ Policy presets: fast | balanced | safe" ) { Ok(json) => println!( "execution_id={} status={}", - json.get("execution_id").and_then(|v| v.as_str()).unwrap_or("-"), - json.get("status").and_then(|v| v.as_str()).unwrap_or("unknown"), + json.get("execution_id") + .and_then(|v| v.as_str()) + .unwrap_or("-"), + json.get("status") + .and_then(|v| v.as_str()) + .unwrap_or("unknown"), ), Err(err) => println!("error: {err}"), }, @@ -1128,8 +1134,12 @@ Policy presets: fast | balanced | safe" ) { Ok(json) => println!( "execution_id={} status={}", - json.get("execution_id").and_then(|v| v.as_str()).unwrap_or("-"), - json.get("status").and_then(|v| v.as_str()).unwrap_or("unknown"), + json.get("execution_id") + .and_then(|v| v.as_str()) + .unwrap_or("-"), + json.get("status") + .and_then(|v| v.as_str()) + .unwrap_or("unknown"), ), Err(err) => println!("error: {err}"), }, @@ -1141,8 +1151,12 @@ Policy presets: fast | balanced | safe" ) { Ok(json) => println!( "execution_id={} status={}", - json.get("execution_id").and_then(|v| v.as_str()).unwrap_or("-"), - json.get("status").and_then(|v| v.as_str()).unwrap_or("unknown"), + json.get("execution_id") + .and_then(|v| v.as_str()) + .unwrap_or("-"), + json.get("status") + .and_then(|v| v.as_str()) + .unwrap_or("unknown"), ), Err(err) => println!("error: {err}"), }, @@ -1168,7 +1182,9 @@ Policy presets: fast | balanced | safe" ) { Ok(json) => println!( "execution_id={} max_iterations={} max_concurrent_candidates={}", - json.get("execution_id").and_then(|v| v.as_str()).unwrap_or("-"), + json.get("execution_id") + .and_then(|v| v.as_str()) + .unwrap_or("-"), json.get("max_iterations") .and_then(|v| v.as_u64()) .unwrap_or(0), diff --git a/src/bridge.rs b/src/bridge.rs index 2aba9c2..505725e 100644 --- a/src/bridge.rs +++ b/src/bridge.rs @@ -20,8 +20,8 
@@ use crate::contract::{ExecutionPolicy, RunState, StartRequest}; use crate::orchestration::{ BudgetPolicy, ConcurrencyPolicy, ConvergencePolicy, EvaluationConfig, ExecutionAction, ExecutionRuntime, ExecutionService, ExecutionSpec, FsExecutionStore, GlobalConfig, - GlobalScheduler, OrchestrationPolicy, PolicyPatch, QueuedCandidate, - VariationConfig, VariationProposal, VariationSelection, WorkflowTemplateRef, + GlobalScheduler, OrchestrationPolicy, PolicyPatch, QueuedCandidate, VariationConfig, + VariationProposal, VariationSelection, WorkflowTemplateRef, }; #[cfg(feature = "serde")] use crate::runtime::{MockRuntime, VoidBoxRuntimeClient}; @@ -253,21 +253,19 @@ pub fn run_bridge() -> Result<(), String> { let config = BridgeConfig::from_env(); let worker_config = config.clone(); - thread::spawn(move || { - loop { - let runtime = VoidBoxRuntimeClient::new(worker_config.base_url.clone(), 250); - let _ = process_pending_executions_once( - GlobalConfig { - max_concurrent_child_runs: 20, - }, - runtime, - worker_config.execution_dir.clone(), - ); - std::thread::sleep(std::time::Duration::from_millis(500)); - } + thread::spawn(move || loop { + let runtime = VoidBoxRuntimeClient::new(worker_config.base_url.clone(), 250); + let _ = process_pending_executions_once( + GlobalConfig { + max_concurrent_child_runs: 20, + }, + runtime, + worker_config.execution_dir.clone(), + ); + std::thread::sleep(std::time::Duration::from_millis(500)); }); - let server = - Server::http(&config.listen).map_err(|e| format!("listen {} failed: {e}", config.listen))?; + let server = Server::http(&config.listen) + .map_err(|e| format!("listen {} failed: {e}", config.listen))?; let client = VoidBoxRuntimeClient::new(config.base_url.clone(), 250); println!( "voidctl bridge listening on http://{} -> {}", @@ -282,7 +280,10 @@ pub fn run_bridge() -> Result<(), String> { let _ = req.respond( Response::empty(204) .with_header(make_header("Access-Control-Allow-Origin", "*")) - 
.with_header(make_header("Access-Control-Allow-Methods", "GET,POST,OPTIONS")) + .with_header(make_header( + "Access-Control-Allow-Methods", + "GET,POST,OPTIONS", + )) .with_header(make_header("Access-Control-Allow-Headers", "Content-Type")), ); continue; @@ -307,7 +308,10 @@ pub fn run_bridge() -> Result<(), String> { .with_status_code(StatusCode(response.status)) .with_header(make_header("Content-Type", "application/json")) .with_header(make_header("Access-Control-Allow-Origin", "*")) - .with_header(make_header("Access-Control-Allow-Methods", "GET,POST,OPTIONS")) + .with_header(make_header( + "Access-Control-Allow-Methods", + "GET,POST,OPTIONS", + )) .with_header(make_header("Access-Control-Allow-Headers", "Content-Type")), ); } @@ -576,7 +580,10 @@ fn handle_execution_get(path: &str, config: &BridgeConfig) -> JsonHttpResponse { let result = ExecutionResultResponse { best_candidate_id: snapshot.execution.result_best_candidate_id.clone(), completed_iterations: snapshot.execution.completed_iterations, - total_candidate_failures: snapshot.execution.failure_counts.total_candidate_failures, + total_candidate_failures: snapshot + .execution + .failure_counts + .total_candidate_failures, }; json_response( 200, @@ -668,8 +675,7 @@ fn summarize_progress( .events .iter() .filter(|event| { - event.event_type - == crate::orchestration::ControlEventType::CandidateOutputCollected + event.event_type == crate::orchestration::ControlEventType::CandidateOutputCollected }) .count() as u32, queued_candidate_count, @@ -928,9 +934,7 @@ fn process_pending_executions_once( let running = snapshot .candidates .iter() - .filter(|candidate| { - candidate.status == crate::orchestration::CandidateStatus::Running - }) + .filter(|candidate| candidate.status == crate::orchestration::CandidateStatus::Running) .count(); scheduler.register_execution( &execution_id, @@ -1091,12 +1095,7 @@ impl ExecutionSpecRequest { let variation = match self.variation.source.as_str() { "parameter_space" => 
VariationConfig::parameter_space( self.variation.candidates_per_iteration, - match self - .variation - .selection - .as_deref() - .unwrap_or("sequential") - { + match self.variation.selection.as_deref().unwrap_or("sequential") { "random" => VariationSelection::Random, _ => VariationSelection::Sequential, }, @@ -1122,7 +1121,8 @@ impl ExecutionSpecRequest { Some(_) => Some(VariationSelection::Sequential), }, parameter_space: self.variation.parameter_space.unwrap_or_default(), - explicit: self.variation + explicit: self + .variation .explicit .unwrap_or_default() .into_iter() @@ -1200,10 +1200,10 @@ fn policy_from_json(raw: Option) -> ExecutionPolicy { max_parallel_microvms_per_run: raw .max_parallel_microvms_per_run .unwrap_or(defaults.max_parallel_microvms_per_run), - max_stage_retries: raw - .max_stage_retries - .unwrap_or(defaults.max_stage_retries), - stage_timeout_secs: raw.stage_timeout_secs.unwrap_or(defaults.stage_timeout_secs), + max_stage_retries: raw.max_stage_retries.unwrap_or(defaults.max_stage_retries), + stage_timeout_secs: raw + .stage_timeout_secs + .unwrap_or(defaults.stage_timeout_secs), cancel_grace_period_secs: raw .cancel_grace_period_secs .unwrap_or(defaults.cancel_grace_period_secs), @@ -1307,6 +1307,9 @@ fn to_tiny_response(response: JsonHttpResponse) -> tiny_http::Response Result, ) -> VoidBoxRunRaw { let DaemonRunStateJson { - id, - status, - error, - .. + id, status, error, .. 
} = run; VoidBoxRunRaw { id, diff --git a/src/contract/error.rs b/src/contract/error.rs index a74ed92..2840780 100644 --- a/src/contract/error.rs +++ b/src/contract/error.rs @@ -22,11 +22,7 @@ pub struct ContractError { } impl ContractError { - pub fn new( - code: ContractErrorCode, - message: impl Into, - retryable: bool, - ) -> Self { + pub fn new(code: ContractErrorCode, message: impl Into, retryable: bool) -> Self { Self { code, message: message.into(), diff --git a/src/lib.rs b/src/lib.rs index 6109500..56f108c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,5 @@ +#[cfg(feature = "serde")] +pub mod bridge; pub mod contract; pub mod orchestration; pub mod runtime; -#[cfg(feature = "serde")] -pub mod bridge; diff --git a/src/orchestration/events.rs b/src/orchestration/events.rs index b5f0e5c..20f1b25 100644 --- a/src/orchestration/events.rs +++ b/src/orchestration/events.rs @@ -50,7 +50,7 @@ impl ControlEventType { } } - pub fn from_str(value: &str) -> Option { + pub fn parse(value: &str) -> Option { match value { "ExecutionCreated" => Some(Self::ExecutionCreated), "ExecutionSubmitted" => Some(Self::ExecutionSubmitted), @@ -111,10 +111,7 @@ impl ControlEventEnvelope { } impl ExecutionSnapshot { - pub fn replay( - mut execution: Execution, - events: &[ControlEventEnvelope], - ) -> ExecutionSnapshot { + pub fn replay(mut execution: Execution, events: &[ControlEventEnvelope]) -> ExecutionSnapshot { let mut accumulator = ExecutionAccumulator::default(); for event in events { diff --git a/src/orchestration/mod.rs b/src/orchestration/mod.rs index 96fa89a..14ffa00 100644 --- a/src/orchestration/mod.rs +++ b/src/orchestration/mod.rs @@ -2,10 +2,10 @@ pub mod events; pub mod message_box; pub mod policy; pub mod reconcile; -pub mod scoring; pub mod scheduler; -pub mod spec; +pub mod scoring; pub mod service; +pub mod spec; pub mod store; pub mod strategy; pub mod types; @@ -17,17 +17,17 @@ pub use message_box::extract_message_stats; pub use policy::{ BudgetPolicy, 
ConcurrencyPolicy, ConvergencePolicy, GlobalConfig, OrchestrationPolicy, }; +pub use reconcile::ReconciliationService; +pub use scheduler::{DispatchGrant, GlobalScheduler, QueuedCandidate, SchedulerDecision}; pub use scoring::{ score_iteration, MetricDirection, RankedCandidate, ScoringConfig, WeightedMetric, }; -pub use reconcile::ReconciliationService; -pub use scheduler::{DispatchGrant, GlobalScheduler, QueuedCandidate, SchedulerDecision}; +#[cfg(feature = "serde")] +pub use service::PolicyPatch; pub use service::{ DryRunPlan, DryRunResult, ExecutionAction, ExecutionRuntime, ExecutionService, StructuredOutputResult, }; -#[cfg(feature = "serde")] -pub use service::PolicyPatch; pub use spec::ExecutionSpec; pub use spec::{EvaluationConfig, WorkflowTemplateRef}; pub use store::{ExecutionStore, FsExecutionStore}; diff --git a/src/orchestration/scoring.rs b/src/orchestration/scoring.rs index 0e5c755..5888477 100644 --- a/src/orchestration/scoring.rs +++ b/src/orchestration/scoring.rs @@ -31,7 +31,10 @@ pub struct RankedCandidate { pub metrics: BTreeMap, } -pub fn score_iteration(config: &ScoringConfig, outputs: &[CandidateOutput]) -> Vec { +pub fn score_iteration( + config: &ScoringConfig, + outputs: &[CandidateOutput], +) -> Vec { let mut ranked: Vec = outputs .iter() .map(|output| { @@ -90,14 +93,26 @@ fn normalized_value( } } -fn compare_ranked(config: &ScoringConfig, left: &RankedCandidate, right: &RankedCandidate) -> Ordering { +fn compare_ranked( + config: &ScoringConfig, + left: &RankedCandidate, + right: &RankedCandidate, +) -> Ordering { right .score .partial_cmp(&left.score) .unwrap_or(Ordering::Equal) .then_with(|| { - let left_metric = left.metrics.get(&config.tie_break_metric).copied().unwrap_or(f64::INFINITY); - let right_metric = right.metrics.get(&config.tie_break_metric).copied().unwrap_or(f64::INFINITY); + let left_metric = left + .metrics + .get(&config.tie_break_metric) + .copied() + .unwrap_or(f64::INFINITY); + let right_metric = right + .metrics + 
.get(&config.tie_break_metric) + .copied() + .unwrap_or(f64::INFINITY); left_metric .partial_cmp(&right_metric) .unwrap_or(Ordering::Equal) diff --git a/src/orchestration/service.rs b/src/orchestration/service.rs index b128713..4931aa1 100644 --- a/src/orchestration/service.rs +++ b/src/orchestration/service.rs @@ -1,6 +1,8 @@ use std::io; -use crate::contract::{ContractError, ExecutionPolicy, RuntimeInspection, StartRequest, StartResult}; +use crate::contract::{ + ContractError, ExecutionPolicy, RuntimeInspection, StartRequest, StartResult, +}; use super::events::{ControlEventEnvelope, ControlEventType}; #[cfg(feature = "serde")] @@ -59,6 +61,18 @@ enum DispatchOutcome { Canceled, } +struct CandidateStateUpdate<'a> { + execution_id: &'a str, + candidate_id: &'a str, + created_seq: u64, + iteration: u32, + status: CandidateStatus, + runtime_run_id: Option, + overrides: &'a std::collections::BTreeMap, + succeeded: Option, + metrics: &'a std::collections::BTreeMap, +} + enum SelectedStrategy { Swarm(SwarmStrategy), Search(SearchStrategy), @@ -200,10 +214,7 @@ where snapshot.execution.status, ExecutionStatus::Pending | ExecutionStatus::Running ) { - return Err(io::Error::new( - io::ErrorKind::InvalidInput, - invalid_message, - )); + return Err(io::Error::new(io::ErrorKind::InvalidInput, invalid_message)); } let spec = self.store.load_spec(execution_id)?; Ok((snapshot, spec)) @@ -238,14 +249,17 @@ where match self.check_execution_control(execution_id, worker_id)? 
{ ExecutionControl::Continue => {} ExecutionControl::Paused => { - return Err(io::Error::new(io::ErrorKind::WouldBlock, "execution paused")); + return Err(io::Error::new( + io::ErrorKind::WouldBlock, + "execution paused", + )); } ExecutionControl::Canceled => return Ok(None), } let inspection = self .runtime .inspect_run(handle) - .map_err(|err| io::Error::new(io::ErrorKind::Other, err.message.clone()))?; + .map_err(|err| io::Error::other(err.message.clone()))?; if inspection.state.is_terminal() { return Ok(Some(inspection)); } @@ -306,7 +320,10 @@ where let worker_id = Self::worker_id(); self.store.create_execution(&execution)?; self.append_event(&execution.execution_id, ControlEventType::ExecutionCreated)?; - self.append_event(&execution.execution_id, ControlEventType::ExecutionSubmitted)?; + self.append_event( + &execution.execution_id, + ControlEventType::ExecutionSubmitted, + )?; execution.status = ExecutionStatus::Running; self.store.save_execution(&execution)?; self.append_event(&execution.execution_id, ControlEventType::ExecutionStarted)?; @@ -472,28 +489,24 @@ where fn append_event(&self, execution_id: &str, event_type: ControlEventType) -> io::Result<()> { let seq = self.store.load_execution(execution_id)?.events.len() as u64 + 1; - self.store - .append_event(execution_id, &ControlEventEnvelope::new(execution_id, seq, event_type)) + self.store.append_event( + execution_id, + &ControlEventEnvelope::new(execution_id, seq, event_type), + ) } - fn save_candidate_state( - &self, - execution_id: &str, - candidate_id: &str, - created_seq: u64, - iteration: u32, - status: CandidateStatus, - runtime_run_id: Option, - overrides: &std::collections::BTreeMap, - succeeded: Option, - metrics: &std::collections::BTreeMap, - ) -> io::Result<()> { - let mut record = - ExecutionCandidate::new(execution_id, candidate_id, created_seq, iteration, status); - record.runtime_run_id = runtime_run_id; - record.overrides = overrides.clone(); - record.succeeded = succeeded; - 
record.metrics = metrics.clone(); + fn save_candidate_state(&self, update: CandidateStateUpdate<'_>) -> io::Result<()> { + let mut record = ExecutionCandidate::new( + update.execution_id, + update.candidate_id, + update.created_seq, + update.iteration, + update.status, + ); + record.runtime_run_id = update.runtime_run_id; + record.overrides = update.overrides.clone(); + record.succeeded = update.succeeded; + record.metrics = update.metrics.clone(); self.store.save_candidate(&record) } @@ -604,17 +617,17 @@ where let candidates = strategy.plan_candidates(accumulator, &inboxes, message_stats.as_ref()); for candidate in &candidates { let candidate_seq = self.next_candidate_id; - self.save_candidate_state( - &execution.execution_id, - &candidate.candidate_id, - candidate_seq, + self.save_candidate_state(CandidateStateUpdate { + execution_id: &execution.execution_id, + candidate_id: &candidate.candidate_id, + created_seq: candidate_seq, iteration, - CandidateStatus::Queued, - None, - &candidate.overrides, - None, - &Default::default(), - )?; + status: CandidateStatus::Queued, + runtime_run_id: None, + overrides: &candidate.overrides, + succeeded: None, + metrics: &Default::default(), + })?; self.append_event(&execution.execution_id, ControlEventType::CandidateQueued)?; self.next_candidate_id += 1; } @@ -622,14 +635,12 @@ where } #[cfg(feature = "serde")] - fn load_message_stats( - &self, - execution_id: &str, - iteration: u32, - ) -> io::Result { + fn load_message_stats(&self, execution_id: &str, iteration: u32) -> io::Result { let intents = self.store.load_intents(execution_id)?; let messages = self.store.load_routed_messages(execution_id)?; - Ok(message_box::extract_message_stats(&intents, &messages, iteration)) + Ok(message_box::extract_message_stats( + &intents, &messages, iteration, + )) } fn load_or_plan_iteration_candidates( @@ -670,7 +681,10 @@ where created_seq: u64, ) -> io::Result { let run_id = format!("exec-run-candidate-{created_seq}"); - 
self.append_event(&execution.execution_id, ControlEventType::CandidateDispatched)?; + self.append_event( + &execution.execution_id, + ControlEventType::CandidateDispatched, + )?; #[cfg(feature = "serde")] let launch_inbox = self.load_launch_inbox_snapshot( &execution.execution_id, @@ -692,7 +706,7 @@ where let started = self .runtime .start_run(launch_request) - .map_err(|err| io::Error::new(io::ErrorKind::Other, err.message))?; + .map_err(|err| io::Error::other(err.message))?; #[cfg(not(feature = "serde"))] let started = self .runtime @@ -702,54 +716,54 @@ where launch_context: None, policy: default_runtime_policy(), }) - .map_err(|err| io::Error::new(io::ErrorKind::Other, err.message))?; - self.save_candidate_state( - &execution.execution_id, - &candidate.candidate_id, + .map_err(|err| io::Error::other(err.message))?; + self.save_candidate_state(CandidateStateUpdate { + execution_id: &execution.execution_id, + candidate_id: &candidate.candidate_id, created_seq, iteration, - CandidateStatus::Running, - Some(run_id.clone()), - &candidate.overrides, - None, - &Default::default(), - )?; - - let inspection = match self.wait_for_terminal_run(&execution.execution_id, worker_id, &started.handle) - { - Ok(Some(inspection)) => inspection, - Ok(None) => { - self.save_candidate_state( - &execution.execution_id, - &candidate.candidate_id, - created_seq, - iteration, - CandidateStatus::Canceled, - Some(run_id), - &candidate.overrides, - None, - &Default::default(), - )?; - return Ok(DispatchOutcome::Canceled); - } - Err(err) if err.kind() == io::ErrorKind::WouldBlock => { - return Ok(DispatchOutcome::Paused(err)); - } - Err(err) => return Err(err), - }; + status: CandidateStatus::Running, + runtime_run_id: Some(run_id.clone()), + overrides: &candidate.overrides, + succeeded: None, + metrics: &Default::default(), + })?; + + let inspection = + match self.wait_for_terminal_run(&execution.execution_id, worker_id, &started.handle) { + Ok(Some(inspection)) => inspection, + Ok(None) 
=> { + self.save_candidate_state(CandidateStateUpdate { + execution_id: &execution.execution_id, + candidate_id: &candidate.candidate_id, + created_seq, + iteration, + status: CandidateStatus::Canceled, + runtime_run_id: Some(run_id), + overrides: &candidate.overrides, + succeeded: None, + metrics: &Default::default(), + })?; + return Ok(DispatchOutcome::Canceled); + } + Err(err) if err.kind() == io::ErrorKind::WouldBlock => { + return Ok(DispatchOutcome::Paused(err)); + } + Err(err) => return Err(err), + }; if inspection.state == crate::contract::RunState::Failed { - self.save_candidate_state( - &execution.execution_id, - &candidate.candidate_id, + self.save_candidate_state(CandidateStateUpdate { + execution_id: &execution.execution_id, + candidate_id: &candidate.candidate_id, created_seq, iteration, - CandidateStatus::Failed, - Some(inspection.run_id.clone()), - &candidate.overrides, - Some(false), - &Default::default(), - )?; + status: CandidateStatus::Failed, + runtime_run_id: Some(inspection.run_id.clone()), + overrides: &candidate.overrides, + succeeded: Some(false), + metrics: &Default::default(), + })?; self.append_event( &execution.execution_id, ControlEventType::CandidateOutputCollected, @@ -766,17 +780,17 @@ where match self.runtime.take_structured_output(&inspection.run_id) { StructuredOutputResult::Found(mut output) => { - self.save_candidate_state( - &execution.execution_id, - &candidate.candidate_id, + self.save_candidate_state(CandidateStateUpdate { + execution_id: &execution.execution_id, + candidate_id: &candidate.candidate_id, created_seq, iteration, - CandidateStatus::Completed, - Some(inspection.run_id.clone()), - &candidate.overrides, - Some(output.succeeded), - &output.metrics, - )?; + status: CandidateStatus::Completed, + runtime_run_id: Some(inspection.run_id.clone()), + overrides: &candidate.overrides, + succeeded: Some(output.succeeded), + metrics: &output.metrics, + })?; output.candidate_id = candidate.candidate_id.clone(); 
self.append_event( &execution.execution_id, @@ -796,21 +810,21 @@ where } StructuredOutputResult::Missing => { let failed = spec.policy.missing_output_policy == "mark_failed"; - self.save_candidate_state( - &execution.execution_id, - &candidate.candidate_id, + self.save_candidate_state(CandidateStateUpdate { + execution_id: &execution.execution_id, + candidate_id: &candidate.candidate_id, created_seq, iteration, - if failed { + status: if failed { CandidateStatus::Failed } else { CandidateStatus::Completed }, - Some(inspection.run_id.clone()), - &candidate.overrides, - Some(!failed), - &Default::default(), - )?; + runtime_run_id: Some(inspection.run_id.clone()), + overrides: &candidate.overrides, + succeeded: Some(!failed), + metrics: &Default::default(), + })?; self.append_event( &execution.execution_id, ControlEventType::CandidateOutputCollected, @@ -827,21 +841,21 @@ where StructuredOutputResult::Error(err) => match err.code { crate::contract::ContractErrorCode::StructuredOutputMissing => { let failed = spec.policy.missing_output_policy == "mark_failed"; - self.save_candidate_state( - &execution.execution_id, - &candidate.candidate_id, + self.save_candidate_state(CandidateStateUpdate { + execution_id: &execution.execution_id, + candidate_id: &candidate.candidate_id, created_seq, iteration, - if failed { + status: if failed { CandidateStatus::Failed } else { CandidateStatus::Completed }, - Some(inspection.run_id.clone()), - &candidate.overrides, - Some(!failed), - &Default::default(), - )?; + runtime_run_id: Some(inspection.run_id.clone()), + overrides: &candidate.overrides, + succeeded: Some(!failed), + metrics: &Default::default(), + })?; self.append_event( &execution.execution_id, ControlEventType::CandidateOutputCollected, @@ -866,17 +880,17 @@ where ))) } _ => { - self.save_candidate_state( - &execution.execution_id, - &candidate.candidate_id, + self.save_candidate_state(CandidateStateUpdate { + execution_id: &execution.execution_id, + candidate_id: 
&candidate.candidate_id, created_seq, iteration, - CandidateStatus::Failed, - Some(inspection.run_id.clone()), - &candidate.overrides, - Some(false), - &Default::default(), - )?; + status: CandidateStatus::Failed, + runtime_run_id: Some(inspection.run_id.clone()), + overrides: &candidate.overrides, + succeeded: Some(false), + metrics: &Default::default(), + })?; self.append_event( &execution.execution_id, ControlEventType::CandidateOutputCollected, @@ -914,12 +928,18 @@ where ExecutionControl::Paused => { execution.status = ExecutionStatus::Paused; self.store.save_execution(execution)?; - return Err(io::Error::new(io::ErrorKind::WouldBlock, "execution paused")); + return Err(io::Error::new( + io::ErrorKind::WouldBlock, + "execution paused", + )); } ExecutionControl::Canceled => { execution.status = ExecutionStatus::Canceled; self.store.save_execution(execution)?; - self.append_event(&execution.execution_id, ControlEventType::ExecutionCanceled)?; + self.append_event( + &execution.execution_id, + ControlEventType::ExecutionCanceled, + )?; return Ok(execution.clone()); } } @@ -949,12 +969,18 @@ where ExecutionControl::Paused => { execution.status = ExecutionStatus::Paused; self.store.save_execution(execution)?; - return Err(io::Error::new(io::ErrorKind::WouldBlock, "execution paused")); + return Err(io::Error::new( + io::ErrorKind::WouldBlock, + "execution paused", + )); } ExecutionControl::Canceled => { execution.status = ExecutionStatus::Canceled; self.store.save_execution(execution)?; - self.append_event(&execution.execution_id, ControlEventType::ExecutionCanceled)?; + self.append_event( + &execution.execution_id, + ControlEventType::ExecutionCanceled, + )?; return Ok(execution.clone()); } } @@ -968,7 +994,7 @@ where execution, spec, worker_id, - &candidate, + candidate, iteration, candidate_seq, )? 
{ @@ -989,7 +1015,10 @@ where DispatchOutcome::Canceled => { execution.status = ExecutionStatus::Canceled; self.store.save_execution(execution)?; - self.append_event(&execution.execution_id, ControlEventType::ExecutionCanceled)?; + self.append_event( + &execution.execution_id, + ControlEventType::ExecutionCanceled, + )?; return Ok(execution.clone()); } } @@ -1002,7 +1031,10 @@ where .filter(|candidate| candidate.iteration == iteration) .collect(); let has_pending_candidates = persisted_iteration_candidates.iter().any(|candidate| { - matches!(candidate.status, CandidateStatus::Queued | CandidateStatus::Running) + matches!( + candidate.status, + CandidateStatus::Queued | CandidateStatus::Running + ) }); if has_pending_candidates { self.store.save_execution(execution)?; @@ -1040,7 +1072,8 @@ where execution.completed_iterations = accumulator.completed_iterations; execution.failure_counts = accumulator.failure_counts.clone(); execution.result_best_candidate_id = accumulator.best_candidate_id.clone(); - self.store.save_accumulator(&execution.execution_id, &accumulator)?; + self.store + .save_accumulator(&execution.execution_id, &accumulator)?; let all_failed = outputs.iter().all(|output| !output.succeeded); if all_failed { @@ -1061,7 +1094,10 @@ where _ => { execution.status = ExecutionStatus::Failed; self.store.save_execution(execution)?; - self.append_event(&execution.execution_id, ControlEventType::ExecutionFailed)?; + self.append_event( + &execution.execution_id, + ControlEventType::ExecutionFailed, + )?; return Ok(execution.clone()); } } @@ -1077,12 +1113,21 @@ where if strategy.should_stop(&accumulator, &evaluation).is_some() { execution.status = ExecutionStatus::Completed; self.store.save_execution(execution)?; - self.append_event(&execution.execution_id, ControlEventType::IterationCompleted)?; - self.append_event(&execution.execution_id, ControlEventType::ExecutionCompleted)?; + self.append_event( + &execution.execution_id, + 
ControlEventType::IterationCompleted, + )?; + self.append_event( + &execution.execution_id, + ControlEventType::ExecutionCompleted, + )?; return Ok(execution.clone()); } - self.append_event(&execution.execution_id, ControlEventType::IterationCompleted)?; + self.append_event( + &execution.execution_id, + ControlEventType::IterationCompleted, + )?; iteration += 1; } @@ -1146,11 +1191,7 @@ impl ExecutionService { }; store.append_event( execution_id, - &ControlEventEnvelope::new( - execution_id, - snapshot.events.len() as u64 + 1, - event_type, - ), + &ControlEventEnvelope::new(execution_id, snapshot.events.len() as u64 + 1, event_type), )?; Ok(snapshot.execution) } diff --git a/src/orchestration/store/fs.rs b/src/orchestration/store/fs.rs index ffdf5d4..b887965 100644 --- a/src/orchestration/store/fs.rs +++ b/src/orchestration/store/fs.rs @@ -13,9 +13,7 @@ use crate::orchestration::types::{ ExecutionStatus, }; #[cfg(feature = "serde")] -use crate::orchestration::types::{ - CommunicationIntent, InboxSnapshot, RoutedMessage, -}; +use crate::orchestration::types::{CommunicationIntent, InboxSnapshot, RoutedMessage}; #[cfg(not(feature = "serde"))] mod serde_json { @@ -93,7 +91,11 @@ mod serde_json { } fn encode_list(value: &[String]) -> String { - value.iter().map(|item| escape(item)).collect::>().join(";") + value + .iter() + .map(|item| escape(item)) + .collect::>() + .join(";") } fn decode_map_string(value: &str) -> Result, Error> { @@ -169,7 +171,11 @@ mod serde_json { let mut seen_separator = false; for ch in value.chars() { if escaped { - let target = if seen_separator { &mut right } else { &mut left }; + let target = if seen_separator { + &mut right + } else { + &mut left + }; target.push('\\'); target.push(ch); escaped = false; @@ -324,10 +330,7 @@ impl FsExecutionStore { pub fn refresh_claim(&self, execution_id: &str, worker_id: &str) -> io::Result<()> { let claim_path = self.execution_dir(execution_id).join("claim.txt"); let Some(existing) = 
self.load_claim_record(execution_id)? else { - return Err(io::Error::new( - io::ErrorKind::NotFound, - "claim not found", - )); + return Err(io::Error::new(io::ErrorKind::NotFound, "claim not found")); }; if existing.worker_id != worker_id { return Err(io::Error::new( @@ -359,11 +362,7 @@ impl FsExecutionStore { } } - pub fn append_event( - &self, - execution_id: &str, - event: &ControlEventEnvelope, - ) -> io::Result<()> { + pub fn append_event(&self, execution_id: &str, event: &ControlEventEnvelope) -> io::Result<()> { let path = self.execution_dir(execution_id).join("events.log"); let existing = fs::read_to_string(&path).unwrap_or_default(); let next = format!( @@ -400,14 +399,19 @@ impl FsExecutionStore { } pub fn save_candidate(&self, candidate: &ExecutionCandidate) -> io::Result<()> { - let dir = self.execution_dir(&candidate.execution_id).join("candidates"); + let dir = self + .execution_dir(&candidate.execution_id) + .join("candidates"); fs::create_dir_all(&dir)?; let overrides = serde_json::to_string(&candidate.overrides) .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err.to_string()))?; let metrics = serde_json::to_string(&candidate.metrics) .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err.to_string()))?; fs::write( - dir.join(format!("{}-{}.txt", candidate.created_seq, candidate.candidate_id)), + dir.join(format!( + "{}-{}.txt", + candidate.created_seq, candidate.candidate_id + )), format!( "{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}", candidate.execution_id, @@ -488,7 +492,10 @@ impl FsExecutionStore { execution_id: &str, message: &RoutedMessage, ) -> io::Result<()> { - append_ndjson_record(self.execution_dir(execution_id).join("messages.log"), message) + append_ndjson_record( + self.execution_dir(execution_id).join("messages.log"), + message, + ) } #[cfg(feature = "serde")] @@ -528,8 +535,8 @@ impl FsExecutionStore { pub fn save_spec(&self, execution_id: &str, spec: &ExecutionSpec) -> io::Result<()> { let dir = 
self.execution_dir(execution_id); fs::create_dir_all(&dir)?; - let payload = - serde_json::to_vec_pretty(spec).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e.to_string()))?; + let payload = serde_json::to_vec_pretty(spec) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e.to_string()))?; fs::write(dir.join("spec.json"), payload) } @@ -621,10 +628,7 @@ impl ExecutionStore for FsExecutionStore { } #[cfg(feature = "serde")] -fn append_ndjson_record( - path: PathBuf, - record: &T, -) -> io::Result<()> { +fn append_ndjson_record(path: PathBuf, record: &T) -> io::Result<()> { if let Some(parent) = path.parent() { fs::create_dir_all(parent)?; } @@ -652,10 +656,7 @@ fn load_ndjson_records(path: PathBuf) -> io::Res Ok(record) => records.push(record), Err(err) if lines.peek().is_none() => break, Err(err) => { - return Err(io::Error::new( - io::ErrorKind::InvalidData, - err.to_string(), - )) + return Err(io::Error::new(io::ErrorKind::InvalidData, err.to_string())) } } } @@ -672,8 +673,7 @@ fn parse_execution(contents: String) -> io::Result { let mode = required_line(&mut lines, "mode")?; let goal = required_line(&mut lines, "goal")?; let status = required_line(&mut lines, "status")?; - let result_best_candidate_id = optional_line(&mut lines) - .filter(|value| !value.is_empty()); + let result_best_candidate_id = optional_line(&mut lines).filter(|value| !value.is_empty()); let completed_iterations = optional_line(&mut lines) .map(|value| value.parse().map_err(invalid_data)) .transpose()? 
@@ -702,7 +702,7 @@ fn parse_events(contents: &str) -> Vec { let mut parts = line.split('|'); let execution_id = parts.next()?; let seq = parts.next()?.parse().ok()?; - let event_type = ControlEventType::from_str(parts.next()?)?; + let event_type = ControlEventType::parse(parts.next()?)?; Some(ControlEventEnvelope::new(execution_id, seq, event_type)) }) .collect() @@ -786,10 +786,7 @@ fn parse_candidate(contents: String) -> io::Result { }) } -fn required_line<'a>( - lines: &mut impl Iterator, - name: &str, -) -> io::Result { +fn required_line<'a>(lines: &mut impl Iterator, name: &str) -> io::Result { lines .next() .map(|line| line.to_string()) @@ -867,10 +864,10 @@ fn validate_inbox_candidate_id(candidate_id: &str) -> io::Result<()> { match (components.next(), components.next()) { (Some(Component::Normal(_)), None) => {} _ => { - return Err(io::Error::new( - io::ErrorKind::InvalidInput, - format!("unsafe candidate_id '{candidate_id}'"), - )) + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + format!("unsafe candidate_id '{candidate_id}'"), + )) } } Ok(()) diff --git a/src/orchestration/strategy.rs b/src/orchestration/strategy.rs index c375506..0cdbdd0 100644 --- a/src/orchestration/strategy.rs +++ b/src/orchestration/strategy.rs @@ -86,7 +86,7 @@ impl SwarmStrategy { let convergence_pressure = stats.broadcast_messages + stats.dropped_count + stats.expired_count; if convergence_pressure > exploration_pressure && candidates.len() > 1 { - candidates.truncate((candidates.len() + 1) / 2); + candidates.truncate(candidates.len().div_ceil(2)); } } @@ -278,11 +278,7 @@ impl SearchStrategy { accumulator: &ExecutionAccumulator, ) -> Vec { let mut generated = self.variation.generate(accumulator); - let bootstrap_size = self - .variation - .candidates_per_iteration - .min(2) - .max(1) as usize; + let bootstrap_size = self.variation.candidates_per_iteration.clamp(1, 2) as usize; generated.truncate(bootstrap_size); generated } @@ -305,7 +301,8 @@ impl SearchStrategy { 
.collect(); if let Some(stats) = advisory_message_stats(&self.variation, message_stats) { - let exploration_pressure = stats.signal_count + stats.dropped_count + stats.expired_count; + let exploration_pressure = + stats.signal_count + stats.dropped_count + stats.expired_count; let refinement_pressure = stats.evaluation_count + stats.leader_messages; if exploration_pressure > refinement_pressure && proposals.len() > 2 { let first = proposals.remove(0); @@ -364,7 +361,9 @@ impl SearchStrategy { let incumbent = &accumulator.best_candidate_overrides; for (path, values) in &self.variation.parameter_space { let current = incumbent.get(path); - let Some(current_idx) = current.and_then(|value| values.iter().position(|candidate| candidate == value)) else { + let Some(current_idx) = + current.and_then(|value| values.iter().position(|candidate| candidate == value)) + else { continue; }; for neighbor_idx in [current_idx.checked_sub(1), Some(current_idx + 1)] diff --git a/src/orchestration/variation.rs b/src/orchestration/variation.rs index 4eff456..b8212fc 100644 --- a/src/orchestration/variation.rs +++ b/src/orchestration/variation.rs @@ -108,10 +108,11 @@ impl VariationConfig { return Vec::new(); }; - let iter: Box> = match self.selection.unwrap_or(VariationSelection::Sequential) { - VariationSelection::Sequential => Box::new(values.iter().cloned()), - VariationSelection::Random => Box::new(values.iter().rev().cloned()), - }; + let iter: Box> = + match self.selection.unwrap_or(VariationSelection::Sequential) { + VariationSelection::Sequential => Box::new(values.iter().cloned()), + VariationSelection::Random => Box::new(values.iter().rev().cloned()), + }; iter.take(self.candidates_per_iteration as usize) .map(|value| VariationProposal { diff --git a/src/runtime/mock.rs b/src/runtime/mock.rs index 9a703cb..5a6d55f 100644 --- a/src/runtime/mock.rs +++ b/src/runtime/mock.rs @@ -76,9 +76,10 @@ impl MockRuntime { } pub fn start(&mut self, request: StartRequest) -> Result { - 
request.policy.validate().map_err(|msg| { - ContractError::new(ContractErrorCode::InvalidPolicy, msg, false) - })?; + request + .policy + .validate() + .map_err(|msg| ContractError::new(ContractErrorCode::InvalidPolicy, msg, false))?; if let Some(existing) = self.runs.iter_mut().find(|r| r.run_id == request.run_id) { if existing.state.is_terminal() { @@ -172,8 +173,16 @@ impl MockRuntime { run_id: record.run_id.clone(), attempt_id: record.attempt_id, state: record.state, - active_stage_count: if record.state == RunState::Running { 1 } else { 0 }, - active_microvm_count: if record.state == RunState::Running { 1 } else { 0 }, + active_stage_count: if record.state == RunState::Running { + 1 + } else { + 0 + }, + active_microvm_count: if record.state == RunState::Running { + 1 + } else { + 0 + }, started_at: record.started_at.clone(), updated_at: record.updated_at.clone(), terminal_reason: record.terminal_reason.clone(), @@ -229,11 +238,13 @@ impl MockRuntime { pub fn take_structured_output(&mut self, run_id: &str) -> StructuredOutputResult { match self.seeded.get(run_id) { Some(SeededOutcome::Success(output)) => StructuredOutputResult::Found(output.clone()), - Some(SeededOutcome::MalformedOutput) => StructuredOutputResult::Error(ContractError::new( - ContractErrorCode::StructuredOutputMalformed, - format!("run '{run_id}' emitted malformed structured output"), - false, - )), + Some(SeededOutcome::MalformedOutput) => { + StructuredOutputResult::Error(ContractError::new( + ContractErrorCode::StructuredOutputMalformed, + format!("run '{run_id}' emitted malformed structured output"), + false, + )) + } _ => StructuredOutputResult::Missing, } } @@ -373,6 +384,8 @@ mod tests { }) .expect("subscribe"); assert!(events.iter().any(|e| e.event_type == EventType::RunStarted)); - assert!(events.iter().any(|e| e.event_type == EventType::RunCanceled)); + assert!(events + .iter() + .any(|e| e.event_type == EventType::RunCanceled)); } } diff --git a/src/runtime/void_box.rs 
b/src/runtime/void_box.rs index 00ec891..8e90cc4 100644 --- a/src/runtime/void_box.rs +++ b/src/runtime/void_box.rs @@ -2,9 +2,9 @@ use std::io::{Read, Write}; use std::net::TcpStream; use crate::contract::{ - from_void_box_run_and_events_json, from_void_box_run_json, ContractError, ContractErrorCode, - map_void_box_status, ConvertedRunView, EventEnvelope, EventType, RunState, RuntimeInspection, StartRequest, - StartResult, StopRequest, StopResult, SubscribeEventsRequest, + from_void_box_run_and_events_json, from_void_box_run_json, map_void_box_status, ContractError, + ContractErrorCode, ConvertedRunView, EventEnvelope, EventType, RunState, RuntimeInspection, + StartRequest, StartResult, StopRequest, StopResult, SubscribeEventsRequest, }; use crate::orchestration::CandidateOutput; @@ -41,9 +41,10 @@ impl VoidBoxRuntimeClient { } pub fn start(&self, request: StartRequest) -> Result { - request.policy.validate().map_err(|msg| { - ContractError::new(ContractErrorCode::InvalidPolicy, msg, false) - })?; + request + .policy + .validate() + .map_err(|msg| ContractError::new(ContractErrorCode::InvalidPolicy, msg, false))?; let input = match request.launch_context.as_deref() { Some(context) => Some(serde_json::from_str(context).map_err(|e| { @@ -283,9 +284,14 @@ impl VoidBoxRuntimeClient { &self, run_id: &str, ) -> Result, ContractError> { - if let Some(retrieval_path) = self.find_manifest_artifact_path(run_id, None, "result.json")? { + if let Some(retrieval_path) = + self.find_manifest_artifact_path(run_id, None, "result.json")? + { let response = self.http_get(&retrieval_path)?; - return match parse_artifact_response(&response, ContractErrorCode::StructuredOutputMissing)? { + return match parse_artifact_response( + &response, + ContractErrorCode::StructuredOutputMissing, + )? 
{ Some(body) => parse_structured_output(run_id, &body).map(Some), None => Ok(None), }; @@ -528,7 +534,10 @@ fn run_id_from_handle(handle: &str) -> Result<&str, ContractError> { }) } -fn filter_events_from_id(events: Vec, from_event_id: Option<&str>) -> Vec { +fn filter_events_from_id( + events: Vec, + from_event_id: Option<&str>, +) -> Vec { let Some(from_id) = from_event_id else { return events; }; @@ -566,7 +575,9 @@ fn manifest_retrieval_path( .and_then(serde_json::Value::as_str); if entry_name == Some(name) && retrieval_path.is_some() - && stage.map(|wanted| Some(wanted) == entry_stage).unwrap_or(true) + && stage + .map(|wanted| Some(wanted) == entry_stage) + .unwrap_or(true) { return Ok(retrieval_path.map(normalize_retrieval_path)); } @@ -640,11 +651,7 @@ fn parse_api_error(body: &str) -> Option { .get("retryable") .and_then(serde_json::Value::as_bool) .unwrap_or(false); - Some(ContractError::new( - map_error_code(code), - message, - retryable, - )) + Some(ContractError::new(map_error_code(code), message, retryable)) } fn map_error_code(code: &str) -> ContractErrorCode { @@ -664,10 +671,7 @@ fn map_error_code(code: &str) -> ContractErrorCode { } } -fn parse_structured_output( - run_id: &str, - body: &str, -) -> Result { +fn parse_structured_output(run_id: &str, body: &str) -> Result { let value: serde_json::Value = serde_json::from_str(body).map_err(|e| { ContractError::new( ContractErrorCode::StructuredOutputMalformed, @@ -845,7 +849,9 @@ mod tests { fn returns_none_when_structured_output_file_missing() { let client = client(vec![]); - let output = client.fetch_structured_output("run-missing").expect("fetch"); + let output = client + .fetch_structured_output("run-missing") + .expect("fetch"); assert!(output.is_none()); } @@ -888,7 +894,12 @@ mod tests { #[test] fn fetch_structured_output_maps_missing_output_error() { let client = client(vec![ - ("GET", "/v1/runs/run-missing-output", 200, r#"{"id":"run-missing-output","status":"Completed"}"#), + ( + "GET", + 
"/v1/runs/run-missing-output", + 200, + r#"{"id":"run-missing-output","status":"Completed"}"#, + ), ( "GET", "/v1/runs/run-missing-output/stages/main/output-file", @@ -908,7 +919,12 @@ mod tests { #[test] fn fetch_structured_output_falls_back_to_output_stage_after_main_404() { let client = client(vec![ - ("GET", "/v1/runs/run-output-stage", 200, r#"{"id":"run-output-stage","status":"Completed"}"#), + ( + "GET", + "/v1/runs/run-output-stage", + 200, + r#"{"id":"run-output-stage","status":"Completed"}"#, + ), ( "GET", "/v1/runs/run-output-stage/stages/main/output-file", @@ -934,7 +950,12 @@ mod tests { #[test] fn fetch_structured_output_maps_malformed_output_error() { let client = client(vec![ - ("GET", "/v1/runs/run-malformed", 200, r#"{"id":"run-malformed","status":"Completed"}"#), + ( + "GET", + "/v1/runs/run-malformed", + 200, + r#"{"id":"run-malformed","status":"Completed"}"#, + ), ( "GET", "/v1/runs/run-malformed/stages/main/output-file", @@ -1049,7 +1070,10 @@ mod tests { assert_eq!(recorded[0].1, "/v1/runs"); let body: serde_json::Value = serde_json::from_str(&recorded[0].2).expect("parse request body"); - assert_eq!(body.get("file").and_then(serde_json::Value::as_str), Some("fixtures/sample.vbrun")); + assert_eq!( + body.get("file").and_then(serde_json::Value::as_str), + Some("fixtures/sample.vbrun") + ); assert_eq!(body.get("input"), Some(&snapshot)); } @@ -1126,7 +1150,12 @@ mod tests { #[test] fn inspect_404_maps_to_not_found() { - let c = client(vec![("GET", "/v1/runs/run-404", 404, r#"{"error":"not found"}"#)]); + let c = client(vec![( + "GET", + "/v1/runs/run-404", + 404, + r#"{"error":"not found"}"#, + )]); let err = c.inspect("vb:run-404").expect_err("expected not found"); assert_eq!(err.code, ContractErrorCode::NotFound); } diff --git a/tests/execution_artifact_collection.rs b/tests/execution_artifact_collection.rs index c040edd..fde910d 100644 --- a/tests/execution_artifact_collection.rs +++ b/tests/execution_artifact_collection.rs @@ -12,11 +12,20 
@@ fn missing_output_can_mark_failed() { runtime.seed_missing_output("exec-run-candidate-1"); runtime.seed_success( "exec-run-candidate-2", - output("candidate-2", &[("latency_p99_ms", 80.0), ("cost_usd", 0.03)]), + output( + "candidate-2", + &[("latency_p99_ms", 80.0), ("cost_usd", 0.03)], + ), ); let store = FsExecutionStore::new(temp_store_dir("missing-failed")); - let mut service = ExecutionService::new(GlobalConfig { max_concurrent_child_runs: 2 }, runtime, store); + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + store, + ); let execution = service .run_to_completion(spec_with_missing_output_policy("mark_failed")) .expect("run execution"); @@ -31,18 +40,30 @@ fn missing_output_can_mark_incomplete_without_failure_count() { runtime.seed_missing_output("exec-run-candidate-1"); runtime.seed_success( "exec-run-candidate-2", - output("candidate-2", &[("latency_p99_ms", 80.0), ("cost_usd", 0.03)]), + output( + "candidate-2", + &[("latency_p99_ms", 80.0), ("cost_usd", 0.03)], + ), ); let store = FsExecutionStore::new(temp_store_dir("missing-incomplete")); - let mut service = ExecutionService::new(GlobalConfig { max_concurrent_child_runs: 2 }, runtime, store); + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + store, + ); let execution = service .run_to_completion(spec_with_continue_missing_output()) .expect("run execution"); assert_eq!(execution.status, ExecutionStatus::Completed); assert_eq!(execution.failure_counts.total_candidate_failures, 0); - assert_eq!(execution.result_best_candidate_id.as_deref(), Some("candidate-2")); + assert_eq!( + execution.result_best_candidate_id.as_deref(), + Some("candidate-2") + ); } #[test] @@ -52,15 +73,27 @@ fn iteration_failure_policy_continue_advances_despite_all_failures() { runtime.seed_failure("exec-run-candidate-2"); runtime.seed_success( "exec-run-candidate-3", - output("candidate-3", &[("latency_p99_ms", 
75.0), ("cost_usd", 0.02)]), + output( + "candidate-3", + &[("latency_p99_ms", 75.0), ("cost_usd", 0.02)], + ), ); runtime.seed_success( "exec-run-candidate-4", - output("candidate-4", &[("latency_p99_ms", 78.0), ("cost_usd", 0.02)]), + output( + "candidate-4", + &[("latency_p99_ms", 78.0), ("cost_usd", 0.02)], + ), ); let store = FsExecutionStore::new(temp_store_dir("continue")); - let mut service = ExecutionService::new(GlobalConfig { max_concurrent_child_runs: 2 }, runtime, store); + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + store, + ); let execution = service .run_to_completion(spec_with_iteration_failure_policy("continue", 2)) .expect("run execution"); @@ -76,15 +109,27 @@ fn iteration_failure_policy_retry_retries_once() { runtime.seed_failure("exec-run-candidate-2"); runtime.seed_success( "exec-run-candidate-3", - output("candidate-3", &[("latency_p99_ms", 74.0), ("cost_usd", 0.02)]), + output( + "candidate-3", + &[("latency_p99_ms", 74.0), ("cost_usd", 0.02)], + ), ); runtime.seed_success( "exec-run-candidate-4", - output("candidate-4", &[("latency_p99_ms", 76.0), ("cost_usd", 0.02)]), + output( + "candidate-4", + &[("latency_p99_ms", 76.0), ("cost_usd", 0.02)], + ), ); let store = FsExecutionStore::new(temp_store_dir("retry")); - let mut service = ExecutionService::new(GlobalConfig { max_concurrent_child_runs: 2 }, runtime, store); + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + store, + ); let execution = service .run_to_completion(spec_with_iteration_failure_policy("retry_iteration", 1)) .expect("run execution"); @@ -99,11 +144,20 @@ fn malformed_output_is_counted_as_candidate_failure() { runtime.seed_malformed_output("exec-run-candidate-1"); runtime.seed_success( "exec-run-candidate-2", - output("candidate-2", &[("latency_p99_ms", 80.0), ("cost_usd", 0.03)]), + output( + "candidate-2", + &[("latency_p99_ms", 80.0), ("cost_usd", 
0.03)], + ), ); let store = FsExecutionStore::new(temp_store_dir("malformed-output")); - let mut service = ExecutionService::new(GlobalConfig { max_concurrent_child_runs: 2 }, runtime, store); + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + store, + ); let execution = service .run_to_completion(spec_with_missing_output_policy("mark_failed")) .expect("run execution"); diff --git a/tests/execution_bridge.rs b/tests/execution_bridge.rs index 17131b3..6f79129 100644 --- a/tests/execution_bridge.rs +++ b/tests/execution_bridge.rs @@ -140,7 +140,9 @@ fn create_list_and_get_execution_routes_round_trip() { .expect("list"); assert_eq!(listed.status, 200); assert_eq!( - listed.json["executions"].as_array().map(|items| items.len()), + listed.json["executions"] + .as_array() + .map(|items| items.len()), Some(1) ); @@ -155,10 +157,19 @@ fn create_list_and_get_execution_routes_round_trip() { assert_eq!(fetched.status, 200); assert_eq!(fetched.json["execution"]["execution_id"], execution_id); assert_eq!(fetched.json["progress"]["event_count"], 2); - assert_eq!(fetched.json["progress"]["event_type_counts"]["ExecutionCreated"], 1); - assert_eq!(fetched.json["progress"]["event_type_counts"]["ExecutionSubmitted"], 1); + assert_eq!( + fetched.json["progress"]["event_type_counts"]["ExecutionCreated"], + 1 + ); + assert_eq!( + fetched.json["progress"]["event_type_counts"]["ExecutionSubmitted"], + 1 + ); assert_eq!(fetched.json["progress"]["candidate_queue_count"], 0); - assert_eq!(fetched.json["result"]["best_candidate_id"], serde_json::Value::Null); + assert_eq!( + fetched.json["result"]["best_candidate_id"], + serde_json::Value::Null + ); assert_eq!(fetched.json["result"]["completed_iterations"], 0); assert_eq!(fetched.json["result"]["total_candidate_failures"], 0); } diff --git a/tests/execution_bridge_live.rs b/tests/execution_bridge_live.rs index 1ac729c..5e708f9 100644 --- a/tests/execution_bridge_live.rs +++ 
b/tests/execution_bridge_live.rs @@ -9,8 +9,7 @@ use serde_json::json; use void_control::orchestration::{ BudgetPolicy, ConcurrencyPolicy, ConvergencePolicy, EvaluationConfig, ExecutionSpec, - GlobalConfig, OrchestrationPolicy, VariationConfig, VariationProposal, - WorkflowTemplateRef, + GlobalConfig, OrchestrationPolicy, VariationConfig, VariationProposal, WorkflowTemplateRef, }; use void_control::runtime::VoidBoxRuntimeClient; @@ -38,8 +37,8 @@ fn bridge_submission_and_worker_loop_complete_execution_against_live_daemon() { .expect("execution_id") .to_string(); - let base_url = std::env::var("VOID_BOX_BASE_URL") - .unwrap_or_else(|_| "http://127.0.0.1:43100".to_string()); + let base_url = + std::env::var("VOID_BOX_BASE_URL").unwrap_or_else(|_| "http://127.0.0.1:43100".to_string()); let mut attempts = 0; loop { @@ -84,7 +83,9 @@ fn bridge_submission_and_worker_loop_complete_execution_against_live_daemon() { .expect("get execution events"); assert_eq!(events.status, 200); let items = events.json["events"].as_array().expect("events array"); - assert!(items.iter().any(|event| event["event_type"] == "ExecutionStarted")); + assert!(items + .iter() + .any(|event| event["event_type"] == "ExecutionStarted")); assert!(items .iter() .any(|event| event["event_type"] == "CandidateOutputCollected")); @@ -107,8 +108,8 @@ fn bridge_multiple_executions_complete_against_live_daemon() { let execution_dir = root.join("executions"); let spec = structured_output_spec(); let body = execution_request_json(&spec); - let base_url = std::env::var("VOID_BOX_BASE_URL") - .unwrap_or_else(|_| "http://127.0.0.1:43100".to_string()); + let base_url = + std::env::var("VOID_BOX_BASE_URL").unwrap_or_else(|_| "http://127.0.0.1:43100".to_string()); let first = void_control::bridge::handle_bridge_request_with_dirs_for_test( "POST", @@ -188,8 +189,8 @@ fn bridge_pause_resume_and_cancel_work_against_live_daemon() { let execution_dir = root.join("executions"); let spec = long_running_spec(); let body = 
execution_request_json(&spec); - let base_url = std::env::var("VOID_BOX_BASE_URL") - .unwrap_or_else(|_| "http://127.0.0.1:43100".to_string()); + let base_url = + std::env::var("VOID_BOX_BASE_URL").unwrap_or_else(|_| "http://127.0.0.1:43100".to_string()); let created = void_control::bridge::handle_bridge_request_with_dirs_for_test( "POST", @@ -298,9 +299,15 @@ fn bridge_pause_resume_and_cancel_work_against_live_daemon() { ) .expect("get events"); let items = events.json["events"].as_array().expect("events array"); - assert!(items.iter().any(|event| event["event_type"] == "ExecutionPaused")); - assert!(items.iter().any(|event| event["event_type"] == "ExecutionResumed")); - assert!(items.iter().any(|event| event["event_type"] == "ExecutionCanceled")); + assert!(items + .iter() + .any(|event| event["event_type"] == "ExecutionPaused")); + assert!(items + .iter() + .any(|event| event["event_type"] == "ExecutionResumed")); + assert!(items + .iter() + .any(|event| event["event_type"] == "ExecutionCanceled")); } fn structured_output_spec() -> ExecutionSpec { @@ -493,7 +500,9 @@ fn fallback_structured_output_spec_path() -> PathBuf { .duration_since(UNIX_EPOCH) .expect("clock") .as_nanos(); - std::env::temp_dir().join(format!("void-control-bridge-live-structured-output-{nanos}.yaml")) + std::env::temp_dir().join(format!( + "void-control-bridge-live-structured-output-{nanos}.yaml" + )) } fn fallback_long_running_spec_path() -> PathBuf { @@ -501,7 +510,9 @@ fn fallback_long_running_spec_path() -> PathBuf { .duration_since(UNIX_EPOCH) .expect("clock") .as_nanos(); - std::env::temp_dir().join(format!("void-control-bridge-live-long-running-{nanos}.yaml")) + std::env::temp_dir().join(format!( + "void-control-bridge-live-long-running-{nanos}.yaml" + )) } fn temp_root(label: &str) -> PathBuf { diff --git a/tests/execution_dry_run.rs b/tests/execution_dry_run.rs index 24f65bb..27c1bf5 100644 --- a/tests/execution_dry_run.rs +++ b/tests/execution_dry_run.rs @@ -10,18 +10,33 @@ use 
void_control::runtime::MockRuntime; fn dry_run_validates_without_creating_execution() { let store_dir = temp_store_dir("dry-run-valid"); let store = FsExecutionStore::new(store_dir.clone()); - let service = ExecutionService::new(GlobalConfig { max_concurrent_child_runs: 4 }, MockRuntime::new(), store); + let service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 4, + }, + MockRuntime::new(), + store, + ); let result = service.dry_run(&spec(3)).expect("dry run"); assert!(result.valid); - assert!(std::fs::read_dir(store_dir).expect("read dir").next().is_none()); + assert!(std::fs::read_dir(store_dir) + .expect("read dir") + .next() + .is_none()); } #[test] fn dry_run_returns_plan_warnings_and_errors() { let store = FsExecutionStore::new(temp_store_dir("dry-run-errors")); - let service = ExecutionService::new(GlobalConfig { max_concurrent_child_runs: 4 }, MockRuntime::new(), store); + let service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 4, + }, + MockRuntime::new(), + store, + ); let mut spec = spec(3); spec.policy.budget.max_wall_clock_secs = None; spec.policy.budget.max_iterations = None; @@ -36,14 +51,26 @@ fn dry_run_returns_plan_warnings_and_errors() { #[test] fn dry_run_reports_parameter_space_cardinality() { let store = FsExecutionStore::new(temp_store_dir("dry-run-cardinality")); - let service = ExecutionService::new(GlobalConfig { max_concurrent_child_runs: 4 }, MockRuntime::new(), store); + let service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 4, + }, + MockRuntime::new(), + store, + ); let spec = ExecutionSpec { variation: VariationConfig::parameter_space( 2, void_control::orchestration::VariationSelection::Sequential, BTreeMap::from([ - ("sandbox.env.CONCURRENCY".to_string(), vec!["2".to_string(), "4".to_string()]), - ("sandbox.memory_mb".to_string(), vec!["512".to_string(), "1024".to_string()]), + ( + "sandbox.env.CONCURRENCY".to_string(), + vec!["2".to_string(), 
"4".to_string()], + ), + ( + "sandbox.memory_mb".to_string(), + vec!["512".to_string(), "1024".to_string()], + ), ]), ), ..spec(3) diff --git a/tests/execution_event_replay.rs b/tests/execution_event_replay.rs index 18bd27a..bea502e 100644 --- a/tests/execution_event_replay.rs +++ b/tests/execution_event_replay.rs @@ -79,9 +79,13 @@ fn store_round_trips_execution_and_events() { event(ControlEventType::IterationStarted), ]; - store.create_execution(&execution).expect("create execution"); + store + .create_execution(&execution) + .expect("create execution"); for event in &events { - store.append_event("exec-store-1", event).expect("append event"); + store + .append_event("exec-store-1", event) + .expect("append event"); } store .save_accumulator( @@ -108,7 +112,9 @@ fn store_can_reload_accumulator_after_restart() { { let store = FsExecutionStore::new(root.clone()); - store.create_execution(&execution).expect("create execution"); + store + .create_execution(&execution) + .expect("create execution"); store .save_accumulator( "exec-store-2", diff --git a/tests/execution_message_box.rs b/tests/execution_message_box.rs index d636cb0..095df2f 100644 --- a/tests/execution_message_box.rs +++ b/tests/execution_message_box.rs @@ -1,24 +1,25 @@ #![cfg(feature = "serde")] +use std::cell::RefCell; +use std::collections::BTreeMap; use std::env; use std::fs; use std::path::PathBuf; -use std::cell::RefCell; -use std::collections::BTreeMap; use std::rc::Rc; use std::time::{SystemTime, UNIX_EPOCH}; use void_control::contract::{ - ContractError, RuntimeInspection, StartRequest, StartResult, RunState, + ContractError, RunState, RuntimeInspection, StartRequest, StartResult, }; +use void_control::orchestration::service::ExecutionRuntime; use void_control::orchestration::{ - CandidateOutput, CandidateSpec, CandidateStatus, CommunicationIntent, CommunicationIntentAudience, - CommunicationIntentKind, CommunicationIntentPriority, ExecutionCandidate, ExecutionService, ExecutionSpec, - 
FsExecutionStore, GlobalConfig, InboxEntry, InboxSnapshot, MessageStats, OrchestrationPolicy, RoutedMessage, - RoutedMessageStatus, StructuredOutputResult, VariationConfig, VariationProposal, WorkflowTemplateRef, - extract_message_stats, + extract_message_stats, CandidateOutput, CandidateSpec, CandidateStatus, CommunicationIntent, + CommunicationIntentAudience, CommunicationIntentKind, CommunicationIntentPriority, + ExecutionCandidate, ExecutionService, ExecutionSpec, FsExecutionStore, GlobalConfig, + InboxEntry, InboxSnapshot, MessageStats, OrchestrationPolicy, RoutedMessage, + RoutedMessageStatus, StructuredOutputResult, VariationConfig, VariationProposal, + WorkflowTemplateRef, }; -use void_control::orchestration::service::ExecutionRuntime; use void_control::runtime::MockRuntime; use void_control::runtime::{LaunchInjectionAdapter, ProviderLaunchAdapter}; @@ -272,7 +273,10 @@ fn fs_store_rejects_unsafe_inbox_snapshot_paths() { assert_eq!(err.kind(), std::io::ErrorKind::InvalidInput); let inbox_dir = root.join("exec-message-box").join("inboxes"); - assert!(!inbox_dir.exists(), "unsafe path should not create inbox dirs"); + assert!( + !inbox_dir.exists(), + "unsafe path should not create inbox dirs" + ); } #[test] @@ -298,8 +302,14 @@ fn fs_store_ignores_truncated_ndjson_tail_when_loading_intents() { .expect("append valid intent"); let log_path = root.join("exec-message-box").join("intents.log"); - fs::write(&log_path, format!("{}\n{{\"intent_id\":", serde_json::to_string(&intent).expect("serialize intent"))) - .expect("truncate tail"); + fs::write( + &log_path, + format!( + "{}\n{{\"intent_id\":", + serde_json::to_string(&intent).expect("serialize intent") + ), + ) + .expect("truncate tail"); let loaded = store .load_intents("exec-message-box") @@ -367,11 +377,9 @@ fn service_launches_through_adapter_and_injects_inbox_content() { let requests = runtime_requests.borrow(); assert_eq!(requests.len(), 1); assert_eq!(requests[0].workflow_spec, "workflow-template"); - 
let launch_context = requests[0] - .launch_context - .as_ref() - .expect("launch context"); - let decoded: InboxSnapshot = serde_json::from_str(launch_context).expect("decode launch context"); + let launch_context = requests[0].launch_context.as_ref().expect("launch context"); + let decoded: InboxSnapshot = + serde_json::from_str(launch_context).expect("decode launch context"); assert_eq!(decoded, snapshot); } @@ -627,7 +635,10 @@ fn two_iteration_swarm_spec() -> ExecutionSpec { 2, vec![ VariationProposal { - overrides: BTreeMap::from([("agent.prompt".to_string(), "baseline".to_string())]), + overrides: BTreeMap::from([( + "agent.prompt".to_string(), + "baseline".to_string(), + )]), }, VariationProposal { overrides: BTreeMap::from([("agent.prompt".to_string(), "v1".to_string())]), diff --git a/tests/execution_reconciliation.rs b/tests/execution_reconciliation.rs index b515740..8782257 100644 --- a/tests/execution_reconciliation.rs +++ b/tests/execution_reconciliation.rs @@ -15,7 +15,9 @@ fn reloads_non_terminal_executions_after_restart() { let mut execution = Execution::new("exec-reload", "swarm", "reload state"); execution.status = ExecutionStatus::Running; - store.create_execution(&execution).expect("create execution"); + store + .create_execution(&execution) + .expect("create execution"); store .append_event( "exec-reload", @@ -54,7 +56,9 @@ fn paused_execution_remains_paused_after_restart() { let mut execution = Execution::new("exec-paused", "swarm", "stay paused"); execution.status = ExecutionStatus::Paused; - store.create_execution(&execution).expect("create execution"); + store + .create_execution(&execution) + .expect("create execution"); let reconciler = ReconciliationService::new(FsExecutionStore::new(root)); let active = reconciler.reload_active_executions().expect("reload"); @@ -70,7 +74,9 @@ fn completed_execution_is_not_reloaded_as_active() { let mut execution = Execution::new("exec-complete", "swarm", "done"); execution.status = 
ExecutionStatus::Completed; - store.create_execution(&execution).expect("create execution"); + store + .create_execution(&execution) + .expect("create execution"); let reconciler = ReconciliationService::new(FsExecutionStore::new(root)); let active = reconciler.reload_active_executions().expect("reload"); @@ -87,18 +93,22 @@ fn reloads_queued_candidates_fifo_across_active_executions() { exec_a.status = ExecutionStatus::Running; store.create_execution(&exec_a).expect("create a"); let mut a1 = ExecutionCandidate::new("exec-a", "cand-a1", 2, 0, CandidateStatus::Queued); - a1.overrides.insert("agent.prompt".to_string(), "a1".to_string()); + a1.overrides + .insert("agent.prompt".to_string(), "a1".to_string()); store.save_candidate(&a1).expect("save a1"); let mut exec_b = Execution::new("exec-b", "swarm", "b"); exec_b.status = ExecutionStatus::Running; store.create_execution(&exec_b).expect("create b"); let mut b1 = ExecutionCandidate::new("exec-b", "cand-b1", 1, 0, CandidateStatus::Queued); - b1.overrides.insert("agent.prompt".to_string(), "b1".to_string()); + b1.overrides + .insert("agent.prompt".to_string(), "b1".to_string()); store.save_candidate(&b1).expect("save b1"); let reconciler = ReconciliationService::new(FsExecutionStore::new(root)); - let queued = reconciler.reload_queued_candidates().expect("reload queued"); + let queued = reconciler + .reload_queued_candidates() + .expect("reload queued"); assert_eq!(queued.len(), 2); assert_eq!(queued[0].execution_id, "exec-b"); @@ -139,7 +149,9 @@ fn paused_execution_candidates_are_excluded_from_reloaded_queue() { .expect("save running candidate"); let reconciler = ReconciliationService::new(FsExecutionStore::new(root)); - let queued = reconciler.reload_queued_candidates().expect("reload queued"); + let queued = reconciler + .reload_queued_candidates() + .expect("reload queued"); assert_eq!(queued.len(), 1); assert_eq!(queued[0].execution_id, "exec-running"); diff --git a/tests/execution_scheduler.rs 
b/tests/execution_scheduler.rs index 61012bf..1ba5855 100644 --- a/tests/execution_scheduler.rs +++ b/tests/execution_scheduler.rs @@ -3,8 +3,7 @@ use std::collections::BTreeMap; use void_control::orchestration::{ CandidateOutput, ExecutionAccumulator, ExecutionService, ExecutionSpec, ExecutionStatus, FsExecutionStore, GlobalConfig, OrchestrationPolicy, QueuedCandidate, SchedulerDecision, - StructuredOutputResult, - VariationConfig, VariationProposal, + StructuredOutputResult, VariationConfig, VariationProposal, }; use void_control::runtime::MockRuntime; @@ -18,13 +17,15 @@ fn mock_runtime_can_complete_runs_with_structured_outputs() { let started = runtime.start(test_start_request("run-1")).expect("start"); let inspection = runtime.inspect(&started.handle).expect("inspect"); - let output = runtime - .take_structured_output("run-1"); + let output = runtime.take_structured_output("run-1"); let StructuredOutputResult::Found(output) = output else { panic!("expected structured output") }; - assert_eq!(inspection.state, void_control::contract::RunState::Succeeded); + assert_eq!( + inspection.state, + void_control::contract::RunState::Succeeded + ); assert_eq!(output.metrics["latency_p99_ms"], 100.0); } @@ -34,7 +35,9 @@ fn mock_runtime_can_simulate_failure_timeout_and_missing_output() { runtime.seed_failure("run-fail"); runtime.seed_missing_output("run-missing"); - let fail = runtime.start(test_start_request("run-fail")).expect("start fail"); + let fail = runtime + .start(test_start_request("run-fail")) + .expect("start fail"); let missing = runtime .start(test_start_request("run-missing")) .expect("start missing"); @@ -44,7 +47,10 @@ fn mock_runtime_can_simulate_failure_timeout_and_missing_output() { void_control::contract::RunState::Failed ); assert_eq!( - runtime.inspect(&missing.handle).expect("inspect missing").state, + runtime + .inspect(&missing.handle) + .expect("inspect missing") + .state, void_control::contract::RunState::Succeeded ); assert!(matches!( @@ 
-130,8 +136,10 @@ fn per_execution_concurrency_cap_blocks_dispatch_until_release() { #[test] fn exhausted_budget_prevents_queue_entry() { let mut scheduler = void_control::orchestration::GlobalScheduler::new(1); - let mut accumulator = ExecutionAccumulator::default(); - accumulator.completed_iterations = 1; + let accumulator = ExecutionAccumulator { + completed_iterations: 1, + ..ExecutionAccumulator::default() + }; let decision = scheduler.enqueue_if_budget_allows( QueuedCandidate::new("exec-1", "cand-1", 1), @@ -147,19 +155,36 @@ fn runs_single_iteration_and_completes_with_best_result() { let mut runtime = MockRuntime::new(); runtime.seed_success( "exec-run-candidate-1", - output("candidate-1", &[("latency_p99_ms", 120.0), ("cost_usd", 0.04)]), + output( + "candidate-1", + &[("latency_p99_ms", 120.0), ("cost_usd", 0.04)], + ), ); runtime.seed_success( "exec-run-candidate-2", - output("candidate-2", &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)]), + output( + "candidate-2", + &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)], + ), ); let store = FsExecutionStore::new(temp_store_dir("single")); - let mut service = ExecutionService::new(GlobalConfig { max_concurrent_child_runs: 2 }, runtime, store); - let execution = service.run_to_completion(test_spec(1)).expect("run execution"); + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + store, + ); + let execution = service + .run_to_completion(test_spec(1)) + .expect("run execution"); assert_eq!(execution.status, ExecutionStatus::Completed); - assert_eq!(execution.result_best_candidate_id.as_deref(), Some("candidate-2")); + assert_eq!( + execution.result_best_candidate_id.as_deref(), + Some("candidate-2") + ); } #[test] @@ -167,23 +192,41 @@ fn runs_multiple_iterations_until_threshold() { let mut runtime = MockRuntime::new(); runtime.seed_success( "exec-run-candidate-1", - output("candidate-1", &[("latency_p99_ms", 100.0), ("cost_usd", 0.02)]), + output( + 
"candidate-1", + &[("latency_p99_ms", 100.0), ("cost_usd", 0.02)], + ), ); runtime.seed_success( "exec-run-candidate-2", - output("candidate-2", &[("latency_p99_ms", 95.0), ("cost_usd", 0.20)]), + output( + "candidate-2", + &[("latency_p99_ms", 95.0), ("cost_usd", 0.20)], + ), ); runtime.seed_success( "exec-run-candidate-3", - output("candidate-3", &[("latency_p99_ms", 70.0), ("cost_usd", 0.02)]), + output( + "candidate-3", + &[("latency_p99_ms", 70.0), ("cost_usd", 0.02)], + ), ); runtime.seed_success( "exec-run-candidate-4", - output("candidate-4", &[("latency_p99_ms", 72.0), ("cost_usd", 0.02)]), + output( + "candidate-4", + &[("latency_p99_ms", 72.0), ("cost_usd", 0.02)], + ), ); let store = FsExecutionStore::new(temp_store_dir("threshold")); - let mut service = ExecutionService::new(GlobalConfig { max_concurrent_child_runs: 2 }, runtime, store); + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + store, + ); let execution = service .run_to_completion(test_spec_with_threshold(0.9, 2)) .expect("run execution"); @@ -198,11 +241,20 @@ fn short_circuits_iteration_after_failure_limit() { runtime.seed_failure("exec-run-candidate-1"); runtime.seed_success( "exec-run-candidate-2", - output("candidate-2", &[("latency_p99_ms", 95.0), ("cost_usd", 0.03)]), + output( + "candidate-2", + &[("latency_p99_ms", 95.0), ("cost_usd", 0.03)], + ), ); let store = FsExecutionStore::new(temp_store_dir("fail-limit")); - let mut service = ExecutionService::new(GlobalConfig { max_concurrent_child_runs: 2 }, runtime, store); + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + store, + ); let execution = service .run_to_completion(test_spec_with_failure_limit(1)) .expect("run execution"); @@ -218,7 +270,13 @@ fn marks_execution_failed_when_all_candidates_fail_and_policy_says_fail() { runtime.seed_failure("exec-run-candidate-2"); let store = 
FsExecutionStore::new(temp_store_dir("all-fail")); - let mut service = ExecutionService::new(GlobalConfig { max_concurrent_child_runs: 2 }, runtime, store); + let mut service = ExecutionService::new( + GlobalConfig { + max_concurrent_child_runs: 2, + }, + runtime, + store, + ); let execution = service .run_to_completion(test_spec_with_failure_limit(2)) .expect("run execution"); diff --git a/tests/execution_search_strategy.rs b/tests/execution_search_strategy.rs index 80ff6bc..4266067 100644 --- a/tests/execution_search_strategy.rs +++ b/tests/execution_search_strategy.rs @@ -2,8 +2,8 @@ use std::collections::BTreeMap; use void_control::orchestration::{ CandidateInbox, CandidateOutput, CandidateSpec, ConvergencePolicy, ExecutionAccumulator, - IterationEvaluation, MessageStats, MetricDirection, SearchStrategy, ScoringConfig, - StopReason, VariationConfig, VariationProposal, VariationSelection, WeightedMetric, + IterationEvaluation, MessageStats, MetricDirection, ScoringConfig, SearchStrategy, StopReason, + VariationConfig, VariationProposal, VariationSelection, WeightedMetric, }; #[test] @@ -14,7 +14,12 @@ fn search_bootstraps_when_no_seed_exists() { VariationSelection::Sequential, BTreeMap::from([( "sandbox.env.CONCURRENCY".to_string(), - vec!["2".to_string(), "4".to_string(), "8".to_string(), "16".to_string()], + vec![ + "2".to_string(), + "4".to_string(), + "8".to_string(), + "16".to_string(), + ], )]), ), scoring_config(), @@ -50,15 +55,17 @@ fn search_refines_around_explicit_incumbent() { scoring_config(), ConvergencePolicy::default(), ); - let mut accumulator = ExecutionAccumulator::default(); - accumulator.best_candidate_overrides = BTreeMap::from([( - "agent.prompt".to_string(), - "v1".to_string(), - )]); + let accumulator = ExecutionAccumulator { + best_candidate_overrides: BTreeMap::from([("agent.prompt".to_string(), "v1".to_string())]), + ..ExecutionAccumulator::default() + }; let candidates = strategy.plan_candidates( &accumulator, - 
&[CandidateInbox::new("candidate-1"), CandidateInbox::new("candidate-2")], + &[ + CandidateInbox::new("candidate-1"), + CandidateInbox::new("candidate-2"), + ], None, ); @@ -81,16 +88,18 @@ fn search_avoids_explored_signatures() { scoring_config(), ConvergencePolicy::default(), ); - let mut accumulator = ExecutionAccumulator::default(); - accumulator.best_candidate_overrides = BTreeMap::from([( - "agent.prompt".to_string(), - "v1".to_string(), - )]); - accumulator.explored_signatures = vec!["agent.prompt=baseline".to_string()]; + let accumulator = ExecutionAccumulator { + best_candidate_overrides: BTreeMap::from([("agent.prompt".to_string(), "v1".to_string())]), + explored_signatures: vec!["agent.prompt=baseline".to_string()], + ..ExecutionAccumulator::default() + }; let candidates = strategy.plan_candidates( &accumulator, - &[CandidateInbox::new("candidate-1"), CandidateInbox::new("candidate-2")], + &[ + CandidateInbox::new("candidate-1"), + CandidateInbox::new("candidate-2"), + ], None, ); @@ -139,12 +148,18 @@ fn search_reduce_updates_incumbent_phase_and_signatures() { assert_eq!(next.best_candidate_id.as_deref(), Some("candidate-2")); assert_eq!( - next.best_candidate_overrides.get("agent.prompt").map(String::as_str), + next.best_candidate_overrides + .get("agent.prompt") + .map(String::as_str), Some("v1") ); assert_eq!(next.search_phase.as_deref(), Some("refine")); - assert!(next.explored_signatures.contains(&"agent.prompt=baseline".to_string())); - assert!(next.explored_signatures.contains(&"agent.prompt=v1".to_string())); + assert!(next + .explored_signatures + .contains(&"agent.prompt=baseline".to_string())); + assert!(next + .explored_signatures + .contains(&"agent.prompt=v1".to_string())); } #[test] @@ -160,13 +175,17 @@ fn search_stops_when_no_new_neighbors_remain() { scoring_config(), ConvergencePolicy::default(), ); - let mut accumulator = ExecutionAccumulator::default(); - accumulator.best_candidate_overrides = BTreeMap::from([( - 
"agent.prompt".to_string(), - "baseline".to_string(), - )]); - accumulator.explored_signatures = - vec!["agent.prompt=v1".to_string(), "agent.prompt=baseline".to_string()]; + let accumulator = ExecutionAccumulator { + best_candidate_overrides: BTreeMap::from([( + "agent.prompt".to_string(), + "baseline".to_string(), + )]), + explored_signatures: vec![ + "agent.prompt=v1".to_string(), + "agent.prompt=baseline".to_string(), + ], + ..ExecutionAccumulator::default() + }; let stop = strategy.should_stop( &accumulator, @@ -193,13 +212,17 @@ fn search_falls_back_to_incumbent_centered_planning_without_meaningful_stats() { scoring_config(), ConvergencePolicy::default(), ); - let mut accumulator = ExecutionAccumulator::default(); - accumulator.best_candidate_overrides = - BTreeMap::from([("agent.prompt".to_string(), "v1".to_string())]); + let accumulator = ExecutionAccumulator { + best_candidate_overrides: BTreeMap::from([("agent.prompt".to_string(), "v1".to_string())]), + ..ExecutionAccumulator::default() + }; let candidates = strategy.plan_candidates( &accumulator, - &[CandidateInbox::new("candidate-1"), CandidateInbox::new("candidate-2")], + &[ + CandidateInbox::new("candidate-1"), + CandidateInbox::new("candidate-2"), + ], Some(&MessageStats::default()), ); @@ -223,13 +246,17 @@ fn search_keeps_a_small_exploration_quota_when_signal_pressure_is_high() { scoring_config(), ConvergencePolicy::default(), ); - let mut accumulator = ExecutionAccumulator::default(); - accumulator.best_candidate_overrides = - BTreeMap::from([("agent.prompt".to_string(), "v1".to_string())]); + let accumulator = ExecutionAccumulator { + best_candidate_overrides: BTreeMap::from([("agent.prompt".to_string(), "v1".to_string())]), + ..ExecutionAccumulator::default() + }; let candidates = strategy.plan_candidates( &accumulator, - &[CandidateInbox::new("candidate-1"), CandidateInbox::new("candidate-2")], + &[ + CandidateInbox::new("candidate-1"), + CandidateInbox::new("candidate-2"), + ], 
Some(&MessageStats { iteration: 1, total_messages: 4, @@ -295,7 +322,9 @@ fn search_reduce_uses_the_actual_planned_candidates() { ); assert_eq!( - next.best_candidate_overrides.get("agent.prompt").map(String::as_str), + next.best_candidate_overrides + .get("agent.prompt") + .map(String::as_str), Some("v2") ); } diff --git a/tests/execution_spec_validation.rs b/tests/execution_spec_validation.rs index 7e024ae..01663b9 100644 --- a/tests/execution_spec_validation.rs +++ b/tests/execution_spec_validation.rs @@ -32,9 +32,7 @@ fn rejects_concurrency_above_global_pool() { }) .expect_err("expected concurrency validation error"); - assert!(err - .to_string() - .contains("max_concurrent_candidates")); + assert!(err.to_string().contains("max_concurrent_candidates")); } #[test] diff --git a/tests/execution_strategy_acceptance.rs b/tests/execution_strategy_acceptance.rs index ccba530..482bacb 100644 --- a/tests/execution_strategy_acceptance.rs +++ b/tests/execution_strategy_acceptance.rs @@ -92,9 +92,24 @@ fn supported_strategies_persist_terminal_candidate_records() { assert_eq!(candidates.len(), queued_count, "{mode}"); assert!(!candidates.is_empty(), "{mode}"); - assert!(candidates.iter().all(|candidate| candidate.status == CandidateStatus::Completed), "{mode}"); - assert!(candidates.iter().all(|candidate| candidate.runtime_run_id.is_some()), "{mode}"); - assert!(candidates.iter().all(|candidate| candidate.succeeded == Some(true)), "{mode}"); + assert!( + candidates + .iter() + .all(|candidate| candidate.status == CandidateStatus::Completed), + "{mode}" + ); + assert!( + candidates + .iter() + .all(|candidate| candidate.runtime_run_id.is_some()), + "{mode}" + ); + assert!( + candidates + .iter() + .all(|candidate| candidate.succeeded == Some(true)), + "{mode}" + ); } } @@ -131,24 +146,40 @@ fn search_strategy_refines_across_incremental_worker_ticks() { let mut runtime = MockRuntime::new(); runtime.seed_success( "exec-run-candidate-1", - output("candidate-1", 
&[("latency_p99_ms", 95.0), ("cost_usd", 0.05)]), + output( + "candidate-1", + &[("latency_p99_ms", 95.0), ("cost_usd", 0.05)], + ), ); runtime.seed_success( "exec-run-candidate-2", - output("candidate-2", &[("latency_p99_ms", 80.0), ("cost_usd", 0.03)]), + output( + "candidate-2", + &[("latency_p99_ms", 80.0), ("cost_usd", 0.03)], + ), ); runtime.seed_success( "exec-run-candidate-3", - output("candidate-3", &[("latency_p99_ms", 70.0), ("cost_usd", 0.02)]), + output( + "candidate-3", + &[("latency_p99_ms", 70.0), ("cost_usd", 0.02)], + ), ); runtime.seed_success( "exec-run-candidate-4", - output("candidate-4", &[("latency_p99_ms", 72.0), ("cost_usd", 0.025)]), + output( + "candidate-4", + &[("latency_p99_ms", 72.0), ("cost_usd", 0.025)], + ), ); let store = FsExecutionStore::new(store_dir.clone()); - ExecutionService::::submit_execution(&store, "exec-search", &strategy_spec("search")) - .expect("submit execution"); + ExecutionService::::submit_execution( + &store, + "exec-search", + &strategy_spec("search"), + ) + .expect("submit execution"); let mut service = ExecutionService::new( GlobalConfig { @@ -157,13 +188,18 @@ fn search_strategy_refines_across_incremental_worker_ticks() { runtime, store, ); - service.plan_execution("exec-search").expect("plan execution"); + service + .plan_execution("exec-search") + .expect("plan execution"); for _ in 0..8 { let execution = service .dispatch_execution_once("exec-search") .expect("dispatch execution"); - if matches!(execution.status, ExecutionStatus::Completed | ExecutionStatus::Failed) { + if matches!( + execution.status, + ExecutionStatus::Completed | ExecutionStatus::Failed + ) { break; } } @@ -216,11 +252,17 @@ fn swarm_strategy_routes_intents_into_next_iteration_message_box_and_events() { ); runtime.seed_success( "exec-run-candidate-3", - output("candidate-3", &[("latency_p99_ms", 70.0), ("cost_usd", 0.02)]), + output( + "candidate-3", + &[("latency_p99_ms", 70.0), ("cost_usd", 0.02)], + ), ); runtime.seed_success( 
"exec-run-candidate-4", - output("candidate-4", &[("latency_p99_ms", 72.0), ("cost_usd", 0.025)]), + output( + "candidate-4", + &[("latency_p99_ms", 72.0), ("cost_usd", 0.025)], + ), ); let store = FsExecutionStore::new(store_dir.clone()); @@ -236,8 +278,12 @@ fn swarm_strategy_routes_intents_into_next_iteration_message_box_and_events() { .expect("run execution"); let store = FsExecutionStore::new(store_dir); - let snapshot = store.load_execution(&execution.execution_id).expect("load execution"); - let intents = store.load_intents(&execution.execution_id).expect("load intents"); + let snapshot = store + .load_execution(&execution.execution_id) + .expect("load execution"); + let intents = store + .load_intents(&execution.execution_id) + .expect("load intents"); let messages = store .load_routed_messages(&execution.execution_id) .expect("load routed messages"); @@ -252,14 +298,16 @@ fn swarm_strategy_routes_intents_into_next_iteration_message_box_and_events() { assert_eq!( messages .iter() - .filter(|message| message.status == void_control::orchestration::RoutedMessageStatus::Routed) + .filter(|message| message.status + == void_control::orchestration::RoutedMessageStatus::Routed) .count(), 2 ); assert_eq!( messages .iter() - .filter(|message| message.status == void_control::orchestration::RoutedMessageStatus::Delivered) + .filter(|message| message.status + == void_control::orchestration::RoutedMessageStatus::Delivered) .count(), 3 ); @@ -297,7 +345,10 @@ fn search_strategy_persists_lineage_and_delivers_parent_intent_to_refinement_ite ); runtime.seed_success( "exec-run-candidate-2", - output("candidate-2", &[("latency_p99_ms", 80.0), ("cost_usd", 0.03)]), + output( + "candidate-2", + &[("latency_p99_ms", 80.0), ("cost_usd", 0.03)], + ), ); runtime.seed_success( "exec-run-candidate-3", @@ -314,7 +365,10 @@ fn search_strategy_persists_lineage_and_delivers_parent_intent_to_refinement_ite ); runtime.seed_success( "exec-run-candidate-4", - output("candidate-4", 
&[("latency_p99_ms", 72.0), ("cost_usd", 0.025)]), + output( + "candidate-4", + &[("latency_p99_ms", 72.0), ("cost_usd", 0.025)], + ), ); let store = FsExecutionStore::new(store_dir.clone()); @@ -330,7 +384,9 @@ fn search_strategy_persists_lineage_and_delivers_parent_intent_to_refinement_ite .expect("run execution"); let store = FsExecutionStore::new(store_dir); - let intents = store.load_intents(&execution.execution_id).expect("load intents"); + let intents = store + .load_intents(&execution.execution_id) + .expect("load intents"); let inbox = store .load_inbox_snapshot(&execution.execution_id, 1, "candidate-1") .expect("load iteration-1 inbox"); @@ -362,21 +418,27 @@ fn signal_reactive_search_runs_end_to_end() { "multiple candidates saw the same bottleneck", )], ); - runtime.seed_success( - "exec-run-candidate-1", - signal_output.clone(), - ); + runtime.seed_success("exec-run-candidate-1", signal_output.clone()); runtime.seed_success( "exec-run-candidate-2", - output("candidate-2", &[("latency_p99_ms", 80.0), ("cost_usd", 0.03)]), + output( + "candidate-2", + &[("latency_p99_ms", 80.0), ("cost_usd", 0.03)], + ), ); runtime.seed_success( "exec-run-candidate-3", - output("candidate-3", &[("latency_p99_ms", 70.0), ("cost_usd", 0.02)]), + output( + "candidate-3", + &[("latency_p99_ms", 70.0), ("cost_usd", 0.02)], + ), ); runtime.seed_success( "exec-run-candidate-4", - output("candidate-4", &[("latency_p99_ms", 72.0), ("cost_usd", 0.025)]), + output( + "candidate-4", + &[("latency_p99_ms", 72.0), ("cost_usd", 0.025)], + ), ); let execution_id = "exec-search-signal-reactive"; @@ -451,14 +513,16 @@ fn signal_reactive_search_runs_end_to_end() { assert_eq!( messages .iter() - .filter(|message| message.status == void_control::orchestration::RoutedMessageStatus::Routed) + .filter(|message| message.status + == void_control::orchestration::RoutedMessageStatus::Routed) .count(), 1 ); assert_eq!( messages .iter() - .filter(|message| message.status == 
void_control::orchestration::RoutedMessageStatus::Delivered) + .filter(|message| message.status + == void_control::orchestration::RoutedMessageStatus::Delivered) .count(), 2 ); @@ -475,19 +539,31 @@ fn legacy_leader_directed_uses_persisted_planner_proposals() { let mut runtime = MockRuntime::new(); runtime.seed_success( "exec-run-candidate-1", - output("candidate-1", &[("latency_p99_ms", 95.0), ("cost_usd", 0.05)]), + output( + "candidate-1", + &[("latency_p99_ms", 95.0), ("cost_usd", 0.05)], + ), ); runtime.seed_success( "exec-run-candidate-2", - output("candidate-2", &[("latency_p99_ms", 80.0), ("cost_usd", 0.03)]), + output( + "candidate-2", + &[("latency_p99_ms", 80.0), ("cost_usd", 0.03)], + ), ); runtime.seed_success( "exec-run-candidate-3", - output("candidate-3", &[("latency_p99_ms", 70.0), ("cost_usd", 0.02)]), + output( + "candidate-3", + &[("latency_p99_ms", 70.0), ("cost_usd", 0.02)], + ), ); runtime.seed_success( "exec-run-candidate-4", - output("candidate-4", &[("latency_p99_ms", 72.0), ("cost_usd", 0.025)]), + output( + "candidate-4", + &[("latency_p99_ms", 72.0), ("cost_usd", 0.025)], + ), ); ExecutionService::::submit_execution(&store, "exec-legacy-leader", &spec) @@ -553,19 +629,31 @@ fn run_mode_to_completion( let mut runtime = MockRuntime::new(); runtime.seed_success( "exec-run-candidate-1", - output("candidate-1", &[("latency_p99_ms", 95.0), ("cost_usd", 0.05)]), + output( + "candidate-1", + &[("latency_p99_ms", 95.0), ("cost_usd", 0.05)], + ), ); runtime.seed_success( "exec-run-candidate-2", - output("candidate-2", &[("latency_p99_ms", 80.0), ("cost_usd", 0.03)]), + output( + "candidate-2", + &[("latency_p99_ms", 80.0), ("cost_usd", 0.03)], + ), ); runtime.seed_success( "exec-run-candidate-3", - output("candidate-3", &[("latency_p99_ms", 70.0), ("cost_usd", 0.02)]), + output( + "candidate-3", + &[("latency_p99_ms", 70.0), ("cost_usd", 0.02)], + ), ); runtime.seed_success( "exec-run-candidate-4", - output("candidate-4", &[("latency_p99_ms", 
72.0), ("cost_usd", 0.025)]), + output( + "candidate-4", + &[("latency_p99_ms", 72.0), ("cost_usd", 0.025)], + ), ); let store = FsExecutionStore::new(store_dir.clone()); @@ -658,7 +746,10 @@ fn strategy_spec(mode: &str) -> ExecutionSpec { 2, vec![ VariationProposal { - overrides: BTreeMap::from([("agent.prompt".to_string(), "baseline".to_string())]), + overrides: BTreeMap::from([( + "agent.prompt".to_string(), + "baseline".to_string(), + )]), }, VariationProposal { overrides: BTreeMap::from([("agent.prompt".to_string(), "v1".to_string())]), @@ -748,14 +839,18 @@ fn seed_iteration_inboxes( .map(|candidate_id| CandidateInbox::new(candidate_id)) .collect::>(); let routed = void_control::orchestration::message_box::route_intents(intents); - for (snapshot, delivered) in void_control::orchestration::message_box::materialize_inbox_snapshots( - execution_id, - iteration, - &inboxes, - intents, - &routed, - ) { - store.save_inbox_snapshot(&snapshot).expect("save inbox snapshot"); + for (snapshot, delivered) in + void_control::orchestration::message_box::materialize_inbox_snapshots( + execution_id, + iteration, + &inboxes, + intents, + &routed, + ) + { + store + .save_inbox_snapshot(&snapshot) + .expect("save inbox snapshot"); for delivered in delivered { store .append_routed_message(execution_id, &delivered) diff --git a/tests/execution_swarm_strategy.rs b/tests/execution_swarm_strategy.rs index f560d3b..4deb1b8 100644 --- a/tests/execution_swarm_strategy.rs +++ b/tests/execution_swarm_strategy.rs @@ -1,10 +1,9 @@ use std::collections::BTreeMap; use void_control::orchestration::{ - CandidateInbox, CandidateOutput, ConvergencePolicy, ExecutionAccumulator, - IterationEvaluation, MessageStats, MetricDirection, ScoringConfig, StopReason, - SwarmStrategy, VariationConfig, VariationProposal, VariationSelection, WeightedMetric, - score_iteration, + score_iteration, CandidateInbox, CandidateOutput, ConvergencePolicy, ExecutionAccumulator, + IterationEvaluation, MessageStats, 
MetricDirection, ScoringConfig, StopReason, SwarmStrategy, + VariationConfig, VariationProposal, VariationSelection, WeightedMetric, }; #[test] @@ -12,8 +11,16 @@ fn weighted_metrics_normalizes_within_iteration() { let scores = score_iteration( &scoring_config(), &[ - candidate_output("cand-a", true, &[("latency_p99_ms", 100.0), ("cost_usd", 0.02)]), - candidate_output("cand-b", true, &[("latency_p99_ms", 200.0), ("cost_usd", 0.05)]), + candidate_output( + "cand-a", + true, + &[("latency_p99_ms", 100.0), ("cost_usd", 0.02)], + ), + candidate_output( + "cand-b", + true, + &[("latency_p99_ms", 200.0), ("cost_usd", 0.05)], + ), ], ); @@ -24,7 +31,11 @@ fn weighted_metrics_normalizes_within_iteration() { fn failed_candidate_scores_zero() { let scores = score_iteration( &scoring_config(), - &[candidate_output("cand-fail", false, &[("latency_p99_ms", 100.0)])], + &[candidate_output( + "cand-fail", + false, + &[("latency_p99_ms", 100.0)], + )], ); assert_eq!(scores[0].score, 0.0); @@ -36,8 +47,16 @@ fn best_result_uses_tie_breaking_after_score() { let scores = score_iteration( &scoring_config(), &[ - candidate_output("cand-a", true, &[("latency_p99_ms", 100.0), ("cost_usd", 0.05)]), - candidate_output("cand-b", true, &[("latency_p99_ms", 100.0), ("cost_usd", 0.03)]), + candidate_output( + "cand-a", + true, + &[("latency_p99_ms", 100.0), ("cost_usd", 0.05)], + ), + candidate_output( + "cand-b", + true, + &[("latency_p99_ms", 100.0), ("cost_usd", 0.03)], + ), ], ); @@ -77,8 +96,10 @@ fn parameter_space_sequential_preserves_order() { #[test] fn explicit_variation_cycles_through_overrides() { - let mut accumulator = ExecutionAccumulator::default(); - accumulator.scoring_history_len = 1; + let accumulator = ExecutionAccumulator { + scoring_history_len: 1, + ..ExecutionAccumulator::default() + }; let proposals = VariationConfig::explicit( 2, vec![ @@ -95,13 +116,15 @@ fn explicit_variation_cycles_through_overrides() { #[test] fn 
leader_directed_proposals_are_validated_before_use() { - let mut accumulator = ExecutionAccumulator::default(); - accumulator.leader_proposals = vec![ - proposal(&[("sandbox.env.CONCURRENCY", "2")]), - VariationProposal { - overrides: BTreeMap::new(), - }, - ]; + let accumulator = ExecutionAccumulator { + leader_proposals: vec![ + proposal(&[("sandbox.env.CONCURRENCY", "2")]), + VariationProposal { + overrides: BTreeMap::new(), + }, + ], + ..ExecutionAccumulator::default() + }; let proposals = VariationConfig::leader_directed(2).generate(&accumulator); @@ -111,13 +134,15 @@ fn leader_directed_proposals_are_validated_before_use() { #[test] fn signal_reactive_proposals_are_generated_from_planner_output() { - let mut accumulator = ExecutionAccumulator::default(); - accumulator.leader_proposals = vec![ - proposal(&[("sandbox.env.CONCURRENCY", "2")]), - VariationProposal { - overrides: BTreeMap::new(), - }, - ]; + let accumulator = ExecutionAccumulator { + leader_proposals: vec![ + proposal(&[("sandbox.env.CONCURRENCY", "2")]), + VariationProposal { + overrides: BTreeMap::new(), + }, + ], + ..ExecutionAccumulator::default() + }; let proposals = VariationConfig::signal_reactive(2).generate(&accumulator); @@ -141,7 +166,10 @@ fn swarm_plans_candidates_from_variation_source() { let candidates = strategy.plan_candidates( &ExecutionAccumulator::default(), - &[CandidateInbox::new("candidate-1"), CandidateInbox::new("candidate-2")], + &[ + CandidateInbox::new("candidate-1"), + CandidateInbox::new("candidate-2"), + ], None, ); @@ -250,12 +278,14 @@ fn swarm_keeps_legacy_leader_directed_planning_unbiased_by_message_stats() { scoring_config(), ConvergencePolicy::default(), ); - let mut accumulator = ExecutionAccumulator::default(); - accumulator.leader_proposals = vec![ - proposal(&[("agent.prompt", "first")]), - proposal(&[("agent.prompt", "second")]), - proposal(&[("agent.prompt", "third")]), - ]; + let accumulator = ExecutionAccumulator { + leader_proposals: vec![ + 
proposal(&[("agent.prompt", "first")]), + proposal(&[("agent.prompt", "second")]), + proposal(&[("agent.prompt", "third")]), + ], + ..ExecutionAccumulator::default() + }; let candidates = strategy.plan_candidates( &accumulator, @@ -329,8 +359,10 @@ fn swarm_should_stop_on_plateau() { max_iterations_without_improvement: Some(2), }, ); - let mut accumulator = ExecutionAccumulator::default(); - accumulator.iterations_without_improvement = 2; + let accumulator = ExecutionAccumulator { + iterations_without_improvement: 2, + ..ExecutionAccumulator::default() + }; let stop = strategy.should_stop( &accumulator, diff --git a/tests/execution_worker.rs b/tests/execution_worker.rs index 3b61f97..697a86b 100644 --- a/tests/execution_worker.rs +++ b/tests/execution_worker.rs @@ -21,11 +21,17 @@ fn submitted_pending_execution_can_be_processed_to_completion() { let mut runtime = MockRuntime::new(); runtime.seed_success( "exec-run-candidate-1", - output("candidate-1", &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)]), + output( + "candidate-1", + &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)], + ), ); runtime.seed_success( "exec-run-candidate-2", - output("candidate-2", &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)]), + output( + "candidate-2", + &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)], + ), ); let mut service = ExecutionService::new( @@ -38,7 +44,10 @@ fn submitted_pending_execution_can_be_processed_to_completion() { let processed = service.process_execution("exec-worker").expect("process"); assert_eq!(processed.status, ExecutionStatus::Completed); - assert_eq!(processed.result_best_candidate_id.as_deref(), Some("candidate-2")); + assert_eq!( + processed.result_best_candidate_id.as_deref(), + Some("candidate-2") + ); let snapshot = store.load_execution("exec-worker").expect("reload"); assert_eq!(snapshot.execution.status, ExecutionStatus::Completed); @@ -83,13 +92,19 @@ fn planning_execution_persists_queued_candidates_without_dispatching() { 
assert_eq!(snapshot.candidates[1].status, CandidateStatus::Queued); assert_eq!(snapshot.candidates[0].runtime_run_id, None); assert_eq!(snapshot.candidates[1].runtime_run_id, None); - let event_types: Vec<_> = snapshot.events.iter().map(|event| event.event_type).collect(); + let event_types: Vec<_> = snapshot + .events + .iter() + .map(|event| event.event_type) + .collect(); assert!(event_types.contains(&void_control::orchestration::ControlEventType::ExecutionStarted)); assert!(event_types.contains(&void_control::orchestration::ControlEventType::IterationStarted)); assert_eq!( event_types .iter() - .filter(|&&event| event == void_control::orchestration::ControlEventType::CandidateQueued) + .filter( + |&&event| event == void_control::orchestration::ControlEventType::CandidateQueued + ) .count(), 2 ); @@ -117,11 +132,17 @@ fn processing_reuses_preplanned_candidates_without_duplication() { let mut runtime = MockRuntime::new(); runtime.seed_success( "exec-run-candidate-1", - output("candidate-1", &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)]), + output( + "candidate-1", + &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)], + ), ); runtime.seed_success( "exec-run-candidate-2", - output("candidate-2", &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)]), + output( + "candidate-2", + &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)], + ), ); let mut worker = ExecutionService::new( GlobalConfig { @@ -148,8 +169,7 @@ fn processing_reuses_preplanned_candidates_without_duplication() { .events .iter() .filter(|event| { - event.event_type - == void_control::orchestration::ControlEventType::CandidateQueued + event.event_type == void_control::orchestration::ControlEventType::CandidateQueued }) .count(), 2 @@ -176,11 +196,17 @@ fn dispatch_execution_once_runs_only_one_queued_candidate() { let mut runtime = MockRuntime::new(); runtime.seed_success( "exec-run-candidate-1", - output("candidate-1", &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)]), + output( + "candidate-1", + 
&[("latency_p99_ms", 90.0), ("cost_usd", 0.03)], + ), ); runtime.seed_success( "exec-run-candidate-2", - output("candidate-2", &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)]), + output( + "candidate-2", + &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)], + ), ); let mut worker = ExecutionService::new( GlobalConfig { @@ -263,20 +289,22 @@ fn stale_claim_is_recovered_and_processing_can_proceed() { .expect("submit"); let execution_dir = root.join("exec-stale-claim"); - std::fs::write( - execution_dir.join("claim.txt"), - "dead-worker|1", - ) - .expect("seed stale claim"); + std::fs::write(execution_dir.join("claim.txt"), "dead-worker|1").expect("seed stale claim"); let mut runtime = MockRuntime::new(); runtime.seed_success( "exec-run-candidate-1", - output("candidate-1", &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)]), + output( + "candidate-1", + &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)], + ), ); runtime.seed_success( "exec-run-candidate-2", - output("candidate-2", &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)]), + output( + "candidate-2", + &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)], + ), ); let mut service = ExecutionService::new( @@ -308,7 +336,10 @@ fn refresh_claim_keeps_owned_claim_valid() { .refresh_claim("exec-refresh-claim", "worker-a") .expect("refresh"); assert_eq!( - store.load_claim("exec-refresh-claim").expect("load claim").as_deref(), + store + .load_claim("exec-refresh-claim") + .expect("load claim") + .as_deref(), Some("worker-a") ); store @@ -363,7 +394,10 @@ fn candidate_records_round_trip_through_store() { ); assert_eq!(snapshot.candidates[1].candidate_id, "candidate-2"); assert_eq!(snapshot.candidates[1].status, CandidateStatus::Running); - assert_eq!(snapshot.candidates[1].runtime_run_id.as_deref(), Some("run-2")); + assert_eq!( + snapshot.candidates[1].runtime_run_id.as_deref(), + Some("run-2") + ); assert_eq!( snapshot.candidates[1] .overrides @@ -378,21 +412,23 @@ fn process_execution_persists_terminal_candidate_records() { let 
root = temp_store_dir("worker-candidate-lifecycle"); let store = FsExecutionStore::new(root); let spec = spec(1); - ExecutionService::::submit_execution( - &store, - "exec-candidate-lifecycle", - &spec, - ) - .expect("submit"); + ExecutionService::::submit_execution(&store, "exec-candidate-lifecycle", &spec) + .expect("submit"); let mut runtime = MockRuntime::new(); runtime.seed_success( "exec-run-candidate-1", - output("candidate-1", &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)]), + output( + "candidate-1", + &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)], + ), ); runtime.seed_success( "exec-run-candidate-2", - output("candidate-2", &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)]), + output( + "candidate-2", + &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)], + ), ); let mut service = ExecutionService::new( @@ -406,7 +442,9 @@ fn process_execution_persists_terminal_candidate_records() { .process_execution("exec-candidate-lifecycle") .expect("process"); - let snapshot = store.load_execution("exec-candidate-lifecycle").expect("reload"); + let snapshot = store + .load_execution("exec-candidate-lifecycle") + .expect("reload"); assert_eq!(snapshot.candidates.len(), 2); assert_eq!(snapshot.candidates[0].candidate_id, "candidate-1"); assert_eq!(snapshot.candidates[0].status, CandidateStatus::Completed); @@ -422,7 +460,10 @@ fn process_execution_persists_terminal_candidate_records() { Some("a") ); assert_eq!(snapshot.candidates[0].succeeded, Some(true)); - assert_eq!(snapshot.candidates[0].metrics.get("latency_p99_ms"), Some(&90.0)); + assert_eq!( + snapshot.candidates[0].metrics.get("latency_p99_ms"), + Some(&90.0) + ); assert_eq!(snapshot.candidates[1].candidate_id, "candidate-2"); assert_eq!(snapshot.candidates[1].status, CandidateStatus::Completed); assert_eq!( @@ -437,7 +478,10 @@ fn process_execution_persists_terminal_candidate_records() { Some("b") ); assert_eq!(snapshot.candidates[1].succeeded, Some(true)); - 
assert_eq!(snapshot.candidates[1].metrics.get("latency_p99_ms"), Some(&85.0)); + assert_eq!( + snapshot.candidates[1].metrics.get("latency_p99_ms"), + Some(&85.0) + ); } #[test] @@ -452,7 +496,10 @@ fn process_execution_persists_mixed_candidate_terminal_states() { runtime.seed_failure("exec-run-candidate-1"); runtime.seed_success( "exec-run-candidate-2", - output("candidate-2", &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)]), + output( + "candidate-2", + &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)], + ), ); let mut service = ExecutionService::new( @@ -467,7 +514,9 @@ fn process_execution_persists_mixed_candidate_terminal_states() { .expect("process"); assert_eq!(execution.status, ExecutionStatus::Completed); - let snapshot = store.load_execution("exec-candidate-mixed").expect("reload"); + let snapshot = store + .load_execution("exec-candidate-mixed") + .expect("reload"); assert_eq!(snapshot.candidates.len(), 2); assert_eq!(snapshot.candidates[0].candidate_id, "candidate-1"); assert_eq!(snapshot.candidates[0].status, CandidateStatus::Failed); @@ -489,11 +538,17 @@ fn process_execution_releases_claim_after_completion() { let mut runtime = MockRuntime::new(); runtime.seed_success( "exec-run-candidate-1", - output("candidate-1", &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)]), + output( + "candidate-1", + &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)], + ), ); runtime.seed_success( "exec-run-candidate-2", - output("candidate-2", &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)]), + output( + "candidate-2", + &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)], + ), ); let mut service = ExecutionService::new( @@ -513,7 +568,10 @@ fn process_execution_releases_claim_after_completion() { Some("candidate-2") ); assert_eq!(snapshot.execution.completed_iterations, 1); - assert_eq!(snapshot.execution.failure_counts.total_candidate_failures, 0); + assert_eq!( + snapshot.execution.failure_counts.total_candidate_failures, + 0 + ); } #[test] @@ -527,11 +585,17 @@ fn 
process_execution_persists_lifecycle_events() { let mut runtime = MockRuntime::new(); runtime.seed_success( "exec-run-candidate-1", - output("candidate-1", &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)]), + output( + "candidate-1", + &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)], + ), ); runtime.seed_success( "exec-run-candidate-2", - output("candidate-2", &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)]), + output( + "candidate-2", + &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)], + ), ); let mut service = ExecutionService::new( @@ -544,15 +608,24 @@ fn process_execution_persists_lifecycle_events() { service.process_execution("exec-events").expect("process"); let snapshot = store.load_execution("exec-events").expect("reload"); - let event_types: Vec<_> = snapshot.events.iter().map(|event| event.event_type).collect(); - assert!(event_types.contains(&void_control::orchestration::ControlEventType::ExecutionSubmitted)); + let event_types: Vec<_> = snapshot + .events + .iter() + .map(|event| event.event_type) + .collect(); + assert!( + event_types.contains(&void_control::orchestration::ControlEventType::ExecutionSubmitted) + ); assert!(event_types.contains(&void_control::orchestration::ControlEventType::ExecutionStarted)); assert!(event_types.contains(&void_control::orchestration::ControlEventType::CandidateQueued)); - assert!(event_types.contains(&void_control::orchestration::ControlEventType::CandidateDispatched)); - assert!(event_types.contains( - &void_control::orchestration::ControlEventType::CandidateOutputCollected - )); - assert!(event_types.contains(&void_control::orchestration::ControlEventType::ExecutionCompleted)); + assert!( + event_types.contains(&void_control::orchestration::ControlEventType::CandidateDispatched) + ); + assert!(event_types + .contains(&void_control::orchestration::ControlEventType::CandidateOutputCollected)); + assert!( + event_types.contains(&void_control::orchestration::ControlEventType::ExecutionCompleted) + ); } #[test] @@ -675,20 
+748,31 @@ fn paused_execution_does_not_block_other_queued_work_in_bridge_scheduler() { store.clone(), ); planner.plan_execution("exec-paused").expect("plan paused"); - planner.plan_execution("exec-running").expect("plan running"); + planner + .plan_execution("exec-running") + .expect("plan running"); - let mut paused = store.load_execution("exec-paused").expect("load paused").execution; + let mut paused = store + .load_execution("exec-paused") + .expect("load paused") + .execution; paused.status = ExecutionStatus::Paused; store.save_execution(&paused).expect("save paused"); let mut runtime = MockRuntime::new(); runtime.seed_success( "exec-run-candidate-3", - output("candidate-1", &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)]), + output( + "candidate-1", + &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)], + ), ); runtime.seed_success( "exec-run-candidate-4", - output("candidate-2", &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)]), + output( + "candidate-2", + &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)], + ), ); void_control::bridge::process_pending_executions_once_for_test( @@ -707,7 +791,9 @@ fn paused_execution_does_not_block_other_queued_work_in_bridge_scheduler() { .iter() .all(|candidate| candidate.status == CandidateStatus::Queued)); - let running_snapshot = store.load_execution("exec-running").expect("reload running"); + let running_snapshot = store + .load_execution("exec-running") + .expect("reload running"); assert_eq!(running_snapshot.execution.status, ExecutionStatus::Running); assert!(running_snapshot .candidates @@ -739,19 +825,31 @@ fn bridge_scheduler_dispatches_earliest_queued_execution_first() { let mut runtime = MockRuntime::new(); runtime.seed_success( "exec-run-candidate-1", - output("candidate-1", &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)]), + output( + "candidate-1", + &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)], + ), ); runtime.seed_success( "exec-run-candidate-2", - output("candidate-2", &[("latency_p99_ms", 85.0), 
("cost_usd", 0.02)]), + output( + "candidate-2", + &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)], + ), ); runtime.seed_success( "exec-run-candidate-3", - output("candidate-1", &[("latency_p99_ms", 88.0), ("cost_usd", 0.03)]), + output( + "candidate-1", + &[("latency_p99_ms", 88.0), ("cost_usd", 0.03)], + ), ); runtime.seed_success( "exec-run-candidate-4", - output("candidate-2", &[("latency_p99_ms", 84.0), ("cost_usd", 0.02)]), + output( + "candidate-2", + &[("latency_p99_ms", 84.0), ("cost_usd", 0.02)], + ), ); void_control::bridge::process_pending_executions_once_for_test( @@ -843,20 +941,23 @@ fn temp_store_dir(label: &str) -> std::path::PathBuf { dir } -fn tick_bridge_worker_until_terminal( - root: std::path::PathBuf, - execution_id: &str, -) { +fn tick_bridge_worker_until_terminal(root: std::path::PathBuf, execution_id: &str) { let store = FsExecutionStore::new(root.clone()); for _ in 0..6 { let mut runtime = MockRuntime::new(); runtime.seed_success( "exec-run-candidate-1", - output("candidate-1", &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)]), + output( + "candidate-1", + &[("latency_p99_ms", 90.0), ("cost_usd", 0.03)], + ), ); runtime.seed_success( "exec-run-candidate-2", - output("candidate-2", &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)]), + output( + "candidate-2", + &[("latency_p99_ms", 85.0), ("cost_usd", 0.02)], + ), ); void_control::bridge::process_pending_executions_once_for_test( GlobalConfig { diff --git a/tests/strategy_scenarios.rs b/tests/strategy_scenarios.rs index 67aefe6..4a94019 100644 --- a/tests/strategy_scenarios.rs +++ b/tests/strategy_scenarios.rs @@ -63,12 +63,7 @@ fn swarm_incident_mitigation_explores_distinct_hypotheses_and_finds_best_family( for idx in 6..=10 { runtime.seed_success( &format!("exec-run-candidate-{idx}"), - metrics_output( - &format!("candidate-{idx}"), - 90.0 + idx as f64, - 0.05, - 0.96, - ), + metrics_output(&format!("candidate-{idx}"), 90.0 + idx as f64, 0.05, 0.96), ); } @@ -85,8 +80,12 @@ fn 
swarm_incident_mitigation_explores_distinct_hypotheses_and_finds_best_family( .expect("run execution"); let store = FsExecutionStore::new(store_dir); - let snapshot = store.load_execution(&execution.execution_id).expect("load execution"); - let intents = store.load_intents(&execution.execution_id).expect("load intents"); + let snapshot = store + .load_execution(&execution.execution_id) + .expect("load execution"); + let intents = store + .load_intents(&execution.execution_id) + .expect("load intents"); let messages = store .load_routed_messages(&execution.execution_id) .expect("load routed messages"); @@ -99,7 +98,9 @@ fn swarm_incident_mitigation_explores_distinct_hypotheses_and_finds_best_family( let best = snapshot .candidates .iter() - .filter(|candidate| Some(&candidate.candidate_id) == execution.result_best_candidate_id.as_ref()) + .filter(|candidate| { + Some(&candidate.candidate_id) == execution.result_best_candidate_id.as_ref() + }) .max_by_key(|candidate| candidate.created_seq) .expect("best candidate"); @@ -112,7 +113,9 @@ fn swarm_incident_mitigation_explores_distinct_hypotheses_and_finds_best_family( assert_eq!(execution.status, ExecutionStatus::Completed); assert_eq!(execution.completed_iterations, 2); assert_eq!( - best.overrides.get("mitigation.strategy").map(String::as_str), + best.overrides + .get("mitigation.strategy") + .map(String::as_str), Some("rate_limit_cache") ); assert!(explored.starts_with(&[ @@ -158,14 +161,38 @@ fn swarm_incident_mitigation_explores_distinct_hypotheses_and_finds_best_family( fn swarm_prompt_optimization_finds_best_style_cluster() { let store_dir = temp_store_dir("swarm-prompt"); let mut runtime = MockRuntime::new(); - runtime.seed_success("exec-run-candidate-1", prompt_output("candidate-1", 0.74, 0.70)); - runtime.seed_success("exec-run-candidate-2", prompt_output("candidate-2", 0.89, 0.92)); - runtime.seed_success("exec-run-candidate-3", prompt_output("candidate-3", 0.78, 0.76)); - 
runtime.seed_success("exec-run-candidate-4", prompt_output("candidate-4", 0.69, 0.65)); - runtime.seed_success("exec-run-candidate-5", prompt_output("candidate-5", 0.81, 0.83)); - runtime.seed_success("exec-run-candidate-6", prompt_output("candidate-6", 0.76, 0.72)); - runtime.seed_success("exec-run-candidate-7", prompt_output("candidate-7", 0.72, 0.90)); - runtime.seed_success("exec-run-candidate-8", prompt_output("candidate-8", 0.96, 0.97)); + runtime.seed_success( + "exec-run-candidate-1", + prompt_output("candidate-1", 0.74, 0.70), + ); + runtime.seed_success( + "exec-run-candidate-2", + prompt_output("candidate-2", 0.89, 0.92), + ); + runtime.seed_success( + "exec-run-candidate-3", + prompt_output("candidate-3", 0.78, 0.76), + ); + runtime.seed_success( + "exec-run-candidate-4", + prompt_output("candidate-4", 0.69, 0.65), + ); + runtime.seed_success( + "exec-run-candidate-5", + prompt_output("candidate-5", 0.81, 0.83), + ); + runtime.seed_success( + "exec-run-candidate-6", + prompt_output("candidate-6", 0.76, 0.72), + ); + runtime.seed_success( + "exec-run-candidate-7", + prompt_output("candidate-7", 0.72, 0.90), + ); + runtime.seed_success( + "exec-run-candidate-8", + prompt_output("candidate-8", 0.96, 0.97), + ); let store = FsExecutionStore::new(store_dir.clone()); let mut service = ExecutionService::new( @@ -185,7 +212,9 @@ fn swarm_prompt_optimization_finds_best_style_cluster() { let best = snapshot .candidates .iter() - .find(|candidate| Some(&candidate.candidate_id) == execution.result_best_candidate_id.as_ref()) + .find(|candidate| { + Some(&candidate.candidate_id) == execution.result_best_candidate_id.as_ref() + }) .expect("best candidate"); assert_eq!(execution.status, ExecutionStatus::Completed); @@ -337,10 +366,22 @@ fn search_rate_limit_tuning_refines_known_good_direction() { fn search_pipeline_optimization_refines_known_bottleneck_config() { let store_dir = temp_store_dir("search-pipeline"); let mut runtime = MockRuntime::new(); - 
runtime.seed_success("exec-run-candidate-1", pipeline_output("candidate-1", 0.72, 0.78)); - runtime.seed_success("exec-run-candidate-2", pipeline_output("candidate-2", 0.84, 0.86)); - runtime.seed_success("exec-run-candidate-3", pipeline_output("candidate-3", 0.93, 0.95)); - runtime.seed_success("exec-run-candidate-4", pipeline_output("candidate-4", 0.80, 0.82)); + runtime.seed_success( + "exec-run-candidate-1", + pipeline_output("candidate-1", 0.72, 0.78), + ); + runtime.seed_success( + "exec-run-candidate-2", + pipeline_output("candidate-2", 0.84, 0.86), + ); + runtime.seed_success( + "exec-run-candidate-3", + pipeline_output("candidate-3", 0.93, 0.95), + ); + runtime.seed_success( + "exec-run-candidate-4", + pipeline_output("candidate-4", 0.80, 0.82), + ); let store = FsExecutionStore::new(store_dir.clone()); ExecutionService::::submit_execution( @@ -383,7 +424,9 @@ fn search_pipeline_optimization_refines_known_bottleneck_config() { let best = snapshot .candidates .iter() - .filter(|candidate| Some(&candidate.candidate_id) == snapshot.execution.result_best_candidate_id.as_ref()) + .filter(|candidate| { + Some(&candidate.candidate_id) == snapshot.execution.result_best_candidate_id.as_ref() + }) .max_by_key(|candidate| candidate.created_seq) .expect("best candidate"); @@ -680,7 +723,12 @@ fn proposal(items: &[(&str, &str)]) -> VariationProposal { } } -fn metrics_output(candidate_id: &str, latency_p99_ms: f64, cost_usd: f64, success_rate: f64) -> CandidateOutput { +fn metrics_output( + candidate_id: &str, + latency_p99_ms: f64, + cost_usd: f64, + success_rate: f64, +) -> CandidateOutput { CandidateOutput::new( candidate_id.to_string(), true, diff --git a/tests/void_box_contract.rs b/tests/void_box_contract.rs index 0c092b4..d2e1cf6 100644 --- a/tests/void_box_contract.rs +++ b/tests/void_box_contract.rs @@ -230,7 +230,11 @@ workflow: }; fs::write(path, yaml).unwrap_or_else(|e| { - panic!("failed to write fallback spec at '{}': {}", path.display(), e) + panic!( + 
"failed to write fallback spec at '{}': {}", + path.display(), + e + ) }); } @@ -296,7 +300,10 @@ fn http_post_json(base_url: &str, path: &str, payload: &Value) -> (u16, Value) { } fn assert_error_shape(v: &Value) { - assert!(v.get("code").and_then(Value::as_str).is_some(), "missing code"); + assert!( + v.get("code").and_then(Value::as_str).is_some(), + "missing code" + ); assert!( v.get("message").and_then(Value::as_str).is_some(), "missing message" @@ -342,7 +349,7 @@ fn is_terminal_status(status: &str) -> bool { ) } -fn get_artifact_publication<'a>(run: &'a Value) -> &'a Value { +fn get_artifact_publication(run: &Value) -> &Value { run.get("artifact_publication") .unwrap_or_else(|| panic!("missing artifact_publication: {run}")) } @@ -436,7 +443,10 @@ fn start_returns_enriched_contract_fields() { let run_id = unique_run_id("contract-start"); let (status, json) = http_post_json(&base, "/v1/runs", &start_payload(&run_id, &spec)); assert_eq!(status, 200, "body={json}"); - assert_eq!(json.get("run_id").and_then(Value::as_str), Some(run_id.as_str())); + assert_eq!( + json.get("run_id").and_then(Value::as_str), + Some(run_id.as_str()) + ); assert!(json.get("attempt_id").and_then(Value::as_u64).is_some()); assert!(json.get("state").and_then(Value::as_str).is_some()); } @@ -458,7 +468,10 @@ fn start_idempotency_active_run() { let (status_2, json_2) = http_post_json(&base, "/v1/runs", &payload); if status_2 == 200 { - assert_eq!(json_2.get("run_id").and_then(Value::as_str), Some(run_id.as_str())); + assert_eq!( + json_2.get("run_id").and_then(Value::as_str), + Some(run_id.as_str()) + ); assert_eq!( json_2.get("attempt_id").and_then(Value::as_u64), Some(first_attempt) @@ -491,8 +504,14 @@ fn inspect_enriched_fields() { assert!(json.get("attempt_id").and_then(Value::as_u64).is_some()); assert!(json.get("started_at").and_then(Value::as_str).is_some()); assert!(json.get("updated_at").and_then(Value::as_str).is_some()); - 
assert!(json.get("active_stage_count").and_then(Value::as_u64).is_some()); - assert!(json.get("active_microvm_count").and_then(Value::as_u64).is_some()); + assert!(json + .get("active_stage_count") + .and_then(Value::as_u64) + .is_some()); + assert!(json + .get("active_microvm_count") + .and_then(Value::as_u64) + .is_some()); } #[test] @@ -513,9 +532,18 @@ fn events_envelope_required_fields() { for e in events { let event_id = e.get("event_id").and_then(Value::as_str).expect("event_id"); let seq = e.get("seq").and_then(Value::as_u64).expect("seq"); - assert!(e.get("event_type").and_then(Value::as_str).is_some(), "event_type"); - assert!(e.get("attempt_id").and_then(Value::as_u64).is_some(), "attempt_id"); - assert!(e.get("timestamp").and_then(Value::as_str).is_some(), "timestamp"); + assert!( + e.get("event_type").and_then(Value::as_str).is_some(), + "event_type" + ); + assert!( + e.get("attempt_id").and_then(Value::as_u64).is_some(), + "attempt_id" + ); + assert!( + e.get("timestamp").and_then(Value::as_str).is_some(), + "timestamp" + ); assert!(e.get("run_id").and_then(Value::as_str).is_some(), "run_id"); seqs.push(seq); assert!(ids.insert(event_id.to_string()), "duplicate event_id"); @@ -570,8 +598,10 @@ fn events_resume_from_event_id() { assert_ne!(resumed_first, first_id); } - let (status_missing, json_missing) = - http_get_json(&base, &format!("/v1/runs/{run_id}/events?from_event_id=evt_missing")); + let (status_missing, json_missing) = http_get_json( + &base, + &format!("/v1/runs/{run_id}/events?from_event_id=evt_missing"), + ); assert_eq!(status_missing, 200); assert!(json_missing.as_array().is_some()); } @@ -596,12 +626,10 @@ fn cancel_returns_terminal_response_shape() { Some(run_id.as_str()) ); assert!(json_cancel.get("state").and_then(Value::as_str).is_some()); - assert!( - json_cancel - .get("terminal_event_id") - .and_then(Value::as_str) - .is_some() - ); + assert!(json_cancel + .get("terminal_event_id") + .and_then(Value::as_str) + .is_some()); } 
#[test] @@ -683,24 +711,25 @@ fn structured_output_result_json_is_retrievable() { DefaultSpecKind::StructuredOutputSuccess, ); let run_id = unique_run_id("contract-structured-output"); - let (status_start, body_start) = http_post_json(&base, "/v1/runs", &start_payload(&run_id, &spec)); + let (status_start, body_start) = + http_post_json(&base, "/v1/runs", &start_payload(&run_id, &spec)); assert_eq!(status_start, 200, "body={body_start}"); let terminal = wait_until_terminal(&base, &run_id, 30); assert_eq!( - terminal.get("status").and_then(Value::as_str).map(|s| s.to_ascii_lowercase()), + terminal + .get("status") + .and_then(Value::as_str) + .map(|s| s.to_ascii_lowercase()), Some("succeeded".to_string()), "terminal={terminal}" ); - let (status, body) = http_get_text( - &base, - &format!("/v1/runs/{run_id}/stages/main/output-file"), - ); + let (status, body) = + http_get_text(&base, &format!("/v1/runs/{run_id}/stages/main/output-file")); assert_eq!(status, 200, "body={body}"); - let parsed = serde_json::from_str::(&body).unwrap_or_else(|e| { - panic!("structured output was not valid JSON: {e}; body={body}") - }); + let parsed = serde_json::from_str::(&body) + .unwrap_or_else(|e| panic!("structured output was not valid JSON: {e}; body={body}")); assert!(parsed.get("metrics").and_then(Value::as_object).is_some()); } @@ -713,20 +742,22 @@ fn missing_result_json_is_typed_failure() { DefaultSpecKind::MissingStructuredOutput, ); let run_id = unique_run_id("contract-missing-structured-output"); - let (status_start, body_start) = http_post_json(&base, "/v1/runs", &start_payload(&run_id, &spec)); + let (status_start, body_start) = + http_post_json(&base, "/v1/runs", &start_payload(&run_id, &spec)); assert_eq!(status_start, 200, "body={body_start}"); let terminal = wait_until_terminal(&base, &run_id, 30); assert_eq!( - terminal.get("status").and_then(Value::as_str).map(|s| s.to_ascii_lowercase()), + terminal + .get("status") + .and_then(Value::as_str) + .map(|s| 
s.to_ascii_lowercase()), Some("failed".to_string()), "terminal={terminal}" ); - let (status, json) = http_get_json( - &base, - &format!("/v1/runs/{run_id}/stages/main/output-file"), - ); + let (status, json) = + http_get_json(&base, &format!("/v1/runs/{run_id}/stages/main/output-file")); assert!(status >= 400, "body={json}"); assert_error_shape(&json); assert_eq!( @@ -744,20 +775,22 @@ fn malformed_result_json_is_typed_failure() { DefaultSpecKind::MalformedStructuredOutput, ); let run_id = unique_run_id("contract-malformed-structured-output"); - let (status_start, body_start) = http_post_json(&base, "/v1/runs", &start_payload(&run_id, &spec)); + let (status_start, body_start) = + http_post_json(&base, "/v1/runs", &start_payload(&run_id, &spec)); assert_eq!(status_start, 200, "body={body_start}"); let terminal = wait_until_terminal(&base, &run_id, 30); assert_eq!( - terminal.get("status").and_then(Value::as_str).map(|s| s.to_ascii_lowercase()), + terminal + .get("status") + .and_then(Value::as_str) + .map(|s| s.to_ascii_lowercase()), Some("failed".to_string()), "terminal={terminal}" ); - let (status, json) = http_get_json( - &base, - &format!("/v1/runs/{run_id}/stages/main/output-file"), - ); + let (status, json) = + http_get_json(&base, &format!("/v1/runs/{run_id}/stages/main/output-file")); assert!(status >= 400, "body={json}"); assert_error_shape(&json); assert_eq!( @@ -775,12 +808,16 @@ fn manifest_lists_named_artifacts() { DefaultSpecKind::StructuredOutputWithArtifact, ); let run_id = unique_run_id("contract-artifact-manifest"); - let (status_start, body_start) = http_post_json(&base, "/v1/runs", &start_payload(&run_id, &spec)); + let (status_start, body_start) = + http_post_json(&base, "/v1/runs", &start_payload(&run_id, &spec)); assert_eq!(status_start, 200, "body={body_start}"); let terminal = wait_until_terminal(&base, &run_id, 30); assert_eq!( - terminal.get("status").and_then(Value::as_str).map(|s| s.to_ascii_lowercase()), + terminal + .get("status") + 
.and_then(Value::as_str) + .map(|s| s.to_ascii_lowercase()), Some("succeeded".to_string()), "terminal={terminal}" ); @@ -824,7 +861,8 @@ fn named_artifact_endpoint_serves_manifested_file() { DefaultSpecKind::StructuredOutputWithArtifact, ); let run_id = unique_run_id("contract-named-artifact"); - let (status_start, body_start) = http_post_json(&base, "/v1/runs", &start_payload(&run_id, &spec)); + let (status_start, body_start) = + http_post_json(&base, "/v1/runs", &start_payload(&run_id, &spec)); assert_eq!(status_start, 200, "body={body_start}"); let _ = wait_until_terminal(&base, &run_id, 30); @@ -834,7 +872,10 @@ fn named_artifact_endpoint_serves_manifested_file() { let path = manifest_retrieval_path(&inspect, "report.md"); let (status, body) = http_get_text(&base, &path); assert_eq!(status, 200, "body={body}"); - assert!(body.contains("artifact content"), "unexpected artifact body={body}"); + assert!( + body.contains("artifact content"), + "unexpected artifact body={body}" + ); } #[test] @@ -843,7 +884,8 @@ fn active_run_listing_supports_reconciliation() { let base = require_env("VOID_BOX_BASE_URL"); let spec = resolve_spec_path("VOID_BOX_TEST_SPEC_FILE", DefaultSpecKind::LongRunning); let run_id = unique_run_id("contract-active-reconciliation"); - let (status_start, body_start) = http_post_json(&base, "/v1/runs", &start_payload(&run_id, &spec)); + let (status_start, body_start) = + http_post_json(&base, "/v1/runs", &start_payload(&run_id, &spec)); assert_eq!(status_start, 200, "body={body_start}"); let (status_active, active) = http_get_json(&base, "/v1/runs?state=active"); @@ -856,10 +898,17 @@ fn active_run_listing_supports_reconciliation() { run.get("run_id").and_then(Value::as_str) == Some(run_id.as_str()) || run.get("id").and_then(Value::as_str) == Some(run_id.as_str()) }); - let matching = matching.unwrap_or_else(|| panic!("started run not present in active listing: {active}")); + let matching = + matching.unwrap_or_else(|| panic!("started run not 
present in active listing: {active}")); assert!(matching.get("attempt_id").and_then(Value::as_u64).is_some()); - assert!(matching.get("active_stage_count").and_then(Value::as_u64).is_some()); - assert!(matching.get("active_microvm_count").and_then(Value::as_u64).is_some()); + assert!(matching + .get("active_stage_count") + .and_then(Value::as_u64) + .is_some()); + assert!(matching + .get("active_microvm_count") + .and_then(Value::as_u64) + .is_some()); let (status_terminal, terminal) = http_get_json(&base, "/v1/runs?state=terminal"); assert_eq!(status_terminal, 200, "body={terminal}");