diff --git a/CLAUDE.md b/CLAUDE.md
index c6717b0..6b1e010 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -1,4 +1,6 @@
-# NOPEA: AI-Native Deployment Tool with Memory
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
 
 ---
 
@@ -10,196 +12,206 @@ Nopea is a deployment tool that builds a knowledge graph from every deployment.
 
 ---
 
-## ARCHITECTURE
+## BUILD AND TEST COMMANDS
 
+```bash
+# Full verification (run after every change)
+mix format && mix compile --warnings-as-errors && mix test
+
+# Individual commands
+mix test                                    # 280 tests, 0 failures
+mix test test/nopea/deploy_test.exs         # Single file
+mix test test/nopea/deploy_test.exs:106     # Single test by line number
+mix test --exclude integration --exclude cluster  # Skip slow tests
+mix format --check-formatted
+mix credo
+mix escript.build                           # CLI binary → ./nopea
 ```
-CLI/MCP/API → Deploy.run(spec)
-    → Memory.get_deploy_context()        # KERTO graph query
-    → select_strategy()                  # direct/canary/blue_green
-    → Strategy.*.execute()               # K8s server-side apply
+
+Tests exclude `:integration` and `:cluster` tags by default (configured in `test_helper.exs`).
+
+---
+
+## DEPLOY PIPELINE
+
+```
+CLI/MCP/API → Deploy.deploy(spec)
+    → ServiceAgent.deploy()              # queue/serialize per-service
+    → Deploy.run(spec)                   # orchestration
+    → Memory.get_deploy_context()        # graph query
+    → select_strategy()                  # direct/canary/blue_green (memory-aware)
+    → Strategy.Direct.execute()          # K8s server-side apply
     → Drift.verify_manifest()            # post-deploy 3-way diff
-    → Memory.record_deploy()             # graph update (EWMA)
+    → Memory.record_deploy()             # graph update (EWMA, async cast)
     → Occurrence.build() + persist()     # FALSE Protocol
 ```
 
-### OTP Supervision Tree
+**Entry points**: `Deploy.deploy/1` routes through ServiceAgent if the supervisor is running; falls back to `Deploy.run/1` otherwise. Always use `deploy/1` — never call `run/1` directly from external callers.
+
+---
+
+## OTP SUPERVISION TREE
 
 ```
 Nopea.Application
-├── Nopea.ULID                    # Monotonic ID generator
-├── TelemetryMetricsPrometheus    # Metrics (optional)
-├── Nopea.Events.Emitter          # CDEvents HTTP emitter (optional)
-├── Nopea.Memory                  # GenServer wrapping KERTO Graph
-├── Nopea.Cache                   # ETS tables for deployment state
-├── Nopea.Registry                # Process registry
-├── Nopea.Deploy.Supervisor       # DynamicSupervisor for deploy workers
-└── Nopea.API.Router              # Plug/Cowboy HTTP (optional)
+├── Nopea.ULID                       # Monotonic ID generator
+├── TelemetryMetricsPrometheus       # Metrics (optional)
+├── Nopea.Events.Emitter             # CDEvents HTTP emitter (optional)
+├── Nopea.Cache                      # ETS tables for deployment state
+├── Nopea.Memory                     # GenServer wrapping knowledge graph
+├── Nopea.Cluster                    # libcluster (optional, cluster mode)
+├── Nopea.Registry / DistributedRegistry  # Process registry
+├── Nopea.ServiceAgent.Supervisor    # DynamicSupervisor for per-service agents
+└── Nopea.API.Router                 # Plug/Cowboy HTTP (optional)
 ```
 
----
+### Configuration Feature Flags
 
-## KEY MODULES
-
-| Module | Role | Lines |
-|--------|------|-------|
-| `Deploy` | Orchestration: context → strategy → execute → verify → record | ~100 |
-| `Deploy.Spec` / `Result` | Structs for deploy lifecycle | ~100 |
-| `Deploy.Worker` / `Supervisor` | Per-deploy GenServer + DynamicSupervisor | ~60 |
-| `Strategy.Direct` | Immediate K8s apply | ~20 |
-| `Strategy.Canary` | Gradual rollout (stub, step API exposed) | ~50 |
-| `Strategy.BlueGreen` | Slot-based cutover (stub, slot API exposed) | ~60 |
-| `Memory` | GenServer owning KERTO `Graph.t()` | ~150 |
-| `Memory.Ingestor` | Deploy events → graph upsert operations | ~100 |
-| `Memory.Query` | Context queries (failure patterns, deps) | ~100 |
-| `Occurrence` | FALSE Protocol occurrence generator | ~150 |
-| `MCP` | JSON-RPC MCP server (tools/list, tools/call) | ~200 |
-| `API.Router` | HTTP API (deploy, context, history endpoints) | ~100 |
-| `SYKLI.Target` | SYKLI target behaviour adapter | ~60 |
-| `CLI` | Escript entry point | ~100 |
-| `K8s` | K8s API wrapper (conn, apply, get, delete) | ~70 |
-| `Applier` | YAML parsing + K8s server-side apply | ~200 |
-| `Drift` | Three-way drift detection (normalize, diff, verify) | ~250 |
-| `Cache` | ETS tables: deployments, service_state, graph_snapshot | ~100 |
-| `Events` / `Events.Emitter` | CDEvents builder + async HTTP emitter | ~200 |
-| `ULID` | Monotonic ULID generator | ~80 |
+Most children are optional, controlled by `Application.get_env(:nopea, key)`:
 
----
-
-## FILE LOCATIONS
-
-| What | Where |
-|------|-------|
-| OTP Application | `lib/nopea/application.ex` |
-| Deploy orchestration | `lib/nopea/deploy.ex` |
-| Deploy structs | `lib/nopea/deploy/spec.ex`, `result.ex` |
-| Deploy workers | `lib/nopea/deploy/worker.ex`, `supervisor.ex` |
-| Strategy behaviour | `lib/nopea/strategy.ex` |
-| Strategy impls | `lib/nopea/strategy/direct.ex`, `canary.ex`, `blue_green.ex` |
-| Memory (KERTO) | `lib/nopea/memory.ex` |
-| Memory helpers | `lib/nopea/memory/ingestor.ex`, `query.ex` |
-| FALSE Protocol | `lib/nopea/occurrence.ex` |
-| MCP server | `lib/nopea/mcp.ex` |
-| HTTP API | `lib/nopea/api/router.ex` |
-| SYKLI integration | `lib/nopea/sykli/target.ex` |
-| CLI | `lib/nopea/cli.ex` |
-| K8s client | `lib/nopea/k8s.ex`, `k8s/behaviour.ex` |
-| YAML + apply | `lib/nopea/applier.ex` |
-| Drift detection | `lib/nopea/drift.ex` |
-| Cache (ETS) | `lib/nopea/cache.ex` |
-| Events | `lib/nopea/events.ex`, `events/emitter.ex` |
-| Metrics | `lib/nopea/metrics.ex` |
-| ULID | `lib/nopea/ulid.ex` |
-| Clustering | `lib/nopea/cluster.ex`, `distributed_*.ex` |
+| Key | Default | Controls |
+|-----|---------|----------|
+| `:enable_metrics` | `true` | TelemetryMetricsPrometheus |
+| `:enable_cache` | `true` | Nopea.Cache (ETS) |
+| `:enable_memory` | `true` | Nopea.Memory (knowledge graph) |
+| `:enable_deploy_supervisor` | `true` | Registry + ServiceAgent.Supervisor |
+| `:enable_router` | `false` | Nopea.API.Router (HTTP) |
+| `:cluster_enabled` | `false` | Cluster + DistributedRegistry |
+| `:cdevents_endpoint` | `nil` | Events.Emitter (started only if set) |
+| `:canary_threshold` | `0.15` | Failure confidence for auto-canary |
 
 ---
 
-## DEPENDENCIES
+## STRATEGY AUTO-SELECTION
 
-| Package | Purpose |
-|---------|---------|
-| `kerto` | Knowledge graph (path dep: `../kerto`) |
-| `k8s` | Kubernetes client |
-| `yaml_elixir` | YAML parsing |
-| `jason` | JSON |
-| `plug_cowboy` | HTTP server |
-| `req` | HTTP client (CDEvents) |
-| `libcluster` | BEAM clustering (optional) |
-| `horde` | Distributed supervisor (optional) |
-| `telemetry` + `telemetry_metrics` + `prometheus_core` | Observability |
-| `mox` | Test mocking (test only) |
-| `credo` | Linting (dev/test only) |
+```elixir
+# Explicit strategy always wins
+defp select_strategy(%Spec{strategy: strategy}, _context)
+     when strategy in [:direct, :canary, :blue_green], do: strategy
+
+# Memory-based: known service with high failure confidence → canary
+defp select_strategy(%Spec{strategy: nil}, %{known: true, failure_patterns: patterns})
+     when is_list(patterns) do
+  threshold = Application.get_env(:nopea, :canary_threshold, 0.15)
+  if Enum.any?(patterns, fn p -> p.confidence > threshold end), do: :canary, else: :direct
+end
+
+# Default: direct
+defp select_strategy(%Spec{strategy: nil}, _context), do: :direct
+```
 
-**No Rust. No msgpax. No git operations.**
+Canary/blue_green strategies use `Kulta.RolloutBuilder` to build a Rollout custom resource from the spec's Deployment manifest. If no Deployment manifest is found in the spec, the strategy fails with `:no_deployment_found`.
 
 ---
 
-## ELIXIR PATTERNS
+## SERVICE AGENT
 
-### Error Handling
+Per-service GenServer that queues and serializes deploys:
 
-```elixir
-# Use {:ok, _} / {:error, _} tuples, not bare raise
-with {:ok, conn} <- K8s.conn(),
-     {:ok, applied} <- Applier.apply_manifests(manifests, conn, ns) do
-  {:ok, applied}
-end
-```
+- **Queue limit**: 10 — rejects excess with `{:error, :queue_full}`
+- **Crash cooldown**: 2s delay before dequeuing after worker crash
+- **Idle timeout**: 30 min — agent shuts down if no deploys
+- **Lookup**: `ServiceAgent.status(service)` returns `{:ok, %{status: :idle | :deploying, ...}}`
+- **Health**: `ServiceAgent.health()` queries all active agents
 
-### Logging
+---
 
-```elixir
-require Logger
-Logger.info("Deploy completed: #{service} [#{deploy_id}] in #{duration_ms}ms")
-# No IO.puts, no IO.inspect in production code
-```
+## MEMORY SYSTEM
 
-### Atoms not Strings
+Knowledge graph stored in `Nopea.Memory` GenServer state.
 
-```elixir
-# Status: :completed, :failed — not "completed", "failed"
-# Strategy: :direct, :canary, :blue_green — not strings
-```
+**Graph nodes**: services, namespaces, errors (kinds: `:concept`, `:error`)
+**Graph relationships**: `:deployed_to`, `:breaks`, `:deployed_together`
+**EWMA decay**: Weights decay hourly (factor 0.98) so recent deploys matter more
 
-### K8s Mock Pattern
+Key API:
+- `Memory.get_deploy_context(service, namespace)` → failure patterns, recommendations
+- `Memory.record_deploy(result)` → ingest into graph (**async cast**)
+- `Memory.node_count()` / `Memory.relationship_count()` → graph stats (**sync call**)
 
-`Nopea.K8s` implements `Nopea.K8s.Behaviour`. In tests, `Nopea.K8sMock` (Mox) is injected via:
+---
+
+## K8S MOCK PATTERN
+
+`Nopea.K8s` implements `Nopea.K8s.Behaviour`. Mox injects `Nopea.K8sMock` in tests via config:
 
 ```elixir
-# test_helper.exs
+# test_helper.exs sets:
 Application.put_env(:nopea, :k8s_module, Nopea.K8sMock)
 
-# Direct.execute uses:
+# Production code resolves at runtime:
 defp k8s_module, do: Application.get_env(:nopea, :k8s_module, Nopea.K8s)
+```
 
-# Tests that don't set explicit expectations:
+### Test Setup Patterns
+
+**Unit tests** (no spawned processes):
+```elixir
+setup :verify_on_exit!
 setup do
   Mox.stub_with(Nopea.K8sMock, Nopea.K8s)
+  Mox.stub(Nopea.K8sMock, :get_resource, fn _, _, _, _ -> {:error, :not_found} end)
   :ok
 end
-
-# Tests with spawned processes (Worker, Supervisor):
-setup :set_mox_global
 ```
 
----
+**Integration tests** (ServiceAgent, spawned workers):
+```elixir
+setup :set_mox_global          # MUST come before other setup — allows spawned processes to use mocks
+setup :verify_on_exit!
+setup do
+  Mox.stub_with(Nopea.K8sMock, Nopea.K8s)
+  start_supervised!({Registry, keys: :unique, name: Nopea.Registry})
+  start_supervised!(Nopea.ServiceAgent.Supervisor)
+  start_supervised!({Nopea.Memory, []})
+  start_supervised!(Nopea.Cache)
+  :ok
+end
+```
 
-## TDD WORKFLOW
+### Sync After Async Casts
 
-**RED → GREEN → REFACTOR** — Always.
+`Memory.record_deploy/1` is a `cast` — don't use `Process.sleep` to wait for it. Use any `GenServer.call` to the same process as a mailbox flush:
 
-1. Write failing test
-2. Verify it fails
-3. Write minimal implementation
-4. Verify all tests pass
-5. Refactor, add edge cases
-6. Run `mix format && mix compile --warnings-as-errors && mix test`
+```elixir
+# Messages between two processes arrive in send order, so casts this process sent earlier are handled before this call returns
+_ = Nopea.Memory.node_count()
+ctx = Nopea.Memory.get_deploy_context("svc", "ns")
+```
 
----
+### Test Factories
 
-## VERIFICATION
-
-```bash
-mix compile --warnings-as-errors
-mix test                          # 235 tests, 0 failures
-mix format --check-formatted
-mix credo
-mix escript.build                 # CLI binary
-```
+Available in `test/support/factory.ex`:
+- `Nopea.Test.Factory.sample_deployment_manifest(name, namespace)`
+- `Nopea.Test.Factory.sample_service_manifest(name)`
+- `Nopea.Test.Factory.sample_configmap_manifest(name, namespace, data)`
 
 ---
 
-## MEMORY SYSTEM (KERTO)
+## ELIXIR PATTERNS
 
-The memory is a KERTO knowledge graph stored in the `Nopea.Memory` GenServer state.
+### Error Handling
+```elixir
+# {:ok, _} / {:error, _} tuples — no bare raise
+with {:ok, conn} <- K8s.conn(),
+     {:ok, applied} <- Applier.apply_manifests(manifests, conn, ns) do
+  {:ok, applied}
+end
+```
 
-**Graph nodes**: services, namespaces, errors, strategies
-**Graph relationships**: `:deployed_to`, `:failed_with`, `:depends_on`, `:used_strategy`
-**EWMA decay**: Weights decay hourly (factor 0.98) so recent deploys matter more
+### Logging
+```elixir
+require Logger
+Logger.info("Deploy completed", service: service, deploy_id: deploy_id, duration_ms: duration_ms)
+# Use structured metadata — a key is only printed if it is listed in the formatter's metadata: list in config/config.exs
+# No IO.puts or IO.inspect in production code
+```
 
-Key queries:
-- `Memory.get_deploy_context(service, namespace)` → failure patterns, recommendations
-- `Memory.record_deploy(result)` → ingest into graph (cast)
-- `Memory.get_graph_stats()` → node/relationship counts
+### Atoms not Strings
+```elixir
+# Status: :completed, :failed — not "completed", "failed"
+# Strategy: :direct, :canary, :blue_green — not strings
+```
 
 ---
 
@@ -208,38 +220,31 @@ Key queries:
 Occurrences are structured events generated after every deployment.
 
 **Types**: `deploy.run.completed`, `deploy.run.failed`, `deploy.run.rolledback`
-**Blocks**: error, reasoning (includes memory context), history, deploy_data
 **Storage**: `.nopea/occurrence.json` (cold) + `.nopea/occurrences/*.etf` (warm)
 
 ---
 
 ## MCP SERVER
 
-JSON-RPC 2.0 over stdin/stdout. Tools:
-
-| Tool | Description |
-|------|-------------|
-| `nopea_deploy` | Deploy manifests to K8s |
-| `nopea_context` | Get memory context for a service |
-| `nopea_history` | Get deployment history |
-| `nopea_explain` | Explain strategy selection reasoning |
+JSON-RPC 2.0 over stdin/stdout. Tools: `nopea_deploy`, `nopea_context`, `nopea_history`, `nopea_health`, `nopea_explain`.
 
 ---
 
-## STRATEGY AUTO-SELECTION
-
-```elixir
-# Explicit strategy always wins
-defp select_strategy(%Spec{strategy: strategy}, _) when not is_nil(strategy), do: strategy
-
-# Memory-based: high failure confidence → canary
-defp select_strategy(_spec, %{failure_patterns: patterns}) do
-  if Enum.any?(patterns, fn p -> p.confidence > 0.15 end), do: :canary, else: :direct
-end
+## DEPENDENCIES
 
-# Default: direct
-defp select_strategy(_spec, _context), do: :direct
-```
+| Package | Purpose |
+|---------|---------|
+| `false_protocol` | FALSE Protocol occurrence generation |
+| `k8s` | Kubernetes client |
+| `yaml_elixir` | YAML parsing |
+| `jason` | JSON |
+| `plug_cowboy` | HTTP server |
+| `req` | HTTP client (CDEvents) |
+| `libcluster` | BEAM clustering (optional) |
+| `horde` | Distributed supervisor/registry (optional) |
+| `telemetry` + `prometheus_core` | Observability |
+| `mox` | Test mocking (test only) |
+| `credo` | Linting (dev/test only) |
 
 ---
 
@@ -249,6 +254,7 @@ defp select_strategy(_spec, _context), do: :direct
 2. **TDD always** — write failing test, implement, refactor
 3. **No stubs** — complete implementations only
 4. **Typespecs required** — all public functions
-5. **Run checks** — `mix compile --warnings-as-errors && mix test`
-6. **No IO.puts** — use `require Logger`
+5. **Run checks** — `mix format && mix compile --warnings-as-errors && mix test`
+6. **No IO.puts** — use `require Logger` with structured metadata
 7. **No bare raise** — use `{:error, reason}` tuples
+8. **No Process.sleep in tests** — to wait for an async cast, make a `GenServer.call` to the same process as a barrier
diff --git a/config/config.exs b/config/config.exs
index 50dddd2..4a3ad5c 100644
--- a/config/config.exs
+++ b/config/config.exs
@@ -5,4 +5,24 @@ config :nopea,
   enable_memory: true,
   enable_cache: true
 
+config :logger, :default_formatter,
+  format: "$time $metadata[$level] $message\n",
+  metadata: [
+    :service,
+    :deploy_id,
+    :namespace,
+    :strategy,
+    :error,
+    :reason,
+    :duration_ms,
+    :resource,
+    :stacktrace,
+    :cooldown_ms,
+    :queued,
+    :node_count,
+    :relationship_count,
+    :auto_selected,
+    :verified
+  ]
+
 import_config "#{config_env()}.exs"
diff --git a/lib/nopea/cli.ex b/lib/nopea/cli.ex
index 23fe5ab..95d36dc 100644
--- a/lib/nopea/cli.ex
+++ b/lib/nopea/cli.ex
@@ -7,11 +7,12 @@ defmodule Nopea.CLI do
   - status   Show deployment status
   - context  Show memory context for a service
   - history  Show deployment history
-  - rollback Roll back a deployment
   - memory   Show memory graph stats
   - serve    Start daemon mode (HTTP API)
   """
 
+  require Logger
+
   def main(args) do
     {opts, args, _} =
       OptionParser.parse(args,
@@ -44,7 +45,7 @@ defmodule Nopea.CLI do
 
     case Nopea.Deploy.Spec.from_path(path, service, namespace, strategy: strategy) do
       {:ok, spec} ->
-        result = Nopea.Deploy.run(spec)
+        result = Nopea.Deploy.deploy(spec)
         output(result, opts)
 
       {:error, reason} ->
@@ -98,21 +99,19 @@ defmodule Nopea.CLI do
   end
 
   defp serve(_opts) do
-    IO.puts("Starting Nopea daemon...")
+    Logger.info("Starting Nopea daemon...")
+    Application.put_env(:nopea, :enable_router, true)
 
-    case Supervisor.start_child(Nopea.AppSupervisor, Nopea.API.Router) do
-      {:ok, _pid} ->
+    case Application.ensure_all_started(:nopea) do
+      {:ok, _apps} ->
         port = Application.get_env(:nopea, :api_port, 4000)
-        IO.puts("Nopea API listening on port #{port}")
-
-      {:error, {:already_started, _pid}} ->
-        IO.puts("Nopea API already running")
+        Logger.info("Nopea API listening on port #{port}")
+        Process.sleep(:infinity)
 
       {:error, reason} ->
-        IO.puts(:stderr, "Failed to start API: #{inspect(reason)}")
+        Logger.error("Failed to start Nopea: #{inspect(reason)}")
+        System.halt(1)
     end
-
-    Process.sleep(:infinity)
   end
 
   defp output(data, opts) do
diff --git a/lib/nopea/deploy.ex b/lib/nopea/deploy.ex
index 516766a..86ee106 100644
--- a/lib/nopea/deploy.ex
+++ b/lib/nopea/deploy.ex
@@ -119,6 +119,15 @@ defmodule Nopea.Deploy do
     strategy
   end
 
+  defp select_strategy(%Spec{strategy: nil}, %{known: true, failure_patterns: patterns})
+       when is_list(patterns) do
+    threshold = Application.get_env(:nopea, :canary_threshold, 0.15)
+
+    if Enum.any?(patterns, fn p -> p.confidence > threshold end),
+      do: :canary,
+      else: :direct
+  end
+
   defp select_strategy(%Spec{strategy: nil}, _context), do: :direct
 
   defp select_strategy(%Spec{strategy: other}, _context) do
@@ -151,21 +160,12 @@ defmodule Nopea.Deploy do
 
   defp verify_deploy(spec, applied) when is_list(applied) do
     Enum.all?(applied, fn manifest ->
-      case Nopea.Drift.verify_manifest(spec.service, manifest) do
+      case Nopea.Drift.verify_manifest(spec.service, manifest, k8s_module: k8s_module()) do
         :no_drift -> true
         :new_resource -> true
         _ -> false
       end
     end)
-  rescue
-    error ->
-      Logger.warning("Post-deploy verification failed",
-        service: spec.service,
-        error: inspect(error),
-        stacktrace: __STACKTRACE__ |> Exception.format_stacktrace()
-      )
-
-      false
   end
 
   defp verify_deploy(_spec, _applied), do: false
@@ -178,7 +178,7 @@ defmodule Nopea.Deploy do
         status: result.status,
         error: result.error,
         duration_ms: result.duration_ms,
-        concurrent_deploys: []
+        concurrent_deploys: get_concurrent_services(result.service)
       })
     end
 
@@ -408,6 +408,30 @@ defmodule Nopea.Deploy do
 
   defp emitter_running?, do: Process.whereis(Nopea.Events.Emitter) != nil
 
+  defp get_concurrent_services(current_service) do
+    if Process.whereis(Nopea.Registry) do
+      Registry.select(Nopea.Registry, [
+        {{:"$1", :"$2", :_}, [], [{{:"$1", :"$2"}}]}
+      ])
+      |> Enum.flat_map(fn
+        {{:service, name}, pid} when name != current_service ->
+          try do
+            case GenServer.call(pid, :status, 1_000) do
+              %{status: :deploying} -> [name]
+              _ -> []
+            end
+          catch
+            :exit, _ -> []
+          end
+
+        _ ->
+          []
+      end)
+    else
+      []
+    end
+  end
+
   defp duration_ms(start_time) do
     System.convert_time_unit(System.monotonic_time() - start_time, :native, :millisecond)
   end
diff --git a/lib/nopea/graph/relation_type.ex b/lib/nopea/graph/relation_type.ex
index cca26c7..e0e65ce 100644
--- a/lib/nopea/graph/relation_type.ex
+++ b/lib/nopea/graph/relation_type.ex
@@ -7,7 +7,8 @@ defmodule Nopea.Graph.RelationType do
 
   @types [
     :breaks,
-    :deployed_to
+    :deployed_to,
+    :deployed_together
   ]
 
   @spec valid?(term()) :: boolean()
diff --git a/lib/nopea/memory/ingestor.ex b/lib/nopea/memory/ingestor.ex
index f91f03e..c1e7b8c 100644
--- a/lib/nopea/memory/ingestor.ex
+++ b/lib/nopea/memory/ingestor.ex
@@ -80,9 +80,28 @@ defmodule Nopea.Memory.Ingestor do
 
   defp maybe_record_failure(graph, _result, _ulid), do: graph
 
-  defp maybe_record_dependencies(graph, %{concurrent_deploys: [_ | _] = deploys}, ulid) do
+  defp maybe_record_dependencies(
+         graph,
+         %{service: service, concurrent_deploys: [_ | _] = deploys},
+         ulid
+       ) do
+    service_id = Nopea.Graph.Identity.compute_id(:concept, service)
+
     Enum.reduce(deploys, graph, fn other_service, g ->
       {g, _node} = Graph.upsert_node(g, :concept, other_service, 0.5, ulid)
+      other_id = Nopea.Graph.Identity.compute_id(:concept, other_service)
+
+      {g, _rel} =
+        Graph.upsert_relationship(
+          g,
+          service_id,
+          :deployed_together,
+          other_id,
+          0.5,
+          ulid,
+          "concurrent deploy at #{DateTime.utc_now() |> DateTime.to_iso8601()}"
+        )
+
       g
     end)
   end
diff --git a/lib/nopea/sykli/target.ex b/lib/nopea/sykli/target.ex
index e283602..bc7750d 100644
--- a/lib/nopea/sykli/target.ex
+++ b/lib/nopea/sykli/target.ex
@@ -59,7 +59,7 @@ defmodule Nopea.SYKLI.Target do
       strategy: Map.get(task, :strategy)
     }
 
-    result = Nopea.Deploy.run(spec)
+    result = Nopea.Deploy.deploy(spec)
 
     case result.status do
       :completed -> {:ok, result}
diff --git a/test/nopea/deploy_integration_test.exs b/test/nopea/deploy_integration_test.exs
index b573ff9..1bde765 100644
--- a/test/nopea/deploy_integration_test.exs
+++ b/test/nopea/deploy_integration_test.exs
@@ -11,6 +11,12 @@ defmodule Nopea.DeployIntegrationTest do
   setup do
     start_supervised!(Nopea.Cache)
     start_supervised!({Nopea.Memory, []})
+
+    # Stub get_resource — no real cluster in tests
+    Mox.stub(Nopea.K8sMock, :get_resource, fn _api, _kind, _name, _ns ->
+      {:error, :not_found}
+    end)
+
     :ok
   end
 
diff --git a/test/nopea/deploy_test.exs b/test/nopea/deploy_test.exs
index b0095cd..c7f9151 100644
--- a/test/nopea/deploy_test.exs
+++ b/test/nopea/deploy_test.exs
@@ -11,6 +11,12 @@ defmodule Nopea.DeployTest do
   setup do
     # Stub K8s mock to fall through to real implementation (works for empty manifests)
     Mox.stub_with(Nopea.K8sMock, Nopea.K8s)
+
+    # Stub get_resource to return not_found — no real cluster in tests
+    Mox.stub(Nopea.K8sMock, :get_resource, fn _api, _kind, _name, _ns ->
+      {:error, :not_found}
+    end)
+
     # Start Memory for context tracking
     start_supervised!({Nopea.Memory, []})
     # Start Cache for state recording
@@ -48,8 +54,8 @@ defmodule Nopea.DeployTest do
 
       Deploy.run(spec)
 
-      # Memory.record_deploy is a cast, give it time
-      Process.sleep(50)
+      # Flush Memory mailbox — node_count is a call, so all prior casts complete first
+      _ = Nopea.Memory.node_count()
 
       ctx = Nopea.Memory.get_deploy_context("memory-test-svc", "default")
       assert ctx.known == true
@@ -85,7 +91,7 @@ defmodule Nopea.DeployTest do
   end
 
   describe "strategy selection" do
-    test "always uses direct when no explicit strategy" do
+    test "unknown service (no memory) defaults to direct" do
       spec = %Spec{
         service: "clean-svc",
         namespace: "default",
@@ -96,6 +102,131 @@ defmodule Nopea.DeployTest do
       result = Deploy.run(spec)
       assert result.strategy == :direct
     end
+
+    test "known service with high failure confidence auto-selects canary" do
+      # First, create failure history so Memory knows about this service
+      Nopea.Memory.record_deploy(%{
+        service: "flaky-svc",
+        namespace: "default",
+        status: :failed,
+        error: {:timeout, "connection refused"},
+        concurrent_deploys: []
+      })
+
+      # Reinforce the failure pattern to push confidence above threshold
+      for _ <- 1..4 do
+        Nopea.Memory.record_deploy(%{
+          service: "flaky-svc",
+          namespace: "default",
+          status: :failed,
+          error: {:timeout, "connection refused"},
+          concurrent_deploys: []
+        })
+      end
+
+      # Flush Memory mailbox — node_count is a call, so all prior casts complete first
+      _ = Nopea.Memory.node_count()
+
+      # Verify memory has failure patterns above threshold
+      ctx = Nopea.Memory.get_deploy_context("flaky-svc", "default")
+      assert ctx.known == true
+      assert Enum.any?(ctx.failure_patterns, fn p -> p.confidence > 0.15 end)
+
+      # Now deploy with nil strategy — should auto-select canary
+      deployment = Nopea.Test.Factory.sample_deployment_manifest("flaky-svc", "default")
+
+      Nopea.K8sMock
+      |> expect(:apply_manifest, fn manifest, "default" ->
+        assert manifest["kind"] == "Rollout"
+        {:ok, manifest}
+      end)
+
+      spec = %Spec{
+        service: "flaky-svc",
+        namespace: "default",
+        manifests: [deployment],
+        strategy: nil
+      }
+
+      result = Deploy.run(spec)
+      assert result.strategy == :canary
+    end
+
+    test "known service with low failure confidence stays direct" do
+      # Single success — known but no failure patterns
+      Nopea.Memory.record_deploy(%{
+        service: "stable-svc",
+        namespace: "default",
+        status: :completed,
+        error: nil,
+        concurrent_deploys: []
+      })
+
+      _ = Nopea.Memory.node_count()
+
+      ctx = Nopea.Memory.get_deploy_context("stable-svc", "default")
+      assert ctx.known == true
+      assert ctx.failure_patterns == []
+
+      spec = %Spec{
+        service: "stable-svc",
+        namespace: "default",
+        manifests: [],
+        strategy: nil
+      }
+
+      result = Deploy.run(spec)
+      assert result.strategy == :direct
+    end
+
+    test "explicit strategy always overrides memory" do
+      # Create failure history
+      for _ <- 1..5 do
+        Nopea.Memory.record_deploy(%{
+          service: "override-svc",
+          namespace: "default",
+          status: :failed,
+          error: "crash",
+          concurrent_deploys: []
+        })
+      end
+
+      _ = Nopea.Memory.node_count()
+
+      spec = %Spec{
+        service: "override-svc",
+        namespace: "default",
+        manifests: [],
+        strategy: :direct
+      }
+
+      result = Deploy.run(spec)
+      assert result.strategy == :direct
+    end
+  end
+
+  describe "verify_deploy crash propagation" do
+    test "malformed manifest raises instead of returning false" do
+      # A manifest missing "apiVersion" and "kind" will cause Drift.verify_manifest
+      # to raise KeyError — this should propagate, not be silently caught
+      malformed = %{"metadata" => %{"name" => "bad"}}
+
+      Nopea.K8sMock
+      |> expect(:apply_manifests, fn _manifests, _ns ->
+        {:ok, [malformed]}
+      end)
+
+      spec = %Spec{
+        service: "crash-test-svc",
+        namespace: "default",
+        manifests: [malformed],
+        strategy: :direct
+      }
+
+      assert_raise KeyError, fn ->
+        Deploy.run(spec)
+      end
+    end
   end
 
   describe "Kulta strategies" do
diff --git a/test/nopea/distributed_registry_test.exs b/test/nopea/distributed_registry_test.exs
index c90e8a6..a635359 100644
--- a/test/nopea/distributed_registry_test.exs
+++ b/test/nopea/distributed_registry_test.exs
@@ -12,6 +12,28 @@ defmodule Nopea.DistributedRegistryTest do
 
   @moduletag :distributed
 
+  # Poll-based assertion for async operations (replaces Process.sleep barriers)
+  defp assert_eventually(fun, timeout_ms \\ 500, interval_ms \\ 10) do
+    deadline = System.monotonic_time(:millisecond) + timeout_ms
+
+    do_poll(fun, deadline, interval_ms)
+  end
+
+  defp do_poll(fun, deadline, interval_ms) do
+    case fun.() do
+      true ->
+        true
+
+      false ->
+        if System.monotonic_time(:millisecond) >= deadline do
+          flunk("assert_eventually timed out")
+        else
+          Process.sleep(interval_ms)
+          do_poll(fun, deadline, interval_ms)
+        end
+    end
+  end
+
   # Start registry once for all tests
   setup_all do
     case DistributedRegistry.start_link([]) do
@@ -66,8 +88,10 @@ defmodule Nopea.DistributedRegistryTest do
       # Kill the agent
       Agent.stop(agent)
 
-      # Give Horde time to clean up
-      Process.sleep(100)
+      # Poll until Horde cleans up the registration
+      assert_eventually(fn ->
+        DistributedRegistry.lookup(key) == {:error, :not_found}
+      end)
 
       # Should be able to re-register
       {:ok, agent2} = Agent.start_link(fn -> 99 end, name: DistributedRegistry.via(key))
@@ -90,8 +114,10 @@ defmodule Nopea.DistributedRegistryTest do
           Process.sleep(1000)
         end)
 
-      # Give it time to register
-      Process.sleep(50)
+      # Poll until registration completes
+      assert_eventually(fn ->
+        match?({:ok, _}, DistributedRegistry.lookup(key))
+      end)
 
       # Should be able to look up the task's pid
       assert {:ok, pid} = DistributedRegistry.lookup(key)
@@ -110,7 +136,10 @@ defmodule Nopea.DistributedRegistryTest do
           Process.sleep(1000)
         end)
 
-      Process.sleep(50)
+      # Poll until first registration completes
+      assert_eventually(fn ->
+        match?({:ok, _}, DistributedRegistry.lookup(key))
+      end)
 
       # Second process tries to register same key
       task2 =
@@ -135,6 +164,11 @@ defmodule Nopea.DistributedRegistryTest do
       key = "lookup-test-#{:rand.uniform(10000)}"
       {:ok, agent} = Agent.start_link(fn -> :found end, name: DistributedRegistry.via(key))
 
+      # Horde registration may need a moment to propagate
+      assert_eventually(fn ->
+        match?({:ok, _}, DistributedRegistry.lookup(key))
+      end)
+
       assert {:ok, ^agent} = DistributedRegistry.lookup(key)
 
       Agent.stop(agent)
diff --git a/test/nopea/mcp_test.exs b/test/nopea/mcp_test.exs
index 193b9db..02fb8ce 100644
--- a/test/nopea/mcp_test.exs
+++ b/test/nopea/mcp_test.exs
@@ -24,7 +24,7 @@ defmodule Nopea.MCPTest do
   end
 
   describe "handle_request/1 tools/list" do
-    test "lists available tools" do
+    test "lists all 5 tools" do
       request = %{
         "jsonrpc" => "2.0",
         "id" => 2,
@@ -35,13 +35,14 @@ defmodule Nopea.MCPTest do
       assert {:ok, response} = MCP.handle_request(request)
       tools = response["result"]["tools"]
       assert is_list(tools)
-      assert tools != []
 
       tool_names = Enum.map(tools, & &1["name"])
+      assert length(tool_names) == 5
       assert "nopea_deploy" in tool_names
       assert "nopea_context" in tool_names
       assert "nopea_history" in tool_names
       assert "nopea_health" in tool_names
+      assert "nopea_explain" in tool_names
     end
   end
 
@@ -133,6 +134,78 @@ defmodule Nopea.MCPTest do
     end
   end
 
+  describe "handle_request/1 tools/call nopea_deploy" do
+    test "returns error when service is missing" do
+      request = %{
+        "jsonrpc" => "2.0",
+        "id" => 20,
+        "method" => "tools/call",
+        "params" => %{
+          "name" => "nopea_deploy",
+          "arguments" => %{}
+        }
+      }
+
+      assert {:ok, response} = MCP.handle_request(request)
+      assert response["error"]["message"] == "service is required"
+    end
+
+    test "returns error when service is empty string" do
+      request = %{
+        "jsonrpc" => "2.0",
+        "id" => 21,
+        "method" => "tools/call",
+        "params" => %{
+          "name" => "nopea_deploy",
+          "arguments" => %{"service" => ""}
+        }
+      }
+
+      assert {:ok, response} = MCP.handle_request(request)
+      assert response["error"]["message"] == "service is required"
+    end
+  end
+
+  describe "handle_request/1 tools/call nopea_history" do
+    test "returns no history when cache unavailable" do
+      request = %{
+        "jsonrpc" => "2.0",
+        "id" => 30,
+        "method" => "tools/call",
+        "params" => %{
+          "name" => "nopea_history",
+          "arguments" => %{"service" => "unknown-svc"}
+        }
+      }
+
+      assert {:ok, response} = MCP.handle_request(request)
+      content = response["result"]["content"]
+      text = hd(content)["text"]
+      decoded = Jason.decode!(text)
+      # Cache may not be running here, so accept either "Cache not available" or "No history found"
+      assert decoded["message"] != nil
+    end
+  end
+
+  describe "handle_request/1 tools/call nopea_explain" do
+    test "returns default message when memory unavailable" do
+      request = %{
+        "jsonrpc" => "2.0",
+        "id" => 40,
+        "method" => "tools/call",
+        "params" => %{
+          "name" => "nopea_explain",
+          "arguments" => %{"service" => "test-svc"}
+        }
+      }
+
+      assert {:ok, response} = MCP.handle_request(request)
+      content = response["result"]["content"]
+      text = hd(content)["text"]
+      assert text =~ "Memory not available"
+    end
+  end
+
   describe "encode/decode" do
     test "round-trips through JSON" do
       request = %{
diff --git a/test/nopea/mcp_with_memory_test.exs b/test/nopea/mcp_with_memory_test.exs
new file mode 100644
index 0000000..ab634d7
--- /dev/null
+++ b/test/nopea/mcp_with_memory_test.exs
@@ -0,0 +1,138 @@
+defmodule Nopea.MCPWithMemoryTest do
+  use ExUnit.Case, async: false
+
+  alias Nopea.MCP
+
+  setup do
+    start_supervised!({Nopea.Memory, []})
+    start_supervised!(Nopea.Cache)
+    :ok
+  end
+
+  describe "nopea_explain with memory" do
+    test "explains strategy for unknown service" do
+      request = %{
+        "jsonrpc" => "2.0",
+        "id" => 1,
+        "method" => "tools/call",
+        "params" => %{
+          "name" => "nopea_explain",
+          "arguments" => %{"service" => "new-svc", "namespace" => "default"}
+        }
+      }
+
+      assert {:ok, response} = MCP.handle_request(request)
+      content = response["result"]["content"]
+      text = hd(content)["text"]
+      assert text =~ "No deployment history"
+      assert text =~ "direct"
+    end
+
+    test "explains strategy for known service with failures" do
+      # Record failures to build memory context
+      for _ <- 1..3 do
+        Nopea.Memory.record_deploy(%{
+          service: "fragile-svc",
+          namespace: "prod",
+          status: :failed,
+          error: {:timeout, "connection timeout"},
+          concurrent_deploys: []
+        })
+      end
+
+      _ = Nopea.Memory.node_count()
+
+      request = %{
+        "jsonrpc" => "2.0",
+        "id" => 2,
+        "method" => "tools/call",
+        "params" => %{
+          "name" => "nopea_explain",
+          "arguments" => %{"service" => "fragile-svc", "namespace" => "prod"}
+        }
+      }
+
+      assert {:ok, response} = MCP.handle_request(request)
+      content = response["result"]["content"]
+      text = hd(content)["text"]
+      assert text =~ "Failure patterns detected"
+      assert text =~ "canary"
+    end
+  end
+
+  describe "nopea_context with memory" do
+    test "returns context for known service" do
+      Nopea.Memory.record_deploy(%{
+        service: "api-svc",
+        namespace: "default",
+        status: :completed,
+        error: nil,
+        concurrent_deploys: []
+      })
+
+      _ = Nopea.Memory.node_count()
+
+      request = %{
+        "jsonrpc" => "2.0",
+        "id" => 3,
+        "method" => "tools/call",
+        "params" => %{
+          "name" => "nopea_context",
+          "arguments" => %{"service" => "api-svc", "namespace" => "default"}
+        }
+      }
+
+      assert {:ok, response} = MCP.handle_request(request)
+      content = response["result"]["content"]
+      text = hd(content)["text"]
+      decoded = Jason.decode!(text)
+      assert decoded["known"] == true
+      assert decoded["service"] == "api-svc"
+    end
+  end
+
+  describe "nopea_history with cache" do
+    test "returns state for known service" do
+      Nopea.Cache.put_service_state("cached-svc", %{
+        status: :completed,
+        last_deploy: "01ABC",
+        last_deploy_at: DateTime.utc_now()
+      })
+
+      request = %{
+        "jsonrpc" => "2.0",
+        "id" => 4,
+        "method" => "tools/call",
+        "params" => %{
+          "name" => "nopea_history",
+          "arguments" => %{"service" => "cached-svc"}
+        }
+      }
+
+      assert {:ok, response} = MCP.handle_request(request)
+      content = response["result"]["content"]
+      text = hd(content)["text"]
+      decoded = Jason.decode!(text)
+      assert decoded["service"] == "cached-svc"
+      assert decoded["state"] != nil
+    end
+
+    test "returns no history for unknown service" do
+      request = %{
+        "jsonrpc" => "2.0",
+        "id" => 5,
+        "method" => "tools/call",
+        "params" => %{
+          "name" => "nopea_history",
+          "arguments" => %{"service" => "nonexistent-svc"}
+        }
+      }
+
+      assert {:ok, response} = MCP.handle_request(request)
+      content = response["result"]["content"]
+      text = hd(content)["text"]
+      decoded = Jason.decode!(text)
+      assert decoded["message"] == "No history found"
+    end
+  end
+end
diff --git a/test/nopea/memory/ingestor_test.exs b/test/nopea/memory/ingestor_test.exs
index 686262b..2f543e3 100644
--- a/test/nopea/memory/ingestor_test.exs
+++ b/test/nopea/memory/ingestor_test.exs
@@ -158,6 +158,40 @@ defmodule Nopea.Memory.IngestorTest do
       assert {:ok, _} = Graph.get_node(graph, redis_id)
       assert {:ok, _} = Graph.get_node(graph, config_id)
     end
+
+    test "creates deployed_together edges between service and concurrent deploys" do
+      graph = Graph.new()
+
+      result = %{
+        service: "auth-service",
+        namespace: "production",
+        status: :completed,
+        error: nil,
+        concurrent_deploys: ["redis", "config-service"]
+      }
+
+      graph = Ingestor.ingest(graph, result)
+
+      service_id = Identity.compute_id(:concept, "auth-service")
+
+      together_rels =
+        graph
+        |> Graph.neighbors(service_id, :outgoing)
+        |> Enum.filter(fn rel -> rel.relation == :deployed_together end)
+
+      assert length(together_rels) == 2
+
+      target_ids = Enum.map(together_rels, & &1.target) |> Enum.sort()
+
+      expected_ids =
+        [
+          Identity.compute_id(:concept, "redis"),
+          Identity.compute_id(:concept, "config-service")
+        ]
+        |> Enum.sort()
+
+      assert target_ids == expected_ids
+    end
   end
 
   describe "ingest/2 edge cases" do
diff --git a/test/nopea/sykli/target_test.exs b/test/nopea/sykli/target_test.exs
index 6418512..8bb9c73 100644
--- a/test/nopea/sykli/target_test.exs
+++ b/test/nopea/sykli/target_test.exs
@@ -1,10 +1,11 @@
 defmodule Nopea.SYKLI.TargetTest do
-  use ExUnit.Case, async: true
+  use ExUnit.Case, async: false
 
   import Mox
 
   alias Nopea.SYKLI.Target
 
+  setup :set_mox_global
   setup :verify_on_exit!
 
   describe "name/0" do
@@ -44,6 +45,11 @@ defmodule Nopea.SYKLI.TargetTest do
   describe "run_task/3" do
     setup do
       Mox.stub_with(Nopea.K8sMock, Nopea.K8s)
+
+      Mox.stub(Nopea.K8sMock, :get_resource, fn _api, _kind, _name, _ns ->
+        {:error, :not_found}
+      end)
+
       {:ok, state} = Target.setup([])
       %{state: state}
     end
@@ -89,5 +95,29 @@ defmodule Nopea.SYKLI.TargetTest do
       assert {:error, reason} = Target.run_task(task, state, [])
       assert reason != nil
     end
+
+    test "routes through ServiceAgent when available", %{state: _state} do
+      # Start the ServiceAgent infrastructure
+      start_supervised!({Registry, keys: :unique, name: Nopea.Registry})
+      start_supervised!(Nopea.ServiceAgent.Supervisor)
+      start_supervised!(Nopea.Cache)
+      start_supervised!({Nopea.Memory, []})
+
+      {:ok, state} = Target.setup(namespace: "staging")
+
+      task = %{
+        name: "deploy-agent-test",
+        service: "agent-routed-svc",
+        manifests: [],
+        strategy: :direct
+      }
+
+      assert {:ok, result} = Target.run_task(task, state, [])
+      assert result.status == :completed
+
+      # Verify ServiceAgent was used — it should have state for this service
+      assert {:ok, agent_status} = Nopea.ServiceAgent.status("agent-routed-svc")
+      assert agent_status.deploy_count == 1
+    end
   end
 end