diff --git a/CLAUDE.md b/CLAUDE.md index c6717b0..6b1e010 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,4 +1,6 @@ -# NOPEA: AI-Native Deployment Tool with Memory +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. --- @@ -10,196 +12,206 @@ Nopea is a deployment tool that builds a knowledge graph from every deployment. --- -## ARCHITECTURE +## BUILD AND TEST COMMANDS +```bash +# Full verification (run after every change) +mix format && mix compile --warnings-as-errors && mix test + +# Individual commands +mix test # 280 tests, 0 failures +mix test test/nopea/deploy_test.exs # Single file +mix test test/nopea/deploy_test.exs:106 # Single test by line number +mix test --exclude integration --exclude cluster # Skip slow tests +mix format --check-formatted +mix credo +mix escript.build # CLI binary → ./nopea ``` -CLI/MCP/API → Deploy.run(spec) - → Memory.get_deploy_context() # KERTO graph query - → select_strategy() # direct/canary/blue_green - → Strategy.*.execute() # K8s server-side apply + +Tests exclude `:integration` and `:cluster` tags by default (configured in `test_helper.exs`). + +--- + +## DEPLOY PIPELINE + +``` +CLI/MCP/API → Deploy.deploy(spec) + → ServiceAgent.deploy() # queue/serialize per-service + → Deploy.run(spec) # orchestration + → Memory.get_deploy_context() # graph query + → select_strategy() # direct/canary/blue_green (memory-aware) + → Strategy.Direct.execute() # K8s server-side apply → Drift.verify_manifest() # post-deploy 3-way diff - → Memory.record_deploy() # graph update (EWMA) + → Memory.record_deploy() # graph update (EWMA, async cast) → Occurrence.build() + persist() # FALSE Protocol ``` -### OTP Supervision Tree +**Entry points**: `Deploy.deploy/1` routes through ServiceAgent if the supervisor is running; falls back to `Deploy.run/1` otherwise. Always use `deploy/1` — never call `run/1` directly from external callers. 
+ +--- + +## OTP SUPERVISION TREE ``` Nopea.Application -├── Nopea.ULID # Monotonic ID generator -├── TelemetryMetricsPrometheus # Metrics (optional) -├── Nopea.Events.Emitter # CDEvents HTTP emitter (optional) -├── Nopea.Memory # GenServer wrapping KERTO Graph -├── Nopea.Cache # ETS tables for deployment state -├── Nopea.Registry # Process registry -├── Nopea.Deploy.Supervisor # DynamicSupervisor for deploy workers -└── Nopea.API.Router # Plug/Cowboy HTTP (optional) +├── Nopea.ULID # Monotonic ID generator +├── TelemetryMetricsPrometheus # Metrics (optional) +├── Nopea.Events.Emitter # CDEvents HTTP emitter (optional) +├── Nopea.Cache # ETS tables for deployment state +├── Nopea.Memory # GenServer wrapping knowledge graph +├── Nopea.Cluster # libcluster (optional, cluster mode) +├── Nopea.Registry / DistributedRegistry # Process registry +├── Nopea.ServiceAgent.Supervisor # DynamicSupervisor for per-service agents +└── Nopea.API.Router # Plug/Cowboy HTTP (optional) ``` ---- +### Configuration Feature Flags -## KEY MODULES - -| Module | Role | Lines | -|--------|------|-------| -| `Deploy` | Orchestration: context → strategy → execute → verify → record | ~100 | -| `Deploy.Spec` / `Result` | Structs for deploy lifecycle | ~100 | -| `Deploy.Worker` / `Supervisor` | Per-deploy GenServer + DynamicSupervisor | ~60 | -| `Strategy.Direct` | Immediate K8s apply | ~20 | -| `Strategy.Canary` | Gradual rollout (stub, step API exposed) | ~50 | -| `Strategy.BlueGreen` | Slot-based cutover (stub, slot API exposed) | ~60 | -| `Memory` | GenServer owning KERTO `Graph.t()` | ~150 | -| `Memory.Ingestor` | Deploy events → graph upsert operations | ~100 | -| `Memory.Query` | Context queries (failure patterns, deps) | ~100 | -| `Occurrence` | FALSE Protocol occurrence generator | ~150 | -| `MCP` | JSON-RPC MCP server (tools/list, tools/call) | ~200 | -| `API.Router` | HTTP API (deploy, context, history endpoints) | ~100 | -| `SYKLI.Target` | SYKLI target behaviour adapter | ~60 | -| 
`CLI` | Escript entry point | ~100 | -| `K8s` | K8s API wrapper (conn, apply, get, delete) | ~70 | -| `Applier` | YAML parsing + K8s server-side apply | ~200 | -| `Drift` | Three-way drift detection (normalize, diff, verify) | ~250 | -| `Cache` | ETS tables: deployments, service_state, graph_snapshot | ~100 | -| `Events` / `Events.Emitter` | CDEvents builder + async HTTP emitter | ~200 | -| `ULID` | Monotonic ULID generator | ~80 | +Most children are optional, controlled by `Application.get_env(:nopea, key)`: ---- - -## FILE LOCATIONS - -| What | Where | -|------|-------| -| OTP Application | `lib/nopea/application.ex` | -| Deploy orchestration | `lib/nopea/deploy.ex` | -| Deploy structs | `lib/nopea/deploy/spec.ex`, `result.ex` | -| Deploy workers | `lib/nopea/deploy/worker.ex`, `supervisor.ex` | -| Strategy behaviour | `lib/nopea/strategy.ex` | -| Strategy impls | `lib/nopea/strategy/direct.ex`, `canary.ex`, `blue_green.ex` | -| Memory (KERTO) | `lib/nopea/memory.ex` | -| Memory helpers | `lib/nopea/memory/ingestor.ex`, `query.ex` | -| FALSE Protocol | `lib/nopea/occurrence.ex` | -| MCP server | `lib/nopea/mcp.ex` | -| HTTP API | `lib/nopea/api/router.ex` | -| SYKLI integration | `lib/nopea/sykli/target.ex` | -| CLI | `lib/nopea/cli.ex` | -| K8s client | `lib/nopea/k8s.ex`, `k8s/behaviour.ex` | -| YAML + apply | `lib/nopea/applier.ex` | -| Drift detection | `lib/nopea/drift.ex` | -| Cache (ETS) | `lib/nopea/cache.ex` | -| Events | `lib/nopea/events.ex`, `events/emitter.ex` | -| Metrics | `lib/nopea/metrics.ex` | -| ULID | `lib/nopea/ulid.ex` | -| Clustering | `lib/nopea/cluster.ex`, `distributed_*.ex` | +| Key | Default | Controls | +|-----|---------|----------| +| `:enable_metrics` | `true` | TelemetryMetricsPrometheus | +| `:enable_cache` | `true` | Nopea.Cache (ETS) | +| `:enable_memory` | `true` | Nopea.Memory (knowledge graph) | +| `:enable_deploy_supervisor` | `true` | Registry + ServiceAgent.Supervisor | +| `:enable_router` | `false` | Nopea.API.Router 
(HTTP) | +| `:cluster_enabled` | `false` | Cluster + DistributedRegistry | +| `:cdevents_endpoint` | `nil` | Events.Emitter (started only if set) | +| `:canary_threshold` | `0.15` | Failure confidence for auto-canary | --- -## DEPENDENCIES +## STRATEGY AUTO-SELECTION -| Package | Purpose | -|---------|---------| -| `kerto` | Knowledge graph (path dep: `../kerto`) | -| `k8s` | Kubernetes client | -| `yaml_elixir` | YAML parsing | -| `jason` | JSON | -| `plug_cowboy` | HTTP server | -| `req` | HTTP client (CDEvents) | -| `libcluster` | BEAM clustering (optional) | -| `horde` | Distributed supervisor (optional) | -| `telemetry` + `telemetry_metrics` + `prometheus_core` | Observability | -| `mox` | Test mocking (test only) | -| `credo` | Linting (dev/test only) | +```elixir +# Explicit strategy always wins +defp select_strategy(%Spec{strategy: strategy}, _context) + when strategy in [:direct, :canary, :blue_green], do: strategy + +# Memory-based: known service with high failure confidence → canary +defp select_strategy(%Spec{strategy: nil}, %{known: true, failure_patterns: patterns}) + when is_list(patterns) do + threshold = Application.get_env(:nopea, :canary_threshold, 0.15) + if Enum.any?(patterns, fn p -> p.confidence > threshold end), do: :canary, else: :direct +end + +# Default: direct +defp select_strategy(%Spec{strategy: nil}, _context), do: :direct +``` -**No Rust. No msgpax. No git operations.** +Canary/blue_green strategies use `Kulta.RolloutBuilder` to create Rollout CRDs. If no Deployment manifest is found in the spec, the strategy fails with `:no_deployment_found`. 
--- -## ELIXIR PATTERNS +## SERVICE AGENT -### Error Handling +Per-service GenServer that queues and serializes deploys: -```elixir -# Use {:ok, _} / {:error, _} tuples, not bare raise -with {:ok, conn} <- K8s.conn(), - {:ok, applied} <- Applier.apply_manifests(manifests, conn, ns) do - {:ok, applied} -end -``` +- **Queue limit**: 10 — rejects excess with `{:error, :queue_full}` +- **Crash cooldown**: 2s delay before dequeuing after worker crash +- **Idle timeout**: 30 min — agent shuts down if no deploys +- **Lookup**: `ServiceAgent.status(service)` returns `{:ok, %{status: :idle | :deploying, ...}}` +- **Health**: `ServiceAgent.health()` queries all active agents -### Logging +--- -```elixir -require Logger -Logger.info("Deploy completed: #{service} [#{deploy_id}] in #{duration_ms}ms") -# No IO.puts, no IO.inspect in production code -``` +## MEMORY SYSTEM -### Atoms not Strings +Knowledge graph stored in `Nopea.Memory` GenServer state. -```elixir -# Status: :completed, :failed — not "completed", "failed" -# Strategy: :direct, :canary, :blue_green — not strings -``` +**Graph nodes**: services, namespaces, errors (kinds: `:concept`, `:error`) +**Graph relationships**: `:deployed_to`, `:breaks`, `:deployed_together` +**EWMA decay**: Weights decay hourly (factor 0.98) so recent deploys matter more -### K8s Mock Pattern +Key API: +- `Memory.get_deploy_context(service, namespace)` → failure patterns, recommendations +- `Memory.record_deploy(result)` → ingest into graph (**async cast**) +- `Memory.node_count()` / `Memory.relationship_count()` → graph stats (**sync call**) -`Nopea.K8s` implements `Nopea.K8s.Behaviour`. In tests, `Nopea.K8sMock` (Mox) is injected via: +--- + +## K8S MOCK PATTERN + +`Nopea.K8s` implements `Nopea.K8s.Behaviour`. 
Mox injects `Nopea.K8sMock` in tests via config: ```elixir -# test_helper.exs +# test_helper.exs sets: Application.put_env(:nopea, :k8s_module, Nopea.K8sMock) -# Direct.execute uses: +# Production code resolves at runtime: defp k8s_module, do: Application.get_env(:nopea, :k8s_module, Nopea.K8s) +``` -# Tests that don't set explicit expectations: +### Test Setup Patterns + +**Unit tests** (no spawned processes): +```elixir +setup :verify_on_exit! setup do Mox.stub_with(Nopea.K8sMock, Nopea.K8s) + Mox.stub(Nopea.K8sMock, :get_resource, fn _, _, _, _ -> {:error, :not_found} end) :ok end - -# Tests with spawned processes (Worker, Supervisor): -setup :set_mox_global ``` ---- +**Integration tests** (ServiceAgent, spawned workers): +```elixir +setup :set_mox_global # MUST come before other setup — allows spawned processes to use mocks +setup :verify_on_exit! +setup do + Mox.stub_with(Nopea.K8sMock, Nopea.K8s) + start_supervised!({Registry, keys: :unique, name: Nopea.Registry}) + start_supervised!(Nopea.ServiceAgent.Supervisor) + start_supervised!({Nopea.Memory, []}) + start_supervised!(Nopea.Cache) + :ok +end +``` -## TDD WORKFLOW +### Sync After Async Casts -**RED → GREEN → REFACTOR** — Always. +`Memory.record_deploy/1` is a `cast` — don't use `Process.sleep` to wait for it. Use any `GenServer.call` to the same process as a mailbox flush: -1. Write failing test -2. Verify it fails -3. Write minimal implementation -4. Verify all tests pass -5. Refactor, add edge cases -6. 
Run `mix format && mix compile --warnings-as-errors && mix test` +```elixir +# BEAM mailbox FIFO ordering guarantees all prior casts complete before this call returns +_ = Nopea.Memory.node_count() +ctx = Nopea.Memory.get_deploy_context("svc", "ns") +``` ---- +### Test Factories -## VERIFICATION - -```bash -mix compile --warnings-as-errors -mix test # 235 tests, 0 failures -mix format --check-formatted -mix credo -mix escript.build # CLI binary -``` +Available in `test/support/factory.ex`: +- `Nopea.Test.Factory.sample_deployment_manifest(name, namespace)` +- `Nopea.Test.Factory.sample_service_manifest(name)` +- `Nopea.Test.Factory.sample_configmap_manifest(name, namespace, data)` --- -## MEMORY SYSTEM (KERTO) +## ELIXIR PATTERNS -The memory is a KERTO knowledge graph stored in the `Nopea.Memory` GenServer state. +### Error Handling +```elixir +# {:ok, _} / {:error, _} tuples — no bare raise +with {:ok, conn} <- K8s.conn(), + {:ok, applied} <- Applier.apply_manifests(manifests, conn, ns) do + {:ok, applied} +end +``` -**Graph nodes**: services, namespaces, errors, strategies -**Graph relationships**: `:deployed_to`, `:failed_with`, `:depends_on`, `:used_strategy` -**EWMA decay**: Weights decay hourly (factor 0.98) so recent deploys matter more +### Logging +```elixir +require Logger +Logger.info("Deploy completed", service: service, deploy_id: deploy_id, duration_ms: duration_ms) +# Use structured metadata — keys configured in config/config.exs +# No IO.puts or IO.inspect in production code +``` -Key queries: -- `Memory.get_deploy_context(service, namespace)` → failure patterns, recommendations -- `Memory.record_deploy(result)` → ingest into graph (cast) -- `Memory.get_graph_stats()` → node/relationship counts +### Atoms not Strings +```elixir +# Status: :completed, :failed — not "completed", "failed" +# Strategy: :direct, :canary, :blue_green — not strings +``` --- @@ -208,38 +220,31 @@ Key queries: Occurrences are structured events generated after every 
deployment. **Types**: `deploy.run.completed`, `deploy.run.failed`, `deploy.run.rolledback` -**Blocks**: error, reasoning (includes memory context), history, deploy_data **Storage**: `.nopea/occurrence.json` (cold) + `.nopea/occurrences/*.etf` (warm) --- ## MCP SERVER -JSON-RPC 2.0 over stdin/stdout. Tools: - -| Tool | Description | -|------|-------------| -| `nopea_deploy` | Deploy manifests to K8s | -| `nopea_context` | Get memory context for a service | -| `nopea_history` | Get deployment history | -| `nopea_explain` | Explain strategy selection reasoning | +JSON-RPC 2.0 over stdin/stdout. Tools: `nopea_deploy`, `nopea_context`, `nopea_history`, `nopea_health`, `nopea_explain`. --- -## STRATEGY AUTO-SELECTION - -```elixir -# Explicit strategy always wins -defp select_strategy(%Spec{strategy: strategy}, _) when not is_nil(strategy), do: strategy - -# Memory-based: high failure confidence → canary -defp select_strategy(_spec, %{failure_patterns: patterns}) do - if Enum.any?(patterns, fn p -> p.confidence > 0.15 end), do: :canary, else: :direct -end +## DEPENDENCIES -# Default: direct -defp select_strategy(_spec, _context), do: :direct -``` +| Package | Purpose | +|---------|---------| +| `false_protocol` | FALSE Protocol occurrence generation | +| `k8s` | Kubernetes client | +| `yaml_elixir` | YAML parsing | +| `jason` | JSON | +| `plug_cowboy` | HTTP server | +| `req` | HTTP client (CDEvents) | +| `libcluster` | BEAM clustering (optional) | +| `horde` | Distributed supervisor/registry (optional) | +| `telemetry` + `prometheus_core` | Observability | +| `mox` | Test mocking (test only) | +| `credo` | Linting (dev/test only) | --- @@ -249,6 +254,7 @@ defp select_strategy(_spec, _context), do: :direct 2. **TDD always** — write failing test, implement, refactor 3. **No stubs** — complete implementations only 4. **Typespecs required** — all public functions -5. **Run checks** — `mix compile --warnings-as-errors && mix test` -6. 
**No IO.puts** — use `require Logger` +5. **Run checks** — `mix format && mix compile --warnings-as-errors && mix test` +6. **No IO.puts** — use `require Logger` with structured metadata 7. **No bare raise** — use `{:error, reason}` tuples +8. **No Process.sleep in tests** — use GenServer.call barriers for async cast sync diff --git a/config/config.exs b/config/config.exs index 50dddd2..4a3ad5c 100644 --- a/config/config.exs +++ b/config/config.exs @@ -5,4 +5,24 @@ config :nopea, enable_memory: true, enable_cache: true +config :logger, :default_formatter, + format: "$time $metadata[$level] $message\n", + metadata: [ + :service, + :deploy_id, + :namespace, + :strategy, + :error, + :reason, + :duration_ms, + :resource, + :stacktrace, + :cooldown_ms, + :queued, + :node_count, + :relationship_count, + :auto_selected, + :verified + ] + import_config "#{config_env()}.exs" diff --git a/lib/nopea/cli.ex b/lib/nopea/cli.ex index 23fe5ab..95d36dc 100644 --- a/lib/nopea/cli.ex +++ b/lib/nopea/cli.ex @@ -7,11 +7,12 @@ defmodule Nopea.CLI do - status Show deployment status - context Show memory context for a service - history Show deployment history - - rollback Roll back a deployment - memory Show memory graph stats - serve Start daemon mode (HTTP API) """ + require Logger + def main(args) do {opts, args, _} = OptionParser.parse(args, @@ -44,7 +45,7 @@ defmodule Nopea.CLI do case Nopea.Deploy.Spec.from_path(path, service, namespace, strategy: strategy) do {:ok, spec} -> - result = Nopea.Deploy.run(spec) + result = Nopea.Deploy.deploy(spec) output(result, opts) {:error, reason} -> @@ -98,21 +99,19 @@ defmodule Nopea.CLI do end defp serve(_opts) do - IO.puts("Starting Nopea daemon...") + Logger.info("Starting Nopea daemon...") + Application.put_env(:nopea, :enable_router, true) - case Supervisor.start_child(Nopea.AppSupervisor, Nopea.API.Router) do - {:ok, _pid} -> + case Application.ensure_all_started(:nopea) do + {:ok, _apps} -> port = Application.get_env(:nopea, :api_port, 
4000) - IO.puts("Nopea API listening on port #{port}") - - {:error, {:already_started, _pid}} -> - IO.puts("Nopea API already running") + Logger.info("Nopea API listening on port #{port}") + Process.sleep(:infinity) {:error, reason} -> - IO.puts(:stderr, "Failed to start API: #{inspect(reason)}") + Logger.error("Failed to start Nopea: #{inspect(reason)}") + System.halt(1) end - - Process.sleep(:infinity) end defp output(data, opts) do diff --git a/lib/nopea/deploy.ex b/lib/nopea/deploy.ex index 516766a..86ee106 100644 --- a/lib/nopea/deploy.ex +++ b/lib/nopea/deploy.ex @@ -119,6 +119,15 @@ defmodule Nopea.Deploy do strategy end + defp select_strategy(%Spec{strategy: nil}, %{known: true, failure_patterns: patterns}) + when is_list(patterns) do + threshold = Application.get_env(:nopea, :canary_threshold, 0.15) + + if Enum.any?(patterns, fn p -> p.confidence > threshold end), + do: :canary, + else: :direct + end + defp select_strategy(%Spec{strategy: nil}, _context), do: :direct defp select_strategy(%Spec{strategy: other}, _context) do @@ -151,21 +160,12 @@ defmodule Nopea.Deploy do defp verify_deploy(spec, applied) when is_list(applied) do Enum.all?(applied, fn manifest -> - case Nopea.Drift.verify_manifest(spec.service, manifest) do + case Nopea.Drift.verify_manifest(spec.service, manifest, k8s_module: k8s_module()) do :no_drift -> true :new_resource -> true _ -> false end end) - rescue - error -> - Logger.warning("Post-deploy verification failed", - service: spec.service, - error: inspect(error), - stacktrace: __STACKTRACE__ |> Exception.format_stacktrace() - ) - - false end defp verify_deploy(_spec, _applied), do: false @@ -178,7 +178,7 @@ defmodule Nopea.Deploy do status: result.status, error: result.error, duration_ms: result.duration_ms, - concurrent_deploys: [] + concurrent_deploys: get_concurrent_services(result.service) }) end @@ -408,6 +408,30 @@ defmodule Nopea.Deploy do defp emitter_running?, do: Process.whereis(Nopea.Events.Emitter) != nil + defp 
get_concurrent_services(current_service) do + if Process.whereis(Nopea.Registry) do + Registry.select(Nopea.Registry, [ + {{:"$1", :"$2", :_}, [], [{{:"$1", :"$2"}}]} + ]) + |> Enum.flat_map(fn + {{:service, name}, pid} when name != current_service -> + try do + case GenServer.call(pid, :status, 1_000) do + %{status: :deploying} -> [name] + _ -> [] + end + catch + :exit, _ -> [] + end + + _ -> + [] + end) + else + [] + end + end + defp duration_ms(start_time) do System.convert_time_unit(System.monotonic_time() - start_time, :native, :millisecond) end diff --git a/lib/nopea/graph/relation_type.ex b/lib/nopea/graph/relation_type.ex index cca26c7..e0e65ce 100644 --- a/lib/nopea/graph/relation_type.ex +++ b/lib/nopea/graph/relation_type.ex @@ -7,7 +7,8 @@ defmodule Nopea.Graph.RelationType do @types [ :breaks, - :deployed_to + :deployed_to, + :deployed_together ] @spec valid?(term()) :: boolean() diff --git a/lib/nopea/memory/ingestor.ex b/lib/nopea/memory/ingestor.ex index f91f03e..c1e7b8c 100644 --- a/lib/nopea/memory/ingestor.ex +++ b/lib/nopea/memory/ingestor.ex @@ -80,9 +80,28 @@ defmodule Nopea.Memory.Ingestor do defp maybe_record_failure(graph, _result, _ulid), do: graph - defp maybe_record_dependencies(graph, %{concurrent_deploys: [_ | _] = deploys}, ulid) do + defp maybe_record_dependencies( + graph, + %{service: service, concurrent_deploys: [_ | _] = deploys}, + ulid + ) do + service_id = Nopea.Graph.Identity.compute_id(:concept, service) + Enum.reduce(deploys, graph, fn other_service, g -> {g, _node} = Graph.upsert_node(g, :concept, other_service, 0.5, ulid) + other_id = Nopea.Graph.Identity.compute_id(:concept, other_service) + + {g, _rel} = + Graph.upsert_relationship( + g, + service_id, + :deployed_together, + other_id, + 0.5, + ulid, + "concurrent deploy at #{DateTime.utc_now() |> DateTime.to_iso8601()}" + ) + g end) end diff --git a/lib/nopea/sykli/target.ex b/lib/nopea/sykli/target.ex index e283602..bc7750d 100644 --- a/lib/nopea/sykli/target.ex +++ 
b/lib/nopea/sykli/target.ex @@ -59,7 +59,7 @@ defmodule Nopea.SYKLI.Target do strategy: Map.get(task, :strategy) } - result = Nopea.Deploy.run(spec) + result = Nopea.Deploy.deploy(spec) case result.status do :completed -> {:ok, result} diff --git a/test/nopea/deploy_integration_test.exs b/test/nopea/deploy_integration_test.exs index b573ff9..1bde765 100644 --- a/test/nopea/deploy_integration_test.exs +++ b/test/nopea/deploy_integration_test.exs @@ -11,6 +11,12 @@ defmodule Nopea.DeployIntegrationTest do setup do start_supervised!(Nopea.Cache) start_supervised!({Nopea.Memory, []}) + + # Stub get_resource — no real cluster in tests + Mox.stub(Nopea.K8sMock, :get_resource, fn _api, _kind, _name, _ns -> + {:error, :not_found} + end) + :ok end diff --git a/test/nopea/deploy_test.exs b/test/nopea/deploy_test.exs index b0095cd..c7f9151 100644 --- a/test/nopea/deploy_test.exs +++ b/test/nopea/deploy_test.exs @@ -11,6 +11,12 @@ defmodule Nopea.DeployTest do setup do # Stub K8s mock to fall through to real implementation (works for empty manifests) Mox.stub_with(Nopea.K8sMock, Nopea.K8s) + + # Stub get_resource to return not_found — no real cluster in tests + Mox.stub(Nopea.K8sMock, :get_resource, fn _api, _kind, _name, _ns -> + {:error, :not_found} + end) + # Start Memory for context tracking start_supervised!({Nopea.Memory, []}) # Start Cache for state recording @@ -48,8 +54,8 @@ defmodule Nopea.DeployTest do Deploy.run(spec) - # Memory.record_deploy is a cast, give it time - Process.sleep(50) + # Flush Memory mailbox — node_count is a call, so all prior casts complete first + _ = Nopea.Memory.node_count() ctx = Nopea.Memory.get_deploy_context("memory-test-svc", "default") assert ctx.known == true @@ -85,7 +91,7 @@ defmodule Nopea.DeployTest do end describe "strategy selection" do - test "always uses direct when no explicit strategy" do + test "unknown service (no memory) defaults to direct" do spec = %Spec{ service: "clean-svc", namespace: "default", @@ -96,6 +102,131 @@ 
defmodule Nopea.DeployTest do result = Deploy.run(spec) assert result.strategy == :direct end + + test "known service with high failure confidence auto-selects canary" do + # First, create failure history so Memory knows about this service + Nopea.Memory.record_deploy(%{ + service: "flaky-svc", + namespace: "default", + status: :failed, + error: {:timeout, "connection refused"}, + concurrent_deploys: [] + }) + + # Reinforce the failure pattern to push confidence above threshold + for _ <- 1..4 do + Nopea.Memory.record_deploy(%{ + service: "flaky-svc", + namespace: "default", + status: :failed, + error: {:timeout, "connection refused"}, + concurrent_deploys: [] + }) + end + + # Flush Memory mailbox — node_count is a call, so all prior casts complete first + _ = Nopea.Memory.node_count() + + # Verify memory has failure patterns above threshold + ctx = Nopea.Memory.get_deploy_context("flaky-svc", "default") + assert ctx.known == true + assert Enum.any?(ctx.failure_patterns, fn p -> p.confidence > 0.15 end) + + # Now deploy with nil strategy — should auto-select canary + deployment = Nopea.Test.Factory.sample_deployment_manifest("flaky-svc", "default") + + Nopea.K8sMock + |> expect(:apply_manifest, fn manifest, "default" -> + assert manifest["kind"] == "Rollout" + {:ok, manifest} + end) + + spec = %Spec{ + service: "flaky-svc", + namespace: "default", + manifests: [deployment], + strategy: nil + } + + result = Deploy.run(spec) + assert result.strategy == :canary + end + + test "known service with low failure confidence stays direct" do + # Single success — known but no failure patterns + Nopea.Memory.record_deploy(%{ + service: "stable-svc", + namespace: "default", + status: :completed, + error: nil, + concurrent_deploys: [] + }) + + _ = Nopea.Memory.node_count() + + ctx = Nopea.Memory.get_deploy_context("stable-svc", "default") + assert ctx.known == true + assert ctx.failure_patterns == [] + + spec = %Spec{ + service: "stable-svc", + namespace: "default", + manifests: 
[], + strategy: nil + } + + result = Deploy.run(spec) + assert result.strategy == :direct + end + + test "explicit strategy always overrides memory" do + # Create failure history + for _ <- 1..5 do + Nopea.Memory.record_deploy(%{ + service: "override-svc", + namespace: "default", + status: :failed, + error: "crash", + concurrent_deploys: [] + }) + end + + _ = Nopea.Memory.node_count() + + spec = %Spec{ + service: "override-svc", + namespace: "default", + manifests: [], + strategy: :direct + } + + result = Deploy.run(spec) + assert result.strategy == :direct + end + end + + describe "verify_deploy crash propagation" do + test "malformed manifest raises instead of returning false" do + # A manifest missing "apiVersion" and "kind" will cause Drift.verify_manifest + # to raise KeyError — this should propagate, not be silently caught + malformed = %{"metadata" => %{"name" => "bad"}} + + Nopea.K8sMock + |> expect(:apply_manifests, fn _manifests, _ns -> + {:ok, [malformed]} + end) + + spec = %Spec{ + service: "crash-test-svc", + namespace: "default", + manifests: [malformed], + strategy: :direct + } + + assert_raise KeyError, fn -> + Deploy.run(spec) + end + end end describe "Kulta strategies" do diff --git a/test/nopea/distributed_registry_test.exs b/test/nopea/distributed_registry_test.exs index c90e8a6..a635359 100644 --- a/test/nopea/distributed_registry_test.exs +++ b/test/nopea/distributed_registry_test.exs @@ -12,6 +12,28 @@ defmodule Nopea.DistributedRegistryTest do @moduletag :distributed + # Poll-based assertion for async operations (replaces Process.sleep barriers) + defp assert_eventually(fun, timeout_ms \\ 500, interval_ms \\ 10) do + deadline = System.monotonic_time(:millisecond) + timeout_ms + + do_poll(fun, deadline, interval_ms) + end + + defp do_poll(fun, deadline, interval_ms) do + case fun.() do + true -> + true + + false -> + if System.monotonic_time(:millisecond) >= deadline do + flunk("assert_eventually timed out") + else + 
Process.sleep(interval_ms) + do_poll(fun, deadline, interval_ms) + end + end + end + # Start registry once for all tests setup_all do case DistributedRegistry.start_link([]) do @@ -66,8 +88,10 @@ defmodule Nopea.DistributedRegistryTest do # Kill the agent Agent.stop(agent) - # Give Horde time to clean up - Process.sleep(100) + # Poll until Horde cleans up the registration + assert_eventually(fn -> + DistributedRegistry.lookup(key) == {:error, :not_found} + end) # Should be able to re-register {:ok, agent2} = Agent.start_link(fn -> 99 end, name: DistributedRegistry.via(key)) @@ -90,8 +114,10 @@ defmodule Nopea.DistributedRegistryTest do Process.sleep(1000) end) - # Give it time to register - Process.sleep(50) + # Poll until registration completes + assert_eventually(fn -> + match?({:ok, _}, DistributedRegistry.lookup(key)) + end) # Should be able to look up the task's pid assert {:ok, pid} = DistributedRegistry.lookup(key) @@ -110,7 +136,10 @@ defmodule Nopea.DistributedRegistryTest do Process.sleep(1000) end) - Process.sleep(50) + # Poll until first registration completes + assert_eventually(fn -> + match?({:ok, _}, DistributedRegistry.lookup(key)) + end) # Second process tries to register same key task2 = @@ -135,6 +164,11 @@ defmodule Nopea.DistributedRegistryTest do key = "lookup-test-#{:rand.uniform(10000)}" {:ok, agent} = Agent.start_link(fn -> :found end, name: DistributedRegistry.via(key)) + # Horde registration may need a moment to propagate + assert_eventually(fn -> + match?({:ok, _}, DistributedRegistry.lookup(key)) + end) + assert {:ok, ^agent} = DistributedRegistry.lookup(key) Agent.stop(agent) diff --git a/test/nopea/mcp_test.exs b/test/nopea/mcp_test.exs index 193b9db..02fb8ce 100644 --- a/test/nopea/mcp_test.exs +++ b/test/nopea/mcp_test.exs @@ -24,7 +24,7 @@ defmodule Nopea.MCPTest do end describe "handle_request/1 tools/list" do - test "lists available tools" do + test "lists all 5 tools" do request = %{ "jsonrpc" => "2.0", "id" => 2, @@ -35,13 
+35,14 @@ defmodule Nopea.MCPTest do assert {:ok, response} = MCP.handle_request(request) tools = response["result"]["tools"] assert is_list(tools) - assert tools != [] tool_names = Enum.map(tools, & &1["name"]) + assert length(tool_names) == 5 assert "nopea_deploy" in tool_names assert "nopea_context" in tool_names assert "nopea_history" in tool_names assert "nopea_health" in tool_names + assert "nopea_explain" in tool_names end end @@ -133,6 +134,78 @@ defmodule Nopea.MCPTest do end end + describe "handle_request/1 tools/call nopea_deploy" do + test "returns error when service is missing" do + request = %{ + "jsonrpc" => "2.0", + "id" => 20, + "method" => "tools/call", + "params" => %{ + "name" => "nopea_deploy", + "arguments" => %{} + } + } + + assert {:ok, response} = MCP.handle_request(request) + assert response["error"]["message"] == "service is required" + end + + test "returns error when service is empty string" do + request = %{ + "jsonrpc" => "2.0", + "id" => 21, + "method" => "tools/call", + "params" => %{ + "name" => "nopea_deploy", + "arguments" => %{"service" => ""} + } + } + + assert {:ok, response} = MCP.handle_request(request) + assert response["error"]["message"] == "service is required" + end + end + + describe "handle_request/1 tools/call nopea_history" do + test "returns no history when cache unavailable" do + request = %{ + "jsonrpc" => "2.0", + "id" => 30, + "method" => "tools/call", + "params" => %{ + "name" => "nopea_history", + "arguments" => %{"service" => "unknown-svc"} + } + } + + assert {:ok, response} = MCP.handle_request(request) + content = response["result"]["content"] + text = hd(content)["text"] + decoded = Jason.decode!(text) + # Cache not running in async test → either "Cache not available" or "No history found" + assert decoded["message"] != nil + end + end + + describe "handle_request/1 tools/call nopea_explain" do + test "returns default message when memory unavailable" do + request = %{ + "jsonrpc" => "2.0", + "id" => 40, + 
"method" => "tools/call",
+        "params" => %{
+          "name" => "nopea_explain",
+          "arguments" => %{"service" => "test-svc"}
+        }
+      }
+
+      assert {:ok, response} = MCP.handle_request(request)
+      content = response["result"]["content"]
+      text = hd(content)["text"]
+      assert text =~ "Memory not available"
+    end
+  end
+
   describe "encode/decode" do
     test "round-trips through JSON" do
       request = %{
diff --git a/test/nopea/mcp_with_memory_test.exs b/test/nopea/mcp_with_memory_test.exs
new file mode 100644
index 0000000..ab634d7
--- /dev/null
+++ b/test/nopea/mcp_with_memory_test.exs
@@ -0,0 +1,114 @@
+defmodule Nopea.MCPWithMemoryTest do
+  use ExUnit.Case, async: false
+
+  alias Nopea.MCP
+
+  # Memory and Cache are started under the test supervisor, so every test
+  # begins with fresh instances that ExUnit stops when the test exits.
+  setup do
+    start_supervised!({Nopea.Memory, []})
+    start_supervised!(Nopea.Cache)
+    :ok
+  end
+
+  describe "nopea_explain with memory" do
+    test "explains strategy for unknown service" do
+      arguments = %{"service" => "new-svc", "namespace" => "default"}
+      text = call_tool(1, "nopea_explain", arguments)
+
+      assert text =~ "No deployment history"
+      assert text =~ "direct"
+    end
+
+    test "explains strategy for known service with failures" do
+      # Record failures to build memory context
+      Enum.each(1..3, fn _attempt ->
+        Nopea.Memory.record_deploy(%{
+          service: "fragile-svc",
+          namespace: "prod",
+          status: :failed,
+          error: {:timeout, "connection timeout"},
+          concurrent_deploys: []
+        })
+      end)
+
+      await_memory()
+
+      arguments = %{"service" => "fragile-svc", "namespace" => "prod"}
+      text = call_tool(2, "nopea_explain", arguments)
+
+      assert text =~ "Failure patterns detected"
+      assert text =~ "canary"
+    end
+  end
+
+  describe "nopea_context with memory" do
+    test "returns context for known service" do
+      Nopea.Memory.record_deploy(%{
+        service: "api-svc",
+        namespace: "default",
+        status: :completed,
+        error: nil,
+        concurrent_deploys: []
+      })
+
+      await_memory()
+
+      arguments = %{"service" => "api-svc", "namespace" => "default"}
+      decoded = Jason.decode!(call_tool(3, "nopea_context", arguments))
+
+      assert decoded["known"] == true
+      assert decoded["service"] == "api-svc"
+    end
+  end
+
+  describe "nopea_history with cache" do
+    test "returns state for known service" do
+      Nopea.Cache.put_service_state("cached-svc", %{
+        status: :completed,
+        last_deploy: "01ABC",
+        last_deploy_at: DateTime.utc_now()
+      })
+
+      arguments = %{"service" => "cached-svc"}
+      decoded = Jason.decode!(call_tool(4, "nopea_history", arguments))
+
+      assert decoded["service"] == "cached-svc"
+      assert decoded["state"] != nil
+    end
+
+    test "returns no history for unknown service" do
+      arguments = %{"service" => "nonexistent-svc"}
+      decoded = Jason.decode!(call_tool(5, "nopea_history", arguments))
+
+      assert decoded["message"] == "No history found"
+    end
+  end
+
+  # Sends a JSON-RPC `tools/call` request for `tool` through MCP.handle_request/1,
+  # asserts an `{:ok, response}` reply, and returns the "text" of the first
+  # entry in the response's "result" -> "content" list.
+  defp call_tool(id, tool, arguments) do
+    request = %{
+      "jsonrpc" => "2.0",
+      "id" => id,
+      "method" => "tools/call",
+      "params" => %{"name" => tool, "arguments" => arguments}
+    }
+
+    assert {:ok, response} = MCP.handle_request(request)
+
+    response
+    |> get_in(["result", "content"])
+    |> hd()
+    |> Access.get("text")
+  end
+
+  # NOTE(review): presumably a synchronization barrier. CLAUDE.md describes
+  # record_deploy as an async cast, so a round-trip to the Memory process lets
+  # earlier casts be applied first (assumes node_count/0 is a GenServer.call).
+  defp await_memory do
+    _ = Nopea.Memory.node_count()
+    :ok
+  end
+end
diff --git a/test/nopea/memory/ingestor_test.exs b/test/nopea/memory/ingestor_test.exs
index 686262b..2f543e3 100644
--- 
a/test/nopea/memory/ingestor_test.exs
+++ b/test/nopea/memory/ingestor_test.exs
@@ -158,6 +158,32 @@ defmodule Nopea.Memory.IngestorTest do
       assert {:ok, _} = Graph.get_node(graph, redis_id)
       assert {:ok, _} = Graph.get_node(graph, config_id)
     end
+
+    test "creates deployed_together edges between service and concurrent deploys" do
+      # The deploy result names two services deployed alongside auth-service.
+      deploy_result = %{
+        service: "auth-service",
+        namespace: "production",
+        status: :completed,
+        error: nil,
+        concurrent_deploys: ["redis", "config-service"]
+      }
+
+      graph = Ingestor.ingest(Graph.new(), deploy_result)
+      service_id = Identity.compute_id(:concept, "auth-service")
+
+      # Targets of the service's outgoing :deployed_together relationships.
+      together_targets =
+        for rel <- Graph.neighbors(graph, service_id, :outgoing),
+            rel.relation == :deployed_together,
+            do: rel.target
+
+      expected_targets =
+        Enum.map(["redis", "config-service"], &Identity.compute_id(:concept, &1))
+
+      assert length(together_targets) == 2
+      assert Enum.sort(together_targets) == Enum.sort(expected_targets)
+    end
   end
 
   describe "ingest/2 edge cases" do
diff --git a/test/nopea/sykli/target_test.exs b/test/nopea/sykli/target_test.exs
index 6418512..8bb9c73 100644
--- a/test/nopea/sykli/target_test.exs
+++ b/test/nopea/sykli/target_test.exs
@@ -1,10 +1,11 @@
 defmodule Nopea.SYKLI.TargetTest do
-  use ExUnit.Case, async: true
+  use ExUnit.Case, async: false
 
   import Mox
 
   alias Nopea.SYKLI.Target
 
+  setup :set_mox_global
   setup :verify_on_exit!
describe "name/0" do
@@ -44,6 +45,12 @@ defmodule Nopea.SYKLI.TargetTest do
   describe "run_task/3" do
     setup do
       Mox.stub_with(Nopea.K8sMock, Nopea.K8s)
+
+      # Every get_resource lookup reports the resource as missing, replacing
+      # the Nopea.K8s implementation installed by stub_with/2 above.
+      not_found = fn _api, _kind, _name, _ns -> {:error, :not_found} end
+      Mox.stub(Nopea.K8sMock, :get_resource, not_found)
+
       {:ok, state} = Target.setup([])
       %{state: state}
     end
@@ -89,5 +96,33 @@ defmodule Nopea.SYKLI.TargetTest do
       assert {:error, reason} = Target.run_task(task, state, [])
       assert reason != nil
     end
+
+    test "routes through ServiceAgent when available" do
+      # Start the ServiceAgent infrastructure
+      children = [
+        {Registry, keys: :unique, name: Nopea.Registry},
+        Nopea.ServiceAgent.Supervisor,
+        Nopea.Cache,
+        {Nopea.Memory, []}
+      ]
+
+      Enum.each(children, &start_supervised!/1)
+
+      {:ok, staging_state} = Target.setup(namespace: "staging")
+
+      deploy_task = %{
+        name: "deploy-agent-test",
+        service: "agent-routed-svc",
+        manifests: [],
+        strategy: :direct
+      }
+
+      assert {:ok, outcome} = Target.run_task(deploy_task, staging_state, [])
+      assert outcome.status == :completed
+
+      # Verify ServiceAgent was used — it should have state for this service
+      assert {:ok, agent_status} = Nopea.ServiceAgent.status("agent-routed-svc")
+      assert agent_status.deploy_count == 1
+    end
   end
 end