From 996e6ada8c7b297ee1ae6c54aa2e2198f39af0ce Mon Sep 17 00:00:00 2001 From: Yair Etziony Date: Tue, 3 Mar 2026 02:04:59 +0100 Subject: [PATCH 1/2] fix: harden occurrence pipeline error handling, strengthen type design Replace overbroad rescue in generate_occurrence with explicit error handling for each failure mode (Occurrence.new, persist, LogEmitter). Add @enforce_keys to Result and Events, narrow strategy type from atom() to :direct | :canary | :blue_green, add Result.rolledback/5 factory, add NodeKind.t() type, and fix start_log_emitter typespec. Remove unreachable execute_strategy fallback, add emit_status_log catch-all, check all LogEmitter return values, and log disabled emitter events at debug level. Fix conditional assertion in integration test, add tests for build_entity edge cases, persist error path, and Result enforce_keys. Clean up leaked emitter processes in OccurrenceLogTest. Co-Authored-By: Claude Opus 4.6 --- lib/nopea/deploy.ex | 150 ++++++++++++++++--------- lib/nopea/deploy/result.ex | 32 +++++- lib/nopea/deploy/spec.ex | 2 +- lib/nopea/events.ex | 1 + lib/nopea/events/emitter.ex | 2 +- lib/nopea/graph/node.ex | 2 +- lib/nopea/graph/node_kind.ex | 2 + lib/nopea/occurrence.ex | 31 ++--- test/nopea/deploy/result_test.exs | 24 ++++ test/nopea/deploy_integration_test.exs | 20 ++-- test/nopea/occurrence_log_test.exs | 17 ++- test/nopea/occurrence_test.exs | 53 +++++++++ 12 files changed, 248 insertions(+), 88 deletions(-) diff --git a/lib/nopea/deploy.ex b/lib/nopea/deploy.ex index 79ab119..516766a 100644 --- a/lib/nopea/deploy.ex +++ b/lib/nopea/deploy.ex @@ -145,15 +145,6 @@ defmodule Nopea.Deploy do end end - defp execute_strategy(other, spec) do - Logger.warning("Unrecognized strategy, falling back to direct", - service: spec.service, - strategy: inspect(other) - ) - - Nopea.Strategy.Direct.execute(spec) - end - defp k8s_module do Application.get_env(:nopea, :k8s_module, Nopea.K8s) end @@ -233,30 +224,31 @@ defmodule Nopea.Deploy do # Persist to .nopea/ directory workdir = File.cwd!() - Nopea.Occurrence.persist(occurrence, workdir) + + case Nopea.Occurrence.persist(occurrence, workdir) do + :ok -> + :ok + + {:error, reason} -> + Logger.error("Failed to persist occurrence", + service: result.service, + deploy_id: result.deploy_id, + error: inspect(reason) + ) + end rescue error -> - Logger.error("Failed to generate occurrence", + Logger.error("Failed to generate occurrence: #{Exception.message(error)}", service: result.service, deploy_id: result.deploy_id, - error: inspect(error), - stacktrace: __STACKTRACE__ |> Exception.format_stacktrace() + error: Exception.format(:error, error, __STACKTRACE__) ) end defp emit_deploy_logs(occurrence, result) do case Nopea.Occurrence.start_log_emitter(occurrence) do {:ok, emitter} -> - FalseProtocol.LogEmitter.info_full( - emitter, - "deploy started for #{result.service}", - %FalseProtocol.Semantic{ - event: "deploy.apply.start", - what_happened: - "started applying #{result.manifest_count} manifests to #{result.namespace}" - } - ) - + log_deploy_start(emitter, result) emit_status_log(emitter, result) Nopea.Occurrence.attach_log_ref(occurrence, emitter) @@ -270,42 +262,96 @@ defmodule Nopea.Deploy do end end + defp log_deploy_start(emitter, result) do + case FalseProtocol.LogEmitter.info_full( + emitter, + "deploy started for #{result.service}", + %FalseProtocol.Semantic{ + event: "deploy.apply.start", + what_happened: + "started applying #{result.manifest_count} manifests to #{result.namespace}" + } + ) do + {:ok, _entry} -> + :ok + + {:error, reason} -> + Logger.warning("Failed to emit deploy start log", reason: inspect(reason)) + end + end + defp emit_status_log(emitter, %{status: :completed} = result) do - FalseProtocol.LogEmitter.info_full( - emitter, - "deploy completed in #{result.duration_ms}ms", - %FalseProtocol.Semantic{ - event: "deploy.apply.complete", - what_happened: "#{result.service} deployed successfully", - parameters: %{"verified" => result.verified, "duration_ms" => result.duration_ms} - } - ) + case FalseProtocol.LogEmitter.info_full( + emitter, + "deploy completed in #{result.duration_ms}ms", + %FalseProtocol.Semantic{ + event: "deploy.apply.complete", + what_happened: "#{result.service} deployed successfully", + parameters: %{"verified" => result.verified, "duration_ms" => result.duration_ms} + } + ) do + {:ok, _entry} -> + :ok + + {:error, reason} -> + Logger.warning("Failed to emit deploy complete log", reason: inspect(reason)) + end end defp emit_status_log(emitter, %{status: :failed} = result) do - FalseProtocol.LogEmitter.emit( - emitter, - :error, - "deploy failed: #{inspect(result.error)}", - %FalseProtocol.Semantic{ - event: "deploy.apply.failed", - what_happened: "#{result.service} deployment failed", - impact: "service in #{result.namespace} is not updated" - } - ) + case FalseProtocol.LogEmitter.emit( + emitter, + :error, + "deploy failed: #{inspect(result.error)}", + %FalseProtocol.Semantic{ + event: "deploy.apply.failed", + what_happened: "#{result.service} deployment failed", + impact: "service in #{result.namespace} is not updated" + } + ) do + {:ok, _entry} -> + :ok + + {:error, reason} -> + Logger.warning("Failed to emit deploy failed log", reason: inspect(reason)) + end end defp emit_status_log(emitter, %{status: :rolledback} = result) do - FalseProtocol.LogEmitter.emit( - emitter, - :warning, - "deploy rolledback: #{inspect(result.error)}", - %FalseProtocol.Semantic{ - event: "deploy.apply.rolledback", - what_happened: "#{result.service} deployment rolled back", - impact: "service in #{result.namespace} reverted to previous version" - } - ) + case FalseProtocol.LogEmitter.emit( + emitter, + :warning, + "deploy rolledback: #{inspect(result.error)}", + %FalseProtocol.Semantic{ + event: "deploy.apply.rolledback", + what_happened: "#{result.service} deployment rolled back", + impact: "service in #{result.namespace} reverted to previous version" + } + ) do + {:ok, _entry} -> + :ok + + {:error, reason} -> + Logger.warning("Failed to emit deploy rollback log", reason: inspect(reason)) + end + end + + defp emit_status_log(emitter, result) do + case FalseProtocol.LogEmitter.emit( + emitter, + :warning, + "deploy finished with status: #{inspect(result.status)}", + %FalseProtocol.Semantic{ + event: "deploy.apply.unknown", + what_happened: "#{result.service} deployment ended with status #{result.status}" + } + ) do + {:ok, _entry} -> + :ok + + {:error, reason} -> + Logger.warning("Failed to emit deploy status log", reason: inspect(reason)) + end end defp emit_start(spec, deploy_id, strategy) do diff --git a/lib/nopea/deploy/result.ex b/lib/nopea/deploy/result.ex index d969cc5..98c6bf5 100644 --- a/lib/nopea/deploy/result.ex +++ b/lib/nopea/deploy/result.ex @@ -6,12 +6,14 @@ defmodule Nopea.Deploy.Result do verification status, and any errors. """ + @type strategy :: :direct | :canary | :blue_green + @type t :: %__MODULE__{ deploy_id: String.t(), service: String.t(), namespace: String.t(), status: :completed | :failed | :rolledback, - strategy: atom(), + strategy: strategy(), manifest_count: non_neg_integer(), duration_ms: non_neg_integer(), verified: boolean(), @@ -20,6 +22,7 @@ defmodule Nopea.Deploy.Result do timestamp: DateTime.t() } + @enforce_keys [:deploy_id, :service, :namespace, :status, :strategy] defstruct [ :deploy_id, :service, @@ -34,7 +37,14 @@ defmodule Nopea.Deploy.Result do timestamp: nil ] - @spec success(String.t(), Nopea.Deploy.Spec.t(), atom(), [map()], non_neg_integer(), boolean()) :: + @spec success( + String.t(), + Nopea.Deploy.Spec.t(), + strategy(), + [map()], + non_neg_integer(), + boolean() + ) :: t() def success(deploy_id, spec, strategy, applied, duration_ms, verified) do %__MODULE__{ @@ -51,7 +61,7 @@ defmodule Nopea.Deploy.Result do } end - @spec failure(String.t(), Nopea.Deploy.Spec.t(), atom(), term(), non_neg_integer()) :: t() + @spec failure(String.t(), Nopea.Deploy.Spec.t(), strategy(), term(), non_neg_integer()) :: t() def failure(deploy_id, spec, strategy, error, duration_ms) do %__MODULE__{ deploy_id: deploy_id, @@ -65,4 +75,20 @@ defmodule Nopea.Deploy.Result do timestamp: DateTime.utc_now() } end + + @spec rolledback(String.t(), Nopea.Deploy.Spec.t(), strategy(), term(), non_neg_integer()) :: + t() + def rolledback(deploy_id, spec, strategy, error, duration_ms) do + %__MODULE__{ + deploy_id: deploy_id, + service: spec.service, + namespace: spec.namespace, + status: :rolledback, + strategy: strategy, + manifest_count: length(spec.manifests), + duration_ms: duration_ms, + error: error, + timestamp: DateTime.utc_now() + } + end end diff --git a/lib/nopea/deploy/spec.ex b/lib/nopea/deploy/spec.ex index 12a91be..039564b 100644 --- a/lib/nopea/deploy/spec.ex +++ b/lib/nopea/deploy/spec.ex @@ -9,7 +9,7 @@ defmodule Nopea.Deploy.Spec do service: String.t(), namespace: String.t(), manifests: [map()], - strategy: atom() | nil, + strategy: Nopea.Deploy.Result.strategy() | nil, manifest_path: String.t() | nil, timeout_ms: pos_integer() } diff --git a/lib/nopea/events.ex b/lib/nopea/events.ex index cb15b2a..af5c80e 100644 --- a/lib/nopea/events.ex +++ b/lib/nopea/events.ex @@ -33,6 +33,7 @@ defmodule Nopea.Events do subject: subject() } + @enforce_keys [:id, :type, :source, :specversion, :timestamp, :subject] defstruct [:id, :type, :source, :specversion, :timestamp, :subject] @event_type_map %{ diff --git a/lib/nopea/events/emitter.ex b/lib/nopea/events/emitter.ex index 3168392..954f96a 100644 --- a/lib/nopea/events/emitter.ex +++ b/lib/nopea/events/emitter.ex @@ -112,7 +112,7 @@ defmodule Nopea.Events.Emitter do @impl true def handle_cast({:emit, _event}, %{enabled: false} = state) do - # Silently ignore when disabled + Logger.debug("CDEvent ignored (emitter disabled)") {:noreply, state} end diff --git a/lib/nopea/graph/node.ex b/lib/nopea/graph/node.ex index e6ccf90..5d88812 100644 --- a/lib/nopea/graph/node.ex +++ b/lib/nopea/graph/node.ex @@ -14,7 +14,7 @@ defmodule Nopea.Graph.Node do @type t :: %__MODULE__{ id: String.t(), name: String.t(), - kind: atom(), + kind: NodeKind.t(), relevance: float(), observations: non_neg_integer(), first_seen: String.t(), diff --git a/lib/nopea/graph/node_kind.ex b/lib/nopea/graph/node_kind.ex index 06e6c1f..3cea791 100644 --- a/lib/nopea/graph/node_kind.ex +++ b/lib/nopea/graph/node_kind.ex @@ -5,6 +5,8 @@ defmodule Nopea.Graph.NodeKind do Value object — immutable, equality by value. """ + @type t :: :error | :concept + @kinds [:error, :concept] @spec valid?(term()) :: boolean() diff --git a/lib/nopea/occurrence.ex b/lib/nopea/occurrence.ex index 27b3a93..af7c9aa 100644 --- a/lib/nopea/occurrence.ex +++ b/lib/nopea/occurrence.ex @@ -28,19 +28,22 @@ defmodule Nopea.Occurrence do def build(result, memory_context \\ nil) do {type_suffix, severity, outcome} = classify(result.status) - {:ok, occ} = - Occurrence.new("nopea", "deploy.run.#{type_suffix}", - severity: severity, - outcome: outcome - ) - - occ - |> maybe_set_namespace(result) - |> maybe_add_entities(result) - |> Occurrence.with_data(build_deploy_data(result)) - |> Occurrence.with_history(build_history(result)) - |> maybe_add_error(result) - |> maybe_add_reasoning(result, memory_context) + case Occurrence.new("nopea", "deploy.run.#{type_suffix}", + severity: severity, + outcome: outcome + ) do + {:ok, occ} -> + occ + |> maybe_set_namespace(result) + |> maybe_add_entities(result) + |> Occurrence.with_data(build_deploy_data(result)) + |> Occurrence.with_history(build_history(result)) + |> maybe_add_error(result) + |> maybe_add_reasoning(result, memory_context) + + {:error, reason} -> + raise "FalseProtocol.Occurrence.new failed: #{inspect(reason)}" + end end @doc """ @@ -48,7 +51,7 @@ defmodule Nopea.Occurrence do Mode is `:both` — deploy logs are human-readable AND AI-structured. """ - @spec start_log_emitter(Occurrence.t()) :: {:ok, pid()} + @spec start_log_emitter(Occurrence.t()) :: {:ok, pid()} | {:error, term()} def start_log_emitter(%Occurrence{} = occ) do FalseProtocol.LogEmitter.start_link(occ.id, "nopea", :both) end diff --git a/test/nopea/deploy/result_test.exs b/test/nopea/deploy/result_test.exs index a6e7a19..19de4ca 100644 --- a/test/nopea/deploy/result_test.exs +++ b/test/nopea/deploy/result_test.exs @@ -44,4 +44,28 @@ defmodule Nopea.Deploy.ResultTest do assert result.verified == false end end + + describe "rolledback/5" do + test "builds a rolledback result" do + spec = sample_spec() + result = Result.rolledback("01GHI", spec, :blue_green, {:apply_failed, "oom"}, 8000) + + assert result.deploy_id == "01GHI" + assert result.status == :rolledback + assert result.strategy == :blue_green + assert result.error == {:apply_failed, "oom"} + assert result.duration_ms == 8000 + assert result.verified == false + assert result.manifest_count == 2 + assert %DateTime{} = result.timestamp + end + end + + describe "enforce_keys" do + test "requires mandatory fields" do + assert_raise ArgumentError, ~r/keys must also be given/, fn -> + struct!(Result, %{}) + end + end + end end diff --git a/test/nopea/deploy_integration_test.exs b/test/nopea/deploy_integration_test.exs index 1423e86..b573ff9 100644 --- a/test/nopea/deploy_integration_test.exs +++ b/test/nopea/deploy_integration_test.exs @@ -113,16 +113,16 @@ defmodule Nopea.DeployIntegrationTest do # Check occurrence file was generated occurrence_path = Path.join(File.cwd!(), ".nopea/occurrence.json") - if File.exists?(occurrence_path) do - {:ok, content} = File.read(occurrence_path) - {:ok, occurrence} = Jason.decode(content) - - assert is_binary(occurrence["id"]) - assert String.starts_with?(occurrence["type"], "deploy.run.") - assert occurrence["source"] == "nopea" - assert is_map(occurrence["data"]) - assert occurrence["data"]["service"] == "occ-test-svc" - end + assert File.exists?(occurrence_path), "Expected occurrence.json to be generated" + + {:ok, content} = File.read(occurrence_path) + {:ok, occurrence} = Jason.decode(content) + + assert is_binary(occurrence["id"]) + assert String.starts_with?(occurrence["type"], "deploy.run.") + assert occurrence["source"] == "nopea" + assert is_map(occurrence["data"]) + assert occurrence["data"]["service"] == "occ-test-svc" end end end diff --git a/test/nopea/occurrence_log_test.exs b/test/nopea/occurrence_log_test.exs index 16b3ea7..06c35b8 100644 --- a/test/nopea/occurrence_log_test.exs +++ b/test/nopea/occurrence_log_test.exs @@ -16,18 +16,23 @@ defmodule Nopea.OccurrenceLogTest do applied_resources: [] } + defp start_emitter(occ) do + {:ok, emitter} = NopeaOccurrence.start_log_emitter(occ) + on_exit(fn -> if Process.alive?(emitter), do: GenServer.stop(emitter) end) + emitter + end + describe "start_log_emitter/1" do test "starts a log emitter for the occurrence" do occ = NopeaOccurrence.build(@result) - assert {:ok, emitter} = NopeaOccurrence.start_log_emitter(occ) + emitter = start_emitter(occ) assert is_pid(emitter) end test "emitter uses :both mode" do occ = NopeaOccurrence.build(@result) - {:ok, emitter} = NopeaOccurrence.start_log_emitter(occ) + emitter = start_emitter(occ) - # :both mode requires both message and semantic semantic = %FalseProtocol.Semantic{ event: "deploy.test.event", what_happened: "test event" @@ -41,7 +46,7 @@ defmodule Nopea.OccurrenceLogTest do test "emitter sequences entries correctly" do occ = NopeaOccurrence.build(@result) - {:ok, emitter} = NopeaOccurrence.start_log_emitter(occ) + emitter = start_emitter(occ) semantic = %FalseProtocol.Semantic{ event: "deploy.test.first", @@ -63,7 +68,7 @@ defmodule Nopea.OccurrenceLogTest do test "entries reference the parent occurrence" do occ = NopeaOccurrence.build(@result) - {:ok, emitter} = NopeaOccurrence.start_log_emitter(occ) + emitter = start_emitter(occ) semantic = %FalseProtocol.Semantic{ event: "deploy.test.ref", @@ -78,7 +83,7 @@ defmodule Nopea.OccurrenceLogTest do describe "attach_log_ref/2" do test "attaches log_ref with entry count" do occ = NopeaOccurrence.build(@result) - {:ok, emitter} = NopeaOccurrence.start_log_emitter(occ) + emitter = start_emitter(occ) semantic = %FalseProtocol.Semantic{ event: "deploy.test.count", diff --git a/test/nopea/occurrence_test.exs b/test/nopea/occurrence_test.exs index 26d9391..cdec8db 100644 --- a/test/nopea/occurrence_test.exs +++ b/test/nopea/occurrence_test.exs @@ -227,6 +227,54 @@ defmodule Nopea.OccurrenceTest do end end + describe "build_entity edge cases" do + test "handles resource with missing uid and namespace" do + result = %{ + @successful_result + | applied_resources: [ + %{ + "kind" => "ConfigMap", + "metadata" => %{ + "name" => "my-config", + "resourceVersion" => "1" + } + } + ] + } + + occ = Nopea.Occurrence.build(result) + + assert [entity] = occ.context.entities + assert entity.type == "ConfigMap" + assert entity.name == "my-config" + assert entity.id == "unknown" + assert entity.namespace == "" + end + + test "skips malformed resources without crashing" do + result = %{ + @successful_result + | applied_resources: [ + "not a map", + %{"no_kind" => true}, + %{ + "kind" => "Service", + "metadata" => %{ + "name" => "valid-svc", + "uid" => "svc-uid", + "resourceVersion" => "2" + } + } + ] + } + + occ = Nopea.Occurrence.build(result) + + assert [entity] = occ.context.entities + assert entity.name == "valid-svc" + end + end + describe "persist/2" do setup do tmp_dir = @@ -265,6 +313,11 @@ defmodule Nopea.OccurrenceTest do assert hd(files) |> String.ends_with?(".etf") end + test "returns error when workdir is not writable", _context do + occ = Nopea.Occurrence.build(@successful_result) + assert {:error, _reason} = Nopea.Occurrence.persist(occ, "/nonexistent/path/that/fails") + end + test "ETF round-trips to same struct", %{workdir: workdir} do occ = Nopea.Occurrence.build(@successful_result) :ok = Nopea.Occurrence.persist(occ, workdir) From 0d7114415863c612df57a44a2a2d8b2048471c30 Mon Sep 17 00:00:00 2001 From: Yair Etziony Date: Tue, 3 Mar 2026 02:19:48 +0100 Subject: [PATCH 2/2] fix: narrow Node.new/3 spec to NodeKind.t() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Align the @spec with the struct type — new/3 accepts NodeKind.t(), not atom(). Addresses Copilot review comment on PR #33. Co-Authored-By: Claude Opus 4.6 --- lib/nopea/graph/node.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/nopea/graph/node.ex b/lib/nopea/graph/node.ex index 5d88812..7b33faa 100644 --- a/lib/nopea/graph/node.ex +++ b/lib/nopea/graph/node.ex @@ -23,7 +23,7 @@ defmodule Nopea.Graph.Node do @node_death_threshold 0.01 - @spec new(atom(), String.t(), String.t()) :: t() + @spec new(NodeKind.t(), String.t(), String.t()) :: t() def new(kind, name, ulid) when is_atom(kind) and is_binary(name) and is_binary(ulid) do true = NodeKind.valid?(kind)