From 94c286fd5610f8c4e2677c2a7e1b2e1d7d46a3da Mon Sep 17 00:00:00 2001 From: Ian Miller Date: Tue, 9 Jun 2026 11:55:14 +0100 Subject: [PATCH 1/9] feat(status): add observedGeneration to AgentRuntimeStatus Tracks the most recent metadata.generation processed by the controller, enabling clients to detect stale status via generation skew. Assisted-By: Claude (Anthropic AI) Signed-off-by: Ian Miller --- .../crds/agent.kagenti.dev_agentruntimes.yaml | 5 ++++ .../api/v1alpha1/agentruntime_types.go | 4 +++ .../agent.kagenti.dev_agentruntimes.yaml | 5 ++++ .../controller/agentruntime_controller.go | 2 ++ .../agentruntime_controller_test.go | 25 +++++++++++++++++++ 5 files changed, 41 insertions(+) diff --git a/charts/kagenti-operator/crds/agent.kagenti.dev_agentruntimes.yaml b/charts/kagenti-operator/crds/agent.kagenti.dev_agentruntimes.yaml index 49176751..2fcae00f 100644 --- a/charts/kagenti-operator/crds/agent.kagenti.dev_agentruntimes.yaml +++ b/charts/kagenti-operator/crds/agent.kagenti.dev_agentruntimes.yaml @@ -486,6 +486,11 @@ spec: items: type: string type: array + observedGeneration: + description: ObservedGeneration is the most recent generation observed + by the controller. + format: int64 + type: integer phase: description: Phase is the high-level state of the AgentRuntime enum: diff --git a/kagenti-operator/api/v1alpha1/agentruntime_types.go b/kagenti-operator/api/v1alpha1/agentruntime_types.go index b192be5a..32439c3f 100644 --- a/kagenti-operator/api/v1alpha1/agentruntime_types.go +++ b/kagenti-operator/api/v1alpha1/agentruntime_types.go @@ -188,6 +188,10 @@ type CardStatus struct { // AgentRuntimeStatus defines the observed state of AgentRuntime. type AgentRuntimeStatus struct { + // ObservedGeneration is the most recent generation observed by the controller. + // +optional + ObservedGeneration int64 `json:"observedGeneration,omitempty"` + // Phase is the high-level state of the AgentRuntime // +optional Phase RuntimePhase `json:"phase,omitempty"` diff --git a/kagenti-operator/config/crd/bases/agent.kagenti.dev_agentruntimes.yaml b/kagenti-operator/config/crd/bases/agent.kagenti.dev_agentruntimes.yaml index 49176751..2fcae00f 100644 --- a/kagenti-operator/config/crd/bases/agent.kagenti.dev_agentruntimes.yaml +++ b/kagenti-operator/config/crd/bases/agent.kagenti.dev_agentruntimes.yaml @@ -486,6 +486,11 @@ spec: items: type: string type: array + observedGeneration: + description: ObservedGeneration is the most recent generation observed + by the controller. + format: int64 + type: integer phase: description: Phase is the high-level state of the AgentRuntime enum: diff --git a/kagenti-operator/internal/controller/agentruntime_controller.go b/kagenti-operator/internal/controller/agentruntime_controller.go index eefc9ceb..9819bbbc 100644 --- a/kagenti-operator/internal/controller/agentruntime_controller.go +++ b/kagenti-operator/internal/controller/agentruntime_controller.go @@ -137,6 +137,8 @@ func (r *AgentRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request return ctrl.Result{}, client.IgnoreNotFound(err) } + rt.Status.ObservedGeneration = rt.Generation + // 2. Handle deletion if !rt.DeletionTimestamp.IsZero() { return r.handleDeletion(ctx, rt) diff --git a/kagenti-operator/internal/controller/agentruntime_controller_test.go b/kagenti-operator/internal/controller/agentruntime_controller_test.go index 5d5fa179..d086a4e8 100644 --- a/kagenti-operator/internal/controller/agentruntime_controller_test.go +++ b/kagenti-operator/internal/controller/agentruntime_controller_test.go @@ -252,6 +252,31 @@ var _ = Describe("AgentRuntime Controller", func() { }) }) + Context("When setting observedGeneration", func() { + It("should set status.observedGeneration to metadata.generation after reconcile", func() { + dep := newDeployment("obsgen-deploy", namespace) + Expect(k8sClient.Create(ctx, dep)).To(Succeed()) + defer func() { _ = k8sClient.Delete(ctx, dep) }() + + rt := newAgentRuntime("obsgen-rt", namespace, "obsgen-deploy", agentv1alpha1.RuntimeTypeAgent) + Expect(k8sClient.Create(ctx, rt)).To(Succeed()) + defer func() { _ = k8sClient.Delete(ctx, rt) }() + + r := newReconciler() + nn := types.NamespacedName{Name: "obsgen-rt", Namespace: namespace} + + // First reconcile: adds finalizer + _, _ = r.Reconcile(ctx, reconcile.Request{NamespacedName: nn}) + // Second reconcile: full reconcile + _, err := r.Reconcile(ctx, reconcile.Request{NamespacedName: nn}) + Expect(err).NotTo(HaveOccurred()) + + updated := &agentv1alpha1.AgentRuntime{} + Expect(k8sClient.Get(ctx, nn, updated)).To(Succeed()) + Expect(updated.Status.ObservedGeneration).To(Equal(updated.Generation)) + }) + }) + Context("When setting status", func() { It("should set status to Active with Ready condition", func() { dep := newDeployment("status-deploy", namespace) From 99405fa99579d8aed828dfbd08ba5092487be34d Mon Sep 17 00:00:00 2001 From: Ian Miller Date: Tue, 9 Jun 2026 11:57:21 +0100 Subject: [PATCH 2/9] refactor(status): remove Phase from AgentRuntimeStatus Phase (Pending/Active/Error) was redundant with conditions. Clients should use Ready condition and observedGeneration instead. Assisted-By: Claude (Anthropic AI) Signed-off-by: Ian Miller --- .../crds/agent.kagenti.dev_agentruntimes.yaml | 11 ----------- .../api/v1alpha1/agentruntime_types.go | 14 -------------- .../crd/bases/agent.kagenti.dev_agentruntimes.yaml | 11 ----------- .../controller/agentruntime_config_test.go | 3 --- .../internal/controller/agentruntime_controller.go | 5 ----- .../controller/agentruntime_controller_test.go | 5 +---- 6 files changed, 1 insertion(+), 48 deletions(-) diff --git a/charts/kagenti-operator/crds/agent.kagenti.dev_agentruntimes.yaml b/charts/kagenti-operator/crds/agent.kagenti.dev_agentruntimes.yaml index 2fcae00f..9a556af3 100644 --- a/charts/kagenti-operator/crds/agent.kagenti.dev_agentruntimes.yaml +++ b/charts/kagenti-operator/crds/agent.kagenti.dev_agentruntimes.yaml @@ -26,10 +26,6 @@ spec: jsonPath: .spec.targetRef.name name: Target type: string - - description: Runtime Phase - jsonPath: .status.phase - name: Phase - type: string - description: Card Fetch Status jsonPath: .status.conditions[?(@.type=='CardFetched')].status name: CardFetched @@ -491,13 +487,6 @@ spec: by the controller. format: int64 type: integer - phase: - description: Phase is the high-level state of the AgentRuntime - enum: - - Pending - - Active - - Error - type: string type: object required: - spec diff --git a/kagenti-operator/api/v1alpha1/agentruntime_types.go b/kagenti-operator/api/v1alpha1/agentruntime_types.go index 32439c3f..88ca64c3 100644 --- a/kagenti-operator/api/v1alpha1/agentruntime_types.go +++ b/kagenti-operator/api/v1alpha1/agentruntime_types.go @@ -28,15 +28,6 @@ const ( RuntimeTypeTool RuntimeType = "tool" ) -// +kubebuilder:validation:Enum=Pending;Active;Error -type RuntimePhase string - -const ( - RuntimePhasePending RuntimePhase = "Pending" - RuntimePhaseActive RuntimePhase = "Active" - RuntimePhaseError RuntimePhase = "Error" -) - // +kubebuilder:validation:Enum=mtls;http type TransportSecurity string @@ -192,10 +183,6 @@ type AgentRuntimeStatus struct { // +optional ObservedGeneration int64 `json:"observedGeneration,omitempty"` - // Phase is the high-level state of the AgentRuntime - // +optional - Phase RuntimePhase `json:"phase,omitempty"` - // ConfiguredPods is the count of pods with expected labels/config // +optional ConfiguredPods int32 `json:"configuredPods,omitempty"` @@ -221,7 +208,6 @@ type AgentRuntimeStatus struct { // +kubebuilder:resource:shortName=art;agentrt // +kubebuilder:printcolumn:name="Type",type="string",JSONPath=".spec.type",description="Workload Type" // +kubebuilder:printcolumn:name="Target",type="string",JSONPath=".spec.targetRef.name",description="Target Workload" -// +kubebuilder:printcolumn:name="Phase",type="string",JSONPath=".status.phase",description="Runtime Phase" // +kubebuilder:printcolumn:name="CardFetched",type="string",JSONPath=".status.conditions[?(@.type=='CardFetched')].status",description="Card Fetch Status",priority=1 // +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp" diff --git a/kagenti-operator/config/crd/bases/agent.kagenti.dev_agentruntimes.yaml b/kagenti-operator/config/crd/bases/agent.kagenti.dev_agentruntimes.yaml index 2fcae00f..9a556af3 100644 --- a/kagenti-operator/config/crd/bases/agent.kagenti.dev_agentruntimes.yaml +++ b/kagenti-operator/config/crd/bases/agent.kagenti.dev_agentruntimes.yaml @@ -26,10 +26,6 @@ spec: jsonPath: .spec.targetRef.name name: Target type: string - - description: Runtime Phase - jsonPath: .status.phase - name: Phase - type: string - description: Card Fetch Status jsonPath: .status.conditions[?(@.type=='CardFetched')].status name: CardFetched @@ -491,13 +487,6 @@ spec: by the controller. format: int64 type: integer - phase: - description: Phase is the high-level state of the AgentRuntime - enum: - - Pending - - Active - - Error - type: string type: object required: - spec diff --git a/kagenti-operator/internal/controller/agentruntime_config_test.go b/kagenti-operator/internal/controller/agentruntime_config_test.go index 0c422c89..6c53f73c 100644 --- a/kagenti-operator/internal/controller/agentruntime_config_test.go +++ b/kagenti-operator/internal/controller/agentruntime_config_test.go @@ -334,9 +334,6 @@ var _ = Describe("AgentRuntime Config", func() { updated := &agentv1alpha1.AgentRuntime{} Expect(k8sClient.Get(ctx, nn, updated)).To(Succeed()) - // Should still be Active (warning doesn't block reconciliation) - Expect(updated.Status.Phase).To(Equal(agentv1alpha1.RuntimePhaseActive)) - // Should have ConfigResolved condition with warning var configCond *metav1.Condition for i := range updated.Status.Conditions { diff --git a/kagenti-operator/internal/controller/agentruntime_controller.go b/kagenti-operator/internal/controller/agentruntime_controller.go index 9819bbbc..bf500be5 100644 --- a/kagenti-operator/internal/controller/agentruntime_controller.go +++ b/kagenti-operator/internal/controller/agentruntime_controller.go @@ -270,7 +270,6 @@ func (r *AgentRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request // 8. Update status (retry on conflict to preserve all conditions computed above) rt.Status.ConfiguredPods = configuredPods - r.setPhase(rt, agentv1alpha1.RuntimePhaseActive) r.setCondition(rt, ConditionTypeReady, metav1.ConditionTrue, "Configured", fmt.Sprintf("Workload %s configured with config-hash %s", rt.Spec.TargetRef.Name, configResult.Hash[:12])) if fg.SkillDiscovery { @@ -772,10 +771,6 @@ func (r *AgentRuntimeReconciler) handleDeletion(ctx context.Context, rt *agentv1 return ctrl.Result{}, nil } -func (r *AgentRuntimeReconciler) setPhase(rt *agentv1alpha1.AgentRuntime, phase agentv1alpha1.RuntimePhase) { - rt.Status.Phase = phase -} - func (r *AgentRuntimeReconciler) setCondition(rt *agentv1alpha1.AgentRuntime, condType string, status metav1.ConditionStatus, reason, message string) { meta.SetStatusCondition(&rt.Status.Conditions, metav1.Condition{ Type: condType, diff --git a/kagenti-operator/internal/controller/agentruntime_controller_test.go b/kagenti-operator/internal/controller/agentruntime_controller_test.go index d086a4e8..21e4cdf6 100644 --- a/kagenti-operator/internal/controller/agentruntime_controller_test.go +++ b/kagenti-operator/internal/controller/agentruntime_controller_test.go @@ -297,7 +297,6 @@ var _ = Describe("AgentRuntime Controller", func() { updated := &agentv1alpha1.AgentRuntime{} Expect(k8sClient.Get(ctx, nn, updated)).To(Succeed()) - Expect(updated.Status.Phase).To(Equal(agentv1alpha1.RuntimePhaseActive)) Expect(updated.Status.Conditions).NotTo(BeEmpty()) var readyCond *metav1.Condition @@ -360,7 +359,7 @@ var _ = Describe("AgentRuntime Controller", func() { _ = k8sClient.Delete(ctx, rt) }) - It("should set Error phase and TargetNotFound condition", func() { + It("should set TargetNotFound condition", func() { r := newReconciler() // First reconcile: adds finalizer @@ -377,8 +376,6 @@ var _ = Describe("AgentRuntime Controller", func() { updated := &agentv1alpha1.AgentRuntime{} Expect(k8sClient.Get(ctx, types.NamespacedName{Name: "rt-no-target", Namespace: namespace}, updated)).To(Succeed()) - Expect(updated.Status.Phase).To(Equal(agentv1alpha1.RuntimePhaseError)) - var targetCond *metav1.Condition for i := range updated.Status.Conditions { if updated.Status.Conditions[i].Type == ConditionTypeTargetResolved { From ca4f0ef7fcea13149f59b61f685fa170158705f7 Mon Sep 17 00:00:00 2001 From: Ian Miller Date: Tue, 9 Jun 2026 12:08:44 +0100 Subject: [PATCH 3/9] refactor(status): remove CardFetched and SkillsDiscovered conditions These fine-grained conditions added noise without actionable signal. Card fetch and skill discovery status is still observable via status.card and status.linkedSkills fields respectively. Assisted-By: Claude (Anthropic AI) Signed-off-by: Ian Miller --- .../controller/agentruntime_controller.go | 22 +- .../agentruntime_controller_test.go | 236 +----------------- kagenti-operator/test/e2e/e2e_test.go | 55 ---- 3 files changed, 4 insertions(+), 309 deletions(-) diff --git a/kagenti-operator/internal/controller/agentruntime_controller.go b/kagenti-operator/internal/controller/agentruntime_controller.go index bf500be5..6ea022c0 100644 --- a/kagenti-operator/internal/controller/agentruntime_controller.go +++ b/kagenti-operator/internal/controller/agentruntime_controller.go @@ -69,11 +69,9 @@ const ( AnnotationRestartPending = "kagenti.io/restart-pending" // Condition types for AgentRuntime status. - ConditionTypeReady = "Ready" - ConditionTypeTargetResolved = "TargetResolved" - ConditionTypeConfigResolved = "ConfigResolved" - ConditionTypeCardFetched = "CardFetched" - ConditionTypeSkillsDiscovered = "SkillsDiscovered" + ConditionTypeReady = "Ready" + ConditionTypeTargetResolved = "TargetResolved" + ConditionTypeConfigResolved = "ConfigResolved" // AnnotationLastCardFetchHash stores the change-detection key used to skip // redundant card fetches when the workload's pod template has not changed. @@ -807,8 +805,6 @@ func (r *AgentRuntimeReconciler) fetchAndUpdateCard(ctx context.Context, rt *age if !r.EnableCardDiscovery { if rt.Status.Card != nil { rt.Status.Card = nil - r.setCondition(rt, ConditionTypeCardFetched, metav1.ConditionFalse, "DiscoveryDisabled", - "Card discovery is disabled; stale card data cleared") } return } @@ -820,21 +816,17 @@ func (r *AgentRuntimeReconciler) fetchAndUpdateCard(ctx context.Context, rt *age lastHash = annotations[AnnotationLastCardFetchHash] } if changeKey != "" && changeKey == lastHash && rt.Status.Card != nil { - r.setCondition(rt, ConditionTypeCardFetched, metav1.ConditionTrue, "FetchSkipped", - "Pod template unchanged; existing card data still valid") return } if ready, msg := r.checkWorkloadReady(ctx, rt.Namespace, rt.Spec.TargetRef); !ready { logger.V(1).Info("Workload not ready for card discovery", "reason", msg) - r.setCondition(rt, ConditionTypeCardFetched, metav1.ConditionFalse, "WorkloadNotReady", msg) return } svc, port, err := r.resolveServiceForWorkload(ctx, rt.Namespace, rt.Spec.TargetRef) if err != nil { logger.V(1).Info("Service resolution failed for card discovery", "error", err) - r.setCondition(rt, ConditionTypeCardFetched, metav1.ConditionFalse, "ServiceNotFound", err.Error()) return } @@ -842,7 +834,6 @@ func (r *AgentRuntimeReconciler) fetchAndUpdateCard(ctx context.Context, rt *age cardData, fetchResult, transportSecurity, err := r.fetchCard(ctx, rt, svc, port, protocol) if err != nil { logger.Error(err, "Card fetch failed", "workload", rt.Spec.TargetRef.Name) - r.setCondition(rt, ConditionTypeCardFetched, metav1.ConditionFalse, "FetchFailed", err.Error()) return } @@ -880,13 +871,6 @@ func (r *AgentRuntimeReconciler) fetchAndUpdateCard(ctx context.Context, rt *age rt.Status.Card = cardStatus - conditionReason := "Fetched" - if transportSecurity == agentv1alpha1.TransportSecurityHTTP { - conditionReason = "FetchedInsecure" - } - r.setCondition(rt, ConditionTypeCardFetched, metav1.ConditionTrue, conditionReason, - fmt.Sprintf("Successfully fetched agent card for %s", cardData.Name)) - r.persistCardFetchAnnotation(ctx, rt, changeKey) } diff --git a/kagenti-operator/internal/controller/agentruntime_controller_test.go b/kagenti-operator/internal/controller/agentruntime_controller_test.go index 21e4cdf6..2700202a 100644 --- a/kagenti-operator/internal/controller/agentruntime_controller_test.go +++ b/kagenti-operator/internal/controller/agentruntime_controller_test.go @@ -18,7 +18,6 @@ package controller import ( "context" - "fmt" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -868,15 +867,6 @@ var _ = Describe("AgentRuntime Controller", func() { } r.fetchAndUpdateCard(ctx, rt) Expect(rt.Status.Card).To(BeNil()) - - var cardCond *metav1.Condition - for i := range rt.Status.Conditions { - if rt.Status.Conditions[i].Type == ConditionTypeCardFetched { - cardCond = &rt.Status.Conditions[i] - break - } - } - Expect(cardCond).To(BeNil(), "No CardFetched condition should be set when card was already nil") }) It("should clear existing card data when feature flag is disabled", func() { @@ -897,17 +887,6 @@ var _ = Describe("AgentRuntime Controller", func() { } r.fetchAndUpdateCard(ctx, rt) Expect(rt.Status.Card).To(BeNil()) - - var cardCond *metav1.Condition - for i := range rt.Status.Conditions { - if rt.Status.Conditions[i].Type == ConditionTypeCardFetched { - cardCond = &rt.Status.Conditions[i] - break - } - } - Expect(cardCond).NotTo(BeNil()) - Expect(cardCond.Status).To(Equal(metav1.ConditionFalse)) - Expect(cardCond.Reason).To(Equal("DiscoveryDisabled")) }) It("should set ServiceNotFound condition when no service exists", func() { @@ -930,17 +909,6 @@ var _ = Describe("AgentRuntime Controller", func() { } r.fetchAndUpdateCard(ctx, rt) Expect(rt.Status.Card).To(BeNil()) - - var cardCond *metav1.Condition - for i := range rt.Status.Conditions { - if rt.Status.Conditions[i].Type == ConditionTypeCardFetched { - cardCond = &rt.Status.Conditions[i] - break - } - } - Expect(cardCond).NotTo(BeNil()) - Expect(cardCond.Status).To(Equal(metav1.ConditionFalse)) - Expect(cardCond.Reason).To(Equal("ServiceNotFound")) }) }) @@ -971,16 +939,6 @@ var _ = Describe("AgentRuntime Controller", func() { Expect(rt.Status.Card).NotTo(BeNil(), "existing card data should be retained on fetch failure") Expect(rt.Status.Card.Name).To(Equal("previous-agent")) Expect(rt.Status.Card.CardHash).To(Equal("abc123")) - - var cardCond *metav1.Condition - for i := range rt.Status.Conditions { - if rt.Status.Conditions[i].Type == ConditionTypeCardFetched { - cardCond = &rt.Status.Conditions[i] - break - } - } - Expect(cardCond).NotTo(BeNil()) - Expect(cardCond.Status).To(Equal(metav1.ConditionFalse)) }) }) @@ -993,7 +951,6 @@ var _ = Describe("AgentRuntime Controller", func() { r := &AgentRuntimeReconciler{Client: k8sClient, EnableCardDiscovery: false} r.fetchAndUpdateCard(ctx, rt) Expect(rt.Status.Card).To(BeNil()) - // No CardFetched condition should be set when card was already nil }) It("should clear populated card data when flag is toggled off", func() { @@ -1014,7 +971,7 @@ var _ = Describe("AgentRuntime Controller", func() { }) Context("Card annotation patch must not wipe in-memory status", func() { - It("should persist CardFetched condition and card data after annotation patch", func() { + It("should persist card data and conditions after annotation patch", func() { depName := "card-patch-deploy" svcName := depName dep := newDeployment(depName, namespace) @@ -1066,12 +1023,6 @@ var _ = Describe("AgentRuntime Controller", func() { Expect(rt.Status.Card.Version).To(Equal("2.0")) Expect(rt.Status.Card.CardHash).NotTo(BeEmpty()) - // CardFetched condition must survive (stub fetcher uses plain HTTP path) - cardCond := meta.FindStatusCondition(rt.Status.Conditions, ConditionTypeCardFetched) - Expect(cardCond).NotTo(BeNil(), "CardFetched condition must not be wiped by annotation patch") - Expect(cardCond.Status).To(Equal(metav1.ConditionTrue)) - Expect(cardCond.Reason).To(Equal("FetchedInsecure")) - // Conditions set before fetchAndUpdateCard must also survive targetCond := meta.FindStatusCondition(rt.Status.Conditions, ConditionTypeTargetResolved) Expect(targetCond).NotTo(BeNil(), "TargetResolved condition must not be wiped by annotation patch") @@ -1113,11 +1064,6 @@ var _ = Describe("AgentRuntime Controller", func() { Expect(rt.Status.Card).NotTo(BeNil()) Expect(rt.Status.Card.TransportSecurity).To(Equal(agentv1alpha1.TransportSecurityHTTP)) - - cardCond := meta.FindStatusCondition(rt.Status.Conditions, ConditionTypeCardFetched) - Expect(cardCond).NotTo(BeNil()) - Expect(cardCond.Status).To(Equal(metav1.ConditionTrue)) - Expect(cardCond.Reason).To(Equal("FetchedInsecure")) }) It("should set transportSecurity mTLS and reason Fetched for authenticated fetcher", func() { @@ -1159,11 +1105,6 @@ var _ = Describe("AgentRuntime Controller", func() { Expect(rt.Status.Card).NotTo(BeNil()) Expect(rt.Status.Card.TransportSecurity).To(Equal(agentv1alpha1.TransportSecurityMTLS)) Expect(rt.Status.Card.AttestedAgentSpiffeID).To(Equal("spiffe://trust.domain/agent")) - - cardCond := meta.FindStatusCondition(rt.Status.Conditions, ConditionTypeCardFetched) - Expect(cardCond).NotTo(BeNil()) - Expect(cardCond.Status).To(Equal(metav1.ConditionTrue)) - Expect(cardCond.Reason).To(Equal("Fetched")) }) It("should update transport security when transport changes on re-fetch", func() { @@ -1220,181 +1161,6 @@ var _ = Describe("AgentRuntime Controller", func() { r2.fetchAndUpdateCard(ctx, rt) Expect(rt.Status.Card).NotTo(BeNil()) Expect(rt.Status.Card.TransportSecurity).To(Equal(agentv1alpha1.TransportSecurityHTTP)) - - cardCond := meta.FindStatusCondition(rt.Status.Conditions, ConditionTypeCardFetched) - Expect(cardCond).NotTo(BeNil()) - Expect(cardCond.Reason).To(Equal("FetchedInsecure")) - }) - }) - - Context("Unified condition model (US2)", func() { - It("should set WorkloadNotReady when Deployment has zero readyReplicas", func() { - depName := "unready-deploy" - dep := newDeployment(depName, namespace) - Expect(k8sClient.Create(ctx, dep)).To(Succeed()) - defer func() { _ = k8sClient.Delete(ctx, dep) }() - - // Deployment starts with 0 readyReplicas (default) - - svc := &corev1.Service{ - ObjectMeta: metav1.ObjectMeta{Name: depName, Namespace: namespace}, - Spec: corev1.ServiceSpec{ - Selector: map[string]string{"app": depName}, - Ports: []corev1.ServicePort{{Name: "http", Port: 8080, Protocol: corev1.ProtocolTCP}}, - }, - } - Expect(k8sClient.Create(ctx, svc)).To(Succeed()) - defer func() { _ = k8sClient.Delete(ctx, svc) }() - - rt := newAgentRuntime("unready-rt", namespace, depName, agentv1alpha1.RuntimeTypeAgent) - Expect(k8sClient.Create(ctx, rt)).To(Succeed()) - defer func() { _ = k8sClient.Delete(ctx, rt) }() - - r := &AgentRuntimeReconciler{ - Client: k8sClient, - EnableCardDiscovery: true, - AgentFetcher: &stubCardFetcher{ - card: &agentv1alpha1.AgentCardData{Name: "Agent", Version: "1.0"}, - }, - } - r.fetchAndUpdateCard(ctx, rt) - - Expect(rt.Status.Card).To(BeNil()) - cardCond := meta.FindStatusCondition(rt.Status.Conditions, ConditionTypeCardFetched) - Expect(cardCond).NotTo(BeNil()) - Expect(cardCond.Status).To(Equal(metav1.ConditionFalse)) - Expect(cardCond.Reason).To(Equal("WorkloadNotReady")) - }) - - It("should set ServiceNotFound when workload is ready but no Service exists", func() { - depName := "ready-no-svc-deploy" - dep := newDeployment(depName, namespace) - Expect(k8sClient.Create(ctx, dep)).To(Succeed()) - defer func() { _ = k8sClient.Delete(ctx, dep) }() - - setDeploymentReady(depName, namespace) - - rt := newAgentRuntime("ready-no-svc-rt", namespace, depName, agentv1alpha1.RuntimeTypeAgent) - Expect(k8sClient.Create(ctx, rt)).To(Succeed()) - defer func() { _ = k8sClient.Delete(ctx, rt) }() - - r := &AgentRuntimeReconciler{ - Client: k8sClient, - EnableCardDiscovery: true, - AgentFetcher: &stubCardFetcher{ - card: &agentv1alpha1.AgentCardData{Name: "Agent", Version: "1.0"}, - }, - } - r.fetchAndUpdateCard(ctx, rt) - - Expect(rt.Status.Card).To(BeNil()) - cardCond := meta.FindStatusCondition(rt.Status.Conditions, ConditionTypeCardFetched) - Expect(cardCond).NotTo(BeNil()) - Expect(cardCond.Status).To(Equal(metav1.ConditionFalse)) - Expect(cardCond.Reason).To(Equal("ServiceNotFound")) - }) - - It("should set DiscoveryDisabled when feature flag is off", func() { - now := metav1.Now() - rt := &agentv1alpha1.AgentRuntime{ - ObjectMeta: metav1.ObjectMeta{Name: "us2-disabled-rt", Namespace: namespace}, - Status: agentv1alpha1.AgentRuntimeStatus{ - Card: &agentv1alpha1.CardStatus{ - AgentCardData: agentv1alpha1.AgentCardData{Name: "old"}, - LastCardFetchTime: &now, - }, - }, - } - r := &AgentRuntimeReconciler{Client: k8sClient, EnableCardDiscovery: false} - r.fetchAndUpdateCard(ctx, rt) - - Expect(rt.Status.Card).To(BeNil()) - cardCond := meta.FindStatusCondition(rt.Status.Conditions, ConditionTypeCardFetched) - Expect(cardCond).NotTo(BeNil()) - Expect(cardCond.Status).To(Equal(metav1.ConditionFalse)) - Expect(cardCond.Reason).To(Equal("DiscoveryDisabled")) - }) - }) - - Context("FetchSkipped and FetchFailed conditions (US2)", func() { - It("should set FetchSkipped when pod template has not changed", func() { - depName := "skip-fetch-deploy" - dep := newDeployment(depName, namespace) - Expect(k8sClient.Create(ctx, dep)).To(Succeed()) - defer func() { _ = k8sClient.Delete(ctx, dep) }() - setDeploymentReady(depName, namespace) - - svc := &corev1.Service{ - ObjectMeta: metav1.ObjectMeta{Name: depName, Namespace: namespace}, - Spec: corev1.ServiceSpec{ - Selector: map[string]string{"app": depName}, - Ports: []corev1.ServicePort{{Name: "http", Port: 8080, Protocol: corev1.ProtocolTCP}}, - }, - } - Expect(k8sClient.Create(ctx, svc)).To(Succeed()) - defer func() { _ = k8sClient.Delete(ctx, svc) }() - - rt := newAgentRuntime("skip-fetch-rt", namespace, depName, agentv1alpha1.RuntimeTypeAgent) - Expect(k8sClient.Create(ctx, rt)).To(Succeed()) - defer func() { _ = k8sClient.Delete(ctx, rt) }() - - r := &AgentRuntimeReconciler{ - Client: k8sClient, - EnableCardDiscovery: true, - AgentFetcher: &stubCardFetcher{ - card: &agentv1alpha1.AgentCardData{Name: "Agent", Version: "1.0"}, - }, - } - - // First fetch succeeds and persists the change key annotation - r.fetchAndUpdateCard(ctx, rt) - Expect(rt.Status.Card).NotTo(BeNil()) - - // Second fetch with unchanged template should skip - r.fetchAndUpdateCard(ctx, rt) - - cardCond := meta.FindStatusCondition(rt.Status.Conditions, ConditionTypeCardFetched) - Expect(cardCond).NotTo(BeNil()) - Expect(cardCond.Status).To(Equal(metav1.ConditionTrue)) - Expect(cardCond.Reason).To(Equal("FetchSkipped")) - }) - - It("should set FetchFailed when fetcher returns an error", func() { - depName := "fetch-fail-deploy" - dep := newDeployment(depName, namespace) - Expect(k8sClient.Create(ctx, dep)).To(Succeed()) - defer func() { _ = k8sClient.Delete(ctx, dep) }() - setDeploymentReady(depName, namespace) - - svc := &corev1.Service{ - ObjectMeta: metav1.ObjectMeta{Name: depName, Namespace: namespace}, - Spec: corev1.ServiceSpec{ - Selector: map[string]string{"app": depName}, - Ports: []corev1.ServicePort{{Name: "http", Port: 8080, Protocol: corev1.ProtocolTCP}}, - }, - } - Expect(k8sClient.Create(ctx, svc)).To(Succeed()) - defer func() { _ = k8sClient.Delete(ctx, svc) }() - - rt := newAgentRuntime("fetch-fail-rt", namespace, depName, agentv1alpha1.RuntimeTypeAgent) - Expect(k8sClient.Create(ctx, rt)).To(Succeed()) - defer func() { _ = k8sClient.Delete(ctx, rt) }() - - r := &AgentRuntimeReconciler{ - Client: k8sClient, - EnableCardDiscovery: true, - AgentFetcher: &stubCardFetcher{ - err: fmt.Errorf("connection refused"), - }, - } - r.fetchAndUpdateCard(ctx, rt) - - Expect(rt.Status.Card).To(BeNil()) - cardCond := meta.FindStatusCondition(rt.Status.Conditions, ConditionTypeCardFetched) - Expect(cardCond).NotTo(BeNil()) - Expect(cardCond.Status).To(Equal(metav1.ConditionFalse)) - Expect(cardCond.Reason).To(Equal("FetchFailed")) - Expect(cardCond.Message).To(ContainSubstring("connection refused")) }) }) diff --git a/kagenti-operator/test/e2e/e2e_test.go b/kagenti-operator/test/e2e/e2e_test.go index c35c12ac..9a011818 100644 --- a/kagenti-operator/test/e2e/e2e_test.go +++ b/kagenti-operator/test/e2e/e2e_test.go @@ -2098,15 +2098,6 @@ rules: g.Expect(skills).To(BeEmpty()) }).Should(Succeed()) - By("verifying SkillsDiscovered condition is absent") - Consistently(func(g Gomega) { - status, err := utils.KubectlGetJsonpath("agentruntime", "skill-discovery-agent", - skillDiscoveryTestNamespace, - "{.status.conditions[?(@.type=='SkillsDiscovered')].status}") - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(status).To(BeEmpty(), "SkillsDiscovered condition should not be set when feature gate is disabled") - }, 10*time.Second, 2*time.Second).Should(Succeed()) - By("verifying operator did NOT mutate the Deployment (no skill volumes)") Eventually(func(g Gomega) { volumes, err := utils.KubectlGetJsonpath("deployment", "skill-discovery-agent", @@ -2197,27 +2188,6 @@ rules: g.Expect(raw).To(ContainSubstring("openshift-review")) }).Should(Succeed()) - By("verifying SkillsDiscovered condition is True with reason SkillsFound") - Eventually(func(g Gomega) { - status, err := utils.KubectlGetJsonpath("agentruntime", "skill-discovery-agent", - skillDiscoveryTestNamespace, - "{.status.conditions[?(@.type=='SkillsDiscovered')].status}") - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(status).To(Equal("True")) - - reason, err := utils.KubectlGetJsonpath("agentruntime", "skill-discovery-agent", - skillDiscoveryTestNamespace, - "{.status.conditions[?(@.type=='SkillsDiscovered')].reason}") - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(reason).To(Equal("SkillsFound")) - - message, err := utils.KubectlGetJsonpath("agentruntime", "skill-discovery-agent", - skillDiscoveryTestNamespace, - "{.status.conditions[?(@.type=='SkillsDiscovered')].message}") - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(message).To(ContainSubstring("2 linked skill(s)")) - }).Should(Succeed()) - By("verifying Deployment was NOT mutated (no skill volumes added)") Eventually(func(g Gomega) { volumes, err := utils.KubectlGetJsonpath("deployment", "skill-discovery-agent", @@ -2247,14 +2217,6 @@ rules: g.Expect(raw).To(ContainSubstring("openshift-review")) }).Should(Succeed()) - By("verifying SkillsDiscovered message reflects new count") - Eventually(func(g Gomega) { - message, err := utils.KubectlGetJsonpath("agentruntime", "skill-discovery-agent", - skillDiscoveryTestNamespace, - "{.status.conditions[?(@.type=='SkillsDiscovered')].message}") - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(message).To(ContainSubstring("3 linked skill(s)")) - }).Should(Succeed()) }) It("should clear linkedSkills when annotation is removed", func() { @@ -2274,14 +2236,6 @@ rules: g.Expect(skills).To(BeEmpty()) }).Should(Succeed()) - By("verifying SkillsDiscovered condition is removed") - Eventually(func(g Gomega) { - status, err := utils.KubectlGetJsonpath("agentruntime", "skill-discovery-agent", - skillDiscoveryTestNamespace, - "{.status.conditions[?(@.type=='SkillsDiscovered')].status}") - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(status).To(BeEmpty()) - }).Should(Succeed()) }) It("should update linkedSkills and rollout pods when an OCI skill volume is removed", func() { @@ -2342,15 +2296,6 @@ rules: g.Expect(raw).NotTo(ContainSubstring("openshift-review")) }).Should(Succeed()) - By("verifying SkillsDiscovered message reflects 1 skill") - Eventually(func(g Gomega) { - message, err := utils.KubectlGetJsonpath("agentruntime", "oci-skill-agent", - skillDiscoveryTestNamespace, - "{.status.conditions[?(@.type=='SkillsDiscovered')].message}") - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(message).To(ContainSubstring("1 linked skill(s)")) - }).Should(Succeed()) - By("verifying Deployment only has the summarizer volume") Eventually(func(g Gomega) { volumes, err := utils.KubectlGetJsonpath("deployment", "oci-skill-agent", From 4f3908ace339194ae6395141dd1d254283b5cf72 Mon Sep 17 00:00:00 2001 From: Ian Miller Date: Tue, 9 Jun 2026 12:09:43 +0100 Subject: [PATCH 4/9] refactor(status): move TargetResolved after labeling; add Ready printer column TargetResolved=True now set after applyWorkloadConfig succeeds, not merely after resolveTargetRef. Replaces the CardFetched printer column with Ready for a cleaner `kubectl get agentruntime` output. Assisted-By: Claude (Anthropic AI) Signed-off-by: Ian Miller --- .../crds/agent.kagenti.dev_agentruntimes.yaml | 7 +++---- kagenti-operator/api/v1alpha1/agentruntime_types.go | 2 +- .../config/crd/bases/agent.kagenti.dev_agentruntimes.yaml | 7 +++---- .../internal/controller/agentruntime_controller.go | 6 +++--- 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/charts/kagenti-operator/crds/agent.kagenti.dev_agentruntimes.yaml b/charts/kagenti-operator/crds/agent.kagenti.dev_agentruntimes.yaml index 9a556af3..05504e25 100644 --- a/charts/kagenti-operator/crds/agent.kagenti.dev_agentruntimes.yaml +++ b/charts/kagenti-operator/crds/agent.kagenti.dev_agentruntimes.yaml @@ -26,10 +26,9 @@ spec: jsonPath: .spec.targetRef.name name: Target type: string - - description: Card Fetch Status - jsonPath: .status.conditions[?(@.type=='CardFetched')].status - name: CardFetched - priority: 1 + - description: Ready Status + jsonPath: .status.conditions[?(@.type=='Ready')].status + name: Ready type: string - jsonPath: .metadata.creationTimestamp name: Age diff --git a/kagenti-operator/api/v1alpha1/agentruntime_types.go b/kagenti-operator/api/v1alpha1/agentruntime_types.go index 88ca64c3..4fde168a 100644 --- a/kagenti-operator/api/v1alpha1/agentruntime_types.go +++ b/kagenti-operator/api/v1alpha1/agentruntime_types.go @@ -208,7 +208,7 @@ type AgentRuntimeStatus struct { // +kubebuilder:resource:shortName=art;agentrt // +kubebuilder:printcolumn:name="Type",type="string",JSONPath=".spec.type",description="Workload Type" // +kubebuilder:printcolumn:name="Target",type="string",JSONPath=".spec.targetRef.name",description="Target Workload" -// +kubebuilder:printcolumn:name="CardFetched",type="string",JSONPath=".status.conditions[?(@.type=='CardFetched')].status",description="Card Fetch Status",priority=1 +// +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].status",description="Ready Status" // +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp" // AgentRuntime attaches runtime configuration to a backing workload classified as an diff --git a/kagenti-operator/config/crd/bases/agent.kagenti.dev_agentruntimes.yaml b/kagenti-operator/config/crd/bases/agent.kagenti.dev_agentruntimes.yaml index 9a556af3..05504e25 100644 --- a/kagenti-operator/config/crd/bases/agent.kagenti.dev_agentruntimes.yaml +++ b/kagenti-operator/config/crd/bases/agent.kagenti.dev_agentruntimes.yaml @@ -26,10 +26,9 @@ spec: jsonPath: .spec.targetRef.name name: Target type: string - - description: Card Fetch Status - jsonPath: .status.conditions[?(@.type=='CardFetched')].status - name: CardFetched - priority: 1 + - description: Ready Status + jsonPath: .status.conditions[?(@.type=='Ready')].status + name: Ready type: string - jsonPath: .metadata.creationTimestamp name: Age diff --git a/kagenti-operator/internal/controller/agentruntime_controller.go b/kagenti-operator/internal/controller/agentruntime_controller.go index 6ea022c0..45e2a56b 100644 --- a/kagenti-operator/internal/controller/agentruntime_controller.go +++ b/kagenti-operator/internal/controller/agentruntime_controller.go @@ -162,9 +162,6 @@ func (r *AgentRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request return ctrl.Result{RequeueAfter: 30 * time.Second}, nil } - r.setCondition(rt, ConditionTypeTargetResolved, metav1.ConditionTrue, "TargetFound", - fmt.Sprintf("%s %s resolved", rt.Spec.TargetRef.Kind, rt.Spec.TargetRef.Name)) - // 4.1. Complete two-phase Sandbox restart if pending. if rt.Spec.TargetRef.Kind == KindSandbox { if result, done, err := r.completeSandboxRestart(ctx, rt); done { @@ -253,6 +250,9 @@ func (r *AgentRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request return ctrl.Result{RequeueAfter: 30 * time.Second}, nil } + r.setCondition(rt, ConditionTypeTargetResolved, metav1.ConditionTrue, "TargetFound", + fmt.Sprintf("%s %s resolved", rt.Spec.TargetRef.Kind, rt.Spec.TargetRef.Name)) + // 6.5. Discover linked skills from workload annotation (set by kagenti backend or user) fg := r.getFeatureGates() var linkedSkills []string From 83b0fb2300875fa44e439becd2495d33857da6cc Mon Sep 17 00:00:00 2001 From: Ian Miller Date: Tue, 9 Jun 2026 12:14:03 +0100 Subject: [PATCH 5/9] docs: update for status simplification (Phase removal, condition cleanup) - api-reference.md: add observedGeneration, remove phase, remove SkillsDiscovered condition, update kubectl examples - GETTING_STARTED.md: replace PHASE column with READY - test/e2e/README.md: update scenario descriptions for new status shape - test/e2e/e2e_test.go: replace Phase assertions with Ready/TargetResolved condition checks Assisted-By: Claude (Anthropic AI) Signed-off-by: Ian Miller --- kagenti-operator/GETTING_STARTED.md | 4 +- kagenti-operator/docs/api-reference.md | 12 ++-- kagenti-operator/test/e2e/README.md | 16 +++--- kagenti-operator/test/e2e/e2e_test.go | 76 ++++++++++++-------------- 4 files changed, 48 insertions(+), 60 deletions(-) diff --git a/kagenti-operator/GETTING_STARTED.md b/kagenti-operator/GETTING_STARTED.md index e67b2377..68d9361e 100644 --- a/kagenti-operator/GETTING_STARTED.md +++ b/kagenti-operator/GETTING_STARTED.md @@ -141,8 +141,8 @@ The controller will: kubectl get agentruntime -n team1 # Example output: -# NAME TYPE TARGET PHASE AGE -# weather-agent-runtime agent weather-agent Active 2m +# NAME TYPE TARGET READY AGE +# weather-agent-runtime agent weather-agent True 2m # View detailed conditions kubectl describe agentruntime weather-agent-runtime -n team1 diff --git a/kagenti-operator/docs/api-reference.md b/kagenti-operator/docs/api-reference.md index ed719b06..102a57ae 100644 --- a/kagenti-operator/docs/api-reference.md +++ b/kagenti-operator/docs/api-reference.md @@ -462,7 +462,7 @@ The AgentRuntime controller applies the following labels and annotations to the | Field | Type | Description | |-------|------|-------------| -| `phase` | string | High-level state of the AgentRuntime (`Pending`, `Active`, or `Error`) | +| `observedGeneration` | int64 | Most recent generation observed by the controller | | `configuredPods` | int32 | Count of pods with expected labels/configuration | | `conditions` | [][Condition](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#condition-v1-meta) | Current state of the AgentRuntime | @@ -480,7 +480,6 @@ The AgentRuntime controller applies the following labels and annotations to the | `IstioMeshEnrolled` | True | `NamespaceLabeled` | Namespace labeled with `istio-discovery=enabled` and `istio.io/dataplane-mode=ambient` for Istio ambient mesh enrollment | | `IstioMeshEnrolled` | False | `OptedOut` | Namespace has `kagenti.io/istio-mesh=disabled` annotation; Istio mesh labels not applied | | `IstioMeshEnrolled` | False | `PatchFailed` | Failed to patch namespace labels (e.g., RBAC misconfiguration). Non-fatal; reconcile continues. | -| `SkillsDiscovered` | True | `SkillsFound` | Linked skills discovered from `kagenti.io/skills` annotation on the target workload | | `SkillsMounted` | True | `SkillsApplied` | OCI skill ImageVolumes applied to the target workload | | `SkillsMounted` | False | `FeatureGateDisabled` | Skills defined but `skillImageVolumes` feature gate is disabled | | `SkillsMounted` | False | `UnsupportedWorkloadKind` | Skills defined but the target workload kind (e.g., Sandbox) does not support skill ImageVolumes | @@ -586,16 +585,13 @@ kubectl get art kubectl get agentruntimes # Example output: -# NAME TYPE TARGET PHASE AGE -# weather-agent-runtime agent weather-agent Active 5m -# calculator-tool-runtime tool calculator-tool Active 3m +# NAME TYPE TARGET READY AGE +# weather-agent-runtime agent weather-agent True 5m +# calculator-tool-runtime tool calculator-tool True 3m # Get detailed information kubectl describe agentruntime weather-agent-runtime -# View runtime phase -kubectl get art weather-agent-runtime -o jsonpath='{.status.phase}' - # View configured pods count kubectl get art weather-agent-runtime -o jsonpath='{.status.configuredPods}' ``` diff --git a/kagenti-operator/test/e2e/README.md b/kagenti-operator/test/e2e/README.md index 8eba5015..a02472ef 100644 --- a/kagenti-operator/test/e2e/README.md +++ b/kagenti-operator/test/e2e/README.md @@ -81,10 +81,10 @@ kind delete cluster | Audit mode | With signature | Unsigned card syncs (Synced=True) but reports SignatureVerified=False with reason SignatureInvalidAudit | | Signed agent | With signature | SPIRE-signed card gets SignatureVerified=True, correct SPIFFE ID, Synced=True, and Bound=True | | Apply labels and config-hash | Agent lifecycle | AgentRuntime controller adds `kagenti.io/type=agent`, `managed-by`, config-hash, and triggers AgentCard auto-creation | -| Phase=Active and Ready=True | Agent lifecycle | AgentRuntime CR reaches Active phase with Ready=True condition | +| Ready=True | Agent lifecycle | AgentRuntime CR reaches Ready=True condition | | Idempotent re-reconcile | Agent lifecycle | Deployment generation stays stable over 30s (no spurious updates) | | Clean up on deletion | Agent lifecycle | Deletion preserves `kagenti.io/type`, removes `managed-by`, config-hash stays the same (no CR fields in hash) | -| Missing target error | Error cases | AgentRuntime targeting non-existent Deployment sets Phase=Error | +| Missing target error | Error cases | AgentRuntime targeting non-existent Deployment sets TargetResolved=False | | Tool type label | Tool type | AgentRuntime with type=tool applies `kagenti.io/type=tool` label and no AgentCard is created | | StatefulSet target | StatefulSet target | AgentRuntime applies labels, config-hash, and reaches Active for a StatefulSet workload | | Identity/trace overrides | Identity and trace overrides | AgentRuntime with identity+trace spec produces the same config-hash as a minimal CR (CR fields excluded from hash) | @@ -200,8 +200,8 @@ AgentRuntime controller flow: │ spec.type: agent │────▶│ Resolve target │◀───┘ │ runtime-ns-defaults ConfigMap │ │ spec.targetRef: │ │ Resolve config (2-layer)│◀────────│ (namespace defaults, layer 2) │ │ name: runtime-agent- │ │ Apply labels + hash │ │ │ -│ target │ │ Set Phase=Active │ │ runtime-agent-target Deployment │ -└───────────────────────────┘ └──────────┬───────────────┘ │ runtime-tool-target Deployment │ +│ target │ └──────────┬───────────────┘ │ runtime-agent-target Deployment │ +└───────────────────────────┘ │ │ runtime-tool-target Deployment │ │ │ runtime-sts-target StatefulSet │ │ │ runtime-minimal-target Deploy. │ │ │ runtime-overrides-target Deploy.│ @@ -313,11 +313,11 @@ verifies the cross-controller interaction: once `kagenti.io/type=agent` is appli the existing `protocol.kagenti.io/a2a` label, AgentCardSync auto-creates an AgentCard (`runtime-agent-target-deployment-card`) with the correct `managed-by` label and targetRef. -#### Phase=Active and Ready=True +#### Ready=True Uses the AgentRuntime CR from the previous test (ordered context). Once the controller has -resolved the target and applied configuration, it sets `status.phase=Active` and the `Ready` -condition to `True`. Test verifies both fields via jsonpath. +resolved the target and applied configuration, it sets the `Ready` condition to `True`. +Test verifies via jsonpath. #### Idempotent re-reconcile @@ -341,7 +341,7 @@ Deletes the AgentRuntime CR and verifies the finalizer (`kagenti.io/cleanup`) ru Creates an AgentRuntime CR targeting `nonexistent-deployment`. The controller's target resolution fails because no Deployment with that name exists. The controller sets -`status.phase=Error`. Test verifies the Error phase via jsonpath, then cleans up the CR. +`TargetResolved=False`. Test verifies via jsonpath, then cleans up the CR. #### Tool type label diff --git a/kagenti-operator/test/e2e/e2e_test.go b/kagenti-operator/test/e2e/e2e_test.go index 9a011818..190a8dcb 100644 --- a/kagenti-operator/test/e2e/e2e_test.go +++ b/kagenti-operator/test/e2e/e2e_test.go @@ -1201,15 +1201,7 @@ rules: }).Should(Succeed()) }) - It("should set Phase=Active and Ready=True", func() { - By("verifying phase is Active") - Eventually(func(g Gomega) { - phase, err := utils.KubectlGetJsonpath("agentruntime", "test-agent-runtime", - agentRuntimeTestNamespace, "{.status.phase}") - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(phase).To(Equal("Active")) - }).Should(Succeed()) - + It("should set Ready=True", func() { By("verifying Ready condition is True") Eventually(func(g Gomega) { readyStatus, err := utils.KubectlGetJsonpath("agentruntime", "test-agent-runtime", @@ -1293,17 +1285,17 @@ rules: }) Context("Error cases", func() { - It("should set Phase=Error for missing target", func() { + It("should set TargetResolved=False for missing target", func() { By("creating AgentRuntime targeting non-existent deployment") _, err := utils.KubectlApplyStdin(runtimeMissingTargetCRFixture(), agentRuntimeTestNamespace) Expect(err).NotTo(HaveOccurred()) - By("verifying phase is Error") + By("verifying TargetResolved condition is False") Eventually(func(g Gomega) { - phase, err := utils.KubectlGetJsonpath("agentruntime", "test-missing-target", - agentRuntimeTestNamespace, "{.status.phase}") + status, err := utils.KubectlGetJsonpath("agentruntime", "test-missing-target", + agentRuntimeTestNamespace, "{.status.conditions[?(@.type=='TargetResolved')].status}") g.Expect(err).NotTo(HaveOccurred()) - g.Expect(phase).To(Equal("Error")) + g.Expect(status).To(Equal("False")) }).Should(Succeed()) By("verifying TargetResolved condition mentions the target") @@ -1391,12 +1383,12 @@ rules: g.Expect(managedBy).To(Equal("kagenti-operator")) }).Should(Succeed()) - By("verifying Phase=Active") + By("verifying Ready=True") Eventually(func(g Gomega) { - phase, err := utils.KubectlGetJsonpath("agentruntime", "test-sts-runtime", - agentRuntimeTestNamespace, "{.status.phase}") + readyStatus, err := utils.KubectlGetJsonpath("agentruntime", "test-sts-runtime", + agentRuntimeTestNamespace, "{.status.conditions[?(@.type=='Ready')].status}") g.Expect(err).NotTo(HaveOccurred()) - g.Expect(phase).To(Equal("Active")) + g.Expect(readyStatus).To(Equal("True")) }).Should(Succeed()) By("verifying config-hash on pod template") @@ -1438,12 +1430,12 @@ rules: var minimalHash, overridesHash string - By("waiting for minimal CR to reach Active") + By("waiting for minimal CR to become Ready") Eventually(func(g Gomega) { - phase, err := utils.KubectlGetJsonpath("agentruntime", "test-minimal-runtime", - agentRuntimeTestNamespace, "{.status.phase}") + readyStatus, err := utils.KubectlGetJsonpath("agentruntime", "test-minimal-runtime", + agentRuntimeTestNamespace, "{.status.conditions[?(@.type=='Ready')].status}") g.Expect(err).NotTo(HaveOccurred()) - g.Expect(phase).To(Equal("Active")) + g.Expect(readyStatus).To(Equal("True")) }).Should(Succeed()) By("recording minimal config-hash") @@ -1456,12 +1448,12 @@ rules: minimalHash = hash }).Should(Succeed()) - By("waiting for overrides CR to reach Active") + By("waiting for overrides CR to become Ready") Eventually(func(g Gomega) { - phase, err := utils.KubectlGetJsonpath("agentruntime", "test-overrides-runtime", - agentRuntimeTestNamespace, "{.status.phase}") + readyStatus, err := utils.KubectlGetJsonpath("agentruntime", "test-overrides-runtime", + agentRuntimeTestNamespace, "{.status.conditions[?(@.type=='Ready')].status}") g.Expect(err).NotTo(HaveOccurred()) - g.Expect(phase).To(Equal("Active")) + g.Expect(readyStatus).To(Equal("True")) }).Should(Succeed()) By("recording overrides config-hash") @@ -1666,12 +1658,12 @@ rules: SetDefaultEventuallyPollingInterval(time.Second) It("should apply labels to workload when AgentRuntime is created", func() { - By("waiting for AgentRuntime phase=Active") + By("waiting for AgentRuntime Ready=True") Eventually(func(g Gomega) { - phase, err := utils.KubectlGetJsonpath("agentruntime", "combined-agent", - combinedTestNamespace, "{.status.phase}") + readyStatus, err := utils.KubectlGetJsonpath("agentruntime", "combined-agent", + combinedTestNamespace, "{.status.conditions[?(@.type=='Ready')].status}") g.Expect(err).NotTo(HaveOccurred()) - g.Expect(phase).To(Equal("Active")) + g.Expect(readyStatus).To(Equal("True")) }).Should(Succeed()) By("verifying Ready condition is True") @@ -2082,12 +2074,12 @@ rules: return err }, 1*time.Minute, 5*time.Second).Should(Succeed()) - By("waiting for AgentRuntime phase=Active") + By("waiting for AgentRuntime Ready=True") Eventually(func(g Gomega) { - phase, err := utils.KubectlGetJsonpath("agentruntime", "skill-discovery-agent", - skillDiscoveryTestNamespace, "{.status.phase}") + readyStatus, err := utils.KubectlGetJsonpath("agentruntime", "skill-discovery-agent", + skillDiscoveryTestNamespace, "{.status.conditions[?(@.type=='Ready')].status}") g.Expect(err).NotTo(HaveOccurred()) - g.Expect(phase).To(Equal("Active")) + g.Expect(readyStatus).To(Equal("True")) }).Should(Succeed()) By("verifying status.linkedSkills is empty") @@ -2171,12 +2163,12 @@ rules: return err }, 1*time.Minute, 5*time.Second).Should(Succeed()) - By("waiting for AgentRuntime phase=Active") + By("waiting for AgentRuntime Ready=True") Eventually(func(g Gomega) { - phase, err := utils.KubectlGetJsonpath("agentruntime", "skill-discovery-agent", - skillDiscoveryTestNamespace, "{.status.phase}") + readyStatus, err := utils.KubectlGetJsonpath("agentruntime", "skill-discovery-agent", + skillDiscoveryTestNamespace, "{.status.conditions[?(@.type=='Ready')].status}") g.Expect(err).NotTo(HaveOccurred()) - g.Expect(phase).To(Equal("Active")) + g.Expect(readyStatus).To(Equal("True")) }).Should(Succeed()) By("verifying status.linkedSkills contains discovered skills") @@ -2250,12 +2242,12 @@ rules: return err }, 1*time.Minute, 5*time.Second).Should(Succeed()) - By("waiting for AgentRuntime phase=Active") + By("waiting for AgentRuntime Ready=True") Eventually(func(g Gomega) { - phase, err := utils.KubectlGetJsonpath("agentruntime", "oci-skill-agent", - skillDiscoveryTestNamespace, "{.status.phase}") + readyStatus, err := utils.KubectlGetJsonpath("agentruntime", "oci-skill-agent", + skillDiscoveryTestNamespace, "{.status.conditions[?(@.type=='Ready')].status}") g.Expect(err).NotTo(HaveOccurred()) - g.Expect(phase).To(Equal("Active")) + g.Expect(readyStatus).To(Equal("True")) }).Should(Succeed()) By("verifying both skills are discovered") From 0ffc335bbddcde13638fc80c54a964bc2f52a465 Mon Sep 17 00:00:00 2001 From: Ian Miller Date: Tue, 9 Jun 2026 14:40:04 +0100 Subject: [PATCH 6/9] fix(e2e): combine Ready and linkedSkills checks to prevent race The skill discovery e2e test could observe Ready=True from a reconcile that ran before the controller restarted with skillDiscovery enabled, causing linkedSkills to appear empty. Combining both assertions in a single Eventually block ensures the test waits for a reconcile that reflects the enabled feature gate. Assisted-By: Claude (Anthropic AI) Signed-off-by: Ian Miller --- kagenti-operator/test/e2e/e2e_test.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kagenti-operator/test/e2e/e2e_test.go b/kagenti-operator/test/e2e/e2e_test.go index 190a8dcb..a2cea427 100644 --- a/kagenti-operator/test/e2e/e2e_test.go +++ b/kagenti-operator/test/e2e/e2e_test.go @@ -2163,16 +2163,13 @@ rules: return err }, 1*time.Minute, 5*time.Second).Should(Succeed()) - By("waiting for AgentRuntime Ready=True") + By("waiting for AgentRuntime Ready=True with linkedSkills populated") Eventually(func(g Gomega) { readyStatus, err := utils.KubectlGetJsonpath("agentruntime", "skill-discovery-agent", skillDiscoveryTestNamespace, "{.status.conditions[?(@.type=='Ready')].status}") g.Expect(err).NotTo(HaveOccurred()) g.Expect(readyStatus).To(Equal("True")) - }).Should(Succeed()) - By("verifying status.linkedSkills contains discovered skills") - Eventually(func(g Gomega) { raw, err := utils.KubectlGetJsonpath("agentruntime", "skill-discovery-agent", skillDiscoveryTestNamespace, "{.status.linkedSkills}") g.Expect(err).NotTo(HaveOccurred()) From d68e9097e3445fb3651aca765e2f881a38ae5d21 Mon Sep 17 00:00:00 2001 From: Ian Miller Date: Wed, 10 Jun 2026 11:46:15 +0100 Subject: [PATCH 7/9] refactor(status): restore TargetResolved to after resolveTargetRef Moves TargetResolved=True back to immediately after resolveTargetRef succeeds, preserving the clear semantic: "I found the deployment." The Ready condition gates the full success path. Addresses review feedback from @pdettori. Assisted-By: Claude (Anthropic AI) Signed-off-by: Ian Miller --- .../internal/controller/agentruntime_controller.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kagenti-operator/internal/controller/agentruntime_controller.go b/kagenti-operator/internal/controller/agentruntime_controller.go index 45e2a56b..6ea022c0 100644 --- a/kagenti-operator/internal/controller/agentruntime_controller.go +++ b/kagenti-operator/internal/controller/agentruntime_controller.go @@ -162,6 +162,9 @@ func (r *AgentRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request return ctrl.Result{RequeueAfter: 30 * time.Second}, nil } + r.setCondition(rt, ConditionTypeTargetResolved, metav1.ConditionTrue, "TargetFound", + fmt.Sprintf("%s %s resolved", rt.Spec.TargetRef.Kind, rt.Spec.TargetRef.Name)) + // 4.1. Complete two-phase Sandbox restart if pending. if rt.Spec.TargetRef.Kind == KindSandbox { if result, done, err := r.completeSandboxRestart(ctx, rt); done { @@ -250,9 +253,6 @@ func (r *AgentRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request return ctrl.Result{RequeueAfter: 30 * time.Second}, nil } - r.setCondition(rt, ConditionTypeTargetResolved, metav1.ConditionTrue, "TargetFound", - fmt.Sprintf("%s %s resolved", rt.Spec.TargetRef.Kind, rt.Spec.TargetRef.Name)) - // 6.5. Discover linked skills from workload annotation (set by kagenti backend or user) fg := r.getFeatureGates() var linkedSkills []string From e6e5c7b655a52287adf1f3f582438b5c4a2ab78d Mon Sep 17 00:00:00 2001 From: Ian Miller Date: Wed, 10 Jun 2026 11:51:50 +0100 Subject: [PATCH 8/9] test(status): add ConfigApplyError path coverage Uses an updateFailClient wrapper to simulate Deployment update failures, verifying that Ready=False with reason ConfigApplyError is set when applyWorkloadConfig fails. Addresses review feedback from @pdettori. Assisted-By: Claude (Anthropic AI) Signed-off-by: Ian Miller --- .../agentruntime_controller_test.go | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/kagenti-operator/internal/controller/agentruntime_controller_test.go b/kagenti-operator/internal/controller/agentruntime_controller_test.go index 2700202a..363e63ce 100644 --- a/kagenti-operator/internal/controller/agentruntime_controller_test.go +++ b/kagenti-operator/internal/controller/agentruntime_controller_test.go @@ -18,6 +18,7 @@ package controller import ( "context" + "fmt" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -26,10 +27,12 @@ import ( "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/kubernetes/scheme" "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" agentv1alpha1 "github.com/kagenti/operator/api/v1alpha1" @@ -37,6 +40,27 @@ import ( webhookconfig "github.com/kagenti/operator/internal/webhook/config" ) +// updateFailClient wraps a client.Client and rejects Update calls on +// objects whose Kind matches failKind, simulating RBAC or conflict errors. +type updateFailClient struct { + client.Client + failKind string +} + +func (c *updateFailClient) Update(ctx context.Context, obj client.Object, opts ...client.UpdateOption) error { + gvks, _, _ := c.Scheme().ObjectKinds(obj) + for _, gvk := range gvks { + if gvk.Kind == c.failKind { + return fmt.Errorf("simulated update failure for %s", c.failKind) + } + } + return c.Client.Update(ctx, obj, opts...) +} + +func (c *updateFailClient) Scheme() *runtime.Scheme { + return c.Client.Scheme() +} + type stubCardFetcher struct { card *agentv1alpha1.AgentCardData err error @@ -388,6 +412,42 @@ var _ = Describe("AgentRuntime Controller", func() { }) }) + Context("When applyWorkloadConfig fails", func() { + It("should set Ready=False with reason ConfigApplyError", func() { + dep := newDeployment("apply-fail-deploy", namespace) + Expect(k8sClient.Create(ctx, dep)).To(Succeed()) + defer func() { _ = k8sClient.Delete(ctx, dep) }() + + rt := newAgentRuntime("apply-fail-rt", namespace, "apply-fail-deploy", agentv1alpha1.RuntimeTypeAgent) + Expect(k8sClient.Create(ctx, rt)).To(Succeed()) + defer func() { _ = k8sClient.Delete(ctx, rt) }() + + // First reconcile with normal client: adds finalizer + r := newReconciler() + nn := types.NamespacedName{Name: "apply-fail-rt", Namespace: namespace} + _, err := r.Reconcile(ctx, reconcile.Request{NamespacedName: nn}) + Expect(err).NotTo(HaveOccurred()) + + // Second reconcile with a client that rejects Deployment updates + rFail := &AgentRuntimeReconciler{ + Client: &updateFailClient{Client: k8sClient, failKind: "Deployment"}, + APIReader: k8sClient, + Scheme: scheme.Scheme, + } + result, err := rFail.Reconcile(ctx, reconcile.Request{NamespacedName: nn}) + Expect(err).NotTo(HaveOccurred()) + Expect(result.RequeueAfter).NotTo(BeZero(), "should requeue on config apply error") + + updated := &agentv1alpha1.AgentRuntime{} + Expect(k8sClient.Get(ctx, nn, updated)).To(Succeed()) + + readyCond := meta.FindStatusCondition(updated.Status.Conditions, ConditionTypeReady) + Expect(readyCond).NotTo(BeNil()) + Expect(readyCond.Status).To(Equal(metav1.ConditionFalse)) + Expect(readyCond.Reason).To(Equal("ConfigApplyError")) + }) + }) + Context("When the AgentRuntime type is tool", func() { var dep *appsv1.Deployment var rt *agentv1alpha1.AgentRuntime From bb9b9d2e71e0251bd27c632af90c40cf2a6e3b40 Mon Sep 17 00:00:00 2001 From: Ian Miller Date: Wed, 10 Jun 2026 22:49:39 +0100 Subject: [PATCH 9/9] fix: remove stale setPhase and SkillsDiscovered refs from main merge Main added SCC RoleBinding error path and skill status update code that still referenced removed symbols (setPhase, RuntimePhaseError, ConditionTypeSkillsDiscovered). Aligned with status simplification. Assisted-By: Claude (Anthropic AI) Signed-off-by: Ian Miller --- .../controller/agentruntime_controller.go | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/kagenti-operator/internal/controller/agentruntime_controller.go b/kagenti-operator/internal/controller/agentruntime_controller.go index 6ea022c0..0c4070dd 100644 --- a/kagenti-operator/internal/controller/agentruntime_controller.go +++ b/kagenti-operator/internal/controller/agentruntime_controller.go @@ -213,11 +213,7 @@ func (r *AgentRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request if r.Recorder != nil { r.Recorder.Event(rt, corev1.EventTypeWarning, "SCCBindingError", err.Error()) } - r.setPhase(rt, agentv1alpha1.RuntimePhaseError) - r.setCondition(rt, ConditionTypeReady, metav1.ConditionFalse, "SCCBindingError", err.Error()) - if statusErr := r.Status().Update(ctx, rt); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } + r.updateErrorStatus(ctx, req.NamespacedName, ConditionTypeReady, "SCCBindingError", err.Error()) return ctrl.Result{RequeueAfter: 30 * time.Second}, nil } @@ -272,15 +268,8 @@ func (r *AgentRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request fmt.Sprintf("Workload %s configured with config-hash %s", rt.Spec.TargetRef.Name, configResult.Hash[:12])) if fg.SkillDiscovery { rt.Status.LinkedSkills = linkedSkills - if len(linkedSkills) > 0 { - r.setCondition(rt, ConditionTypeSkillsDiscovered, metav1.ConditionTrue, "SkillsFound", - fmt.Sprintf("%d linked skill(s) discovered from workload annotation", len(linkedSkills))) - } else { - meta.RemoveStatusCondition(&rt.Status.Conditions, ConditionTypeSkillsDiscovered) - } } else { rt.Status.LinkedSkills = nil - meta.RemoveStatusCondition(&rt.Status.Conditions, ConditionTypeSkillsDiscovered) } desired := rt.Status.DeepCopy() if err := retry.RetryOnConflict(retry.DefaultRetry, func() error { @@ -725,7 +714,6 @@ func (r *AgentRuntimeReconciler) handleDeletion(ctx context.Context, rt *agentv1 delete(workloadLabels, LabelManagedBy) acc.obj.SetLabels(workloadLabels) - // Remove kagenti.io/type from PodTemplateSpec pod labels so future pods // are not presented to the webhook with the type label. podLabels := acc.getPodLabels(acc.obj) @@ -779,8 +767,8 @@ func (r *AgentRuntimeReconciler) setCondition(rt *agentv1alpha1.AgentRuntime, co }) } -// updateErrorStatus sets the AgentRuntime phase to Error and updates a condition -// with retry-on-conflict semantics, re-fetching the object on each attempt. +// updateErrorStatus updates a condition to False with retry-on-conflict +// semantics, re-fetching the object on each attempt. func (r *AgentRuntimeReconciler) updateErrorStatus(ctx context.Context, key types.NamespacedName, condType, reason, message string) { logger := log.FromContext(ctx) if statusErr := retry.RetryOnConflict(retry.DefaultRetry, func() error { @@ -788,7 +776,6 @@ func (r *AgentRuntimeReconciler) updateErrorStatus(ctx context.Context, key type if err := r.Get(ctx, key, latest); err != nil { return err } - r.setPhase(latest, agentv1alpha1.RuntimePhaseError) r.setCondition(latest, condType, metav1.ConditionFalse, reason, message) return r.Status().Update(ctx, latest) }); statusErr != nil {