Skip to content
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,9 @@
jsonPath: .spec.targetRef.name
name: Target
type: string
- description: Runtime Phase
jsonPath: .status.phase
name: Phase
type: string
- description: Card Fetch Status
jsonPath: .status.conditions[?(@.type=='CardFetched')].status
name: CardFetched
priority: 1
- description: Ready Status
jsonPath: .status.conditions[?(@.type=='Ready')].status
name: Ready
type: string
- jsonPath: .metadata.creationTimestamp
name: Age
Expand Down Expand Up @@ -425,7 +420,7 @@
lastTransitionTime:
description: |-
lastTransitionTime is the last time the condition transitioned from one status to another.
This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.

Check warning on line 423 in charts/kagenti-operator/crds/agent.kagenti.dev_agentruntimes.yaml

View workflow job for this annotation

GitHub Actions / YAML Lint

423:151 [line-length] line too long (162 > 150 characters)
format: date-time
type: string
message:
Expand All @@ -437,7 +432,7 @@
observedGeneration:
description: |-
observedGeneration represents the .metadata.generation that the condition was set based upon.
For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date

Check warning on line 435 in charts/kagenti-operator/crds/agent.kagenti.dev_agentruntimes.yaml

View workflow job for this annotation

GitHub Actions / YAML Lint

435:151 [line-length] line too long (162 > 150 characters)
with respect to the current state of the instance.
format: int64
minimum: 0
Expand Down Expand Up @@ -486,13 +481,11 @@
items:
type: string
type: array
phase:
description: Phase is the high-level state of the AgentRuntime
enum:
- Pending
- Active
- Error
type: string
observedGeneration:
description: ObservedGeneration is the most recent generation observed
by the controller.
format: int64
type: integer
type: object
required:
- spec
Expand Down
4 changes: 2 additions & 2 deletions kagenti-operator/GETTING_STARTED.md
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,8 @@ The controller will:
kubectl get agentruntime -n team1

# Example output:
# NAME TYPE TARGET PHASE AGE
# weather-agent-runtime agent weather-agent Active 2m
# NAME TYPE TARGET READY AGE
# weather-agent-runtime agent weather-agent True 2m

# View detailed conditions
kubectl describe agentruntime weather-agent-runtime -n team1
Expand Down
16 changes: 3 additions & 13 deletions kagenti-operator/api/v1alpha1/agentruntime_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,6 @@ const (
RuntimeTypeTool RuntimeType = "tool"
)

// RuntimePhase is the string-valued, high-level state reported in an
// AgentRuntime's status. The kubebuilder Enum marker below restricts it to the
// values declared in the const group that follows.
// +kubebuilder:validation:Enum=Pending;Active;Error
type RuntimePhase string

// Allowed RuntimePhase values; keep this list in sync with the Enum marker on
// the RuntimePhase type.
const (
// RuntimePhasePending is the "Pending" phase.
// NOTE(review): presumably the state before the first successful reconcile — confirm against the controller.
RuntimePhasePending RuntimePhase = "Pending"
// RuntimePhaseActive is the "Active" phase; the controller sets it together
// with a Ready=True condition once the target workload has been configured.
RuntimePhaseActive RuntimePhase = "Active"
// RuntimePhaseError is the "Error" phase; the controller sets it on error
// paths together with a False condition.
RuntimePhaseError RuntimePhase = "Error"
)

// +kubebuilder:validation:Enum=mtls;http
type TransportSecurity string

Expand Down Expand Up @@ -188,9 +179,9 @@ type CardStatus struct {

// AgentRuntimeStatus defines the observed state of AgentRuntime.
type AgentRuntimeStatus struct {
// Phase is the high-level state of the AgentRuntime
// ObservedGeneration is the most recent generation observed by the controller.
// +optional
Phase RuntimePhase `json:"phase,omitempty"`
ObservedGeneration int64 `json:"observedGeneration,omitempty"`

// ConfiguredPods is the count of pods with expected labels/config
// +optional
Expand All @@ -217,8 +208,7 @@ type AgentRuntimeStatus struct {
// +kubebuilder:resource:shortName=art;agentrt
// +kubebuilder:printcolumn:name="Type",type="string",JSONPath=".spec.type",description="Workload Type"
// +kubebuilder:printcolumn:name="Target",type="string",JSONPath=".spec.targetRef.name",description="Target Workload"
// +kubebuilder:printcolumn:name="Phase",type="string",JSONPath=".status.phase",description="Runtime Phase"
// +kubebuilder:printcolumn:name="CardFetched",type="string",JSONPath=".status.conditions[?(@.type=='CardFetched')].status",description="Card Fetch Status",priority=1
// +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].status",description="Ready Status"
// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp"

// AgentRuntime attaches runtime configuration to a backing workload classified as an
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,9 @@ spec:
jsonPath: .spec.targetRef.name
name: Target
type: string
- description: Runtime Phase
jsonPath: .status.phase
name: Phase
type: string
- description: Card Fetch Status
jsonPath: .status.conditions[?(@.type=='CardFetched')].status
name: CardFetched
priority: 1
- description: Ready Status
jsonPath: .status.conditions[?(@.type=='Ready')].status
name: Ready
type: string
- jsonPath: .metadata.creationTimestamp
name: Age
Expand Down Expand Up @@ -486,13 +481,11 @@ spec:
items:
type: string
type: array
phase:
description: Phase is the high-level state of the AgentRuntime
enum:
- Pending
- Active
- Error
type: string
observedGeneration:
description: ObservedGeneration is the most recent generation observed
by the controller.
format: int64
type: integer
type: object
required:
- spec
Expand Down
12 changes: 4 additions & 8 deletions kagenti-operator/docs/api-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@ The AgentRuntime controller applies the following labels and annotations to the

| Field | Type | Description |
|-------|------|-------------|
| `phase` | string | High-level state of the AgentRuntime (`Pending`, `Active`, or `Error`) |
| `observedGeneration` | int64 | Most recent generation observed by the controller |
| `configuredPods` | int32 | Count of pods with expected labels/configuration |
| `conditions` | [][Condition](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#condition-v1-meta) | Current state of the AgentRuntime |

Expand All @@ -480,7 +480,6 @@ The AgentRuntime controller applies the following labels and annotations to the
| `IstioMeshEnrolled` | True | `NamespaceLabeled` | Namespace labeled with `istio-discovery=enabled` and `istio.io/dataplane-mode=ambient` for Istio ambient mesh enrollment |
| `IstioMeshEnrolled` | False | `OptedOut` | Namespace has `kagenti.io/istio-mesh=disabled` annotation; Istio mesh labels not applied |
| `IstioMeshEnrolled` | False | `PatchFailed` | Failed to patch namespace labels (e.g., RBAC misconfiguration). Non-fatal; reconcile continues. |
| `SkillsDiscovered` | True | `SkillsFound` | Linked skills discovered from `kagenti.io/skills` annotation on the target workload |
| `SkillsMounted` | True | `SkillsApplied` | OCI skill ImageVolumes applied to the target workload |
| `SkillsMounted` | False | `FeatureGateDisabled` | Skills defined but `skillImageVolumes` feature gate is disabled |
| `SkillsMounted` | False | `UnsupportedWorkloadKind` | Skills defined but the target workload kind (e.g., Sandbox) does not support skill ImageVolumes |
Expand Down Expand Up @@ -586,16 +585,13 @@ kubectl get art
kubectl get agentruntimes

# Example output:
# NAME TYPE TARGET PHASE AGE
# weather-agent-runtime agent weather-agent Active 5m
# calculator-tool-runtime tool calculator-tool Active 3m
# NAME TYPE TARGET READY AGE
# weather-agent-runtime agent weather-agent True 5m
# calculator-tool-runtime tool calculator-tool True 3m

# Get detailed information
kubectl describe agentruntime weather-agent-runtime

# View runtime phase
kubectl get art weather-agent-runtime -o jsonpath='{.status.phase}'

# View configured pods count
kubectl get art weather-agent-runtime -o jsonpath='{.status.configuredPods}'
```
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -334,9 +334,6 @@ var _ = Describe("AgentRuntime Config", func() {
updated := &agentv1alpha1.AgentRuntime{}
Expect(k8sClient.Get(ctx, nn, updated)).To(Succeed())

// Should still be Active (warning doesn't block reconciliation)
Expect(updated.Status.Phase).To(Equal(agentv1alpha1.RuntimePhaseActive))

// Should have ConfigResolved condition with warning
var configCond *metav1.Condition
for i := range updated.Status.Conditions {
Expand Down
48 changes: 8 additions & 40 deletions kagenti-operator/internal/controller/agentruntime_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,9 @@ const (
AnnotationRestartPending = "kagenti.io/restart-pending"

// Condition types for AgentRuntime status.
ConditionTypeReady = "Ready"
ConditionTypeTargetResolved = "TargetResolved"
ConditionTypeConfigResolved = "ConfigResolved"
ConditionTypeCardFetched = "CardFetched"
ConditionTypeSkillsDiscovered = "SkillsDiscovered"
ConditionTypeReady = "Ready"
ConditionTypeTargetResolved = "TargetResolved"
ConditionTypeConfigResolved = "ConfigResolved"

// AnnotationLastCardFetchHash stores the change-detection key used to skip
// redundant card fetches when the workload's pod template has not changed.
Expand Down Expand Up @@ -137,6 +135,8 @@ func (r *AgentRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request
return ctrl.Result{}, client.IgnoreNotFound(err)
}

rt.Status.ObservedGeneration = rt.Generation

// 2. Handle deletion
if !rt.DeletionTimestamp.IsZero() {
return r.handleDeletion(ctx, rt)
Expand Down Expand Up @@ -213,11 +213,7 @@ func (r *AgentRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request
if r.Recorder != nil {
r.Recorder.Event(rt, corev1.EventTypeWarning, "SCCBindingError", err.Error())
}
r.setPhase(rt, agentv1alpha1.RuntimePhaseError)
r.setCondition(rt, ConditionTypeReady, metav1.ConditionFalse, "SCCBindingError", err.Error())
if statusErr := r.Status().Update(ctx, rt); statusErr != nil {
logger.Error(statusErr, "Failed to update status")
}
r.updateErrorStatus(ctx, req.NamespacedName, ConditionTypeReady, "SCCBindingError", err.Error())
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
}

Expand Down Expand Up @@ -268,20 +264,12 @@ func (r *AgentRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request

// 8. Update status (retry on conflict to preserve all conditions computed above)
rt.Status.ConfiguredPods = configuredPods
r.setPhase(rt, agentv1alpha1.RuntimePhaseActive)
r.setCondition(rt, ConditionTypeReady, metav1.ConditionTrue, "Configured",
fmt.Sprintf("Workload %s configured with config-hash %s", rt.Spec.TargetRef.Name, configResult.Hash[:12]))
if fg.SkillDiscovery {
rt.Status.LinkedSkills = linkedSkills
if len(linkedSkills) > 0 {
r.setCondition(rt, ConditionTypeSkillsDiscovered, metav1.ConditionTrue, "SkillsFound",
fmt.Sprintf("%d linked skill(s) discovered from workload annotation", len(linkedSkills)))
} else {
meta.RemoveStatusCondition(&rt.Status.Conditions, ConditionTypeSkillsDiscovered)
}
} else {
rt.Status.LinkedSkills = nil
meta.RemoveStatusCondition(&rt.Status.Conditions, ConditionTypeSkillsDiscovered)
}
desired := rt.Status.DeepCopy()
if err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
Expand Down Expand Up @@ -726,7 +714,6 @@ func (r *AgentRuntimeReconciler) handleDeletion(ctx context.Context, rt *agentv1
delete(workloadLabels, LabelManagedBy)
acc.obj.SetLabels(workloadLabels)


// Remove kagenti.io/type from PodTemplateSpec pod labels so future pods
// are not presented to the webhook with the type label.
podLabels := acc.getPodLabels(acc.obj)
Expand Down Expand Up @@ -770,10 +757,6 @@ func (r *AgentRuntimeReconciler) handleDeletion(ctx context.Context, rt *agentv1
return ctrl.Result{}, nil
}

// setPhase stores phase in rt.Status.Phase. It only mutates the in-memory
// object; nothing is written to the API server here, so callers follow it with
// a status update to persist the change.
func (r *AgentRuntimeReconciler) setPhase(rt *agentv1alpha1.AgentRuntime, phase agentv1alpha1.RuntimePhase) {
rt.Status.Phase = phase
}

func (r *AgentRuntimeReconciler) setCondition(rt *agentv1alpha1.AgentRuntime, condType string, status metav1.ConditionStatus, reason, message string) {
meta.SetStatusCondition(&rt.Status.Conditions, metav1.Condition{
Type: condType,
Expand All @@ -784,16 +767,15 @@ func (r *AgentRuntimeReconciler) setCondition(rt *agentv1alpha1.AgentRuntime, co
})
}

// updateErrorStatus sets the AgentRuntime phase to Error and updates a condition
// with retry-on-conflict semantics, re-fetching the object on each attempt.
// updateErrorStatus updates a condition to False with retry-on-conflict
// semantics, re-fetching the object on each attempt.
func (r *AgentRuntimeReconciler) updateErrorStatus(ctx context.Context, key types.NamespacedName, condType, reason, message string) {
logger := log.FromContext(ctx)
if statusErr := retry.RetryOnConflict(retry.DefaultRetry, func() error {
latest := &agentv1alpha1.AgentRuntime{}
if err := r.Get(ctx, key, latest); err != nil {
return err
}
r.setPhase(latest, agentv1alpha1.RuntimePhaseError)
r.setCondition(latest, condType, metav1.ConditionFalse, reason, message)
return r.Status().Update(ctx, latest)
}); statusErr != nil {
Expand All @@ -810,8 +792,6 @@ func (r *AgentRuntimeReconciler) fetchAndUpdateCard(ctx context.Context, rt *age
if !r.EnableCardDiscovery {
if rt.Status.Card != nil {
rt.Status.Card = nil
r.setCondition(rt, ConditionTypeCardFetched, metav1.ConditionFalse, "DiscoveryDisabled",
"Card discovery is disabled; stale card data cleared")
}
return
}
Expand All @@ -823,29 +803,24 @@ func (r *AgentRuntimeReconciler) fetchAndUpdateCard(ctx context.Context, rt *age
lastHash = annotations[AnnotationLastCardFetchHash]
}
if changeKey != "" && changeKey == lastHash && rt.Status.Card != nil {
r.setCondition(rt, ConditionTypeCardFetched, metav1.ConditionTrue, "FetchSkipped",
"Pod template unchanged; existing card data still valid")
return
}

if ready, msg := r.checkWorkloadReady(ctx, rt.Namespace, rt.Spec.TargetRef); !ready {
logger.V(1).Info("Workload not ready for card discovery", "reason", msg)
r.setCondition(rt, ConditionTypeCardFetched, metav1.ConditionFalse, "WorkloadNotReady", msg)
return
}

svc, port, err := r.resolveServiceForWorkload(ctx, rt.Namespace, rt.Spec.TargetRef)
if err != nil {
logger.V(1).Info("Service resolution failed for card discovery", "error", err)
r.setCondition(rt, ConditionTypeCardFetched, metav1.ConditionFalse, "ServiceNotFound", err.Error())
return
}

protocol := agentcard.A2AProtocol
cardData, fetchResult, transportSecurity, err := r.fetchCard(ctx, rt, svc, port, protocol)
if err != nil {
logger.Error(err, "Card fetch failed", "workload", rt.Spec.TargetRef.Name)
r.setCondition(rt, ConditionTypeCardFetched, metav1.ConditionFalse, "FetchFailed", err.Error())
return
}

Expand Down Expand Up @@ -883,13 +858,6 @@ func (r *AgentRuntimeReconciler) fetchAndUpdateCard(ctx context.Context, rt *age

rt.Status.Card = cardStatus

conditionReason := "Fetched"
if transportSecurity == agentv1alpha1.TransportSecurityHTTP {
conditionReason = "FetchedInsecure"
}
r.setCondition(rt, ConditionTypeCardFetched, metav1.ConditionTrue, conditionReason,
fmt.Sprintf("Successfully fetched agent card for %s", cardData.Name))

r.persistCardFetchAnnotation(ctx, rt, changeKey)
}

Expand Down
Loading
Loading