From 60401cba2747a43cb68f90a51a149711099a7eda Mon Sep 17 00:00:00 2001 From: Daniels Nagornuks Date: Tue, 9 Jun 2026 12:08:16 +0100 Subject: [PATCH 1/6] Feat: add ValidatingAdmissionPolicy to protect kagenti.io/type label Adds a ValidatingAdmissionPolicy (agent-label-protection) that prevents manual application of the kagenti.io/type label on Deployments and StatefulSets. Only the operator's service account is allowed to set this label via an AgentRuntime CR. Users who attempt to add the label directly are rejected with a message directing them to create an AgentRuntime instead. The policy allows non-operator users to update workloads that already carry the label as long as they don't change its value. Signed-off-by: Daniels Nagornuks Assisted-By: Claude (Anthropic AI) --- .../config/default/kustomization.yaml | 2 + .../config/vap/kustomization.yaml | 3 + .../validating-admission-policy-binding.yaml | 10 +++ .../vap/validating-admission-policy.yaml | 64 +++++++++++++++++++ kagenti-operator/docs/architecture.md | 36 +++++++++++ 5 files changed, 115 insertions(+) create mode 100644 kagenti-operator/config/vap/kustomization.yaml create mode 100644 kagenti-operator/config/vap/validating-admission-policy-binding.yaml create mode 100644 kagenti-operator/config/vap/validating-admission-policy.yaml diff --git a/kagenti-operator/config/default/kustomization.yaml b/kagenti-operator/config/default/kustomization.yaml index 98dada3e..25662e5a 100644 --- a/kagenti-operator/config/default/kustomization.yaml +++ b/kagenti-operator/config/default/kustomization.yaml @@ -23,6 +23,8 @@ resources: - ../webhook # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required. - ../certmanager +# [VAP] ValidatingAdmissionPolicy that prevents manual application of kagenti.io/type labels. +- ../vap # [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. 
#- ../prometheus # [METRICS] Expose the controller manager metrics service. diff --git a/kagenti-operator/config/vap/kustomization.yaml b/kagenti-operator/config/vap/kustomization.yaml new file mode 100644 index 00000000..26d1f0d4 --- /dev/null +++ b/kagenti-operator/config/vap/kustomization.yaml @@ -0,0 +1,3 @@ +resources: +- validating-admission-policy.yaml +- validating-admission-policy-binding.yaml diff --git a/kagenti-operator/config/vap/validating-admission-policy-binding.yaml b/kagenti-operator/config/vap/validating-admission-policy-binding.yaml new file mode 100644 index 00000000..efbcd6c9 --- /dev/null +++ b/kagenti-operator/config/vap/validating-admission-policy-binding.yaml @@ -0,0 +1,10 @@ +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingAdmissionPolicyBinding +metadata: + name: agent-label-protection + labels: + app.kubernetes.io/name: kagenti-operator + app.kubernetes.io/component: admission +spec: + policyName: agent-label-protection + validationActions: [Deny] diff --git a/kagenti-operator/config/vap/validating-admission-policy.yaml b/kagenti-operator/config/vap/validating-admission-policy.yaml new file mode 100644 index 00000000..bf6198d7 --- /dev/null +++ b/kagenti-operator/config/vap/validating-admission-policy.yaml @@ -0,0 +1,64 @@ +# ValidatingAdmissionPolicy: prevent manual application of the kagenti.io/type +# label on Deployments and StatefulSets. Only the kagenti-operator controller +# (via an AgentRuntime CR) is allowed to set this label. Users who need the +# label must create an AgentRuntime targeting their workload instead. +# +# The policy is split into two layers: +# +# matchConditions — skip evaluation entirely when: +# 1. The object does not carry kagenti.io/type (irrelevant requests), OR +# 2. The request originates from a trusted service account. 
+# Two operator SA identities are listed because kustomize (make deploy) +# and Helm use different namespaces and SA names: +# - kagenti-operator-system/kagenti-operator-controller-manager (kustomize) +# - kagenti-system/controller-manager (Helm) +# +# validations — for everyone else, allow only UPDATE requests where the +# label already existed on the previous revision with the same value +# (i.e. the user is modifying other fields but leaving the label intact). +# +# CREATE requests that carry the label are always rejected for non-operator +# callers, because the "label unchanged" check evaluates to false on CREATE +# (there is no oldObject). +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingAdmissionPolicy +metadata: + name: agent-label-protection + labels: + app.kubernetes.io/name: kagenti-operator + app.kubernetes.io/component: admission +spec: + failurePolicy: Fail + matchConstraints: + resourceRules: + - apiGroups: ["apps"] + apiVersions: ["v1"] + operations: ["CREATE", "UPDATE"] + resources: ["deployments", "statefulsets"] + + matchConditions: + - name: 'has-agent-type-label' + expression: >- + has(object.metadata.labels) && 'kagenti.io/type' in object.metadata.labels + + - name: 'not-operator-service-account' + expression: >- + !(request.userInfo.username == 'system:serviceaccount:kagenti-operator-system:kagenti-operator-controller-manager') + && !(request.userInfo.username == 'system:serviceaccount:kagenti-system:controller-manager') + + variables: + - name: labelUnchanged + expression: >- + request.operation == 'UPDATE' + && has(oldObject.metadata.labels) + && 'kagenti.io/type' in oldObject.metadata.labels + && oldObject.metadata.labels['kagenti.io/type'] == object.metadata.labels['kagenti.io/type'] + + validations: + - expression: "variables.labelUnchanged" + messageExpression: >- + 'The kagenti.io/type label on ' + + object.metadata.namespace + '/' + object.metadata.name + + ' can only be applied by the kagenti-operator via an AgentRuntime 
CR.' + + ' Create an AgentRuntime targeting this workload instead of manually setting the label.' + reason: Forbidden diff --git a/kagenti-operator/docs/architecture.md b/kagenti-operator/docs/architecture.md index 7a022f4f..c86562a2 100644 --- a/kagenti-operator/docs/architecture.md +++ b/kagenti-operator/docs/architecture.md @@ -82,6 +82,9 @@ The Kagenti Operator is a Kubernetes controller that implements the [Operator Pa - **AgentCard Validator**: Ensures `targetRef` is set on AgentCards. Rejects duplicate `targetRef` entries (prevents multiple AgentCards targeting the same workload in a namespace). - **AgentRuntime Validator**: Rejects duplicate `targetRef` entries (prevents multiple AgentRuntime CRs targeting the same workload in a namespace). Uses authoritative API server reads to eliminate informer cache-lag races. +#### Admission Policies +- **Agent Label Protection (ValidatingAdmissionPolicy)**: Prevents manual application of the `kagenti.io/type` label on Deployments and StatefulSets. Only the operator's service account (via an AgentRuntime CR) is allowed to set this label. Users who attempt to add the label directly are rejected with a message directing them to create an AgentRuntime instead. The policy allows non-operator users to update workloads that already carry the label, as long as they don't change its value. 
+ #### Signature Providers - **X5CProvider**: Validates `x5c` certificate chains against the SPIRE X.509 trust bundle and verifies JWS signatures using the leaf public key @@ -101,11 +104,13 @@ graph TB subgraph "Kagenti Operator" ValidationWebhook[Validating Webhooks] InjectionWebhook[AuthBridge Mutating Webhook] + VAP[Agent Label Protection VAP] CardController[AgentCard Controller] SyncController[AgentCardSync Controller] RuntimeController[AgentRuntime Controller] CardCR -->|Validates| ValidationWebhook RuntimeCR -->|Validates| ValidationWebhook + Deployment -->|CREATE/UPDATE with kagenti.io/type| VAP ValidationWebhook -->|Valid CR| CardController end @@ -142,6 +147,7 @@ graph TB style RuntimeCR fill:#e1f5fe style ValidationWebhook fill:#fff3e0 style InjectionWebhook fill:#fff3e0 + style VAP fill:#fff3e0 style CardController fill:#ffe0b2 style SyncController fill:#ffe0b2 style Deployment fill:#d1c4e9 @@ -357,6 +363,36 @@ Source: `internal/controller/agentcard_networkpolicy_controller.go` - The AgentRuntime controller reads ConfigMaps from `kagenti-system` (cluster defaults) regardless of mode — this requires cross-namespace read access - Namespace defaults ConfigMaps are read from the workload's own namespace +### Admission Control — Agent Label Protection + +The operator deploys a `ValidatingAdmissionPolicy` (VAP) that prevents direct application of the `kagenti.io/type` label on Deployments and StatefulSets. This label is the entry point for the entire kagenti platform (webhook injection, agent discovery, client registration), so it must only be set through the official enrollment path — creating an AgentRuntime CR. 
+ +#### How It Works + +| Layer | Purpose | +|-------|---------| +| **matchConstraints** | Targets CREATE and UPDATE of `apps/v1` Deployments and StatefulSets | +| **matchConditions** | Skips evaluation when the object doesn't have `kagenti.io/type` or when the request comes from the operator's service account | +| **validation** | On UPDATE, allows the request only if `kagenti.io/type` was already present with the same value (user is modifying other fields). On CREATE, always rejects since the label should not be set manually. | + +#### Scenarios + +| Action | Result | +|--------|--------| +| User creates Deployment with `kagenti.io/type: agent` | **Rejected** — create an AgentRuntime instead | +| User adds `kagenti.io/type` to existing Deployment | **Rejected** — create an AgentRuntime instead | +| User changes `kagenti.io/type` from `agent` to `tool` | **Rejected** — update the AgentRuntime instead | +| User updates Deployment that already has the label (label unchanged) | **Allowed** | +| User removes `kagenti.io/type` from Deployment | **Allowed** (matchCondition skips — new object has no label) | +| Operator controller applies label via AgentRuntime | **Allowed** (service account exemption) | + +#### Resources + +The VAP is deployed as part of the operator's kustomize manifests (`config/vap/`): + +- `ValidatingAdmissionPolicy` — `agent-label-protection` +- `ValidatingAdmissionPolicyBinding` — binds with `validationActions: [Deny]` + ### Secret Management ## Reconciliation Loops From 969e82f0ce0041365f309ae8508537eb44c32ae9 Mon Sep 17 00:00:00 2001 From: Daniels Nagornuks Date: Tue, 9 Jun 2026 12:21:58 +0100 Subject: [PATCH 2/6] Test: update E2E tests to use AgentRuntime for label application Removes manual kagenti.io/type labels from all E2E test fixtures (echoAgent, noProtoAgent, auditAgent, signedAgent, authBridgeAgent, authBridgeDisabledAgent) and adds AgentRuntime CRs to drive the label via the operator. 
Each test now deploys the workload without the protected label, creates an AgentRuntime, waits for the operator to apply kagenti.io/type, then proceeds with existing assertions. This aligns E2E tests with the new ValidatingAdmissionPolicy that prevents manual label application. Signed-off-by: Daniels Nagornuks Assisted-By: Claude (Anthropic AI) --- kagenti-operator/test/e2e/e2e_test.go | 92 ++++++++++++++++++++++++-- kagenti-operator/test/e2e/fixtures.go | 93 ++++++++++++++++++++------- 2 files changed, 157 insertions(+), 28 deletions(-) diff --git a/kagenti-operator/test/e2e/e2e_test.go b/kagenti-operator/test/e2e/e2e_test.go index a2cea427..f550bf4a 100644 --- a/kagenti-operator/test/e2e/e2e_test.go +++ b/kagenti-operator/test/e2e/e2e_test.go @@ -426,6 +426,28 @@ var _ = Describe("AuthBridge Injection E2E", Ordered, func() { _, err := utils.KubectlApplyStdin(authBridgeAgentFixture(), authBridgeTestNamespace) Expect(err).NotTo(HaveOccurred()) + By("waiting for operator to apply kagenti.io/type label via AgentRuntime") + Eventually(func(g Gomega) { + typeLabel, labelErr := utils.KubectlGetJsonpath("deployment", "authbridge-agent", authBridgeTestNamespace, + "{.metadata.labels.kagenti\\.io/type}") + g.Expect(labelErr).NotTo(HaveOccurred()) + g.Expect(typeLabel).To(Equal("agent")) + }, 1*time.Minute, 2*time.Second).Should(Succeed()) + + By("waiting for pod template to have kagenti.io/type label (triggers rolling update)") + Eventually(func(g Gomega) { + tmplLabel, labelErr := utils.KubectlGetJsonpath("deployment", "authbridge-agent", authBridgeTestNamespace, + "{.spec.template.metadata.labels.kagenti\\.io/type}") + g.Expect(labelErr).NotTo(HaveOccurred()) + g.Expect(tmplLabel).To(Equal("agent")) + }, 1*time.Minute, 2*time.Second).Should(Succeed()) + + By("waiting for rollout to complete") + rolloutCmd := exec.Command("kubectl", "rollout", "status", + "deployment/authbridge-agent", "-n", authBridgeTestNamespace, "--timeout=3m") + _, rolloutErr := utils.Run(rolloutCmd) 
+ Expect(rolloutErr).NotTo(HaveOccurred()) + By("waiting for deployment to be ready") Expect(utils.WaitForDeploymentReady("authbridge-agent", authBridgeTestNamespace, 3*time.Minute)).To(Succeed()) @@ -485,11 +507,11 @@ var _ = Describe("AuthBridge Injection E2E", Ordered, func() { }) It("should not duplicate sidecars on pod recreation (idempotency)", func() { - By("getting current pod name") + By("getting current injected pod name") var oldPodName string Eventually(func(g Gomega) { cmd := exec.Command("kubectl", "get", "pods", - "-l", "app.kubernetes.io/name=authbridge-agent", + "-l", "app.kubernetes.io/name=authbridge-agent,kagenti.io/type=agent", "-n", authBridgeTestNamespace, "-o", "jsonpath={.items[0].metadata.name}") output, err := utils.Run(cmd) @@ -506,7 +528,7 @@ var _ = Describe("AuthBridge Injection E2E", Ordered, func() { By("waiting for new pod to be running with a different name") Eventually(func(g Gomega) { cmd := exec.Command("kubectl", "get", "pods", - "-l", "app.kubernetes.io/name=authbridge-agent", + "-l", "app.kubernetes.io/name=authbridge-agent,kagenti.io/type=agent", "-n", authBridgeTestNamespace, "-o", "jsonpath={.items[0].metadata.name}") output, err := utils.Run(cmd) @@ -521,7 +543,7 @@ var _ = Describe("AuthBridge Injection E2E", Ordered, func() { By("verifying exactly 1 envoy-proxy and 1 proxy-init (no separate spiffe-helper)") cmd = exec.Command("kubectl", "get", "pods", - "-l", "app.kubernetes.io/name=authbridge-agent", + "-l", "app.kubernetes.io/name=authbridge-agent,kagenti.io/type=agent", "-n", authBridgeTestNamespace, "-o", "jsonpath={.items[0].spec.containers[*].name}") containers, err := utils.Run(cmd) @@ -531,7 +553,7 @@ var _ = Describe("AuthBridge Injection E2E", Ordered, func() { "spiffe-helper is bundled inside envoy-proxy, should not appear as a separate container") cmd = exec.Command("kubectl", "get", "pods", - "-l", "app.kubernetes.io/name=authbridge-agent", + "-l", 
"app.kubernetes.io/name=authbridge-agent,kagenti.io/type=agent", "-n", authBridgeTestNamespace, "-o", "jsonpath={.items[0].spec.initContainers[*].name}") initContainers, err := utils.Run(cmd) @@ -555,6 +577,14 @@ var _ = Describe("AuthBridge Injection E2E", Ordered, func() { _, err = utils.KubectlApplyStdin(authBridgeDisabledAgentFixture(), authBridgeTestNamespace) Expect(err).NotTo(HaveOccurred()) + By("waiting for operator to apply kagenti.io/type label via AgentRuntime") + Eventually(func(g Gomega) { + typeLabel, labelErr := utils.KubectlGetJsonpath("deployment", "authbridge-disabled-agent", + authBridgeTestNamespace, "{.metadata.labels.kagenti\\.io/type}") + g.Expect(labelErr).NotTo(HaveOccurred()) + g.Expect(typeLabel).To(Equal("agent")) + }, 1*time.Minute, 2*time.Second).Should(Succeed()) + By("waiting for deployment to be ready") Expect(utils.WaitForDeploymentReady( "authbridge-disabled-agent", authBridgeTestNamespace, 2*time.Minute, @@ -776,6 +806,18 @@ var _ = Describe("AgentCard E2E", Ordered, func() { _, err := utils.KubectlApplyStdin(noProtocolAgentFixture(), testNamespace) Expect(err).NotTo(HaveOccurred()) + By("creating AgentRuntime for noproto-agent") + _, err = utils.KubectlApplyStdin(noProtoAgentRuntimeFixture(), testNamespace) + Expect(err).NotTo(HaveOccurred()) + + By("waiting for operator to apply kagenti.io/type label") + Eventually(func(g Gomega) { + typeLabel, labelErr := utils.KubectlGetJsonpath("deployment", "noproto-agent", testNamespace, + "{.metadata.labels.kagenti\\.io/type}") + g.Expect(labelErr).NotTo(HaveOccurred()) + g.Expect(typeLabel).To(Equal("agent")) + }, 1*time.Minute, 2*time.Second).Should(Succeed()) + By("waiting for deployment to be ready") Expect(utils.WaitForDeploymentReady("noproto-agent", testNamespace, 2*time.Minute)).To(Succeed()) @@ -789,10 +831,22 @@ var _ = Describe("AgentCard E2E", Ordered, func() { }) It("should auto-create AgentCard for labelled workload", func() { - By("deploying echo-agent with agent and 
protocol labels") + By("deploying echo-agent with protocol label") _, err := utils.KubectlApplyStdin(echoAgentFixture(), testNamespace) Expect(err).NotTo(HaveOccurred()) + By("creating AgentRuntime for echo-agent") + _, err = utils.KubectlApplyStdin(echoAgentRuntimeFixture(), testNamespace) + Expect(err).NotTo(HaveOccurred()) + + By("waiting for operator to apply kagenti.io/type label") + Eventually(func(g Gomega) { + typeLabel, labelErr := utils.KubectlGetJsonpath("deployment", "echo-agent", testNamespace, + "{.metadata.labels.kagenti\\.io/type}") + g.Expect(labelErr).NotTo(HaveOccurred()) + g.Expect(typeLabel).To(Equal("agent")) + }, 1*time.Minute, 2*time.Second).Should(Succeed()) + By("waiting for deployment to be ready") Expect(utils.WaitForDeploymentReady("echo-agent", testNamespace, 2*time.Minute)).To(Succeed()) @@ -919,6 +973,19 @@ var _ = Describe("AgentCard E2E", Ordered, func() { By("deploying audit-agent (unsigned)") _, err := utils.KubectlApplyStdin(auditAgentFixture(), testNamespace) Expect(err).NotTo(HaveOccurred()) + + By("creating AgentRuntime for audit-agent") + _, err = utils.KubectlApplyStdin(auditAgentRuntimeFixture(), testNamespace) + Expect(err).NotTo(HaveOccurred()) + + By("waiting for operator to apply kagenti.io/type label") + Eventually(func(g Gomega) { + typeLabel, labelErr := utils.KubectlGetJsonpath("deployment", "audit-agent", testNamespace, + "{.metadata.labels.kagenti\\.io/type}") + g.Expect(labelErr).NotTo(HaveOccurred()) + g.Expect(typeLabel).To(Equal("agent")) + }, 1*time.Minute, 2*time.Second).Should(Succeed()) + Expect(utils.WaitForDeploymentReady("audit-agent", testNamespace, 2*time.Minute)).To(Succeed()) By("updating auto-created AgentCard for audit-agent") @@ -960,6 +1027,19 @@ var _ = Describe("AgentCard E2E", Ordered, func() { By("deploying signed-agent stack") _, err = utils.KubectlApplyStdin(signedAgentFixture(), testNamespace) Expect(err).NotTo(HaveOccurred()) + + By("creating AgentRuntime for signed-agent") + _, err = 
utils.KubectlApplyStdin(signedAgentRuntimeFixture(), testNamespace) + Expect(err).NotTo(HaveOccurred()) + + By("waiting for operator to apply kagenti.io/type label") + Eventually(func(g Gomega) { + typeLabel, labelErr := utils.KubectlGetJsonpath("deployment", "signed-agent", testNamespace, + "{.metadata.labels.kagenti\\.io/type}") + g.Expect(labelErr).NotTo(HaveOccurred()) + g.Expect(typeLabel).To(Equal("agent")) + }, 1*time.Minute, 2*time.Second).Should(Succeed()) + Expect(utils.WaitForDeploymentReady("signed-agent", testNamespace, 3*time.Minute)).To(Succeed()) By("updating auto-created AgentCard with identityBinding") diff --git a/kagenti-operator/test/e2e/fixtures.go b/kagenti-operator/test/e2e/fixtures.go index 2c0f7e52..e6f99be7 100644 --- a/kagenti-operator/test/e2e/fixtures.go +++ b/kagenti-operator/test/e2e/fixtures.go @@ -51,6 +51,7 @@ data: } // echoAgentFixture returns YAML for echo-agent Deployment + Service (used by S1, S3). +// The kagenti.io/type label is applied by the operator via an AgentRuntime CR. func echoAgentFixture() string { return `apiVersion: apps/v1 kind: Deployment @@ -58,7 +59,6 @@ metadata: name: echo-agent namespace: ` + testNamespace + ` labels: - kagenti.io/type: agent protocol.kagenti.io/a2a: "" app.kubernetes.io/name: echo-agent spec: @@ -66,12 +66,10 @@ spec: selector: matchLabels: app.kubernetes.io/name: echo-agent - kagenti.io/type: agent template: metadata: labels: app.kubernetes.io/name: echo-agent - kagenti.io/type: agent kagenti.io/inject: disabled protocol.kagenti.io/a2a: "" spec: @@ -125,8 +123,8 @@ spec: ` } -// noProtocolAgentFixture returns YAML for noproto-agent Deployment (S2) - has -// kagenti.io/type=agent but NO protocol.kagenti.io/* label. +// noProtocolAgentFixture returns YAML for noproto-agent Deployment (S2) - receives +// kagenti.io/type=agent via AgentRuntime but has NO protocol.kagenti.io/* label. 
// kagenti.io/inject=disabled is set because this test validates AgentCard sync // behaviour, not sidecar injection. Without the opt-out the defaults-only // injection path would inject sidecars that the pause container cannot support. @@ -137,19 +135,16 @@ metadata: name: noproto-agent namespace: ` + testNamespace + ` labels: - kagenti.io/type: agent app.kubernetes.io/name: noproto-agent spec: replicas: 1 selector: matchLabels: app.kubernetes.io/name: noproto-agent - kagenti.io/type: agent template: metadata: labels: app.kubernetes.io/name: noproto-agent - kagenti.io/type: agent kagenti.io/inject: disabled spec: securityContext: @@ -197,6 +192,7 @@ spec: } // auditAgentFixture returns YAML for audit-agent Deployment + Service (S5). +// The kagenti.io/type label is applied by the operator via an AgentRuntime CR. func auditAgentFixture() string { return `apiVersion: apps/v1 kind: Deployment @@ -204,7 +200,6 @@ metadata: name: audit-agent namespace: ` + testNamespace + ` labels: - kagenti.io/type: agent protocol.kagenti.io/a2a: "" app.kubernetes.io/name: audit-agent spec: @@ -212,12 +207,10 @@ spec: selector: matchLabels: app.kubernetes.io/name: audit-agent - kagenti.io/type: agent template: metadata: labels: app.kubernetes.io/name: audit-agent - kagenti.io/type: agent kagenti.io/inject: disabled protocol.kagenti.io/a2a: "" spec: @@ -358,7 +351,6 @@ metadata: name: signed-agent namespace: ` + testNamespace + ` labels: - kagenti.io/type: agent protocol.kagenti.io/a2a: "" app.kubernetes.io/name: signed-agent spec: @@ -366,12 +358,10 @@ spec: selector: matchLabels: app.kubernetes.io/name: signed-agent - kagenti.io/type: agent template: metadata: labels: app.kubernetes.io/name: signed-agent - kagenti.io/type: agent kagenti.io/inject: disabled protocol.kagenti.io/a2a: "" spec: @@ -505,6 +495,70 @@ spec: ` } +// echoAgentRuntimeFixture returns YAML for an AgentRuntime CR targeting echo-agent. 
+func echoAgentRuntimeFixture() string { + return `apiVersion: agent.kagenti.dev/v1alpha1 +kind: AgentRuntime +metadata: + name: echo-agent + namespace: ` + testNamespace + ` +spec: + type: agent + targetRef: + apiVersion: apps/v1 + kind: Deployment + name: echo-agent +` +} + +// noProtoAgentRuntimeFixture returns YAML for an AgentRuntime CR targeting noproto-agent. +func noProtoAgentRuntimeFixture() string { + return `apiVersion: agent.kagenti.dev/v1alpha1 +kind: AgentRuntime +metadata: + name: noproto-agent + namespace: ` + testNamespace + ` +spec: + type: agent + targetRef: + apiVersion: apps/v1 + kind: Deployment + name: noproto-agent +` +} + +// auditAgentRuntimeFixture returns YAML for an AgentRuntime CR targeting audit-agent. +func auditAgentRuntimeFixture() string { + return `apiVersion: agent.kagenti.dev/v1alpha1 +kind: AgentRuntime +metadata: + name: audit-agent + namespace: ` + testNamespace + ` +spec: + type: agent + targetRef: + apiVersion: apps/v1 + kind: Deployment + name: audit-agent +` +} + +// signedAgentRuntimeFixture returns YAML for an AgentRuntime CR targeting signed-agent. +func signedAgentRuntimeFixture() string { + return `apiVersion: agent.kagenti.dev/v1alpha1 +kind: AgentRuntime +metadata: + name: signed-agent + namespace: ` + testNamespace + ` +spec: + type: agent + targetRef: + apiVersion: apps/v1 + kind: Deployment + name: signed-agent +` +} + // --- AgentRuntime E2E fixtures --- const agentRuntimeTestNamespace = "e2e-agentruntime-test" @@ -967,8 +1021,8 @@ spec: } // authBridgeAgentFixture returns YAML for the authbridge-agent Deployment, -// ServiceAccount, and Service. The deployment uses a Python echo server on -// port 8080 and has the kagenti.io/type=agent label required for injection. +// ServiceAccount, and Service. The kagenti.io/type label is applied by the +// operator via the AgentRuntime CR created in BeforeAll. 
func authBridgeAgentFixture() string { return `apiVersion: v1 kind: ServiceAccount @@ -982,19 +1036,16 @@ metadata: name: authbridge-agent namespace: ` + authBridgeTestNamespace + ` labels: - kagenti.io/type: agent app.kubernetes.io/name: authbridge-agent spec: replicas: 1 selector: matchLabels: app.kubernetes.io/name: authbridge-agent - kagenti.io/type: agent template: metadata: labels: app.kubernetes.io/name: authbridge-agent - kagenti.io/type: agent spec: serviceAccountName: authbridge-agent securityContext: @@ -1043,6 +1094,7 @@ spec: // authBridgeDisabledAgentFixture returns YAML for a Deployment that opts out // of sidecar injection via the kagenti.io/inject=disabled pod template label. +// The kagenti.io/type label is applied by the operator via an AgentRuntime CR. func authBridgeDisabledAgentFixture() string { return `apiVersion: apps/v1 kind: Deployment @@ -1050,19 +1102,16 @@ metadata: name: authbridge-disabled-agent namespace: ` + authBridgeTestNamespace + ` labels: - kagenti.io/type: agent app.kubernetes.io/name: authbridge-disabled-agent spec: replicas: 1 selector: matchLabels: app.kubernetes.io/name: authbridge-disabled-agent - kagenti.io/type: agent template: metadata: labels: app.kubernetes.io/name: authbridge-disabled-agent - kagenti.io/type: agent kagenti.io/inject: disabled spec: securityContext: From 6cd7a04642f59fe215be7420416e08858a5c6bf1 Mon Sep 17 00:00:00 2001 From: Daniels Nagornuks Date: Tue, 9 Jun 2026 12:37:34 +0100 Subject: [PATCH 3/6] Refactor: remove DefaultsConfigReconciler The DefaultsConfigReconciler maintained config-hash annotations on workloads with kagenti.io/type that were not managed by an AgentRuntime CR. With the new ValidatingAdmissionPolicy enforcing that the label can only be set via an AgentRuntime, no unmanaged workloads can exist. The reconciler was effectively a no-op and is now removed along with its tests and the ComputeDefaultsOnlyHash helper. 
Signed-off-by: Daniels Nagornuks Assisted-By: Claude (Anthropic AI) --- kagenti-operator/cmd/main.go | 9 - .../injector/defaults_config_reconciler.go | 300 -------- .../defaults_config_reconciler_test.go | 693 ------------------ 3 files changed, 1002 deletions(-) delete mode 100644 kagenti-operator/internal/webhook/injector/defaults_config_reconciler.go delete mode 100644 kagenti-operator/internal/webhook/injector/defaults_config_reconciler_test.go diff --git a/kagenti-operator/cmd/main.go b/kagenti-operator/cmd/main.go index e6ddde42..065a5adb 100644 --- a/kagenti-operator/cmd/main.go +++ b/kagenti-operator/cmd/main.go @@ -627,15 +627,6 @@ func main() { os.Exit(1) } - // Defaults-only config reconciler: propagates ConfigMap changes to - // workloads that have kagenti.io/type but no AgentRuntime CR. - if err = (&injector.DefaultsConfigReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - }).SetupWithManager(mgr); err != nil { - setupLog.Error(err, "unable to create controller", "controller", "DefaultsConfig") - os.Exit(1) - } } // +kubebuilder:scaffold:builder diff --git a/kagenti-operator/internal/webhook/injector/defaults_config_reconciler.go b/kagenti-operator/internal/webhook/injector/defaults_config_reconciler.go deleted file mode 100644 index a7fcdd6f..00000000 --- a/kagenti-operator/internal/webhook/injector/defaults_config_reconciler.go +++ /dev/null @@ -1,300 +0,0 @@ -/* -Copyright 2026. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package injector - -import ( - "context" - "errors" - "fmt" - - appsv1 "k8s.io/api/apps/v1" - corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" - "k8s.io/client-go/util/retry" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/builder" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/controller-runtime/pkg/predicate" - - "github.com/kagenti/operator/internal/controller" -) - -// DefaultsConfigReconciler watches cluster and namespace ConfigMaps and -// updates the kagenti.io/config-hash annotation on workloads that have -// the kagenti.io/type label but are NOT managed by an AgentRuntime CR. -// -// This ensures that sidecar configuration stays current even when no -// AgentRuntime CR exists (e.g. after CR deletion with the type label -// preserved, or workloads that rely purely on platform defaults). -// -// NOTE: This reconciler intentionally lives in the webhook package for -// now. It is expected to move to a dedicated controller in the future. -type DefaultsConfigReconciler struct { - client.Client - Scheme *runtime.Scheme -} - -func (r *DefaultsConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - logger := log.FromContext(ctx).WithValues("configmap", req.NamespacedName) - - // Determine scope from the request key. We cannot rely solely on - // fetching the ConfigMap because it may have been deleted — and a - // deletion still changes the defaults-only hash (one input is gone). - var namespaces []string - - cm := &corev1.ConfigMap{} - err := r.Get(ctx, req.NamespacedName, cm) - - switch { - case err == nil && isClusterConfigMap(cm): - // Cluster-level ConfigMap updated — affects all namespaces. 
- ns, err := r.namespacesWithKagentiWorkloads(ctx) - if err != nil { - logger.Error(err, "failed to list namespaces with kagenti workloads") - return ctrl.Result{}, err - } - namespaces = ns - - case err == nil && isNamespaceDefaultsConfigMap(cm): - // Namespace-level defaults ConfigMap updated. - namespaces = []string{cm.Namespace} - - case err == nil: - // ConfigMap exists but is not relevant (predicate should prevent this). - return ctrl.Result{}, nil - - case apierrors.IsNotFound(err): - // ConfigMap was deleted. Use the request key to infer scope. - if isClusterConfigMapKey(req.NamespacedName) { - ns, err := r.namespacesWithKagentiWorkloads(ctx) - if err != nil { - logger.Error(err, "failed to list namespaces with kagenti workloads") - return ctrl.Result{}, err - } - namespaces = ns - } else { - // Namespace-level ConfigMap deleted — re-hash workloads in that namespace. - namespaces = []string{req.Namespace} - } - logger.Info("ConfigMap deleted, re-hashing affected workloads") - - default: - return ctrl.Result{}, err - } - - var firstErr error - for _, ns := range namespaces { - if err := r.reconcileWorkloadsInNamespace(ctx, ns); err != nil { - logger.Error(err, "failed to reconcile workloads", "namespace", ns) - if firstErr == nil { - firstErr = err - } - } - } - return ctrl.Result{}, firstErr -} - -// reconcileWorkloadsInNamespace updates config-hash on Deployments and -// StatefulSets that carry the kagenti.io/type label but are not managed -// by an AgentRuntime CR. Errors from individual workload updates are -// accumulated and returned so that controller-runtime requeues the request. 
-func (r *DefaultsConfigReconciler) reconcileWorkloadsInNamespace(ctx context.Context, namespace string) error { - logger := log.FromContext(ctx).WithValues("namespace", namespace) - var errs []error - - // Process Deployments - deployList := &appsv1.DeploymentList{} - if err := r.List(ctx, deployList, - client.InNamespace(namespace), - client.HasLabels{KagentiTypeLabel}, - ); err != nil { - return err - } - for i := range deployList.Items { - dep := &deployList.Items[i] - if isManagedByAgentRuntime(dep) { - continue - } - if err := r.updateConfigHash(ctx, namespace, dep.Name, "Deployment"); err != nil { - logger.Error(err, "failed to update Deployment config-hash", "name", dep.Name) - errs = append(errs, err) - } - } - - // Process StatefulSets - ssList := &appsv1.StatefulSetList{} - if err := r.List(ctx, ssList, - client.InNamespace(namespace), - client.HasLabels{KagentiTypeLabel}, - ); err != nil { - return err - } - for i := range ssList.Items { - ss := &ssList.Items[i] - if isManagedByAgentRuntime(ss) { - continue - } - if err := r.updateConfigHash(ctx, namespace, ss.Name, "StatefulSet"); err != nil { - logger.Error(err, "failed to update StatefulSet config-hash", "name", ss.Name) - errs = append(errs, err) - } - } - - return errors.Join(errs...) -} - -// updateConfigHash computes the defaults-only hash and applies it to -// the workload's PodTemplateSpec if it differs from the current value. 
-func (r *DefaultsConfigReconciler) updateConfigHash(ctx context.Context, namespace, name, kind string) error { - logger := log.FromContext(ctx).WithValues("workload", name, "kind", kind) - - configResult, err := controller.ComputeConfigHash(ctx, r.Client, namespace) - if err != nil { - return err - } - newHash := configResult.Hash - - return retry.RetryOnConflict(retry.DefaultRetry, func() error { - key := types.NamespacedName{Name: name, Namespace: namespace} - - switch kind { - case "Deployment": - dep := &appsv1.Deployment{} - if err := r.Get(ctx, key, dep); err != nil { - return client.IgnoreNotFound(err) - } - current := dep.Spec.Template.Annotations[controller.AnnotationConfigHash] - if current == newHash { - return nil - } - if dep.Spec.Template.Annotations == nil { - dep.Spec.Template.Annotations = make(map[string]string) - } - dep.Spec.Template.Annotations[controller.AnnotationConfigHash] = newHash - logger.Info("Updating config-hash to defaults-only", - "oldHash", truncateHash(current), "newHash", truncateHash(newHash)) - return r.Update(ctx, dep) - - case "StatefulSet": - ss := &appsv1.StatefulSet{} - if err := r.Get(ctx, key, ss); err != nil { - return client.IgnoreNotFound(err) - } - current := ss.Spec.Template.Annotations[controller.AnnotationConfigHash] - if current == newHash { - return nil - } - if ss.Spec.Template.Annotations == nil { - ss.Spec.Template.Annotations = make(map[string]string) - } - ss.Spec.Template.Annotations[controller.AnnotationConfigHash] = newHash - logger.Info("Updating config-hash to defaults-only", - "oldHash", truncateHash(current), "newHash", truncateHash(newHash)) - return r.Update(ctx, ss) - - default: - return fmt.Errorf("unsupported workload kind: %s", kind) - } - }) -} - -// namespacesWithKagentiWorkloads returns all namespaces that contain at -// least one Deployment or StatefulSet with the kagenti.io/type label. -// TODO: consider adding a field indexer on KagentiTypeLabel if cluster size grows. 
-func (r *DefaultsConfigReconciler) namespacesWithKagentiWorkloads(ctx context.Context) ([]string, error) { - seen := make(map[string]bool) - - deployList := &appsv1.DeploymentList{} - if err := r.List(ctx, deployList, client.HasLabels{KagentiTypeLabel}); err != nil { - return nil, err - } - for i := range deployList.Items { - seen[deployList.Items[i].Namespace] = true - } - - ssList := &appsv1.StatefulSetList{} - if err := r.List(ctx, ssList, client.HasLabels{KagentiTypeLabel}); err != nil { - return nil, err - } - for i := range ssList.Items { - seen[ssList.Items[i].Namespace] = true - } - - namespaces := make([]string, 0, len(seen)) - for ns := range seen { - namespaces = append(namespaces, ns) - } - return namespaces, nil -} - -// SetupWithManager registers the reconciler with the controller-runtime manager. -func (r *DefaultsConfigReconciler) SetupWithManager(mgr ctrl.Manager) error { - return ctrl.NewControllerManagedBy(mgr). - Named("defaults-config"). - For(&corev1.ConfigMap{}, builder.WithPredicates(kagentiConfigMapPredicate())). - Complete(r) -} - -// kagentiConfigMapPredicate filters to only cluster-level defaults and -// namespace-level defaults ConfigMaps. -func kagentiConfigMapPredicate() predicate.Predicate { - return predicate.NewPredicateFuncs(func(obj client.Object) bool { - cm, ok := obj.(*corev1.ConfigMap) - if !ok { - return false - } - return isClusterConfigMap(cm) || isNamespaceDefaultsConfigMap(cm) - }) -} - -func isClusterConfigMap(cm *corev1.ConfigMap) bool { - return isClusterConfigMapKey(types.NamespacedName{Name: cm.Name, Namespace: cm.Namespace}) -} - -// isClusterConfigMapKey checks whether a NamespacedName refers to one of the -// cluster-level defaults ConfigMaps. Used for both live objects and deletion -// events where the object no longer exists. 
-func isClusterConfigMapKey(key types.NamespacedName) bool { - if key.Namespace != controller.ClusterDefaultsNamespace { - return false - } - return key.Name == controller.ClusterDefaultsConfigMapName || - key.Name == controller.ClusterFeatureGatesConfigMapName -} - -func isNamespaceDefaultsConfigMap(cm *corev1.ConfigMap) bool { - labels := cm.GetLabels() - return labels != nil && labels[controller.LabelNamespaceDefaults] == "true" -} - -// isManagedByAgentRuntime checks if a workload is actively managed by -// an AgentRuntime CR. The AgentRuntime controller sets this label when -// the CR is active and removes it on CR deletion. -func isManagedByAgentRuntime(obj client.Object) bool { - labels := obj.GetLabels() - return labels != nil && labels[controller.LabelManagedBy] == controller.LabelManagedByValue -} - -func truncateHash(h string) string { - if len(h) > 12 { - return h[:12] - } - return h -} diff --git a/kagenti-operator/internal/webhook/injector/defaults_config_reconciler_test.go b/kagenti-operator/internal/webhook/injector/defaults_config_reconciler_test.go deleted file mode 100644 index 73147dd1..00000000 --- a/kagenti-operator/internal/webhook/injector/defaults_config_reconciler_test.go +++ /dev/null @@ -1,693 +0,0 @@ -/* -Copyright 2026. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package injector - -import ( - "context" - "testing" - - "github.com/kagenti/operator/internal/controller" - appsv1 "k8s.io/api/apps/v1" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/client/fake" -) - -func newReconcilerScheme() *runtime.Scheme { - scheme := runtime.NewScheme() - _ = corev1.AddToScheme(scheme) - _ = appsv1.AddToScheme(scheme) - return scheme -} - -func newDefaultsReconciler(objs ...client.Object) *DefaultsConfigReconciler { - scheme := newReconcilerScheme() - fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(objs...).Build() - return &DefaultsConfigReconciler{ - Client: fakeClient, - Scheme: scheme, - } -} - -func newLabeledDeployment(name, namespace string, extraLabels map[string]string) *appsv1.Deployment { - labels := map[string]string{ - KagentiTypeLabel: KagentiTypeAgent, - } - for k, v := range extraLabels { - labels[k] = v - } - return &appsv1.Deployment{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: namespace, - Labels: labels, - }, - Spec: appsv1.DeploymentSpec{ - Selector: &metav1.LabelSelector{ - MatchLabels: map[string]string{"app": name}, - }, - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{"app": name}, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - {Name: "agent", Image: "agent:latest"}, - }, - }, - }, - }, - } -} - -func newLabeledStatefulSet(name, namespace string) *appsv1.StatefulSet { - return &appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: namespace, - Labels: map[string]string{ - KagentiTypeLabel: KagentiTypeAgent, - }, - }, - Spec: appsv1.StatefulSetSpec{ - Selector: &metav1.LabelSelector{ - MatchLabels: map[string]string{"app": name}, - }, - Template: 
corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{"app": name}, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - {Name: "agent", Image: "agent:latest"}, - }, - }, - }, - }, - } -} - -func newClusterDefaultsConfigMap(data map[string]string) *corev1.ConfigMap { - return &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Name: controller.ClusterDefaultsConfigMapName, - Namespace: controller.ClusterDefaultsNamespace, - }, - Data: data, - } -} - -func newNamespaceDefaultsConfigMap(namespace string, data map[string]string) *corev1.ConfigMap { - return &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Name: "ns-defaults", - Namespace: namespace, - Labels: map[string]string{ - controller.LabelNamespaceDefaults: "true", - }, - }, - Data: data, - } -} - -func TestDefaultsConfigReconciler_SkipsManagedWorkload(t *testing.T) { - dep := newLabeledDeployment("my-agent", "team1", map[string]string{ - controller.LabelManagedBy: controller.LabelManagedByValue, - }) - cm := newClusterDefaultsConfigMap(map[string]string{"key": "val"}) - - r := newDefaultsReconciler(dep, cm) - ctx := context.Background() - - _, err := r.Reconcile(ctx, ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: controller.ClusterDefaultsConfigMapName, - Namespace: controller.ClusterDefaultsNamespace, - }, - }) - if err != nil { - t.Fatalf("Reconcile() returned error: %v", err) - } - - // Verify config-hash was NOT set (workload is managed by AgentRuntime) - updated := &appsv1.Deployment{} - if err := r.Get(ctx, client.ObjectKeyFromObject(dep), updated); err != nil { - t.Fatalf("failed to get deployment: %v", err) - } - if hash := updated.Spec.Template.Annotations[controller.AnnotationConfigHash]; hash != "" { - t.Errorf("expected no config-hash on managed workload, got %q", hash) - } -} - -func TestDefaultsConfigReconciler_UpdatesUnmanagedWorkload(t *testing.T) { - dep := newLabeledDeployment("my-agent", "team1", nil) - cm := 
newClusterDefaultsConfigMap(map[string]string{"key": "val"}) - - r := newDefaultsReconciler(dep, cm) - ctx := context.Background() - - _, err := r.Reconcile(ctx, ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: controller.ClusterDefaultsConfigMapName, - Namespace: controller.ClusterDefaultsNamespace, - }, - }) - if err != nil { - t.Fatalf("Reconcile() returned error: %v", err) - } - - updated := &appsv1.Deployment{} - if err := r.Get(ctx, client.ObjectKeyFromObject(dep), updated); err != nil { - t.Fatalf("failed to get deployment: %v", err) - } - hash := updated.Spec.Template.Annotations[controller.AnnotationConfigHash] - if hash == "" { - t.Fatal("expected config-hash to be set on unmanaged workload") - } -} - -func TestDefaultsConfigReconciler_IdempotentWhenHashUnchanged(t *testing.T) { - dep := newLabeledDeployment("my-agent", "team1", nil) - cm := newClusterDefaultsConfigMap(map[string]string{"key": "val"}) - - r := newDefaultsReconciler(dep, cm) - ctx := context.Background() - - // First reconcile — sets the hash - _, err := r.Reconcile(ctx, ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: controller.ClusterDefaultsConfigMapName, - Namespace: controller.ClusterDefaultsNamespace, - }, - }) - if err != nil { - t.Fatalf("first Reconcile() returned error: %v", err) - } - - updated := &appsv1.Deployment{} - if err := r.Get(ctx, client.ObjectKeyFromObject(dep), updated); err != nil { - t.Fatalf("failed to get deployment: %v", err) - } - rvAfterFirst := updated.ResourceVersion - - // Second reconcile — should be a no-op - _, err = r.Reconcile(ctx, ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: controller.ClusterDefaultsConfigMapName, - Namespace: controller.ClusterDefaultsNamespace, - }, - }) - if err != nil { - t.Fatalf("second Reconcile() returned error: %v", err) - } - - if err := r.Get(ctx, client.ObjectKeyFromObject(dep), updated); err != nil { - t.Fatalf("failed to get deployment: %v", err) - } - if 
updated.ResourceVersion != rvAfterFirst { - t.Error("expected no update on second reconcile (hash unchanged)") - } -} - -func TestDefaultsConfigReconciler_HandlesNamespaceDefaults(t *testing.T) { - dep := newLabeledDeployment("my-agent", "team1", nil) - cm := newNamespaceDefaultsConfigMap("team1", map[string]string{"ns-key": "ns-val"}) - - r := newDefaultsReconciler(dep, cm) - ctx := context.Background() - - _, err := r.Reconcile(ctx, ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: "ns-defaults", - Namespace: "team1", - }, - }) - if err != nil { - t.Fatalf("Reconcile() returned error: %v", err) - } - - updated := &appsv1.Deployment{} - if err := r.Get(ctx, client.ObjectKeyFromObject(dep), updated); err != nil { - t.Fatalf("failed to get deployment: %v", err) - } - hash := updated.Spec.Template.Annotations[controller.AnnotationConfigHash] - if hash == "" { - t.Fatal("expected config-hash to be set for namespace defaults change") - } -} - -func TestDefaultsConfigReconciler_IgnoresIrrelevantConfigMap(t *testing.T) { - dep := newLabeledDeployment("my-agent", "team1", nil) - cm := &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Name: "unrelated-config", - Namespace: "team1", - }, - Data: map[string]string{"foo": "bar"}, - } - - r := newDefaultsReconciler(dep, cm) - ctx := context.Background() - - _, err := r.Reconcile(ctx, ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: "unrelated-config", - Namespace: "team1", - }, - }) - if err != nil { - t.Fatalf("Reconcile() returned error: %v", err) - } - - updated := &appsv1.Deployment{} - if err := r.Get(ctx, client.ObjectKeyFromObject(dep), updated); err != nil { - t.Fatalf("failed to get deployment: %v", err) - } - if hash := updated.Spec.Template.Annotations[controller.AnnotationConfigHash]; hash != "" { - t.Errorf("expected no config-hash update for irrelevant ConfigMap, got %q", hash) - } -} - -func TestDefaultsConfigReconciler_HandlesStatefulSet(t *testing.T) { - ss := 
newLabeledStatefulSet("my-agent", "team1") - cm := newClusterDefaultsConfigMap(map[string]string{"key": "val"}) - - r := newDefaultsReconciler(ss, cm) - ctx := context.Background() - - _, err := r.Reconcile(ctx, ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: controller.ClusterDefaultsConfigMapName, - Namespace: controller.ClusterDefaultsNamespace, - }, - }) - if err != nil { - t.Fatalf("Reconcile() returned error: %v", err) - } - - updated := &appsv1.StatefulSet{} - if err := r.Get(ctx, client.ObjectKeyFromObject(ss), updated); err != nil { - t.Fatalf("failed to get statefulset: %v", err) - } - hash := updated.Spec.Template.Annotations[controller.AnnotationConfigHash] - if hash == "" { - t.Fatal("expected config-hash to be set on unmanaged StatefulSet") - } -} - -func TestDefaultsConfigReconciler_ConfigMapNotFound(t *testing.T) { - r := newDefaultsReconciler() - ctx := context.Background() - - _, err := r.Reconcile(ctx, ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: "nonexistent", - Namespace: "team1", - }, - }) - if err != nil { - t.Fatalf("Reconcile() should not error on NotFound, got: %v", err) - } -} - -func TestDefaultsConfigReconciler_ConfigMapDeleted_UpdatesWorkloads(t *testing.T) { - // When a namespace defaults ConfigMap is deleted, the defaults-only hash - // changes (one input is gone). The reconciler should still update workloads. 
- dep := newLabeledDeployment("my-agent", "team1", nil) - // Pre-set a stale hash to verify it gets updated - dep.Spec.Template.Annotations = map[string]string{ - controller.AnnotationConfigHash: "stale-hash-value", - } - - // No ConfigMap in the fake client — simulates deletion - r := newDefaultsReconciler(dep) - ctx := context.Background() - - _, err := r.Reconcile(ctx, ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: "ns-defaults", - Namespace: "team1", - }, - }) - if err != nil { - t.Fatalf("Reconcile() returned error: %v", err) - } - - updated := &appsv1.Deployment{} - if err := r.Get(ctx, client.ObjectKeyFromObject(dep), updated); err != nil { - t.Fatalf("failed to get deployment: %v", err) - } - hash := updated.Spec.Template.Annotations[controller.AnnotationConfigHash] - if hash == "stale-hash-value" { - t.Error("expected config-hash to be updated after ConfigMap deletion") - } - if hash == "" { - t.Error("expected config-hash to be set to defaults-only hash") - } -} - -func TestDefaultsConfigReconciler_ClusterConfigMapDeleted_UpdatesAllNamespaces(t *testing.T) { - dep1 := newLabeledDeployment("agent-1", "team1", nil) - dep1.Spec.Template.Annotations = map[string]string{ - controller.AnnotationConfigHash: "stale-hash", - } - dep2 := newLabeledDeployment("agent-2", "team2", nil) - dep2.Spec.Template.Annotations = map[string]string{ - controller.AnnotationConfigHash: "stale-hash", - } - - // No cluster ConfigMap — simulates deletion - r := newDefaultsReconciler(dep1, dep2) - ctx := context.Background() - - _, err := r.Reconcile(ctx, ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: controller.ClusterDefaultsConfigMapName, - Namespace: controller.ClusterDefaultsNamespace, - }, - }) - if err != nil { - t.Fatalf("Reconcile() returned error: %v", err) - } - - for _, dep := range []*appsv1.Deployment{dep1, dep2} { - updated := &appsv1.Deployment{} - if err := r.Get(ctx, client.ObjectKeyFromObject(dep), updated); err != nil { - 
t.Fatalf("failed to get deployment %s: %v", dep.Name, err) - } - hash := updated.Spec.Template.Annotations[controller.AnnotationConfigHash] - if hash == "stale-hash" || hash == "" { - t.Errorf("deployment %s/%s: expected config-hash to be updated, got %q", - dep.Namespace, dep.Name, hash) - } - } -} - -func TestDefaultsConfigReconciler_MixedManagedAndUnmanaged(t *testing.T) { - // Both managed and unmanaged workloads in the same namespace — only - // the unmanaged one should get its config-hash updated. - managed := newLabeledDeployment("managed-agent", "team1", map[string]string{ - controller.LabelManagedBy: controller.LabelManagedByValue, - }) - unmanaged := newLabeledDeployment("orphan-agent", "team1", nil) - cm := newClusterDefaultsConfigMap(map[string]string{"key": "val"}) - - r := newDefaultsReconciler(managed, unmanaged, cm) - ctx := context.Background() - - _, err := r.Reconcile(ctx, ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: controller.ClusterDefaultsConfigMapName, - Namespace: controller.ClusterDefaultsNamespace, - }, - }) - if err != nil { - t.Fatalf("Reconcile() returned error: %v", err) - } - - // Managed workload should NOT be updated - updatedManaged := &appsv1.Deployment{} - if err := r.Get(ctx, client.ObjectKeyFromObject(managed), updatedManaged); err != nil { - t.Fatalf("failed to get managed deployment: %v", err) - } - if hash := updatedManaged.Spec.Template.Annotations[controller.AnnotationConfigHash]; hash != "" { - t.Errorf("expected no config-hash on managed workload, got %q", hash) - } - - // Unmanaged workload should be updated - updatedUnmanaged := &appsv1.Deployment{} - if err := r.Get(ctx, client.ObjectKeyFromObject(unmanaged), updatedUnmanaged); err != nil { - t.Fatalf("failed to get unmanaged deployment: %v", err) - } - if hash := updatedUnmanaged.Spec.Template.Annotations[controller.AnnotationConfigHash]; hash == "" { - t.Error("expected config-hash on unmanaged workload") - } -} - -func 
TestDefaultsConfigReconciler_MultiNamespaceFanOut(t *testing.T) { - // Cluster ConfigMap change should update workloads across multiple namespaces. - dep1 := newLabeledDeployment("agent-a", "ns1", nil) - dep2 := newLabeledDeployment("agent-b", "ns2", nil) - dep3 := newLabeledDeployment("agent-c", "ns3", nil) - cm := newClusterDefaultsConfigMap(map[string]string{"key": "val"}) - - r := newDefaultsReconciler(dep1, dep2, dep3, cm) - ctx := context.Background() - - _, err := r.Reconcile(ctx, ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: controller.ClusterDefaultsConfigMapName, - Namespace: controller.ClusterDefaultsNamespace, - }, - }) - if err != nil { - t.Fatalf("Reconcile() returned error: %v", err) - } - - for _, dep := range []*appsv1.Deployment{dep1, dep2, dep3} { - updated := &appsv1.Deployment{} - if err := r.Get(ctx, client.ObjectKeyFromObject(dep), updated); err != nil { - t.Fatalf("failed to get deployment %s/%s: %v", dep.Namespace, dep.Name, err) - } - hash := updated.Spec.Template.Annotations[controller.AnnotationConfigHash] - if hash == "" { - t.Errorf("deployment %s/%s: expected config-hash to be set", dep.Namespace, dep.Name) - } - } -} - -func TestDefaultsConfigReconciler_FeatureGatesConfigMapTrigger(t *testing.T) { - dep := newLabeledDeployment("my-agent", "team1", nil) - cm := &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Name: controller.ClusterFeatureGatesConfigMapName, - Namespace: controller.ClusterDefaultsNamespace, - }, - Data: map[string]string{"globalEnabled": "true"}, - } - - r := newDefaultsReconciler(dep, cm) - ctx := context.Background() - - _, err := r.Reconcile(ctx, ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: controller.ClusterFeatureGatesConfigMapName, - Namespace: controller.ClusterDefaultsNamespace, - }, - }) - if err != nil { - t.Fatalf("Reconcile() returned error: %v", err) - } - - updated := &appsv1.Deployment{} - if err := r.Get(ctx, client.ObjectKeyFromObject(dep), updated); err != nil { 
- t.Fatalf("failed to get deployment: %v", err) - } - hash := updated.Spec.Template.Annotations[controller.AnnotationConfigHash] - if hash == "" { - t.Fatal("expected config-hash to be set for feature-gates ConfigMap change") - } -} - -func TestIsClusterConfigMapKey(t *testing.T) { - tests := []struct { - name string - key types.NamespacedName - expected bool - }{ - { - name: "platform config", - key: types.NamespacedName{Name: controller.ClusterDefaultsConfigMapName, Namespace: controller.ClusterDefaultsNamespace}, - expected: true, - }, - { - name: "feature gates", - key: types.NamespacedName{Name: controller.ClusterFeatureGatesConfigMapName, Namespace: controller.ClusterDefaultsNamespace}, - expected: true, - }, - { - name: "wrong namespace", - key: types.NamespacedName{Name: controller.ClusterDefaultsConfigMapName, Namespace: "other-ns"}, - expected: false, - }, - { - name: "random configmap", - key: types.NamespacedName{Name: "random", Namespace: "random-ns"}, - expected: false, - }, - } - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - if got := isClusterConfigMapKey(tc.key); got != tc.expected { - t.Errorf("isClusterConfigMapKey() = %v, want %v", got, tc.expected) - } - }) - } -} - -func TestIsClusterConfigMap(t *testing.T) { - tests := []struct { - name string - cm *corev1.ConfigMap - expected bool - }{ - { - name: "platform config", - cm: &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Name: controller.ClusterDefaultsConfigMapName, - Namespace: controller.ClusterDefaultsNamespace, - }, - }, - expected: true, - }, - { - name: "feature gates", - cm: &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Name: controller.ClusterFeatureGatesConfigMapName, - Namespace: controller.ClusterDefaultsNamespace, - }, - }, - expected: true, - }, - { - name: "wrong namespace", - cm: &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Name: controller.ClusterDefaultsConfigMapName, - Namespace: "other-ns", - }, - }, - expected: false, - }, - { - 
name: "wrong name", - cm: &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Name: "other-config", - Namespace: controller.ClusterDefaultsNamespace, - }, - }, - expected: false, - }, - } - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - if got := isClusterConfigMap(tc.cm); got != tc.expected { - t.Errorf("isClusterConfigMap() = %v, want %v", got, tc.expected) - } - }) - } -} - -func TestIsNamespaceDefaultsConfigMap(t *testing.T) { - tests := []struct { - name string - cm *corev1.ConfigMap - expected bool - }{ - { - name: "has defaults label", - cm: &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Name: "ns-defaults", - Namespace: "team1", - Labels: map[string]string{ - controller.LabelNamespaceDefaults: "true", - }, - }, - }, - expected: true, - }, - { - name: "no labels", - cm: &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Name: "ns-defaults", - Namespace: "team1", - }, - }, - expected: false, - }, - { - name: "wrong label value", - cm: &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Name: "ns-defaults", - Namespace: "team1", - Labels: map[string]string{ - controller.LabelNamespaceDefaults: "false", - }, - }, - }, - expected: false, - }, - } - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - if got := isNamespaceDefaultsConfigMap(tc.cm); got != tc.expected { - t.Errorf("isNamespaceDefaultsConfigMap() = %v, want %v", got, tc.expected) - } - }) - } -} - -func TestIsManagedByAgentRuntime(t *testing.T) { - tests := []struct { - name string - labels map[string]string - expected bool - }{ - { - name: "managed", - labels: map[string]string{ - controller.LabelManagedBy: controller.LabelManagedByValue, - }, - expected: true, - }, - { - name: "not managed - no labels", - labels: nil, - expected: false, - }, - { - name: "not managed - different value", - labels: map[string]string{ - controller.LabelManagedBy: "other", - }, - expected: false, - }, - } - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { 
- dep := &appsv1.Deployment{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - Labels: tc.labels, - }, - } - if got := isManagedByAgentRuntime(dep); got != tc.expected { - t.Errorf("isManagedByAgentRuntime() = %v, want %v", got, tc.expected) - } - }) - } -} From be9a32a35abf859cc0f7372afb300c2c06021455 Mon Sep 17 00:00:00 2001 From: Daniels Nagornuks Date: Wed, 10 Jun 2026 18:20:23 +0100 Subject: [PATCH 4/6] Docs: update documentation and samples for VAP enforcement Remove manual kagenti.io/type labeling from docs, samples, and comments. All examples now use AgentRuntime CRs, matching the ValidatingAdmissionPolicy that prevents direct label application. Fix auto-created AgentCard naming to use the {name}-{kind}-card pattern produced by AgentCardSync. Signed-off-by: Daniels Nagornuks --- kagenti-operator/GETTING_STARTED.md | 106 ++++-------------- ...v1alpha1_agentruntime_skill_discovery.yaml | 1 - kagenti-operator/docs/api-reference.md | 41 ++++--- .../docs/dynamic-agent-discovery.md | 35 ++++-- .../operator-managed-client-registration.md | 2 +- .../controller/agentruntime_controller.go | 11 +- kagenti-operator/test/e2e/README.md | 7 +- kagenti-operator/test/e2e/fixtures.go | 3 +- 8 files changed, 76 insertions(+), 130 deletions(-) diff --git a/kagenti-operator/GETTING_STARTED.md b/kagenti-operator/GETTING_STARTED.md index 68d9361e..9d805b25 100644 --- a/kagenti-operator/GETTING_STARTED.md +++ b/kagenti-operator/GETTING_STARTED.md @@ -40,10 +40,14 @@ This scenario demonstrates the complete lifecycle of an AI agent deployment on t ## Overview ### Kagenti Operator -The Kagenti Operator discovers, indexes, and secures AI agents deployed in Kubernetes. There are two ways to enroll workloads: +The Kagenti Operator discovers, indexes, and secures AI agents deployed in Kubernetes. Enroll workloads by creating an `AgentRuntime` CR: -1. **AgentRuntime CR (Recommended)** — Create a Deployment with a `protocol.kagenti.io/a2a` label and an `AgentRuntime` CR pointing to it. 
The controller applies `kagenti.io/type` labels and triggers sidecar injection automatically. The protocol label enables automatic AgentCard creation for agent discovery. -2. **Manual labels** — Add the `kagenti.io/type: agent` label directly to your Deployment or StatefulSet. This is simpler for quick tests but does not provide identity or observability configuration. +1. Create a Deployment with a `protocol.kagenti.io/a2a` label +2. Create an `AgentRuntime` CR pointing to it +3. The controller applies `kagenti.io/type` labels and triggers sidecar injection automatically +4. The protocol label enables automatic AgentCard creation for agent discovery + +A `ValidatingAdmissionPolicy` prevents the `kagenti.io/type` label from being set directly on Deployments or StatefulSets — it can only be applied by the operator via an AgentRuntime CR. > **Note:** The `Agent` Custom Resource is deprecated and will be removed in a future release. @@ -180,86 +184,6 @@ kubectl delete agentruntime weather-agent-runtime -n team1 --- -## Deploy an Agent with Manual Labels (Alternative) - -Deploy an agent as a standard Kubernetes Deployment with the required `kagenti.io/type: agent` label. The operator will automatically discover the workload and create an AgentCard for it. This approach does not provide AgentRuntime's identity or observability configuration. - -### Quick Example Deployment - -```yaml -kubectl apply -f - <` | `""` (existence implies support) | Yes (at least one) | Protocol(s) the agent speaks (e.g., `protocol.kagenti.io/a2a`, `protocol.kagenti.io/mcp`) | -| `app.kubernetes.io/name` | `` | Recommended | Standard Kubernetes app name label | +| Label | Value | Set By | Description | +|-------|-------|--------|-------------| +| `kagenti.io/type` | `agent` or `tool` | Operator (via AgentRuntime) | Classifies the workload. Applied automatically when an AgentRuntime CR targets it. 
| +| `protocol.kagenti.io/` | `""` (existence implies support) | User | Protocol(s) the agent speaks (e.g., `protocol.kagenti.io/a2a`, `protocol.kagenti.io/mcp`). Required for AgentCard auto-creation. | +| `app.kubernetes.io/name` | `` | User | Standard Kubernetes app name label (recommended) | --- diff --git a/kagenti-operator/docs/dynamic-agent-discovery.md b/kagenti-operator/docs/dynamic-agent-discovery.md index 7094bb7d..aa07a35c 100644 --- a/kagenti-operator/docs/dynamic-agent-discovery.md +++ b/kagenti-operator/docs/dynamic-agent-discovery.md @@ -40,9 +40,11 @@ Exposes /.well-known/ Enables kubectl get agentcards ## How It Works -### 1. Workload Labeling +### 1. Workload Enrollment via AgentRuntime -Agent workloads (Deployments or StatefulSets) must be labeled to enable discovery: +Agent workloads (Deployments or StatefulSets) are enrolled by creating an `AgentRuntime` CR. The operator's controller applies the `kagenti.io/type` label automatically — a `ValidatingAdmissionPolicy` prevents setting it directly on workloads. + +The workload itself carries a protocol label to declare which protocol it speaks: ```yaml apiVersion: apps/v1 @@ -50,18 +52,27 @@ kind: Deployment metadata: name: weather-agent labels: - kagenti.io/type: agent # Identifies as an agent protocol.kagenti.io/a2a: "" # Speaks A2A protocol app.kubernetes.io/name: weather-agent spec: # ... 
standard Deployment spec +--- +apiVersion: agent.kagenti.dev/v1alpha1 +kind: AgentRuntime +metadata: + name: weather-agent +spec: + type: agent + targetRef: + apiVersion: apps/v1 + kind: Deployment + name: weather-agent ``` -Multiple protocols can be declared simultaneously: +Multiple protocols can be declared simultaneously on the Deployment: ```yaml labels: - kagenti.io/type: agent protocol.kagenti.io/a2a: "" # Speaks A2A protocol.kagenti.io/mcp: "" # Also speaks MCP ``` @@ -204,7 +215,6 @@ kind: Deployment metadata: name: assistant-agent labels: - kagenti.io/type: agent protocol.kagenti.io/a2a: "" app.kubernetes.io/name: assistant-agent spec: @@ -234,9 +244,20 @@ spec: - name: http port: 8000 targetPort: 8000 +--- +apiVersion: agent.kagenti.dev/v1alpha1 +kind: AgentRuntime +metadata: + name: assistant-agent +spec: + type: agent + targetRef: + apiVersion: apps/v1 + kind: Deployment + name: assistant-agent ``` -The AgentCard is created automatically by the AgentCardSync controller when agent labels are present, or can be created explicitly with `targetRef`. +The operator applies the `kagenti.io/type: agent` label to the Deployment, and the AgentCard is created automatically by the AgentCardSync controller when both `kagenti.io/type` and a protocol label are present. ### Query Agent Cards diff --git a/kagenti-operator/docs/operator-managed-client-registration.md b/kagenti-operator/docs/operator-managed-client-registration.md index c81236cf..c33728d4 100644 --- a/kagenti-operator/docs/operator-managed-client-registration.md +++ b/kagenti-operator/docs/operator-managed-client-registration.md @@ -104,7 +104,7 @@ Other workloads are ignored by this controller. ### 3.2 Workload - **Deployment** or **StatefulSet** (not bare Pods for operator ownership of Secrets). -- Pod template labels: `kagenti.io/type: agent` or `tool` (subject to `injectTools`). Do **not** set `kagenti.io/client-registration-inject: "true"` unless you require the legacy sidecar. 
+- An **AgentRuntime** CR targeting the workload — the operator applies `kagenti.io/type` to the Deployment/StatefulSet and its PodTemplateSpec (a `ValidatingAdmissionPolicy` prevents manual application). Do **not** set `kagenti.io/client-registration-inject: "true"` unless you require the legacy sidecar. - For **SPIRE-enabled** namespaces: `spec.template.spec.serviceAccountName` must be a **dedicated** ServiceAccount (not `default`). ### 3.3 Operator configuration diff --git a/kagenti-operator/internal/controller/agentruntime_controller.go b/kagenti-operator/internal/controller/agentruntime_controller.go index 0c4070dd..e5732840 100644 --- a/kagenti-operator/internal/controller/agentruntime_controller.go +++ b/kagenti-operator/internal/controller/agentruntime_controller.go @@ -1152,13 +1152,10 @@ func (r *AgentRuntimeReconciler) mapClusterConfigMapToAgentRuntimes(ctx context. // whose computed hash actually changed re-stamp the pod template). func (r *AgentRuntimeReconciler) mapNamespaceConfigMapToAgentRuntimes(ctx context.Context, obj client.Object) []reconcile.Request { labels := obj.GetLabels() - // goconst flags this literal as the 11th "true" in the codebase and - // suggests reusing AnnotationRestartPendingValue, but that constant - // is semantically a restart-pending marker, not a generic label-true - // value — reusing it would obscure intent. Existing code (e.g. - // defaults_config_reconciler.go) uses the same literal-true idiom - // for label checks; rather than introduce a fresh `labelValueTrue` - // constant only here, suppress the rule on this one line. + // goconst flags this literal "true" and suggests reusing + // AnnotationRestartPendingValue, but that constant is semantically a + // restart-pending marker, not a generic label-true value — reusing it + // would obscure intent. 
isNsDefaults := labels[LabelNamespaceDefaults] == "true" //nolint:goconst isAuthBridgeRuntime := obj.GetName() == AuthBridgeRuntimeConfigMapName diff --git a/kagenti-operator/test/e2e/README.md b/kagenti-operator/test/e2e/README.md index a02472ef..efbe12df 100644 --- a/kagenti-operator/test/e2e/README.md +++ b/kagenti-operator/test/e2e/README.md @@ -83,7 +83,7 @@ kind delete cluster | Apply labels and config-hash | Agent lifecycle | AgentRuntime controller adds `kagenti.io/type=agent`, `managed-by`, config-hash, and triggers AgentCard auto-creation | | Ready=True | Agent lifecycle | AgentRuntime CR reaches Ready=True condition | | Idempotent re-reconcile | Agent lifecycle | Deployment generation stays stable over 30s (no spurious updates) | -| Clean up on deletion | Agent lifecycle | Deletion preserves `kagenti.io/type`, removes `managed-by`, config-hash stays the same (no CR fields in hash) | +| Clean up on deletion | Agent lifecycle | Deletion removes `kagenti.io/type`, `managed-by`, and config-hash from the workload | | Missing target error | Error cases | AgentRuntime targeting non-existent Deployment sets TargetResolved=False | | Tool type label | Tool type | AgentRuntime with type=tool applies `kagenti.io/type=tool` label and no AgentCard is created | | StatefulSet target | StatefulSet target | AgentRuntime applies labels, config-hash, and reaches Active for a StatefulSet workload | @@ -331,10 +331,9 @@ spec on each reconcile loop, which would trigger unnecessary rolling restarts. Deletes the AgentRuntime CR and verifies the finalizer (`kagenti.io/cleanup`) runs correctly: 1. **Target Deployment still exists** — the controller cleans up labels, not the workload -2. **`kagenti.io/type=agent` preserved** — workload remains classified after runtime removal +2. **`kagenti.io/type` removed** — the label was applied by the operator and is removed on cleanup 3. **`app.kubernetes.io/managed-by` removed** — workload is no longer operator-managed -4. 
**`kagenti.io/config-hash` stays the same** — no CR fields in hash, so deletion does not - change the hash or trigger a rolling update +4. **`kagenti.io/config-hash` removed** — annotation is cleared since the workload is no longer managed 5. **AgentRuntime CR returns 404** — finalizer completed and CR was fully deleted #### Missing target error diff --git a/kagenti-operator/test/e2e/fixtures.go b/kagenti-operator/test/e2e/fixtures.go index e6f99be7..dcc7dcb2 100644 --- a/kagenti-operator/test/e2e/fixtures.go +++ b/kagenti-operator/test/e2e/fixtures.go @@ -126,8 +126,7 @@ spec: // noProtocolAgentFixture returns YAML for noproto-agent Deployment (S2) - receives // kagenti.io/type=agent via AgentRuntime but has NO protocol.kagenti.io/* label. // kagenti.io/inject=disabled is set because this test validates AgentCard sync -// behaviour, not sidecar injection. Without the opt-out the defaults-only -// injection path would inject sidecars that the pause container cannot support. +// behaviour, not sidecar injection. func noProtocolAgentFixture() string { return `apiVersion: apps/v1 kind: Deployment From 18ca74527d47e07af47976d862e4e79759438fc4 Mon Sep 17 00:00:00 2001 From: Daniels Nagornuks Date: Wed, 10 Jun 2026 18:21:27 +0100 Subject: [PATCH 5/6] Feat: add UI backend and Helm SA exceptions to VAP Exempt the kagenti-backend service account and the Helm-deployed operator SA (kagenti-system:controller-manager) from the agent label protection policy. 
Signed-off-by: Daniels Nagornuks --- .../config/vap/validating-admission-policy.yaml | 10 ++++++---- kagenti-operator/docs/architecture.md | 5 +++-- .../docs/controller-webhook-interaction.md | 6 +++--- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/kagenti-operator/config/vap/validating-admission-policy.yaml b/kagenti-operator/config/vap/validating-admission-policy.yaml index bf6198d7..76e610fc 100644 --- a/kagenti-operator/config/vap/validating-admission-policy.yaml +++ b/kagenti-operator/config/vap/validating-admission-policy.yaml @@ -1,13 +1,14 @@ # ValidatingAdmissionPolicy: prevent manual application of the kagenti.io/type # label on Deployments and StatefulSets. Only the kagenti-operator controller -# (via an AgentRuntime CR) is allowed to set this label. Users who need the -# label must create an AgentRuntime targeting their workload instead. +# (via an AgentRuntime CR) and the UI backend are allowed to set this label. +# Users who need the label must create an AgentRuntime targeting their workload +# or use the UI. # # The policy is split into two layers: # # matchConditions — skip evaluation entirely when: # 1. The object does not carry kagenti.io/type (irrelevant requests), OR -# 2. The request originates from a trusted service account. +# 2. The request originates from the operator's or UI backend's service account. 
# Two operator SA identities are listed because kustomize (make deploy) # and Helm use different namespaces and SA names: # - kagenti-operator-system/kagenti-operator-controller-manager (kustomize) @@ -41,10 +42,11 @@ spec: expression: >- has(object.metadata.labels) && 'kagenti.io/type' in object.metadata.labels - - name: 'not-operator-service-account' + - name: 'not-operator-or-ui-backend-service-account' expression: >- !(request.userInfo.username == 'system:serviceaccount:kagenti-operator-system:kagenti-operator-controller-manager') && !(request.userInfo.username == 'system:serviceaccount:kagenti-system:controller-manager') + && !(request.userInfo.username == 'system:serviceaccount:kagenti-system:kagenti-backend') variables: - name: labelUnchanged diff --git a/kagenti-operator/docs/architecture.md b/kagenti-operator/docs/architecture.md index c86562a2..606f58d2 100644 --- a/kagenti-operator/docs/architecture.md +++ b/kagenti-operator/docs/architecture.md @@ -83,7 +83,7 @@ The Kagenti Operator is a Kubernetes controller that implements the [Operator Pa - **AgentRuntime Validator**: Rejects duplicate `targetRef` entries (prevents multiple AgentRuntime CRs targeting the same workload in a namespace). Uses authoritative API server reads to eliminate informer cache-lag races. #### Admission Policies -- **Agent Label Protection (ValidatingAdmissionPolicy)**: Prevents manual application of the `kagenti.io/type` label on Deployments and StatefulSets. Only the operator's service account (via an AgentRuntime CR) is allowed to set this label. Users who attempt to add the label directly are rejected with a message directing them to create an AgentRuntime instead. The policy allows non-operator users to update workloads that already carry the label, as long as they don't change its value. +- **Agent Label Protection (ValidatingAdmissionPolicy)**: Prevents manual application of the `kagenti.io/type` label on Deployments and StatefulSets. 
Only the UI backend's and operator's service accounts (via an AgentRuntime CR) are allowed to set this label. Users who attempt to add the label directly are rejected with a message directing them to create an AgentRuntime instead. The policy allows non-operator users to update workloads that already carry the label, as long as they don't change its value. #### Signature Providers - **X5CProvider**: Validates `x5c` certificate chains against the SPIRE X.509 trust bundle and verifies JWS signatures using the leaf public key @@ -372,7 +372,7 @@ The operator deploys a `ValidatingAdmissionPolicy` (VAP) that prevents direct ap | Layer | Purpose | |-------|---------| | **matchConstraints** | Targets CREATE and UPDATE of `apps/v1` Deployments and StatefulSets | -| **matchConditions** | Skips evaluation when the object doesn't have `kagenti.io/type` or when the request comes from the operator's service account | +| **matchConditions** | Skips evaluation when the object doesn't have `kagenti.io/type` or when the request comes from the UI backend's or operator's service account | | **validation** | On UPDATE, allows the request only if `kagenti.io/type` was already present with the same value (user is modifying other fields). On CREATE, always rejects since the label should not be set manually.
| #### Scenarios @@ -385,6 +385,7 @@ The operator deploys a `ValidatingAdmissionPolicy` (VAP) that prevents direct ap | User updates Deployment that already has the label (label unchanged) | **Allowed** | | User removes `kagenti.io/type` from Deployment | **Allowed** (matchCondition skips — new object has no label) | | Operator controller applies label via AgentRuntime | **Allowed** (service account exemption) | +| User deploys an agent via UI | **Allowed** (UI backend service account exemption) | #### Resources diff --git a/kagenti-operator/docs/controller-webhook-interaction.md b/kagenti-operator/docs/controller-webhook-interaction.md index cb5361c3..a7fe99d0 100644 --- a/kagenti-operator/docs/controller-webhook-interaction.md +++ b/kagenti-operator/docs/controller-webhook-interaction.md @@ -160,7 +160,7 @@ The webhook performs its own merge at Pod CREATE time, including CR overrides, t ## Global and Cluster Configuration -When workloads are deployed with the right labels (`kagenti.io/type: agent` or `tool`), the webhook uses two levels of global configuration regardless of whether an AgentRuntime CR exists: +When workloads have the `kagenti.io/type` label (applied by the operator via an AgentRuntime CR), the webhook uses two levels of global configuration: ### PlatformConfig (Global Defaults) @@ -195,9 +195,9 @@ When `perWorkloadConfigResolution` is **false** (default), the webhook builds si When `perWorkloadConfigResolution` is **true**, the webhook resolves all config values at admission time by reading namespace ConfigMaps and AgentRuntime CR overrides, then injects literal environment variable values into the sidecar containers. -## Defaults-Only Path (No AgentRuntime CR) +## AgentRuntime Required — Admission Policy -When a workload has `kagenti.io/type` labels applied manually (without an AgentRuntime CR): +A `ValidatingAdmissionPolicy` prevents the `kagenti.io/type` label from being set directly on Deployments or StatefulSets. 
Only the UI backend's and operator's service accounts (via the AgentRuntime controller) can apply this label. This ensures every enrolled workload has a corresponding AgentRuntime CR, which provides: - The webhook still evaluates the workload for injection using PlatformConfig and feature gates - Configuration comes from PlatformConfig (layer 1) and namespace ConfigMaps (layer 2) only From 3d6111617b4ad52fa169c20483288c7362f75641 Mon Sep 17 00:00:00 2001 From: Daniels Nagornuks Date: Wed, 10 Jun 2026 21:38:45 +0100 Subject: [PATCH 6/6] Feat: add Helm chart templates for VAP agent label protection Add ValidatingAdmissionPolicy and ValidatingAdmissionPolicyBinding as Helm chart templates. Signed-off-by: Daniels Nagornuks --- .../validating-admission-policy-binding.yaml | 12 +++++ .../vap/validating-admission-policy.yaml | 46 +++++++++++++++++++ charts/kagenti-operator/values.yaml | 10 ++++ 3 files changed, 68 insertions(+) create mode 100644 charts/kagenti-operator/templates/vap/validating-admission-policy-binding.yaml create mode 100644 charts/kagenti-operator/templates/vap/validating-admission-policy.yaml diff --git a/charts/kagenti-operator/templates/vap/validating-admission-policy-binding.yaml b/charts/kagenti-operator/templates/vap/validating-admission-policy-binding.yaml new file mode 100644 index 00000000..1644a3eb --- /dev/null +++ b/charts/kagenti-operator/templates/vap/validating-admission-policy-binding.yaml @@ -0,0 +1,12 @@ +{{- if .Values.vap.agentLabelProtection.enable }} +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingAdmissionPolicyBinding +metadata: + name: agent-label-protection + labels: + {{- include "chart.labels" .
| nindent 4 }} + app.kubernetes.io/component: admission +spec: + policyName: agent-label-protection + validationActions: [Deny] +{{- end }} diff --git a/charts/kagenti-operator/templates/vap/validating-admission-policy.yaml b/charts/kagenti-operator/templates/vap/validating-admission-policy.yaml new file mode 100644 index 00000000..846150bf --- /dev/null +++ b/charts/kagenti-operator/templates/vap/validating-admission-policy.yaml @@ -0,0 +1,46 @@ +{{- if .Values.vap.agentLabelProtection.enable }} +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingAdmissionPolicy +metadata: + name: agent-label-protection + labels: + {{- include "chart.labels" . | nindent 4 }} + app.kubernetes.io/component: admission +spec: + failurePolicy: Fail + matchConstraints: + resourceRules: + - apiGroups: ["apps"] + apiVersions: ["v1"] + operations: ["CREATE", "UPDATE"] + resources: ["deployments", "statefulsets"] + + matchConditions: + - name: 'has-agent-type-label' + expression: >- + has(object.metadata.labels) && 'kagenti.io/type' in object.metadata.labels + + - name: 'not-exempt-service-account' + expression: >- + !(request.userInfo.username == 'system:serviceaccount:{{ .Release.Namespace }}:{{ .Values.controllerManager.serviceAccountName }}') + {{- range .Values.vap.agentLabelProtection.exemptServiceAccounts }} + && !(request.userInfo.username == 'system:serviceaccount:{{ . }}') + {{- end }} + + variables: + - name: labelUnchanged + expression: >- + request.operation == 'UPDATE' + && has(oldObject.metadata.labels) + && 'kagenti.io/type' in oldObject.metadata.labels + && oldObject.metadata.labels['kagenti.io/type'] == object.metadata.labels['kagenti.io/type'] + + validations: + - expression: "variables.labelUnchanged" + messageExpression: >- + 'The kagenti.io/type label on ' + + object.metadata.namespace + '/' + object.metadata.name + + ' can only be applied by the kagenti-operator via an AgentRuntime CR.' 
+ + ' Create an AgentRuntime targeting this workload instead of manually setting the label.' + reason: Forbidden +{{- end }} diff --git a/charts/kagenti-operator/values.yaml b/charts/kagenti-operator/values.yaml index 352d1368..24aa55f6 100644 --- a/charts/kagenti-operator/values.yaml +++ b/charts/kagenti-operator/values.yaml @@ -76,6 +76,16 @@ metrics: webhook: enable: true +# [VAP]: ValidatingAdmissionPolicy for agent label protection. +# Prevents manual application of the kagenti.io/type label on Deployments and +# StatefulSets. Only the UI backend SA and the operator SA (derived from +# controllerManager.serviceAccountName and the release namespace) are exempt. +vap: + agentLabelProtection: + enable: true + exemptServiceAccounts: + - "kagenti-system:kagenti-backend" + # [PROMETHEUS]: To enable a ServiceMonitor to export metrics to Prometheus set true prometheus: enable: false