diff --git a/charts/kagenti-operator/templates/manager/clusterspiffeid.yaml b/charts/kagenti-operator/templates/manager/clusterspiffeid.yaml new file mode 100644 index 00000000..2b327b98 --- /dev/null +++ b/charts/kagenti-operator/templates/manager/clusterspiffeid.yaml @@ -0,0 +1,17 @@ +{{- if .Values.verifiedFetch.enabled }} +apiVersion: spire.spiffe.io/v1alpha1 +kind: ClusterSPIFFEID +metadata: + name: kagenti-operator + labels: + {{- include "chart.labels" . | nindent 4 }} +spec: + spiffeIDTemplate: "spiffe://{{ "{{ .TrustDomain }}" }}/ns/{{ "{{ .PodMeta.Namespace }}" }}/sa/{{ "{{ .PodSpec.ServiceAccountName }}" }}" + podSelector: + matchLabels: + app.kubernetes.io/name: {{ include "chart.name" . }} + control-plane: controller-manager + namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: {{ .Release.Namespace }} +{{- end }} diff --git a/charts/kagenti-operator/templates/manager/manager.yaml b/charts/kagenti-operator/templates/manager/manager.yaml index c292f24f..6a208550 100644 --- a/charts/kagenti-operator/templates/manager/manager.yaml +++ b/charts/kagenti-operator/templates/manager/manager.yaml @@ -34,6 +34,16 @@ spec: {{- if .Values.mlflow.enable }} - "--enable-mlflow=true" {{- end }} + {{- if .Values.verifiedFetch.enabled }} + - "--enable-verified-fetch=true" + - "--verified-fetch-spiffe-socket={{ .Values.verifiedFetch.spiffeEndpointSocket }}" + {{- if and .Values.verifiedFetch.spireTrustDomain (not .Values.signatureVerification.enabled) }} + - "--spire-trust-domain={{ .Values.verifiedFetch.spireTrustDomain }}" + {{- end }} + {{- end }} + {{- if or .Values.enforceNetworkPolicies .Values.signatureVerification.enforceNetworkPolicies }} + - "--enforce-network-policies=true" + {{- end }} {{- if .Values.signatureVerification.enabled }} - "--require-a2a-signature=true" {{- if .Values.signatureVerification.auditMode }} @@ -42,6 +52,9 @@ spec: {{- if .Values.signatureVerification.enforceNetworkPolicies }} - "--enforce-network-policies=true" {{- end }} + {{- if .Values.signatureVerification.spireTrustDomain }} + - "--spire-trust-domain={{ .Values.signatureVerification.spireTrustDomain }}" + {{- end }} {{- if .Values.signatureVerification.spireTrustBundle.configMapName }} - "--spire-trust-bundle-configmap={{ .Values.signatureVerification.spireTrustBundle.configMapName }}" {{- end }} @@ -121,6 +134,11 @@ spec: mountPath: /tmp/k8s-metrics-server/metrics-certs readOnly: true {{- end }} + {{- if .Values.verifiedFetch.enabled }} + - name: spiffe-workload-api + mountPath: /spiffe-workload-api + readOnly: true + {{- end }} securityContext: {{- toYaml .Values.controllerManager.securityContext | nindent 8 }} serviceAccountName: {{ .Values.controllerManager.serviceAccountName }} @@ -144,3 +162,9 @@ spec: secret: secretName: kagenti-operator-metrics-server-cert {{- end }} + {{- if .Values.verifiedFetch.enabled }} + - name: spiffe-workload-api + csi: + driver: "csi.spiffe.io" + readOnly: true + {{- end }} diff --git a/charts/kagenti-operator/values.yaml b/charts/kagenti-operator/values.yaml index 8e71e5b4..b7d70235 100644 --- a/charts/kagenti-operator/values.yaml +++ b/charts/kagenti-operator/values.yaml @@ -88,10 +88,25 @@ certmanager: networkPolicy: enable: false +# Enforce NetworkPolicies based on AgentCard verification status. +# Works with both verifiedFetch (mTLS) and signatureVerification (init-signer). +enforceNetworkPolicies: false + # [MLFLOW]: MLflow experiment tracking integration mlflow: enable: false +# [VERIFIED FETCH]: mTLS-authenticated fetch of agent cards via SPIFFE identity (Phase 1) +# When enabled, the operator uses go-spiffe mTLS to fetch agent cards and records +# the agent's attested SPIFFE ID in CRD status. +verifiedFetch: + enabled: false + spiffeEndpointSocket: "unix:///spiffe-workload-api/spire-agent.sock" + # Agents must expose a Service port named "agent-tls" (default 8443). + # The operator discovers the TLS endpoint by port name, not number. + # SPIRE trust domain for identity binding (required when enabled) + spireTrustDomain: "" + # [SIGNATURE VERIFICATION]: A2A agent card signature verification via SPIRE x5c signatureVerification: # Enable signature verification for agent cards diff --git a/kagenti-operator/api/v1alpha1/agentcard_types.go b/kagenti-operator/api/v1alpha1/agentcard_types.go index a7306ea1..39c63973 100644 --- a/kagenti-operator/api/v1alpha1/agentcard_types.go +++ b/kagenti-operator/api/v1alpha1/agentcard_types.go @@ -48,8 +48,11 @@ type IdentityBinding struct { // +kubebuilder:validation:Pattern=`^[a-zA-Z0-9]([a-zA-Z0-9\-\.]*[a-zA-Z0-9])?$` TrustDomain string `json:"trustDomain,omitempty"` - // Strict enables enforcement mode: binding failures trigger network isolation. - // When false (default), results are recorded in status only (audit mode). + // Strict controls whether binding failures trigger enforcement actions + // (label removal, restrictive NetworkPolicy). + // When true, binding failure removes the verified label and applies restrictive NetworkPolicy. + // When false (default), binding results are recorded in status only; + // the workload retains its verified label and permissive policy. // +optional // +kubebuilder:default=false Strict bool `json:"strict,omitempty"` @@ -116,6 +119,11 @@ type AgentCardStatus struct { // +optional CardId string `json:"cardId,omitempty"` + // AttestedAgentSpiffeID is the SPIFFE ID extracted from the agent's TLS peer certificate + // during authenticated (mTLS) fetch. Set only when verifiedFetch is enabled and successful. + // +optional + AttestedAgentSpiffeID string `json:"attestedAgentSpiffeId,omitempty"` + // ExpectedSpiffeID is the SPIFFE ID used for binding evaluation. // +optional ExpectedSpiffeID string `json:"expectedSpiffeID,omitempty"` @@ -339,11 +347,12 @@ type SkillParameter struct { // +kubebuilder:printcolumn:name="Kind",type="string",JSONPath=".status.targetRef.kind",description="Workload Kind" // +kubebuilder:printcolumn:name="Target",type="string",JSONPath=".status.targetRef.name",description="Target Workload" // +kubebuilder:printcolumn:name="Agent",type="string",JSONPath=".status.card.name",description="Agent Name" -// +kubebuilder:printcolumn:name="Verified",type="boolean",JSONPath=".status.validSignature",description="Signature Verified" +// +kubebuilder:printcolumn:name="Verified",type="string",JSONPath=".status.conditions[?(@.type=='Verified')].status",description="Identity Verified" // +kubebuilder:printcolumn:name="Bound",type="boolean",JSONPath=".status.bindingStatus.bound",description="Identity Bound" // +kubebuilder:printcolumn:name="Synced",type="string",JSONPath=".status.conditions[?(@.type=='Synced')].status",description="Sync Status" // +kubebuilder:printcolumn:name="LastSync",type="date",JSONPath=".status.lastSyncTime",description="Last Sync Time" // +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp" +// +kubebuilder:printcolumn:name="AttestedAgent",type="string",JSONPath=".status.attestedAgentSpiffeId",description="Attested Agent SPIFFE ID",priority=1 // AgentCard is the Schema for the agentcards API. type AgentCard struct { diff --git a/kagenti-operator/cmd/agentcard-signer/main.go b/kagenti-operator/cmd/agentcard-signer/main.go index 31fea356..f2522af6 100644 --- a/kagenti-operator/cmd/agentcard-signer/main.go +++ b/kagenti-operator/cmd/agentcard-signer/main.go @@ -18,14 +18,6 @@ package main import ( "context" - "crypto" - "crypto/ecdsa" - "crypto/elliptic" - "crypto/rand" - "crypto/rsa" - "crypto/sha256" - "crypto/x509" - "encoding/base64" "encoding/json" "fmt" "os" @@ -42,6 +34,7 @@ import ( "github.com/spiffe/go-spiffe/v2/workloadapi" agentv1alpha1 "github.com/kagenti/operator/api/v1alpha1" + "github.com/kagenti/operator/internal/agentcard" "github.com/kagenti/operator/internal/signature" ) @@ -84,7 +77,7 @@ func run() error { if err != nil { return fmt.Errorf("failed to fetch X.509-SVID: %w", err) } - defer zeroPrivateKey(svid.PrivateKey) + defer signature.ZeroPrivateKey(svid.PrivateKey) spiffeID := svid.ID.String() logJSON("info", "fetched SVID", "spiffe_id", spiffeID) @@ -99,7 +92,7 @@ func run() error { return fmt.Errorf("failed to parse unsigned card JSON: %w", err) } - signedCard, err := signCard(&cardData, svid.PrivateKey, svid.Certificates) + signedCard, err := signature.SignCard(&cardData, svid.PrivateKey, svid.Certificates) if err != nil { return fmt.Errorf("signing failed: %w", err) } @@ -148,7 +141,7 @@ func writeConfigMapWithClient( ctx context.Context, clientset k8sclient.Interface, agentName, namespace string, signedCard []byte, ) error { - cmName := agentName + "-card-signed" + cmName := agentcard.ConfigMapName(agentName) cm := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{Name: cmName, Namespace: namespace}, Data: map[string]string{"agent-card.json": string(signedCard)}, @@ -171,7 +164,7 @@ func fetchSVID(ctx context.Context, socketPath string) (*x509svid.SVID, error) { if err != nil { return nil, fmt.Errorf("failed to create workload API client: %w", err) } - defer client.Close() + defer client.Close() //nolint:errcheck // best-effort cleanup svid, err := client.FetchX509SVID(ctx) if err != nil { @@ -180,161 +173,6 @@ func fetchSVID(ctx context.Context, socketPath string) (*x509svid.SVID, error) { return svid, nil } -// signCard signs AgentCard data and returns the signed JSON. -func signCard(cardData *agentv1alpha1.AgentCardData, privateKey crypto.Signer, certs []*x509.Certificate) ([]byte, error) { - if cardData == nil { - return nil, fmt.Errorf("card data is nil") - } - if len(certs) == 0 { - return nil, fmt.Errorf("no certificates in SVID chain") - } - leaf := certs[0] - - alg, err := algorithmForKey(privateKey.Public()) - if err != nil { - return nil, err - } - - kid := computeKID(leaf) - - x5c := make([]string, len(certs)) - for i, cert := range certs { - x5c[i] = base64.StdEncoding.EncodeToString(cert.Raw) - } - - header := &signature.ProtectedHeader{ - Algorithm: alg, - KeyID: kid, - Type: "JOSE", - X5C: x5c, - } - - protectedB64, err := signature.EncodeProtectedHeader(header) - if err != nil { - return nil, fmt.Errorf("failed to encode protected header: %w", err) - } - - payload, err := signature.CreateCanonicalCardJSON(cardData) - if err != nil { - return nil, fmt.Errorf("failed to create canonical JSON: %w", err) - } - - payloadB64 := base64.RawURLEncoding.EncodeToString(payload) - signingInput := []byte(protectedB64 + "." + payloadB64) - - sigBytes, err := signInput(privateKey, alg, signingInput) - if err != nil { - return nil, fmt.Errorf("signing failed: %w", err) - } - - sigB64 := base64.RawURLEncoding.EncodeToString(sigBytes) - - cardData.Signatures = []agentv1alpha1.AgentCardSignature{ - { - Protected: protectedB64, - Signature: sigB64, - }, - } - - output, err := json.MarshalIndent(cardData, "", " ") - if err != nil { - return nil, fmt.Errorf("failed to marshal signed card: %w", err) - } - - return output, nil -} - -// algorithmForKey maps a public key type to its JWS algorithm. -func algorithmForKey(pub crypto.PublicKey) (string, error) { - switch k := pub.(type) { - case *rsa.PublicKey: - if k.N.BitLen() < 2048 { - return "", fmt.Errorf("RSA key too small: %d bits (minimum 2048)", k.N.BitLen()) - } - return "RS256", nil - case *ecdsa.PublicKey: - switch k.Curve { - case elliptic.P256(): - return "ES256", nil - case elliptic.P384(): - return "ES384", nil - case elliptic.P521(): - return "ES512", nil - default: - return "", fmt.Errorf("unsupported ECDSA curve: %s", k.Curve.Params().Name) - } - default: - return "", fmt.Errorf("unsupported key type: %T", pub) - } -} - -// computeKID derives a key ID from the leaf cert's SHA-256 fingerprint (first 8 bytes). -func computeKID(leaf *x509.Certificate) string { - fp := sha256.Sum256(leaf.Raw) - return fmt.Sprintf("%x", fp[:8]) -} - -func signInput(signer crypto.Signer, alg string, input []byte) ([]byte, error) { - hashFunc, err := signature.HashForAlgorithm(alg) - if err != nil { - return nil, err - } - - h := hashFunc.New() - h.Write(input) - hashed := h.Sum(nil) - - switch alg { - case "RS256", "RS384", "RS512": - return signer.Sign(rand.Reader, hashed, hashFunc) - case "ES256", "ES384", "ES512": - return signECDSARaw(signer, hashed, alg) - default: - return nil, fmt.Errorf("unsupported algorithm: %s", alg) - } -} - -// signECDSARaw signs with ECDSA and encodes as fixed-width R||S (RFC 7518 §3.4). -func signECDSARaw(signer crypto.Signer, hashed []byte, alg string) ([]byte, error) { - ecKey, ok := signer.(*ecdsa.PrivateKey) - if !ok { - return nil, fmt.Errorf("expected *ecdsa.PrivateKey, got %T", signer) - } - - r, s, err := ecdsa.Sign(rand.Reader, ecKey, hashed) - if err != nil { - return nil, fmt.Errorf("ECDSA sign failed: %w", err) - } - - keySize := signature.CurveByteSize(ecKey.Curve) - sig := make([]byte, 2*keySize) - rBytes := r.Bytes() - sBytes := s.Bytes() - copy(sig[keySize-len(rBytes):keySize], rBytes) - copy(sig[2*keySize-len(sBytes):], sBytes) - - return sig, nil -} - -// zeroPrivateKey zeroes private key material in memory (best-effort). -func zeroPrivateKey(key crypto.Signer) { - switch k := key.(type) { - case *ecdsa.PrivateKey: - if k.D != nil { - k.D.SetInt64(0) - } - case *rsa.PrivateKey: - if k.D != nil { - k.D.SetInt64(0) - } - for _, p := range k.Primes { - if p != nil { - p.SetInt64(0) - } - } - } -} - func envOrDefault(key, defaultVal string) string { if v := os.Getenv(key); v != "" { return v diff --git a/kagenti-operator/cmd/agentcard-signer/main_test.go b/kagenti-operator/cmd/agentcard-signer/main_test.go index 551efcaf..e2ff3354 100644 --- a/kagenti-operator/cmd/agentcard-signer/main_test.go +++ b/kagenti-operator/cmd/agentcard-signer/main_test.go @@ -106,7 +106,7 @@ func testCard() *agentv1alpha1.AgentCardData { } } -// --- signCard tests --- +// --- SignCard tests (via shared library) --- func TestSignCard_ECDSA_P256(t *testing.T) { ca := newTestCA(t) @@ -114,9 +114,9 @@ func TestSignCard_ECDSA_P256(t *testing.T) { leaf, _ := ca.issueLeaf(t, &key.PublicKey, "spiffe://example.org/ns/default/sa/test") card := testCard() - output, err := signCard(card, key, []*x509.Certificate{leaf, ca.Cert}) + output, err := signature.SignCard(card, key, []*x509.Certificate{leaf, ca.Cert}) if err != nil { - t.Fatalf("signCard failed: %v", err) + t.Fatalf("SignCard failed: %v", err) } var parsed agentv1alpha1.AgentCardData @@ -148,9 +148,9 @@ func TestSignCard_ECDSA_P384(t *testing.T) { leaf, _ := ca.issueLeaf(t, &key.PublicKey, "spiffe://example.org/agent") card := testCard() - output, err := signCard(card, key, []*x509.Certificate{leaf}) + output, err := signature.SignCard(card, key, []*x509.Certificate{leaf}) if err != nil { - t.Fatalf("signCard failed: %v", err) + t.Fatalf("SignCard failed: %v", err) } var parsed agentv1alpha1.AgentCardData @@ -170,9 +170,9 @@ func TestSignCard_RSA(t *testing.T) { leaf, _ := ca.issueLeaf(t, &key.PublicKey, "spiffe://example.org/rsa-agent") card := testCard() - output, err := signCard(card, key, []*x509.Certificate{leaf}) + output, err := signature.SignCard(card, key, []*x509.Certificate{leaf}) if err != nil { - t.Fatalf("signCard failed: %v", err) + t.Fatalf("SignCard failed: %v", err) } var parsed agentv1alpha1.AgentCardData @@ -188,7 +188,7 @@ func TestSignCard_RSA(t *testing.T) { func TestSignCard_NilCardData(t *testing.T) { key, _ := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) - _, err := signCard(nil, key, []*x509.Certificate{{}}) + _, err := signature.SignCard(nil, key, []*x509.Certificate{{}}) if err == nil { t.Error("expected error for nil card data") } @@ -196,7 +196,7 @@ func TestSignCard_NilCardData(t *testing.T) { func TestSignCard_NoCertificates(t *testing.T) { key, _ := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) - _, err := signCard(testCard(), key, nil) + _, err := signature.SignCard(testCard(), key, nil) if err == nil { t.Error("expected error for empty cert chain") } @@ -210,20 +210,21 @@ func TestSignCard_ECDSA_RawRS_ByteLength(t *testing.T) { leaf, _ := ca.issueLeaf(t, &key.PublicKey, "spiffe://example.org/agent") card := testCard() - output, err := signCard(card, key, []*x509.Certificate{leaf}) + output, err := signature.SignCard(card, key, []*x509.Certificate{leaf}) if err != nil { - t.Fatalf("signCard failed: %v", err) + t.Fatalf("SignCard failed: %v", err) } var parsed agentv1alpha1.AgentCardData - json.Unmarshal(output, &parsed) + if err := json.Unmarshal(output, &parsed); err != nil { + t.Fatalf("failed to unmarshal signed card: %v", err) + } sigBytes, err := base64.RawURLEncoding.DecodeString(parsed.Signatures[0].Signature) if err != nil { t.Fatalf("failed to decode signature: %v", err) } - // ES256 raw R||S must be exactly 64 bytes (32 + 32) if len(sigBytes) != 64 { t.Errorf("ES256 raw R||S signature must be 64 bytes, got %d (likely DER-encoded)", len(sigBytes)) } @@ -235,16 +236,17 @@ func TestSignCard_ECDSA_P384_RawRS_ByteLength(t *testing.T) { leaf, _ := ca.issueLeaf(t, &key.PublicKey, "spiffe://example.org/agent") card := testCard() - output, err := signCard(card, key, []*x509.Certificate{leaf}) + output, err := signature.SignCard(card, key, []*x509.Certificate{leaf}) if err != nil { - t.Fatalf("signCard failed: %v", err) + t.Fatalf("SignCard failed: %v", err) } var parsed agentv1alpha1.AgentCardData - json.Unmarshal(output, &parsed) + if err := json.Unmarshal(output, &parsed); err != nil { + t.Fatalf("failed to unmarshal signed card: %v", err) + } sigBytes, _ := base64.RawURLEncoding.DecodeString(parsed.Signatures[0].Signature) - // ES384 raw R||S must be exactly 96 bytes (48 + 48) if len(sigBytes) != 96 { t.Errorf("ES384 raw R||S signature must be 96 bytes, got %d", len(sigBytes)) } @@ -258,14 +260,15 @@ func TestSignCard_X5C_StandardBase64(t *testing.T) { leaf, leafDER := ca.issueLeaf(t, &key.PublicKey, "spiffe://example.org/agent") card := testCard() - output, _ := signCard(card, key, []*x509.Certificate{leaf, ca.Cert}) + output, _ := signature.SignCard(card, key, []*x509.Certificate{leaf, ca.Cert}) var parsed agentv1alpha1.AgentCardData - json.Unmarshal(output, &parsed) + if err := json.Unmarshal(output, &parsed); err != nil { + t.Fatalf("failed to unmarshal signed card: %v", err) + } header, _ := signature.DecodeProtectedHeader(parsed.Signatures[0].Protected) - // x5c must use standard base64 (not base64url) per RFC 7515 §4.1.6 decoded, err := base64.StdEncoding.DecodeString(header.X5C[0]) if err != nil { t.Fatalf("x5c[0] is not valid standard base64: %v", err) @@ -281,10 +284,12 @@ func TestSignCard_X5C_LeafFirst(t *testing.T) { leaf, _ := ca.issueLeaf(t, &key.PublicKey, "spiffe://example.org/agent") card := testCard() - output, _ := signCard(card, key, []*x509.Certificate{leaf, ca.Cert}) + output, _ := signature.SignCard(card, key, []*x509.Certificate{leaf, ca.Cert}) var parsed agentv1alpha1.AgentCardData - json.Unmarshal(output, &parsed) + if err := json.Unmarshal(output, &parsed); err != nil { + t.Fatalf("failed to unmarshal signed card: %v", err) + } header, _ := signature.DecodeProtectedHeader(parsed.Signatures[0].Protected) @@ -308,22 +313,20 @@ func TestComputeKID(t *testing.T) { key, _ := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) leaf, _ := ca.issueLeaf(t, &key.PublicKey, "spiffe://example.org/agent") - kid := computeKID(leaf) + kid := signature.ComputeKID(leaf) fp := sha256.Sum256(leaf.Raw) - expected := big.NewInt(0).SetBytes(fp[:8]).Text(16) - // kid should be first 16 hex chars of SHA-256 fingerprint + _ = big.NewInt(0).SetBytes(fp[:8]).Text(16) if len(kid) != 16 { t.Errorf("expected kid length 16, got %d: %s", len(kid), kid) } - _ = expected // format may differ in leading zeros, just check length } -// --- algorithmForKey tests --- +// --- AlgorithmForKey tests --- func TestAlgorithmForKey_ECDSA_P256(t *testing.T) { key, _ := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) - alg, err := algorithmForKey(&key.PublicKey) + alg, err := signature.AlgorithmForKey(&key.PublicKey) if err != nil { t.Fatal(err) } @@ -334,7 +337,7 @@ func TestAlgorithmForKey_ECDSA_P256(t *testing.T) { func TestAlgorithmForKey_ECDSA_P384(t *testing.T) { key, _ := ecdsa.GenerateKey(elliptic.P384(), rand.Reader) - alg, err := algorithmForKey(&key.PublicKey) + alg, err := signature.AlgorithmForKey(&key.PublicKey) if err != nil { t.Fatal(err) } @@ -345,7 +348,7 @@ func TestAlgorithmForKey_ECDSA_P384(t *testing.T) { func TestAlgorithmForKey_ECDSA_P521(t *testing.T) { key, _ := ecdsa.GenerateKey(elliptic.P521(), rand.Reader) - alg, err := algorithmForKey(&key.PublicKey) + alg, err := signature.AlgorithmForKey(&key.PublicKey) if err != nil { t.Fatal(err) } @@ -356,7 +359,7 @@ func TestAlgorithmForKey_ECDSA_P521(t *testing.T) { func TestAlgorithmForKey_RSA(t *testing.T) { key, _ := rsa.GenerateKey(rand.Reader, 2048) - alg, err := algorithmForKey(&key.PublicKey) + alg, err := signature.AlgorithmForKey(&key.PublicKey) if err != nil { t.Fatal(err) } @@ -367,17 +370,17 @@ func TestAlgorithmForKey_RSA(t *testing.T) { func TestAlgorithmForKey_RSA_TooSmall(t *testing.T) { key, _ := rsa.GenerateKey(rand.Reader, 1024) - _, err := algorithmForKey(&key.PublicKey) + _, err := signature.AlgorithmForKey(&key.PublicKey) if err == nil { t.Error("expected error for 1024-bit RSA key") } } -// --- zeroPrivateKey tests --- +// --- ZeroPrivateKey tests --- func TestZeroPrivateKey_ECDSA(t *testing.T) { key, _ := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) - zeroPrivateKey(key) + signature.ZeroPrivateKey(key) if key.D.Sign() != 0 { t.Error("expected ECDSA D to be zeroed") } @@ -385,7 +388,7 @@ func TestZeroPrivateKey_ECDSA(t *testing.T) { func TestZeroPrivateKey_RSA(t *testing.T) { key, _ := rsa.GenerateKey(rand.Reader, 2048) - zeroPrivateKey(key) + signature.ZeroPrivateKey(key) if key.D.Sign() != 0 { t.Error("expected RSA D to be zeroed") } @@ -397,8 +400,6 @@ func TestZeroPrivateKey_RSA(t *testing.T) { } // --- Canonical JSON cross-validation --- -// Signer uses signature.CreateCanonicalCardJSON -- verify the output matches -// what the verifier expects. func TestSignCard_CanonicalJSON_CrossValidation(t *testing.T) { ca := newTestCA(t) @@ -406,27 +407,20 @@ func TestSignCard_CanonicalJSON_CrossValidation(t *testing.T) { leaf, _ := ca.issueLeaf(t, &key.PublicKey, "spiffe://example.org/agent") card := testCard() - output, err := signCard(card, key, []*x509.Certificate{leaf}) + output, err := signature.SignCard(card, key, []*x509.Certificate{leaf}) if err != nil { - t.Fatalf("signCard failed: %v", err) + t.Fatalf("SignCard failed: %v", err) } var parsed agentv1alpha1.AgentCardData - json.Unmarshal(output, &parsed) + if err := json.Unmarshal(output, &parsed); err != nil { + t.Fatalf("failed to unmarshal signed card: %v", err) + } - // Re-derive the canonical JSON from the parsed card (without signatures) cardWithoutSigs := parsed cardWithoutSigs.Signatures = nil - canonical, err := signature.CreateCanonicalCardJSON(&cardWithoutSigs) - if err != nil { - t.Fatalf("CreateCanonicalCardJSON failed: %v", err) - } - // Reconstruct the signing input and verify the signature sig := parsed.Signatures[0] - payloadB64 := base64.RawURLEncoding.EncodeToString(canonical) - signingInput := sig.Protected + "." + payloadB64 - pubPEM, _ := signature.MarshalPublicKeyToPEM(&key.PublicKey) result, err := signature.VerifyJWS(&cardWithoutSigs, &sig, pubPEM) if err != nil { @@ -435,7 +429,6 @@ func TestSignCard_CanonicalJSON_CrossValidation(t *testing.T) { if !result.Verified { t.Errorf("cross-validation failed: signer output not verified by VerifyJWS: %s", result.Details) } - _ = signingInput } // --- End-to-end: signer output verified by X5CProvider --- @@ -446,15 +439,16 @@ func TestSignCard_VerifiedByX5CProvider(t *testing.T) { leaf, _ := ca.issueLeaf(t, &key.PublicKey, "spiffe://example.org/ns/default/sa/test") card := testCard() - output, err := signCard(card, key, []*x509.Certificate{leaf, ca.Cert}) + output, err := signature.SignCard(card, key, []*x509.Certificate{leaf, ca.Cert}) if err != nil { - t.Fatalf("signCard failed: %v", err) + t.Fatalf("SignCard failed: %v", err) } var parsed agentv1alpha1.AgentCardData - json.Unmarshal(output, &parsed) + if err := json.Unmarshal(output, &parsed); err != nil { + t.Fatalf("failed to unmarshal signed card: %v", err) + } - // Build an X5CProvider with the test CA pool := x509.NewCertPool() pool.AddCert(ca.Cert) provider := &signature.X5CProvider{} @@ -477,7 +471,7 @@ func TestSignCard_VerifiedByX5CProvider(t *testing.T) { // --- writeConfigMap tests --- func TestWriteConfigMapWithClient_Create(t *testing.T) { - fakeClient := k8sfake.NewSimpleClientset() + fakeClient := k8sfake.NewSimpleClientset() //nolint:staticcheck // NewClientset requires generated apply configs cardJSON := []byte(`{"name":"test-agent","version":"1.0"}`) err := writeConfigMapWithClient(context.Background(), fakeClient, "my-agent", "test-ns", cardJSON) @@ -500,6 +494,7 @@ func TestWriteConfigMapWithClient_Update(t *testing.T) { ObjectMeta: metav1.ObjectMeta{Name: "my-agent-card-signed", Namespace: "test-ns"}, Data: map[string]string{"agent-card.json": `{"name":"old"}`}, } + //nolint:staticcheck // NewClientset requires generated apply configs fakeClient := k8sfake.NewSimpleClientset(existing) newCardJSON := []byte(`{"name":"updated-agent","version":"2.0"}`) @@ -523,7 +518,4 @@ func TestWriteConfigMap_MissingEnvVars(t *testing.T) { if err == nil { t.Fatal("expected error when env vars are missing") } - if testing.Verbose() { - t.Logf("writeConfigMap error: %v", err) - } } diff --git a/kagenti-operator/cmd/main.go b/kagenti-operator/cmd/main.go index ebf4feb0..76f248d1 100644 --- a/kagenti-operator/cmd/main.go +++ b/kagenti-operator/cmd/main.go @@ -42,6 +42,8 @@ import ( metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" "sigs.k8s.io/controller-runtime/pkg/webhook" + "github.com/spiffe/go-spiffe/v2/workloadapi" + agentv1alpha1 "github.com/kagenti/operator/api/v1alpha1" "github.com/kagenti/operator/internal/agentcard" "github.com/kagenti/operator/internal/controller" @@ -99,6 +101,9 @@ func main() { var enforceNetworkPolicies bool var enableMLflow bool + var enableVerifiedFetch bool + var verifiedFetchSpiffeSocket string + var spireTrustDomain string var spireTrustBundleConfigMapName string var spireTrustBundleConfigMapNS string @@ -137,6 +142,12 @@ func main() { flag.BoolVar(&enableMLflow, "enable-mlflow", false, "Enable MLflow experiment tracking integration") + flag.BoolVar(&enableVerifiedFetch, "enable-verified-fetch", false, + "Enable mTLS-authenticated fetch of agent cards via SPIFFE identity") + flag.StringVar(&verifiedFetchSpiffeSocket, "verified-fetch-spiffe-socket", + "unix:///spiffe-workload-api/spire-agent.sock", + "SPIFFE Workload API socket path for verified fetch") + flag.StringVar(&spireTrustDomain, "spire-trust-domain", "", "SPIRE trust domain for identity binding (e.g. 'example.org')") flag.StringVar(&spireTrustBundleConfigMapName, "spire-trust-bundle-configmap", "", @@ -286,21 +297,23 @@ func main() { os.Exit(1) } - if !requireA2ASignature { - setupLog.Info("WARNING: --require-a2a-signature is false. Identity binding requires " + - "--require-a2a-signature=true to function. AgentCards with spec.identityBinding " + - "will always report NotBound.") + if !requireA2ASignature && !enableVerifiedFetch { + setupLog.Info("WARNING: Neither --require-a2a-signature nor --enable-verified-fetch is set. " + + "Identity binding requires at least one trust mechanism to function. " + + "AgentCards with spec.identityBinding will report NotBound.") } var sigProvider signature.Provider if requireA2ASignature { if spireTrustDomain == "" { - setupLog.Error(errors.New("missing required flag"), "--spire-trust-domain is required when --require-a2a-signature=true") + setupLog.Error(errors.New("missing required flag"), + "--spire-trust-domain is required when --require-a2a-signature=true") os.Exit(1) } if spireTrustBundleConfigMapName == "" || spireTrustBundleConfigMapNS == "" { setupLog.Error(errors.New("missing required flags"), - "--spire-trust-bundle-configmap and --spire-trust-bundle-configmap-namespace are required when --require-a2a-signature=true") + "--spire-trust-bundle-configmap and --spire-trust-bundle-configmap-namespace "+ + "are required when --require-a2a-signature=true") os.Exit(1) } @@ -325,17 +338,57 @@ func main() { "auditMode", signatureAuditMode) } - if err = (&controller.AgentCardReconciler{ + agentFetcher := agentcard.NewConfigMapFetcher(mgr.GetAPIReader()) + + var authenticatedFetcher agentcard.AuthenticatedFetcher + if enableVerifiedFetch { + fetchCtx, fetchCancel := context.WithTimeout(ctx, 30*time.Second) + fetchX509Source, fetchSourceErr := workloadapi.NewX509Source( + fetchCtx, + workloadapi.WithClientOptions(workloadapi.WithAddr(verifiedFetchSpiffeSocket)), + ) + fetchCancel() + + if fetchSourceErr != nil { + setupLog.Info("WARNING: SPIRE unavailable for verified fetch, falling back to default fetcher", + "error", fetchSourceErr.Error(), + "socket", verifiedFetchSpiffeSocket) + enableVerifiedFetch = false + } else { + td := spireTrustDomain + if td == "" { + setupLog.Error(errors.New("missing required flag"), + "--spire-trust-domain is required when --enable-verified-fetch=true") + os.Exit(1) + } + fetcher, fetcherErr := agentcard.NewSpiffeFetcher(fetchX509Source, td) + if fetcherErr != nil { + setupLog.Error(fetcherErr, "Failed to create authenticated fetcher") + os.Exit(1) + } + authenticatedFetcher = fetcher + defer fetchX509Source.Close() //nolint:errcheck + setupLog.Info("Verified fetch enabled (mTLS via SPIFFE)", + "socket", verifiedFetchSpiffeSocket, + "trustDomain", td) + } + } + + agentCardReconciler := &controller.AgentCardReconciler{ Client: mgr.GetClient(), Scheme: mgr.GetScheme(), Recorder: mgr.GetEventRecorderFor("agentcard-controller"), - AgentFetcher: agentcard.NewConfigMapFetcher(mgr.GetAPIReader()), + AgentFetcher: agentFetcher, + AuthenticatedFetcher: authenticatedFetcher, + EnableVerifiedFetch: enableVerifiedFetch, SignatureProvider: sigProvider, RequireSignature: requireA2ASignature, SignatureAuditMode: signatureAuditMode, SpireTrustDomain: spireTrustDomain, SVIDExpiryGracePeriod: svidExpiryGracePeriod, - }).SetupWithManager(mgr); err != nil { + } + + if err = agentCardReconciler.SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "AgentCard") os.Exit(1) } @@ -345,6 +398,7 @@ func main() { Client: mgr.GetClient(), Scheme: mgr.GetScheme(), EnforceNetworkPolicies: enforceNetworkPolicies, + OperatorNamespace: os.Getenv("POD_NAMESPACE"), } npReconciler.DiscoverKubeAPIServerCIDRs( context.Background(), mgr.GetAPIReader(), @@ -353,7 +407,7 @@ func main() { setupLog.Error(err, "unable to create controller", "controller", "AgentCardNetworkPolicy") os.Exit(1) } - setupLog.Info("Network policy enforcement enabled for signature verification") + setupLog.Info("Network policy enforcement enabled for identity verification") } if err = (&controller.AgentCardSyncReconciler{ diff --git a/kagenti-operator/cmd/test-tls-agent/Dockerfile b/kagenti-operator/cmd/test-tls-agent/Dockerfile new file mode 100644 index 00000000..b2bd0195 --- /dev/null +++ b/kagenti-operator/cmd/test-tls-agent/Dockerfile @@ -0,0 +1,21 @@ +FROM docker.io/golang:1.26 AS builder +ARG TARGETOS +ARG TARGETARCH + +WORKDIR /workspace +COPY go.mod go.mod +COPY go.sum go.sum +RUN go mod download + +COPY cmd/test-tls-agent/ cmd/test-tls-agent/ +COPY api/ api/ +COPY internal/ internal/ + +RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o test-tls-agent ./cmd/test-tls-agent/ + +FROM gcr.io/distroless/static:nonroot +WORKDIR / +COPY --from=builder /workspace/test-tls-agent . +USER 65532:65532 + +ENTRYPOINT ["/test-tls-agent"] diff --git a/kagenti-operator/cmd/test-tls-agent/main.go b/kagenti-operator/cmd/test-tls-agent/main.go new file mode 100644 index 00000000..503d077c --- /dev/null +++ b/kagenti-operator/cmd/test-tls-agent/main.go @@ -0,0 +1,175 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package main + +import ( + "context" + "crypto/tls" + "encoding/json" + "fmt" + "log" + "net/http" + "os" + "os/signal" + "sync" + "syscall" + "time" + + "github.com/spiffe/go-spiffe/v2/spiffetls/tlsconfig" + "github.com/spiffe/go-spiffe/v2/workloadapi" +) + +const ( + defaultSocket = "unix:///run/spire/sockets/agent.sock" + defaultTLSPort = "8443" + defaultHTTPPort = "8080" +) + +func main() { + socketPath := envOrDefault("SPIFFE_ENDPOINT_SOCKET", defaultSocket) + tlsPort := envOrDefault("TLS_PORT", defaultTLSPort) + httpPort := envOrDefault("HTTP_PORT", defaultHTTPPort) + agentName := envOrDefault("AGENT_NAME", "tls-test-agent") + podNamespace := envOrDefault("POD_NAMESPACE", "default") + + cardJSON := buildAgentCard(agentName, podNamespace) + + mux := http.NewServeMux() + mux.HandleFunc("/.well-known/agent-card.json", func(w http.ResponseWriter, _ *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write(cardJSON) + }) + mux.HandleFunc("/healthz", func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte("ok")) + }) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + sig := make(chan os.Signal, 1) + signal.Notify(sig, syscall.SIGTERM, syscall.SIGINT) + + var wg sync.WaitGroup + + // HTTP server (plain, for fallback testing) + httpServer := &http.Server{ + Addr: ":" + httpPort, + Handler: mux, + ReadHeaderTimeout: 5 * time.Second, + } + wg.Add(1) + go func() { + defer wg.Done() + log.Printf("Starting HTTP server on :%s", httpPort) + if err := httpServer.ListenAndServe(); err != nil && err != http.ErrServerClosed { + log.Fatalf("HTTP server error: %v", err) + } + }() + + // TLS server using SPIFFE SVID + x509Source, err := workloadapi.NewX509Source( + ctx, + workloadapi.WithClientOptions(workloadapi.WithAddr(socketPath)), + ) + if err != nil { + log.Fatalf("Failed to create X509Source: %v", err) + } + defer x509Source.Close() //nolint:errcheck + + svid, err := x509Source.GetX509SVID() + if err != nil { + log.Fatalf("Failed to get initial SVID: %v", err) + } + log.Printf("Got SVID: %s", svid.ID) + + tlsCfg := tlsconfig.MTLSServerConfig(x509Source, x509Source, tlsconfig.AuthorizeAny()) + + tlsServer := &http.Server{ + Addr: ":" + tlsPort, + Handler: mux, + ReadHeaderTimeout: 5 * time.Second, + TLSConfig: tlsCfg, + } + wg.Add(1) + go func() { + defer wg.Done() + log.Printf("Starting TLS server on :%s (SPIFFE mTLS)", tlsPort) + ln, err := tls.Listen("tcp", ":"+tlsPort, tlsCfg) + if err != nil { + log.Fatalf("TLS listen error: %v", err) + } + if err := tlsServer.Serve(ln); err != nil && err != http.ErrServerClosed { + log.Fatalf("TLS server error: %v", err) + } + }() + + <-sig + log.Println("Shutting down...") + cancel() + + shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 5*time.Second) + defer shutdownCancel() + _ = httpServer.Shutdown(shutdownCtx) + _ = tlsServer.Shutdown(shutdownCtx) + wg.Wait() +} + +func buildAgentCard(agentName, namespace string) []byte { + cardPath := envOrDefault("AGENT_CARD_PATH", "") + if cardPath != "" { + data, err := os.ReadFile(cardPath) + if err == nil { + return data + } + log.Printf("Warning: could not read %s: %v, using default card", cardPath, err) + } + + card := map[string]interface{}{ + "name": agentName, + "description": "TLS test agent for Phase 1 verified fetch testing", + "url": fmt.Sprintf("https://%s.%s.svc.cluster.local:8443", agentName, namespace), + "version": "1.0.0", + "capabilities": map[string]interface{}{ + "streaming": false, + "pushNotifications": false, + }, + "defaultInputModes": []string{"text/plain"}, + "defaultOutputModes": []string{"text/plain"}, + "skills": []map[string]interface{}{ + { + "name": "echo", + "description": "Echoes input back (test skill)", + "inputModes": []string{"text/plain"}, + "outputModes": []string{"text/plain"}, + }, + }, + } + + data, err := json.MarshalIndent(card, "", " ") + if err != nil { + log.Fatalf("Failed to marshal agent card: %v", err) + } + return data +} + +func envOrDefault(key, defaultVal string) string { + if v := os.Getenv(key); v != "" { + return v + } + return defaultVal +} diff --git a/kagenti-operator/config/crd/bases/agent.kagenti.dev_agentcards.yaml b/kagenti-operator/config/crd/bases/agent.kagenti.dev_agentcards.yaml index 11f366d7..e2315ef7 100644 --- a/kagenti-operator/config/crd/bases/agent.kagenti.dev_agentcards.yaml +++ b/kagenti-operator/config/crd/bases/agent.kagenti.dev_agentcards.yaml @@ -34,10 +34,10 @@ spec: jsonPath: .status.card.name name: Agent type: string - - description: Signature Verified - jsonPath: .status.validSignature + - description: Identity Verified + jsonPath: .status.conditions[?(@.type=='Verified')].status name: Verified - type: boolean + type: string - description: Identity Bound jsonPath: .status.bindingStatus.bound name: Bound @@ -53,6 +53,11 @@ spec: - jsonPath: .metadata.creationTimestamp name: Age type: date + - description: Attested Agent SPIFFE ID + jsonPath: .status.attestedAgentSpiffeId + name: AttestedAgent + priority: 1 + type: string name: v1alpha1 schema: openAPIV3Schema: @@ -84,8 +89,11 @@ spec: strict: default: false description: |- - Strict enables enforcement mode: binding failures trigger network isolation. - When false (default), results are recorded in status only (audit mode). + Strict controls whether binding failures trigger enforcement actions + (label removal, restrictive NetworkPolicy). + When true, binding failure removes the verified label and applies restrictive NetworkPolicy. + When false (default), binding results are recorded in status only; + the workload retains its verified label and permissive policy. type: boolean trustDomain: description: |- @@ -126,6 +134,11 @@ spec: status: description: AgentCardStatus defines the observed state of AgentCard. properties: + attestedAgentSpiffeId: + description: |- + AttestedAgentSpiffeID is the SPIFFE ID extracted from the agent's TLS peer certificate + during authenticated (mTLS) fetch. Set only when verifiedFetch is enabled and successful. + type: string bindingStatus: description: BindingStatus contains the result of identity binding evaluation diff --git a/kagenti-operator/config/webhook/manifests.yaml b/kagenti-operator/config/webhook/manifests.yaml index 41c04ade..434c7a5c 100644 --- a/kagenti-operator/config/webhook/manifests.yaml +++ b/kagenti-operator/config/webhook/manifests.yaml @@ -13,28 +13,6 @@ webhooks: path: /mutate-workloads-authbridge failurePolicy: Fail name: inject.kagenti.io - namespaceSelector: - matchExpressions: - - key: kubernetes.io/metadata.name - operator: NotIn - values: - - kube-system - - kube-public - - kube-node-lease - - kagenti-operator-system - matchLabels: - kagenti-enabled: "true" - objectSelector: - matchExpressions: - - key: kagenti.io/type - operator: In - values: - - agent - - tool - - key: kagenti.io/inject - operator: NotIn - values: - - disabled reinvocationPolicy: IfNeeded rules: - apiGroups: @@ -46,7 +24,6 @@ webhooks: resources: - pods sideEffects: None - timeoutSeconds: 10 --- apiVersion: admissionregistration.k8s.io/v1 kind: ValidatingWebhookConfiguration diff --git a/kagenti-operator/demos/agentcard-enforcement/demo.md b/kagenti-operator/demos/agentcard-enforcement/demo.md index f7c1e813..8401f754 100644 --- a/kagenti-operator/demos/agentcard-enforcement/demo.md +++ b/kagenti-operator/demos/agentcard-enforcement/demo.md @@ -13,7 +13,7 @@ This demo shows how the operator enforces identity binding through trust-domain |----------|-------------| | Wrong trust domain | Signature stays valid, but binding fails — `Bound=false` | | Binding failure (`strict: true`) | Label removed, restrictive NetworkPolicy applied | -| Binding failure (`strict: false`) | Label removed, restrictive NetworkPolicy applied | +| Binding failure (`strict: false`) | Label retained, permissive NetworkPolicy kept | | Restored trust domain | Binding passes — label restored, permissive NetworkPolicy | ## Run the Demo @@ -39,18 +39,17 @@ Expected output: Identity Match: False Reason: NotBound Label: + NetworkPolicy: weather-agent-signature-policy === 3. Wrong Trust Domain (strict: false) === Verified: True Bound: False Identity Match: False Reason: NotBound - Label: - -=== 4. NetworkPolicy After Binding Failure === + Label: true NetworkPolicy: weather-agent-signature-policy -=== 5. Restored === +=== 4. Restored === Verified: True Bound: True Identity Match: True @@ -64,8 +63,9 @@ Expected output: 1. The operator evaluates identity binding on every reconciliation 2. When `spec.identityBinding.trustDomain` is set, it overrides the operator-level `--spire-trust-domain` 3. If the SPIFFE ID from the x5c chain doesn't match the configured trust domain, binding fails -4. Binding failure always removes the `signature-verified` label from the workload -5. The NetworkPolicy controller applies a restrictive policy when the label is absent, permissive when present +4. `strict: true` — binding failure removes the `signature-verified` label and triggers restrictive NetworkPolicy +5. `strict: false` (default) — binding failure is recorded in status only; the label and permissive policy are retained +6. Status always reflects the true binding result regardless of `strict` ## Cleanup diff --git a/kagenti-operator/demos/agentcard-enforcement/run-demo-commands.sh b/kagenti-operator/demos/agentcard-enforcement/run-demo-commands.sh index b9590827..71154cbb 100755 --- a/kagenti-operator/demos/agentcard-enforcement/run-demo-commands.sh +++ b/kagenti-operator/demos/agentcard-enforcement/run-demo-commands.sh @@ -59,6 +59,7 @@ echo "Waiting for reconciliation..." sleep 20 get_status get_label +get_netpol echo "" # ── 3. Wrong trust domain with strict: false ───────────────────────────────── @@ -75,15 +76,11 @@ echo "Waiting for reconciliation..." sleep 20 get_status get_label -echo "" - -# ── 4. NetworkPolicy after binding failure ─────────────────────────────────── -echo "=== 4. NetworkPolicy After Binding Failure ===" get_netpol echo "" -# ── 5. Restore correct binding ────────────────────────────────────────────── -echo "=== 5. Restoring correct binding ===" +# ── 4. Restore correct binding ────────────────────────────────────────────── +echo "=== 4. Restoring correct binding ===" kubectl patch agentcard "$AGENTCARD" -n "$NAMESPACE" --type=json -p '[ {"op": "remove", "path": "/spec/identityBinding"} ]' @@ -97,7 +94,7 @@ kubectl patch agentcard "$AGENTCARD" -n "$NAMESPACE" --type=merge -p '{ echo "Waiting for reconciliation..." sleep 20 echo "" -echo "=== 5. Restored ===" +echo "=== 4. Restored ===" get_status get_label get_netpol diff --git a/kagenti-operator/internal/agentcard/fetcher.go b/kagenti-operator/internal/agentcard/fetcher.go index f9476159..b1fc8290 100644 --- a/kagenti-operator/internal/agentcard/fetcher.go +++ b/kagenti-operator/internal/agentcard/fetcher.go @@ -18,13 +18,19 @@ package agentcard import ( "context" + "crypto/tls" + "crypto/x509" "encoding/json" "errors" "fmt" "io" "net/http" + "net/url" "time" + "github.com/spiffe/go-spiffe/v2/spiffeid" + "github.com/spiffe/go-spiffe/v2/spiffetls/tlsconfig" + "github.com/spiffe/go-spiffe/v2/workloadapi" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/types" @@ -48,6 +54,11 @@ const ( SignedCardConfigMapKey = "agent-card.json" ) +// ConfigMapName returns the expected ConfigMap name for a signed agent card. +func ConfigMapName(agentName string) string { + return agentName + SignedCardConfigMapSuffix +} + type Fetcher interface { Fetch(ctx context.Context, protocol, serviceURL, agentName, namespace string, ) (*agentv1alpha1.AgentCardData, error) @@ -92,8 +103,7 @@ func (f *DefaultFetcher) fetchA2ACard(ctx context.Context, serviceURL string) (* card, legacyErr := f.fetchAgentCardFromPath(ctx, serviceURL, A2ALegacyAgentCardPath) if legacyErr != nil { - // Return the original error since the primary path is canonical. - return nil, err + return nil, legacyErr } fetcherLogger.Info("Agent card served from deprecated endpoint", @@ -108,36 +118,51 @@ func (f *DefaultFetcher) fetchA2ACard(ctx context.Context, serviceURL string) (* // errNotFound is returned when the agent card endpoint returns HTTP 404. var errNotFound = errors.New("agent card not found") -func (f *DefaultFetcher) fetchAgentCardFromPath(ctx context.Context, serviceURL, path string) (*agentv1alpha1.AgentCardData, error) { - agentCardURL := serviceURL + path - fetcherLogger.Info("Fetching A2A agent card", "url", agentCardURL) +// maxCardSize caps the response body read to prevent memory exhaustion. +const maxCardSize = 1 << 20 // 1 MiB - req, err := http.NewRequestWithContext(ctx, http.MethodGet, agentCardURL, nil) +// doHTTPFetch performs a GET request and returns the raw response body and TLS +// connection state. It handles 404 (errNotFound), non-200 errors, and limits +// the body read to maxCardSize. +func doHTTPFetch(ctx context.Context, httpClient *http.Client, fetchURL string) ([]byte, *tls.ConnectionState, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, fetchURL, nil) if err != nil { - return nil, fmt.Errorf("failed to create request: %w", err) + return nil, nil, fmt.Errorf("failed to create request: %w", err) } req.Header.Set("Accept", "application/json") - resp, err := f.httpClient.Do(req) + resp, err := httpClient.Do(req) if err != nil { - return nil, fmt.Errorf("failed to fetch agent card: %w", err) + return nil, nil, fmt.Errorf("failed to fetch agent card: %w", err) } defer func() { _ = resp.Body.Close() }() - const maxCardSize = 1 << 20 // 1 MiB - if resp.StatusCode == http.StatusNotFound { - return nil, errNotFound + return nil, nil, errNotFound } if resp.StatusCode != http.StatusOK { body, _ := io.ReadAll(io.LimitReader(resp.Body, maxCardSize)) - return nil, fmt.Errorf("unexpected status code %d: %s", resp.StatusCode, string(body)) + return nil, nil, fmt.Errorf("unexpected status code %d: %s", resp.StatusCode, string(body)) } body, err := io.ReadAll(io.LimitReader(resp.Body, maxCardSize)) if err != nil { - return nil, fmt.Errorf("failed to read response body: %w", err) + return nil, nil, fmt.Errorf("failed to read response body: %w", err) + } + + return body, resp.TLS, nil +} + +func (f *DefaultFetcher) fetchAgentCardFromPath( + ctx context.Context, serviceURL, path string, +) (*agentv1alpha1.AgentCardData, error) { + agentCardURL := serviceURL + path + fetcherLogger.Info("Fetching A2A agent card", "url", agentCardURL) + + body, _, err := doHTTPFetch(ctx, f.httpClient, agentCardURL) + if err != nil { + return nil, err } var agentCardData agentv1alpha1.AgentCardData @@ -198,3 +223,145 @@ func (f *ConfigMapFetcher) Fetch( func GetServiceURL(agentName, namespace string, port int32) string { return fmt.Sprintf("http://%s.%s.svc.cluster.local:%d", agentName, namespace, port) } + +// GetSecureServiceURL returns an HTTPS URL for the agent's TLS port. +func GetSecureServiceURL(agentName, namespace string, port int32) string { + return fmt.Sprintf("https://%s.%s.svc.cluster.local:%d", agentName, namespace, port) +} + +// FetchResult contains the result of an authenticated fetch including +// the agent's verified SPIFFE ID extracted from the TLS peer certificate. +type FetchResult struct { + CardData *agentv1alpha1.AgentCardData + AgentSpiffeID string +} + +// AuthenticatedFetcher performs mTLS-authenticated fetches and returns +// identity information from the TLS handshake alongside the card data. +type AuthenticatedFetcher interface { + FetchAuthenticated(ctx context.Context, protocol, serviceURL string) (*FetchResult, error) +} + +// SpiffeFetcher implements AuthenticatedFetcher using go-spiffe mTLS. +// It verifies the agent belongs to the configured trust domain and extracts +// the SPIFFE ID from the peer certificate. The HTTP client is reused across +// fetches for connection pooling; the TLS config dynamically reads the latest +// SVID from the X509Source on each handshake. +type SpiffeFetcher struct { + x509Source *workloadapi.X509Source + trustDomain string + httpClient *http.Client +} + +// NewSpiffeFetcher creates a SpiffeFetcher that uses the provided X509Source +// for mTLS and validates peers against the given trust domain. +func NewSpiffeFetcher( + source *workloadapi.X509Source, trustDomain string, +) (*SpiffeFetcher, error) { + td, err := spiffeid.TrustDomainFromString(trustDomain) + if err != nil { + return nil, fmt.Errorf("invalid SPIFFE trust domain %q: %w", trustDomain, err) + } + tlsCfg := tlsconfig.MTLSClientConfig(source, source, tlsconfig.AuthorizeMemberOf(td)) + return &SpiffeFetcher{ + x509Source: source, + trustDomain: trustDomain, + httpClient: &http.Client{ + Timeout: DefaultFetchTimeout, + Transport: &http.Transport{TLSClientConfig: tlsCfg}, + }, + }, nil +} + +func (f *SpiffeFetcher) FetchAuthenticated(ctx context.Context, protocol, serviceURL string) (*FetchResult, error) { + switch protocol { + case A2AProtocol: + return f.fetchA2ACardAuthenticated(ctx, serviceURL) + default: + return nil, fmt.Errorf("unsupported protocol: %s", protocol) + } +} + +func (f *SpiffeFetcher) fetchA2ACardAuthenticated(ctx context.Context, serviceURL string) (*FetchResult, error) { + result, err := f.fetchAuthenticatedFromPath(ctx, serviceURL, A2AAgentCardPath) + if err == nil { + return result, nil + } + + if !errors.Is(err, errNotFound) { + return nil, err + } + + fetcherLogger.Info("Agent card not found at current endpoint, trying legacy endpoint (authenticated)", + "currentPath", A2AAgentCardPath, + "legacyPath", A2ALegacyAgentCardPath) + + result, legacyErr := f.fetchAuthenticatedFromPath(ctx, serviceURL, A2ALegacyAgentCardPath) + if legacyErr != nil { + return nil, legacyErr + } + + fetcherLogger.Info("Agent card served from deprecated endpoint (authenticated)", + "deprecated", true, + "migrateTo", A2AAgentCardPath, + "legacyPath", A2ALegacyAgentCardPath, + "agentName", result.CardData.Name) + + return result, nil +} + +func (f *SpiffeFetcher) fetchAuthenticatedFromPath(ctx context.Context, serviceURL, path string) (*FetchResult, error) { + agentCardURL := serviceURL + path + fetcherLogger.Info("Fetching A2A agent card (mTLS)", "url", agentCardURL) + + body, tlsState, err := doHTTPFetch(ctx, f.httpClient, agentCardURL) + if err != nil { + return nil, err + } + + var agentCardData agentv1alpha1.AgentCardData + if err := json.Unmarshal(body, &agentCardData); err != nil { + return nil, fmt.Errorf("failed to parse agent card JSON: %w", err) + } + + agentSpiffeID := extractSpiffeIDFromTLS(tlsState) + + fetcherLogger.Info("Successfully fetched agent card (mTLS)", + "name", agentCardData.Name, + "version", agentCardData.Version, + "agentSpiffeID", agentSpiffeID) + + return &FetchResult{ + CardData: &agentCardData, + AgentSpiffeID: agentSpiffeID, + }, nil +} + +// extractSpiffeIDFromTLS returns the SPIFFE ID from the verified peer +// certificate's URI SANs. Prefers VerifiedChains (post-validation) over +// PeerCertificates (pre-validation) for defense-in-depth. +func extractSpiffeIDFromTLS(state *tls.ConnectionState) string { + if state == nil { + return "" + } + if len(state.VerifiedChains) > 0 && len(state.VerifiedChains[0]) > 0 { + return spiffeIDFromCert(state.VerifiedChains[0][0]) + } + if len(state.PeerCertificates) > 0 { + return spiffeIDFromCert(state.PeerCertificates[0]) + } + return "" +} + +func spiffeIDFromCert(cert *x509.Certificate) string { + for _, uri := range cert.URIs { + parsed, err := url.Parse(uri.String()) + if err != nil { + continue + } + if parsed.Scheme == "spiffe" { + return uri.String() + } + } + return "" +} diff --git a/kagenti-operator/internal/agentcard/fetcher_verified_test.go b/kagenti-operator/internal/agentcard/fetcher_verified_test.go new file mode 100644 index 00000000..3c7d22f4 --- /dev/null +++ b/kagenti-operator/internal/agentcard/fetcher_verified_test.go @@ -0,0 +1,89 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package agentcard + +import ( + "crypto/x509" + "net/url" + "testing" + + "github.com/onsi/gomega" +) + +func TestGetSecureServiceURL(t *testing.T) { + g := gomega.NewGomegaWithT(t) + g.Expect(GetSecureServiceURL("my-agent", "default", 8443)). + To(gomega.Equal("https://my-agent.default.svc.cluster.local:8443")) +} + +func TestSpiffeIDFromCert_ValidSPIFFE(t *testing.T) { + g := gomega.NewGomegaWithT(t) + spiffeURI, _ := url.Parse("spiffe://example.org/ns/team1/sa/weather-agent") + cert := &x509.Certificate{ + URIs: []*url.URL{spiffeURI}, + } + g.Expect(spiffeIDFromCert(cert)).To(gomega.Equal("spiffe://example.org/ns/team1/sa/weather-agent")) +} + +func TestSpiffeIDFromCert_NoURISANs(t *testing.T) { + g := gomega.NewGomegaWithT(t) + cert := &x509.Certificate{} + g.Expect(spiffeIDFromCert(cert)).To(gomega.BeEmpty()) +} + +func TestSpiffeIDFromCert_NonSpiffeURI(t *testing.T) { + g := gomega.NewGomegaWithT(t) + httpURI, _ := url.Parse("https://example.com/agent") + cert := &x509.Certificate{ + URIs: []*url.URL{httpURI}, + } + g.Expect(spiffeIDFromCert(cert)).To(gomega.BeEmpty()) +} + +func TestSpiffeIDFromCert_MultipleURIs(t *testing.T) { + g := gomega.NewGomegaWithT(t) + httpURI, _ := url.Parse("https://example.com/agent") + spiffeURI, _ := url.Parse("spiffe://trust.domain/workload") + cert := &x509.Certificate{ + URIs: []*url.URL{httpURI, spiffeURI}, + } + g.Expect(spiffeIDFromCert(cert)).To(gomega.Equal("spiffe://trust.domain/workload")) +} + +func TestNewSpiffeFetcher(t *testing.T) { + g := gomega.NewGomegaWithT(t) + fetcher, err := NewSpiffeFetcher(nil, "example.org") + g.Expect(err).NotTo(gomega.HaveOccurred()) + g.Expect(fetcher).NotTo(gomega.BeNil()) + g.Expect(fetcher.trustDomain).To(gomega.Equal("example.org")) +} + +func TestNewSpiffeFetcher_InvalidTrustDomain(t *testing.T) { + g := gomega.NewGomegaWithT(t) + _, err := NewSpiffeFetcher(nil, "") + g.Expect(err).To(gomega.HaveOccurred()) + g.Expect(err.Error()).To(gomega.ContainSubstring("invalid SPIFFE trust domain")) +} + +func TestSpiffeFetcher_UnsupportedProtocol(t *testing.T) { + g := gomega.NewGomegaWithT(t) + fetcher, err := NewSpiffeFetcher(nil, "example.org") + g.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fetcher.FetchAuthenticated(t.Context(), "unsupported", "https://example.com") + g.Expect(err).To(gomega.HaveOccurred()) + g.Expect(err.Error()).To(gomega.ContainSubstring("unsupported protocol")) +} diff --git a/kagenti-operator/internal/controller/agentcard_controller.go b/kagenti-operator/internal/controller/agentcard_controller.go index 133d3d2e..892304c3 100644 --- a/kagenti-operator/internal/controller/agentcard_controller.go +++ b/kagenti-operator/internal/controller/agentcard_controller.go @@ -121,6 +121,13 @@ type AgentCardReconciler struct { AgentFetcher agentcard.Fetcher + // AuthenticatedFetcher performs mTLS-authenticated fetches. + // Nil when verifiedFetch is disabled. + AuthenticatedFetcher agentcard.AuthenticatedFetcher + + // EnableVerifiedFetch gates the mTLS authenticated fetch code path. + EnableVerifiedFetch bool + SignatureProvider signature.Provider RequireSignature bool SignatureAuditMode bool @@ -226,29 +233,178 @@ func (r *AgentCardReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( servicePort := r.getServicePort(service) serviceURL := agentcard.GetServiceURL(workload.ServiceName, agentCard.Namespace, servicePort) - cardData, err := r.AgentFetcher.Fetch(ctx, protocol, serviceURL, workload.ServiceName, agentCard.Namespace) + cardData, fetchResult, err := r.fetchCardData(ctx, agentCard, protocol, serviceURL, workload, service) if err != nil { - agentCardLogger.Error(err, "Failed to fetch agent card", "workload", workload.Name, "url", serviceURL) - if condErr := r.updateCondition(ctx, agentCard, "Synced", metav1.ConditionFalse, "FetchFailed", err.Error()); condErr != nil { + if condErr := r.updateCondition(ctx, agentCard, + "Ready", metav1.ConditionFalse, "SyncFailed", "Agent card fetch failed"); condErr != nil { return ctrl.Result{}, condErr } return ctrl.Result{RequeueAfter: 1 * time.Minute}, nil } - var verificationResult *signature.VerificationResult + trust := r.evaluateTrust(ctx, agentCard, cardData, fetchResult, workload) + + cardData.URL = serviceURL + cardID := r.computeCardID(cardData) + if cardID != "" && agentCard.Status.CardId != "" && agentCard.Status.CardId != cardID { + if r.Recorder != nil { + r.Recorder.Event(agentCard, corev1.EventTypeWarning, "CardContentChanged", + fmt.Sprintf("Agent card content changed: previous=%s, current=%s", + agentCard.Status.CardId, cardID)) + } + agentCardLogger.Info("Card content changed", + "agentCard", agentCard.Name, + "previousCardId", agentCard.Status.CardId, + "newCardId", cardID) + } + + resolvedTargetRef := &agentv1alpha1.TargetRef{ + APIVersion: workload.APIVersion, + Kind: workload.Kind, + Name: workload.Name, + } + + if err := r.updateAgentCardStatus(ctx, agentCard, cardData, protocol, cardID, + resolvedTargetRef, trust.verificationResult, trust.binding, trust.identityMatch, + trust.isMTLSVerified, trust.verifiedReason, trust.verifiedStatus, + trust.verifiedMessage, trust.attestedSpiffeID); err != nil { + agentCardLogger.Error(err, "Failed to update AgentCard status") + return ctrl.Result{}, err + } + + // Label propagation: only touches workload labels, not AgentCard status. + if trust.isMTLSVerified { + if err := r.propagateVerifiedLabel(ctx, agentCard, workload, true); err != nil { + agentCardLogger.Error(err, "Failed to propagate verified label to workload", + "workload", workload.Name, "verified", true) + return ctrl.Result{}, err + } + } else if r.RequireSignature { + if err := r.propagateSignatureLabel(ctx, agentCard.Name, workload, trust.isVerified); err != nil { + agentCardLogger.Error(err, "Failed to propagate signature label to workload", + "workload", workload.Name, "verified", trust.isVerified) + return ctrl.Result{}, err + } + if trust.verificationResult != nil && !trust.verificationResult.Verified && !r.SignatureAuditMode { + agentCardLogger.Info("Signature verification failed, rejecting agent card", + "workload", workload.Name, + "details", trust.verificationResult.Details) + return ctrl.Result{RequeueAfter: 1 * time.Minute}, nil + } + } else if r.EnableVerifiedFetch && r.AuthenticatedFetcher != nil { + if err := r.propagateVerifiedLabel(ctx, agentCard, workload, false); err != nil { + agentCardLogger.Error(err, "Failed to propagate verified label to workload", + "workload", workload.Name, "verified", false) + return ctrl.Result{}, err + } + } + + syncPeriod := r.getSyncPeriod(agentCard) + agentCardLogger.V(1).Info("Successfully synced agent card", + "workload", workload.Name, "kind", workload.Kind, "nextSync", syncPeriod) + + return ctrl.Result{RequeueAfter: syncPeriod}, nil +} + +// trustEvaluation holds the computed trust state for an AgentCard reconcile. +type trustEvaluation struct { + verificationResult *signature.VerificationResult + binding *bindingResult + identityMatch *bool + isMTLSVerified bool + isVerified bool + verifiedStatus metav1.ConditionStatus + verifiedReason string + verifiedMessage string + attestedSpiffeID string +} + +// fetchCardData retrieves the agent card, choosing mTLS or plain HTTP based on +// configuration and service port availability. Returns an error only if the +// fetch fails — error conditions are already written to the AgentCard status. +func (r *AgentCardReconciler) fetchCardData( + ctx context.Context, agentCard *agentv1alpha1.AgentCard, + protocol, serviceURL string, workload *WorkloadInfo, service *corev1.Service, +) (*agentv1alpha1.AgentCardData, *agentcard.FetchResult, error) { + if r.EnableVerifiedFetch && r.AuthenticatedFetcher != nil { + tlsPort := r.getAgentTLSPort(service) + if tlsPort > 0 { + secureURL := agentcard.GetSecureServiceURL( + workload.ServiceName, agentCard.Namespace, tlsPort) + fetchResult, err := r.AuthenticatedFetcher.FetchAuthenticated(ctx, protocol, secureURL) + if err != nil { + agentCardLogger.Error(err, "Authenticated fetch failed", + "workload", workload.Name, "url", secureURL) + if condErr := r.updateCondition(ctx, agentCard, + "Synced", metav1.ConditionFalse, "FetchFailed", err.Error()); condErr != nil { + return nil, nil, condErr + } + if condErr := r.setVerifiedCondition(ctx, agentCard, + metav1.ConditionFalse, "FetchFailed", err.Error()); condErr != nil { + return nil, nil, condErr + } + return nil, nil, err + } + return fetchResult.CardData, fetchResult, nil + } + agentCardLogger.Info("TLS port not found on service, falling back to HTTP fetch", + "service", workload.ServiceName, "expectedPortName", AgentTLSPortName) + if r.Recorder != nil { + r.Recorder.Event(agentCard, corev1.EventTypeWarning, "FallbackToHTTP", + fmt.Sprintf("Service %s has no %s port; fetch is unverified", + workload.ServiceName, AgentTLSPortName)) + } + cardData, err := r.AgentFetcher.Fetch( + ctx, protocol, serviceURL, workload.ServiceName, agentCard.Namespace) + if err != nil { + agentCardLogger.Error(err, "Failed to fetch agent card", + "workload", workload.Name, "url", serviceURL) + if condErr := r.updateCondition(ctx, agentCard, + "Synced", metav1.ConditionFalse, "FetchFailed", err.Error()); condErr != nil { + return nil, nil, condErr + } + return nil, nil, err + } + return cardData, nil, nil + } + + cardData, err := r.AgentFetcher.Fetch( + ctx, protocol, serviceURL, workload.ServiceName, agentCard.Namespace) + if err != nil { + agentCardLogger.Error(err, "Failed to fetch agent card", + "workload", workload.Name, "url", serviceURL) + if condErr := r.updateCondition(ctx, agentCard, + "Synced", metav1.ConditionFalse, "FetchFailed", err.Error()); condErr != nil { + return nil, nil, condErr + } + return nil, nil, err + } + if err := r.cleanupVerifiedFetchFields(ctx, agentCard); err != nil { + agentCardLogger.Error(err, "Failed to cleanup verified fetch fields", "agentCard", agentCard.Name) + } + return cardData, nil, nil +} + +// evaluateTrust performs signature verification, binding computation, and +// determines the final Verified status for an AgentCard. +func (r *AgentCardReconciler) evaluateTrust( //nolint:gocyclo + ctx context.Context, agentCard *agentv1alpha1.AgentCard, + cardData *agentv1alpha1.AgentCardData, fetchResult *agentcard.FetchResult, + workload *WorkloadInfo, +) *trustEvaluation { + eval := &trustEvaluation{} + if r.RequireSignature { var verifyErr error - verificationResult, verifyErr = r.verifySignature(ctx, cardData) - + eval.verificationResult, verifyErr = r.verifySignature(ctx, cardData) if verifyErr != nil { agentCardLogger.Error(verifyErr, "Signature verification error", "workload", workload.Name) } - - if verificationResult != nil { - if verificationResult.Verified { + if eval.verificationResult != nil { + if eval.verificationResult.Verified { if r.Recorder != nil { r.Recorder.Event(agentCard, corev1.EventTypeNormal, "SignatureEvaluated", - fmt.Sprintf("Signature verified successfully (keyID=%s)", verificationResult.KeyID)) + fmt.Sprintf("Signature verified successfully (keyID=%s)", eval.verificationResult.KeyID)) } } else { reason := ReasonSignatureInvalid @@ -258,83 +414,72 @@ func (r *AgentCardReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( agentCardLogger.Info("Signature verification failed", "workload", workload.Name, "reason", reason, - "details", verificationResult.Details) + "details", eval.verificationResult.Details) if r.Recorder != nil { - r.Recorder.Event(agentCard, corev1.EventTypeWarning, "SignatureFailed", verificationResult.Details) + r.Recorder.Event(agentCard, corev1.EventTypeWarning, "SignatureFailed", eval.verificationResult.Details) } } } } - if r.RequireSignature && verificationResult != nil && verificationResult.Verified { - r.maybeRestartForResign(ctx, agentCard, workload, verificationResult) + if r.RequireSignature && eval.verificationResult != nil && eval.verificationResult.Verified { + r.maybeRestartForResign(ctx, agentCard, workload, eval.verificationResult) } - cardData.URL = serviceURL - - cardId := r.computeCardId(cardData) - if cardId != "" && agentCard.Status.CardId != "" && agentCard.Status.CardId != cardId { - if r.Recorder != nil { - r.Recorder.Event(agentCard, corev1.EventTypeWarning, "CardContentChanged", - fmt.Sprintf("Agent card content changed: previous=%s, current=%s", agentCard.Status.CardId, cardId)) - } - agentCardLogger.Info("Card content changed", "agentCard", agentCard.Name, "previousCardId", agentCard.Status.CardId, "newCardId", cardId) - } - - resolvedTargetRef := &agentv1alpha1.TargetRef{ - APIVersion: workload.APIVersion, - Kind: workload.Kind, - Name: workload.Name, - } + sigVerified := eval.verificationResult != nil && eval.verificationResult.Verified + eval.isMTLSVerified = fetchResult != nil && fetchResult.AgentSpiffeID != "" + // Binding evaluation (independent of verified status computation) var bindingPassed bool - var binding *bindingResult - var identityMatch *bool - sigVerified := verificationResult != nil && verificationResult.Verified if agentCard.Spec.IdentityBinding != nil { var verifiedSpiffeID string - if verificationResult != nil && verificationResult.Verified && verificationResult.SpiffeID != "" { - verifiedSpiffeID = verificationResult.SpiffeID + if eval.isMTLSVerified { + verifiedSpiffeID = fetchResult.AgentSpiffeID + } else if eval.verificationResult != nil && + eval.verificationResult.Verified && eval.verificationResult.SpiffeID != "" { + verifiedSpiffeID = eval.verificationResult.SpiffeID } - binding = r.computeBinding(agentCard, verifiedSpiffeID) - bindingPassed = binding != nil && binding.Bound - match := sigVerified && bindingPassed - identityMatch = &match - } - - var vr *signature.VerificationResult - if r.RequireSignature { - vr = verificationResult - } - if err := r.updateAgentCardStatus(ctx, agentCard, cardData, protocol, cardId, resolvedTargetRef, vr, binding, identityMatch); err != nil { - agentCardLogger.Error(err, "Failed to update AgentCard status") - return ctrl.Result{}, err - } - - // Both signature and binding (if configured) must pass for the label. - if r.RequireSignature { - isVerified := sigVerified + eval.binding = r.computeBinding(agentCard, verifiedSpiffeID) + bindingPassed = eval.binding != nil && eval.binding.Bound + match := (sigVerified || eval.isMTLSVerified) && bindingPassed + eval.identityMatch = &match + } + + // Compute final Verified status. + // mTLS takes precedence over JWS: if the agent was authenticated via mTLS, + // it is unconditionally Verified regardless of JWS state. This avoids + // ambiguity when both mechanisms succeed with different SPIFFE IDs. + if eval.isMTLSVerified { + eval.verifiedStatus = metav1.ConditionTrue + eval.verifiedReason = "mTLSVerified" + eval.verifiedMessage = fmt.Sprintf("Agent SPIFFE ID: %s", fetchResult.AgentSpiffeID) + eval.attestedSpiffeID = fetchResult.AgentSpiffeID + eval.isVerified = true + } else if r.RequireSignature { + eval.isVerified = sigVerified if agentCard.Spec.IdentityBinding != nil { - isVerified = isVerified && bindingPassed - } - if err := r.propagateSignatureLabel(ctx, agentCard.Name, workload, isVerified); err != nil { - agentCardLogger.Error(err, "Failed to propagate signature label to workload", - "workload", workload.Name, "verified", isVerified) - return ctrl.Result{}, err + eval.isVerified = sigVerified && (bindingPassed || !agentCard.Spec.IdentityBinding.Strict) } - - if verificationResult != nil && !verificationResult.Verified && !r.SignatureAuditMode { - agentCardLogger.Info("Signature verification failed, rejecting agent card", - "workload", workload.Name, - "details", verificationResult.Details) - return ctrl.Result{RequeueAfter: 1 * time.Minute}, nil + if eval.isVerified { + eval.verifiedStatus = metav1.ConditionTrue + eval.verifiedReason = "SignatureVerified" + eval.verifiedMessage = fmt.Sprintf("JWS signature valid (keyID=%s)", eval.verificationResult.KeyID) + } else { + eval.verifiedStatus = metav1.ConditionFalse + eval.verifiedReason = "NotVerified" + eval.verifiedMessage = "No trust mechanism verified this agent" + if eval.verificationResult != nil && !eval.verificationResult.Verified { + eval.verifiedReason = ReasonSignatureInvalid + eval.verifiedMessage = eval.verificationResult.Details + } } + } else if r.EnableVerifiedFetch && r.AuthenticatedFetcher != nil { + eval.verifiedStatus = metav1.ConditionFalse + eval.verifiedReason = "NotVerified" + eval.verifiedMessage = "No trust mechanism verified this agent" } - syncPeriod := r.getSyncPeriod(agentCard) - agentCardLogger.V(1).Info("Successfully synced agent card", "workload", workload.Name, "kind", workload.Kind, "nextSync", syncPeriod) - - return ctrl.Result{RequeueAfter: syncPeriod}, nil + return eval } func (r *AgentCardReconciler) getWorkload(ctx context.Context, agentCard *agentv1alpha1.AgentCard) (*WorkloadInfo, error) { @@ -531,6 +676,20 @@ func (r *AgentCardReconciler) getServicePort(service *corev1.Service) int32 { return 8000 } +// AgentTLSPortName is the named port convention for the agent's TLS listener. +const AgentTLSPortName = "agent-tls" + +// getAgentTLSPort returns the port number for the agent-tls named port. +// Returns 0 if the named port is not found. +func (r *AgentCardReconciler) getAgentTLSPort(service *corev1.Service) int32 { + for _, port := range service.Spec.Ports { + if port.Name == AgentTLSPortName { + return port.Port + } + } + return 0 +} + func (r *AgentCardReconciler) getSyncPeriod(agentCard *agentv1alpha1.AgentCard) time.Duration { if agentCard.Spec.SyncPeriod == "" { return DefaultSyncPeriod @@ -547,7 +706,24 @@ func (r *AgentCardReconciler) getSyncPeriod(agentCard *agentv1alpha1.AgentCard) } // updateAgentCardStatus persists all status fields atomically with retry. -func (r *AgentCardReconciler) updateAgentCardStatus(ctx context.Context, agentCard *agentv1alpha1.AgentCard, cardData *agentv1alpha1.AgentCardData, protocol, cardId string, targetRef *agentv1alpha1.TargetRef, verificationResult *signature.VerificationResult, binding *bindingResult, identityMatch *bool) error { +// +// Condition semantics: +// - Verified: final trust decision used by NetworkPolicy controller and labels. +// True when mTLS authenticated fetch succeeds OR JWS signature + binding passes. +// Absent when no trust mechanism is configured. +// - SignatureVerified: raw JWS cryptographic outcome (informational only, not used +// for enforcement). Reflects whether the x5c signature is mathematically valid. +// - Synced: whether the agent card was successfully fetched. +// - Ready: composite signal — True when Synced is True AND (Verified is True or absent). +// - Bound: whether identity binding constraints are satisfied. +func (r *AgentCardReconciler) updateAgentCardStatus( //nolint:gocyclo + ctx context.Context, agentCard *agentv1alpha1.AgentCard, + cardData *agentv1alpha1.AgentCardData, protocol, cardID string, + targetRef *agentv1alpha1.TargetRef, verificationResult *signature.VerificationResult, + binding *bindingResult, identityMatch *bool, mTLSVerified bool, + verifiedReason string, verifiedStatus metav1.ConditionStatus, + verifiedMessage string, attestedSpiffeID string, +) error { return retry.RetryOnConflict(retry.DefaultRetry, func() error { latest := &agentv1alpha1.AgentCard{} if err := r.Get(ctx, types.NamespacedName{ @@ -560,9 +736,9 @@ func (r *AgentCardReconciler) updateAgentCardStatus(ctx context.Context, agentCa latest.Status.Card = cardData latest.Status.Protocol = protocol latest.Status.TargetRef = targetRef - if cardId != "" && cardId != latest.Status.CardId { + if cardID != "" && cardID != latest.Status.CardId { latest.Status.LastSyncTime = &metav1.Time{Time: time.Now()} - latest.Status.CardId = cardId + latest.Status.CardId = cardID } else if latest.Status.LastSyncTime == nil { latest.Status.LastSyncTime = &metav1.Time{Time: time.Now()} } @@ -597,7 +773,7 @@ func (r *AgentCardReconciler) updateAgentCardStatus(ctx context.Context, agentCa meta.SetStatusCondition(&latest.Status.Conditions, sigCondition) } - if verificationResult != nil && !verificationResult.Verified && !r.SignatureAuditMode { + if verificationResult != nil && !verificationResult.Verified && !r.SignatureAuditMode && !mTLSVerified { meta.SetStatusCondition(&latest.Status.Conditions, metav1.Condition{ Type: "Synced", Status: metav1.ConditionFalse, @@ -608,6 +784,8 @@ func (r *AgentCardReconciler) updateAgentCardStatus(ctx context.Context, agentCa message := fmt.Sprintf("Successfully fetched agent card for %s", cardData.Name) if verificationResult != nil && !verificationResult.Verified && r.SignatureAuditMode { message = fmt.Sprintf("Fetched agent card for %s (signature verification failed but audit mode enabled)", cardData.Name) + } else if mTLSVerified && verificationResult != nil && !verificationResult.Verified { + message = fmt.Sprintf("Successfully fetched agent card for %s (mTLS verified, no JWS signature)", cardData.Name) } meta.SetStatusCondition(&latest.Status.Conditions, metav1.Condition{ Type: "Synced", @@ -617,25 +795,50 @@ func (r *AgentCardReconciler) updateAgentCardStatus(ctx context.Context, agentCa }) } + // Verified condition: set only when a trust mechanism is active (non-empty reason). + if verifiedReason != "" { + meta.SetStatusCondition(&latest.Status.Conditions, metav1.Condition{ + Type: ConditionVerified, + Status: verifiedStatus, + Reason: verifiedReason, + Message: verifiedMessage, + }) + } + + // AttestedAgentSpiffeID from mTLS + if attestedSpiffeID != "" { + latest.Status.AttestedAgentSpiffeID = attestedSpiffeID + } + + // Ready = Synced AND (Verified is True OR Verified was never evaluated) + var readyStatus metav1.ConditionStatus + var readyReason, readyMessage string + syncedCond := meta.FindStatusCondition(latest.Status.Conditions, "Synced") + verifiedCond := meta.FindStatusCondition(latest.Status.Conditions, ConditionVerified) + if syncedCond != nil && syncedCond.Status == metav1.ConditionTrue { + if verifiedCond == nil || verifiedCond.Status == metav1.ConditionTrue { + readyStatus = metav1.ConditionTrue + readyReason = "ReadyToServe" + readyMessage = "Agent index is ready for queries" + } else { + readyStatus = metav1.ConditionFalse + readyReason = "VerificationFailed" + readyMessage = "Agent synced but identity verification failed" + } + } else { + readyStatus = metav1.ConditionFalse + readyReason = "SyncFailed" + readyMessage = "Agent card fetch failed" + } meta.SetStatusCondition(&latest.Status.Conditions, metav1.Condition{ Type: "Ready", - Status: metav1.ConditionTrue, - Reason: "ReadyToServe", - Message: "Agent index is ready for queries", + Status: readyStatus, + Reason: readyReason, + Message: readyMessage, }) if binding != nil { existingBound := meta.FindStatusCondition(latest.Status.Conditions, "Bound") - - if existingBound == nil { - agentCardLogger.Info("Identity binding is allowlist-only; SPIFFE trust bundle verification not yet available", - "agentCard", latest.Name) - if r.Recorder != nil { - r.Recorder.Event(agentCard, corev1.EventTypeWarning, "AllowlistOnly", - "Identity binding is allowlist-only; SPIFFE trust bundle verification not yet available") - } - } - newConditionStatus := metav1.ConditionFalse if binding.Bound { newConditionStatus = metav1.ConditionTrue @@ -649,34 +852,7 @@ func (r *AgentCardReconciler) updateAgentCardStatus(ctx context.Context, agentCa } } } - - bindingChanged := latest.Status.BindingStatus == nil || - latest.Status.BindingStatus.Bound != binding.Bound || - latest.Status.BindingStatus.Reason != binding.Reason || - latest.Status.BindingStatus.Message != binding.Message - var evalTime *metav1.Time - if latest.Status.BindingStatus != nil { - evalTime = latest.Status.BindingStatus.LastEvaluationTime - } - if bindingChanged || evalTime == nil { - now := metav1.Now() - evalTime = &now - } - latest.Status.BindingStatus = &agentv1alpha1.BindingStatus{ - Bound: binding.Bound, - Reason: binding.Reason, - Message: binding.Message, - LastEvaluationTime: evalTime, - } - if binding.SpiffeID != "" { - latest.Status.ExpectedSpiffeID = binding.SpiffeID - } - meta.SetStatusCondition(&latest.Status.Conditions, metav1.Condition{ - Type: "Bound", - Status: newConditionStatus, - Reason: binding.Reason, - Message: binding.Message, - }) + applyBindingToStatus(latest, binding) } latest.Status.SignatureIdentityMatch = identityMatch @@ -861,6 +1037,58 @@ func (r *AgentCardReconciler) updateCondition(ctx context.Context, agentCard *ag return nil } +// ConditionVerified is the condition type for mTLS-verified fetch status. +const ConditionVerified = "Verified" + +// setVerifiedCondition sets the Verified condition on the AgentCard status. +func (r *AgentCardReconciler) setVerifiedCondition( + ctx context.Context, agentCard *agentv1alpha1.AgentCard, + status metav1.ConditionStatus, reason, message string, +) error { + return r.updateCondition(ctx, agentCard, ConditionVerified, status, reason, message) +} + +// propagateVerifiedLabel syncs the identity-verified label based on Verified status. +// Uses per-card annotation AND-aggregation (same pattern as propagateSignatureLabel). +func (r *AgentCardReconciler) propagateVerifiedLabel( + ctx context.Context, agentCard *agentv1alpha1.AgentCard, + workload *WorkloadInfo, verified bool, +) error { + if workload == nil { + return nil + } + return r.propagateSignatureLabel(ctx, agentCard.Name, workload, verified) +} + +// cleanupVerifiedFetchFields removes stale Phase 1 fields when verifiedFetch is disabled. +func (r *AgentCardReconciler) cleanupVerifiedFetchFields(ctx context.Context, agentCard *agentv1alpha1.AgentCard) error { + latest := &agentv1alpha1.AgentCard{} + if err := r.Get(ctx, types.NamespacedName{ + Name: agentCard.Name, + Namespace: agentCard.Namespace, + }, latest); err != nil { + return err + } + + verifiedCond := meta.FindStatusCondition(latest.Status.Conditions, ConditionVerified) + needsUpdate := verifiedCond != nil || latest.Status.AttestedAgentSpiffeID != "" + if !needsUpdate { + return nil + } + + return retry.RetryOnConflict(retry.DefaultRetry, func() error { + if err := r.Get(ctx, types.NamespacedName{ + Name: agentCard.Name, + Namespace: agentCard.Namespace, + }, latest); err != nil { + return err + } + meta.RemoveStatusCondition(&latest.Status.Conditions, ConditionVerified) + latest.Status.AttestedAgentSpiffeID = "" + return r.Status().Update(ctx, latest) + }) +} + func (r *AgentCardReconciler) handleDeletion(ctx context.Context, agentCard *agentv1alpha1.AgentCard) (ctrl.Result, error) { if controllerutil.ContainsFinalizer(agentCard, AgentCardFinalizer) { agentCardLogger.Info("Cleaning up AgentCard", "name", agentCard.Name) @@ -1010,7 +1238,7 @@ func (r *AgentCardReconciler) computeBinding(agentCard *agentv1alpha1.AgentCard, return &bindingResult{ Bound: false, Reason: ReasonNotBound, - Message: "No SPIFFE ID from x5c certificate chain: ensure the card is signed with a SPIRE-issued SVID", + Message: "No verified SPIFFE ID available: ensure the card is signed with a SPIRE-issued SVID or served over mTLS", } } @@ -1048,6 +1276,43 @@ func (r *AgentCardReconciler) computeBinding(agentCard *agentv1alpha1.AgentCard, return &bindingResult{Bound: bound, Reason: reason, Message: message, SpiffeID: verifiedSpiffeID} } +// applyBindingToStatus writes binding fields and the Bound condition onto an AgentCard status. +// Used by both the main status update path and the standalone error-path binding update. +func applyBindingToStatus(latest *agentv1alpha1.AgentCard, binding *bindingResult) { + bindingChanged := latest.Status.BindingStatus == nil || + latest.Status.BindingStatus.Bound != binding.Bound || + latest.Status.BindingStatus.Reason != binding.Reason || + latest.Status.BindingStatus.Message != binding.Message + var evalTime *metav1.Time + if latest.Status.BindingStatus != nil { + evalTime = latest.Status.BindingStatus.LastEvaluationTime + } + if bindingChanged || evalTime == nil { + now := metav1.Now() + evalTime = &now + } + latest.Status.BindingStatus = &agentv1alpha1.BindingStatus{ + Bound: binding.Bound, + Reason: binding.Reason, + Message: binding.Message, + LastEvaluationTime: evalTime, + } + if binding.SpiffeID != "" { + latest.Status.ExpectedSpiffeID = binding.SpiffeID + } + + conditionStatus := metav1.ConditionFalse + if binding.Bound { + conditionStatus = metav1.ConditionTrue + } + meta.SetStatusCondition(&latest.Status.Conditions, metav1.Condition{ + Type: "Bound", + Status: conditionStatus, + Reason: binding.Reason, + Message: binding.Message, + }) +} + // updateBindingStatus writes binding status when the main status path is unreachable. func (r *AgentCardReconciler) updateBindingStatus(ctx context.Context, agentCard *agentv1alpha1.AgentCard, bound bool, reason, message, expectedSpiffeID string) error { return retry.RetryOnConflict(retry.DefaultRetry, func() error { @@ -1059,45 +1324,19 @@ func (r *AgentCardReconciler) updateBindingStatus(ctx context.Context, agentCard return err } - bindingChanged := latest.Status.BindingStatus == nil || - latest.Status.BindingStatus.Bound != bound || - latest.Status.BindingStatus.Reason != reason || - latest.Status.BindingStatus.Message != message - var evalTime *metav1.Time - if latest.Status.BindingStatus != nil { - evalTime = latest.Status.BindingStatus.LastEvaluationTime - } - if bindingChanged || evalTime == nil { - now := metav1.Now() - evalTime = &now - } - latest.Status.BindingStatus = &agentv1alpha1.BindingStatus{ - Bound: bound, - Reason: reason, - Message: message, - LastEvaluationTime: evalTime, - } - if expectedSpiffeID != "" { - latest.Status.ExpectedSpiffeID = expectedSpiffeID - } - - conditionStatus := metav1.ConditionFalse - if bound { - conditionStatus = metav1.ConditionTrue - } - meta.SetStatusCondition(&latest.Status.Conditions, metav1.Condition{ - Type: "Bound", - Status: conditionStatus, - Reason: reason, - Message: message, + applyBindingToStatus(latest, &bindingResult{ + Bound: bound, + Reason: reason, + Message: message, + SpiffeID: expectedSpiffeID, }) return r.Status().Update(ctx, latest) }) } -// computeCardId returns a SHA-256 hash of the card data for drift detection. -func (r *AgentCardReconciler) computeCardId(cardData *agentv1alpha1.AgentCardData) string { +// computeCardID returns a SHA-256 hash of the card data for drift detection. +func (r *AgentCardReconciler) computeCardID(cardData *agentv1alpha1.AgentCardData) string { if cardData == nil { return "" } @@ -1114,8 +1353,8 @@ func (r *AgentCardReconciler) computeCardId(cardData *agentv1alpha1.AgentCardDat // 1. The leaf SVID cert is approaching expiry (within SVIDExpiryGracePeriod). // 2. The trust bundle hash changed since the workload was last (re)started. // -// Both feed into the same mechanism: patch the pod template annotation to trigger a rollout. -// The init-container re-runs, fetches a fresh SVID, and re-signs the card. +// In init-container mode, the pod template annotation is patched to trigger +// a rollout so the init-container re-runs and fetches a fresh SVID. func (r *AgentCardReconciler) maybeRestartForResign(ctx context.Context, agentCard *agentv1alpha1.AgentCard, workload *WorkloadInfo, vr *signature.VerificationResult) { if workload == nil || r.SignatureProvider == nil { return diff --git a/kagenti-operator/internal/controller/agentcard_networkpolicy_controller.go b/kagenti-operator/internal/controller/agentcard_networkpolicy_controller.go index 64a13aa2..3d9bc1d5 100644 --- a/kagenti-operator/internal/controller/agentcard_networkpolicy_controller.go +++ b/kagenti-operator/internal/controller/agentcard_networkpolicy_controller.go @@ -24,6 +24,7 @@ import ( corev1 "k8s.io/api/core/v1" netv1 "k8s.io/api/networking/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" @@ -43,14 +44,18 @@ const NetworkPolicyFinalizer = "agentcard.kagenti.dev/network-policy" var networkPolicyLogger = ctrl.Log.WithName("controller").WithName("AgentCardNetworkPolicy") -// AgentCardNetworkPolicyReconciler manages NetworkPolicies based on AgentCard signature status. +// AgentCardNetworkPolicyReconciler manages NetworkPolicies based on AgentCard verification status. type AgentCardNetworkPolicyReconciler struct { client.Client Scheme *runtime.Scheme EnforceNetworkPolicies bool + // OperatorNamespace is the namespace the operator runs in. Used to allow + // ingress from the operator in generated NetworkPolicies via the built-in + // kubernetes.io/metadata.name label (no manual namespace labeling required). + OperatorNamespace string // KubeAPIServerCIDRs are the /32 CIDRs of the K8s API server endpoints. // Populated at startup from the "kubernetes" Endpoints in the default namespace. - // Used to allow init-container egress to the API server in restrictive policies. + // Used to allow workload egress to the API server in restrictive policies. KubeAPIServerCIDRs []string } @@ -143,10 +148,10 @@ func (r *AgentCardNetworkPolicyReconciler) manageNetworkPolicy(ctx context.Conte policyName := fmt.Sprintf("%s-signature-policy", workloadName) isVerified := false - if agentCard.Spec.IdentityBinding != nil { - isVerified = agentCard.Status.SignatureIdentityMatch != nil && *agentCard.Status.SignatureIdentityMatch - } else { - isVerified = agentCard.Status.ValidSignature != nil && *agentCard.Status.ValidSignature + + verifiedCond := meta.FindStatusCondition(agentCard.Status.Conditions, ConditionVerified) + if verifiedCond != nil { + isVerified = verifiedCond.Status == metav1.ConditionTrue } if isVerified { @@ -163,7 +168,7 @@ func (r *AgentCardNetworkPolicyReconciler) upsertNetworkPolicy(ctx context.Conte Labels: map[string]string{ "managed-by": "kagenti-operator", "kagenti.dev/agentcard": agentCard.Name, - "kagenti.dev/policy-type": "signature-verification", + "kagenti.dev/policy-type": "identity-verification", }, }, Spec: spec, @@ -195,7 +200,7 @@ func (r *AgentCardNetworkPolicyReconciler) createPermissivePolicy( ctx context.Context, policyName string, agentCard *agentv1alpha1.AgentCard, podSelectorLabels map[string]string, ) error { - ingressRule := operatorIngressRule() + ingressRule := r.operatorIngressRule() ingressRule.From = append(ingressRule.From, netv1.NetworkPolicyPeer{ PodSelector: &metav1.LabelSelector{ MatchLabels: map[string]string{LabelSignatureVerified: "true"}, @@ -212,21 +217,22 @@ func (r *AgentCardNetworkPolicyReconciler) createPermissivePolicy( return r.upsertNetworkPolicy(ctx, policyName, agentCard, spec) } -func operatorIngressRule() netv1.NetworkPolicyIngressRule { - return netv1.NetworkPolicyIngressRule{ - From: []netv1.NetworkPolicyPeer{ - { - NamespaceSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{"control-plane": "kagenti-operator"}, - }, - }, - { - NamespaceSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{"name": "kagenti-system"}, - }, +func (r *AgentCardNetworkPolicyReconciler) operatorIngressRule() netv1.NetworkPolicyIngressRule { + peers := []netv1.NetworkPolicyPeer{ + { + NamespaceSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"kubernetes.io/metadata.name": r.OperatorNamespace}, }, }, } + if r.OperatorNamespace != ClusterDefaultsNamespace { + peers = append(peers, netv1.NetworkPolicyPeer{ + NamespaceSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"kubernetes.io/metadata.name": ClusterDefaultsNamespace}, + }, + }) + } + return netv1.NetworkPolicyIngressRule{From: peers} } func (r *AgentCardNetworkPolicyReconciler) createRestrictivePolicy( @@ -237,17 +243,17 @@ func (r *AgentCardNetworkPolicyReconciler) createRestrictivePolicy( spec := netv1.NetworkPolicySpec{ PodSelector: metav1.LabelSelector{MatchLabels: podSelectorLabels}, PolicyTypes: []netv1.PolicyType{netv1.PolicyTypeIngress, netv1.PolicyTypeEgress}, - Ingress: []netv1.NetworkPolicyIngressRule{operatorIngressRule()}, + Ingress: []netv1.NetworkPolicyIngressRule{r.operatorIngressRule()}, Egress: r.kubeAPIEgressRules(), } return r.upsertNetworkPolicy(ctx, policyName, agentCard, spec) } // kubeAPIEgressRules returns egress rules that allow only traffic to the -// K8s API server endpoints on port 6443. This permits init containers -// (e.g. agentcard-signer) to write ConfigMaps while blocking all other -// outbound traffic. If no API server CIDRs are configured, returns an -// empty list (deny-all egress). +// K8s API server endpoints on port 6443. This permits workloads to +// communicate with the API server while blocking all other outbound +// traffic. If no API server CIDRs are configured, returns an empty +// list (deny-all egress). func (r *AgentCardNetworkPolicyReconciler) kubeAPIEgressRules() []netv1.NetworkPolicyEgressRule { if len(r.KubeAPIServerCIDRs) == 0 { return []netv1.NetworkPolicyEgressRule{} diff --git a/kagenti-operator/internal/controller/agentcard_networkpolicy_controller_test.go b/kagenti-operator/internal/controller/agentcard_networkpolicy_controller_test.go index 57d9786b..6b91e357 100644 --- a/kagenti-operator/internal/controller/agentcard_networkpolicy_controller_test.go +++ b/kagenti-operator/internal/controller/agentcard_networkpolicy_controller_test.go @@ -39,6 +39,7 @@ func newNPReconciler(enforce bool) *AgentCardNetworkPolicyReconciler { Client: k8sClient, Scheme: k8sClient.Scheme(), EnforceNetworkPolicies: enforce, + OperatorNamespace: "kagenti-operator-system", KubeAPIServerCIDRs: []string{"10.0.0.1/32", "10.0.0.2/32"}, } } @@ -71,6 +72,33 @@ func createCardWithStatus(name, ns, deploymentName string, validSig *bool, ident if validSig != nil || identityMatch != nil { card.Status.ValidSignature = validSig card.Status.SignatureIdentityMatch = identityMatch + + verified := false + if validSig != nil && *validSig { + verified = true + } + if binding != nil && identityMatch != nil { + if *identityMatch { + verified = true + } else if binding.Strict { + verified = false + } else { + verified = true + } + } + verifiedStatus := metav1.ConditionFalse + reason := "SignatureInvalid" + if verified { + verifiedStatus = metav1.ConditionTrue + reason = "SignatureVerified" + } + card.Status.Conditions = []metav1.Condition{{ + Type: ConditionVerified, + Status: verifiedStatus, + Reason: reason, + Message: "test", + LastTransitionTime: metav1.Now(), + }} ExpectWithOffset(1, k8sClient.Status().Update(ctx, card)).To(Succeed()) } } @@ -223,9 +251,10 @@ var _ = Describe("AgentCardNetworkPolicyReconciler", func() { Expect(policyHasVerifiedPodIngress(p)).To(BeTrue()) }) - It("should create restrictive policy when SignatureIdentityMatch=false", func() { + It("should create restrictive policy when SignatureIdentityMatch=false and strict=true", func() { + strictBinding := &agentv1alpha1.IdentityBinding{TrustDomain: "test.local", Strict: true} createDeploymentWithService(ctx, deploymentName, namespace) - createCardWithStatus(agentCardName, namespace, deploymentName, nil, ptr.To(false), binding) + createCardWithStatus(agentCardName, namespace, deploymentName, nil, ptr.To(false), strictBinding) r := newNPReconciler(true) reconcileNP(r, agentCardName, namespace) @@ -234,6 +263,19 @@ var _ = Describe("AgentCardNetworkPolicyReconciler", func() { p := getPolicy(deploymentName, namespace) Expect(p.Spec.Ingress[0].From[0].PodSelector).To(BeNil()) }) + + It("should create permissive policy when SignatureIdentityMatch=false and strict=false", func() { + createDeploymentWithService(ctx, deploymentName, namespace) + createCardWithStatus(agentCardName, namespace, deploymentName, nil, ptr.To(false), binding) + + r := newNPReconciler(true) + reconcileNP(r, agentCardName, namespace) + reconcileNP(r, agentCardName, namespace) + + p := getPolicy(deploymentName, namespace) + Expect(p.Spec.Ingress[0].From).To(HaveLen(3)) + Expect(policyHasVerifiedPodIngress(p)).To(BeTrue()) + }) }) Context("Policy updates when verification status changes", func() { @@ -262,6 +304,12 @@ var _ = Describe("AgentCardNetworkPolicyReconciler", func() { card := &agentv1alpha1.AgentCard{} Expect(k8sClient.Get(ctx, types.NamespacedName{Name: agentCardName, Namespace: namespace}, card)).To(Succeed()) card.Status.ValidSignature = ptr.To(false) + for i := range card.Status.Conditions { + if card.Status.Conditions[i].Type == ConditionVerified { + card.Status.Conditions[i].Status = metav1.ConditionFalse + card.Status.Conditions[i].Reason = "SignatureInvalid" + } + } Expect(k8sClient.Status().Update(ctx, card)).To(Succeed()) reconcileNP(r, agentCardName, namespace) @@ -334,4 +382,118 @@ var _ = Describe("AgentCardNetworkPolicyReconciler", func() { Expect(err).To(HaveOccurred()) }) }) + + Context("Verified condition (Phase 1 mTLS path)", func() { + const ( + deploymentName = "np-verified-cond-agent" + agentCardName = "np-verified-cond-card" + namespace = "default" + ) + + AfterEach(func() { + cleanupResource(ctx, &agentv1alpha1.AgentCard{}, agentCardName, namespace) + cleanupResource(ctx, &appsv1.Deployment{}, deploymentName, namespace) + cleanupResource(ctx, &corev1.Service{}, deploymentName, namespace) + }) + + It("should create permissive policy when Verified condition is True", func() { + createDeploymentWithService(ctx, deploymentName, namespace) + createCardWithVerifiedCondition(agentCardName, namespace, deploymentName, metav1.ConditionTrue) + + r := newNPReconciler(true) + reconcileNP(r, agentCardName, namespace) + reconcileNP(r, agentCardName, namespace) + + p := getPolicy(deploymentName, namespace) + Expect(p.Spec.Ingress[0].From).To(HaveLen(3)) + Expect(policyHasVerifiedPodIngress(p)).To(BeTrue()) + }) + + It("should create restrictive policy when Verified condition is False", func() { + createDeploymentWithService(ctx, deploymentName, namespace) + createCardWithVerifiedCondition(agentCardName, namespace, deploymentName, metav1.ConditionFalse) + + r := newNPReconciler(true) + reconcileNP(r, agentCardName, namespace) + reconcileNP(r, agentCardName, namespace) + + p := getPolicy(deploymentName, namespace) + Expect(p.Spec.Ingress[0].From).To(HaveLen(2)) + Expect(p.Spec.Ingress[0].From[0].PodSelector).To(BeNil()) + }) + + It("should be permissive when Verified condition is True (set via signature)", func() { + createDeploymentWithService(ctx, deploymentName, namespace) + createCardWithStatus(agentCardName, namespace, deploymentName, ptr.To(true), nil, nil) + + r := newNPReconciler(true) + reconcileNP(r, agentCardName, namespace) + reconcileNP(r, agentCardName, namespace) + + p := getPolicy(deploymentName, namespace) + Expect(p.Spec.Ingress[0].From).To(HaveLen(3)) + Expect(policyHasVerifiedPodIngress(p)).To(BeTrue()) + }) + + It("Verified condition takes precedence over ValidSignature", func() { + createDeploymentWithService(ctx, deploymentName, namespace) + + card := &agentv1alpha1.AgentCard{ + ObjectMeta: metav1.ObjectMeta{Name: agentCardName, Namespace: namespace}, + Spec: agentv1alpha1.AgentCardSpec{ + TargetRef: &agentv1alpha1.TargetRef{ + APIVersion: "apps/v1", + Kind: "Deployment", + Name: deploymentName, + }, + }, + } + Expect(k8sClient.Create(ctx, card)).To(Succeed()) + + card.Status.ValidSignature = ptr.To(true) + card.Status.Conditions = []metav1.Condition{ + { + Type: ConditionVerified, + Status: metav1.ConditionFalse, + Reason: "FetchFailed", + Message: "mTLS fetch failed", + LastTransitionTime: metav1.Now(), + }, + } + Expect(k8sClient.Status().Update(ctx, card)).To(Succeed()) + + r := newNPReconciler(true) + reconcileNP(r, agentCardName, namespace) + reconcileNP(r, agentCardName, namespace) + + p := getPolicy(deploymentName, namespace) + Expect(p.Spec.Ingress[0].From).To(HaveLen(2)) + Expect(p.Spec.Ingress[0].From[0].PodSelector).To(BeNil()) + }) + }) }) + +func createCardWithVerifiedCondition(name, ns, deploymentName string, condStatus metav1.ConditionStatus) { + card := &agentv1alpha1.AgentCard{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: ns}, + Spec: agentv1alpha1.AgentCardSpec{ + TargetRef: &agentv1alpha1.TargetRef{ + APIVersion: "apps/v1", + Kind: "Deployment", + Name: deploymentName, + }, + }, + } + ExpectWithOffset(1, k8sClient.Create(ctx, card)).To(Succeed()) + + card.Status.Conditions = []metav1.Condition{ + { + Type: ConditionVerified, + Status: condStatus, + Reason: "mTLSVerified", + Message: "test condition", + LastTransitionTime: metav1.Now(), + }, + } + ExpectWithOffset(1, k8sClient.Status().Update(ctx, card)).To(Succeed()) +} diff --git a/kagenti-operator/internal/controller/identity_binding_test.go b/kagenti-operator/internal/controller/identity_binding_test.go index 063ebbca..dc52dd97 100644 --- a/kagenti-operator/internal/controller/identity_binding_test.go +++ b/kagenti-operator/internal/controller/identity_binding_test.go @@ -178,11 +178,11 @@ var _ = Describe("Identity Binding — Trust Domain Only", func() { URL: "http://localhost:8000", } - cardId1 := reconciler.computeCardId(cardData) - cardId2 := reconciler.computeCardId(cardData) + cardID1 := reconciler.computeCardID(cardData) + cardID2 := reconciler.computeCardID(cardData) - Expect(cardId1).NotTo(BeEmpty()) - Expect(cardId1).To(Equal(cardId2)) + Expect(cardID1).NotTo(BeEmpty()) + Expect(cardID1).To(Equal(cardID2)) }) It("should compute different card ID for different card data", func() { @@ -201,12 +201,12 @@ var _ = Describe("Identity Binding — Trust Domain Only", func() { Version: "2.0.0", } - cardId1 := reconciler.computeCardId(cardData1) - cardId2 := reconciler.computeCardId(cardData2) + cardID1 := reconciler.computeCardID(cardData1) + cardID2 := reconciler.computeCardID(cardData2) - Expect(cardId1).NotTo(BeEmpty()) - Expect(cardId2).NotTo(BeEmpty()) - Expect(cardId1).NotTo(Equal(cardId2)) + Expect(cardID1).NotTo(BeEmpty()) + Expect(cardID2).NotTo(BeEmpty()) + Expect(cardID1).NotTo(Equal(cardID2)) }) }) }) diff --git a/kagenti-operator/internal/signature/signer.go b/kagenti-operator/internal/signature/signer.go new file mode 100644 index 00000000..a70752b6 --- /dev/null +++ b/kagenti-operator/internal/signature/signer.go @@ -0,0 +1,189 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package signature + +import ( + "crypto" + "crypto/ecdsa" + "crypto/elliptic" + "crypto/rand" + "crypto/rsa" + "crypto/sha256" + "crypto/x509" + "encoding/base64" + "encoding/json" + "fmt" + + agentv1alpha1 "github.com/kagenti/operator/api/v1alpha1" +) + +// SignCard signs AgentCard data with the given private key and certificate chain, +// producing a JWS x5c signed JSON output. Used by the agentcard-signer init-container. +func SignCard(cardData *agentv1alpha1.AgentCardData, privateKey crypto.Signer, certs []*x509.Certificate) ([]byte, error) { + if cardData == nil { + return nil, fmt.Errorf("card data is nil") + } + if len(certs) == 0 { + return nil, fmt.Errorf("no certificates in SVID chain") + } + leaf := certs[0] + + alg, err := AlgorithmForKey(privateKey.Public()) + if err != nil { + return nil, err + } + + kid := ComputeKID(leaf) + + x5c := make([]string, len(certs)) + for i, cert := range certs { + x5c[i] = base64.StdEncoding.EncodeToString(cert.Raw) + } + + header := &ProtectedHeader{ + Algorithm: alg, + KeyID: kid, + Type: "JOSE", + X5C: x5c, + } + + protectedB64, err := EncodeProtectedHeader(header) + if err != nil { + return nil, fmt.Errorf("failed to encode protected header: %w", err) + } + + payload, err := CreateCanonicalCardJSON(cardData) + if err != nil { + return nil, fmt.Errorf("failed to create canonical JSON: %w", err) + } + + payloadB64 := base64.RawURLEncoding.EncodeToString(payload) + signingInput := []byte(protectedB64 + "." + payloadB64) + + sigBytes, err := SignInput(privateKey, alg, signingInput) + if err != nil { + return nil, fmt.Errorf("signing failed: %w", err) + } + + sigB64 := base64.RawURLEncoding.EncodeToString(sigBytes) + + cardData.Signatures = []agentv1alpha1.AgentCardSignature{ + { + Protected: protectedB64, + Signature: sigB64, + }, + } + + output, err := json.MarshalIndent(cardData, "", " ") + if err != nil { + return nil, fmt.Errorf("failed to marshal signed card: %w", err) + } + + return output, nil +} + +// AlgorithmForKey maps a public key type to its JWS algorithm. +func AlgorithmForKey(pub crypto.PublicKey) (string, error) { + switch k := pub.(type) { + case *rsa.PublicKey: + if k.N.BitLen() < 2048 { + return "", fmt.Errorf("RSA key too small: %d bits (minimum 2048)", k.N.BitLen()) + } + return "RS256", nil + case *ecdsa.PublicKey: + switch k.Curve { + case elliptic.P256(): + return "ES256", nil + case elliptic.P384(): + return "ES384", nil + case elliptic.P521(): + return "ES512", nil + default: + return "", fmt.Errorf("unsupported ECDSA curve: %s", k.Curve.Params().Name) + } + default: + return "", fmt.Errorf("unsupported key type: %T", pub) + } +} + +// ComputeKID derives a key ID from the leaf cert's SHA-256 fingerprint (first 8 bytes). +func ComputeKID(leaf *x509.Certificate) string { + fp := sha256.Sum256(leaf.Raw) + return fmt.Sprintf("%x", fp[:8]) +} + +// SignInput signs the JWS signing input with the appropriate algorithm. +func SignInput(signer crypto.Signer, alg string, input []byte) ([]byte, error) { + hashFunc, err := HashForAlgorithm(alg) + if err != nil { + return nil, err + } + + h := hashFunc.New() + h.Write(input) + hashed := h.Sum(nil) + + switch alg { //nolint:goconst // JWS algorithm identifiers are well-known strings + case "RS256", "RS384", "RS512": + return signer.Sign(rand.Reader, hashed, hashFunc) + case "ES256", "ES384", "ES512": + return SignECDSARaw(signer, hashed, alg) + default: + return nil, fmt.Errorf("unsupported algorithm: %s", alg) + } +} + +// SignECDSARaw signs with ECDSA and encodes as fixed-width R||S (RFC 7518 §3.4). +func SignECDSARaw(signer crypto.Signer, hashed []byte, alg string) ([]byte, error) { + ecKey, ok := signer.(*ecdsa.PrivateKey) + if !ok { + return nil, fmt.Errorf("expected *ecdsa.PrivateKey, got %T", signer) + } + + r, s, err := ecdsa.Sign(rand.Reader, ecKey, hashed) + if err != nil { + return nil, fmt.Errorf("ECDSA sign failed: %w", err) + } + + keySize := CurveByteSize(ecKey.Curve) + sig := make([]byte, 2*keySize) + rBytes := r.Bytes() + sBytes := s.Bytes() + copy(sig[keySize-len(rBytes):keySize], rBytes) + copy(sig[2*keySize-len(sBytes):], sBytes) + + return sig, nil +} + +// ZeroPrivateKey zeroes private key material in memory (best-effort). +func ZeroPrivateKey(key crypto.Signer) { + switch k := key.(type) { + case *ecdsa.PrivateKey: + if k.D != nil { + k.D.SetInt64(0) + } + case *rsa.PrivateKey: + if k.D != nil { + k.D.SetInt64(0) + } + for _, p := range k.Primes { + if p != nil { + p.SetInt64(0) + } + } + } +} diff --git a/kagenti-operator/internal/signature/signer_test.go b/kagenti-operator/internal/signature/signer_test.go new file mode 100644 index 00000000..7288a0bc --- /dev/null +++ b/kagenti-operator/internal/signature/signer_test.go @@ -0,0 +1,116 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package signature + +import ( + "context" + "crypto/ecdsa" + "crypto/elliptic" + "crypto/rand" + "crypto/x509" + "encoding/json" + "testing" + + agentv1alpha1 "github.com/kagenti/operator/api/v1alpha1" +) + +func TestSignCard_ECDSA_P256(t *testing.T) { + ca := newTestCA(t) + key, _ := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) + leaf := ca.issueLeaf(t, key, leafOpts{ + spiffeIDs: []string{"spiffe://example.org/ns/default/sa/test"}, + }) + + card := testCard() + output, err := SignCard(card, key, []*x509.Certificate{leaf, ca.Cert}) + if err != nil { + t.Fatalf("SignCard failed: %v", err) + } + + var parsed agentv1alpha1.AgentCardData + if err := json.Unmarshal(output, &parsed); err != nil { + t.Fatalf("output is not valid JSON: %v", err) + } + if len(parsed.Signatures) != 1 { + t.Fatalf("expected 1 signature, got %d", len(parsed.Signatures)) + } + + header, err := DecodeProtectedHeader(parsed.Signatures[0].Protected) + if err != nil { + t.Fatalf("failed to decode protected header: %v", err) + } + if header.Algorithm != "ES256" { + t.Errorf("expected alg=ES256, got %s", header.Algorithm) + } + if header.Type != "JOSE" { + t.Errorf("expected typ=JOSE, got %s", header.Type) + } + if len(header.X5C) != 2 { + t.Errorf("expected 2 certs in x5c, got %d", len(header.X5C)) + } +} + +func TestSignCard_NilCardData(t *testing.T) { + key, _ := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) + _, err := SignCard(nil, key, []*x509.Certificate{{}}) + if err == nil { + t.Error("expected error for nil card data") + } +} + +func TestSignCard_NoCertificates(t *testing.T) { + key, _ := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) + _, err := SignCard(testCard(), key, nil) + if err == nil { + t.Error("expected error for empty cert chain") + } +} + +// CRITICAL: Round-trip test proving operator-signed cards verify with X5CProvider. +func TestSignCard_RoundTrip_X5CProvider(t *testing.T) { + ca := newTestCA(t) + key, _ := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) + leaf := ca.issueLeaf(t, key, leafOpts{ + spiffeIDs: []string{"spiffe://example.org/ns/default/sa/operator"}, + }) + + card := testCard() + output, err := SignCard(card, key, []*x509.Certificate{leaf, ca.Cert}) + if err != nil { + t.Fatalf("SignCard failed: %v", err) + } + + var parsed agentv1alpha1.AgentCardData + if err := json.Unmarshal(output, &parsed); err != nil { + t.Fatalf("output is not valid JSON: %v", err) + } + + provider := newTestX5CProvider(t, ca) + cardWithoutSigs := parsed + cardWithoutSigs.Signatures = nil + + result, err := provider.VerifySignature(context.Background(), &cardWithoutSigs, parsed.Signatures) + if err != nil { + t.Fatalf("X5CProvider.VerifySignature error: %v", err) + } + if !result.Verified { + t.Errorf("round-trip failed: X5CProvider rejected SignCard output: %s", result.Details) + } + if result.SpiffeID != "spiffe://example.org/ns/default/sa/operator" { + t.Errorf("expected SPIFFE ID from cert, got %q", result.SpiffeID) + } +} diff --git a/kagenti-operator/internal/webhook/injector/envoy.yaml.tmpl b/kagenti-operator/internal/webhook/injector/envoy.yaml.tmpl index 048e905a..c6f0101b 100644 --- a/kagenti-operator/internal/webhook/injector/envoy.yaml.tmpl +++ b/kagenti-operator/internal/webhook/injector/envoy.yaml.tmpl @@ -102,6 +102,49 @@ static_resources: typed_config: "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router +{{- if .SpireEnabled }} + - name: agent_tls_listener + address: + socket_address: + protocol: TCP + address: 0.0.0.0 + port_value: {{ .TLSPort }} + filter_chains: + - filters: + - name: envoy.filters.network.http_connection_manager + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager + stat_prefix: agent_tls + codec_type: AUTO + route_config: + name: agent_tls_routes + virtual_hosts: + - name: local_app_tls + domains: ["*"] + routes: + - match: + prefix: "/" + route: + cluster: original_destination + http_filters: + - name: envoy.filters.http.router + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router + transport_socket: + name: envoy.transport_sockets.tls + typed_config: + "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.DownstreamTlsContext + common_tls_context: + tls_certificates: + - certificate_chain: + filename: /opt/svid.pem + private_key: + filename: /opt/svid_key.pem + validation_context: + trusted_ca: + filename: /opt/svid_bundle.pem +{{- end }} + clusters: - name: original_destination connect_timeout: 30s diff --git a/kagenti-operator/internal/webhook/injector/envoy_template.go b/kagenti-operator/internal/webhook/injector/envoy_template.go index 451440f5..bbf59ede 100644 --- a/kagenti-operator/internal/webhook/injector/envoy_template.go +++ b/kagenti-operator/internal/webhook/injector/envoy_template.go @@ -34,6 +34,10 @@ type envoyTemplateData struct { OutboundPort int32 InboundPort int32 ExtProcPort int32 + // SpireEnabled gates the TLS listener for verified fetch. + SpireEnabled bool + // TLSPort is the port for the HTTPS listener (used when SpireEnabled is true). + TLSPort int32 } // Default ext-proc gRPC port (go-processor).