diff --git a/charts/kagenti-operator/crds/agent.kagenti.dev_agentruntimes.yaml b/charts/kagenti-operator/crds/agent.kagenti.dev_agentruntimes.yaml index 05504e25..60f96d49 100644 --- a/charts/kagenti-operator/crds/agent.kagenti.dev_agentruntimes.yaml +++ b/charts/kagenti-operator/crds/agent.kagenti.dev_agentruntimes.yaml @@ -118,6 +118,7 @@ spec: type: object type: object mtlsMode: + default: permissive description: |- MTLSMode selects the mTLS posture between authbridge sidecars on the proxy-sidecar / lite paths. envoy-sidecar handles transport @@ -127,8 +128,8 @@ spec: Three valid values: - disabled Plaintext between sidecars (default). - permissive Inbound: byte-peek listener accepts both TLS and + disabled Plaintext between sidecars. + permissive (default) Inbound: byte-peek listener accepts both TLS and plaintext on the same port. Outbound: tries TLS, falls back to plaintext on handshake failure (one-line WARN log per fallback). Use during rollout. @@ -137,7 +138,7 @@ spec: completes. Resolution: AgentRuntime CR > namespace authbridge-runtime-config - mtls.mode > "disabled". Setting mtlsMode != disabled implicitly + mtls.mode > "permissive". Setting mtlsMode != disabled implicitly requires SPIRE — the operator auto-enables spire for the workload. CR-empty vs CR="disabled" are observably different in diff --git a/kagenti-operator/api/v1alpha1/agentruntime_types.go b/kagenti-operator/api/v1alpha1/agentruntime_types.go index 4fde168a..50ca0bba 100644 --- a/kagenti-operator/api/v1alpha1/agentruntime_types.go +++ b/kagenti-operator/api/v1alpha1/agentruntime_types.go @@ -86,8 +86,8 @@ type AgentRuntimeSpec struct { // // Three valid values: // - // disabled Plaintext between sidecars (default). - // permissive Inbound: byte-peek listener accepts both TLS and + // disabled Plaintext between sidecars. + // permissive (default) Inbound: byte-peek listener accepts both TLS and // plaintext on the same port. Outbound: tries TLS, // falls back to plaintext on handshake failure (one-line // WARN log per fallback). Use during rollout. @@ -96,7 +96,7 @@ type AgentRuntimeSpec struct { // completes. // // Resolution: AgentRuntime CR > namespace authbridge-runtime-config - // mtls.mode > "disabled". Setting mtlsMode != disabled implicitly + // mtls.mode > "permissive". Setting mtlsMode != disabled implicitly // requires SPIRE — the operator auto-enables spire for the workload. // // CR-empty vs CR="disabled" are observably different in @@ -111,6 +111,7 @@ type AgentRuntimeSpec struct { // process start). // // +optional + // +kubebuilder:default=permissive // +kubebuilder:validation:Enum=disabled;permissive;strict MTLSMode string `json:"mtlsMode,omitempty"` } diff --git a/kagenti-operator/cmd/main.go b/kagenti-operator/cmd/main.go index cef42202..a074d6da 100644 --- a/kagenti-operator/cmd/main.go +++ b/kagenti-operator/cmd/main.go @@ -171,10 +171,10 @@ func main() { flag.StringVar(&mlflowCAFile, "mlflow-ca-file", "", "Path to PEM-encoded CA bundle for MLflow TLS verification (appended to system pool)") - flag.BoolVar(&enableCardDiscovery, "enable-card-discovery", false, - "Enable automatic agent card discovery from AgentRuntime workloads into status.card") - flag.BoolVar(&enableVerifiedFetch, "enable-verified-fetch", false, - "Enable mTLS-authenticated fetch of agent cards via SPIFFE identity") + flag.BoolVar(&enableCardDiscovery, "enable-card-discovery", true, + "Enable automatic agent card discovery from AgentRuntime workloads into status.card (set to false to disable)") + flag.BoolVar(&enableVerifiedFetch, "enable-verified-fetch", true, + "Enable mTLS-authenticated fetch of agent cards via SPIFFE identity (set to false as kill switch)") flag.StringVar(&verifiedFetchSpiffeSocket, "verified-fetch-spiffe-socket", "unix:///spiffe-workload-api/spire-agent.sock", "SPIFFE Workload API socket path for verified fetch") @@ -237,6 +237,30 @@ func main() { ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts))) + // Startup info logs for defaults that changed in this release. + if enableCardDiscovery { + setupLog.Info("card discovery enabled by default; set --enable-card-discovery=false to disable") + } + if enableVerifiedFetch { + setupLog.Info("verified fetch enabled by default; set --enable-verified-fetch=false to disable") + } + + // Deprecation warnings for legacy flags that are now superseded by + // mTLS defaults (permissive mode auto-enables SPIRE and identity binding). + for _, dep := range []struct { + name string + set bool + }{ + {"require-a2a-signature", requireA2ASignature}, + {"signature-audit-mode", signatureAuditMode}, + {"enforce-network-policies", enforceNetworkPolicies}, + } { + if dep.set { + setupLog.Info("DEPRECATED: flag is superseded by mTLS permissive default; will be removed in a future release", + "flag", dep.name) + } + } + ctx := ctrl.SetupSignalHandler() // ======================================== diff --git a/kagenti-operator/config/crd/bases/agent.kagenti.dev_agentruntimes.yaml b/kagenti-operator/config/crd/bases/agent.kagenti.dev_agentruntimes.yaml index 05504e25..60f96d49 100644 --- a/kagenti-operator/config/crd/bases/agent.kagenti.dev_agentruntimes.yaml +++ b/kagenti-operator/config/crd/bases/agent.kagenti.dev_agentruntimes.yaml @@ -118,6 +118,7 @@ spec: type: object type: object mtlsMode: + default: permissive description: |- MTLSMode selects the mTLS posture between authbridge sidecars on the proxy-sidecar / lite paths. envoy-sidecar handles transport @@ -127,8 +128,8 @@ spec: Three valid values: - disabled Plaintext between sidecars (default). - permissive Inbound: byte-peek listener accepts both TLS and + disabled Plaintext between sidecars. + permissive (default) Inbound: byte-peek listener accepts both TLS and plaintext on the same port. Outbound: tries TLS, falls back to plaintext on handshake failure (one-line WARN log per fallback). Use during rollout. @@ -137,7 +138,7 @@ spec: completes. Resolution: AgentRuntime CR > namespace authbridge-runtime-config - mtls.mode > "disabled". Setting mtlsMode != disabled implicitly + mtls.mode > "permissive". Setting mtlsMode != disabled implicitly requires SPIRE — the operator auto-enables spire for the workload. CR-empty vs CR="disabled" are observably different in diff --git a/kagenti-operator/internal/controller/agentruntime_controller.go b/kagenti-operator/internal/controller/agentruntime_controller.go index f435aa64..a49672ed 100644 --- a/kagenti-operator/internal/controller/agentruntime_controller.go +++ b/kagenti-operator/internal/controller/agentruntime_controller.go @@ -63,6 +63,10 @@ const ( // Value is a JSON array of skill names, set by the kagenti backend or the user. AnnotationSkills = "kagenti.io/skills" + // AnnotationMTLSMode is the annotation applied to PodTemplateSpec to advertise the + // resolved mTLS posture. Read by authbridge sidecars for observability. + AnnotationMTLSMode = "kagenti.io/mtls-mode" + // AnnotationRestartPending marks a Sandbox that was scaled to 0 and needs // to be scaled back to 1 on the next reconcile cycle. Two-phase restart // avoids a race with the Sandbox controller's pod-name annotation. @@ -72,6 +76,7 @@ const ( ConditionTypeReady = "Ready" ConditionTypeTargetResolved = "TargetResolved" ConditionTypeConfigResolved = "ConfigResolved" + ConditionTypeMTLSReady = "MTLSReady" // AnnotationLastCardFetchHash stores the change-detection key used to skip // redundant card fetches when the workload's pod template has not changed. @@ -333,6 +338,12 @@ func (r *AgentRuntimeReconciler) applyWorkloadConfig(ctx context.Context, rt *ag key := types.NamespacedName{Name: ref.Name, Namespace: rt.Namespace} + // Resolve mTLS mode: CR value takes precedence, default to "permissive". + mtlsMode := rt.Spec.MTLSMode + if mtlsMode == "" { + mtlsMode = "permissive" + } + var configHashChanged bool err := retry.RetryOnConflict(retry.DefaultRetry, func() error { @@ -348,7 +359,8 @@ func (r *AgentRuntimeReconciler) applyWorkloadConfig(ctx context.Context, rt *ag alreadyConfigured := currentWorkloadLabels[LabelAgentType] == string(rt.Spec.Type) && currentWorkloadLabels[LabelManagedBy] == LabelManagedByValue && currentPodLabels[LabelAgentType] == string(rt.Spec.Type) && - currentPodAnnotations[AnnotationConfigHash] == configHash + currentPodAnnotations[AnnotationConfigHash] == configHash && + currentPodAnnotations[AnnotationMTLSMode] == mtlsMode if alreadyConfigured { return nil @@ -375,19 +387,21 @@ func (r *AgentRuntimeReconciler) applyWorkloadConfig(ctx context.Context, rt *ag podLabels[LabelAgentType] = string(rt.Spec.Type) acc.setPodLabels(acc.obj, podLabels) - // Apply config-hash annotation to PodTemplateSpec + // Apply config-hash and mtls-mode annotations to PodTemplateSpec podAnnotations := acc.getPodAnnotations(acc.obj) if podAnnotations == nil { podAnnotations = make(map[string]string) } podAnnotations[AnnotationConfigHash] = configHash + podAnnotations[AnnotationMTLSMode] = mtlsMode acc.setPodAnnotations(acc.obj, podAnnotations) logger.Info("Applying config to workload", "workload", ref.Name, "kind", ref.Kind, "type", string(rt.Spec.Type), - "configHash", configHash[:12]) + "configHash", configHash[:12], + "mtlsMode", mtlsMode) return r.Update(ctx, acc.obj) }) @@ -725,11 +739,12 @@ func (r *AgentRuntimeReconciler) handleDeletion(ctx context.Context, rt *agentv1 delete(podLabels, LabelAgentType) acc.setPodLabels(acc.obj, podLabels) - // Remove kagenti.io/config-hash from PodTemplateSpec pod annotations. - // This triggers the rolling update that replaces existing injected pods, - // and leaves the workload annotation-clean for any future AR. + // Remove kagenti.io/config-hash and kagenti.io/mtls-mode from PodTemplateSpec + // pod annotations. This triggers the rolling update that replaces existing + // injected pods, and leaves the workload annotation-clean for any future AR. podAnnotations := acc.getPodAnnotations(acc.obj) delete(podAnnotations, AnnotationConfigHash) + delete(podAnnotations, AnnotationMTLSMode) acc.setPodAnnotations(acc.obj, podAnnotations) logger.Info("Removed kagenti labels and config-hash from workload on AgentRuntime deletion", diff --git a/kagenti-operator/internal/webhook/injector/agentruntime_config.go b/kagenti-operator/internal/webhook/injector/agentruntime_config.go index 82280b5d..ee2e7d73 100644 --- a/kagenti-operator/internal/webhook/injector/agentruntime_config.go +++ b/kagenti-operator/internal/webhook/injector/agentruntime_config.go @@ -56,7 +56,7 @@ type AgentRuntimeOverrides struct { // mTLS posture — from .spec.mtlsMode // Nil = no per-workload override; the namespace's - // authbridge-runtime-config mtls.mode (if set) or "disabled" + // authbridge-runtime-config mtls.mode (if set) or "permissive" // applies. MTLSMode *string } diff --git a/kagenti-operator/internal/webhook/injector/envoy_template.go b/kagenti-operator/internal/webhook/injector/envoy_template.go index 74be9aca..15781ab7 100644 --- a/kagenti-operator/internal/webhook/injector/envoy_template.go +++ b/kagenti-operator/internal/webhook/injector/envoy_template.go @@ -72,19 +72,20 @@ func RenderEnvoyConfig(cfg *ResolvedConfig) (string, error) { return cfg.EnvoyYAML, nil } - // MTLSEnabled checks both "" and MTLSModeDisabled because - // ResolvedConfig leaves MTLSMode as "" when no source set it - // (CR / namespace ConfigMap / default — see ResolveConfig). The - // resolution chain only fills MTLSMode when something explicitly - // asked for it, so "" means "no opinion → treat as disabled". + // MTLSEnabled: empty string is treated as permissive (mTLS is on + // by default). Only MTLSModeDisabled explicitly disables mTLS. + effectiveMode := cfg.MTLSMode + if effectiveMode == "" { + effectiveMode = MTLSModePermissive + } data := envoyTemplateData{ AdminPort: cfg.Platform.Proxy.AdminPort, OutboundPort: cfg.Platform.Proxy.Port, InboundPort: cfg.Platform.Proxy.InboundProxyPort, ExtProcPort: defaultExtProcPort, - MTLSEnabled: cfg.MTLSMode != "" && cfg.MTLSMode != MTLSModeDisabled, - MTLSPermissive: cfg.MTLSMode == MTLSModePermissive, - MTLSStrict: cfg.MTLSMode == MTLSModeStrict, + MTLSEnabled: effectiveMode != MTLSModeDisabled, + MTLSPermissive: effectiveMode == MTLSModePermissive, + MTLSStrict: effectiveMode == MTLSModeStrict, } var buf bytes.Buffer diff --git a/kagenti-operator/internal/webhook/injector/envoy_template_test.go b/kagenti-operator/internal/webhook/injector/envoy_template_test.go index c12e14f0..6ed94d6e 100644 --- a/kagenti-operator/internal/webhook/injector/envoy_template_test.go +++ b/kagenti-operator/internal/webhook/injector/envoy_template_test.go @@ -70,10 +70,9 @@ func TestRenderEnvoyConfig_TemplateRendering(t *testing.T) { } func TestRenderEnvoyConfig_MTLSDisabled_NoTLSBlocks(t *testing.T) { - // Default / disabled mode — no TLS blocks should render. Locks in - // the existing plaintext shape so a future template edit can't - // silently leak TLS config into pods that didn't ask for it. - for _, mode := range []string{"", MTLSModeDisabled} { + // Explicitly disabled mode — no TLS blocks should render. + // Empty string is now treated as permissive (mTLS on by default). + for _, mode := range []string{MTLSModeDisabled} { t.Run("mode="+mode, func(t *testing.T) { cfg := &ResolvedConfig{ Platform: config.CompiledDefaults(), diff --git a/kagenti-operator/internal/webhook/injector/pod_mutator.go b/kagenti-operator/internal/webhook/injector/pod_mutator.go index 53ff0bab..3fb1754c 100644 --- a/kagenti-operator/internal/webhook/injector/pod_mutator.go +++ b/kagenti-operator/internal/webhook/injector/pod_mutator.go @@ -257,7 +257,7 @@ func (m *PodMutator) InjectAuthBridge(ctx context.Context, podSpec *corev1.PodSp } } if mtlsMode == "" { - mtlsMode = MTLSModeDisabled + mtlsMode = MTLSModePermissive mtlsSource = "default" } // Defense in depth: the CRD enum check rejects unknown values at @@ -270,10 +270,10 @@ func (m *PodMutator) InjectAuthBridge(ctx context.Context, podSpec *corev1.PodSp case MTLSModeDisabled, MTLSModePermissive, MTLSModeStrict: // recognized, keep as-is default: - mutatorLog.Info("WARN: unrecognized mtlsMode; defaulting to disabled", + mutatorLog.Info("WARN: unrecognized mtlsMode; defaulting to permissive", "namespace", namespace, "crName", crName, "unrecognized", mtlsMode, "source", mtlsSource) - mtlsMode = MTLSModeDisabled + mtlsMode = MTLSModePermissive mtlsSource = "default-invalid-fallback" } mutatorLog.Info("resolved mTLS mode", @@ -516,6 +516,15 @@ func (m *PodMutator) InjectAuthBridge(ctx context.Context, podSpec *corev1.PodSp )) } + // Set MTLS_MODE env var on the authbridge container so it knows the + // resolved mTLS posture at runtime. + for i := range podSpec.Containers { + if podSpec.Containers[i].Name == AuthBridgeProxyContainerName { + setOrAddEnv(&podSpec.Containers[i], "MTLS_MODE", mtlsMode) + break + } + } + // Inject HTTP_PROXY env vars into all existing app containers for i := range podSpec.Containers { c := &podSpec.Containers[i] @@ -620,6 +629,14 @@ func (m *PodMutator) InjectAuthBridge(ctx context.Context, podSpec *corev1.PodSp podSpec.Containers = append(podSpec.Containers, builder.BuildEnvoyProxyContainerWithSpireOption(spireEnabled)) } + // Set MTLS_MODE env var on the envoy-sidecar authbridge container. + for i := range podSpec.Containers { + if podSpec.Containers[i].Name == EnvoyProxyContainerName { + setOrAddEnv(&podSpec.Containers[i], "MTLS_MODE", mtlsMode) + break + } + } + if decision.ProxyInit.Inject && !containerExists(podSpec.InitContainers, ProxyInitContainerName) { outboundExclude := annotations[OutboundPortsExcludeAnnotation] inboundExclude := annotations[InboundPortsExcludeAnnotation] diff --git a/kagenti-operator/internal/webhook/injector/pod_mutator_test.go b/kagenti-operator/internal/webhook/injector/pod_mutator_test.go index 8abd0c4d..b113ce58 100644 --- a/kagenti-operator/internal/webhook/injector/pod_mutator_test.go +++ b/kagenti-operator/internal/webhook/injector/pod_mutator_test.go @@ -233,7 +233,11 @@ func TestInjectAuthBridge_RespectsExistingServiceAccountName(t *testing.T) { func TestInjectAuthBridge_NoSACreationWhenSpiffeHelperDisabled(t *testing.T) { // Spiffe-helper is injected by default for agents. SA creation is skipped // when spiffe-helper is explicitly opted out via its per-sidecar label. - m := newTestMutator(newAgentRuntime("test-ns", "my-agent")) + // MTLSMode must be set to "disabled" because the default (permissive) would + // auto-enable SPIRE, creating a ServiceAccount regardless of the spiffe-helper label. + rt := newAgentRuntime("test-ns", "my-agent") + rt.Spec.MTLSMode = "disabled" + m := newTestMutator(rt) ctx := context.Background() podSpec := &corev1.PodSpec{} diff --git a/kagenti-operator/internal/webhook/injector/resolved_config.go b/kagenti-operator/internal/webhook/injector/resolved_config.go index 75648cb5..50a8b0b1 100644 --- a/kagenti-operator/internal/webhook/injector/resolved_config.go +++ b/kagenti-operator/internal/webhook/injector/resolved_config.go @@ -59,7 +59,7 @@ type ResolvedConfig struct { // raw AuthBridgeRuntimeYAML so callers (e.g. RenderEnvoyConfig) can // branch on the resolved values without re-parsing the YAML. // AuthBridgeMode is "" when no source set it (caller picks the default). - // MTLSMode is "" when no source set it (caller treats as "disabled"). + // MTLSMode is "" when no source set it (caller treats as "permissive"). AuthBridgeMode string MTLSMode string } diff --git a/kagenti-operator/test/e2e/fixtures.go b/kagenti-operator/test/e2e/fixtures.go index 2c0f7e52..66fb8ad9 100644 --- a/kagenti-operator/test/e2e/fixtures.go +++ b/kagenti-operator/test/e2e/fixtures.go @@ -959,6 +959,7 @@ metadata: namespace: ` + authBridgeTestNamespace + ` spec: type: agent + mtlsMode: disabled targetRef: apiVersion: apps/v1 kind: Deployment @@ -1092,6 +1093,7 @@ metadata: namespace: ` + authBridgeTestNamespace + ` spec: type: agent + mtlsMode: disabled targetRef: apiVersion: apps/v1 kind: Deployment