From 50eafee99b31e942cb16114a181d5c81df4bb30a Mon Sep 17 00:00:00 2001 From: Mahil Patel Date: Thu, 28 May 2026 23:41:59 +0530 Subject: [PATCH 01/14] feat: deployed Keycloak and configure agentcube - Declarative Realm Import: Created a ConfigMap with the realm JSON defining clients (agentcube-sdk, agentcube-router, workloadmanager, agentcube-admin) and role hierarchy (sandbox:invoke -> sandbox:manage -> admin). - Deployment & Service: Added Keycloak Deployment and Service manifests with management-port health probes and a 300s startupProbe window. - Helm Integration: Added keycloak configuration values to values.yaml, gated behind 'keycloak.enabled: false' to ensure zero impact by default. Signed-off-by: Mahil Patel --- .../templates/keycloak/keycloak-realm.yaml | 106 ++++++++++++++++++ .../base/templates/keycloak/keycloak.yaml | 90 +++++++++++++++ manifests/charts/base/values.yaml | 22 ++++ 3 files changed, 218 insertions(+) create mode 100644 manifests/charts/base/templates/keycloak/keycloak-realm.yaml create mode 100644 manifests/charts/base/templates/keycloak/keycloak.yaml diff --git a/manifests/charts/base/templates/keycloak/keycloak-realm.yaml b/manifests/charts/base/templates/keycloak/keycloak-realm.yaml new file mode 100644 index 00000000..73404a4a --- /dev/null +++ b/manifests/charts/base/templates/keycloak/keycloak-realm.yaml @@ -0,0 +1,106 @@ +{{- if .Values.keycloak.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: keycloak-realm-config + namespace: {{ .Release.Namespace }} + labels: + app: keycloak +data: + # Filename must follow -realm.json convention for --import-realm to detect it. + agentcube-realm.json: | + { + "realm": {{ .Values.keycloak.realm | quote }}, + "enabled": true, + "sslRequired": "none", + + "roles": { + "realm": [ + { + "name": "sandbox:invoke", + "description": "Permission to invoke agent runtimes and code interpreters." + }, + { + "name": "sandbox:manage", + "description": "Permission to create/delete AgentRuntime and CodeInterpreter CRDs. Inherits sandbox:invoke.", + "composite": true, + "composites": { + "realm": ["sandbox:invoke"] + } + }, + { + "name": "admin", + "description": "Full administrative access. Inherits sandbox:manage.", + "composite": true, + "composites": { + "realm": ["sandbox:manage"] + } + } + ] + }, + + "defaultRoles": ["sandbox:invoke"], + + "clients": [ + { + "clientId": "agentcube-sdk", + "name": "AgentCube SDK", + "description": "Confidential client for SDK access. Supports client_credentials for service accounts and authorization_code for interactive users.", + "enabled": true, + "protocol": "openid-connect", + "publicClient": false, + "standardFlowEnabled": true, + "directAccessGrantsEnabled": false, + "serviceAccountsEnabled": true, + "redirectUris": ["*"], + "webOrigins": ["*"] + }, + { + "clientId": "agentcube-router", + "name": "AgentCube Router", + "description": "Internal service client for the Router component.", + "enabled": true, + "protocol": "openid-connect", + "publicClient": false, + "standardFlowEnabled": false, + "directAccessGrantsEnabled": false, + "serviceAccountsEnabled": true, + "redirectUris": [], + "webOrigins": [] + }, + { + "clientId": "workloadmanager", + "name": "WorkloadManager", + "description": "Audience client for downstream JWT validation. All auth flows disabled (modern replacement for deprecated bearer-only).", + "enabled": true, + "protocol": "openid-connect", + "publicClient": false, + "standardFlowEnabled": false, + "directAccessGrantsEnabled": false, + "serviceAccountsEnabled": false, + "redirectUris": [], + "webOrigins": [] + }, + { + "clientId": "agentcube-admin", + "name": "AgentCube Admin", + "description": "Confidential client for administrative operations.", + "enabled": true, + "protocol": "openid-connect", + "publicClient": false, + "standardFlowEnabled": false, + "directAccessGrantsEnabled": false, + "serviceAccountsEnabled": true, + "redirectUris": [], + "webOrigins": [] + } + ], + + "scopeMappings": [ + { + "client": "agentcube-sdk", + "roles": ["sandbox:invoke"] + } + ] + } +{{- end }} diff --git a/manifests/charts/base/templates/keycloak/keycloak.yaml b/manifests/charts/base/templates/keycloak/keycloak.yaml new file mode 100644 index 00000000..98405e21 --- /dev/null +++ b/manifests/charts/base/templates/keycloak/keycloak.yaml @@ -0,0 +1,90 @@ +{{- if .Values.keycloak.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: keycloak + namespace: {{ .Release.Namespace }} + labels: + app: keycloak +spec: + replicas: 1 + selector: + matchLabels: + app: keycloak + template: + metadata: + labels: + app: keycloak + spec: + containers: + - name: keycloak + image: "{{ .Values.keycloak.image.repository }}:{{ .Values.keycloak.image.tag }}" + imagePullPolicy: {{ .Values.keycloak.image.pullPolicy }} + {{- if .Values.keycloak.devMode }} + args: ["start-dev", "--import-realm"] + {{- else }} + args: ["start", "--import-realm"] + {{- end }} + ports: + - name: http + containerPort: 8080 + protocol: TCP + - name: management + containerPort: 9000 + protocol: TCP + env: + - name: KC_BOOTSTRAP_ADMIN_USERNAME + value: {{ .Values.keycloak.adminUser | quote }} + - name: KC_BOOTSTRAP_ADMIN_PASSWORD + value: {{ .Values.keycloak.adminPassword | quote }} + - name: KC_HEALTH_ENABLED + value: "true" + - name: KC_HTTP_PORT + value: "8080" + volumeMounts: + - name: realm-config + mountPath: /opt/keycloak/data/import + readOnly: true + # Keycloak's JVM + realm import takes 60-90s. The startup probe gives a 300s + # window before liveness checks begin, preventing premature pod restarts. + startupProbe: + httpGet: + path: /health/started + port: management + failureThreshold: 30 + periodSeconds: 10 + livenessProbe: + httpGet: + path: /health/live + port: management + periodSeconds: 30 + readinessProbe: + httpGet: + path: /health/ready + port: management + periodSeconds: 10 + resources: + {{- toYaml .Values.keycloak.resources | nindent 12 }} + volumes: + - name: realm-config + configMap: + name: keycloak-realm-config + +--- +apiVersion: v1 +kind: Service +metadata: + name: keycloak + namespace: {{ .Release.Namespace }} + labels: + app: keycloak +spec: + type: {{ .Values.keycloak.service.type }} + ports: + - port: {{ .Values.keycloak.service.port }} + targetPort: 8080 + protocol: TCP + name: http + selector: + app: keycloak +{{- end }} diff --git a/manifests/charts/base/values.yaml b/manifests/charts/base/values.yaml index 14e02247..994de690 100644 --- a/manifests/charts/base/values.yaml +++ b/manifests/charts/base/values.yaml @@ -127,3 +127,25 @@ spire: requests: cpu: 50m memory: 64Mi + +# Keycloak Configuration (External User Authentication) +keycloak: + enabled: false + devMode: true # Use start-dev mode (embedded H2 DB, HTTP). Set false for production. + image: + repository: quay.io/keycloak/keycloak + tag: "26.0" + pullPolicy: IfNotPresent + adminUser: "admin" + adminPassword: "admin" # Override in production via --set or external Secret + service: + type: ClusterIP + port: 8080 + realm: "agentcube" + resources: + limits: + cpu: 500m + memory: 1Gi + requests: + cpu: 100m + memory: 512Mi From a5cdd124450bb68b8ddc8dca58439a400be28501 Mon Sep 17 00:00:00 2001 From: Mahil Patel Date: Fri, 29 May 2026 00:28:40 +0530 Subject: [PATCH 02/14] feat: hardened Keycloak deployment for production readiness - Parameterized SDK redirect URIs with secure defaults, added support for secretKeyRef for admin/database credentials, and made sslRequired enforcement dynamic based on devMode. - Added configuration blocks for external databases (PostgreSQL/MySQL) and reverse proxy settings (headers, hostname). - Propagated global imagePullSecrets, decoupled Service targetPort by using the named 'http' port, and dynamically derived the realm import filename from Helm values. Signed-off-by: Mahil Patel --- .../templates/keycloak/keycloak-realm.yaml | 8 ++-- .../base/templates/keycloak/keycloak.yaml | 42 ++++++++++++++++++- manifests/charts/base/values.yaml | 24 ++++++++++- 3 files changed, 68 insertions(+), 6 deletions(-) diff --git a/manifests/charts/base/templates/keycloak/keycloak-realm.yaml b/manifests/charts/base/templates/keycloak/keycloak-realm.yaml index 73404a4a..4582b6c5 100644 --- a/manifests/charts/base/templates/keycloak/keycloak-realm.yaml +++ b/manifests/charts/base/templates/keycloak/keycloak-realm.yaml @@ -8,11 +8,11 @@ metadata: app: keycloak data: # Filename must follow -realm.json convention for --import-realm to detect it. - agentcube-realm.json: | + {{ .Values.keycloak.realm }}-realm.json: | { "realm": {{ .Values.keycloak.realm | quote }}, "enabled": true, - "sslRequired": "none", + "sslRequired": {{ if .Values.keycloak.devMode }}"none"{{ else }}"external"{{ end }}, "roles": { "realm": [ @@ -52,8 +52,8 @@ data: "standardFlowEnabled": true, "directAccessGrantsEnabled": false, "serviceAccountsEnabled": true, - "redirectUris": ["*"], - "webOrigins": ["*"] + "redirectUris": {{ .Values.keycloak.sdk.redirectUris | toJson }}, + "webOrigins": {{ .Values.keycloak.sdk.webOrigins | toJson }} }, { "clientId": "agentcube-router", diff --git a/manifests/charts/base/templates/keycloak/keycloak.yaml b/manifests/charts/base/templates/keycloak/keycloak.yaml index 98405e21..f9115575 100644 --- a/manifests/charts/base/templates/keycloak/keycloak.yaml +++ b/manifests/charts/base/templates/keycloak/keycloak.yaml @@ -16,6 +16,10 @@ spec: labels: app: keycloak spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} containers: - name: keycloak image: "{{ .Values.keycloak.image.repository }}:{{ .Values.keycloak.image.tag }}" @@ -36,11 +40,47 @@ spec: - name: KC_BOOTSTRAP_ADMIN_USERNAME value: {{ .Values.keycloak.adminUser | quote }} - name: KC_BOOTSTRAP_ADMIN_PASSWORD + {{- if .Values.keycloak.existingSecret }} + valueFrom: + secretKeyRef: + name: {{ .Values.keycloak.existingSecret }} + key: {{ .Values.keycloak.existingSecretKey | default "admin-password" }} + {{- else }} value: {{ .Values.keycloak.adminPassword | quote }} + {{- end }} - name: KC_HEALTH_ENABLED value: "true" - name: KC_HTTP_PORT value: "8080" + {{- if .Values.keycloak.database.vendor }} + - name: KC_DB + value: {{ .Values.keycloak.database.vendor | quote }} + - name: KC_DB_URL + value: {{ .Values.keycloak.database.url | quote }} + - name: KC_DB_USERNAME + value: {{ .Values.keycloak.database.username | quote }} + - name: KC_DB_PASSWORD + {{- if .Values.keycloak.database.existingSecret }} + valueFrom: + secretKeyRef: + name: {{ .Values.keycloak.database.existingSecret }} + key: {{ .Values.keycloak.database.existingSecretKey | default "db-password" }} + {{- else }} + value: {{ .Values.keycloak.database.password | quote }} + {{- end }} + {{- end }} + {{- if .Values.keycloak.proxy.headers }} + - name: KC_PROXY_HEADERS + value: {{ .Values.keycloak.proxy.headers | quote }} + {{- end }} + {{- if .Values.keycloak.proxy.httpEnabled }} + - name: KC_HTTP_ENABLED + value: "true" + {{- end }} + {{- if .Values.keycloak.proxy.hostname }} + - name: KC_HOSTNAME + value: {{ .Values.keycloak.proxy.hostname | quote }} + {{- end }} volumeMounts: - name: realm-config mountPath: /opt/keycloak/data/import @@ -82,7 +122,7 @@ spec: type: {{ .Values.keycloak.service.type }} ports: - port: {{ .Values.keycloak.service.port }} - targetPort: 8080 + targetPort: http protocol: TCP name: http selector: diff --git a/manifests/charts/base/values.yaml b/manifests/charts/base/values.yaml index 994de690..24b04b69 100644 --- a/manifests/charts/base/values.yaml +++ b/manifests/charts/base/values.yaml @@ -131,17 +131,39 @@ spire: # Keycloak Configuration (External User Authentication) keycloak: enabled: false - devMode: true # Use start-dev mode (embedded H2 DB, HTTP). Set false for production. + # Use start-dev mode (embedded H2 DB, HTTP) for local development. + # Set to false for production — requires database.vendor and proxy settings below. + devMode: true image: repository: quay.io/keycloak/keycloak tag: "26.0" pullPolicy: IfNotPresent adminUser: "admin" adminPassword: "admin" # Override in production via --set or external Secret + existingSecret: "" # Name of a pre-existing Secret containing admin credentials + existingSecretKey: "admin-password" # Key within the Secret holding the password + # External database configuration (required when devMode is false) + database: + vendor: "" # Database type: "postgres", "mysql", "mariadb". Empty = embedded H2 (dev only). + url: "" # JDBC URL, e.g., "jdbc:postgresql://postgres:5432/keycloak" + username: "" + password: "" + existingSecret: "" # Name of a pre-existing Secret containing the database password + existingSecretKey: "db-password" + # Reverse proxy configuration (required when Keycloak is behind an Ingress or load balancer) + proxy: + headers: "" # Proxy header mode: "xforwarded" or "forwarded". Empty = disabled. + httpEnabled: false # Allow HTTP traffic when running behind a TLS-terminating proxy + hostname: "" # Public hostname, e.g., "keycloak.example.com". Empty = auto-detect. service: type: ClusterIP port: 8080 realm: "agentcube" + sdk: + redirectUris: + - "http://localhost*" + webOrigins: + - "http://localhost*" resources: limits: cpu: 500m From 9f8f5df1fabc5cc4669aaaf6896314d91773c92b Mon Sep 17 00:00:00 2001 From: Mahil Patel Date: Tue, 2 Jun 2026 23:11:36 +0530 Subject: [PATCH 03/14] feat: hardened and validated Keycloak Helm deployment - Added fail-fast validation for database and proxy settings in production mode - Decoupled credentials into chart-managed Secrets with required-value checks - Added restricted securityContext for Keycloak pod and container - Migrated realm configuration to Keycloak 26 default roles and client secrets Signed-off-by: Mahil Patel --- .../templates/keycloak/keycloak-realm.yaml | 32 ++++++++++++-- .../templates/keycloak/keycloak-secrets.yaml | 23 ++++++++++ .../base/templates/keycloak/keycloak.yaml | 42 +++++++++++++------ manifests/charts/base/values.yaml | 13 +++++- 4 files changed, 92 insertions(+), 18 deletions(-) create mode 100644 manifests/charts/base/templates/keycloak/keycloak-secrets.yaml diff --git a/manifests/charts/base/templates/keycloak/keycloak-realm.yaml b/manifests/charts/base/templates/keycloak/keycloak-realm.yaml index 4582b6c5..7ffe0c10 100644 --- a/manifests/charts/base/templates/keycloak/keycloak-realm.yaml +++ b/manifests/charts/base/templates/keycloak/keycloak-realm.yaml @@ -1,12 +1,17 @@ {{- if .Values.keycloak.enabled }} apiVersion: v1 -kind: ConfigMap +kind: Secret metadata: name: keycloak-realm-config namespace: {{ .Release.Namespace }} labels: app: keycloak -data: +type: Opaque +# stringData accepts plain text and Kubernetes base64-encodes it automatically. +stringData: + # Note: The --import-realm flag only applies this JSON on an empty/first start. + # Once the realm exists, subsequent pod restarts will silently IGNORE updates to this Secret. + # To update a live production realm, use the Keycloak Admin API or a declarative tool. # Filename must follow -realm.json convention for --import-realm to detect it. {{ .Values.keycloak.realm }}-realm.json: | { @@ -14,8 +19,26 @@ data: "enabled": true, "sslRequired": {{ if .Values.keycloak.devMode }}"none"{{ else }}"external"{{ end }}, + "defaultRole": { + "name": "default-roles-{{ .Values.keycloak.realm }}", + "description": "${role_default-roles}", + "composite": true, + "clientRole": false + }, + "roles": { "realm": [ + { + "name": "default-roles-{{ .Values.keycloak.realm }}", + "description": "${role_default-roles}", + "composite": true, + "clientRole": false, + "composites": { + "realm": [ + "sandbox:invoke" + ] + } + }, { "name": "sandbox:invoke", "description": "Permission to invoke agent runtimes and code interpreters." @@ -39,8 +62,6 @@ data: ] }, - "defaultRoles": ["sandbox:invoke"], - "clients": [ { "clientId": "agentcube-sdk", @@ -49,6 +70,7 @@ data: "enabled": true, "protocol": "openid-connect", "publicClient": false, + "secret": {{ .Values.keycloak.clients.sdk.secret | quote }}, "standardFlowEnabled": true, "directAccessGrantsEnabled": false, "serviceAccountsEnabled": true, @@ -62,6 +84,7 @@ data: "enabled": true, "protocol": "openid-connect", "publicClient": false, + "secret": {{ .Values.keycloak.clients.router.secret | quote }}, "standardFlowEnabled": false, "directAccessGrantsEnabled": false, "serviceAccountsEnabled": true, @@ -88,6 +111,7 @@ data: "enabled": true, "protocol": "openid-connect", "publicClient": false, + "secret": {{ .Values.keycloak.clients.admin.secret | quote }}, "standardFlowEnabled": false, "directAccessGrantsEnabled": false, "serviceAccountsEnabled": true, diff --git a/manifests/charts/base/templates/keycloak/keycloak-secrets.yaml b/manifests/charts/base/templates/keycloak/keycloak-secrets.yaml new file mode 100644 index 00000000..61c2bd62 --- /dev/null +++ b/manifests/charts/base/templates/keycloak/keycloak-secrets.yaml @@ -0,0 +1,23 @@ +{{- if .Values.keycloak.enabled }} +{{- $renderAdmin := not .Values.keycloak.existingSecret }} +{{- $renderDb := and .Values.keycloak.database.vendor (not .Values.keycloak.database.existingSecret) }} +{{- if or $renderAdmin $renderDb }} +# Chart-managed Secret for Keycloak admin and/or database credentials. +# Rendered if either the admin or database credentials need to be managed by the chart. +apiVersion: v1 +kind: Secret +metadata: + name: keycloak-credentials + namespace: {{ .Release.Namespace }} + labels: + app: keycloak +type: Opaque +stringData: + {{- if $renderAdmin }} + admin-password: {{ required "keycloak.adminPassword is required when keycloak.existingSecret is not set" .Values.keycloak.adminPassword | quote }} + {{- end }} + {{- if $renderDb }} + db-password: {{ required "keycloak.database.password is required when keycloak.database.vendor is set and keycloak.database.existingSecret is not set" .Values.keycloak.database.password | quote }} + {{- end }} +{{- end }} +{{- end }} diff --git a/manifests/charts/base/templates/keycloak/keycloak.yaml b/manifests/charts/base/templates/keycloak/keycloak.yaml index f9115575..28c17e4f 100644 --- a/manifests/charts/base/templates/keycloak/keycloak.yaml +++ b/manifests/charts/base/templates/keycloak/keycloak.yaml @@ -1,4 +1,18 @@ {{- if .Values.keycloak.enabled }} +{{- if not .Values.keycloak.devMode }} +{{- if empty .Values.keycloak.database.vendor }} + {{- fail "keycloak.database.vendor is required when devMode is false (production mode requires an external database)" }} +{{- end }} +{{- if empty .Values.keycloak.database.url }} + {{- fail "keycloak.database.url is required when devMode is false (production mode requires a database URL)" }} +{{- end }} +{{- if empty .Values.keycloak.database.username }} + {{- fail "keycloak.database.username is required when devMode is false (production mode requires a database username)" }} +{{- end }} +{{- if empty .Values.keycloak.proxy.hostname }} + {{- fail "keycloak.proxy.hostname is required when devMode is false (production mode requires a strict hostname)" }} +{{- end }} +{{- end }} apiVersion: apps/v1 kind: Deployment metadata: @@ -16,14 +30,26 @@ spec: labels: app: keycloak spec: + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + runAsNonRoot: true {{- with .Values.imagePullSecrets }} imagePullSecrets: {{- toYaml . | nindent 8 }} {{- end }} containers: - name: keycloak + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL image: "{{ .Values.keycloak.image.repository }}:{{ .Values.keycloak.image.tag }}" imagePullPolicy: {{ .Values.keycloak.image.pullPolicy }} + # Note: --import-realm only applies on an empty database/first start. + # Realm Secret updates will not be re-applied on subsequent pod restarts. {{- if .Values.keycloak.devMode }} args: ["start-dev", "--import-realm"] {{- else }} @@ -40,14 +66,10 @@ spec: - name: KC_BOOTSTRAP_ADMIN_USERNAME value: {{ .Values.keycloak.adminUser | quote }} - name: KC_BOOTSTRAP_ADMIN_PASSWORD - {{- if .Values.keycloak.existingSecret }} valueFrom: secretKeyRef: - name: {{ .Values.keycloak.existingSecret }} + name: {{ .Values.keycloak.existingSecret | default "keycloak-credentials" }} key: {{ .Values.keycloak.existingSecretKey | default "admin-password" }} - {{- else }} - value: {{ .Values.keycloak.adminPassword | quote }} - {{- end }} - name: KC_HEALTH_ENABLED value: "true" - name: KC_HTTP_PORT @@ -60,14 +82,10 @@ spec: - name: KC_DB_USERNAME value: {{ .Values.keycloak.database.username | quote }} - name: KC_DB_PASSWORD - {{- if .Values.keycloak.database.existingSecret }} valueFrom: secretKeyRef: - name: {{ .Values.keycloak.database.existingSecret }} + name: {{ .Values.keycloak.database.existingSecret | default "keycloak-credentials" }} key: {{ .Values.keycloak.database.existingSecretKey | default "db-password" }} - {{- else }} - value: {{ .Values.keycloak.database.password | quote }} - {{- end }} {{- end }} {{- if .Values.keycloak.proxy.headers }} - name: KC_PROXY_HEADERS @@ -107,8 +125,8 @@ spec: {{- toYaml .Values.keycloak.resources | nindent 12 }} volumes: - name: realm-config - configMap: - name: keycloak-realm-config + secret: + secretName: keycloak-realm-config --- apiVersion: v1 diff --git a/manifests/charts/base/values.yaml b/manifests/charts/base/values.yaml index 24b04b69..edae9f3a 100644 --- a/manifests/charts/base/values.yaml +++ b/manifests/charts/base/values.yaml @@ -136,10 +136,10 @@ keycloak: devMode: true image: repository: quay.io/keycloak/keycloak - tag: "26.0" + tag: "26.0.8" pullPolicy: IfNotPresent adminUser: "admin" - adminPassword: "admin" # Override in production via --set or external Secret + adminPassword: "admin" # Dev only. Production must use existingSecret. existingSecret: "" # Name of a pre-existing Secret containing admin credentials existingSecretKey: "admin-password" # Key within the Secret holding the password # External database configuration (required when devMode is false) @@ -159,6 +159,15 @@ keycloak: type: ClusterIP port: 8080 realm: "agentcube" + # Client secrets for confidential OAuth2 clients. Override in production via --set. + # The workloadmanager client has all auth flows disabled and does not require a secret. + clients: + sdk: + secret: "agentcube-sdk-dev-secret" + router: + secret: "agentcube-router-dev-secret" + admin: + secret: "agentcube-admin-dev-secret" sdk: redirectUris: - "http://localhost*" From 7e0622413d56930cbf2dcc2859a5c97a6e149670 Mon Sep 17 00:00:00 2001 From: Mahil Patel Date: Tue, 2 Jun 2026 23:46:53 +0530 Subject: [PATCH 04/14] fix: Improved Keycloak Helm validation - Require explicit Keycloak admin and client credentials - Enforced production Secret and external DB validation - Fixed Secret key references and quote realm import filename Signed-off-by: Mahil Patel --- .../templates/keycloak/keycloak-realm.yaml | 8 +++--- .../base/templates/keycloak/keycloak.yaml | 25 +++++++++++++------ manifests/charts/base/values.yaml | 12 ++++----- 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/manifests/charts/base/templates/keycloak/keycloak-realm.yaml b/manifests/charts/base/templates/keycloak/keycloak-realm.yaml index 7ffe0c10..3bed5ad8 100644 --- a/manifests/charts/base/templates/keycloak/keycloak-realm.yaml +++ b/manifests/charts/base/templates/keycloak/keycloak-realm.yaml @@ -13,7 +13,7 @@ stringData: # Once the realm exists, subsequent pod restarts will silently IGNORE updates to this Secret. # To update a live production realm, use the Keycloak Admin API or a declarative tool. # Filename must follow -realm.json convention for --import-realm to detect it. - {{ .Values.keycloak.realm }}-realm.json: | + {{ printf "%s-realm.json" .Values.keycloak.realm | quote }}: | { "realm": {{ .Values.keycloak.realm | quote }}, "enabled": true, @@ -70,7 +70,7 @@ stringData: "enabled": true, "protocol": "openid-connect", "publicClient": false, - "secret": {{ .Values.keycloak.clients.sdk.secret | quote }}, + "secret": {{ required "keycloak.clients.sdk.secret is required when keycloak.enabled is true" .Values.keycloak.clients.sdk.secret | quote }}, "standardFlowEnabled": true, "directAccessGrantsEnabled": false, "serviceAccountsEnabled": true, @@ -84,7 +84,7 @@ stringData: "enabled": true, "protocol": "openid-connect", "publicClient": false, - "secret": {{ .Values.keycloak.clients.router.secret | quote }}, + "secret": {{ required "keycloak.clients.router.secret is required when keycloak.enabled is true" .Values.keycloak.clients.router.secret | quote }}, "standardFlowEnabled": false, "directAccessGrantsEnabled": false, "serviceAccountsEnabled": true, @@ -111,7 +111,7 @@ stringData: "enabled": true, "protocol": "openid-connect", "publicClient": false, - "secret": {{ .Values.keycloak.clients.admin.secret | quote }}, + "secret": {{ required "keycloak.clients.admin.secret is required when keycloak.enabled is true" .Values.keycloak.clients.admin.secret | quote }}, "standardFlowEnabled": false, "directAccessGrantsEnabled": false, "serviceAccountsEnabled": true, diff --git a/manifests/charts/base/templates/keycloak/keycloak.yaml b/manifests/charts/base/templates/keycloak/keycloak.yaml index 28c17e4f..519f4d4e 100644 --- a/manifests/charts/base/templates/keycloak/keycloak.yaml +++ b/manifests/charts/base/templates/keycloak/keycloak.yaml @@ -1,13 +1,24 @@ {{- if .Values.keycloak.enabled }} -{{- if not .Values.keycloak.devMode }} -{{- if empty .Values.keycloak.database.vendor }} - {{- fail "keycloak.database.vendor is required when devMode is false (production mode requires an external database)" }} +{{- if empty .Values.keycloak.adminUser }} + {{- fail "keycloak.adminUser is required when keycloak.enabled is true" }} {{- end }} +{{- if .Values.keycloak.database.vendor }} {{- if empty .Values.keycloak.database.url }} - {{- fail "keycloak.database.url is required when devMode is false (production mode requires a database URL)" }} + {{- fail "keycloak.database.url is required when keycloak.database.vendor is set" }} {{- end }} {{- if empty .Values.keycloak.database.username }} - {{- fail "keycloak.database.username is required when devMode is false (production mode requires a database username)" }} + {{- fail "keycloak.database.username is required when keycloak.database.vendor is set" }} +{{- end }} +{{- end }} +{{- if not .Values.keycloak.devMode }} +{{- if empty .Values.keycloak.existingSecret }} + {{- fail "keycloak.existingSecret is required when devMode is false (production mode requires an external Secret for the admin password)" }} +{{- end }} +{{- if empty .Values.keycloak.database.vendor }} + {{- fail "keycloak.database.vendor is required when devMode is false (production mode requires an external database)" }} +{{- end }} +{{- if empty .Values.keycloak.database.existingSecret }} + {{- fail "keycloak.database.existingSecret is required when devMode is false (production mode requires an external Secret for the database password)" }} {{- end }} {{- if empty .Values.keycloak.proxy.hostname }} {{- fail "keycloak.proxy.hostname is required when devMode is false (production mode requires a strict hostname)" }} @@ -69,7 +80,7 @@ spec: valueFrom: secretKeyRef: name: {{ .Values.keycloak.existingSecret | default "keycloak-credentials" }} - key: {{ .Values.keycloak.existingSecretKey | default "admin-password" }} + key: {{ if .Values.keycloak.existingSecret }}{{ .Values.keycloak.existingSecretKey | default "admin-password" }}{{ else }}admin-password{{ end }} - name: KC_HEALTH_ENABLED value: "true" - name: KC_HTTP_PORT @@ -85,7 +96,7 @@ spec: valueFrom: secretKeyRef: name: {{ .Values.keycloak.database.existingSecret | default "keycloak-credentials" }} - key: {{ .Values.keycloak.database.existingSecretKey | default "db-password" }} + key: {{ if .Values.keycloak.database.existingSecret }}{{ .Values.keycloak.database.existingSecretKey | default "db-password" }}{{ else }}db-password{{ end }} {{- end }} {{- if .Values.keycloak.proxy.headers }} - name: KC_PROXY_HEADERS diff --git a/manifests/charts/base/values.yaml b/manifests/charts/base/values.yaml index edae9f3a..4a07dec7 100644 --- a/manifests/charts/base/values.yaml +++ b/manifests/charts/base/values.yaml @@ -138,8 +138,8 @@ keycloak: repository: quay.io/keycloak/keycloak tag: "26.0.8" pullPolicy: IfNotPresent - adminUser: "admin" - adminPassword: "admin" # Dev only. Production must use existingSecret. + adminUser: "" # Required when keycloak.enabled=true. + adminPassword: "" # Dev only. Production must use existingSecret. existingSecret: "" # Name of a pre-existing Secret containing admin credentials existingSecretKey: "admin-password" # Key within the Secret holding the password # External database configuration (required when devMode is false) @@ -159,15 +159,15 @@ keycloak: type: ClusterIP port: 8080 realm: "agentcube" - # Client secrets for confidential OAuth2 clients. Override in production via --set. + # Client secrets for confidential OAuth2 clients. Required when keycloak.enabled=true. # The workloadmanager client has all auth flows disabled and does not require a secret. clients: sdk: - secret: "agentcube-sdk-dev-secret" + secret: "" router: - secret: "agentcube-router-dev-secret" + secret: "" admin: - secret: "agentcube-admin-dev-secret" + secret: "" sdk: redirectUris: - "http://localhost*" From ed2514d1d646d59fe4d3f8628ad4e04d783a813a Mon Sep 17 00:00:00 2001 From: Mahil Patel Date: Wed, 3 Jun 2026 23:41:35 +0530 Subject: [PATCH 05/14] docs(design): added end-to-end Keycloak auth proposal Signed-off-by: Mahil Patel --- docs/design/keycloak-proposal.md | 299 ++++++++++++++++++ .../templates/keycloak/keycloak-realm.yaml | 69 ++-- manifests/charts/base/values.yaml | 4 +- 3 files changed, 352 insertions(+), 20 deletions(-) create mode 100644 docs/design/keycloak-proposal.md diff --git a/docs/design/keycloak-proposal.md b/docs/design/keycloak-proposal.md new file mode 100644 index 00000000..f7e67da1 --- /dev/null +++ b/docs/design/keycloak-proposal.md @@ -0,0 +1,299 @@ +# Keycloak Integration Design + +Author: Mahil Patel + +## Motivation + +AgentCube currently has no mechanism to authenticate external callers, anyone who can reach the Router endpoint can invoke agent runtimes and code interpreters without proving their identity. This proposal adds external authentication and authorization using Keycloak as the identity provider, covering OIDC token validation at the Router, role-based access control (RBAC), resource-level access control (RLAC), identity forwarding to downstream services, and Python SDK auth support. Everything is gated behind `keycloak.enabled` in Helm values, so existing deployments are unaffected. + +## Architecture + +### Auth layers + +| Layer | Purpose | Mechanism | Status | +|-------|---------|-----------|--------| +| Internal (transport) | Machine identity between components | mTLS with SPIRE/file-based certificates | Existing | +| Internal (application) | Bind Router to PicoD sessions | Router-signed JWT with session claims | Existing | +| **External (authentication)** | **Prove human/SDK caller identity** | **Keycloak OIDC JWT validation** | **This proposal** | +| **External (authorization)** | **Enforce access rules** | **RBAC (realm roles) + RLAC (owner labels)** | **This proposal** | + +### End-to-end request flow + +```mermaid +sequenceDiagram + actor SDK as SDK / Client + participant KC as Keycloak + participant Router as Router + participant WM as WorkloadManager + participant PicoD as PicoD (Sandbox) + + Note over SDK, KC: 1. Token Acquisition + SDK->>KC: POST /realms/agentcube/protocol/openid-connect/token
(client_credentials: agentcube-service + client_secret) + KC-->>SDK: Access Token (JWT) + + Note over SDK, PicoD: 2. Authenticated Invocation + SDK->>Router: POST /v1/namespaces/.../invocations/...
Authorization: Bearer + + Note over Router: 3. Edge Authentication + Router->>Router: Validate JWT signature (cached JWKS) + Router->>Router: Check expiry, issuer, audience + Router->>Router: Extract realm_access.roles + + Note over Router: 4. RBAC Check + Router->>Router: Require "sandbox:invoke" role + + Note over Router, WM: 5. Session Creation (if new) + Router->>Router: Sign identity JWT (sub, iss, aud, exp) + Router->>WM: POST /v1/agent-runtime
Authorization: Bearer
X-AgentCube-User-Identity: + WM->>WM: Verify identity JWT (Router public key)
Record ownership (annotation + hashed label) + WM-->>Router: Sandbox info (sessionId, endpoints, ownerId) + + Note over Router: 6. RLAC Check (if existing session) + Router->>Router: Verify sandbox owner matches JWT sub + + Note over Router, PicoD: 7. Proxy to Sandbox + Router->>Router: Sign internal JWT (session_id + user_sub) + Router->>PicoD: Forward request
Authorization: Bearer + PicoD->>PicoD: Verify internal JWT (existing flow) + PicoD-->>Router: Response + Router-->>SDK: Response +``` + +### Key decisions + +**JWKS-based offline validation.** The Router fetches Keycloak's public signing keys via OIDC discovery at startup. The `go-oidc` library caches and auto-rotates these keys - no per-request call to Keycloak. + +**Forwarding user identity via signed tokens, not plain headers.** The Router embeds the user's `sub` claim into existing channels: +- For PicoD: added as a `user_sub` claim in the Router-signed internal JWT +- For WorkloadManager: sent as a short-lived Router-signed identity JWT in the `X-AgentCube-User-Identity` header + +Downstream services trust the identity because it is cryptographically signed by the Router's private key (the same key used for PicoD auth). WM verifies this JWT using the Router's public key from the `picod-router-public-key` ConfigMap — no dependency on mTLS for identity trust. + +## Detailed Design + +### 1. Keycloak Helm Deployment + +Keycloak runs as a single-replica Deployment inside the AgentCube namespace, gated behind `{{- if .Values.keycloak.enabled }}`. The Helm chart **bootstraps** the initial realm, clients, and roles — it is not a declarative management tool. After first startup, realm changes must be made via the Keycloak Admin API or Admin Console. + +The Deployment runs `quay.io/keycloak/keycloak:26.0.8` and supports two modes: + +- **Dev mode** (`keycloak.devMode: true`, default): Runs `start-dev` with an embedded H2 database. Suitable for local development and testing. +- **Production mode** (`keycloak.devMode: false`): Runs `start` and requires an external database, an external Secret for credentials, and a public hostname. For production HA, we recommend using an external Keycloak instance or the [Keycloak Operator](https://www.keycloak.org/operator/installation) rather than the built-in single-replica chart. + +The Deployment uses `--import-realm` to load the realm configuration from a mounted Secret (`keycloak-realm-config`) on first startup. The import uses `IGNORE_EXISTING` strategy — subsequent restarts do not overwrite the realm, so Helm value changes to the realm JSON will not take effect on an existing database. The pod runs as non-root with all capabilities dropped. + +The Service exposes Keycloak on port 8080 (configurable) as a ClusterIP service. + +#### Realm Configuration + +The realm JSON defines a role hierarchy where each higher role inherits the ones below: + +``` +admin + └── sandbox:manage (create/delete AgentRuntime and CodeInterpreter CRDs) + └── sandbox:invoke (invoke agent runtimes and code interpreters) +``` + +`sandbox:invoke` is assigned to the default realm role, so every new user gets it automatically. + +**OAuth2 Clients:** + +| Client ID | Type | Purpose | +|-----------|------|---------| +| `agentcube-service` | Confidential (`client_credentials`) | Server-side / automation authentication | +| `agentcube-sdk` | Public (`authorization_code` + PKCE) | Interactive SDK and CLI authentication | +| `agentcube-router` | Confidential (`client_credentials`) | Internal Router service identity | +| `agentcube-admin` | Confidential (`client_credentials`) | Administrative operations | + +Confidential client secrets are enforced via Helm's `required` function — the chart fails to render if they're missing. The `agentcube-sdk` client is a **public client** (no secret) that uses authorization code with PKCE for interactive flows, following RFC 8252 (OAuth 2.0 for Native Apps). The `agentcube-service` client is confidential and used for server-side `client_credentials` flows where a secret can be stored securely. Both clients have `sandbox:invoke` mapped via `scopeMappings`. + +Both clients include a **hardcoded audience protocol mapper** (`oidc-audience-mapper`) that injects `agentcube-api` into the access token's `aud` claim. The Router validates `aud = "agentcube-api"`. This follows OAuth2 convention — the audience identifies the resource server (the Router API), not the client that requested the token. + +For production mode, the chart includes validation guards that fail the render if required values like `existingSecret`, `database.vendor`, or `proxy.hostname` are missing. + +### 2. OIDC Token Validation (Router) + +The Router uses the `coreos/go-oidc` library to validate incoming JWTs. This is the standard OIDC library in the Go ecosystem - Kubernetes itself uses it. + +**New file: `pkg/router/oidc.go`** + +```go +type OIDCConfig struct { + IssuerURL string // e.g. "http://keycloak.agentcube-system.svc:8080/realms/agentcube" + Audience string // expected "aud" claim, e.g. "agentcube-api" +} + +type Claims struct { + Subject string `json:"sub"` + Email string `json:"email"` + RealmAccess RealmAccess `json:"realm_access"` +} + +type RealmAccess struct { + Roles []string `json:"roles"` +} +``` + +The `OIDCValidator` uses `go-oidc` for JWKS discovery and key caching (`oidc.NewProvider()`), but validates the token as an **OAuth2 access token**, not an ID token. Keycloak's `client_credentials` grant returns an access token, and while Keycloak issues these as signed JWTs using the same keys, the audience semantics differ from ID tokens. `ValidateToken` verifies the JWT signature against cached JWKS keys, then explicitly checks `iss`, `exp`, `nbf`, `aud`, and extracts `realm_access.roles` — all locally, no per-request call to Keycloak. + +### 3. Authentication Middleware (Router) + +**New file: `pkg/router/auth.go`** + +Two gin middleware functions : + +- **`oidcAuthMiddleware()`** - extracts the Bearer token, validates it via the OIDC validator, stores Claims in context. No-op when auth is disabled. +- **`requireRole(role)`** - checks `realm_access.roles` for the required role. Returns 403 if missing. + +Applied to the `/v1` route group: + +```go +v1 := s.engine.Group("/v1") +v1.Use(s.oidcAuthMiddleware()) // validate JWT +if s.oidcValidator != nil { + v1.Use(requireRole("sandbox:invoke")) // check role +} +v1.Use(s.concurrencyLimitMiddleware()) // existing +``` + +Health endpoints skip authentication - they must remain accessible for Kubernetes probes. + +### 4. Identity Forwarding + +#### Router → PicoD + +The Router already signs an internal JWT for each proxied request. When external auth is enabled, the caller's `sub` claim is embedded in this JWT: + +```go +claims := map[string]interface{}{ + "session_id": sandbox.SessionID, +} +if oidcClaims := extractClaims(c); oidcClaims != nil { + claims["user_sub"] = oidcClaims.Subject +} +``` + +PicoD doesn't need any changes - the extra claim is simply available if it ever needs to read it. + +#### Router → WorkloadManager + +The `createSandbox` method in `session_manager.go` signs a short-lived identity JWT using the Router's existing private key (the same `picod-router-identity` key from the PicoD auth design) and sends it as a header: + +```go +identityClaims := map[string]interface{}{ + "sub": claims.Subject, + "iss": "agentcube-router", + "aud": "workloadmanager", + "exp": time.Now().Add(30 * time.Second).Unix(), +} +identityToken, _ := s.jwtManager.GenerateToken(identityClaims) +req.Header.Set("X-AgentCube-User-Identity", identityToken) +``` + +WM verifies this JWT using the Router's public key from the `picod-router-public-key` ConfigMap (already mounted for PicoD auth). This provides cryptographic proof of the user identity without depending on mTLS — the identity is trustworthy regardless of transport security configuration. WM continues to authenticate the Router itself via the existing K8s SA token. + +### 5. RLAC - Resource-Level Access Control + +RLAC ensures users can only interact with sandboxes they created. + +**Ownership tagging (WorkloadManager):** When WM creates a sandbox, it verifies the `X-AgentCube-User-Identity` JWT, extracts the `sub` claim, and records ownership in two ways: + +```go +Annotations: map[string]string{ + "agentcube.io/owner": userID, // raw sub from verified identity JWT +} +Labels: map[string]string{ + "agentcube.io/owner-hash": sha256Short(userID), // first 63 chars of hex SHA-256 +} +``` + +The raw `sub` is stored in an annotation (no length/charset restrictions) and in Redis (`SandboxInfo.OwnerID`) for authoritative ownership checks. The hashed label is used only for Kubernetes label-based selection if needed. Keycloak UUIDs (36 chars) would fit as labels directly, but federated or pairwise subject identifiers can exceed the 63-character Kubernetes label limit, so we hash defensively. + +The owner ID is returned in the create sandbox response so the Router can persist it in its Redis cache. + +**Ownership verification (Router):** Before proxying to an existing sandbox, the Router checks if the caller owns it. When auth is enabled, the check is **fail-closed** — if the owner is missing (legacy sandbox, cache issue, WM bug), access is denied rather than silently allowed: + +```go +if s.oidcValidator != nil { + claims := extractClaims(c) + if claims != nil { + if sandbox.OwnerID == "" { + c.JSON(http.StatusForbidden, gin.H{"error": "sandbox has no owner record"}) + return + } + if sandbox.OwnerID != claims.Subject { + c.JSON(http.StatusForbidden, gin.H{"error": "you do not own this sandbox"}) + return + } + } +} +``` + +This check is skipped when external auth is disabled. Sandboxes created before auth was enabled will be inaccessible once auth is turned on, which is the intended behavior. + +### 6. Python SDK Auth + +The Python SDK currently supports an explicit `auth_token` string or reading a K8s service account token from a file. Neither supports `client_credentials` flow or token refresh. + +We add a pluggable auth provider pattern: + +**New file: `sdk-python/agentcube/auth.py`** + +```python +@runtime_checkable +class AuthProvider(Protocol): + def get_token(self) -> str: ... + +class ServiceAccountAuth: + """Authenticates using OAuth2 client_credentials grant against Keycloak.""" + def __init__(self, token_url: str, client_id: str, client_secret: str): ... + def get_token(self) -> str: + # Returns cached token, refreshes 30s before expiry + ... + +class TokenAuth: + """Wraps a pre-obtained token. No refresh support.""" + def __init__(self, token: str): ... + def get_token(self) -> str: ... +``` + +The existing clients (`ControlPlaneClient`, Data Plane clients, high-level clients) are updated to accept an `auth` parameter. The `auth_token` string parameter is kept for backward compatibility. Each request calls `self._auth.get_token()` to get a fresh token. `ServiceAccountAuth` is used with the `agentcube-service` client (confidential, `client_credentials`). For interactive CLI flows, a separate `DeviceCodeAuth` or browser-based flow using the public `agentcube-sdk` client can be added later. + +### 7. Helm Wiring and CLI Flags + +When `keycloak.enabled` is true, the Router Deployment template passes additional args: + +```yaml +{{- if .Values.keycloak.enabled }} +- --enable-external-auth +- --oidc-issuer-url=http://keycloak.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.keycloak.service.port }}/realms/{{ .Values.keycloak.realm }} +- --oidc-audience=agentcube-api +{{- end }} +``` + +Three new flags in `cmd/router/main.go`: + +| Flag | Default | Description | +|------|---------|-------------| +| `--enable-external-auth` | `false` | Enable Keycloak OIDC authentication | +| `--oidc-issuer-url` | `""` | Keycloak realm issuer URL | +| `--oidc-audience` | `"agentcube-api"` | Expected JWT audience claim | + +When `--enable-external-auth` is set, `--oidc-issuer-url` is required. The Router will fail to start if it is missing. + +## Testing + +**Unit Tests:** +- `pkg/router/oidc_test.go` - OIDC validator tests using `httptest` as a fake JWKS server. +- `pkg/router/auth_test.go` - middleware tests: missing header, invalid token, valid token, role checks. +- WorkloadManager owner label tests - verify labels are applied during sandbox creation. +- Python SDK auth tests - `ServiceAccountAuth` token refresh, `TokenAuth` static token, backward compatibility. + +**E2E Tests:** + +The E2E suite (`test/e2e/`) will be extended to deploy Keycloak in the Kind cluster: + +1. Preload the Keycloak image into Kind +2. Deploy with `keycloak.enabled=true` and test client secrets +3. Obtain a real token via `client_credentials` grant +4. Test cases: no token → 401, invalid token → 401, valid token → success, RLAC ownership → 403 diff --git a/manifests/charts/base/templates/keycloak/keycloak-realm.yaml b/manifests/charts/base/templates/keycloak/keycloak-realm.yaml index 3bed5ad8..ede7b937 100644 --- a/manifests/charts/base/templates/keycloak/keycloak-realm.yaml +++ b/manifests/charts/base/templates/keycloak/keycloak-realm.yaml @@ -63,19 +63,61 @@ stringData: }, "clients": [ + { + "clientId": "agentcube-service", + "name": "AgentCube Service", + "description": "Confidential client for server-side authentication using client_credentials grant.", + "enabled": true, + "protocol": "openid-connect", + "publicClient": false, + "secret": {{ required "keycloak.clients.service.secret is required when keycloak.enabled is true" .Values.keycloak.clients.service.secret | quote }}, + "standardFlowEnabled": false, + "directAccessGrantsEnabled": false, + "serviceAccountsEnabled": true, + "redirectUris": [], + "webOrigins": [], + "protocolMappers": [ + { + "name": "agentcube-api-audience", + "protocol": "openid-connect", + "protocolMapper": "oidc-audience-mapper", + "consentRequired": false, + "config": { + "included.custom.audience": "agentcube-api", + "id.token.claim": "false", + "access.token.claim": "true" + } + } + ] + }, { "clientId": "agentcube-sdk", "name": "AgentCube SDK", - "description": "Confidential client for SDK access. Supports client_credentials for service accounts and authorization_code for interactive users.", + "description": "Public client for interactive SDK and CLI authentication using authorization code with PKCE.", "enabled": true, "protocol": "openid-connect", - "publicClient": false, - "secret": {{ required "keycloak.clients.sdk.secret is required when keycloak.enabled is true" .Values.keycloak.clients.sdk.secret | quote }}, + "publicClient": true, "standardFlowEnabled": true, "directAccessGrantsEnabled": false, - "serviceAccountsEnabled": true, + "serviceAccountsEnabled": false, "redirectUris": {{ .Values.keycloak.sdk.redirectUris | toJson }}, - "webOrigins": {{ .Values.keycloak.sdk.webOrigins | toJson }} + "webOrigins": {{ .Values.keycloak.sdk.webOrigins | toJson }}, + "attributes": { + "pkce.code.challenge.method": "S256" + }, + "protocolMappers": [ + { + "name": "agentcube-api-audience", + "protocol": "openid-connect", + "protocolMapper": "oidc-audience-mapper", + "consentRequired": false, + "config": { + "included.custom.audience": "agentcube-api", + "id.token.claim": "false", + "access.token.claim": "true" + } + } + ] }, { "clientId": "agentcube-router", @@ -91,19 +133,6 @@ stringData: "redirectUris": [], "webOrigins": [] }, - { - "clientId": "workloadmanager", - "name": "WorkloadManager", - "description": "Audience client for downstream JWT validation. All auth flows disabled (modern replacement for deprecated bearer-only).", - "enabled": true, - "protocol": "openid-connect", - "publicClient": false, - "standardFlowEnabled": false, - "directAccessGrantsEnabled": false, - "serviceAccountsEnabled": false, - "redirectUris": [], - "webOrigins": [] - }, { "clientId": "agentcube-admin", "name": "AgentCube Admin", @@ -121,6 +150,10 @@ stringData: ], "scopeMappings": [ + { + "client": "agentcube-service", + "roles": ["sandbox:invoke"] + }, { "client": "agentcube-sdk", "roles": ["sandbox:invoke"] diff --git a/manifests/charts/base/values.yaml b/manifests/charts/base/values.yaml index 4a07dec7..7704c504 100644 --- a/manifests/charts/base/values.yaml +++ b/manifests/charts/base/values.yaml @@ -160,9 +160,9 @@ keycloak: port: 8080 realm: "agentcube" # Client secrets for confidential OAuth2 clients. Required when keycloak.enabled=true. - # The workloadmanager client has all auth flows disabled and does not require a secret. + # The agentcube-sdk client is public (PKCE) and does not require a secret. clients: - sdk: + service: secret: "" router: secret: "" From 25bdfe26a6d2fedf5519ecf6821a8da53a58b385 Mon Sep 17 00:00:00 2001 From: Mahil Patel Date: Thu, 4 Jun 2026 11:31:35 +0530 Subject: [PATCH 06/14] fix: secureed client secrets and fixed port templating - Injected client secrets securely via env vars instead of helm values - Templated Keycloak container port dynamically to match service port - Updated proposal to remove old IGNORE_EXISTING terminology Signed-off-by: Mahil Patel --- docs/design/keycloak-proposal.md | 4 ++-- .../templates/keycloak/keycloak-realm.yaml | 6 +++--- .../templates/keycloak/keycloak-secrets.yaml | 12 +++++++++--- .../base/templates/keycloak/keycloak.yaml | 19 +++++++++++++++++-- manifests/charts/base/values.yaml | 5 +++++ 5 files changed, 36 insertions(+), 10 deletions(-) diff --git a/docs/design/keycloak-proposal.md b/docs/design/keycloak-proposal.md index f7e67da1..f2240dd9 100644 --- a/docs/design/keycloak-proposal.md +++ b/docs/design/keycloak-proposal.md @@ -80,7 +80,7 @@ The Deployment runs `quay.io/keycloak/keycloak:26.0.8` and supports two modes: - **Dev mode** (`keycloak.devMode: true`, default): Runs `start-dev` with an embedded H2 database. Suitable for local development and testing. - **Production mode** (`keycloak.devMode: false`): Runs `start` and requires an external database, an external Secret for credentials, and a public hostname. For production HA, we recommend using an external Keycloak instance or the [Keycloak Operator](https://www.keycloak.org/operator/installation) rather than the built-in single-replica chart. -The Deployment uses `--import-realm` to load the realm configuration from a mounted Secret (`keycloak-realm-config`) on first startup. The import uses `IGNORE_EXISTING` strategy — subsequent restarts do not overwrite the realm, so Helm value changes to the realm JSON will not take effect on an existing database. The pod runs as non-root with all capabilities dropped. +The Deployment uses `--import-realm` to load the realm configuration from a mounted Secret (`keycloak-realm-config`) on first startup. The `--import-realm` flag automatically skips existing realms - subsequent restarts do not overwrite the realm, so Helm value changes to the realm JSON will not take effect on an existing database. The pod runs as non-root with all capabilities dropped. The Service exposes Keycloak on port 8080 (configurable) as a ClusterIP service. @@ -105,7 +105,7 @@ admin | `agentcube-router` | Confidential (`client_credentials`) | Internal Router service identity | | `agentcube-admin` | Confidential (`client_credentials`) | Administrative operations | -Confidential client secrets are enforced via Helm's `required` function — the chart fails to render if they're missing. The `agentcube-sdk` client is a **public client** (no secret) that uses authorization code with PKCE for interactive flows, following RFC 8252 (OAuth 2.0 for Native Apps). The `agentcube-service` client is confidential and used for server-side `client_credentials` flows where a secret can be stored securely. Both clients have `sandbox:invoke` mapped via `scopeMappings`. +Confidential client secrets can be provided securely via an existing Kubernetes Secret (`keycloak.clients.existingSecret`) to prevent leaking them in Helm values. They are injected into the Keycloak pod as environment variables and securely substituted into the realm JSON during import. The `agentcube-sdk` client is a **public client** (no secret) that uses authorization code with PKCE for interactive flows, following RFC 8252 (OAuth 2.0 for Native Apps). The `agentcube-service` client is confidential and used for server-side `client_credentials` flows where a secret can be stored securely. Both clients have `sandbox:invoke` mapped via `scopeMappings`. Both clients include a **hardcoded audience protocol mapper** (`oidc-audience-mapper`) that injects `agentcube-api` into the access token's `aud` claim. The Router validates `aud = "agentcube-api"`. This follows OAuth2 convention — the audience identifies the resource server (the Router API), not the client that requested the token. diff --git a/manifests/charts/base/templates/keycloak/keycloak-realm.yaml b/manifests/charts/base/templates/keycloak/keycloak-realm.yaml index ede7b937..49d639d9 100644 --- a/manifests/charts/base/templates/keycloak/keycloak-realm.yaml +++ b/manifests/charts/base/templates/keycloak/keycloak-realm.yaml @@ -70,7 +70,7 @@ stringData: "enabled": true, "protocol": "openid-connect", "publicClient": false, - "secret": {{ required "keycloak.clients.service.secret is required when keycloak.enabled is true" .Values.keycloak.clients.service.secret | quote }}, + "secret": "${env.KC_CLIENT_SERVICE_SECRET}", "standardFlowEnabled": false, "directAccessGrantsEnabled": false, "serviceAccountsEnabled": true, @@ -126,7 +126,7 @@ stringData: "enabled": true, "protocol": "openid-connect", "publicClient": false, - "secret": {{ required "keycloak.clients.router.secret is required when keycloak.enabled is true" .Values.keycloak.clients.router.secret | quote }}, + "secret": "${env.KC_CLIENT_ROUTER_SECRET}", "standardFlowEnabled": false, "directAccessGrantsEnabled": false, "serviceAccountsEnabled": true, @@ -140,7 +140,7 @@ stringData: "enabled": true, "protocol": "openid-connect", "publicClient": false, - "secret": {{ required "keycloak.clients.admin.secret is required when keycloak.enabled is true" .Values.keycloak.clients.admin.secret | quote }}, + "secret": "${env.KC_CLIENT_ADMIN_SECRET}", "standardFlowEnabled": false, "directAccessGrantsEnabled": false, "serviceAccountsEnabled": true, diff --git a/manifests/charts/base/templates/keycloak/keycloak-secrets.yaml b/manifests/charts/base/templates/keycloak/keycloak-secrets.yaml index 61c2bd62..7e2b7681 100644 --- a/manifests/charts/base/templates/keycloak/keycloak-secrets.yaml +++ b/manifests/charts/base/templates/keycloak/keycloak-secrets.yaml @@ -1,9 +1,10 @@ {{- if .Values.keycloak.enabled }} {{- $renderAdmin := not .Values.keycloak.existingSecret }} {{- $renderDb := and .Values.keycloak.database.vendor (not .Values.keycloak.database.existingSecret) }} -{{- if or $renderAdmin $renderDb }} -# Chart-managed Secret for Keycloak admin and/or database credentials. -# Rendered if either the admin or database credentials need to be managed by the chart. +{{- $renderClients := not .Values.keycloak.clients.existingSecret }} +{{- if or $renderAdmin $renderDb $renderClients }} +# Chart-managed Secret for Keycloak admin, database, and/or client credentials. +# Rendered if any of these credentials need to be managed by the chart. apiVersion: v1 kind: Secret metadata: @@ -19,5 +20,10 @@ stringData: {{- if $renderDb }} db-password: {{ required "keycloak.database.password is required when keycloak.database.vendor is set and keycloak.database.existingSecret is not set" .Values.keycloak.database.password | quote }} {{- end }} + {{- if $renderClients }} + client-service-secret: {{ required "keycloak.clients.service.secret is required when keycloak.clients.existingSecret is not set" .Values.keycloak.clients.service.secret | quote }} + client-router-secret: {{ required "keycloak.clients.router.secret is required when keycloak.clients.existingSecret is not set" .Values.keycloak.clients.router.secret | quote }} + client-admin-secret: {{ required "keycloak.clients.admin.secret is required when keycloak.clients.existingSecret is not set" .Values.keycloak.clients.admin.secret | quote }} + {{- end }} {{- end }} {{- end }} diff --git a/manifests/charts/base/templates/keycloak/keycloak.yaml b/manifests/charts/base/templates/keycloak/keycloak.yaml index 519f4d4e..ca08f788 100644 --- a/manifests/charts/base/templates/keycloak/keycloak.yaml +++ b/manifests/charts/base/templates/keycloak/keycloak.yaml @@ -68,7 +68,7 @@ spec: {{- end }} ports: - name: http - containerPort: 8080 + containerPort: {{ .Values.keycloak.service.port }} protocol: TCP - name: management containerPort: 9000 @@ -84,7 +84,7 @@ spec: - name: KC_HEALTH_ENABLED value: "true" - name: KC_HTTP_PORT - value: "8080" + value: {{ .Values.keycloak.service.port | quote }} {{- if .Values.keycloak.database.vendor }} - name: KC_DB value: {{ .Values.keycloak.database.vendor | quote }} @@ -110,6 +110,21 @@ spec: - name: KC_HOSTNAME value: {{ .Values.keycloak.proxy.hostname | quote }} {{- end }} + - name: KC_CLIENT_SERVICE_SECRET + valueFrom: + secretKeyRef: + name: {{ .Values.keycloak.clients.existingSecret | default "keycloak-credentials" }} + key: {{ if .Values.keycloak.clients.existingSecret }}{{ .Values.keycloak.clients.existingSecretServiceKey | default "client-service-secret" }}{{ else }}client-service-secret{{ end }} + - name: KC_CLIENT_ROUTER_SECRET + valueFrom: + secretKeyRef: + name: {{ .Values.keycloak.clients.existingSecret | default "keycloak-credentials" }} + key: {{ if .Values.keycloak.clients.existingSecret }}{{ .Values.keycloak.clients.existingSecretRouterKey | default "client-router-secret" }}{{ else }}client-router-secret{{ end }} + - name: KC_CLIENT_ADMIN_SECRET + valueFrom: + secretKeyRef: + name: {{ .Values.keycloak.clients.existingSecret | default "keycloak-credentials" }} + key: {{ if .Values.keycloak.clients.existingSecret }}{{ .Values.keycloak.clients.existingSecretAdminKey | default "client-admin-secret" }}{{ else }}client-admin-secret{{ end }} volumeMounts: - name: realm-config mountPath: /opt/keycloak/data/import diff --git a/manifests/charts/base/values.yaml b/manifests/charts/base/values.yaml index 7704c504..69ac80c9 100644 --- a/manifests/charts/base/values.yaml +++ b/manifests/charts/base/values.yaml @@ -162,6 +162,11 @@ keycloak: # Client secrets for confidential OAuth2 clients. Required when keycloak.enabled=true. # The agentcube-sdk client is public (PKCE) and does not require a secret. clients: + existingSecret: "" # Name of a pre-existing Secret containing client secrets + # The keys below are only used if existingSecret is empty + existingSecretServiceKey: "client-service-secret" + existingSecretRouterKey: "client-router-secret" + existingSecretAdminKey: "client-admin-secret" service: secret: "" router: From be7bed1ad373bdfe952ca836932c3fb990409332 Mon Sep 17 00:00:00 2001 From: Mahil Patel Date: Thu, 4 Jun 2026 17:28:16 +0530 Subject: [PATCH 07/14] refactor(helm): extracted Keycloak to addon chart and updated OIDC design - Extracted Keycloak into a standalone addon chart (manifests/charts/addons/keycloak) to ensure the core AgentCube chart remains identity-provider agnostic. - Added HA replica support and wildcard URI security validation to the Keycloak addon. - Replaced keycloak.enabled in the base chart with generic OIDC configuration (router.oidc.issuerUrl, audience, and a required rolesClaim for dynamic role extraction). Signed-off-by: Mahil Patel --- docs/design/keycloak-proposal.md | 86 ++++++--- manifests/charts/addons/keycloak/Chart.yaml | 5 + .../addons/keycloak/templates/deployment.yaml | 182 ++++++++++++++++++ .../keycloak/templates/realm-config.yaml} | 16 +- .../addons/keycloak/templates/secrets.yaml | 27 +++ manifests/charts/addons/keycloak/values.yaml | 63 ++++++ .../base/templates/agentcube-router.yaml | 10 + .../templates/keycloak/keycloak-secrets.yaml | 29 --- .../base/templates/keycloak/keycloak.yaml | 174 ----------------- manifests/charts/base/values.yaml | 65 +------ 10 files changed, 355 insertions(+), 302 deletions(-) create mode 100644 manifests/charts/addons/keycloak/Chart.yaml create mode 100644 manifests/charts/addons/keycloak/templates/deployment.yaml rename manifests/charts/{base/templates/keycloak/keycloak-realm.yaml => addons/keycloak/templates/realm-config.yaml} (90%) create mode 100644 manifests/charts/addons/keycloak/templates/secrets.yaml create mode 100644 manifests/charts/addons/keycloak/values.yaml delete mode 100644 manifests/charts/base/templates/keycloak/keycloak-secrets.yaml delete mode 100644 manifests/charts/base/templates/keycloak/keycloak.yaml diff --git a/docs/design/keycloak-proposal.md b/docs/design/keycloak-proposal.md index f2240dd9..c43be4a7 100644 --- a/docs/design/keycloak-proposal.md +++ b/docs/design/keycloak-proposal.md @@ -4,7 +4,7 @@ Author: Mahil Patel ## Motivation -AgentCube currently has no mechanism to authenticate external callers, anyone who can reach the Router endpoint can invoke agent runtimes and code interpreters without proving their identity. This proposal adds external authentication and authorization using Keycloak as the identity provider, covering OIDC token validation at the Router, role-based access control (RBAC), resource-level access control (RLAC), identity forwarding to downstream services, and Python SDK auth support. Everything is gated behind `keycloak.enabled` in Helm values, so existing deployments are unaffected. +AgentCube currently has no mechanism to authenticate external callers, anyone who can reach the Router endpoint can invoke agent runtimes and code interpreters without proving their identity. This proposal adds external authentication and authorization using Keycloak as the identity provider, covering OIDC token validation at the Router, role-based access control (RBAC), resource-level access control (RLAC), identity forwarding to downstream services, and Python SDK auth support. Keycloak is deployed as a separate addon chart, and the core chart enables auth when `router.oidc.issuerUrl` is set, so existing deployments are unaffected. ## Architecture @@ -37,7 +37,7 @@ sequenceDiagram Note over Router: 3. Edge Authentication Router->>Router: Validate JWT signature (cached JWKS) Router->>Router: Check expiry, issuer, audience - Router->>Router: Extract realm_access.roles + Router->>Router: Extract roles from configured claim (default: realm_access.roles) Note over Router: 4. RBAC Check Router->>Router: Require "sandbox:invoke" role @@ -64,21 +64,21 @@ sequenceDiagram **JWKS-based offline validation.** The Router fetches Keycloak's public signing keys via OIDC discovery at startup. The `go-oidc` library caches and auto-rotates these keys - no per-request call to Keycloak. **Forwarding user identity via signed tokens, not plain headers.** The Router embeds the user's `sub` claim into existing channels: -- For PicoD: added as a `user_sub` claim in the Router-signed internal JWT -- For WorkloadManager: sent as a short-lived Router-signed identity JWT in the `X-AgentCube-User-Identity` header +- For PicoD: added as a `user_sub` claim in the Router-signed internal JWT (allows the agent runtime to trace actions back to a specific human user for logging, context, and downstream internal AuthZ). +- For WorkloadManager: the Router creates and signs a **new, short-lived internal JWT** (containing just the user's ID) and sends it in the `X-AgentCube-User-Identity` header. We do not pass the original Keycloak token, keeping WM decoupled from the external IDP. -Downstream services trust the identity because it is cryptographically signed by the Router's private key (the same key used for PicoD auth). WM verifies this JWT using the Router's public key from the `picod-router-public-key` ConfigMap — no dependency on mTLS for identity trust. +WorkloadManager and PicoD know they can trust this user identity because the internal JWT is cryptographically signed by the Router's private key. They verify the signature using the Router's public key (which is distributed via a Kubernetes ConfigMap). This guarantees the user identity was actually verified by the Router and prevents anyone from spoofing a fake identity header. ## Detailed Design ### 1. Keycloak Helm Deployment -Keycloak runs as a single-replica Deployment inside the AgentCube namespace, gated behind `{{- if .Values.keycloak.enabled }}`. The Helm chart **bootstraps** the initial realm, clients, and roles — it is not a declarative management tool. After first startup, realm changes must be made via the Keycloak Admin API or Admin Console. +Keycloak is deployed as a **separate addon chart** (`manifests/charts/addons/keycloak/`), independent of the core AgentCube chart. This keeps the core chart provider-agnostic - it only needs an OIDC issuer URL, which works with Keycloak, Okta, Auth0, or any OIDC-compliant provider. The addon chart **bootstraps** the initial realm, clients, and roles — it is not a declarative management tool. After first startup, realm changes must be made via the Keycloak Admin API or Admin Console. -The Deployment runs `quay.io/keycloak/keycloak:26.0.8` and supports two modes: +The Deployment runs the official `quay.io/keycloak/keycloak` image and supports two modes: -- **Dev mode** (`keycloak.devMode: true`, default): Runs `start-dev` with an embedded H2 database. Suitable for local development and testing. -- **Production mode** (`keycloak.devMode: false`): Runs `start` and requires an external database, an external Secret for credentials, and a public hostname. For production HA, we recommend using an external Keycloak instance or the [Keycloak Operator](https://www.keycloak.org/operator/installation) rather than the built-in single-replica chart. +- **Dev mode** (`keycloak.devMode: true`, default): Runs `start-dev` with an embedded H2 database. Suitable for local development and testing. (Replicas must be 1). +- **Production mode** (`keycloak.devMode: false`): Runs `start` and requires an external database, an external Secret for credentials, and a public hostname. For production HA, you can increase `replicas` in the addon chart values (which requires an external database), though for complex enterprise setups we recommend the [Keycloak Operator](https://www.keycloak.org/operator/installation). The Deployment uses `--import-realm` to load the realm configuration from a mounted Secret (`keycloak-realm-config`) on first startup. The `--import-realm` flag automatically skips existing realms - subsequent restarts do not overwrite the realm, so Helm value changes to the realm JSON will not take effect on an existing database. The pod runs as non-root with all capabilities dropped. @@ -100,8 +100,8 @@ admin | Client ID | Type | Purpose | |-----------|------|---------| -| `agentcube-service` | Confidential (`client_credentials`) | Server-side / automation authentication | -| `agentcube-sdk` | Public (`authorization_code` + PKCE) | Interactive SDK and CLI authentication | +| `agentcube-service` | Confidential (`client_credentials`) | External backend applications and automated scripts using the Python SDK (Machine-to-Machine / M2M) | +| `agentcube-sdk` | Public (`authorization_code` + PKCE) | Human end users interacting via the CLI or browser (Interactive) | | `agentcube-router` | Confidential (`client_credentials`) | Internal Router service identity | | `agentcube-admin` | Confidential (`client_credentials`) | Administrative operations | @@ -119,22 +119,28 @@ The Router uses the `coreos/go-oidc` library to validate incoming JWTs. This is ```go type OIDCConfig struct { - IssuerURL string // e.g. "http://keycloak.agentcube-system.svc:8080/realms/agentcube" - Audience string // expected "aud" claim, e.g. "agentcube-api" + IssuerURL string // e.g. "http://keycloak.agentcube-system.svc:8080/realms/agentcube" + Audience string // expected "aud" claim, e.g. "agentcube-api" + RolesClaim string // dot-separated path to roles array, e.g. "realm_access.roles" } type Claims struct { - Subject string `json:"sub"` - Email string `json:"email"` - RealmAccess RealmAccess `json:"realm_access"` -} - -type RealmAccess struct { - Roles []string `json:"roles"` + Subject string // from standard "sub" claim + Email string // from standard "email" claim + Roles []string // extracted from the configured RolesClaim path } ``` -The `OIDCValidator` uses `go-oidc` for JWKS discovery and key caching (`oidc.NewProvider()`), but validates the token as an **OAuth2 access token**, not an ID token. Keycloak's `client_credentials` grant returns an access token, and while Keycloak issues these as signed JWTs using the same keys, the audience semantics differ from ID tokens. `ValidateToken` verifies the JWT signature against cached JWKS keys, then explicitly checks `iss`, `exp`, `nbf`, `aud`, and extracts `realm_access.roles` — all locally, no per-request call to Keycloak. +Roles are extracted dynamically from the JWT using the configured `RolesClaim` path. For example, `realm_access.roles` means: parse the JWT payload as JSON, navigate into the `realm_access` object, then read the `roles` array. This makes the middleware work with any OIDC provider: + +| Provider | `--oidc-roles-claim` value | +|----------|---------------------------| +| Keycloak | `realm_access.roles` (default) | +| Auth0 | `https://myapp.com/roles` | +| Okta | `groups` | +| Azure AD | `roles` | + +The `OIDCValidator` uses `go-oidc` for JWKS discovery and key caching (`oidc.NewProvider()`), but validates the token as an **OAuth2 access token**, not an ID token. Keycloak's `client_credentials` grant returns an access token, and while Keycloak issues these as signed JWTs using the same keys, the audience semantics differ from ID tokens. `ValidateToken` verifies the JWT signature against cached JWKS keys, then explicitly checks `iss`, `exp`, `nbf`, `aud`, and extracts roles from the configured claim path — all locally, no per-request call to Keycloak. ### 3. Authentication Middleware (Router) @@ -143,7 +149,7 @@ The `OIDCValidator` uses `go-oidc` for JWKS discovery and key caching (`oidc.New Two gin middleware functions : - **`oidcAuthMiddleware()`** - extracts the Bearer token, validates it via the OIDC validator, stores Claims in context. No-op when auth is disabled. -- **`requireRole(role)`** - checks `realm_access.roles` for the required role. Returns 403 if missing. +- **`requireRole(role)`** - checks the extracted roles list (from the configured claim path) for the required role. Returns 403 if missing. Applied to the `/v1` route group: @@ -261,25 +267,41 @@ The existing clients (`ControlPlaneClient`, Data Plane clients, high-level clien ### 7. Helm Wiring and CLI Flags -When `keycloak.enabled` is true, the Router Deployment template passes additional args: +The core AgentCube chart has provider-agnostic OIDC configuration under `router.oidc`. When `router.oidc.issuerUrl` is set, the Router Deployment template passes additional args: ```yaml -{{- if .Values.keycloak.enabled }} -- --enable-external-auth -- --oidc-issuer-url=http://keycloak.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.keycloak.service.port }}/realms/{{ .Values.keycloak.realm }} -- --oidc-audience=agentcube-api +{{- if .Values.router.oidc.issuerUrl }} +- --oidc-issuer-url={{ .Values.router.oidc.issuerUrl }} +- --oidc-audience={{ .Values.router.oidc.audience }} +- --oidc-roles-claim={{ .Values.router.oidc.rolesClaim }} {{- end }} ``` -Three new flags in `cmd/router/main.go`: +When using the Keycloak addon chart, the user sets the issuer URL to point at the in-cluster Keycloak service: + +```bash +# 1. Deploy the Keycloak addon +helm install keycloak manifests/charts/addons/keycloak -n agentcube-system \ + --set adminUser=admin --set adminPassword=admin \ + --set clients.service.secret=my-svc-secret \ + --set clients.router.secret=my-router-secret \ + --set clients.admin.secret=my-admin-secret + +# 2. Deploy AgentCube with OIDC pointed at the addon +helm install agentcube manifests/charts/base -n agentcube-system \ + --set router.oidc.issuerUrl=http://keycloak.agentcube-system.svc:8080/realms/agentcube \ + --set router.oidc.rolesClaim=realm_access.roles +``` + +Two new flags in `cmd/router/main.go`: | Flag | Default | Description | |------|---------|-------------| -| `--enable-external-auth` | `false` | Enable Keycloak OIDC authentication | -| `--oidc-issuer-url` | `""` | Keycloak realm issuer URL | +| `--oidc-issuer-url` | `""` | OIDC provider issuer URL | | `--oidc-audience` | `"agentcube-api"` | Expected JWT audience claim | +| `--oidc-roles-claim` | `""` | REQUIRED if issuer is set. Dot-separated path to the roles array in the JWT (e.g. `realm_access.roles` for Keycloak, `groups` for Okta). | -When `--enable-external-auth` is set, `--oidc-issuer-url` is required. The Router will fail to start if it is missing. +External authentication is automatically enabled when `--oidc-issuer-url` is provided. The Router will validate tokens against this issuer. ## Testing @@ -294,6 +316,6 @@ When `--enable-external-auth` is set, `--oidc-issuer-url` is required. The Route The E2E suite (`test/e2e/`) will be extended to deploy Keycloak in the Kind cluster: 1. Preload the Keycloak image into Kind -2. Deploy with `keycloak.enabled=true` and test client secrets +2. Deploy with the Keycloak addon and set `router.oidc.issuerUrl` in the base chart 3. Obtain a real token via `client_credentials` grant 4. Test cases: no token → 401, invalid token → 401, valid token → success, RLAC ownership → 403 diff --git a/manifests/charts/addons/keycloak/Chart.yaml b/manifests/charts/addons/keycloak/Chart.yaml new file mode 100644 index 00000000..75e5d490 --- /dev/null +++ b/manifests/charts/addons/keycloak/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +name: agentcube-keycloak +description: Keycloak identity provider addon for AgentCube +version: 0.1.0 +appVersion: "26.0.8" diff --git a/manifests/charts/addons/keycloak/templates/deployment.yaml b/manifests/charts/addons/keycloak/templates/deployment.yaml new file mode 100644 index 00000000..5053f924 --- /dev/null +++ b/manifests/charts/addons/keycloak/templates/deployment.yaml @@ -0,0 +1,182 @@ +{{- if empty .Values.adminUser }} + {{- fail "adminUser is required" }} +{{- end }} +{{- if .Values.database.vendor }} +{{- if empty .Values.database.url }} + {{- fail "database.url is required when database.vendor is set" }} +{{- end }} +{{- if empty .Values.database.username }} + {{- fail "database.username is required when database.vendor is set" }} +{{- end }} +{{- end }} +{{- if not .Values.devMode }} +{{- if empty .Values.existingSecret }} + {{- fail "existingSecret is required when devMode is false (production mode requires an external Secret for the admin password)" }} +{{- end }} +{{- if empty .Values.database.vendor }} + {{- fail "database.vendor is required when devMode is false (production mode requires an external database)" }} +{{- end }} +{{- if empty .Values.database.existingSecret }} + {{- fail "database.existingSecret is required when devMode is false (production mode requires an external Secret for the database password)" }} +{{- end }} +{{- if empty .Values.proxy.hostname }} + {{- fail "proxy.hostname is required when devMode is false (production mode requires a strict hostname)" }} +{{- end }} +{{- range .Values.sdk.redirectUris }} + {{- if contains "*" . }} + {{- fail "Wildcard redirect URIs (e.g. '*') are not allowed in production mode (devMode=false) due to open redirect risks. Please provide explicit, fully-qualified URIs for sdk.redirectUris." }} + {{- end }} +{{- end }} +{{- range .Values.sdk.webOrigins }} + {{- if contains "*" . }} + {{- fail "Wildcard web origins (e.g. '*') are not allowed in production mode (devMode=false). Please provide explicit, fully-qualified URIs for sdk.webOrigins." }} + {{- end }} +{{- end }} +{{- end }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: keycloak + namespace: {{ .Release.Namespace }} + labels: + app: keycloak +spec: + replicas: {{ .Values.replicas | default 1 }} + selector: + matchLabels: + app: keycloak + template: + metadata: + labels: + app: keycloak + spec: + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + runAsNonRoot: true + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: keycloak + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + # Note: --import-realm only applies on an empty database/first start. + # Realm Secret updates will not be re-applied on subsequent pod restarts. + {{- if .Values.devMode }} + args: ["start-dev", "--import-realm"] + {{- else }} + args: ["start", "--import-realm"] + {{- end }} + ports: + - name: http + containerPort: {{ .Values.service.port }} + protocol: TCP + - name: management + containerPort: 9000 + protocol: TCP + env: + - name: KC_BOOTSTRAP_ADMIN_USERNAME + value: {{ .Values.adminUser | quote }} + - name: KC_BOOTSTRAP_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Values.existingSecret | default "keycloak-credentials" }} + key: {{ if .Values.existingSecret }}{{ .Values.existingSecretKey | default "admin-password" }}{{ else }}admin-password{{ end }} + - name: KC_HEALTH_ENABLED + value: "true" + - name: KC_HTTP_PORT + value: {{ .Values.service.port | quote }} + {{- if .Values.database.vendor }} + - name: KC_DB + value: {{ .Values.database.vendor | quote }} + - name: KC_DB_URL + value: {{ .Values.database.url | quote }} + - name: KC_DB_USERNAME + value: {{ .Values.database.username | quote }} + - name: KC_DB_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Values.database.existingSecret | default "keycloak-credentials" }} + key: {{ if .Values.database.existingSecret }}{{ .Values.database.existingSecretKey | default "db-password" }}{{ else }}db-password{{ end }} + {{- end }} + {{- if .Values.proxy.headers }} + - name: KC_PROXY_HEADERS + value: {{ .Values.proxy.headers | quote }} + {{- end }} + {{- if .Values.proxy.httpEnabled }} + - name: KC_HTTP_ENABLED + value: "true" + {{- end }} + {{- if .Values.proxy.hostname }} + - name: KC_HOSTNAME + value: {{ .Values.proxy.hostname | quote }} + {{- end }} + - name: KC_CLIENT_SERVICE_SECRET + valueFrom: + secretKeyRef: + name: {{ .Values.clients.existingSecret | default "keycloak-credentials" }} + key: {{ if .Values.clients.existingSecret }}{{ .Values.clients.existingSecretServiceKey | default "client-service-secret" }}{{ else }}client-service-secret{{ end }} + - name: KC_CLIENT_ROUTER_SECRET + valueFrom: + secretKeyRef: + name: {{ .Values.clients.existingSecret | default "keycloak-credentials" }} + key: {{ if .Values.clients.existingSecret }}{{ .Values.clients.existingSecretRouterKey | default "client-router-secret" }}{{ else }}client-router-secret{{ end }} + - name: KC_CLIENT_ADMIN_SECRET + valueFrom: + secretKeyRef: + name: {{ .Values.clients.existingSecret | default "keycloak-credentials" }} + key: {{ if .Values.clients.existingSecret }}{{ .Values.clients.existingSecretAdminKey | default "client-admin-secret" }}{{ else }}client-admin-secret{{ end }} + volumeMounts: + - name: realm-config + mountPath: /opt/keycloak/data/import + readOnly: true + # Keycloak's JVM + realm import takes 60-90s. The startup probe gives a 300s + # window before liveness checks begin, preventing premature pod restarts. + startupProbe: + httpGet: + path: /health/started + port: management + failureThreshold: 30 + periodSeconds: 10 + livenessProbe: + httpGet: + path: /health/live + port: management + periodSeconds: 30 + readinessProbe: + httpGet: + path: /health/ready + port: management + periodSeconds: 10 + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: realm-config + secret: + secretName: keycloak-realm-config + +--- +apiVersion: v1 +kind: Service +metadata: + name: keycloak + namespace: {{ .Release.Namespace }} + labels: + app: keycloak +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: http + selector: + app: keycloak diff --git a/manifests/charts/base/templates/keycloak/keycloak-realm.yaml b/manifests/charts/addons/keycloak/templates/realm-config.yaml similarity index 90% rename from manifests/charts/base/templates/keycloak/keycloak-realm.yaml rename to manifests/charts/addons/keycloak/templates/realm-config.yaml index 49d639d9..d7eba256 100644 --- a/manifests/charts/base/templates/keycloak/keycloak-realm.yaml +++ b/manifests/charts/addons/keycloak/templates/realm-config.yaml @@ -1,4 +1,3 @@ -{{- if .Values.keycloak.enabled }} apiVersion: v1 kind: Secret metadata: @@ -13,14 +12,14 @@ stringData: # Once the realm exists, subsequent pod restarts will silently IGNORE updates to this Secret. # To update a live production realm, use the Keycloak Admin API or a declarative tool. # Filename must follow -realm.json convention for --import-realm to detect it. - {{ printf "%s-realm.json" .Values.keycloak.realm | quote }}: | + {{ printf "%s-realm.json" .Values.realm | quote }}: | { - "realm": {{ .Values.keycloak.realm | quote }}, + "realm": {{ .Values.realm | quote }}, "enabled": true, - "sslRequired": {{ if .Values.keycloak.devMode }}"none"{{ else }}"external"{{ end }}, + "sslRequired": {{ if .Values.devMode }}"none"{{ else }}"external"{{ end }}, "defaultRole": { - "name": "default-roles-{{ .Values.keycloak.realm }}", + "name": "default-roles-{{ .Values.realm }}", "description": "${role_default-roles}", "composite": true, "clientRole": false @@ -29,7 +28,7 @@ stringData: "roles": { "realm": [ { - "name": "default-roles-{{ .Values.keycloak.realm }}", + "name": "default-roles-{{ .Values.realm }}", "description": "${role_default-roles}", "composite": true, "clientRole": false, @@ -100,8 +99,8 @@ stringData: "standardFlowEnabled": true, "directAccessGrantsEnabled": false, "serviceAccountsEnabled": false, - "redirectUris": {{ .Values.keycloak.sdk.redirectUris | toJson }}, - "webOrigins": {{ .Values.keycloak.sdk.webOrigins | toJson }}, + "redirectUris": {{ .Values.sdk.redirectUris | toJson }}, + "webOrigins": {{ .Values.sdk.webOrigins | toJson }}, "attributes": { "pkce.code.challenge.method": "S256" }, @@ -160,4 +159,3 @@ stringData: } ] } -{{- end }} diff --git a/manifests/charts/addons/keycloak/templates/secrets.yaml b/manifests/charts/addons/keycloak/templates/secrets.yaml new file mode 100644 index 00000000..e68fd5b5 --- /dev/null +++ b/manifests/charts/addons/keycloak/templates/secrets.yaml @@ -0,0 +1,27 @@ +{{- $renderAdmin := not .Values.existingSecret }} +{{- $renderDb := and .Values.database.vendor (not .Values.database.existingSecret) }} +{{- $renderClients := not .Values.clients.existingSecret }} +{{- if or $renderAdmin $renderDb $renderClients }} +# Chart-managed Secret for Keycloak admin, database, and/or client credentials. +# Rendered if any of these credentials need to be managed by the chart. +apiVersion: v1 +kind: Secret +metadata: + name: keycloak-credentials + namespace: {{ .Release.Namespace }} + labels: + app: keycloak +type: Opaque +stringData: + {{- if $renderAdmin }} + admin-password: {{ required "adminPassword is required when existingSecret is not set" .Values.adminPassword | quote }} + {{- end }} + {{- if $renderDb }} + db-password: {{ required "database.password is required when database.vendor is set and database.existingSecret is not set" .Values.database.password | quote }} + {{- end }} + {{- if $renderClients }} + client-service-secret: {{ required "clients.service.secret is required when clients.existingSecret is not set" .Values.clients.service.secret | quote }} + client-router-secret: {{ required "clients.router.secret is required when clients.existingSecret is not set" .Values.clients.router.secret | quote }} + client-admin-secret: {{ required "clients.admin.secret is required when clients.existingSecret is not set" .Values.clients.admin.secret | quote }} + {{- end }} +{{- end }} diff --git a/manifests/charts/addons/keycloak/values.yaml b/manifests/charts/addons/keycloak/values.yaml new file mode 100644 index 00000000..6b57c5ed --- /dev/null +++ b/manifests/charts/addons/keycloak/values.yaml @@ -0,0 +1,63 @@ +# Default values for the Keycloak addon chart. + +# Global settings +replicas: 1 +imagePullSecrets: [] + +# Use start-dev mode (embedded H2 DB, HTTP) for local development. +# Set to false for production — requires database.vendor and proxy settings below. +devMode: true +image: + repository: quay.io/keycloak/keycloak + tag: "26.0.8" + pullPolicy: IfNotPresent +adminUser: "" # Required. +adminPassword: "" # Dev only. Production must use existingSecret. +existingSecret: "" # Name of a pre-existing Secret containing admin credentials +existingSecretKey: "admin-password" # Key within the Secret holding the password +# External database configuration (required when devMode is false) +database: + vendor: "" # Database type: "postgres", "mysql", "mariadb". Empty = embedded H2 (dev only). + url: "" # JDBC URL, e.g., "jdbc:postgresql://postgres:5432/keycloak" + username: "" + password: "" + existingSecret: "" # Name of a pre-existing Secret containing the database password + existingSecretKey: "db-password" +# Reverse proxy configuration (required when Keycloak is behind an Ingress or load balancer) +proxy: + headers: "" # Proxy header mode: "xforwarded" or "forwarded". Empty = disabled. + httpEnabled: false # Allow HTTP traffic when running behind a TLS-terminating proxy + hostname: "" # Public hostname, e.g., "keycloak.example.com". Empty = auto-detect. +service: + type: ClusterIP + port: 8080 +realm: "agentcube" +# Client secrets for confidential OAuth2 clients. +# The agentcube-sdk client is public (PKCE) and does not require a secret. +clients: + existingSecret: "" # Name of a pre-existing Secret containing client secrets + # The keys below are only used if existingSecret is empty + existingSecretServiceKey: "client-service-secret" + existingSecretRouterKey: "client-router-secret" + existingSecretAdminKey: "client-admin-secret" + service: + secret: "" + router: + secret: "" + admin: + secret: "" +sdk: + # WARNING: Wildcard URIs (e.g. 'http://localhost*') pose security risks (open redirect vulnerabilities) in production. + # When devMode is false, the deployment will fail if these contain wildcards. + # You must override these with explicit, fully-qualified URIs for production. + redirectUris: + - "http://localhost*" + webOrigins: + - "http://localhost*" +resources: + limits: + cpu: 500m + memory: 1Gi + requests: + cpu: 100m + memory: 512Mi diff --git a/manifests/charts/base/templates/agentcube-router.yaml b/manifests/charts/base/templates/agentcube-router.yaml index 20e796ea..d41bf893 100644 --- a/manifests/charts/base/templates/agentcube-router.yaml +++ b/manifests/charts/base/templates/agentcube-router.yaml @@ -1,3 +1,8 @@ +{{- if .Values.router.oidc.issuerUrl }} +{{- if empty .Values.router.oidc.rolesClaim }} + {{- fail "router.oidc.rolesClaim is required when router.oidc.issuerUrl is set" }} +{{- end }} +{{- end }} apiVersion: apps/v1 kind: Deployment metadata: @@ -83,6 +88,11 @@ spec: - --mtls-key={{ .Values.spire.spiffeHelper.certDir }}/{{ .Values.spire.spiffeHelper.keyFileName }} - --mtls-ca={{ .Values.spire.spiffeHelper.certDir }}/{{ .Values.spire.spiffeHelper.bundleFileName }} {{- end }} + {{- if .Values.router.oidc.issuerUrl }} + - --oidc-issuer-url={{ .Values.router.oidc.issuerUrl }} + - --oidc-audience={{ .Values.router.oidc.audience }} + - --oidc-roles-claim={{ .Values.router.oidc.rolesClaim }} + {{- end }} resources: {{- toYaml .Values.router.resources | nindent 12 }} livenessProbe: diff --git a/manifests/charts/base/templates/keycloak/keycloak-secrets.yaml b/manifests/charts/base/templates/keycloak/keycloak-secrets.yaml deleted file mode 100644 index 7e2b7681..00000000 --- a/manifests/charts/base/templates/keycloak/keycloak-secrets.yaml +++ /dev/null @@ -1,29 +0,0 @@ -{{- if .Values.keycloak.enabled }} -{{- $renderAdmin := not .Values.keycloak.existingSecret }} -{{- $renderDb := and .Values.keycloak.database.vendor (not .Values.keycloak.database.existingSecret) }} -{{- $renderClients := not .Values.keycloak.clients.existingSecret }} -{{- if or $renderAdmin $renderDb $renderClients }} -# Chart-managed Secret for Keycloak admin, database, and/or client credentials. -# Rendered if any of these credentials need to be managed by the chart. -apiVersion: v1 -kind: Secret -metadata: - name: keycloak-credentials - namespace: {{ .Release.Namespace }} - labels: - app: keycloak -type: Opaque -stringData: - {{- if $renderAdmin }} - admin-password: {{ required "keycloak.adminPassword is required when keycloak.existingSecret is not set" .Values.keycloak.adminPassword | quote }} - {{- end }} - {{- if $renderDb }} - db-password: {{ required "keycloak.database.password is required when keycloak.database.vendor is set and keycloak.database.existingSecret is not set" .Values.keycloak.database.password | quote }} - {{- end }} - {{- if $renderClients }} - client-service-secret: {{ required "keycloak.clients.service.secret is required when keycloak.clients.existingSecret is not set" .Values.keycloak.clients.service.secret | quote }} - client-router-secret: {{ required "keycloak.clients.router.secret is required when keycloak.clients.existingSecret is not set" .Values.keycloak.clients.router.secret | quote }} - client-admin-secret: {{ required "keycloak.clients.admin.secret is required when keycloak.clients.existingSecret is not set" .Values.keycloak.clients.admin.secret | quote }} - {{- end }} -{{- end }} -{{- end }} diff --git a/manifests/charts/base/templates/keycloak/keycloak.yaml b/manifests/charts/base/templates/keycloak/keycloak.yaml deleted file mode 100644 index ca08f788..00000000 --- a/manifests/charts/base/templates/keycloak/keycloak.yaml +++ /dev/null @@ -1,174 +0,0 @@ -{{- if .Values.keycloak.enabled }} -{{- if empty .Values.keycloak.adminUser }} - {{- fail "keycloak.adminUser is required when keycloak.enabled is true" }} -{{- end }} -{{- if .Values.keycloak.database.vendor }} -{{- if empty .Values.keycloak.database.url }} - {{- fail "keycloak.database.url is required when keycloak.database.vendor is set" }} -{{- end }} -{{- if empty .Values.keycloak.database.username }} - {{- fail "keycloak.database.username is required when keycloak.database.vendor is set" }} -{{- end }} -{{- end }} -{{- if not .Values.keycloak.devMode }} -{{- if empty .Values.keycloak.existingSecret }} - {{- fail "keycloak.existingSecret is required when devMode is false (production mode requires an external Secret for the admin password)" }} -{{- end }} -{{- if empty .Values.keycloak.database.vendor }} - {{- fail "keycloak.database.vendor is required when devMode is false (production mode requires an external database)" }} -{{- end }} -{{- if empty .Values.keycloak.database.existingSecret }} - {{- fail "keycloak.database.existingSecret is required when devMode is false (production mode requires an external Secret for the database password)" }} -{{- end }} -{{- if empty .Values.keycloak.proxy.hostname }} - {{- fail "keycloak.proxy.hostname is required when devMode is false (production mode requires a strict hostname)" }} -{{- end }} -{{- end }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: keycloak - namespace: {{ .Release.Namespace }} - labels: - app: keycloak -spec: - replicas: 1 - selector: - matchLabels: - app: keycloak - template: - metadata: - labels: - app: keycloak - spec: - securityContext: - runAsUser: 1000 - runAsGroup: 1000 - fsGroup: 1000 - runAsNonRoot: true - {{- with .Values.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: keycloak - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - image: "{{ .Values.keycloak.image.repository }}:{{ .Values.keycloak.image.tag }}" - imagePullPolicy: {{ .Values.keycloak.image.pullPolicy }} - # Note: --import-realm only applies on an empty database/first start. - # Realm Secret updates will not be re-applied on subsequent pod restarts. - {{- if .Values.keycloak.devMode }} - args: ["start-dev", "--import-realm"] - {{- else }} - args: ["start", "--import-realm"] - {{- end }} - ports: - - name: http - containerPort: {{ .Values.keycloak.service.port }} - protocol: TCP - - name: management - containerPort: 9000 - protocol: TCP - env: - - name: KC_BOOTSTRAP_ADMIN_USERNAME - value: {{ .Values.keycloak.adminUser | quote }} - - name: KC_BOOTSTRAP_ADMIN_PASSWORD - valueFrom: - secretKeyRef: - name: {{ .Values.keycloak.existingSecret | default "keycloak-credentials" }} - key: {{ if .Values.keycloak.existingSecret }}{{ .Values.keycloak.existingSecretKey | default "admin-password" }}{{ else }}admin-password{{ end }} - - name: KC_HEALTH_ENABLED - value: "true" - - name: KC_HTTP_PORT - value: {{ .Values.keycloak.service.port | quote }} - {{- if .Values.keycloak.database.vendor }} - - name: KC_DB - value: {{ .Values.keycloak.database.vendor | quote }} - - name: KC_DB_URL - value: {{ .Values.keycloak.database.url | quote }} - - name: KC_DB_USERNAME - value: {{ .Values.keycloak.database.username | quote }} - - name: KC_DB_PASSWORD - valueFrom: - secretKeyRef: - name: {{ .Values.keycloak.database.existingSecret | default "keycloak-credentials" }} - key: {{ if .Values.keycloak.database.existingSecret }}{{ .Values.keycloak.database.existingSecretKey | default "db-password" }}{{ else }}db-password{{ end }} - {{- end }} - {{- if .Values.keycloak.proxy.headers }} - - name: KC_PROXY_HEADERS - value: {{ .Values.keycloak.proxy.headers | quote }} - {{- end }} - {{- if .Values.keycloak.proxy.httpEnabled }} - - name: KC_HTTP_ENABLED - value: "true" - {{- end }} - {{- if .Values.keycloak.proxy.hostname }} - - name: KC_HOSTNAME - value: {{ .Values.keycloak.proxy.hostname | quote }} - {{- end }} - - name: KC_CLIENT_SERVICE_SECRET - valueFrom: - secretKeyRef: - name: {{ .Values.keycloak.clients.existingSecret | default "keycloak-credentials" }} - key: {{ if .Values.keycloak.clients.existingSecret }}{{ .Values.keycloak.clients.existingSecretServiceKey | default "client-service-secret" }}{{ else }}client-service-secret{{ end }} - - name: KC_CLIENT_ROUTER_SECRET - valueFrom: - secretKeyRef: - name: {{ .Values.keycloak.clients.existingSecret | default "keycloak-credentials" }} - key: {{ if .Values.keycloak.clients.existingSecret }}{{ .Values.keycloak.clients.existingSecretRouterKey | default "client-router-secret" }}{{ else }}client-router-secret{{ end }} - - name: KC_CLIENT_ADMIN_SECRET - valueFrom: - secretKeyRef: - name: {{ .Values.keycloak.clients.existingSecret | default "keycloak-credentials" }} - key: {{ if .Values.keycloak.clients.existingSecret }}{{ .Values.keycloak.clients.existingSecretAdminKey | default "client-admin-secret" }}{{ else }}client-admin-secret{{ end }} - volumeMounts: - - name: realm-config - mountPath: /opt/keycloak/data/import - readOnly: true - # Keycloak's JVM + realm import takes 60-90s. The startup probe gives a 300s - # window before liveness checks begin, preventing premature pod restarts. - startupProbe: - httpGet: - path: /health/started - port: management - failureThreshold: 30 - periodSeconds: 10 - livenessProbe: - httpGet: - path: /health/live - port: management - periodSeconds: 30 - readinessProbe: - httpGet: - path: /health/ready - port: management - periodSeconds: 10 - resources: - {{- toYaml .Values.keycloak.resources | nindent 12 }} - volumes: - - name: realm-config - secret: - secretName: keycloak-realm-config - ---- -apiVersion: v1 -kind: Service -metadata: - name: keycloak - namespace: {{ .Release.Namespace }} - labels: - app: keycloak -spec: - type: {{ .Values.keycloak.service.type }} - ports: - - port: {{ .Values.keycloak.service.port }} - targetPort: http - protocol: TCP - name: http - selector: - app: keycloak -{{- end }} diff --git a/manifests/charts/base/values.yaml b/manifests/charts/base/values.yaml index 69ac80c9..276d4c7e 100644 --- a/manifests/charts/base/values.yaml +++ b/manifests/charts/base/values.yaml @@ -32,6 +32,13 @@ router: config: {} extraEnv: [] serviceAccountName: "agentcube-router" + # OIDC configuration for external user authentication. + # When oidc.issuerUrl is set, the Router validates Bearer tokens against this OIDC provider. + # Works with any OIDC-compliant provider (Keycloak, Okta, Auth0, etc.). + oidc: + issuerUrl: "" # e.g., "http://keycloak.agentcube-system.svc:8080/realms/agentcube" + audience: "agentcube-api" # Expected "aud" claim in access tokens + rolesClaim: "" # Required if issuerUrl is set. JSON path to the roles array in the JWT (e.g., "realm_access.roles" for Keycloak, "groups" for Okta). # AgentCube Workload Manager workloadmanager: @@ -127,61 +134,3 @@ spire: requests: cpu: 50m memory: 64Mi - -# Keycloak Configuration (External User Authentication) -keycloak: - enabled: false - # Use start-dev mode (embedded H2 DB, HTTP) for local development. - # Set to false for production — requires database.vendor and proxy settings below. - devMode: true - image: - repository: quay.io/keycloak/keycloak - tag: "26.0.8" - pullPolicy: IfNotPresent - adminUser: "" # Required when keycloak.enabled=true. - adminPassword: "" # Dev only. Production must use existingSecret. - existingSecret: "" # Name of a pre-existing Secret containing admin credentials - existingSecretKey: "admin-password" # Key within the Secret holding the password - # External database configuration (required when devMode is false) - database: - vendor: "" # Database type: "postgres", "mysql", "mariadb". Empty = embedded H2 (dev only). - url: "" # JDBC URL, e.g., "jdbc:postgresql://postgres:5432/keycloak" - username: "" - password: "" - existingSecret: "" # Name of a pre-existing Secret containing the database password - existingSecretKey: "db-password" - # Reverse proxy configuration (required when Keycloak is behind an Ingress or load balancer) - proxy: - headers: "" # Proxy header mode: "xforwarded" or "forwarded". Empty = disabled. - httpEnabled: false # Allow HTTP traffic when running behind a TLS-terminating proxy - hostname: "" # Public hostname, e.g., "keycloak.example.com". Empty = auto-detect. - service: - type: ClusterIP - port: 8080 - realm: "agentcube" - # Client secrets for confidential OAuth2 clients. Required when keycloak.enabled=true. - # The agentcube-sdk client is public (PKCE) and does not require a secret. - clients: - existingSecret: "" # Name of a pre-existing Secret containing client secrets - # The keys below are only used if existingSecret is empty - existingSecretServiceKey: "client-service-secret" - existingSecretRouterKey: "client-router-secret" - existingSecretAdminKey: "client-admin-secret" - service: - secret: "" - router: - secret: "" - admin: - secret: "" - sdk: - redirectUris: - - "http://localhost*" - webOrigins: - - "http://localhost*" - resources: - limits: - cpu: 500m - memory: 1Gi - requests: - cpu: 100m - memory: 512Mi From 83c9e1b4a5f7a9b04e9f840c174ebadd14d95025 Mon Sep 17 00:00:00 2001 From: Mahil Patel Date: Thu, 4 Jun 2026 22:34:24 +0530 Subject: [PATCH 08/14] fix(helm): improved keycloak addon safety and conventions - Quoted OIDC flags in router deployment to prevent YAML parsing errors - Fixed inaccurate existingSecret comment in values.yaml - Upgraded Keycloak addon Chart.yaml to apiVersion v2 (Helm 3) - Added validation to block scaling replicas when using embedded H2 database (devMode) Signed-off-by: Mahil Patel --- docs/design/keycloak-proposal.md | 10 +++++----- manifests/charts/addons/keycloak/Chart.yaml | 2 +- .../charts/addons/keycloak/templates/deployment.yaml | 3 +++ manifests/charts/addons/keycloak/values.yaml | 3 ++- manifests/charts/base/templates/agentcube-router.yaml | 6 +++--- 5 files changed, 14 insertions(+), 10 deletions(-) diff --git a/docs/design/keycloak-proposal.md b/docs/design/keycloak-proposal.md index c43be4a7..ace41de9 100644 --- a/docs/design/keycloak-proposal.md +++ b/docs/design/keycloak-proposal.md @@ -67,7 +67,7 @@ sequenceDiagram - For PicoD: added as a `user_sub` claim in the Router-signed internal JWT (allows the agent runtime to trace actions back to a specific human user for logging, context, and downstream internal AuthZ). - For WorkloadManager: the Router creates and signs a **new, short-lived internal JWT** (containing just the user's ID) and sends it in the `X-AgentCube-User-Identity` header. We do not pass the original Keycloak token, keeping WM decoupled from the external IDP. -WorkloadManager and PicoD know they can trust this user identity because the internal JWT is cryptographically signed by the Router's private key. They verify the signature using the Router's public key (which is distributed via a Kubernetes ConfigMap). This guarantees the user identity was actually verified by the Router and prevents anyone from spoofing a fake identity header. +WorkloadManager and PicoD know they can trust this user identity because the internal JWT is cryptographically signed by the Router's private key. They verify the signature using the Router's public key (which is distributed via the `picod-router-identity` Kubernetes Secret). This guarantees the user identity was actually verified by the Router and prevents anyone from spoofing a fake identity header. ## Detailed Design @@ -196,7 +196,7 @@ identityToken, _ := s.jwtManager.GenerateToken(identityClaims) req.Header.Set("X-AgentCube-User-Identity", identityToken) ``` -WM verifies this JWT using the Router's public key from the `picod-router-public-key` ConfigMap (already mounted for PicoD auth). This provides cryptographic proof of the user identity without depending on mTLS — the identity is trustworthy regardless of transport security configuration. WM continues to authenticate the Router itself via the existing K8s SA token. +WM verifies this JWT using the Router's public key from the `picod-router-identity` Secret. This provides cryptographic proof of the user identity without depending on mTLS — the identity is trustworthy regardless of transport security configuration. WM continues to authenticate the Router itself via the existing K8s SA token. ### 5. RLAC - Resource-Level Access Control @@ -271,9 +271,9 @@ The core AgentCube chart has provider-agnostic OIDC configuration under `router. ```yaml {{- if .Values.router.oidc.issuerUrl }} -- --oidc-issuer-url={{ .Values.router.oidc.issuerUrl }} -- --oidc-audience={{ .Values.router.oidc.audience }} -- --oidc-roles-claim={{ .Values.router.oidc.rolesClaim }} +- {{ printf "--oidc-issuer-url=%s" .Values.router.oidc.issuerUrl | quote }} +- {{ printf "--oidc-audience=%s" .Values.router.oidc.audience | quote }} +- {{ printf "--oidc-roles-claim=%s" .Values.router.oidc.rolesClaim | quote }} {{- end }} ``` diff --git a/manifests/charts/addons/keycloak/Chart.yaml b/manifests/charts/addons/keycloak/Chart.yaml index 75e5d490..7d5f1578 100644 --- a/manifests/charts/addons/keycloak/Chart.yaml +++ b/manifests/charts/addons/keycloak/Chart.yaml @@ -1,4 +1,4 @@ -apiVersion: v1 +apiVersion: v2 name: agentcube-keycloak description: Keycloak identity provider addon for AgentCube version: 0.1.0 diff --git a/manifests/charts/addons/keycloak/templates/deployment.yaml b/manifests/charts/addons/keycloak/templates/deployment.yaml index 5053f924..63ea988d 100644 --- a/manifests/charts/addons/keycloak/templates/deployment.yaml +++ b/manifests/charts/addons/keycloak/templates/deployment.yaml @@ -1,6 +1,9 @@ {{- if empty .Values.adminUser }} {{- fail "adminUser is required" }} {{- end }} +{{- if and .Values.devMode (gt (int (default 1 .Values.replicas)) 1) }} + {{- fail "replicas cannot be greater than 1 when devMode is true. Keycloak dev mode uses an embedded H2 database which does not support clustering." }} +{{- end }} {{- if .Values.database.vendor }} {{- if empty .Values.database.url }} {{- fail "database.url is required when database.vendor is set" }} diff --git a/manifests/charts/addons/keycloak/values.yaml b/manifests/charts/addons/keycloak/values.yaml index 6b57c5ed..0af0bee0 100644 --- a/manifests/charts/addons/keycloak/values.yaml +++ b/manifests/charts/addons/keycloak/values.yaml @@ -36,7 +36,8 @@ realm: "agentcube" # The agentcube-sdk client is public (PKCE) and does not require a secret. clients: existingSecret: "" # Name of a pre-existing Secret containing client secrets - # The keys below are only used if existingSecret is empty + # The keys below are only used if existingSecret is SET. + # They define which keys inside your external Secret hold the actual passwords. existingSecretServiceKey: "client-service-secret" existingSecretRouterKey: "client-router-secret" existingSecretAdminKey: "client-admin-secret" diff --git a/manifests/charts/base/templates/agentcube-router.yaml b/manifests/charts/base/templates/agentcube-router.yaml index d41bf893..bfd5de80 100644 --- a/manifests/charts/base/templates/agentcube-router.yaml +++ b/manifests/charts/base/templates/agentcube-router.yaml @@ -89,9 +89,9 @@ spec: - --mtls-ca={{ .Values.spire.spiffeHelper.certDir }}/{{ .Values.spire.spiffeHelper.bundleFileName }} {{- end }} {{- if .Values.router.oidc.issuerUrl }} - - --oidc-issuer-url={{ .Values.router.oidc.issuerUrl }} - - --oidc-audience={{ .Values.router.oidc.audience }} - - --oidc-roles-claim={{ .Values.router.oidc.rolesClaim }} + - {{ printf "--oidc-issuer-url=%s" .Values.router.oidc.issuerUrl | quote }} + - {{ printf "--oidc-audience=%s" .Values.router.oidc.audience | quote }} + - {{ printf "--oidc-roles-claim=%s" .Values.router.oidc.rolesClaim | quote }} {{- end }} resources: {{- toYaml .Values.router.resources | nindent 12 }} From 7391ec6e794b6bd5b9d0fb724ac96ab357ac4094 Mon Sep 17 00:00:00 2001 From: Mahil Patel Date: Fri, 5 Jun 2026 12:13:19 +0530 Subject: [PATCH 09/14] feat(helm): added required role configuration for OIDC in router and values Signed-off-by: Mahil Patel --- docs/design/keycloak-proposal.md | 11 +++++++---- manifests/charts/base/templates/agentcube-router.yaml | 4 ++++ manifests/charts/base/values.yaml | 1 + 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/docs/design/keycloak-proposal.md b/docs/design/keycloak-proposal.md index ace41de9..333da4bf 100644 --- a/docs/design/keycloak-proposal.md +++ b/docs/design/keycloak-proposal.md @@ -37,10 +37,10 @@ sequenceDiagram Note over Router: 3. Edge Authentication Router->>Router: Validate JWT signature (cached JWKS) Router->>Router: Check expiry, issuer, audience - Router->>Router: Extract roles from configured claim (default: realm_access.roles) + Router->>Router: Extract roles from configured claim (e.g. realm_access.roles) Note over Router: 4. RBAC Check - Router->>Router: Require "sandbox:invoke" role + Router->>Router: Require configured role Note over Router, WM: 5. Session Creation (if new) Router->>Router: Sign identity JWT (sub, iss, aud, exp) @@ -157,7 +157,8 @@ Applied to the `/v1` route group: v1 := s.engine.Group("/v1") v1.Use(s.oidcAuthMiddleware()) // validate JWT if s.oidcValidator != nil { - v1.Use(requireRole("sandbox:invoke")) // check role + // Require configured role + v1.Use(requireRole(s.config.OIDCRequiredRole)) } v1.Use(s.concurrencyLimitMiddleware()) // existing ``` @@ -290,7 +291,8 @@ helm install keycloak manifests/charts/addons/keycloak -n agentcube-system \ # 2. Deploy AgentCube with OIDC pointed at the addon helm install agentcube manifests/charts/base -n agentcube-system \ --set router.oidc.issuerUrl=http://keycloak.agentcube-system.svc:8080/realms/agentcube \ - --set router.oidc.rolesClaim=realm_access.roles + --set router.oidc.rolesClaim=realm_access.roles \ + --set router.oidc.requiredRole=sandbox:invoke ``` Two new flags in `cmd/router/main.go`: @@ -300,6 +302,7 @@ Two new flags in `cmd/router/main.go`: | `--oidc-issuer-url` | `""` | OIDC provider issuer URL | | `--oidc-audience` | `"agentcube-api"` | Expected JWT audience claim | | `--oidc-roles-claim` | `""` | REQUIRED if issuer is set. Dot-separated path to the roles array in the JWT (e.g. `realm_access.roles` for Keycloak, `groups` for Okta). | +| `--oidc-required-role` | `""` | REQUIRED if issuer is set. The role required to access the API endpoints (e.g. `sandbox:invoke` - Note: this is just an example, though our Keycloak addon creates this role automatically). | External authentication is automatically enabled when `--oidc-issuer-url` is provided. The Router will validate tokens against this issuer. diff --git a/manifests/charts/base/templates/agentcube-router.yaml b/manifests/charts/base/templates/agentcube-router.yaml index bfd5de80..1e86c595 100644 --- a/manifests/charts/base/templates/agentcube-router.yaml +++ b/manifests/charts/base/templates/agentcube-router.yaml @@ -2,6 +2,9 @@ {{- if empty .Values.router.oidc.rolesClaim }} {{- fail "router.oidc.rolesClaim is required when router.oidc.issuerUrl is set" }} {{- end }} +{{- if empty .Values.router.oidc.requiredRole }} + {{- fail "router.oidc.requiredRole is required when router.oidc.issuerUrl is set" }} +{{- end }} {{- end }} apiVersion: apps/v1 kind: Deployment @@ -92,6 +95,7 @@ spec: - {{ printf "--oidc-issuer-url=%s" .Values.router.oidc.issuerUrl | quote }} - {{ printf "--oidc-audience=%s" .Values.router.oidc.audience | quote }} - {{ printf "--oidc-roles-claim=%s" .Values.router.oidc.rolesClaim | quote }} + - {{ printf "--oidc-required-role=%s" .Values.router.oidc.requiredRole | quote }} {{- end }} resources: {{- toYaml .Values.router.resources | nindent 12 }} diff --git a/manifests/charts/base/values.yaml b/manifests/charts/base/values.yaml index 276d4c7e..fbe54fcf 100644 --- a/manifests/charts/base/values.yaml +++ b/manifests/charts/base/values.yaml @@ -39,6 +39,7 @@ router: issuerUrl: "" # e.g., "http://keycloak.agentcube-system.svc:8080/realms/agentcube" audience: "agentcube-api" # Expected "aud" claim in access tokens rolesClaim: "" # Required if issuerUrl is set. JSON path to the roles array in the JWT (e.g., "realm_access.roles" for Keycloak, "groups" for Okta). + requiredRole: "" # Required if issuerUrl is set. The role required to access the API endpoints (e.g., "sandbox:invoke"). # AgentCube Workload Manager workloadmanager: From 6f197ff7ae94ab59ac7ffd72c6b6cfa025ae2100 Mon Sep 17 00:00:00 2001 From: Mahil Patel Date: Fri, 5 Jun 2026 14:10:03 +0530 Subject: [PATCH 10/14] feat: integrated external OIDC authentication and RBAC - Implemented provider-agnostic OIDC authentication with JWKS caching and strict JWT algorithm validation. - Added RBAC middleware to enforce configurable role-based access control. - Enhanced SessionManager to securely forward verified external user identities to WorkloadManager. Signed-off-by: Mahil Patel --- cmd/router/main.go | 11 ++ go.mod | 4 +- go.sum | 6 + pkg/router/auth.go | 135 ++++++++++++++ pkg/router/auth_test.go | 108 ++++++++++++ pkg/router/config.go | 12 ++ pkg/router/handlers.go | 8 + pkg/router/oidc.go | 250 ++++++++++++++++++++++++++ pkg/router/oidc_test.go | 274 +++++++++++++++++++++++++++++ pkg/router/server.go | 57 ++++-- pkg/router/session_manager.go | 31 +++- pkg/router/session_manager_test.go | 4 +- 12 files changed, 881 insertions(+), 19 deletions(-) create mode 100644 pkg/router/auth.go create mode 100644 pkg/router/auth_test.go create mode 100644 pkg/router/oidc.go create mode 100644 pkg/router/oidc_test.go diff --git a/cmd/router/main.go b/cmd/router/main.go index 58727d8f..0eb50639 100644 --- a/cmd/router/main.go +++ b/cmd/router/main.go @@ -44,6 +44,13 @@ func main() { mtlsCert = flag.String("mtls-cert", "", "Path to mTLS client certificate for upstream WorkloadManager connections") mtlsKey = flag.String("mtls-key", "", "Path to mTLS client key for upstream WorkloadManager connections") mtlsCA = flag.String("mtls-ca", "", "Path to mTLS CA bundle for verifying upstream WorkloadManager identity") + + // OIDC configuration for external user authentication. + // External auth is automatically enabled when --oidc-issuer-url is provided. + oidcIssuerURL = flag.String("oidc-issuer-url", "", "OIDC provider issuer URL, enables external auth when set") + oidcAudience = flag.String("oidc-audience", "agentcube-api", "Expected audience (aud) claim in the access token") + oidcRolesClaim = flag.String("oidc-roles-claim", "", "JSON path to roles array in the JWT (e.g., realm_access.roles)") + oidcRequiredRole = flag.String("oidc-required-role", "", "Role required to access the API (e.g., sandbox:invoke)") ) // Initialize klog flags @@ -76,6 +83,10 @@ func main() { TLSKey: *tlsKey, MaxConcurrentRequests: *maxConcurrentRequests, MTLSConfig: tlsConfig, + OIDCIssuerURL: *oidcIssuerURL, + OIDCAudience: *oidcAudience, + OIDCRolesClaim: *oidcRolesClaim, + OIDCRequiredRole: *oidcRequiredRole, } // Create Router API server diff --git a/go.mod b/go.mod index f0a23c80..06c63e00 100644 --- a/go.mod +++ b/go.mod @@ -7,8 +7,9 @@ toolchain go1.24.9 require ( github.com/agiledragon/gomonkey/v2 v2.13.0 github.com/alicebob/miniredis/v2 v2.35.0 - github.com/gin-contrib/gzip v1.0.1 + github.com/coreos/go-oidc/v3 v3.14.1 github.com/fsnotify/fsnotify v1.9.0 + github.com/gin-contrib/gzip v1.0.1 github.com/gin-gonic/gin v1.10.0 github.com/golang-jwt/jwt/v5 v5.2.2 github.com/google/uuid v1.6.0 @@ -40,6 +41,7 @@ require ( github.com/fxamacker/cbor/v2 v2.9.0 // indirect github.com/gabriel-vasile/mimetype v1.4.3 // indirect github.com/gin-contrib/sse v0.1.0 // indirect + github.com/go-jose/go-jose/v4 v4.1.3 // indirect github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-logr/zapr v1.3.0 // indirect diff --git a/go.sum b/go.sum index 5c75dfcd..e0f89f39 100644 --- a/go.sum +++ b/go.sum @@ -20,6 +20,8 @@ github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/ github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w= github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg= github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY= +github.com/coreos/go-oidc/v3 v3.14.1 h1:9ePWwfdwC4QKRlCXsJGou56adA/owXczOzwKdOumLqk= +github.com/coreos/go-oidc/v3 v3.14.1/go.mod h1:HaZ3szPaZ0e4r6ebqvsLWlk2Tn+aejfmrfah6hnSYEU= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -43,6 +45,8 @@ github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU= github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y= +github.com/go-jose/go-jose/v4 v4.1.3 h1:CVLmWDhDVRa6Mi/IgCgaopNosCaHz7zrMeF9MlZRkrs= +github.com/go-jose/go-jose/v4 v4.1.3/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= @@ -242,6 +246,8 @@ golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY= golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU= golang.org/x/oauth2 v0.32.0 h1:jsCblLleRMDrxMN29H3z/k1KliIvpLgCkE6R8FXXNgY= golang.org/x/oauth2 v0.32.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/oauth2 v0.36.0 h1:peZ/1z27fi9hUOFCAZaHyrpWG5lwe0RJEEEeH0ThlIs= +golang.org/x/oauth2 v0.36.0/go.mod h1:YDBUJMTkDnJS+A4BP4eZBjCqtokkg1hODuPjwiGPO7Q= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= diff --git a/pkg/router/auth.go b/pkg/router/auth.go new file mode 100644 index 00000000..ac44cb45 --- /dev/null +++ b/pkg/router/auth.go @@ -0,0 +1,135 @@ +/* +Copyright The Volcano Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package router + +import ( + "context" + "net/http" + "slices" + "strings" + + "github.com/gin-gonic/gin" + "k8s.io/klog/v2" +) + +// contextKeyType is a private type for context keys to avoid collisions. +type contextKeyType string + +// contextKeyOIDCClaims is the context key for storing validated OIDC claims. +const contextKeyOIDCClaims = contextKeyType("oidcClaims") + +// oidcAuthMiddleware validates incoming OIDC JWTs. +func (s *Server) oidcAuthMiddleware() gin.HandlerFunc { + return func(c *gin.Context) { + if s.oidcValidator == nil { + c.Next() + return + } + + // Extract the Bearer token from the Authorization header. + authHeader := c.GetHeader("Authorization") + if authHeader == "" { + c.JSON(http.StatusUnauthorized, gin.H{ + "error": "missing authorization header", + "code": "UNAUTHORIZED", + }) + c.Abort() + return + } + + parts := strings.SplitN(authHeader, " ", 2) + if len(parts) != 2 || !strings.EqualFold(parts[0], "Bearer") { + c.JSON(http.StatusUnauthorized, gin.H{ + "error": "invalid authorization header format", + "code": "UNAUTHORIZED", + }) + c.Abort() + return + } + + rawToken := strings.TrimSpace(parts[1]) + if rawToken == "" { + c.JSON(http.StatusUnauthorized, gin.H{ + "error": "empty bearer token", + "code": "UNAUTHORIZED", + }) + c.Abort() + return + } + + // Validate the token against the OIDC provider. + claims, err := s.oidcValidator.ValidateToken(c.Request.Context(), rawToken) + if err != nil { + klog.V(2).Infof("OIDC token validation failed: %v", err) + c.JSON(http.StatusUnauthorized, gin.H{ + "error": "invalid or expired token", + "code": "UNAUTHORIZED", + }) + c.Abort() + return + } + + c.Set(string(contextKeyOIDCClaims), claims) + + // Store in request context for downstream code + ctx := context.WithValue(c.Request.Context(), contextKeyOIDCClaims, claims) + c.Request = c.Request.WithContext(ctx) + + c.Next() + } +} + +// requireRole enforces a specific OIDC role for the endpoint. +func requireRole(role string) gin.HandlerFunc { + return func(c *gin.Context) { + claims := extractClaims(c) + if claims == nil { + c.JSON(http.StatusUnauthorized, gin.H{ + "error": "authentication required", + "code": "UNAUTHORIZED", + }) + c.Abort() + return + } + + if !slices.Contains(claims.Roles, role) { + klog.V(2).Infof("RBAC check failed: user %s missing required role %q (has: %v)", + claims.Subject, role, claims.Roles) + c.JSON(http.StatusForbidden, gin.H{ + "error": "insufficient permissions: missing role " + role, + "code": "FORBIDDEN", + }) + c.Abort() + return + } + + c.Next() + } +} + +// extractClaims extracts validated OIDC claims from the Gin context. +func extractClaims(c *gin.Context) *Claims { + val, exists := c.Get(string(contextKeyOIDCClaims)) + if !exists { + return nil + } + claims, ok := val.(*Claims) + if !ok { + return nil + } + return claims +} diff --git a/pkg/router/auth_test.go b/pkg/router/auth_test.go new file mode 100644 index 00000000..3c16a18d --- /dev/null +++ b/pkg/router/auth_test.go @@ -0,0 +1,108 @@ +/* +Copyright The Volcano Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package router + +import ( + "net/http" + "net/http/httptest" + "testing" + + "github.com/gin-gonic/gin" + "github.com/stretchr/testify/assert" +) + +func init() { + gin.SetMode(gin.TestMode) +} + +func TestOIDCAuthMiddleware_Disabled(t *testing.T) { + // When oidcValidator is nil, middleware should be a passthrough. + server := &Server{oidcValidator: nil} + + w := httptest.NewRecorder() + c, router := gin.CreateTestContext(w) + router.Use(server.oidcAuthMiddleware()) + router.GET("/test", func(c *gin.Context) { + claims := extractClaims(c) + assert.Nil(t, claims, "no claims should be set when auth is disabled") + c.JSON(http.StatusOK, gin.H{"ok": true}) + }) + + c.Request = httptest.NewRequest(http.MethodGet, "/test", nil) + router.ServeHTTP(w, c.Request) + assert.Equal(t, http.StatusOK, w.Code) +} + +func TestOIDCAuthMiddleware_MissingHeader(t *testing.T) { + server := &Server{oidcValidator: &OIDCValidator{}} + + w := httptest.NewRecorder() + _, router := gin.CreateTestContext(w) + router.Use(server.oidcAuthMiddleware()) + router.GET("/test", func(_ *gin.Context) { + t.Error("handler should not be called") + }) + + req := httptest.NewRequest(http.MethodGet, "/test", nil) + router.ServeHTTP(w, req) + assert.Equal(t, http.StatusUnauthorized, w.Code) + assert.Contains(t, w.Body.String(), "missing authorization header") +} + +func TestRequireRole_HasRole(t *testing.T) { + w := httptest.NewRecorder() + _, router := gin.CreateTestContext(w) + + // Pre-set claims in context before the role check + router.Use(func(c *gin.Context) { + c.Set(string(contextKeyOIDCClaims), &Claims{ + Subject: "user-1", + Roles: []string{"sandbox:invoke", "sandbox:manage"}, + }) + c.Next() + }) + router.Use(requireRole("sandbox:invoke")) + router.GET("/test", func(c *gin.Context) { + c.JSON(http.StatusOK, gin.H{"ok": true}) + }) + + req := httptest.NewRequest(http.MethodGet, "/test", nil) + router.ServeHTTP(w, req) + assert.Equal(t, http.StatusOK, w.Code) +} + +func TestRequireRole_MissingRole(t *testing.T) { + w := httptest.NewRecorder() + _, router := gin.CreateTestContext(w) + + router.Use(func(c *gin.Context) { + c.Set(string(contextKeyOIDCClaims), &Claims{ + Subject: "user-1", + Roles: []string{"sandbox:invoke"}, + }) + c.Next() + }) + router.Use(requireRole("admin")) + router.GET("/test", func(_ *gin.Context) { + t.Error("handler should not be called") + }) + + req := httptest.NewRequest(http.MethodGet, "/test", nil) + router.ServeHTTP(w, req) + assert.Equal(t, http.StatusForbidden, w.Code) + assert.Contains(t, w.Body.String(), "insufficient permissions: missing role admin") +} diff --git a/pkg/router/config.go b/pkg/router/config.go index b123d9e9..7a12dc9d 100644 --- a/pkg/router/config.go +++ b/pkg/router/config.go @@ -56,4 +56,16 @@ type Config struct { // When all paths are present, mutual TLS is used for // Router-to-WorkloadManager connections. MTLSConfig mtls.Config + + // OIDCIssuerURL is the OIDC provider issuer URL. + OIDCIssuerURL string + + // OIDCAudience is the expected audience claim in the JWT. + OIDCAudience string + + // OIDCRolesClaim is the dot-separated JSON path to the roles array in the JWT + OIDCRolesClaim string + + // OIDCRequiredRole is the role required to access the API. + OIDCRequiredRole string } diff --git a/pkg/router/handlers.go b/pkg/router/handlers.go index 3b44d3fa..2c7cb2ca 100644 --- a/pkg/router/handlers.go +++ b/pkg/router/handlers.go @@ -240,6 +240,14 @@ func (s *Server) generateSandboxJWT(c *gin.Context, sandbox *types.SandboxInfo) claims := map[string]interface{}{ "session_id": sandbox.SessionID, } + + if oidcClaims := extractClaims(c); oidcClaims != nil { + claims["user_sub"] = oidcClaims.Subject + if oidcClaims.Email != "" { + claims["user_email"] = oidcClaims.Email + } + } + token, err := s.jwtManager.GenerateToken(claims) if err != nil { klog.Errorf("Failed to generate JWT token (session: %s): %v", sandbox.SessionID, err) diff --git a/pkg/router/oidc.go b/pkg/router/oidc.go new file mode 100644 index 00000000..f8ad3634 --- /dev/null +++ b/pkg/router/oidc.go @@ -0,0 +1,250 @@ +/* +Copyright The Volcano Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package router + +import ( + "context" + "encoding/base64" + "encoding/json" + "fmt" + "slices" + "strings" + "time" + + gooidc "github.com/coreos/go-oidc/v3/oidc" +) + +// OIDCConfig holds provider-agnostic OIDC configuration. +type OIDCConfig struct { + // IssuerURL is the OIDC provider issuer URL. + IssuerURL string + + // Audience is the expected aud claim in the JWT. + Audience string + + // RolesClaim is the JSON path to the roles array. + RolesClaim string +} + +// Claims represents validated identity extracted from an OIDC access token. +type Claims struct { + // Subject is the standard "sub" claim identifying the user. + Subject string + + // Email is the standard "email" claim (may be empty). + Email string + + // Roles extracted from the configured RolesClaim path. + Roles []string +} + +// OIDCValidator validates OIDC access tokens using cached JWKS keys. +type OIDCValidator struct { + keySet gooidc.KeySet // JWKS key set with automatic caching/rotation + issuer string // expected issuer + audience string // expected audience + rolesClaim string // dot-notation path to roles array +} + +// NewOIDCValidator creates a new OIDCValidator instance. +func NewOIDCValidator(ctx context.Context, cfg OIDCConfig) (*OIDCValidator, error) { + provider, err := gooidc.NewProvider(ctx, cfg.IssuerURL) + if err != nil { + return nil, fmt.Errorf("failed to discover OIDC provider at %s: %w", cfg.IssuerURL, err) + } + + // Extract the JWKS URL from the provider's discovery document. + var providerClaims struct { + JWKSURL string `json:"jwks_uri"` + } + if err := provider.Claims(&providerClaims); err != nil { + return nil, fmt.Errorf("failed to extract jwks_uri from OIDC discovery: %w", err) + } + + // Create a RemoteKeySet for JWKS caching and automatic key rotation. + keySet := gooidc.NewRemoteKeySet(context.Background(), providerClaims.JWKSURL) + + return &OIDCValidator{ + keySet: keySet, + issuer: cfg.IssuerURL, + audience: cfg.Audience, + rolesClaim: cfg.RolesClaim, + }, nil +} + +// accessTokenClaims represents the standard and custom claims in an access token. +type accessTokenClaims struct { + Issuer string `json:"iss"` + Subject string `json:"sub"` + Audience claimStrings `json:"aud"` + Expiry float64 `json:"exp"` + NotBefore float64 `json:"nbf"` + Email string `json:"email"` +} + +// claimStrings handles the JWT "aud" claim which can be either a string or []string. +type claimStrings []string + +func (cs *claimStrings) UnmarshalJSON(data []byte) error { + var single string + if err := json.Unmarshal(data, &single); err == nil { + *cs = []string{single} + return nil + } + var multi []string + if err := json.Unmarshal(data, &multi); err != nil { + return err + } + *cs = multi + return nil +} + +// ValidateToken verifies the JWT signature, standard claims, and extracts roles. +func (v *OIDCValidator) ValidateToken(ctx context.Context, rawToken string) (*Claims, error) { + if rawToken == "" { + return nil, fmt.Errorf("empty token") + } + + // Verify signing algorithm before signature check + if err := v.checkTokenAlgorithm(rawToken); err != nil { + return nil, err + } + + // Verify the JWT signature using the cached JWKS keys. + payload, err := v.keySet.VerifySignature(ctx, rawToken) + if err != nil { + return nil, fmt.Errorf("token signature verification failed: %w", err) + } + + // Parse the standard claims from the verified payload. + var tokenClaims accessTokenClaims + if err := json.Unmarshal(payload, &tokenClaims); err != nil { + return nil, fmt.Errorf("failed to parse token claims: %w", err) + } + + // Validate issuer + if tokenClaims.Issuer != v.issuer { + return nil, fmt.Errorf("invalid issuer: got %q, expected %q", tokenClaims.Issuer, v.issuer) + } + + // Validate expiration + if tokenClaims.Expiry == 0 { + return nil, fmt.Errorf("token missing required exp claim") + } + if time.Now().After(time.Unix(int64(tokenClaims.Expiry), 0)) { + return nil, fmt.Errorf("token has expired") + } + + // Validate not-before (if present) + if tokenClaims.NotBefore != 0 && time.Now().Before(time.Unix(int64(tokenClaims.NotBefore), 0)) { + return nil, fmt.Errorf("token is not yet valid") + } + + // Validate audience + if v.audience != "" { + if !slices.Contains(tokenClaims.Audience, v.audience) { + return nil, fmt.Errorf("invalid audience: token audiences %v do not include %q", []string(tokenClaims.Audience), v.audience) + } + } + + // Extract roles from the full claims map using the configured path. + var allClaims map[string]interface{} + if err := json.Unmarshal(payload, &allClaims); err != nil { + return nil, fmt.Errorf("failed to parse token claims for role extraction: %w", err) + } + roles := extractRolesFromClaims(allClaims, v.rolesClaim) + + return &Claims{ + Subject: tokenClaims.Subject, + Email: tokenClaims.Email, + Roles: roles, + }, nil +} + +// extractRolesFromClaims navigates a nested claims map using a dot-separated path. +func extractRolesFromClaims(claims map[string]interface{}, path string) []string { + if path == "" { + return nil + } + + parts := strings.Split(path, ".") + var current interface{} = claims + + for _, part := range parts { + m, ok := current.(map[string]interface{}) + if !ok { + return nil + } + current, ok = m[part] + if !ok { + return nil + } + } + + // The final value should be an array of strings. + arr, ok := current.([]interface{}) + if !ok { + return nil + } + + var roles []string + for _, v := range arr { + if s, ok := v.(string); ok { + roles = append(roles, s) + } + } + return roles +} + +// allowedSigningAlgs contains supported asymmetric algorithms for JWT signatures. +var allowedSigningAlgs = map[string]bool{ + "RS256": true, + "RS384": true, + "RS512": true, + "ES256": true, + "ES384": true, + "ES512": true, + "PS256": true, + "PS384": true, + "PS512": true, + "EdDSA": true, +} + +// checkTokenAlgorithm verifies the JWT signing algorithm is supported. +func (v *OIDCValidator) checkTokenAlgorithm(rawToken string) error { + parts := strings.SplitN(rawToken, ".", 3) + if len(parts) < 2 { + return fmt.Errorf("malformed JWT: expected 3 parts, got %d", len(parts)) + } + + headerBytes, err := base64.RawURLEncoding.DecodeString(parts[0]) + if err != nil { + return fmt.Errorf("malformed JWT header: %w", err) + } + + var header struct { + Alg string `json:"alg"` + } + if err := json.Unmarshal(headerBytes, &header); err != nil { + return fmt.Errorf("malformed JWT header JSON: %w", err) + } + + if !allowedSigningAlgs[header.Alg] { + return fmt.Errorf("signing algorithm not supported: %q", header.Alg) + } + return nil +} diff --git a/pkg/router/oidc_test.go b/pkg/router/oidc_test.go new file mode 100644 index 00000000..37600802 --- /dev/null +++ b/pkg/router/oidc_test.go @@ -0,0 +1,274 @@ +/* +Copyright The Volcano Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package router + +import ( + "context" + "crypto/rand" + "crypto/rsa" + "encoding/base64" + "encoding/json" + "math/big" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/golang-jwt/jwt/v5" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// testOIDCServer creates an httptest.Server that serves: +// 1. GET /.well-known/openid-configuration → OIDC discovery JSON +// 2. GET /jwks → JWKS key set JSON +func testOIDCServer(t *testing.T) (*httptest.Server, *rsa.PrivateKey) { + t.Helper() + + privateKey, err := rsa.GenerateKey(rand.Reader, 2048) + require.NoError(t, err) + + mux := http.NewServeMux() + + // We need the server URL for the discovery document, so create the server first + // with a temporary handler, then set the real handler. + ts := httptest.NewServer(nil) + issuer := ts.URL + + mux.HandleFunc("/.well-known/openid-configuration", func(w http.ResponseWriter, _ *http.Request) { + discovery := map[string]interface{}{ + "issuer": issuer, + "jwks_uri": issuer + "/jwks", + } + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(discovery) + }) + + mux.HandleFunc("/jwks", func(w http.ResponseWriter, _ *http.Request) { + jwks := map[string]interface{}{ + "keys": []map[string]interface{}{ + { + "kty": "RSA", + "kid": "test-key-1", + "use": "sig", + "alg": "RS256", + "n": base64.RawURLEncoding.EncodeToString(privateKey.N.Bytes()), + "e": base64.RawURLEncoding.EncodeToString(big.NewInt(int64(privateKey.E)).Bytes()), + }, + }, + } + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(jwks) + }) + + ts.Config.Handler = mux + return ts, privateKey +} + +// mintTestJWT creates a signed JWT for testing with configurable claims. +func mintTestJWT(t *testing.T, privateKey *rsa.PrivateKey, claims jwt.MapClaims) string { + t.Helper() + token := jwt.NewWithClaims(jwt.SigningMethodRS256, claims) + token.Header["kid"] = "test-key-1" + tokenString, err := token.SignedString(privateKey) + require.NoError(t, err) + return tokenString +} + +func TestNewOIDCValidator(t *testing.T) { + ts, _ := testOIDCServer(t) + defer ts.Close() + + cfg := OIDCConfig{ + IssuerURL: ts.URL, + Audience: "agentcube-api", + RolesClaim: "realm_access.roles", + } + + validator, err := NewOIDCValidator(context.Background(), cfg) + require.NoError(t, err) + assert.NotNil(t, validator) + assert.Equal(t, ts.URL, validator.issuer) + assert.Equal(t, "agentcube-api", validator.audience) + assert.Equal(t, "realm_access.roles", validator.rolesClaim) +} + +func TestValidateToken_ValidToken(t *testing.T) { + ts, privateKey := testOIDCServer(t) + defer ts.Close() + + validator, err := NewOIDCValidator(context.Background(), OIDCConfig{ + IssuerURL: ts.URL, + Audience: "agentcube-api", + RolesClaim: "realm_access.roles", + }) + require.NoError(t, err) + + rawToken := mintTestJWT(t, privateKey, jwt.MapClaims{ + "iss": ts.URL, + "sub": "user-123", + "aud": "agentcube-api", + "email": "test@example.com", + "exp": time.Now().Add(5 * time.Minute).Unix(), + "iat": time.Now().Unix(), + "realm_access": map[string]interface{}{ + "roles": []interface{}{"sandbox:invoke", "sandbox:manage"}, + }, + }) + + claims, err := validator.ValidateToken(context.Background(), rawToken) + require.NoError(t, err) + assert.Equal(t, "user-123", claims.Subject) + assert.Equal(t, "test@example.com", claims.Email) + assert.Equal(t, []string{"sandbox:invoke", "sandbox:manage"}, claims.Roles) +} + +func TestValidateToken_ExpiredToken(t *testing.T) { + ts, privateKey := testOIDCServer(t) + defer ts.Close() + + validator, err := NewOIDCValidator(context.Background(), OIDCConfig{ + IssuerURL: ts.URL, + Audience: "agentcube-api", + }) + require.NoError(t, err) + + rawToken := mintTestJWT(t, privateKey, jwt.MapClaims{ + "iss": ts.URL, + "sub": "user-123", + "aud": "agentcube-api", + "exp": time.Now().Add(-1 * time.Hour).Unix(), + "iat": time.Now().Add(-2 * time.Hour).Unix(), + }) + + _, err = validator.ValidateToken(context.Background(), rawToken) + assert.Error(t, err) + assert.Contains(t, err.Error(), "expired") +} + +func TestValidateToken_WrongAudience(t *testing.T) { + ts, privateKey := testOIDCServer(t) + defer ts.Close() + + validator, err := NewOIDCValidator(context.Background(), OIDCConfig{ + IssuerURL: ts.URL, + Audience: "agentcube-api", + }) + require.NoError(t, err) + + rawToken := mintTestJWT(t, privateKey, jwt.MapClaims{ + "iss": ts.URL, + "sub": "user-123", + "aud": "wrong-audience", + "exp": time.Now().Add(5 * time.Minute).Unix(), + }) + + _, err = validator.ValidateToken(context.Background(), rawToken) + assert.Error(t, err) + assert.Contains(t, err.Error(), "invalid audience") +} + +func TestValidateToken_WrongIssuer(t *testing.T) { + ts, privateKey := testOIDCServer(t) + defer ts.Close() + + validator, err := NewOIDCValidator(context.Background(), OIDCConfig{ + IssuerURL: ts.URL, + Audience: "agentcube-api", + }) + require.NoError(t, err) + + rawToken := mintTestJWT(t, privateKey, jwt.MapClaims{ + "iss": "http://wrong-issuer.example.com", + "sub": "user-123", + "aud": "agentcube-api", + "exp": time.Now().Add(5 * time.Minute).Unix(), + }) + + _, err = validator.ValidateToken(context.Background(), rawToken) + assert.Error(t, err) + assert.Contains(t, err.Error(), "invalid issuer") +} + +func TestValidateToken_InvalidSignature(t *testing.T) { + ts, _ := testOIDCServer(t) + defer ts.Close() + + validator, err := NewOIDCValidator(context.Background(), OIDCConfig{ + IssuerURL: ts.URL, + Audience: "agentcube-api", + }) + require.NoError(t, err) + + // Sign with a different key than what the JWKS endpoint serves + wrongKey, err := rsa.GenerateKey(rand.Reader, 2048) + require.NoError(t, err) + + rawToken := mintTestJWT(t, wrongKey, jwt.MapClaims{ + "iss": ts.URL, + "sub": "user-123", + "aud": "agentcube-api", + "exp": time.Now().Add(5 * time.Minute).Unix(), + }) + + _, err = validator.ValidateToken(context.Background(), rawToken) + assert.Error(t, err) + assert.Contains(t, err.Error(), "signature verification failed") +} + +func TestExtractRolesFromClaims(t *testing.T) { + tests := []struct { + name string + claims map[string]interface{} + path string + expected []string + }{ + { + name: "Keycloak nested path", + claims: map[string]interface{}{ + "realm_access": map[string]interface{}{ + "roles": []interface{}{"sandbox:invoke", "sandbox:manage"}, + }, + }, + path: "realm_access.roles", + expected: []string{"sandbox:invoke", "sandbox:manage"}, + }, + { + name: "missing intermediate key returns nil", + claims: map[string]interface{}{"other": "value"}, + path: "realm_access.roles", + expected: nil, + }, + { + name: "non-array final value returns nil", + claims: map[string]interface{}{ + "realm_access": map[string]interface{}{ + "roles": "not-an-array", + }, + }, + path: "realm_access.roles", + expected: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := extractRolesFromClaims(tt.claims, tt.path) + assert.Equal(t, tt.expected, result) + }) + } +} diff --git a/pkg/router/server.go b/pkg/router/server.go index 6e571c73..ff7adadf 100644 --- a/pkg/router/server.go +++ b/pkg/router/server.go @@ -39,6 +39,7 @@ type Server struct { storeClient store.Store httpTransport *http.Transport // Reusable HTTP transport for connection pooling jwtManager *JWTManager // JWT manager for signing requests to sandboxes + oidcValidator *OIDCValidator // nil when external auth is disabled } // NewServer creates a new Router API server instance @@ -58,8 +59,20 @@ func NewServer(config *Config) (*Server, error) { config.InitialConnectRetryInterval = 200 * time.Millisecond } - // Create session manager with store client and mTLS config - sessionManager, err := NewSessionManager(store.Storage(), &config.MTLSConfig) + // Initialize JWT manager for signing requests to sandboxes. + jwtManager, err := NewJWTManager() + if err != nil { + return nil, fmt.Errorf("failed to create JWT manager: %w", err) + } + + // Try to load existing keys from secret or store new ones + if err := jwtManager.TryStoreOrLoadJWTKeySecret(context.Background()); err != nil { + return nil, fmt.Errorf("failed to store/load JWT key secret: %w", err) + } + klog.Info("JWT manager initialized successfully") + + // Create session manager with store client, mTLS config, and JWT manager + sessionManager, err := NewSessionManager(store.Storage(), &config.MTLSConfig, jwtManager) if err != nil { return nil, fmt.Errorf("failed to create session manager: %w", err) } @@ -82,22 +95,31 @@ func NewServer(config *Config) (*Server, error) { sessionManager: sessionManager, storeClient: store.Storage(), httpTransport: httpTransport, + jwtManager: jwtManager, } - // Initialize JWT manager for signing requests to sandboxes - jwtManager, err := NewJWTManager() - if err != nil { - return nil, fmt.Errorf("failed to create JWT manager: %w", err) - } - - // Try to load existing keys from secret or store new ones - if err := jwtManager.TryStoreOrLoadJWTKeySecret(context.Background()); err != nil { - return nil, fmt.Errorf("failed to store/load JWT key secret: %w", err) + // Initialize OIDC validator when issuer URL is configured. + if config.OIDCIssuerURL != "" { + if config.OIDCRolesClaim == "" { + return nil, fmt.Errorf("--oidc-roles-claim is required when --oidc-issuer-url is set") + } + if config.OIDCRequiredRole == "" { + return nil, fmt.Errorf("--oidc-required-role is required when --oidc-issuer-url is set") + } + oidcCfg := OIDCConfig{ + IssuerURL: config.OIDCIssuerURL, + Audience: config.OIDCAudience, + RolesClaim: config.OIDCRolesClaim, + } + validator, err := NewOIDCValidator(context.Background(), oidcCfg) + if err != nil { + return nil, fmt.Errorf("failed to initialize OIDC validator: %w", err) + } + server.oidcValidator = validator + klog.Infof("External authentication enabled: issuer=%s, audience=%s, rolesClaim=%s, requiredRole=%s", + config.OIDCIssuerURL, config.OIDCAudience, config.OIDCRolesClaim, config.OIDCRequiredRole) } - server.jwtManager = jwtManager - klog.Info("JWT manager initialized successfully") - // Setup routes server.setupRoutes() @@ -141,7 +163,12 @@ func (s *Server) setupRoutes() { // Add middleware v1.Use(gin.Logger()) v1.Use(gin.Recovery()) - + // External auth: validate OIDC JWT (no-op when oidcValidator is nil) + v1.Use(s.oidcAuthMiddleware()) + // RBAC: require the configured role (only when auth is enabled) + if s.oidcValidator != nil { + v1.Use(requireRole(s.config.OIDCRequiredRole)) + } v1.Use(s.concurrencyLimitMiddleware()) // Apply concurrency limit to API routes // Agent invoke requests (support GET/POST, since downstream uses these methods) diff --git a/pkg/router/session_manager.go b/pkg/router/session_manager.go index 960f3404..c6894921 100644 --- a/pkg/router/session_manager.go +++ b/pkg/router/session_manager.go @@ -56,13 +56,15 @@ type manager struct { workloadMgrAddr string httpClient *http.Client closeMTLS func() + jwtManager *JWTManager } // NewSessionManager returns a SessionManager implementation. // storeClient is used to query sandbox information from store // workloadMgrAddr is read from the environment variable WORKLOAD_MANAGER_URL. // When mtlsCfg includes cert, key, and CA paths, the HTTP client uses mTLS to connect to WorkloadManager. -func NewSessionManager(storeClient store.Store, mtlsCfg *mtls.Config) (SessionManager, error) { +// jwtMgr is used to sign identity JWTs forwarded to WorkloadManager (may be nil when auth is disabled). +func NewSessionManager(storeClient store.Store, mtlsCfg *mtls.Config, jwtMgr *JWTManager) (SessionManager, error) { workloadMgrAddr := os.Getenv("WORKLOAD_MANAGER_URL") if workloadMgrAddr == "" { return nil, fmt.Errorf("WORKLOAD_MANAGER_URL environment variable is not set") @@ -113,6 +115,7 @@ func NewSessionManager(storeClient store.Store, mtlsCfg *mtls.Config) (SessionMa storeClient: storeClient, workloadMgrAddr: workloadMgrAddr, closeMTLS: closeMTLS, + jwtManager: jwtMgr, httpClient: &http.Client{ Timeout: 2 * time.Minute, // consistent with manager setting Transport: transport, @@ -183,6 +186,8 @@ func (m *manager) createSandbox(ctx context.Context, namespace string, name stri if token := loadWorkloadManagerAuthToken(); token != "" { req.Header.Set("Authorization", "Bearer "+token) } + // Forward external user identity for downstream tracking. + m.setIdentityHeader(ctx, req) // Send the request resp, err := m.httpClient.Do(req) @@ -233,6 +238,30 @@ func (m *manager) createSandbox(ctx context.Context, namespace string, name stri return sandbox, nil } +// setIdentityHeader signs and attaches the caller's identity as a JWT. +func (m *manager) setIdentityHeader(ctx context.Context, req *http.Request) { + if m.jwtManager == nil { + return + } + claims, ok := ctx.Value(contextKeyOIDCClaims).(*Claims) + if !ok || claims == nil { + return + } + identityClaims := map[string]interface{}{ + "sub": claims.Subject, + "aud": "workloadmanager", + } + if claims.Email != "" { + identityClaims["email"] = claims.Email + } + identityToken, err := m.jwtManager.GenerateToken(identityClaims) + if err != nil { + klog.Warningf("Failed to sign identity JWT for WM: %v", err) + return + } + req.Header.Set("X-AgentCube-User-Identity", identityToken) +} + func loadWorkloadManagerAuthToken() string { b, err := os.ReadFile(serviceAccountTokenPath) if err != nil { diff --git a/pkg/router/session_manager_test.go b/pkg/router/session_manager_test.go index 8b585f4b..58b464e4 100644 --- a/pkg/router/session_manager_test.go +++ b/pkg/router/session_manager_test.go @@ -557,7 +557,7 @@ func TestNewSessionManager_MTLSEnabled_ValidCerts(t *testing.T) { t.Setenv("WORKLOAD_MANAGER_URL", "https://localhost:8080") cfg := &mtls.Config{CertFile: certFile, KeyFile: keyFile, CAFile: caFile} - sm, err := NewSessionManager(&fakeStoreClient{}, cfg) + sm, err := NewSessionManager(&fakeStoreClient{}, cfg, nil) if err != nil { t.Fatalf("NewSessionManager with mTLS failed: %v", err) } @@ -590,7 +590,7 @@ func TestNewSessionManager_MTLSDisabled(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - sm, err := NewSessionManager(&fakeStoreClient{}, tt.cfg) + sm, err := NewSessionManager(&fakeStoreClient{}, tt.cfg, nil) if err != nil { t.Fatalf("NewSessionManager without mTLS failed: %v", err) } From 333305d9cd1d3613051de8f7f07908446ddb68a5 Mon Sep 17 00:00:00 2001 From: Mahil Patel Date: Fri, 5 Jun 2026 18:19:01 +0530 Subject: [PATCH 11/14] feat: enhanced OIDC validation and JWT handling and added tests for identity header Signed-off-by: Mahil Patel --- docs/design/keycloak-proposal.md | 3 +- pkg/router/handlers.go | 2 +- pkg/router/oidc.go | 90 +++++++++++++++--------------- pkg/router/oidc_test.go | 9 +-- pkg/router/session_manager.go | 2 +- pkg/router/session_manager_test.go | 68 ++++++++++++++++++++++ 6 files changed, 119 insertions(+), 55 deletions(-) diff --git a/docs/design/keycloak-proposal.md b/docs/design/keycloak-proposal.md index 333da4bf..8bf96493 100644 --- a/docs/design/keycloak-proposal.md +++ b/docs/design/keycloak-proposal.md @@ -275,6 +275,7 @@ The core AgentCube chart has provider-agnostic OIDC configuration under `router. - {{ printf "--oidc-issuer-url=%s" .Values.router.oidc.issuerUrl | quote }} - {{ printf "--oidc-audience=%s" .Values.router.oidc.audience | quote }} - {{ printf "--oidc-roles-claim=%s" .Values.router.oidc.rolesClaim | quote }} +- {{ printf "--oidc-required-role=%s" .Values.router.oidc.requiredRole | quote }} {{- end }} ``` @@ -295,7 +296,7 @@ helm install agentcube manifests/charts/base -n agentcube-system \ --set router.oidc.requiredRole=sandbox:invoke ``` -Two new flags in `cmd/router/main.go`: +Four new flags in `cmd/router/main.go`: | Flag | Default | Description | |------|---------|-------------| diff --git a/pkg/router/handlers.go b/pkg/router/handlers.go index 2c7cb2ca..5de1398d 100644 --- a/pkg/router/handlers.go +++ b/pkg/router/handlers.go @@ -247,7 +247,7 @@ func (s *Server) generateSandboxJWT(c *gin.Context, sandbox *types.SandboxInfo) claims["user_email"] = oidcClaims.Email } } - + token, err := s.jwtManager.GenerateToken(claims) if err != nil { klog.Errorf("Failed to generate JWT token (session: %s): %v", sandbox.SessionID, err) diff --git a/pkg/router/oidc.go b/pkg/router/oidc.go index f8ad3634..c439bdfb 100644 --- a/pkg/router/oidc.go +++ b/pkg/router/oidc.go @@ -76,7 +76,7 @@ func NewOIDCValidator(ctx context.Context, cfg OIDCConfig) (*OIDCValidator, erro } // Create a RemoteKeySet for JWKS caching and automatic key rotation. - keySet := gooidc.NewRemoteKeySet(context.Background(), providerClaims.JWKSURL) + keySet := gooidc.NewRemoteKeySet(ctx, providerClaims.JWKSURL) return &OIDCValidator{ keySet: keySet, @@ -86,33 +86,6 @@ func NewOIDCValidator(ctx context.Context, cfg OIDCConfig) (*OIDCValidator, erro }, nil } -// accessTokenClaims represents the standard and custom claims in an access token. -type accessTokenClaims struct { - Issuer string `json:"iss"` - Subject string `json:"sub"` - Audience claimStrings `json:"aud"` - Expiry float64 `json:"exp"` - NotBefore float64 `json:"nbf"` - Email string `json:"email"` -} - -// claimStrings handles the JWT "aud" claim which can be either a string or []string. -type claimStrings []string - -func (cs *claimStrings) UnmarshalJSON(data []byte) error { - var single string - if err := json.Unmarshal(data, &single); err == nil { - *cs = []string{single} - return nil - } - var multi []string - if err := json.Unmarshal(data, &multi); err != nil { - return err - } - *cs = multi - return nil -} - // ValidateToken verifies the JWT signature, standard claims, and extracts roles. func (v *OIDCValidator) ValidateToken(ctx context.Context, rawToken string) (*Claims, error) { if rawToken == "" { @@ -130,47 +103,52 @@ func (v *OIDCValidator) ValidateToken(ctx context.Context, rawToken string) (*Cl return nil, fmt.Errorf("token signature verification failed: %w", err) } - // Parse the standard claims from the verified payload. - var tokenClaims accessTokenClaims - if err := json.Unmarshal(payload, &tokenClaims); err != nil { + // Parse all claims into a single map to avoid double-unmarshaling overhead. + var allClaims map[string]interface{} + if err := json.Unmarshal(payload, &allClaims); err != nil { return nil, fmt.Errorf("failed to parse token claims: %w", err) } + // Extract standard claims + issuer, _ := allClaims["iss"].(string) + subject, _ := allClaims["sub"].(string) + email, _ := allClaims["email"].(string) + expiry, _ := allClaims["exp"].(float64) + notBefore, _ := allClaims["nbf"].(float64) + + audiences := parseAudienceClaim(allClaims["aud"]) + // Validate issuer - if tokenClaims.Issuer != v.issuer { - return nil, fmt.Errorf("invalid issuer: got %q, expected %q", tokenClaims.Issuer, v.issuer) + if issuer != v.issuer { + return nil, fmt.Errorf("invalid issuer: got %q, expected %q", issuer, v.issuer) } // Validate expiration - if tokenClaims.Expiry == 0 { + if expiry == 0 { return nil, fmt.Errorf("token missing required exp claim") } - if time.Now().After(time.Unix(int64(tokenClaims.Expiry), 0)) { + if time.Now().After(time.Unix(int64(expiry), 0)) { return nil, fmt.Errorf("token has expired") } // Validate not-before (if present) - if tokenClaims.NotBefore != 0 && time.Now().Before(time.Unix(int64(tokenClaims.NotBefore), 0)) { + if notBefore != 0 && time.Now().Before(time.Unix(int64(notBefore), 0)) { return nil, fmt.Errorf("token is not yet valid") } // Validate audience if v.audience != "" { - if !slices.Contains(tokenClaims.Audience, v.audience) { - return nil, fmt.Errorf("invalid audience: token audiences %v do not include %q", []string(tokenClaims.Audience), v.audience) + if !slices.Contains(audiences, v.audience) { + return nil, fmt.Errorf("invalid audience: token audiences %v do not include %q", audiences, v.audience) } } - // Extract roles from the full claims map using the configured path. - var allClaims map[string]interface{} - if err := json.Unmarshal(payload, &allClaims); err != nil { - return nil, fmt.Errorf("failed to parse token claims for role extraction: %w", err) - } + // Extract roles from the map using the configured path. roles := extractRolesFromClaims(allClaims, v.rolesClaim) return &Claims{ - Subject: tokenClaims.Subject, - Email: tokenClaims.Email, + Subject: subject, + Email: email, Roles: roles, }, nil } @@ -210,6 +188,26 @@ func extractRolesFromClaims(claims map[string]interface{}, path string) []string return roles } +// parseAudienceClaim extracts the "aud" claim which can be either a string or []string. +func parseAudienceClaim(audVal interface{}) []string { + if audVal == nil { + return nil + } + switch v := audVal.(type) { + case string: + return []string{v} + case []interface{}: + var audiences []string + for _, item := range v { + if s, ok := item.(string); ok { + audiences = append(audiences, s) + } + } + return audiences + } + return nil +} + // allowedSigningAlgs contains supported asymmetric algorithms for JWT signatures. var allowedSigningAlgs = map[string]bool{ "RS256": true, @@ -227,7 +225,7 @@ var allowedSigningAlgs = map[string]bool{ // checkTokenAlgorithm verifies the JWT signing algorithm is supported. func (v *OIDCValidator) checkTokenAlgorithm(rawToken string) error { parts := strings.SplitN(rawToken, ".", 3) - if len(parts) < 2 { + if len(parts) != 3 { return fmt.Errorf("malformed JWT: expected 3 parts, got %d", len(parts)) } diff --git a/pkg/router/oidc_test.go b/pkg/router/oidc_test.go index 37600802..db2a4287 100644 --- a/pkg/router/oidc_test.go +++ b/pkg/router/oidc_test.go @@ -42,13 +42,9 @@ func testOIDCServer(t *testing.T) (*httptest.Server, *rsa.PrivateKey) { privateKey, err := rsa.GenerateKey(rand.Reader, 2048) require.NoError(t, err) + var issuer string mux := http.NewServeMux() - // We need the server URL for the discovery document, so create the server first - // with a temporary handler, then set the real handler. - ts := httptest.NewServer(nil) - issuer := ts.URL - mux.HandleFunc("/.well-known/openid-configuration", func(w http.ResponseWriter, _ *http.Request) { discovery := map[string]interface{}{ "issuer": issuer, @@ -75,7 +71,8 @@ func testOIDCServer(t *testing.T) (*httptest.Server, *rsa.PrivateKey) { _ = json.NewEncoder(w).Encode(jwks) }) - ts.Config.Handler = mux + ts := httptest.NewServer(mux) + issuer = ts.URL return ts, privateKey } diff --git a/pkg/router/session_manager.go b/pkg/router/session_manager.go index c6894921..57def72a 100644 --- a/pkg/router/session_manager.go +++ b/pkg/router/session_manager.go @@ -56,7 +56,7 @@ type manager struct { workloadMgrAddr string httpClient *http.Client closeMTLS func() - jwtManager *JWTManager + jwtManager *JWTManager } // NewSessionManager returns a SessionManager implementation. diff --git a/pkg/router/session_manager_test.go b/pkg/router/session_manager_test.go index 58b464e4..9c6a0dab 100644 --- a/pkg/router/session_manager_test.go +++ b/pkg/router/session_manager_test.go @@ -674,3 +674,71 @@ func writePEMFile(t *testing.T, path, blockType string, data []byte) { t.Fatalf("encode PEM %s: %v", path, err) } } + +// ---- tests: setIdentityHeader ---- + +func TestSetIdentityHeader_WithClaims(t *testing.T) { + jwtMgr, err := NewJWTManager() + if err != nil { + t.Fatalf("failed to create JWT manager: %v", err) + } + + m := &manager{ + jwtManager: jwtMgr, + } + + req, _ := http.NewRequest("GET", "/", nil) + claims := &Claims{ + Subject: "user-123", + Email: "test@example.com", + } + ctx := context.WithValue(context.Background(), contextKeyOIDCClaims, claims) + + m.setIdentityHeader(ctx, req) + + token := req.Header.Get("X-AgentCube-User-Identity") + if token == "" { + t.Fatalf("expected X-AgentCube-User-Identity header to be set") + } +} + +func TestSetIdentityHeader_NoClaimsOrNoManager(t *testing.T) { + jwtMgr, err := NewJWTManager() + if err != nil { + t.Fatalf("failed to create JWT manager: %v", err) + } + + tests := []struct { + name string + jwtManager *JWTManager + ctx context.Context + }{ + { + name: "nil jwt manager", + jwtManager: nil, + ctx: context.WithValue(context.Background(), contextKeyOIDCClaims, &Claims{Subject: "user"}), + }, + { + name: "no claims in context", + jwtManager: jwtMgr, + ctx: context.Background(), + }, + { + name: "wrong type in context", + jwtManager: jwtMgr, + ctx: context.WithValue(context.Background(), contextKeyOIDCClaims, "not-a-claims-object"), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + m := &manager{jwtManager: tt.jwtManager} + req, _ := http.NewRequest("GET", "/", nil) + m.setIdentityHeader(tt.ctx, req) + + if token := req.Header.Get("X-AgentCube-User-Identity"); token != "" { + t.Errorf("expected no identity header, got %q", token) + } + }) + } +} From e5dcce32cbf9cc59ebc42c3e8cff8871159af1ce Mon Sep 17 00:00:00 2001 From: Mahil Patel Date: Sat, 6 Jun 2026 03:55:03 +0530 Subject: [PATCH 12/14] feat(auth): implemented OIDC integration, Python SDK auth, and RLAC enforcement - Enforced resource-level access control (RLAC) in backend and reject router JWTs missing sub claim. - Added Python SDK pluggable auth providers and routed data plane traffic via Router proxy. - Wired Keycloak realm imports, client secrets, and service account roles dynamically. - Added OIDC E2E tests Signed-off-by: Mahil Patel --- docs/design/keycloak-proposal.md | 5 + .../addons/keycloak/templates/deployment.yaml | 24 +++ .../keycloak/templates/realm-config.yaml | 44 ++++- manifests/charts/addons/keycloak/values.yaml | 5 + pkg/common/types/sandbox.go | 2 + pkg/router/handlers.go | 45 +++++ pkg/router/handlers_test.go | 94 ++++++++++ pkg/router/oidc.go | 5 + pkg/router/oidc_test.go | 22 +++ pkg/router/session_manager.go | 1 + pkg/workloadmanager/handlers.go | 63 ++++--- pkg/workloadmanager/identity.go | 105 +++++++++++ pkg/workloadmanager/identity_test.go | 171 ++++++++++++++++++ pkg/workloadmanager/k8s_client.go | 1 + pkg/workloadmanager/sandbox_helper.go | 2 + pkg/workloadmanager/sandbox_helper_test.go | 65 +++++++ pkg/workloadmanager/workload_builder.go | 16 ++ pkg/workloadmanager/workload_builder_test.go | 102 +++++++++++ sdk-python/agentcube/__init__.py | 9 +- sdk-python/agentcube/agent_runtime.py | 11 +- sdk-python/agentcube/auth.py | 94 ++++++++++ .../clients/agent_runtime_data_plane.py | 26 ++- .../clients/code_interpreter_data_plane.py | 13 +- sdk-python/agentcube/clients/control_plane.py | 52 ++++-- sdk-python/agentcube/code_interpreter.py | 20 +- sdk-python/tests/test_auth.py | 145 +++++++++++++++ test/e2e/e2e_test.go | 87 +++++++++ test/e2e/run_e2e.sh | 88 +++++++++ test/e2e/test_oidc_auth.py | 67 +++++++ 29 files changed, 1331 insertions(+), 53 deletions(-) create mode 100644 pkg/workloadmanager/identity.go create mode 100644 pkg/workloadmanager/identity_test.go create mode 100644 sdk-python/agentcube/auth.py create mode 100644 sdk-python/tests/test_auth.py create mode 100644 test/e2e/test_oidc_auth.py diff --git a/docs/design/keycloak-proposal.md b/docs/design/keycloak-proposal.md index 8bf96493..9edb612f 100644 --- a/docs/design/keycloak-proposal.md +++ b/docs/design/keycloak-proposal.md @@ -224,6 +224,11 @@ The owner ID is returned in the create sandbox response so the Router can persis if s.oidcValidator != nil { claims := extractClaims(c) if claims != nil { + // Admin role bypasses RLAC ownership checks + if slices.Contains(claims.Roles, "admin") { + return true + } + if sandbox.OwnerID == "" { c.JSON(http.StatusForbidden, gin.H{"error": "sandbox has no owner record"}) return diff --git a/manifests/charts/addons/keycloak/templates/deployment.yaml b/manifests/charts/addons/keycloak/templates/deployment.yaml index 63ea988d..a9738cee 100644 --- a/manifests/charts/addons/keycloak/templates/deployment.yaml +++ b/manifests/charts/addons/keycloak/templates/deployment.yaml @@ -13,6 +13,9 @@ {{- end }} {{- end }} {{- if not .Values.devMode }} +{{- if not (or (and .Values.proxy.httpEnabled .Values.proxy.headers) .Values.tls.existingSecret) }} + {{- fail "Production mode (devMode=false) requires either: (1) TLS-termination at proxy (set proxy.httpEnabled=true AND proxy.headers) OR (2) TLS-termination at Keycloak (set tls.existingSecret)" }} +{{- end }} {{- if empty .Values.existingSecret }} {{- fail "existingSecret is required when devMode is false (production mode requires an external Secret for the admin password)" }} {{- end }} @@ -122,6 +125,12 @@ spec: - name: KC_HOSTNAME value: {{ .Values.proxy.hostname | quote }} {{- end }} + {{- if .Values.tls.existingSecret }} + - name: KC_HTTPS_CERTIFICATE_FILE + value: "/opt/keycloak/conf/certs/tls.crt" + - name: KC_HTTPS_CERTIFICATE_KEY_FILE + value: "/opt/keycloak/conf/certs/tls.key" + {{- end }} - name: KC_CLIENT_SERVICE_SECRET valueFrom: secretKeyRef: @@ -141,6 +150,11 @@ spec: - name: realm-config mountPath: /opt/keycloak/data/import readOnly: true + {{- if .Values.tls.existingSecret }} + - name: certificates + mountPath: /opt/keycloak/conf/certs + readOnly: true + {{- end }} # Keycloak's JVM + realm import takes 60-90s. The startup probe gives a 300s # window before liveness checks begin, preventing premature pod restarts. startupProbe: @@ -165,6 +179,16 @@ spec: - name: realm-config secret: secretName: keycloak-realm-config + {{- if .Values.tls.existingSecret }} + - name: certificates + secret: + secretName: {{ .Values.tls.existingSecret }} + items: + - key: {{ .Values.tls.secretCertificateKey | default "tls.crt" }} + path: tls.crt + - key: {{ .Values.tls.secretPrivateKeyKey | default "tls.key" }} + path: tls.key + {{- end }} --- apiVersion: v1 diff --git a/manifests/charts/addons/keycloak/templates/realm-config.yaml b/manifests/charts/addons/keycloak/templates/realm-config.yaml index d7eba256..4c54ac90 100644 --- a/manifests/charts/addons/keycloak/templates/realm-config.yaml +++ b/manifests/charts/addons/keycloak/templates/realm-config.yaml @@ -69,7 +69,7 @@ stringData: "enabled": true, "protocol": "openid-connect", "publicClient": false, - "secret": "${env.KC_CLIENT_SERVICE_SECRET}", + "secret": "${KC_CLIENT_SERVICE_SECRET}", "standardFlowEnabled": false, "directAccessGrantsEnabled": false, "serviceAccountsEnabled": true, @@ -125,7 +125,7 @@ stringData: "enabled": true, "protocol": "openid-connect", "publicClient": false, - "secret": "${env.KC_CLIENT_ROUTER_SECRET}", + "secret": "${KC_CLIENT_ROUTER_SECRET}", "standardFlowEnabled": false, "directAccessGrantsEnabled": false, "serviceAccountsEnabled": true, @@ -139,12 +139,25 @@ stringData: "enabled": true, "protocol": "openid-connect", "publicClient": false, - "secret": "${env.KC_CLIENT_ADMIN_SECRET}", + "secret": "${KC_CLIENT_ADMIN_SECRET}", "standardFlowEnabled": false, "directAccessGrantsEnabled": false, "serviceAccountsEnabled": true, "redirectUris": [], - "webOrigins": [] + "webOrigins": [], + "protocolMappers": [ + { + "name": "agentcube-api-audience", + "protocol": "openid-connect", + "protocolMapper": "oidc-audience-mapper", + "consentRequired": false, + "config": { + "included.custom.audience": "agentcube-api", + "id.token.claim": "false", + "access.token.claim": "true" + } + } + ] } ], @@ -156,6 +169,29 @@ stringData: { "client": "agentcube-sdk", "roles": ["sandbox:invoke"] + }, + { + "client": "agentcube-admin", + "roles": ["sandbox:invoke", "admin"] + } + ], + + "users": [ + { + "username": "service-account-agentcube-admin", + "enabled": true, + "serviceAccountClientId": "agentcube-admin", + "realmRoles": [ + "admin" + ] + }, + { + "username": "service-account-agentcube-service", + "enabled": true, + "serviceAccountClientId": "agentcube-service", + "realmRoles": [ + "sandbox:invoke" + ] } ] } diff --git a/manifests/charts/addons/keycloak/values.yaml b/manifests/charts/addons/keycloak/values.yaml index 0af0bee0..6853b760 100644 --- a/manifests/charts/addons/keycloak/values.yaml +++ b/manifests/charts/addons/keycloak/values.yaml @@ -47,6 +47,11 @@ clients: secret: "" admin: secret: "" +# TLS configuration for Keycloak terminating TLS directly (devMode=false only) +tls: + existingSecret: "" # Name of pre-existing Secret containing tls.crt and tls.key + secretCertificateKey: "tls.crt" # Key inside the secret holding the certificate + secretPrivateKeyKey: "tls.key" # Key inside the secret holding the private key sdk: # WARNING: Wildcard URIs (e.g. 'http://localhost*') pose security risks (open redirect vulnerabilities) in production. # When devMode is false, the deployment will fail if these contain wildcards. diff --git a/pkg/common/types/sandbox.go b/pkg/common/types/sandbox.go index b829f780..b79d3b8a 100644 --- a/pkg/common/types/sandbox.go +++ b/pkg/common/types/sandbox.go @@ -30,6 +30,7 @@ type SandboxInfo struct { Name string `json:"name"` EntryPoints []SandboxEntryPoint `json:"entryPoints"` SessionID string `json:"sessionId"` + OwnerID string `json:"ownerID,omitempty"` CreatedAt time.Time `json:"createdAt"` ExpiresAt time.Time `json:"expiresAt"` // IdleTimeout is the per-sandbox idle timeout configured via SessionTimeout on the @@ -62,6 +63,7 @@ type CreateSandboxResponse struct { SandboxID string `json:"sandboxId"` SandboxName string `json:"sandboxName"` EntryPoints []SandboxEntryPoint `json:"entryPoints"` + OwnerID string `json:"ownerID,omitempty"` } func (car *CreateSandboxRequest) Validate() error { diff --git a/pkg/router/handlers.go b/pkg/router/handlers.go index 5de1398d..811b87ce 100644 --- a/pkg/router/handlers.go +++ b/pkg/router/handlers.go @@ -23,6 +23,7 @@ import ( "net/http" "net/http/httputil" "net/url" + "slices" "strings" "time" @@ -69,6 +70,11 @@ func (s *Server) handleInvoke(c *gin.Context, namespace, name, path, kind string return } + // RLAC: verify the caller owns this sandbox (only for existing sessions) + if sessionID != "" && !s.checkSandboxOwnership(c, sandbox) { + return + } + // Update session activity in store when receiving request if err := s.storeClient.UpdateSessionLastActivity(c.Request.Context(), sandbox.SessionID, time.Now()); err != nil { klog.Warningf("Failed to update sandbox with session-id %s last activity for request: %v", sandbox.SessionID, err) @@ -83,6 +89,45 @@ func (s *Server) handleInvoke(c *gin.Context, namespace, name, path, kind string } } +// checkSandboxOwnership verifies the caller owns the sandbox or not. +func (s *Server) checkSandboxOwnership(c *gin.Context, sandbox *types.SandboxInfo) bool { + if s.oidcValidator == nil { + return true + } + claims := extractClaims(c) + if claims == nil { + return true + } + + // Admin role bypasses RLAC ownership checks + if slices.Contains(claims.Roles, "admin") { + return true + } + + // Fail-closed: deny access to sandboxes without an owner record + if sandbox.OwnerID == "" { + klog.V(2).Infof("RLAC denied: sandbox %s has no owner record (session: %s, caller: %s)", + sandbox.Name, sandbox.SessionID, claims.Subject) + c.JSON(http.StatusForbidden, gin.H{ + "error": "sandbox has no owner record", + "code": "FORBIDDEN", + }) + c.Abort() + return false + } + if sandbox.OwnerID != claims.Subject { + klog.V(2).Infof("RLAC denied: sandbox %s owned by %s, caller is %s", + sandbox.Name, sandbox.OwnerID, claims.Subject) + c.JSON(http.StatusForbidden, gin.H{ + "error": "you do not own this sandbox", + "code": "FORBIDDEN", + }) + c.Abort() + return false + } + return true +} + func (s *Server) handleGetSandboxError(c *gin.Context, err error) { // Fallback for other APIStatus errors if statusErr, ok := err.(apierrors.APIStatus); ok { diff --git a/pkg/router/handlers_test.go b/pkg/router/handlers_test.go index adeed670..624c3a5d 100644 --- a/pkg/router/handlers_test.go +++ b/pkg/router/handlers_test.go @@ -553,3 +553,97 @@ func TestConcurrencyLimitMiddleware_Overload(t *testing.T) { // Wait for first request to complete <-done } + +func TestCheckSandboxOwnership(t *testing.T) { + setupEnv() + defer teardownEnv() + + // Minimal mock OIDC validator (non-nil means auth is enabled) + mockValidator := &OIDCValidator{} + + tests := []struct { + name string + oidcValidator *OIDCValidator + claims *Claims + sandboxOwnerID string + expectAllowed bool + expectCode int + }{ + { + name: "auth disabled", + oidcValidator: nil, + sandboxOwnerID: "", + expectAllowed: true, + }, + { + name: "no claims in context", + oidcValidator: mockValidator, + claims: nil, + sandboxOwnerID: "user-123", + expectAllowed: true, + }, + { + name: "matching owner", + oidcValidator: mockValidator, + claims: &Claims{Subject: "user-123", Roles: []string{"sandbox:invoke"}}, + sandboxOwnerID: "user-123", + expectAllowed: true, + }, + { + name: "mismatched owner", + oidcValidator: mockValidator, + claims: &Claims{Subject: "user-456", Roles: []string{"sandbox:invoke"}}, + sandboxOwnerID: "user-123", + expectAllowed: false, + expectCode: http.StatusForbidden, + }, + { + name: "empty owner fail-closed", + oidcValidator: mockValidator, + claims: &Claims{Subject: "user-123", Roles: []string{"sandbox:invoke"}}, + sandboxOwnerID: "", + expectAllowed: false, + expectCode: http.StatusForbidden, + }, + { + name: "admin bypasses RLAC", + oidcValidator: mockValidator, + claims: &Claims{Subject: "admin-user", Roles: []string{"admin", "sandbox:invoke"}}, + sandboxOwnerID: "user-123", + expectAllowed: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + server := &Server{ + config: &Config{Port: "8080"}, + oidcValidator: tt.oidcValidator, + } + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Request, _ = http.NewRequest("POST", "/test", nil) + + // Set claims in context if provided + if tt.claims != nil { + c.Set(string(contextKeyOIDCClaims), tt.claims) + } + + sandbox := &types.SandboxInfo{ + Name: "test-sandbox", + SessionID: "test-session", + OwnerID: tt.sandboxOwnerID, + } + + allowed := server.checkSandboxOwnership(c, sandbox) + + if allowed != tt.expectAllowed { + t.Errorf("expected allowed=%v, got %v", tt.expectAllowed, allowed) + } + if !tt.expectAllowed && w.Code != tt.expectCode { + t.Errorf("expected status %d, got %d", tt.expectCode, w.Code) + } + }) + } +} diff --git a/pkg/router/oidc.go b/pkg/router/oidc.go index c439bdfb..7dd1999a 100644 --- a/pkg/router/oidc.go +++ b/pkg/router/oidc.go @@ -123,6 +123,11 @@ func (v *OIDCValidator) ValidateToken(ctx context.Context, rawToken string) (*Cl return nil, fmt.Errorf("invalid issuer: got %q, expected %q", issuer, v.issuer) } + // Validate subject + if subject == "" { + return nil, fmt.Errorf("token missing required sub claim") + } + // Validate expiration if expiry == 0 { return nil, fmt.Errorf("token missing required exp claim") diff --git a/pkg/router/oidc_test.go b/pkg/router/oidc_test.go index db2a4287..160a08e9 100644 --- a/pkg/router/oidc_test.go +++ b/pkg/router/oidc_test.go @@ -269,3 +269,25 @@ func TestExtractRolesFromClaims(t *testing.T) { }) } } + +func TestValidateToken_MissingSubject(t *testing.T) { + ts, privateKey := testOIDCServer(t) + defer ts.Close() + + validator, err := NewOIDCValidator(context.Background(), OIDCConfig{ + IssuerURL: ts.URL, + Audience: "agentcube-api", + }) + require.NoError(t, err) + + // Mint a token with no "sub" claim + rawToken := mintTestJWT(t, privateKey, jwt.MapClaims{ + "iss": ts.URL, + "aud": "agentcube-api", + "exp": time.Now().Add(5 * time.Minute).Unix(), + }) + + _, err = validator.ValidateToken(context.Background(), rawToken) + assert.Error(t, err) + assert.Contains(t, err.Error(), "token missing required sub claim") +} diff --git a/pkg/router/session_manager.go b/pkg/router/session_manager.go index 57def72a..7d8ed0dc 100644 --- a/pkg/router/session_manager.go +++ b/pkg/router/session_manager.go @@ -232,6 +232,7 @@ func (m *manager) createSandbox(ctx context.Context, namespace string, name stri SandboxID: res.SandboxID, Name: res.SandboxName, SessionID: res.SessionID, + OwnerID: res.OwnerID, EntryPoints: res.EntryPoints, } diff --git a/pkg/workloadmanager/handlers.go b/pkg/workloadmanager/handlers.go index 7d417c6e..3d03d47d 100644 --- a/pkg/workloadmanager/handlers.go +++ b/pkg/workloadmanager/handlers.go @@ -120,6 +120,11 @@ func (s *Server) handleSandboxCreate(c *gin.Context, kind string) { return } + // Set ownership from the Router-signed identity JWT + if ownerID := extractOwnerID(c.Request); ownerID != "" { + sandboxEntry.OwnerID = ownerID + } + // Calculate sandbox name and namespace before creating sandboxName := sandbox.Name namespace := sandbox.Namespace @@ -143,38 +148,43 @@ func (s *Server) handleSandboxCreate(c *gin.Context, kind string) { response, err := s.createSandbox(c.Request.Context(), dynamicClient, sandbox, sandboxClaim, sandboxEntry, resultChan) if err != nil { - // Client disconnected — abort with 499 so logs/metrics reflect the cancellation. - if errors.Is(err, context.Canceled) { - klog.Warningf("create sandbox aborted %s/%s: client disconnected", sandbox.Namespace, sandbox.Name) - c.AbortWithStatus(499) - return - } - // Deadline exceeded — client may still be connected; return 504 so they get a meaningful response. - if errors.Is(err, context.DeadlineExceeded) { - klog.Warningf("create sandbox timed out %s/%s: request deadline exceeded", sandbox.Namespace, sandbox.Name) - respondError(c, http.StatusGatewayTimeout, "request timed out") - return - } - // Internal sandbox-ready wait timed out; surface as 504 rather than a generic 500. - if errors.Is(err, errSandboxCreationTimeout) { - klog.Warningf("create sandbox timed out %s/%s: sandbox did not become ready within deadline", sandbox.Namespace, sandbox.Name) - respondError(c, http.StatusGatewayTimeout, err.Error()) - return - } - klog.Errorf("create sandbox failed %s/%s: %v", sandbox.Namespace, sandbox.Name, err) - // Internal errors (store, K8s API) must not leak system details to callers; - // sandbox-level failures (terminal pod state, timeout) are safe to surface. - msg := err.Error() - if apierrors.IsInternalError(err) { - msg = "internal server error" - } - respondError(c, http.StatusInternalServerError, msg) + respondCreateError(c, sandbox.Namespace, sandbox.Name, err) return } respondJSON(c, http.StatusOK, response) } +// respondCreateError maps sandbox-creation errors to the appropriate HTTP response. +func respondCreateError(c *gin.Context, namespace, name string, err error) { + // Client disconnected — abort with 499 so logs/metrics reflect the cancellation. + if errors.Is(err, context.Canceled) { + klog.Warningf("create sandbox aborted %s/%s: client disconnected", namespace, name) + c.AbortWithStatus(499) + return + } + // Deadline exceeded — client may still be connected; return 504 so they get a meaningful response. + if errors.Is(err, context.DeadlineExceeded) { + klog.Warningf("create sandbox timed out %s/%s: request deadline exceeded", namespace, name) + respondError(c, http.StatusGatewayTimeout, "request timed out") + return + } + // Internal sandbox-ready wait timed out; surface as 504 rather than a generic 500. + if errors.Is(err, errSandboxCreationTimeout) { + klog.Warningf("create sandbox timed out %s/%s: sandbox did not become ready within deadline", namespace, name) + respondError(c, http.StatusGatewayTimeout, err.Error()) + return + } + klog.Errorf("create sandbox failed %s/%s: %v", namespace, name, err) + // Internal errors (store, K8s API) must not leak system details to callers; + // sandbox-level failures (terminal pod state, timeout) are safe to surface. + msg := err.Error() + if apierrors.IsInternalError(err) { + msg = "internal server error" + } + respondError(c, http.StatusInternalServerError, msg) +} + // createK8sResources creates the K8s sandbox or sandbox claim resource. func (s *Server) createK8sResources(ctx context.Context, dynamicClient dynamic.Interface, sandbox *sandboxv1alpha1.Sandbox, sandboxClaim *extensionsv1alpha1.SandboxClaim) error { if sandboxClaim != nil { @@ -269,6 +279,7 @@ func (s *Server) createSandbox(ctx context.Context, dynamicClient dynamic.Interf SandboxID: storeCacheInfo.SandboxID, SandboxName: sandbox.Name, EntryPoints: storeCacheInfo.EntryPoints, + OwnerID: sandboxEntry.OwnerID, } if err := s.storeClient.UpdateSandbox(ctx, storeCacheInfo); err != nil { diff --git a/pkg/workloadmanager/identity.go b/pkg/workloadmanager/identity.go new file mode 100644 index 00000000..5a4f692f --- /dev/null +++ b/pkg/workloadmanager/identity.go @@ -0,0 +1,105 @@ +/* +Copyright The Volcano Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package workloadmanager + +import ( + "crypto/rsa" + "crypto/sha256" + "crypto/x509" + "encoding/hex" + "encoding/pem" + "fmt" + "net/http" + + "github.com/golang-jwt/jwt/v5" + "k8s.io/klog/v2" +) + +// identityJWTHeader is the header carrying the Router-signed user identity. +const identityJWTHeader = "X-AgentCube-User-Identity" + +// verifyIdentityJWT parses the Router-signed identity JWT, verifies the RSA signature, validates aud=="workloadmanager", and returns the sub claim. +func verifyIdentityJWT(publicKeyPEM string, rawToken string) (string, error) { + block, _ := pem.Decode([]byte(publicKeyPEM)) + if block == nil { + return "", fmt.Errorf("failed to decode public key PEM") + } + + pub, err := x509.ParsePKIXPublicKey(block.Bytes) + if err != nil { + return "", fmt.Errorf("failed to parse public key: %w", err) + } + + rsaPub, ok := pub.(*rsa.PublicKey) + if !ok { + return "", fmt.Errorf("public key is not RSA") + } + + token, err := jwt.Parse(rawToken, func(_ *jwt.Token) (interface{}, error) { + return rsaPub, nil + }, jwt.WithValidMethods([]string{"RS256"})) + if err != nil { + return "", fmt.Errorf("token verification failed: %w", err) + } + + claims, ok := token.Claims.(jwt.MapClaims) + if !ok { + return "", fmt.Errorf("unexpected claims type") + } + + // Validate audience + aud, _ := claims["aud"].(string) + if aud != "workloadmanager" { + return "", fmt.Errorf("invalid audience: %q", aud) + } + + sub, _ := claims["sub"].(string) + if sub == "" { + return "", fmt.Errorf("missing sub claim") + } + + return sub, nil +} + +// extractOwnerID reads the identity JWT from the request header and returns the verified subject, or empty string if the header is absent or invalid. +func extractOwnerID(r *http.Request) string { + rawToken := r.Header.Get(identityJWTHeader) + if rawToken == "" { + return "" + } + + publicKey := GetCachedPublicKey() + if publicKey == "" { + klog.V(2).Info("Identity JWT present but public key not cached, skipping owner extraction") + return "" + } + + sub, err := verifyIdentityJWT(publicKey, rawToken) + if err != nil { + klog.V(2).Infof("Identity JWT verification failed: %v", err) + return "" + } + + return sub +} + +// sha256Short returns the first 63 characters of the hex-encoded SHA-256 hash. +func sha256Short(s string) string { + h := sha256.Sum256([]byte(s)) + full := hex.EncodeToString(h[:]) + return full[:63] +} diff --git a/pkg/workloadmanager/identity_test.go b/pkg/workloadmanager/identity_test.go new file mode 100644 index 00000000..eb07225f --- /dev/null +++ b/pkg/workloadmanager/identity_test.go @@ -0,0 +1,171 @@ +/* +Copyright The Volcano Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package workloadmanager + +import ( + "crypto/rand" + "crypto/rsa" + "crypto/x509" + "encoding/pem" + "testing" + "time" + + "github.com/golang-jwt/jwt/v5" +) + +func generateTestKeyPair(t *testing.T) (*rsa.PrivateKey, string) { + t.Helper() + privateKey, err := rsa.GenerateKey(rand.Reader, 2048) + if err != nil { + t.Fatalf("failed to generate RSA key: %v", err) + } + + pubBytes, err := x509.MarshalPKIXPublicKey(&privateKey.PublicKey) + if err != nil { + t.Fatalf("failed to marshal public key: %v", err) + } + + pubPEM := pem.EncodeToMemory(&pem.Block{ + Type: "PUBLIC KEY", + Bytes: pubBytes, + }) + + return privateKey, string(pubPEM) +} + +func signTestToken(t *testing.T, key *rsa.PrivateKey, claims jwt.MapClaims) string { + t.Helper() + token := jwt.NewWithClaims(jwt.SigningMethodRS256, claims) + signed, err := token.SignedString(key) + if err != nil { + t.Fatalf("failed to sign token: %v", err) + } + return signed +} + +func TestVerifyIdentityJWT_ValidToken(t *testing.T) { + privKey, pubPEM := generateTestKeyPair(t) + + token := signTestToken(t, privKey, jwt.MapClaims{ + "sub": "user-123", + "aud": "workloadmanager", + "exp": time.Now().Add(5 * time.Minute).Unix(), + "iat": time.Now().Unix(), + }) + + sub, err := verifyIdentityJWT(pubPEM, token) + if err != nil { + t.Fatalf("expected no error, got: %v", err) + } + if sub != "user-123" { + t.Errorf("expected sub 'user-123', got %q", sub) + } +} + +func TestVerifyIdentityJWT_ExpiredToken(t *testing.T) { + privKey, pubPEM := generateTestKeyPair(t) + + token := signTestToken(t, privKey, jwt.MapClaims{ + "sub": "user-123", + "aud": "workloadmanager", + "exp": time.Now().Add(-5 * time.Minute).Unix(), + }) + + _, err := verifyIdentityJWT(pubPEM, token) + if err == nil { + t.Fatal("expected error for expired token") + } +} + +func TestVerifyIdentityJWT_WrongAudience(t *testing.T) { + privKey, pubPEM := generateTestKeyPair(t) + + token := signTestToken(t, privKey, jwt.MapClaims{ + "sub": "user-123", + "aud": "wrong-audience", + "exp": time.Now().Add(5 * time.Minute).Unix(), + }) + + _, err := verifyIdentityJWT(pubPEM, token) + if err == nil { + t.Fatal("expected error for wrong audience") + } +} + +func TestVerifyIdentityJWT_TamperedSignature(t *testing.T) { + privKey, pubPEM := generateTestKeyPair(t) + + token := signTestToken(t, privKey, jwt.MapClaims{ + "sub": "user-123", + "aud": "workloadmanager", + "exp": time.Now().Add(5 * time.Minute).Unix(), + }) + + // Use a completely different key to sign the same claims + otherKey, err := rsa.GenerateKey(rand.Reader, 2048) + if err != nil { + t.Fatalf("failed to generate second RSA key: %v", err) + } + tampered := signTestToken(t, otherKey, jwt.MapClaims{ + "sub": "user-123", + "aud": "workloadmanager", + "exp": time.Now().Add(5 * time.Minute).Unix(), + }) + + // Verify the original token works + _, err = verifyIdentityJWT(pubPEM, token) + if err != nil { + t.Fatalf("valid token should pass: %v", err) + } + + // Verify the tampered token (signed with different key) fails + _, err = verifyIdentityJWT(pubPEM, tampered) + if err == nil { + t.Fatal("expected error for token signed with different key") + } +} + +func TestVerifyIdentityJWT_MissingSub(t *testing.T) { + privKey, pubPEM := generateTestKeyPair(t) + + token := signTestToken(t, privKey, jwt.MapClaims{ + "aud": "workloadmanager", + "exp": time.Now().Add(5 * time.Minute).Unix(), + }) + + _, err := verifyIdentityJWT(pubPEM, token) + if err == nil { + t.Fatal("expected error for missing sub") + } +} + +func TestSha256Short(t *testing.T) { + result := sha256Short("test-user-id") + if len(result) != 63 { + t.Errorf("expected length 63, got %d", len(result)) + } + + // Verify deterministic + if sha256Short("test-user-id") != result { + t.Error("sha256Short should be deterministic") + } + + // Verify different inputs produce different outputs + if sha256Short("other-user-id") == result { + t.Error("different inputs should produce different hashes") + } +} diff --git a/pkg/workloadmanager/k8s_client.go b/pkg/workloadmanager/k8s_client.go index 1ebf0387..2c44d8e3 100644 --- a/pkg/workloadmanager/k8s_client.go +++ b/pkg/workloadmanager/k8s_client.go @@ -80,6 +80,7 @@ type K8sClient struct { type sandboxEntry struct { Kind string SessionID string + OwnerID string Ports []runtimev1alpha1.TargetPort IdleTimeout time.Duration } diff --git a/pkg/workloadmanager/sandbox_helper.go b/pkg/workloadmanager/sandbox_helper.go index 322b5d8d..0d0d467c 100644 --- a/pkg/workloadmanager/sandbox_helper.go +++ b/pkg/workloadmanager/sandbox_helper.go @@ -61,6 +61,7 @@ func buildSandboxPlaceHolder(sandboxCR *sandboxv1alpha1.Sandbox, entry *sandboxE return &types.SandboxInfo{ Kind: entry.Kind, SessionID: entry.SessionID, + OwnerID: entry.OwnerID, SandboxNamespace: sandboxCR.GetNamespace(), Name: sandboxCR.GetName(), ExpiresAt: expiresAt, @@ -94,6 +95,7 @@ func buildSandboxInfo(sandbox *sandboxv1alpha1.Sandbox, podIP string, entry *san SandboxNamespace: sandbox.GetNamespace(), EntryPoints: accesses, SessionID: entry.SessionID, + OwnerID: entry.OwnerID, CreatedAt: createdAt, ExpiresAt: expiresAt, Status: getSandboxStatus(sandbox), diff --git a/pkg/workloadmanager/sandbox_helper_test.go b/pkg/workloadmanager/sandbox_helper_test.go index 793108ad..1c8fc917 100644 --- a/pkg/workloadmanager/sandbox_helper_test.go +++ b/pkg/workloadmanager/sandbox_helper_test.go @@ -451,3 +451,68 @@ func TestGetSandboxStatus_TableDriven(t *testing.T) { }) } } + +func TestBuildSandboxPlaceHolder_OwnerID(t *testing.T) { + tests := []struct { + name string + ownerID string + }{ + {"with owner", "user-123"}, + {"without owner", ""}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + sandbox := &sandboxv1alpha1.Sandbox{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-sandbox", + Namespace: "default", + }, + } + entry := &sandboxEntry{ + Kind: types.SandboxKind, + SessionID: "session-123", + OwnerID: tt.ownerID, + IdleTimeout: 15 * time.Minute, + } + + info := buildSandboxPlaceHolder(sandbox, entry) + assert.Equal(t, tt.ownerID, info.OwnerID) + }) + } +} + +func TestBuildSandboxInfo_OwnerID(t *testing.T) { + tests := []struct { + name string + ownerID string + }{ + {"with owner", "user-456"}, + {"without owner", ""}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + sandbox := &sandboxv1alpha1.Sandbox{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-sandbox", + Namespace: "default", + UID: "uid-123", + CreationTimestamp: metav1.Now(), + }, + } + entry := &sandboxEntry{ + Kind: types.SandboxKind, + SessionID: "session-456", + OwnerID: tt.ownerID, + Ports: []runtimev1alpha1.TargetPort{ + {Port: 8080, Protocol: runtimev1alpha1.ProtocolTypeHTTP, PathPrefix: "/"}, + }, + IdleTimeout: 15 * time.Minute, + } + + info := buildSandboxInfo(sandbox, "10.0.0.1", entry) + assert.Equal(t, tt.ownerID, info.OwnerID) + }) + } +} diff --git a/pkg/workloadmanager/workload_builder.go b/pkg/workloadmanager/workload_builder.go index 7e97c7dc..f53917e1 100644 --- a/pkg/workloadmanager/workload_builder.go +++ b/pkg/workloadmanager/workload_builder.go @@ -133,6 +133,7 @@ type buildSandboxParams struct { workloadName string sandboxName string sessionID string + ownerID string ttl time.Duration idleTimeout time.Duration podSpec corev1.PodSpec @@ -145,6 +146,7 @@ type buildSandboxClaimParams struct { name string sandboxTemplateName string sessionID string + ownerID string idleTimeout time.Duration // ownerReference is the reference to the CodeInterpreter that creates this SandboxClaim ownerReference *metav1.OwnerReference @@ -203,6 +205,13 @@ func buildSandboxObject(params *buildSandboxParams) *sandboxv1alpha1.Sandbox { Replicas: ptr.To[int32](1), }, } + + // Ownership metadata for RLAC + if params.ownerID != "" { + sandbox.ObjectMeta.Annotations["agentcube.io/owner"] = params.ownerID + sandbox.ObjectMeta.Labels["agentcube.io/owner-hash"] = sha256Short(params.ownerID) + } + return sandbox } @@ -237,6 +246,13 @@ func buildSandboxClaimObject(params *buildSandboxClaimParams) *extensionsv1alpha if params.ownerReference != nil { sandboxClaim.ObjectMeta.OwnerReferences = []metav1.OwnerReference{*params.ownerReference} } + + // Ownership metadata for RLAC + if params.ownerID != "" { + sandboxClaim.ObjectMeta.Annotations["agentcube.io/owner"] = params.ownerID + sandboxClaim.ObjectMeta.Labels["agentcube.io/owner-hash"] = sha256Short(params.ownerID) + } + return sandboxClaim } diff --git a/pkg/workloadmanager/workload_builder_test.go b/pkg/workloadmanager/workload_builder_test.go index b7e44c38..0203e396 100644 --- a/pkg/workloadmanager/workload_builder_test.go +++ b/pkg/workloadmanager/workload_builder_test.go @@ -685,3 +685,105 @@ func TestBuildSandboxByCodeInterpreter_SuccessWithWarmPool(t *testing.T) { } assertOwnerReference(t, claim.OwnerReferences[0]) } + +func TestBuildSandboxObject_OwnershipLabels(t *testing.T) { + tests := []struct { + name string + ownerID string + wantLabel bool + wantAnnot bool + }{ + { + name: "ownerID set", + ownerID: "user-123", + wantLabel: true, + wantAnnot: true, + }, + { + name: "ownerID empty", + ownerID: "", + wantLabel: false, + wantAnnot: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + params := &buildSandboxParams{ + namespace: "default", + sandboxName: "sandbox-owner-test", + sessionID: "session-owner", + ownerID: tt.ownerID, + ttl: time.Hour, + idleTimeout: 15 * time.Minute, + } + sandbox := buildSandboxObject(params) + + _, hasAnnot := sandbox.ObjectMeta.Annotations["agentcube.io/owner"] + _, hasLabel := sandbox.ObjectMeta.Labels["agentcube.io/owner-hash"] + + if hasAnnot != tt.wantAnnot { + t.Errorf("expected annotation present=%v, got %v", tt.wantAnnot, hasAnnot) + } + if hasLabel != tt.wantLabel { + t.Errorf("expected label present=%v, got %v", tt.wantLabel, hasLabel) + } + + if tt.ownerID != "" { + if sandbox.ObjectMeta.Annotations["agentcube.io/owner"] != tt.ownerID { + t.Errorf("expected annotation value %q, got %q", tt.ownerID, sandbox.ObjectMeta.Annotations["agentcube.io/owner"]) + } + hash := sandbox.ObjectMeta.Labels["agentcube.io/owner-hash"] + if len(hash) != 63 { + t.Errorf("expected owner-hash length 63, got %d", len(hash)) + } + } + }) + } +} + +func TestBuildSandboxClaimObject_OwnershipLabels(t *testing.T) { + tests := []struct { + name string + ownerID string + wantLabel bool + wantAnnot bool + }{ + { + name: "ownerID set", + ownerID: "user-456", + wantLabel: true, + wantAnnot: true, + }, + { + name: "ownerID empty", + ownerID: "", + wantLabel: false, + wantAnnot: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + params := &buildSandboxClaimParams{ + namespace: "default", + name: "claim-owner-test", + sandboxTemplateName: "my-ci", + sessionID: "session-claim-owner", + ownerID: tt.ownerID, + idleTimeout: 10 * time.Minute, + } + claim := buildSandboxClaimObject(params) + + _, hasAnnot := claim.ObjectMeta.Annotations["agentcube.io/owner"] + _, hasLabel := claim.ObjectMeta.Labels["agentcube.io/owner-hash"] + + if hasAnnot != tt.wantAnnot { + t.Errorf("expected annotation present=%v, got %v", tt.wantAnnot, hasAnnot) + } + if hasLabel != tt.wantLabel { + t.Errorf("expected label present=%v, got %v", tt.wantLabel, hasLabel) + } + }) + } +} diff --git a/sdk-python/agentcube/__init__.py b/sdk-python/agentcube/__init__.py index 36c2b70c..496c8ef0 100644 --- a/sdk-python/agentcube/__init__.py +++ b/sdk-python/agentcube/__init__.py @@ -14,5 +14,12 @@ from .code_interpreter import CodeInterpreterClient from .agent_runtime import AgentRuntimeClient +from .auth import AuthProvider, TokenAuth, ServiceAccountAuth -__all__ = ["CodeInterpreterClient", "AgentRuntimeClient"] +__all__ = [ + "CodeInterpreterClient", + "AgentRuntimeClient", + "AuthProvider", + "TokenAuth", + "ServiceAccountAuth", +] diff --git a/sdk-python/agentcube/agent_runtime.py b/sdk-python/agentcube/agent_runtime.py index d5322178..1f0efe04 100644 --- a/sdk-python/agentcube/agent_runtime.py +++ b/sdk-python/agentcube/agent_runtime.py @@ -31,6 +31,8 @@ def __init__( session_id: Optional[str] = None, timeout: int = 120, connect_timeout: float = 5.0, + auth_token: Optional[str] = None, + auth: Optional["AuthProvider"] = None, ): self.agent_name = agent_name self.namespace = namespace @@ -40,6 +42,11 @@ def __init__( level = logging.DEBUG if verbose else logging.INFO self.logger = get_logger(__name__, level=level) + self._auth = auth + if not self._auth and auth_token: + from agentcube.auth import TokenAuth + self._auth = TokenAuth(auth_token) + router_url = router_url or os.getenv("ROUTER_URL") if not router_url: raise ValueError( @@ -55,6 +62,7 @@ def __init__( agent_name=self.agent_name, timeout=self.timeout, connect_timeout=self.connect_timeout, + auth=self._auth, ) if verbose: self.dp_client.logger.setLevel(logging.DEBUG) @@ -72,7 +80,7 @@ def __enter__(self): def __exit__(self, exc_type, exc_val, exc_tb): self.close() - def invoke(self, payload: Dict[str, Any], timeout: Optional[float] = None) -> Any: + def invoke(self, payload: Dict[str, Any], timeout: Optional[float] = None, path: str = "") -> Any: if not self.session_id: raise ValueError("AgentRuntime session_id is not initialized") @@ -80,6 +88,7 @@ def invoke(self, payload: Dict[str, Any], timeout: Optional[float] = None) -> An session_id=self.session_id, payload=payload, timeout=timeout, + path=path, ) resp.raise_for_status() diff --git a/sdk-python/agentcube/auth.py b/sdk-python/agentcube/auth.py new file mode 100644 index 00000000..bb640bb1 --- /dev/null +++ b/sdk-python/agentcube/auth.py @@ -0,0 +1,94 @@ +# Copyright The Volcano Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import threading +import time +from typing import Optional, Protocol, Dict, runtime_checkable + +import requests + +from agentcube.utils.log import get_logger + +_REFRESH_BUFFER_SECONDS = 30 +_DEFAULT_TOKEN_TIMEOUT = (5.0, 30.0) # (connect, read) + +logger = get_logger(__name__) + + +@runtime_checkable +class AuthProvider(Protocol): + """Protocol for pluggable authentication providers.""" + + def get_token(self) -> str: ... + + +class TokenAuth: + """Wraps a static bearer token.""" + + def __init__(self, token: str): + if not token: + raise ValueError("Token must not be empty") + self._token = token + + def get_token(self) -> str: + return self._token + + +class ServiceAccountAuth: + """OAuth2 client_credentials grant with thread-safe token caching.""" + + def __init__( + self, + token_url: str, + client_id: str, + client_secret: str, + scope: Optional[str] = None, + headers: Optional[Dict[str, str]] = None, + timeout: tuple = _DEFAULT_TOKEN_TIMEOUT, + ): + self._token_url = token_url + self._client_id = client_id + self._client_secret = client_secret + self._scope = scope + self._headers = headers or {} + self._timeout = timeout + + self._lock = threading.Lock() + self._token: Optional[str] = None + self._expires_at: float = 0.0 + + def get_token(self) -> str: + with self._lock: + if self._token and time.monotonic() < self._expires_at: + return self._token + return self._fetch_token() + + def _fetch_token(self) -> str: + data = { + "grant_type": "client_credentials", + "client_id": self._client_id, + "client_secret": self._client_secret, + } + if self._scope: + data["scope"] = self._scope + + logger.debug(f"Fetching token from {self._token_url}") + resp = requests.post(self._token_url, data=data, headers=self._headers, timeout=self._timeout) + resp.raise_for_status() + + body = resp.json() + self._token = body["access_token"] + expires_in = int(body.get("expires_in", 3600)) + self._expires_at = time.monotonic() + expires_in - _REFRESH_BUFFER_SECONDS + return self._token or "" diff --git a/sdk-python/agentcube/clients/agent_runtime_data_plane.py b/sdk-python/agentcube/clients/agent_runtime_data_plane.py index 00eadec3..7156f6e4 100644 --- a/sdk-python/agentcube/clients/agent_runtime_data_plane.py +++ b/sdk-python/agentcube/clients/agent_runtime_data_plane.py @@ -12,7 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Dict, Optional +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Dict, Optional from urllib.parse import urljoin import requests @@ -20,6 +22,9 @@ from agentcube.utils.http import create_session from agentcube.utils.log import get_logger +if TYPE_CHECKING: + from agentcube.auth import AuthProvider + class AgentRuntimeDataPlaneClient: SESSION_HEADER = "x-agentcube-session-id" @@ -33,12 +38,14 @@ def __init__( connect_timeout: float = 5.0, pool_connections: int = 10, pool_maxsize: int = 10, + auth: Optional["AuthProvider"] = None, ): self.router_url = router_url self.namespace = namespace self.agent_name = agent_name self.timeout = timeout self.connect_timeout = connect_timeout + self._auth = auth self.logger = get_logger(f"{__name__}.AgentRuntimeDataPlaneClient") base_path = ( @@ -51,9 +58,16 @@ def __init__( pool_maxsize=pool_maxsize, ) + def _auth_headers(self) -> Dict[str, str]: + """Return Authorization header dict if auth is available.""" + if not self._auth: + return {} + return {"Authorization": f"Bearer {self._auth.get_token()}"} + def bootstrap_session_id(self) -> str: resp = self.session.get( self.base_url, + headers=self._auth_headers(), timeout=(self.connect_timeout, self.timeout), ) session_id = resp.headers.get(self.SESSION_HEADER) @@ -79,16 +93,22 @@ def invoke( session_id: str, payload: Dict[str, Any], timeout: Optional[float] = None, + path: str = "", ) -> requests.Response: headers = { self.SESSION_HEADER: session_id, "Content-Type": "application/json", } + headers.update(self._auth_headers()) read_timeout = timeout if timeout is not None else self.timeout - self.logger.debug(f"POST {self.base_url}") + # Ensure path doesn't have leading slash so urljoin works correctly with base_url + invoke_path = path.lstrip("/") + url = urljoin(self.base_url, invoke_path) if invoke_path else self.base_url + + self.logger.debug(f"POST {url}") return self.session.post( - self.base_url, + url, json=payload, headers=headers, timeout=(self.connect_timeout, read_timeout), diff --git a/sdk-python/agentcube/clients/code_interpreter_data_plane.py b/sdk-python/agentcube/clients/code_interpreter_data_plane.py index 99f51d72..f4c1c0cd 100644 --- a/sdk-python/agentcube/clients/code_interpreter_data_plane.py +++ b/sdk-python/agentcube/clients/code_interpreter_data_plane.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import base64 import json import time import os import ast import shlex -from typing import Optional, Any, Dict, List, Union +from typing import TYPE_CHECKING, Optional, Any, Dict, List, Union from urllib.parse import urljoin import requests @@ -27,6 +29,9 @@ from agentcube.utils.http import create_session from agentcube.exceptions import CommandExecutionError +if TYPE_CHECKING: + from agentcube.auth import AuthProvider + class CodeInterpreterDataPlaneClient: """Client for AgentCube Data Plane (Router -> PicoD). Handles command execution and file operations via the Router. @@ -46,6 +51,7 @@ def __init__( connect_timeout: float = 5.0, pool_connections: int = 10, pool_maxsize: int = 10, + auth: Optional["AuthProvider"] = None, ): """Initialize Data Plane client. @@ -65,6 +71,7 @@ def __init__( self.connect_timeout = connect_timeout self.pool_connections = pool_connections self.pool_maxsize = pool_maxsize + self._auth = auth self.logger = get_logger(f"{__name__}.CodeInterpreterDataPlaneClient") if base_url: @@ -98,6 +105,8 @@ def _request(self, method: str, endpoint: str, body: Optional[bytes] = None, **k url = urljoin(self.base_url, endpoint) headers = {} + if self._auth: + headers["Authorization"] = f"Bearer {self._auth.get_token()}" if body: headers["Content-Type"] = "application/json" @@ -235,6 +244,8 @@ def upload_file(self, local_path: str, remote_path: str) -> None: headers = { "x-agentcube-session-id": self.session_id } + if self._auth: + headers["Authorization"] = f"Bearer {self._auth.get_token()}" self.logger.debug(f"Uploading file {local_path} to {remote_path}") resp = self.session.post(url, files=files, data=data, headers=headers, timeout=self.timeout) diff --git a/sdk-python/agentcube/clients/control_plane.py b/sdk-python/agentcube/clients/control_plane.py index 48bee355..964dffd8 100644 --- a/sdk-python/agentcube/clients/control_plane.py +++ b/sdk-python/agentcube/clients/control_plane.py @@ -12,14 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import os +from typing import TYPE_CHECKING, Dict, Any, Optional + import requests -from typing import Dict, Any, Optional from agentcube.utils.log import get_logger from agentcube.utils.utils import read_token_from_file from agentcube.utils.http import create_session +if TYPE_CHECKING: + from agentcube.auth import AuthProvider + class ControlPlaneClient: """Client for AgentCube Control Plane (WorkloadManager). Handles creation and deletion of Code Interpreter sessions. @@ -33,6 +39,7 @@ def __init__( connect_timeout: float = 5.0, pool_connections: int = 10, pool_maxsize: int = 10, + auth: Optional["AuthProvider"] = None, ): """Initialize the Control Plane client. @@ -43,6 +50,7 @@ def __init__( connect_timeout: Connection timeout in seconds (default: 5). pool_connections: Number of connection pools to cache (default: 10). pool_maxsize: Maximum connections per pool (default: 10). + auth: Optional AuthProvider instance (takes priority over auth_token). """ # Prioritize argument -> env var self.base_url = workload_manager_url or os.getenv("WORKLOAD_MANAGER_URL") @@ -52,9 +60,21 @@ def __init__( "or 'WORKLOAD_MANAGER_URL' environment variable." ) - # Prioritize argument -> k8s service account token file - token_path = "/var/run/secrets/kubernetes.io/serviceaccount/token" - token = auth_token or read_token_from_file(token_path) + # Resolve auth: auth param > auth_token > k8s SA token file + if auth: + self._auth = auth + elif auth_token: + from agentcube.auth import TokenAuth + self._auth = TokenAuth(auth_token) + else: + token_path = "/var/run/secrets/kubernetes.io/serviceaccount/token" + token = read_token_from_file(token_path) + if token: + from agentcube.auth import TokenAuth + self._auth = TokenAuth(token) + else: + self._auth = None + self.timeout = timeout self.connect_timeout = connect_timeout @@ -70,8 +90,13 @@ def __init__( self.session.headers.update({ "Content-Type": "application/json", }) - if token: - self.session.headers["Authorization"] = f"Bearer {token}" + + def _apply_auth(self, request_kwargs: dict) -> None: + """Add Authorization header from auth provider if available.""" + if not self._auth: + return + headers = request_kwargs.setdefault("headers", {}) + headers["Authorization"] = f"Bearer {self._auth.get_token()}" def create_session( self, @@ -102,11 +127,9 @@ def create_session( self.logger.debug(f"Creating session at {url} with payload: {payload}") try: - response = self.session.post( - url, - json=payload, - timeout=(self.connect_timeout, self.timeout) - ) + kwargs = {"json": payload, "timeout": (self.connect_timeout, self.timeout)} + self._apply_auth(kwargs) + response = self.session.post(url, **kwargs) response.raise_for_status() data = response.json() @@ -134,10 +157,9 @@ def delete_session(self, session_id: str) -> bool: self.logger.debug(f"Deleting session {session_id} at {url}") try: - response = self.session.delete( - url, - timeout=(self.connect_timeout, self.timeout) - ) + kwargs: Dict[str, Any] = {"timeout": (self.connect_timeout, self.timeout)} + self._apply_auth(kwargs) + response = self.session.delete(url, **kwargs) if response.status_code == 404: return True # Already gone response.raise_for_status() diff --git a/sdk-python/agentcube/code_interpreter.py b/sdk-python/agentcube/code_interpreter.py index b0ccf5df..ca560627 100644 --- a/sdk-python/agentcube/code_interpreter.py +++ b/sdk-python/agentcube/code_interpreter.py @@ -12,14 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import os import logging -from typing import Any, Optional +from typing import TYPE_CHECKING, Any, Optional from agentcube.clients.control_plane import ControlPlaneClient from agentcube.clients.code_interpreter_data_plane import CodeInterpreterDataPlaneClient from agentcube.utils.log import get_logger +if TYPE_CHECKING: + from agentcube.auth import AuthProvider + class CodeInterpreterClient: """ @@ -58,6 +63,7 @@ def __init__( auth_token: Optional[str] = None, verbose: bool = False, session_id: Optional[str] = None, + auth: Optional["AuthProvider"] = None, ): """ Initialize the Code Interpreter Client. @@ -84,8 +90,17 @@ def __init__( level = logging.DEBUG if verbose else logging.INFO self.logger = get_logger(__name__, level=level) + # Resolve auth: auth param > auth_token > None + if auth: + self._auth = auth + elif auth_token: + from agentcube.auth import TokenAuth + self._auth = TokenAuth(auth_token) + else: + self._auth = None + # Initialize Control Plane client - self.cp_client = ControlPlaneClient(workload_manager_url, auth_token) + self.cp_client = ControlPlaneClient(workload_manager_url, auth=self._auth) if verbose: self.cp_client.logger.setLevel(logging.DEBUG) @@ -133,6 +148,7 @@ def _init_data_plane(self): router_url=self.router_url, namespace=self.namespace, session_id=self.session_id, + auth=self._auth, ) if self.verbose: self.dp_client.logger.setLevel(logging.DEBUG) diff --git a/sdk-python/tests/test_auth.py b/sdk-python/tests/test_auth.py new file mode 100644 index 00000000..5781de37 --- /dev/null +++ b/sdk-python/tests/test_auth.py @@ -0,0 +1,145 @@ +# Copyright The Volcano Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import time +import unittest +from concurrent.futures import ThreadPoolExecutor +from unittest.mock import Mock, patch + +from agentcube.auth import AuthProvider, ServiceAccountAuth, TokenAuth + + +class TestTokenAuth(unittest.TestCase): + + def test_token_auth_get_token(self): + auth = TokenAuth("my-secret-token") + self.assertEqual(auth.get_token(), "my-secret-token") + + def test_token_auth_empty_raises(self): + with self.assertRaises(ValueError): + TokenAuth("") + + +class TestServiceAccountAuth(unittest.TestCase): + + def _mock_response(self, access_token="tok-123", expires_in=3600, status_code=200): + resp = Mock() + resp.status_code = status_code + resp.json.return_value = { + "access_token": access_token, + "expires_in": expires_in, + } + resp.raise_for_status = Mock() + return resp + + @patch("agentcube.auth.requests.post") + def test_service_account_auth_initial_fetch(self, mock_post): + mock_post.return_value = self._mock_response() + + auth = ServiceAccountAuth( + token_url="https://idp.example.com/token", + client_id="cid", + client_secret="csecret", + ) + token = auth.get_token() + + self.assertEqual(token, "tok-123") + mock_post.assert_called_once() + + @patch("agentcube.auth.requests.post") + def test_service_account_auth_caches_token(self, mock_post): + mock_post.return_value = self._mock_response() + + auth = ServiceAccountAuth( + token_url="https://idp.example.com/token", + client_id="cid", + client_secret="csecret", + ) + auth.get_token() + auth.get_token() + + # Only one HTTP call despite two get_token() calls + mock_post.assert_called_once() + + @patch("agentcube.auth.time.monotonic") + @patch("agentcube.auth.requests.post") + def test_service_account_auth_refreshes_expired(self, mock_post, mock_monotonic): + mock_post.return_value = self._mock_response(expires_in=60) + # First call at t=0, cached until t=30 (60 - 30s buffer) + mock_monotonic.return_value = 0.0 + + auth = ServiceAccountAuth( + token_url="https://idp.example.com/token", + client_id="cid", + client_secret="csecret", + ) + auth.get_token() + self.assertEqual(mock_post.call_count, 1) + + # Advance past expiry (beyond the 30s buffer) + mock_monotonic.return_value = 31.0 + mock_post.return_value = self._mock_response(access_token="tok-456") + + token = auth.get_token() + self.assertEqual(token, "tok-456") + self.assertEqual(mock_post.call_count, 2) + + @patch("agentcube.auth.requests.post") + def test_service_account_auth_thread_safety(self, mock_post): + mock_post.return_value = self._mock_response() + + auth = ServiceAccountAuth( + token_url="https://idp.example.com/token", + client_id="cid", + client_secret="csecret", + ) + + with ThreadPoolExecutor(max_workers=8) as pool: + results = list(pool.map(lambda _: auth.get_token(), range(50))) + + # All threads should get the same cached token + self.assertTrue(all(t == "tok-123" for t in results)) + + @patch("agentcube.auth.requests.post") + def test_service_account_auth_http_error(self, mock_post): + from requests.exceptions import HTTPError + resp = Mock() + resp.status_code = 401 + resp.raise_for_status.side_effect = HTTPError(response=resp) + mock_post.return_value = resp + + auth = ServiceAccountAuth( + token_url="https://idp.example.com/token", + client_id="cid", + client_secret="csecret", + ) + with self.assertRaises(HTTPError): + auth.get_token() + + +class TestAuthProviderProtocol(unittest.TestCase): + + def test_auth_provider_protocol(self): + token_auth = TokenAuth("abc") + sa_auth = ServiceAccountAuth( + token_url="https://idp.example.com/token", + client_id="cid", + client_secret="csecret", + ) + self.assertIsInstance(token_auth, AuthProvider) + self.assertIsInstance(sa_auth, AuthProvider) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index daba8c09..3fb24a0b 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -1560,3 +1560,90 @@ func TestCodeInterpreterBasicInvocationLoad(t *testing.T) { runCodeInterpreterLoadTest(t, env, namespace, name, requestsPerSecond, testDuration, "Load test request %d!") } + +// skipIfNoOIDC skips the test when OIDC is not enabled in the E2E environment. +func skipIfNoOIDC(t *testing.T) { + t.Helper() + if os.Getenv("OIDC_ENABLED") != "true" { + t.Skip("skipping OIDC test: OIDC_ENABLED not set (Keycloak not deployed)") + } +} + +// rawInvoke sends an HTTP request to the Router and returns the raw status code and body. +func rawInvoke(routerURL, namespace, name, sessionID, token string) (int, string, error) { + reqURL := fmt.Sprintf("%s/v1/namespaces/%s/agent-runtimes/%s/invocations/echo", + routerURL, namespace, name) + + body := `{"input":"oidc-test"}` + httpReq, err := http.NewRequest("POST", reqURL, strings.NewReader(body)) + if err != nil { + return 0, "", err + } + httpReq.Header.Set("Content-Type", "application/json") + if token != "" { + httpReq.Header.Set("Authorization", "Bearer "+token) + } + if sessionID != "" { + httpReq.Header.Set("x-agentcube-session-id", sessionID) + } + + client := &http.Client{Timeout: 60 * time.Second} + resp, err := client.Do(httpReq) + if err != nil { + return 0, "", err + } + defer resp.Body.Close() + + respBody, err := io.ReadAll(resp.Body) + if err != nil { + return resp.StatusCode, "", err + } + return resp.StatusCode, string(respBody), nil +} + +func TestOIDCAuthNoToken(t *testing.T) { + skipIfNoOIDC(t) + env := newTestEnv(t) + + status, _, err := rawInvoke(env.routerURL, agentcubeNamespace, "echo-agent", "", "") + require.NoError(t, err) + require.Equal(t, http.StatusUnauthorized, status) +} + +func TestOIDCAuthInvalidToken(t *testing.T) { + skipIfNoOIDC(t) + env := newTestEnv(t) + + status, _, err := rawInvoke(env.routerURL, agentcubeNamespace, "echo-agent", "", "invalid.garbage.token") + require.NoError(t, err) + require.Equal(t, http.StatusUnauthorized, status) +} + +func TestRLACOwnershipEnforcement(t *testing.T) { + skipIfNoOIDC(t) + env := newTestEnv(t) + + // Step 1: Create a sandbox with the valid OIDC token (User A) + req := &AgentInvokeRequest{Input: "RLAC ownership test"} + _, sessionID, err := env.invokeAgentRuntime(agentcubeNamespace, "echo-agent", "", req) + require.NoError(t, err) + require.NotEmpty(t, sessionID) + + // Step 2: Try to access the same sandbox with a different identity (admin client) + adminToken := os.Getenv("ADMIN_TOKEN") + if adminToken == "" { + t.Skip("skipping RLAC cross-user test: ADMIN_TOKEN not set") + } + + status, body, err := rawInvoke(env.routerURL, agentcubeNamespace, "echo-agent", sessionID, adminToken) + require.NoError(t, err) + + // Admin role should bypass RLAC; if the admin client has admin role, expect 200. + // If the admin client does NOT have admin role, expect 403. + if status == http.StatusForbidden { + require.Contains(t, body, "you do not own this sandbox") + } else { + // Admin bypass succeeded, which is also valid + require.Equal(t, http.StatusOK, status) + } +} diff --git a/test/e2e/run_e2e.sh b/test/e2e/run_e2e.sh index 0c215718..3298f476 100755 --- a/test/e2e/run_e2e.sh +++ b/test/e2e/run_e2e.sh @@ -22,6 +22,8 @@ AGENTCUBE_NAMESPACE=${AGENTCUBE_NAMESPACE:-agentcube-system} WORKLOAD_NAMESPACE=${WORKLOAD_NAMESPACE:-agentcube} E2E_VENV_DIR=${E2E_VENV_DIR:-/tmp/agentcube-e2e-venv} MCP_K8S_LOCAL_PORT=${MCP_K8S_LOCAL_PORT:-19446} +KEYCLOAK_ENABLED=${KEYCLOAK_ENABLED:-false} +KEYCLOAK_IMAGE=${KEYCLOAK_IMAGE:-quay.io/keycloak/keycloak:26.0} _SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" REPO_ROOT="$(cd "$_SCRIPT_DIR/../.." && pwd)" @@ -58,6 +60,10 @@ cleanup() { echo "Stopping MCP in-cluster port forward (PID: $MCP_K8S_PF_PID)..." kill "$MCP_K8S_PF_PID" 2>/dev/null || true fi + if [ -n "${KEYCLOAK_PID:-}" ]; then + echo "Stopping Keycloak port forward (PID: $KEYCLOAK_PID)..." + kill "$KEYCLOAK_PID" 2>/dev/null || true + fi # Best-effort: remove MCP Deployment so the next run starts clean kubectl delete deployment agentcube-code-interpreter-mcp -n "${AGENTCUBE_NAMESPACE:-agentcube}" --ignore-not-found=true 2>/dev/null || true @@ -410,6 +416,41 @@ run_setup() { kubectl get agentruntime echo-agent-short-ttl -n "${WORKLOAD_NAMESPACE}" -o jsonpath='{.metadata.name}{"\n"}' || echo "echo-agent-short-ttl may still be starting..." echo "AgentRuntimes created, waiting for pods to be ready..." sleep 10 + + # Deploy Keycloak when enabled + if [ "${KEYCLOAK_ENABLED}" = "true" ]; then + step "Deploying Keycloak addon..." + docker_pull_if_missing "${KEYCLOAK_IMAGE}" + kind_load_image "${KEYCLOAK_IMAGE}" + + helm upgrade --install keycloak manifests/charts/addons/keycloak \ + --namespace "${AGENTCUBE_NAMESPACE}" \ + --set adminUser=admin --set adminPassword=admin \ + --set clients.service.secret=e2e-service-secret \ + --set clients.router.secret=e2e-router-secret \ + --set clients.admin.secret=e2e-admin-secret \ + --wait --timeout=5m + + step "Waiting for Keycloak to be ready..." + kubectl -n "${AGENTCUBE_NAMESPACE}" rollout status deployment/keycloak --timeout=300s + + # Configure OIDC Helm args for Router + OIDC_HELM_ARGS=( + --set "router.oidc.issuerUrl=http://keycloak.${AGENTCUBE_NAMESPACE}.svc.cluster.local:8080/realms/agentcube" + --set "router.oidc.rolesClaim=realm_access.roles" + --set "router.oidc.requiredRole=sandbox:invoke" + ) + + # Reconfigure Router with OIDC flags + step "Reconfiguring Router with OIDC flags..." + helm upgrade agentcube manifests/charts/base \ + --namespace "${AGENTCUBE_NAMESPACE}" \ + --reuse-values \ + "${OIDC_HELM_ARGS[@]}" \ + --wait --timeout=5m + + kubectl -n "${AGENTCUBE_NAMESPACE}" rollout status deployment/agentcube-router --timeout=300s + fi } echo "Starting E2E tests..." @@ -441,6 +482,35 @@ step "Running tests..." API_TOKEN=$(kubectl create token e2e-test -n "${AGENTCUBE_NAMESPACE}" --duration=24h) echo "Token created" +# Obtain Keycloak tokens when OIDC is enabled +if [ "${KEYCLOAK_ENABLED}" = "true" ]; then + step "Obtaining Keycloak access tokens..." + kubectl port-forward svc/keycloak -n "${AGENTCUBE_NAMESPACE}" 8082:8080 > /tmp/keycloak_port_forward.log 2>&1 & + KEYCLOAK_PID=$! + sleep 3 + + KEYCLOAK_TOKEN=$(curl -s -X POST \ + -H "Host: keycloak.${AGENTCUBE_NAMESPACE}.svc.cluster.local:8080" \ + "http://localhost:8082/realms/agentcube/protocol/openid-connect/token" \ + -d "grant_type=client_credentials" \ + -d "client_id=agentcube-service" \ + -d "client_secret=e2e-service-secret" | jq -r '.access_token') + + ADMIN_TOKEN=$(curl -s -X POST \ + -H "Host: keycloak.${AGENTCUBE_NAMESPACE}.svc.cluster.local:8080" \ + "http://localhost:8082/realms/agentcube/protocol/openid-connect/token" \ + -d "grant_type=client_credentials" \ + -d "client_id=agentcube-admin" \ + -d "client_secret=e2e-admin-secret" | jq -r '.access_token') + + # Override the K8s SA token with the Keycloak token + export API_TOKEN="${KEYCLOAK_TOKEN}" + export ADMIN_TOKEN="${ADMIN_TOKEN}" + export OIDC_ENABLED="true" + export KEYCLOAK_TOKEN_URL="http://localhost:8082/realms/agentcube/protocol/openid-connect/token" + echo "Keycloak tokens acquired" +fi + # Port forward workload manager in background echo "Starting workload manager port-forward..." kubectl port-forward svc/workloadmanager -n "${AGENTCUBE_NAMESPACE}" "${WORKLOAD_MANAGER_LOCAL_PORT}:8080" > /tmp/workload_port_forward.log 2>&1 & @@ -519,6 +589,8 @@ if ! WORKLOAD_MANAGER_URL="http://localhost:${WORKLOAD_MANAGER_LOCAL_PORT}" \ ROUTER_URL="http://localhost:${ROUTER_LOCAL_PORT}" \ MTLS_ENABLED="${MTLS_ENABLED}" \ WORKLOAD_NAMESPACE="${WORKLOAD_NAMESPACE}" \ + OIDC_ENABLED="${OIDC_ENABLED:-false}" \ + ADMIN_TOKEN="${ADMIN_TOKEN:-}" \ API_TOKEN=$API_TOKEN \ go test -v ./test/e2e/...; then TEST_FAILED=1 @@ -530,12 +602,28 @@ cd "$(dirname "$0")" if ! WORKLOAD_MANAGER_URL="http://localhost:${WORKLOAD_MANAGER_LOCAL_PORT}" \ ROUTER_URL="http://localhost:${ROUTER_LOCAL_PORT}" \ MTLS_ENABLED="${MTLS_ENABLED}" \ + OIDC_ENABLED="${OIDC_ENABLED:-false}" \ + KEYCLOAK_TOKEN_URL="${KEYCLOAK_TOKEN_URL:-}" \ API_TOKEN=$API_TOKEN \ AGENTCUBE_NAMESPACE="${WORKLOAD_NAMESPACE}" \ "$E2E_VENV_DIR/bin/python" test_codeinterpreter.py; then TEST_FAILED=1 fi +if [ "${KEYCLOAK_ENABLED}" = "true" ]; then + echo "Running Python OIDC auth tests..." + if ! WORKLOAD_MANAGER_URL="http://localhost:${WORKLOAD_MANAGER_LOCAL_PORT}" \ + ROUTER_URL="http://localhost:${ROUTER_LOCAL_PORT}" \ + OIDC_ENABLED="true" \ + KEYCLOAK_TOKEN_URL="${KEYCLOAK_TOKEN_URL}" \ + AGENTCUBE_SYSTEM_NAMESPACE="${AGENTCUBE_NAMESPACE}" \ + API_TOKEN=$API_TOKEN \ + AGENTCUBE_NAMESPACE="${WORKLOAD_NAMESPACE}" \ + "$E2E_VENV_DIR/bin/python" test_oidc_auth.py; then + TEST_FAILED=1 + fi +fi + echo "Running LangChain AgentcubeSandbox E2E..." if ! WORKLOAD_MANAGER_URL="http://localhost:${WORKLOAD_MANAGER_LOCAL_PORT}" ROUTER_URL="http://localhost:${ROUTER_LOCAL_PORT}" MTLS_ENABLED="${MTLS_ENABLED}" API_TOKEN=$API_TOKEN AGENTCUBE_NAMESPACE="${AGENTCUBE_NAMESPACE}" "$E2E_VENV_DIR/bin/python" test_langchain_agentcube_sandbox.py; then TEST_FAILED=1 diff --git a/test/e2e/test_oidc_auth.py b/test/e2e/test_oidc_auth.py new file mode 100644 index 00000000..e33e5640 --- /dev/null +++ b/test/e2e/test_oidc_auth.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 +# Copyright The Volcano Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""E2E tests for OIDC authentication using the Python SDK.""" + +import os +import sys +import unittest + + +def skip_if_no_oidc(): + """Skip test if OIDC is not enabled.""" + if os.getenv("OIDC_ENABLED") != "true": + raise unittest.SkipTest("OIDC_ENABLED not set (Keycloak not deployed)") + + +class TestOIDCSDKAuth(unittest.TestCase): + """Tests that the Python SDK works with ServiceAccountAuth.""" + + def test_service_account_auth_agent_runtime(self): + """Full flow: ServiceAccountAuth -> AgentRuntime session -> execute code.""" + skip_if_no_oidc() + + from agentcube import AgentRuntimeClient, ServiceAccountAuth + + keycloak_url = os.getenv( + "KEYCLOAK_TOKEN_URL", + "http://localhost:8082/realms/agentcube/protocol/openid-connect/token", + ) + + system_namespace = os.getenv("AGENTCUBE_SYSTEM_NAMESPACE", "agentcube-system") + auth = ServiceAccountAuth( + token_url=keycloak_url, + client_id="agentcube-service", + client_secret="e2e-service-secret", + headers={"Host": f"keycloak.{system_namespace}.svc.cluster.local:8080"} + ) + + namespace = os.getenv("AGENTCUBE_NAMESPACE", "agentcube") + router_url = os.getenv("ROUTER_URL", "http://localhost:8081") + + with AgentRuntimeClient( + agent_name="echo-agent", + namespace=namespace, + router_url=router_url, + auth=auth, + ) as client: + result = client.invoke({"input": "Hello OIDC"}, path="echo") + output = result.get("output", "") if isinstance(result, dict) else str(result) + self.assertIn("Hello OIDC", output) + + +if __name__ == "__main__": + result = unittest.main(verbosity=2, exit=False) + sys.exit(0 if result.result.wasSuccessful() else 1) From 6dd2fa4712aadd0eea6271936c8f13751ebe867c Mon Sep 17 00:00:00 2001 From: Mahil Patel Date: Sat, 6 Jun 2026 03:56:09 +0530 Subject: [PATCH 13/14] docs: added tutorial for securing external access with Keycloak (OIDC) Signed-off-by: Mahil Patel --- .../docs/tutorials/external-auth-keycloak.md | 441 ++++++++++++++++++ 1 file changed, 441 insertions(+) create mode 100644 docs/agentcube/docs/tutorials/external-auth-keycloak.md diff --git a/docs/agentcube/docs/tutorials/external-auth-keycloak.md b/docs/agentcube/docs/tutorials/external-auth-keycloak.md new file mode 100644 index 00000000..00028124 --- /dev/null +++ b/docs/agentcube/docs/tutorials/external-auth-keycloak.md @@ -0,0 +1,441 @@ +# Securing External Access with Keycloak (OIDC) + +This task shows you how to add external authentication and authorization to +AgentCube using [Keycloak](https://www.keycloak.org/) as the identity provider. +By the end, every request to the Router API will require a valid JWT token +issued by Keycloak, and access will be controlled by realm roles (RBAC) and +resource ownership (RLAC). + +## Before you begin + +1. Follow the [Getting Started](../getting-started.md) guide to install + AgentCube on your cluster and deploy the `sample-agent` runtime. **Do not** + enable OIDC during the initial installation - this tutorial walks through + that step explicitly. + +2. Make sure you have the following tools installed: + - [`kubectl`](https://kubernetes.io/docs/tasks/tools/) (v1.25+) + - [`helm`](https://helm.sh/docs/intro/install/) (v3.12+) + - [`curl`](https://curl.se/) (any recent version) + - [`jq`](https://jqlang.github.io/jq/download/) (for parsing JSON + responses) + +3. Confirm AgentCube is running without external auth: + + ```bash + kubectl get pods -n agentcube-system + ``` + + You should see the Router and WorkloadManager pods in `Running` state: + +``` + NAME READY STATUS RESTARTS AGE + agentcube-router-699fb7784d-6plc9 2/2 Running 0 15s + spire-agent-jg7mm 1/1 Running 3 (31m ago) 31m + spire-server-0 2/2 Running 0 31m + workloadmanager-57c547f945-zqwf6 2/2 Running 3 (30m ago) 31m +``` + +4. Confirm the Router is currently reachable **without** a token. + + Open a **new terminal** and port-forward the Router: + + ```bash + kubectl port-forward svc/agentcube-router -n agentcube-system 8081:8080 + ``` + + In your **original terminal**, check the health endpoint: + + ```bash + curl -s http://localhost:8081/health/live + ``` + + Expected output: + + ```json + {"status":"alive"} + ``` + + Stop the port-forward by pressing `Ctrl+C` in the terminal where it is + running. + +## What gets deployed + +When you install the Keycloak addon and enable OIDC, the following resources are +created or modified: + +| Resource | Kind | Purpose | +|---|---|---| +| `keycloak` | Deployment (1 replica) | Keycloak identity provider. Imports the `agentcube` realm on first startup. | +| `keycloak` | Service (ClusterIP) | Exposes Keycloak on port 8080 inside the cluster. | +| `keycloak-realm-config` | Secret | Contains the realm JSON with roles, clients, and scope mappings. | +| `keycloak-credentials` | Secret | Stores admin password and client secrets. | + +The Keycloak addon also auto-configures the following inside the `agentcube` +realm: + +| Item | Details | +|---|---| +| **Roles** | `sandbox:invoke` (default for all users), `sandbox:manage` (inherits invoke), `admin` (inherits manage) | +| **Clients** | `agentcube-service` (confidential, for SDKs), `agentcube-sdk` (public, for browsers/CLI), `agentcube-admin` (confidential, for admin ops) | +| **Audience** | All tokens include `agentcube-api` in the `aud` claim via a protocol mapper | + +After enabling OIDC on the Router, **all** requests to `/v1/...` endpoints +require a valid Bearer token. Health endpoints (`/health/live`, `/health/ready`) +remain unauthenticated for Kubernetes probes. + +## Step 1 - Deploy the Keycloak addon + +Install Keycloak into your AgentCube namespace using the addon Helm chart. You +must provide an admin username, admin password, and client secrets for the three +confidential clients. Choose strong secrets for production - the values below +are for demonstration only. + +```bash +helm upgrade --install keycloak manifests/charts/addons/keycloak \ + --namespace agentcube-system \ + --set adminUser=admin \ + --set adminPassword=admin \ + --set clients.service.secret=my-service-secret \ + --set clients.router.secret=my-router-secret \ + --set clients.admin.secret=my-admin-secret +``` + +Expected output: + +``` + Release "keycloak" does not exist. Installing it now. + NAME: keycloak + LAST DEPLOYED: Sat Jun 6 03:03:47 2026 + NAMESPACE: agentcube-system + STATUS: deployed + REVISION: 1 + DESCRIPTION: Install complete + TEST SUITE: None +``` + +> **Tip:** +> The addon chart uses `devMode: true` by default, which runs Keycloak with +> an embedded H2 database. This is fine for local development and testing. +> For production deployments, set `devMode=false` and provide an external +> database - see the chart's `values.yaml` for the full set of production +> options. + +Wait for Keycloak to be ready. The JVM startup plus realm import typically +takes 60–90 seconds: + +```bash +kubectl rollout status deployment/keycloak -n agentcube-system --timeout=300s +``` + +Expected output: + +``` +Waiting for deployment "keycloak" rollout to finish: 0 of 1 updated replicas are available... +deployment "keycloak" successfully rolled out +``` + +Verify the Keycloak pod is running: + +```bash +kubectl get pods -n agentcube-system -l app=keycloak +``` + +Expected output: + +``` +NAME READY STATUS RESTARTS AGE +keycloak-f8d7dff7b-b2p24 1/1 Running 0 2m44s +``` + +## Step 2 - Enable OIDC on the Router + +Now configure the Router to validate tokens against Keycloak. This is done by +upgrading the base AgentCube Helm release with OIDC settings. Use +`--reuse-values` so your existing configuration (Redis, images, SPIRE, etc.) is +preserved. + +```bash +helm upgrade agentcube manifests/charts/base \ + -n agentcube-system \ + --reuse-values \ + --set router.oidc.issuerUrl=http://keycloak.agentcube-system.svc.cluster.local:8080/realms/agentcube \ + --set router.oidc.rolesClaim=realm_access.roles \ + --set router.oidc.requiredRole=sandbox:invoke +``` + +Expected output: + +``` +Release "agentcube" has been upgraded. Happy Helming! +NAME: agentcube +LAST DEPLOYED: Sat Jun 6 03:07:07 2026 +NAMESPACE: agentcube-system +STATUS: deployed +REVISION: 4 +DESCRIPTION: Upgrade complete +TEST SUITE: None +``` + +This tells the Router: + +- **`issuerUrl`** - where to discover OIDC configuration and signing keys + (JWKS). The Router fetches these once at startup and caches them. +- **`rolesClaim`** - the dot-separated path inside the JWT where roles are + stored. For Keycloak this is `realm_access.roles`. +- **`requiredRole`** - the minimum role required to access the API. Users + without `sandbox:invoke` will receive a `403 Forbidden`. + +Wait for the Router to restart with the new configuration: + +```bash +kubectl rollout status deployment/agentcube-router -n agentcube-system --timeout=120s +``` + +## Step 3 - Verify that unauthenticated requests are rejected + +Open a **new terminal** and port-forward the Router: + +```bash +kubectl port-forward svc/agentcube-router -n agentcube-system 8081:8080 +``` + +In your **original terminal**, try making a request to the `sample-agent` (which you deployed in the Getting Started guide) **without** a token: + +```bash +curl -s -w "\nHTTP Status: %{http_code}\n" http://localhost:8081/v1/namespaces/default/agent-runtimes/sample-agent/invocations/ +``` + +Expected output — the Router now rejects unauthenticated requests: + +``` +{"code":"UNAUTHORIZED","error":"missing authorization header"} +HTTP Status: 401 +``` + +## Step 4 - Obtain a token from Keycloak + +Open **another new terminal** and port-forward Keycloak so you can reach it +from your local machine: + +```bash +kubectl port-forward svc/keycloak -n agentcube-system 8082:8080 +``` + +Back in your **original terminal**, proceed with the token requests below. + +### Get a service account token + +Use the `client_credentials` grant to obtain a token for the +`agentcube-service` client. Note the `Host` header - this is needed because +Keycloak validates the issuer against the original hostname, and we're +connecting through a port-forward: + +```bash +KEYCLOAK_TOKEN=$(curl -s -X POST \ + -H "Host: keycloak.agentcube-system.svc.cluster.local:8080" \ + "http://localhost:8082/realms/agentcube/protocol/openid-connect/token" \ + -d "grant_type=client_credentials" \ + -d "client_id=agentcube-service" \ + -d "client_secret=my-service-secret" | jq -r '.access_token') + +echo $KEYCLOAK_TOKEN +``` + +Expected output - a long base64-encoded JWT string: + +### Inspect the token (optional) + +You can decode the token to see its claims: + +```bash +echo $KEYCLOAK_TOKEN | cut -d'.' -f2 | base64 -d 2>/dev/null | jq . +``` + +Expected output - you should see `realm_access.roles` containing +`sandbox:invoke`, and `aud` containing `agentcube-api`: + +``` +{ + "exp": 1780695969, + "iat": 1780695669, + "jti": "31ef712f-f62a-4a4e-8714-7ccf710ab476", + "iss": "http://keycloak.agentcube-system.svc.cluster.local:8080/realms/agentcube", + "aud": "agentcube-api", + "sub": "bb0f4c04-e1d2-4bdb-b7bb-56d5c1cefe50", + "typ": "Bearer", + "azp": "agentcube-service", + "acr": "1", + "realm_access": { + "roles": [ + "sandbox:invoke" + ] + }, + "scope": "profile email", + "email_verified": false, + "clientHost": "127.0.0.1", + "preferred_username": "service-account-agentcube-service", + "clientAddress": "127.0.0.1", + "client_id": "agentcube-service" +} +``` + +### Get an admin token + +The `agentcube-admin` client has the `admin` role, which grants full access +including the ability to bypass resource ownership checks (RLAC): + +```bash +ADMIN_TOKEN=$(curl -s -X POST \ + -H "Host: keycloak.agentcube-system.svc.cluster.local:8080" \ + "http://localhost:8082/realms/agentcube/protocol/openid-connect/token" \ + -d "grant_type=client_credentials" \ + -d "client_id=agentcube-admin" \ + -d "client_secret=my-admin-secret" | jq -r '.access_token') + +echo $ADMIN_TOKEN +``` + +## Step 5 - Make an authenticated request + +Now use the token to make an authenticated request through the Router: + +```bash +curl -s -w "\nHTTP Status: %{http_code}\n" \ + -H "Authorization: Bearer $KEYCLOAK_TOKEN" \ + http://localhost:8081/v1/namespaces/default/agent-runtimes/sample-agent/invocations/ +``` + +Expected output - the request should now succeed (the exact response depends on +what agent runtimes you have deployed): + +``` +HTTP Status: 200 +``` + +## Step 6 - Verify RBAC enforcement + +Try using an **invalid** token to confirm the Router rejects it: + +```bash +curl -s -w "\nHTTP Status: %{http_code}\n" \ + -H "Authorization: Bearer invalid-token-here" \ + http://localhost:8081/v1/namespaces/default/agent-runtimes/sample-agent/invocations/ +``` + +Expected output : + +``` +{"code":"UNAUTHORIZED","error":"invalid or expired token"} +HTTP Status: 401 +``` + +## Step 7 - Use the Python SDK with authentication + +The AgentCube Python SDK supports Keycloak authentication via the +`ServiceAccountAuth` provider. This uses the `client_credentials` grant and +automatically refreshes tokens before they expire. + +### Install the SDK + +```bash +pip install -e sdk-python/ +``` + +### Example usage + +Create a Python script (or run in a Python REPL): + +```python +from agentcube import ServiceAccountAuth +from agentcube import AgentRuntimeClient + +# Initialize authentication with Keycloak +auth = ServiceAccountAuth( + token_url="http://localhost:8082/realms/agentcube/protocol/openid-connect/token", + client_id="agentcube-service", + client_secret="my-service-secret", + headers={"Host": "keycloak.agentcube-system.svc.cluster.local:8080"}, +) + +# Create the client - it will automatically attach Bearer tokens to requests +client = AgentRuntimeClient( + router_url="http://localhost:8081", + namespace="default", + agent_name="sample-agent", + auth=auth, +) + +# Invoke the agent +response = client.invoke(payload={}) +print(f"Response: {response}") +``` + +Expected output: + +If you are using the mock `sample-agent` from the Getting Started guide (which runs Python's built-in `http.server`), the script will raise a `501 Server Error: Not Implemented` exception. This is expected because `http.server` only handles `GET`/`HEAD` requests and doesn't implement `POST` handlers. A real agent framework (e.g., FastAPI or Flask) implementing `POST` handlers will return the actual response. + +The `ServiceAccountAuth` class handles the full token lifecycle: +- Fetches a new token on the first request +- Caches the token in memory +- Automatically refreshes 30 seconds before expiry + +## Understanding what changed + +The Helm chart passes four `--oidc-*` flags to the Router when +`router.oidc.issuerUrl` is set. The Router discovers Keycloak's signing keys +once at startup (via OIDC discovery), caches them, and validates every incoming +JWT locally — no per-request calls to Keycloak. + +> **Note:** The `--import-realm` flag only applies on Keycloak's **first +> startup**. Changes to the realm Secret after that will **not** take effect +> automatically. Use the Keycloak Admin Console or Admin API to update a live +> realm. + +### Realm roles + +| Role | Permissions | Assigned to | +|------|-------------|-------------| +| `sandbox:invoke` | Invoke agent runtimes and code interpreters | Default role (all users/clients) | +| `sandbox:manage` | Create/delete AgentRuntime and CodeInterpreter CRDs. Inherits `sandbox:invoke`. | — | +| `admin` | Full administrative access. Bypasses ownership checks. Inherits `sandbox:manage`. | `agentcube-admin` client | + +### OAuth2 clients + +| Client ID | Type | Grant | Purpose | +|-----------|------|-------|---------| +| `agentcube-service` | Confidential | `client_credentials` | Python SDK, external services (M2M) | +| `agentcube-sdk` | Public | Authorization code + PKCE | CLI, browser-based apps (interactive) | +| `agentcube-admin` | Confidential | `client_credentials` | Administrative operations | + +## Cleanup + +Stop all port-forwards by pressing `Ctrl+C` in each terminal where they are +running. + +If you want to **disable** external auth and go back to unauthenticated access, +remove the OIDC configuration from the Router: + +```bash +helm upgrade agentcube manifests/charts/base \ + -n agentcube-system \ + --reuse-values \ + --set router.oidc.issuerUrl="" \ + --set router.oidc.rolesClaim="" \ + --set router.oidc.requiredRole="" +``` + +Wait for the Router to restart: + +```bash +kubectl rollout status deployment/agentcube-router -n agentcube-system --timeout=120s +``` + +To also remove the Keycloak addon entirely: + +```bash +helm uninstall keycloak -n agentcube-system +``` + +This removes the Keycloak Deployment, Service, and associated Secrets from the +cluster. From 779784630d13d8abc1d58608f44cec1e49d98fd6 Mon Sep 17 00:00:00 2001 From: Mahil Patel Date: Sat, 6 Jun 2026 04:05:16 +0530 Subject: [PATCH 14/14] feat: enforced JWT expiration requirement in identity verification and fixed python lint issue Signed-off-by: Mahil Patel --- pkg/workloadmanager/identity.go | 2 +- sdk-python/agentcube/agent_runtime.py | 3 ++- sdk-python/tests/test_auth.py | 1 - 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/workloadmanager/identity.go b/pkg/workloadmanager/identity.go index 5a4f692f..e3fb2cd8 100644 --- a/pkg/workloadmanager/identity.go +++ b/pkg/workloadmanager/identity.go @@ -51,7 +51,7 @@ func verifyIdentityJWT(publicKeyPEM string, rawToken string) (string, error) { token, err := jwt.Parse(rawToken, func(_ *jwt.Token) (interface{}, error) { return rsaPub, nil - }, jwt.WithValidMethods([]string{"RS256"})) + }, jwt.WithValidMethods([]string{"RS256"}), jwt.WithExpirationRequired()) if err != nil { return "", fmt.Errorf("token verification failed: %w", err) } diff --git a/sdk-python/agentcube/agent_runtime.py b/sdk-python/agentcube/agent_runtime.py index 1f0efe04..9e6cf8b7 100644 --- a/sdk-python/agentcube/agent_runtime.py +++ b/sdk-python/agentcube/agent_runtime.py @@ -17,6 +17,7 @@ from typing import Any, Dict, Optional from requests.exceptions import JSONDecodeError +from agentcube.auth import AuthProvider from agentcube.clients.agent_runtime_data_plane import AgentRuntimeDataPlaneClient from agentcube.utils.log import get_logger @@ -32,7 +33,7 @@ def __init__( timeout: int = 120, connect_timeout: float = 5.0, auth_token: Optional[str] = None, - auth: Optional["AuthProvider"] = None, + auth: Optional[AuthProvider] = None, ): self.agent_name = agent_name self.namespace = namespace diff --git a/sdk-python/tests/test_auth.py b/sdk-python/tests/test_auth.py index 5781de37..23a9f42a 100644 --- a/sdk-python/tests/test_auth.py +++ b/sdk-python/tests/test_auth.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import time import unittest from concurrent.futures import ThreadPoolExecutor from unittest.mock import Mock, patch