diff --git a/deployments/charts/service/README.md b/deployments/charts/service/README.md index 6fa51f6fa..dc5495b43 100644 --- a/deployments/charts/service/README.md +++ b/deployments/charts/service/README.md @@ -403,11 +403,24 @@ Envoy uses filesystem-based dynamic configuration (LDS/CDS). When the ConfigMap | `gateway.networkPolicies.enabled` | Deploy NetworkPolicies restricting ingress to upstream pods | `false` | | `gateway.networkPolicies.upstreams` | List of upstream pods to protect (name, podSelector, port) | See values.yaml | -#### TLS +#### Gateway → Upstream TLS + +Traffic between the Envoy gateway and the upstream services (`osmo-service`, `osmo-router`, `osmo-agent`, `osmo-logger`) is encrypted by default. The UI intentionally stays on plain HTTP behind NetworkPolicy — Next.js does not natively serve TLS. + +**Default — encryption without validation.** Each upstream service mints its own ephemeral self-signed cert in-process at startup (ECDSA P-256, ~1ms) and loads it into uvicorn's SSLContext via `--ssl_self_signed true`. Envoy connects with TLS but does *not* validate the cert. The wire is encrypted; identity verification is delegated to NetworkPolicy + Kubernetes RBAC. No CA management, no Secrets, no rotation — cert lifecycle is tied to process lifecycle. + +**Externally-provisioned certs.** Point `gateway.tls.upstreamCerts.<name>` (where `<name>` is `service`, `router`, `agent`, or `logger`) at an existing `kubernetes.io/tls` Secret containing `tls.crt` + `tls.key`. That Secret is mounted at `/etc/osmo/tls` and uvicorn loads it instead of self-signing. To make Envoy validate against a CA, set `gateway.tls.caSecret` to a Secret containing `ca.crt`. The chart does not create these Secrets — provision them however suits your environment (cert-manager, Vault CSI, sealed-secrets, manual `kubectl create secret tls`, etc.). The two knobs are independent: you can use external certs without validation, or validation alone (rarely useful), but typical "real" TLS sets both. 
| Parameter | Description | Default | |-----------|-------------|---------| -| `gateway.tls.enabled` | Generate self-signed certs for upstream TLS | `false` | +| `gateway.tls.enabled` | Encrypt gateway → upstream traffic. | `true` | +| `gateway.tls.upstreamCerts.service` | Existing `kubernetes.io/tls` Secret for `osmo-service`. Empty string ⇒ self-signed. | `""` | +| `gateway.tls.upstreamCerts.router` | Same, for `osmo-router`. | `""` | +| `gateway.tls.upstreamCerts.agent` | Same, for `osmo-agent`. | `""` | +| `gateway.tls.upstreamCerts.logger` | Same, for `osmo-logger`. | `""` | +| `gateway.tls.caSecret` | Existing Secret containing `ca.crt`. When set, Envoy validates upstreams against this CA; when empty, TLS is encryption-only. | `""` | + +NetworkPolicy and TLS are independent: NetworkPolicy controls *who* can connect at L3/L4; TLS encrypts the bytes at L7. Run them together for defense in depth. ### Extensibility diff --git a/deployments/charts/service/templates/_gateway-envoy-config.tpl b/deployments/charts/service/templates/_gateway-envoy-config.tpl index ce7e43e5e..04449449f 100644 --- a/deployments/charts/service/templates/_gateway-envoy-config.tpl +++ b/deployments/charts/service/templates/_gateway-envoy-config.tpl @@ -70,7 +70,7 @@ data: filename: /etc/ssl/envoy-certs/tls.key {{- end }} - {{- if $gw.tls.enabled }} + {{- if and $gw.tls.enabled $gw.tls.caSecret }} sds_upstream_ca.yaml: | resources: - "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.Secret @@ -578,7 +578,16 @@ data: name: envoy.transport_sockets.tls typed_config: "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext + sni: {{ $gw.upstreams.service.host }} common_tls_context: + {{/* Envoy 1.29 upstream defaults to TLS 1.2 max. uvicorn's + SSLContext uses Python defaults (TLS 1.2 floor, 1.3 if + the openssl version supports it). Allow up to 1.3 so + negotiation can pick the most compatible option. 
*/}} + tls_params: + tls_minimum_protocol_version: TLSv1_2 + tls_maximum_protocol_version: TLSv1_3 + {{- if $gw.tls.caSecret }} validation_context_sds_secret_config: name: upstream_ca sds_config: @@ -586,6 +595,7 @@ data: path: /var/config/sds_upstream_ca.yaml watched_directory: path: /var/config + {{- end }} {{- end }} {{- if $gw.upstreams.router.enabled }} @@ -611,7 +621,16 @@ data: name: envoy.transport_sockets.tls typed_config: "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext + sni: {{ $gw.upstreams.router.host }} common_tls_context: + {{/* Envoy 1.29 upstream defaults to TLS 1.2 max. uvicorn's + SSLContext uses Python defaults (TLS 1.2 floor, 1.3 if + the openssl version supports it). Allow up to 1.3 so + negotiation can pick the most compatible option. */}} + tls_params: + tls_minimum_protocol_version: TLSv1_2 + tls_maximum_protocol_version: TLSv1_3 + {{- if $gw.tls.caSecret }} validation_context_sds_secret_config: name: upstream_ca sds_config: @@ -619,6 +638,7 @@ data: path: /var/config/sds_upstream_ca.yaml watched_directory: path: /var/config + {{- end }} {{- end }} {{- end }} @@ -638,20 +658,12 @@ data: socket_address: address: {{ $gw.upstreams.ui.host }} port_value: {{ $gw.upstreams.ui.port }} - {{- if $gw.tls.enabled }} - transport_socket: - name: envoy.transport_sockets.tls - typed_config: - "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext - common_tls_context: - validation_context_sds_secret_config: - name: upstream_ca - sds_config: - path_config_source: - path: /var/config/sds_upstream_ca.yaml - watched_directory: - path: /var/config - {{- end }} + {{/* + UI traffic stays HTTP — Next.js does not natively serve HTTPS and + the UI sits behind NetworkPolicy. Confidentiality of the UI HTML + relies on browser → gateway TLS (gateway.envoy.ssl.enabled), not on + Envoy → upstream TLS. 
+ */}} {{- end }} {{- if $gw.upstreams.agent.enabled }} @@ -675,7 +687,16 @@ data: name: envoy.transport_sockets.tls typed_config: "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext + sni: {{ $gw.upstreams.agent.host }} common_tls_context: + {{/* Envoy 1.29 upstream defaults to TLS 1.2 max. uvicorn's + SSLContext uses Python defaults (TLS 1.2 floor, 1.3 if + the openssl version supports it). Allow up to 1.3 so + negotiation can pick the most compatible option. */}} + tls_params: + tls_minimum_protocol_version: TLSv1_2 + tls_maximum_protocol_version: TLSv1_3 + {{- if $gw.tls.caSecret }} validation_context_sds_secret_config: name: upstream_ca sds_config: @@ -683,6 +704,7 @@ data: path: /var/config/sds_upstream_ca.yaml watched_directory: path: /var/config + {{- end }} {{- end }} {{- end }} @@ -707,7 +729,16 @@ data: name: envoy.transport_sockets.tls typed_config: "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext + sni: {{ $gw.upstreams.logger.host }} common_tls_context: + {{/* Envoy 1.29 upstream defaults to TLS 1.2 max. uvicorn's + SSLContext uses Python defaults (TLS 1.2 floor, 1.3 if + the openssl version supports it). Allow up to 1.3 so + negotiation can pick the most compatible option. 
*/}} + tls_params: + tls_minimum_protocol_version: TLSv1_2 + tls_maximum_protocol_version: TLSv1_3 + {{- if $gw.tls.caSecret }} validation_context_sds_secret_config: name: upstream_ca sds_config: @@ -715,6 +746,7 @@ data: path: /var/config/sds_upstream_ca.yaml watched_directory: path: /var/config + {{- end }} {{- end }} {{- end }} @@ -805,6 +837,7 @@ data: {{- end }} {{- if $envoy.internalJwks.enabled }} + {{- $jwksHost := $envoy.internalJwks.host | default $gw.upstreams.service.host }} - "@type": type.googleapis.com/envoy.config.cluster.v3.Cluster name: {{ $envoy.internalJwks.cluster }} connect_timeout: 3s @@ -818,8 +851,32 @@ data: - endpoint: address: socket_address: - address: {{ $envoy.internalJwks.host | default $gw.upstreams.service.host }} + address: {{ $jwksHost }} port_value: {{ $envoy.internalJwks.port | default $gw.upstreams.service.port }} + {{- if $gw.tls.enabled }} + transport_socket: + name: envoy.transport_sockets.tls + typed_config: + "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext + sni: {{ $jwksHost }} + common_tls_context: + {{/* Envoy 1.29 upstream defaults to TLS 1.2 max. uvicorn's + SSLContext uses Python defaults (TLS 1.2 floor, 1.3 if + the openssl version supports it). Allow up to 1.3 so + negotiation can pick the most compatible option. 
*/}} + tls_params: + tls_minimum_protocol_version: TLSv1_2 + tls_maximum_protocol_version: TLSv1_3 + {{- if $gw.tls.caSecret }} + validation_context_sds_secret_config: + name: upstream_ca + sds_config: + path_config_source: + path: /var/config/sds_upstream_ca.yaml + watched_directory: + path: /var/config + {{- end }} + {{- end }} {{- end }} {{- end }} diff --git a/deployments/charts/service/templates/_gateway-helpers.tpl b/deployments/charts/service/templates/_gateway-helpers.tpl index e47765cc7..49b255498 100644 --- a/deployments/charts/service/templates/_gateway-helpers.tpl +++ b/deployments/charts/service/templates/_gateway-helpers.tpl @@ -30,3 +30,66 @@ app.kubernetes.io/name: {{ include "osmo.gateway-name" .context }} app.kubernetes.io/instance: {{ .context.Release.Name }} app.kubernetes.io/component: {{ .component }} {{- end }} + +{{/* +Per-upstream TLS args. Pass a dict with "context" and "secretName". + +When secretName is non-empty, that Secret is mounted at /etc/osmo/tls and +uvicorn loads tls.crt + tls.key from there (--ssl_keyfile / --ssl_certfile). +When empty, the Python service mints an ephemeral self-signed cert in +process at startup (--ssl_self_signed true) — no chart-side cert material. +*/}} +{{- define "osmo.upstream-tls-args" -}} +{{- if .context.Values.gateway.tls.enabled }} +{{- if .secretName }} +- --ssl_keyfile +- /etc/osmo/tls/tls.key +- --ssl_certfile +- /etc/osmo/tls/tls.crt +{{- else }} +- --ssl_self_signed +- "true" +{{- end }} +{{- end }} +{{- end }} + +{{/* +TLS volume mount for an upstream container. Only emitted when a Secret +name is provided — self-signed mode keeps cert material in an in-process +tempdir, so no mount is needed. +*/}} +{{- define "osmo.upstream-tls-volume-mount" -}} +{{- if and .context.Values.gateway.tls.enabled .secretName }} +- name: tls + mountPath: /etc/osmo/tls + readOnly: true +{{- end }} +{{- end }} + +{{/* +TLS volume for an upstream pod. Pass dict with "context" and "secretName". 
+Only emitted when secretName is non-empty. +*/}} +{{- define "osmo.upstream-tls-volume" -}} +{{- if and .context.Values.gateway.tls.enabled .secretName }} +- name: tls + secret: + secretName: {{ .secretName }} +{{- end }} +{{- end }} + +{{/* +Render a probe block, injecting `scheme: HTTPS` into httpGet when TLS is on. +Pass dict with "probe" (the probe value from Values) and "context" ($). + +Use: + livenessProbe: + {{- include "osmo.upstream-probe-yaml" (dict "probe" .Values.services.service.livenessProbe "context" .) | nindent 10 }} +*/}} +{{- define "osmo.upstream-probe-yaml" -}} +{{- $probe := .probe }} +{{- if and $probe .context.Values.gateway.tls.enabled (hasKey $probe "httpGet") }} + {{- $probe = mustMergeOverwrite (deepCopy $probe) (dict "httpGet" (dict "scheme" "HTTPS")) }} +{{- end }} +{{- toYaml $probe }} +{{- end }} diff --git a/deployments/charts/service/templates/agent-service.yaml b/deployments/charts/service/templates/agent-service.yaml index b76784f82..7e54f9424 100644 --- a/deployments/charts/service/templates/agent-service.yaml +++ b/deployments/charts/service/templates/agent-service.yaml @@ -130,6 +130,7 @@ spec: {{- range $arg := .Values.services.agent.extraArgs }} - {{ $arg | quote }} {{- end }} + {{- include "osmo.upstream-tls-args" (dict "context" . 
"secretName" .Values.gateway.tls.upstreamCerts.agent) | nindent 8 }} env: {{- if .Values.services.migration.enabled }} - name: OSMO_SCHEMA_VERSION @@ -154,7 +155,7 @@ spec: {{- end }} imagePullPolicy: {{ .Values.services.agent.imagePullPolicy }} ports: - {{- if or .Values.services.configFile.enabled .Values.global.logs.enabled .Values.services.configs.enabled .Values.services.agent.extraVolumeMounts }} + {{- if or .Values.services.configFile.enabled .Values.global.logs.enabled .Values.services.configs.enabled .Values.services.agent.extraVolumeMounts (and .Values.gateway.tls.enabled .Values.gateway.tls.upstreamCerts.agent) }} volumeMounts: {{- end }} {{- if .Values.services.configFile.enabled}} @@ -168,6 +169,7 @@ spec: mountPath: /logs {{- end }} {{- include "osmo.extra-volume-mounts" .Values.services.agent | nindent 8 }} + {{- include "osmo.upstream-tls-volume-mount" (dict "context" . "secretName" .Values.gateway.tls.upstreamCerts.agent) | nindent 8 }} resources: {{- toYaml .Values.services.agent.resources | nindent 10 }} @@ -202,6 +204,9 @@ spec: httpGet: port: 8000 path: /health + {{- if .Values.gateway.tls.enabled }} + scheme: HTTPS + {{- end }} periodSeconds: 45 failureThreshold: 3 timeoutSeconds: 20 @@ -210,6 +215,7 @@ spec: {{- include "osmo.extra-sidecars" .Values.services.agent | nindent 6 }} volumes: {{- include "osmo.extra-volumes" .Values.services.agent | nindent 8 }} + {{- include "osmo.upstream-tls-volume" (dict "context" . 
"secretName" .Values.gateway.tls.upstreamCerts.agent) | nindent 8 }} {{- if .Values.global.logs.enabled }} - name: logs emptyDir: {} diff --git a/deployments/charts/service/templates/api-service.yaml b/deployments/charts/service/templates/api-service.yaml index 67a75f145..4e93c9e98 100644 --- a/deployments/charts/service/templates/api-service.yaml +++ b/deployments/charts/service/templates/api-service.yaml @@ -152,6 +152,7 @@ spec: {{- range $arg := .Values.services.service.extraArgs }} - {{ $arg | quote }} {{- end }} + {{- include "osmo.upstream-tls-args" (dict "context" . "secretName" .Values.gateway.tls.upstreamCerts.service) | nindent 8 }} env: - name: OSMO_DISABLE_TASK_METRICS value: {{ .Values.services.service.disableTaskMetrics | quote }} @@ -193,7 +194,7 @@ spec: ports: - name: metrics containerPort: 9464 - {{- if or .Values.services.configFile.enabled .Values.global.logs.enabled .Values.services.configs.enabled .Values.services.service.extraVolumeMounts }} + {{- if or .Values.services.configFile.enabled .Values.global.logs.enabled .Values.services.configs.enabled .Values.services.service.extraVolumeMounts (and .Values.gateway.tls.enabled .Values.gateway.tls.upstreamCerts.service) }} volumeMounts: {{- end }} {{- if .Values.services.configFile.enabled}} @@ -207,12 +208,13 @@ spec: mountPath: /logs {{- end }} {{- include "osmo.extra-volume-mounts" .Values.services.service | nindent 8 }} + {{- include "osmo.upstream-tls-volume-mount" (dict "context" . "secretName" .Values.gateway.tls.upstreamCerts.service) | nindent 8 }} resources: {{- toYaml .Values.services.service.resources | nindent 10 }} # Any failure to return the version api means the service is in a bad state livenessProbe: - {{- toYaml .Values.services.service.livenessProbe | nindent 10 }} + {{- include "osmo.upstream-probe-yaml" (dict "probe" .Values.services.service.livenessProbe "context" .) 
| nindent 10 }} # Give the container 30 seconds to startup @@ -220,6 +222,9 @@ spec: httpGet: port: 8000 path: /api/version + {{- if .Values.gateway.tls.enabled }} + scheme: HTTPS + {{- end }} failureThreshold: 6 periodSeconds: 5 timeoutSeconds: 3 @@ -230,6 +235,9 @@ spec: httpGet: port: 8000 path: /api/workflow?limit=0&all_pools=true + {{- if .Values.gateway.tls.enabled }} + scheme: HTTPS + {{- end }} httpHeaders: - name: x-osmo-roles value: osmo-admin @@ -240,6 +248,7 @@ spec: {{- include "osmo.extra-sidecars" .Values.services.service | nindent 6 }} volumes: {{- include "osmo.extra-volumes" .Values.services.service | nindent 8 }} + {{- include "osmo.upstream-tls-volume" (dict "context" . "secretName" .Values.gateway.tls.upstreamCerts.service) | nindent 8 }} {{- if .Values.global.logs.enabled }} - name: logs emptyDir: {} diff --git a/deployments/charts/service/templates/gateway.yaml b/deployments/charts/service/templates/gateway.yaml index 56105245e..df1d65690 100644 --- a/deployments/charts/service/templates/gateway.yaml +++ b/deployments/charts/service/templates/gateway.yaml @@ -84,7 +84,7 @@ spec: - mountPath: /var/config name: envoy-config readOnly: true - {{- if $gw.tls.enabled }} + {{- if and $gw.tls.enabled $gw.tls.caSecret }} - name: gateway-tls-ca mountPath: /etc/gateway-tls readOnly: true @@ -112,10 +112,10 @@ spec: - name: envoy-config configMap: name: {{ $gwName }}-envoy-config - {{- if $gw.tls.enabled }} + {{- if and $gw.tls.enabled $gw.tls.caSecret }} - name: gateway-tls-ca secret: - secretName: {{ $gwName }}-ca-tls + secretName: {{ $gw.tls.caSecret }} items: - key: ca.crt path: ca.crt diff --git a/deployments/charts/service/templates/logger-service.yaml b/deployments/charts/service/templates/logger-service.yaml index 9644de8b8..913ec03d5 100644 --- a/deployments/charts/service/templates/logger-service.yaml +++ b/deployments/charts/service/templates/logger-service.yaml @@ -125,6 +125,7 @@ spec: {{- range $arg := .Values.services.logger.extraArgs }} - 
{{ $arg | quote }} {{- end }} + {{- include "osmo.upstream-tls-args" (dict "context" . "secretName" .Values.gateway.tls.upstreamCerts.logger) | nindent 8 }} env: {{- include "osmo.configmap-env" . | nindent 8 }} {{- include "osmo.extra-env" .Values.services.logger | nindent 8 }} @@ -145,7 +146,7 @@ spec: {{- end }} imagePullPolicy: {{ .Values.services.logger.imagePullPolicy }} ports: - {{- if or .Values.services.configFile.enabled .Values.global.logs.enabled .Values.services.configs.enabled .Values.services.logger.extraVolumeMounts }} + {{- if or .Values.services.configFile.enabled .Values.global.logs.enabled .Values.services.configs.enabled .Values.services.logger.extraVolumeMounts (and .Values.gateway.tls.enabled .Values.gateway.tls.upstreamCerts.logger) }} volumeMounts: {{- end }} {{- if .Values.services.configFile.enabled}} @@ -159,6 +160,7 @@ spec: mountPath: /logs {{- end }} {{- include "osmo.extra-volume-mounts" .Values.services.logger | nindent 8 }} + {{- include "osmo.upstream-tls-volume-mount" (dict "context" . "secretName" .Values.gateway.tls.upstreamCerts.logger) | nindent 8 }} resources: {{- toYaml .Values.services.logger.resources | nindent 10 }} @@ -193,6 +195,9 @@ spec: httpGet: port: 8000 path: /health + {{- if .Values.gateway.tls.enabled }} + scheme: HTTPS + {{- end }} periodSeconds: 45 failureThreshold: 3 timeoutSeconds: 20 @@ -201,6 +206,7 @@ spec: {{- include "osmo.extra-sidecars" .Values.services.logger | nindent 6 }} volumes: {{- include "osmo.extra-volumes" .Values.services.logger | nindent 8 }} + {{- include "osmo.upstream-tls-volume" (dict "context" . 
"secretName" .Values.gateway.tls.upstreamCerts.logger) | nindent 8 }} {{- if .Values.global.logs.enabled }} - name: logs emptyDir: {} diff --git a/deployments/charts/service/templates/router-service.yaml b/deployments/charts/service/templates/router-service.yaml index ea1aa7536..920ed4ebd 100644 --- a/deployments/charts/service/templates/router-service.yaml +++ b/deployments/charts/service/templates/router-service.yaml @@ -120,6 +120,7 @@ spec: - {{ . }} {{- end }} {{- end }} + {{- include "osmo.upstream-tls-args" (dict "context" . "secretName" .Values.gateway.tls.upstreamCerts.router) | nindent 8 }} env: - name: OSMO_SCHEMA_VERSION value: {{ .Values.targetSchema | default "public" }} @@ -162,7 +163,7 @@ spec: protocol: {{ .protocol | default "TCP" }} {{- end }} {{- end }} - {{- if or .Values.global.logs.enabled .Values.services.configFile.enabled .Values.services.router.extraVolumeMounts }} + {{- if or .Values.global.logs.enabled .Values.services.configFile.enabled .Values.services.router.extraVolumeMounts (and .Values.gateway.tls.enabled .Values.gateway.tls.upstreamCerts.router) }} volumeMounts: {{- if .Values.services.configFile.enabled}} - mountPath: {{ .Values.services.configFile.path }} @@ -176,23 +177,24 @@ spec: {{- with .Values.services.router.extraVolumeMounts }} {{- toYaml . | nindent 8 }} {{- end }} + {{- include "osmo.upstream-tls-volume-mount" (dict "context" . "secretName" .Values.gateway.tls.upstreamCerts.router) | nindent 8 }} {{- end }} resources: {{- toYaml .Values.services.router.resources | nindent 10 }} {{- with .Values.services.router.livenessProbe }} livenessProbe: - {{- toYaml . | nindent 10 }} + {{- include "osmo.upstream-probe-yaml" (dict "probe" . "context" $) | nindent 10 }} {{- end }} {{- with .Values.services.router.startupProbe }} startupProbe: - {{- toYaml . | nindent 10 }} + {{- include "osmo.upstream-probe-yaml" (dict "probe" . 
"context" $) | nindent 10 }} {{- end }} {{- with .Values.services.router.readinessProbe }} readinessProbe: - {{- toYaml . | nindent 10 }} + {{- include "osmo.upstream-probe-yaml" (dict "probe" . "context" $) | nindent 10 }} {{- end }} {{- with .Values.services.router.extraContainers }} @@ -215,6 +217,7 @@ spec: {{- with .Values.services.router.extraVolumes }} {{- toYaml . | nindent 8 }} {{- end }} + {{- include "osmo.upstream-tls-volume" (dict "context" . "secretName" .Values.gateway.tls.upstreamCerts.router) | nindent 8 }} --- diff --git a/deployments/charts/service/values.yaml b/deployments/charts/service/values.yaml index 925cc2e16..98de20410 100644 --- a/deployments/charts/service/values.yaml +++ b/deployments/charts/service/values.yaml @@ -2085,12 +2085,9 @@ gateway: nodeSelector: {} tolerations: [] - ## ----------------------------------------------------------------------- - ## Network Policies — restrict ingress to upstream service pods so that - ## only the gateway Envoy pods can reach them. Requires a CNI that - ## enforces NetworkPolicy (e.g. Calico, Cilium, or AWS VPC CNI with + ## Restrict upstream pod ingress to the gateway Envoy. Requires a CNI + ## that enforces NetworkPolicy (Calico, Cilium, AWS VPC CNI with ## enable-network-policy-controller=true). - ## ----------------------------------------------------------------------- networkPolicies: enabled: false upstreams: @@ -2107,11 +2104,30 @@ gateway: app: osmo-ui port: 8000 - ## ----------------------------------------------------------------------- - ## TLS — self-signed certificates for encrypted gateway-to-upstream - ## communication. When enabled, Helm generates a CA and server cert; - ## Envoy uses the CA to verify upstream connections. The upstream - ## services must also be configured to serve HTTPS. - ## ----------------------------------------------------------------------- + ## TLS for gateway -> upstream traffic. 
Default: each service mints an + ## ephemeral self-signed cert in-process; Envoy connects with TLS but does + ## not validate (encryption-only — identity is provided by NetworkPolicy / + ## RBAC). UI is excluded; stays HTTP. + ## + ## To use externally-provisioned certs, set upstreamCerts.<name> to the + ## name of an existing kubernetes.io/tls Secret containing tls.crt + tls.key. + ## To make Envoy validate against a CA, set caSecret to an existing Secret + ## containing ca.crt. How those Secrets get created (cert-manager, Vault + ## CSI, sealed-secrets, manual, etc.) is up to the operator — OSMO just + ## consumes them. tls: - enabled: false + enabled: true + + ## Per-upstream server certs. Empty string = mint ephemeral self-signed + ## in-process. Otherwise the named Secret is mounted at /etc/osmo/tls + ## and uvicorn loads tls.crt + tls.key from it. + upstreamCerts: + service: "" + router: "" + agent: "" + logger: "" + + ## Optional CA bundle for Envoy upstream validation. When set, Envoy + ## validates upstream certs against ca.crt from this Secret. When empty, + ## TLS is encryption-only (no validation). + caSecret: "" diff --git a/docs/deployment_guide/getting_started/deploy_service.rst b/docs/deployment_guide/getting_started/deploy_service.rst index adb00f45f..dee6f7d4a 100644 --- a/docs/deployment_guide/getting_started/deploy_service.rst +++ b/docs/deployment_guide/getting_started/deploy_service.rst @@ -288,7 +288,7 @@ Create ``osmo_values.yaml`` for the OSMO service with the following sample. :icon: file .. code-block:: yaml - :emphasize-lines: 4, 21-23, 34, 36, 42, 51, 54-59, 74, 148-149, 153-154, 160, 164, 178-180, 195-197 + :emphasize-lines: 4, 21-23, 34, 36, 42, 51, 54-59, 74, 148-149, 153-154, 160, 164, 178-180, 217-219 # Global configuration shared across all OSMO services global: @@ -475,10 +475,32 @@ Create ``osmo_values.yaml`` for the OSMO service with the following sample. # OSMO-issued JWTs (e.g. 
for access-token-based access) - issuer: osmo audience: osmo - jwks_uri: http://osmo-service/api/auth/keys + # https:// because the gateway -> upstream path is encrypted by + # default (gateway.tls.enabled). Use http:// only if you set + # gateway.tls.enabled: false. + jwks_uri: https://osmo-service/api/auth/keys user_claim: unique_name cluster: osmo-service-jwks + # Gateway -> upstream TLS. Enabled by default: each upstream service + # (osmo-service, osmo-router, osmo-agent, osmo-logger) mints an + # ephemeral self-signed cert in-process at startup, uvicorn serves + # HTTPS on :8000, and Envoy connects with TLS but skips cert validation. + # UI stays HTTP behind NetworkPolicy. + # + # To use externally-provisioned certs (cert-manager, Vault CSI, + # sealed-secrets, manual — OSMO doesn't care), point upstreamCerts at + # existing kubernetes.io/tls Secrets. To make Envoy validate against a + # CA, set caSecret to an existing Secret containing ca.crt. + tls: + enabled: true + # upstreamCerts: + # service: osmo-service-tls + # router: osmo-router-tls + # agent: osmo-agent-tls + # logger: osmo-logger-tls + # caSecret: osmo-gateway-ca + # OAuth2 Proxy configuration # Set OIDC issuer URL and client ID from your IdP (e.g. Microsoft Entra ID, Google). See identity_provider_setup. oauth2Proxy: @@ -632,6 +654,7 @@ Troubleshooting * **Database connection failures**: Verify the database is running and accessible * **Authentication configuration issues**: Verify the authentication configuration is correct * **Gateway routing problems**: Verify the gateway pods are running and the ``osmo-gateway`` service has an external IP (``kubectl get svc osmo-gateway -n osmo``) + * **Repeated** ``Jwks async fetching ... failed`` **in the gateway logs**: the OSMO-issued-JWT provider's ``jwks_uri`` scheme must match ``gateway.tls.enabled`` (``https://`` when on, ``http://`` when off). 
Verify with the Envoy admin endpoint: ``cluster.osmo-service-jwks.ssl.handshake`` should grow alongside ``upstream_cx_total``; if it stays at ``0``, the upstream wasn't restarted to pick up its TLS config. * **Resource constraints**: Verify the resource limits are set correctly * **Missing secrets or incorrect configurations**: Verify the secrets are created correctly and the configurations are correct * **ConfigMap validation errors**: Pod in CrashLoopBackOff after a Helm upgrade — check ``kubectl describe configmap osmo-service-configs`` for the validation error diff --git a/src/service/agent/BUILD b/src/service/agent/BUILD index 3fbddb450..200a4c843 100644 --- a/src/service/agent/BUILD +++ b/src/service/agent/BUILD @@ -32,6 +32,7 @@ osmo_py_library( requirement("fastapi"), requirement("uvicorn"), "//src/utils:backend_messages", + "//src/utils:ssl_config", "//src/utils:ssl_init", "//src/utils/metrics", "//src/utils/progress_check:progress_check_lib", diff --git a/src/service/agent/agent_service.py b/src/service/agent/agent_service.py index 650f3d9fa..959e5df02 100644 --- a/src/service/agent/agent_service.py +++ b/src/service/agent/agent_service.py @@ -34,7 +34,7 @@ from src.service.core.config import configmap_loader from src.service.core.config.configmap_loader import ConfigFileMixin from src.service.core.workflow import objects -from src.utils import connectors, static_config +from src.utils import connectors, ssl_config, static_config from src.utils.progress_check import progress @@ -44,7 +44,8 @@ # mixin BackendServiceConfig.load() would reject the unknown flag and crash. 
class BackendServiceConfig(connectors.RedisConfig, connectors.PostgresConfig, src.lib.utils.logging.LoggingConfig, - static_config.StaticConfig, ConfigFileMixin): + static_config.StaticConfig, + ssl_config.SSLConfig, ConfigFileMixin): """Config settings for the backend service""" progress_period: int = pydantic.Field( default=30, @@ -140,7 +141,8 @@ async def liveness_update(): await asyncio.sleep(agent_service_config.progress_period) async def run_server(): - uvicorn_config = uvicorn.Config(app, host=host, port=port, log_config=None) + uvicorn_config = uvicorn.Config(app, host=host, port=port, log_config=None, + **config.uvicorn_ssl_kwargs()) uvicorn_server = uvicorn.Server(config=uvicorn_config) liveness_task = asyncio.create_task(liveness_update()) try: diff --git a/src/service/core/service.py b/src/service/core/service.py index 97bafc39c..9ec310cd6 100644 --- a/src/service/core/service.py +++ b/src/service/core/service.py @@ -547,7 +547,7 @@ def main(): port = 8000 try: - uvicorn.run(app, host=host, port=port, log_config=None) + uvicorn.run(app, host=host, port=port, log_config=None, **config.uvicorn_ssl_kwargs()) except KeyboardInterrupt: sys.exit(0) diff --git a/src/service/core/tests/test_asyncio_startup.py b/src/service/core/tests/test_asyncio_startup.py index abfbbb013..3c798172b 100644 --- a/src/service/core/tests/test_asyncio_startup.py +++ b/src/service/core/tests/test_asyncio_startup.py @@ -84,6 +84,7 @@ def test_logger_main_starts_without_default_event_loop(self): progress_file='/tmp/logger-progress', progress_period=60, config_file=None, + uvicorn_ssl_kwargs=lambda: {}, ) with ( @@ -106,6 +107,7 @@ def test_agent_main_starts_without_default_event_loop(self): host='http://127.0.0.1:8000', progress_file='/tmp/agent-progress', config_file=None, + uvicorn_ssl_kwargs=lambda: {}, ) agent_config = types.SimpleNamespace(progress_period=60) @@ -156,7 +158,10 @@ def test_router_main_starts_without_default_event_loop(self): 
mock.patch('fastapi.applications.FastAPI.add_middleware'), ): router = importlib.import_module('src.service.router.router') - config = types.SimpleNamespace(host='http://127.0.0.1:8000') + config = types.SimpleNamespace( + host='http://127.0.0.1:8000', + uvicorn_ssl_kwargs=lambda: {}, + ) with ( mock.patch.object(router.RouterServiceConfig, 'load', return_value=config), diff --git a/src/service/core/workflow/BUILD b/src/service/core/workflow/BUILD index 7649e8e82..d5116ae42 100644 --- a/src/service/core/workflow/BUILD +++ b/src/service/core/workflow/BUILD @@ -41,6 +41,7 @@ osmo_py_library( "//src/lib/utils:osmo_errors", "//src/lib/utils:redact", "//src/service/core/config:configmap_loader_lib", + "//src/utils:ssl_config", "//src/utils:static_config", "//src/utils/job:job", "//src/utils:yaml", diff --git a/src/service/core/workflow/objects.py b/src/service/core/workflow/objects.py index b04ed91b1..1f15b4b5c 100644 --- a/src/service/core/workflow/objects.py +++ b/src/service/core/workflow/objects.py @@ -33,13 +33,14 @@ from src.service.core.config.configmap_loader import ConfigFileMixin from src.utils.job import app, common as task_common, jobs, kb_objects, task, workflow from src.utils.job.task import _encode_hstore -from src.utils import connectors, static_config, yaml as util_yaml +from src.utils import connectors, ssl_config, static_config, yaml as util_yaml from src.utils.metrics import metrics class WorkflowServiceConfig(connectors.RedisConfig, connectors.PostgresConfig, src.lib.utils.logging.LoggingConfig, static_config.StaticConfig, + ssl_config.SSLConfig, metrics.MetricsCreatorConfig, ConfigFileMixin): """ Manages configuration specific to the workflow service. 
""" diff --git a/src/service/logger/BUILD b/src/service/logger/BUILD index 58c70e2c6..3a1c2a2ef 100644 --- a/src/service/logger/BUILD +++ b/src/service/logger/BUILD @@ -49,6 +49,7 @@ osmo_py_library( "//src/service/core/auth", "//src/service/core/config:configmap_loader_lib", "//src/service/core/workflow", + "//src/utils:ssl_config", "//src/utils:ssl_init", "//src/utils:static_config", "//src/utils/job", diff --git a/src/service/logger/logger.py b/src/service/logger/logger.py index 5a69b6e85..1a850cc77 100644 --- a/src/service/logger/logger.py +++ b/src/service/logger/logger.py @@ -30,13 +30,14 @@ from src.service.core.auth import auth_service from src.service.core.config import configmap_loader from src.service.core.config.configmap_loader import ConfigFileMixin -from src.utils import connectors, static_config +from src.utils import connectors, ssl_config, static_config from src.utils.progress_check import progress class LoggerServiceConfig(connectors.RedisConfig, connectors.PostgresConfig, src.lib.utils.logging.LoggingConfig, - static_config.StaticConfig, ConfigFileMixin): + static_config.StaticConfig, + ssl_config.SSLConfig, ConfigFileMixin): """Config settings for the logger service""" host: str = pydantic.Field( default='http://0.0.0.0:8000', @@ -91,7 +92,8 @@ async def liveness_update(): await asyncio.sleep(config.progress_period) async def run_server(): - uvicorn_config = uvicorn.Config(app, host=host, port=port, log_config=None) + uvicorn_config = uvicorn.Config(app, host=host, port=port, log_config=None, + **config.uvicorn_ssl_kwargs()) uvicorn_server = uvicorn.Server(config=uvicorn_config) liveness_task = asyncio.create_task(liveness_update()) try: diff --git a/src/service/router/BUILD b/src/service/router/BUILD index ba29e7f17..78fb8e3b4 100644 --- a/src/service/router/BUILD +++ b/src/service/router/BUILD @@ -36,6 +36,7 @@ osmo_py_library( "//src/lib/utils:common", "//src/lib/utils:logging", "//src/lib/utils:version", + "//src/utils:ssl_config", 
"//src/utils:ssl_init", "//src/utils:static_config", ], diff --git a/src/service/router/router.py b/src/service/router/router.py index 30e65aede..9b1e12982 100644 --- a/src/service/router/router.py +++ b/src/service/router/router.py @@ -34,11 +34,11 @@ from src.lib.utils import common, version import src.lib.utils.logging from src.service.router import helper -from src.utils import connectors, static_config +from src.utils import connectors, ssl_config, static_config class RouterServiceConfig(src.lib.utils.logging.LoggingConfig, static_config.StaticConfig, - connectors.PostgresConfig): + ssl_config.SSLConfig, connectors.PostgresConfig): """Config settings for the logger service""" host: str = pydantic.Field( default='http://0.0.0.0:8000', @@ -425,7 +425,8 @@ def main(): connectors.PostgresConnector(config) async def run_server(): - uvicorn_config = uvicorn.Config(app, host=host, port=port, log_config=None) + uvicorn_config = uvicorn.Config(app, host=host, port=port, log_config=None, + **config.uvicorn_ssl_kwargs()) uvicorn_server = uvicorn.Server(config=uvicorn_config) check_timeout_task = asyncio.create_task(check_webserver_timeout()) try: diff --git a/src/utils/BUILD b/src/utils/BUILD index 76dc02ea6..77f45aafb 100644 --- a/src/utils/BUILD +++ b/src/utils/BUILD @@ -50,6 +50,18 @@ osmo_py_library( visibility = ["//visibility:public"], ) +osmo_py_library( + name = "ssl_config", + srcs = [ + "ssl_config.py", + ], + deps = [ + requirement("cryptography"), + requirement("pydantic"), + ], + visibility = ["//visibility:public"], +) + osmo_py_library( name = "auth", srcs = ["auth.py"], diff --git a/src/utils/ssl_config.py b/src/utils/ssl_config.py new file mode 100644 index 000000000..1c8260c05 --- /dev/null +++ b/src/utils/ssl_config.py @@ -0,0 +1,165 @@ +""" +SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# pylint: disable=line-too-long + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +SPDX-License-Identifier: Apache-2.0 + +TLS/SSL configuration for services that listen on a uvicorn port. Lives in +its own module (rather than next to StaticConfig) because it pulls in the +`cryptography` package, and small utility binaries that only need +StaticConfig (e.g. progress_check) shouldn't have to bundle that dep. +""" + +import datetime +import ipaddress +import os +import socket +import tempfile +from typing import Any, Dict, Optional, Tuple + +from cryptography import x509 +from cryptography.hazmat.primitives import hashes, serialization +from cryptography.hazmat.primitives.asymmetric import ec +from cryptography.x509.oid import NameOID +import pydantic + + +class SSLConfig(pydantic.BaseModel): + """TLS/SSL configuration for the uvicorn listener. + + Three modes, picked by which flags are set: + + 1. ssl_self_signed=True + The process mints a fresh ECDSA P-256 cert at startup, writes it to a + temp dir, and points uvicorn at it. The cert is per-process and lives + only as long as the container. Used by the chart's default + gateway.tls.enabled mode where Envoy connects with TLS but does not + validate the upstream cert (common_tls_context: {}). No CA management, + no Secret rotation, no init container needed. + + 2. ssl_keyfile and ssl_certfile point at on-disk PEMs (e.g. mounted from + a cert-manager-managed Secret). The process serves HTTPS using the + provided cert. 
Used when the chart is in cert-manager mode. + + 3. None set: plain HTTP. + """ + ssl_keyfile: Optional[str] = pydantic.Field( + default=None, + description='Path to a PEM-encoded private key. If set together with ' + 'ssl_certfile, the service serves HTTPS instead of HTTP.', + json_schema_extra={'command_line': 'ssl_keyfile', 'env': 'OSMO_SSL_KEYFILE'}) + ssl_certfile: Optional[str] = pydantic.Field( + default=None, + description='Path to a PEM-encoded certificate (server leaf, optionally ' + 'chained). Required together with ssl_keyfile.', + json_schema_extra={'command_line': 'ssl_certfile', 'env': 'OSMO_SSL_CERTFILE'}) + ssl_self_signed: bool = pydantic.Field( + default=False, + description='Generate an ephemeral self-signed cert in-process and ' + 'serve HTTPS with it. The cert is regenerated on every ' + 'process start. Useful when the consumer (e.g. the OSMO ' + 'gateway) wants encryption-without-validation.', + json_schema_extra={'command_line': 'ssl_self_signed', + 'env': 'OSMO_SSL_SELF_SIGNED'}) + + @pydantic.model_validator(mode='after') + def _validate_ssl_combination(self) -> 'SSLConfig': + """Reject incomplete or conflicting TLS settings at config-load time. + + Silently falling back to HTTP when one of these is misconfigured leads + to confusing failures later (Envoy talks TLS to a plain-HTTP listener, + clients hit unexpected redirects, etc.). Fail fast instead so the + operator sees the problem at startup. + """ + explicit_paths = bool(self.ssl_keyfile) or bool(self.ssl_certfile) + both_paths = bool(self.ssl_keyfile) and bool(self.ssl_certfile) + + # Incomplete: exactly one of keyfile/certfile. + if explicit_paths and not both_paths: + missing = 'ssl_certfile' if self.ssl_keyfile else 'ssl_keyfile' + raise ValueError( + f'TLS misconfigured: ssl_keyfile and ssl_certfile must be set ' + f'together; missing {missing}. Set both to enable TLS, or ' + f'unset both to serve plain HTTP.') + + # Conflicting: self-signed mode plus explicit on-disk paths. 
+ if self.ssl_self_signed and explicit_paths: + raise ValueError( + 'TLS misconfigured: ssl_self_signed cannot be combined with ' + 'explicit ssl_keyfile/ssl_certfile. Pick one mode — set ' + 'ssl_self_signed=true to mint an ephemeral cert in-process, ' + 'or provide ssl_keyfile + ssl_certfile to use on-disk PEMs.') + + return self + + def uvicorn_ssl_kwargs(self) -> Dict[str, Any]: + """Return uvicorn keyword args for TLS, or an empty dict if TLS is off. + + The validator above guarantees we're in exactly one of three states: + all-unset (HTTP), self-signed-only, or both paths set. + """ + if self.ssl_self_signed: + keyfile, certfile = mint_ephemeral_self_signed() + return {'ssl_keyfile': keyfile, 'ssl_certfile': certfile} + if self.ssl_keyfile and self.ssl_certfile: + return {'ssl_keyfile': self.ssl_keyfile, 'ssl_certfile': self.ssl_certfile} + return {} + + +def mint_ephemeral_self_signed() -> Tuple[str, str]: + """Generate an ECDSA P-256 self-signed cert and write it to a temp dir. + + Returns (keyfile_path, certfile_path). uvicorn opens both at startup and + parses them into an in-memory SSLContext, so the files only need to exist + long enough for ssl.SSLContext.load_cert_chain() to read them. We don't + bother deleting them; the temp dir goes away when the container exits. + + SANs include the pod hostname so anything that DOES validate (e.g. a + cluster-internal probe with HTTPS scheme) gets a name match. Envoy with + common_tls_context: {} ignores SANs entirely. 
+ """ + private_key = ec.generate_private_key(ec.SECP256R1()) + hostname = socket.gethostname() or 'localhost' + subject = issuer = x509.Name([x509.NameAttribute(NameOID.COMMON_NAME, hostname)]) + + san_entries: list[x509.GeneralName] = [ + x509.DNSName(hostname), + x509.DNSName('localhost'), + x509.IPAddress(ipaddress.ip_address('127.0.0.1')), + ] + + now = datetime.datetime.now(datetime.timezone.utc) + cert = (x509.CertificateBuilder() + .subject_name(subject) + .issuer_name(issuer) + .public_key(private_key.public_key()) + .serial_number(x509.random_serial_number()) + .not_valid_before(now - datetime.timedelta(minutes=5)) + .not_valid_after(now + datetime.timedelta(days=365)) + .add_extension(x509.SubjectAlternativeName(san_entries), critical=False) + .add_extension(x509.BasicConstraints(ca=False, path_length=None), critical=True) + .sign(private_key, hashes.SHA256())) + + tmpdir = tempfile.mkdtemp(prefix='osmo-tls-') + keyfile_path = os.path.join(tmpdir, 'tls.key') + certfile_path = os.path.join(tmpdir, 'tls.crt') + with open(keyfile_path, 'wb') as f: + f.write(private_key.private_bytes( + encoding=serialization.Encoding.PEM, + format=serialization.PrivateFormat.PKCS8, + encryption_algorithm=serialization.NoEncryption())) + os.chmod(keyfile_path, 0o600) + with open(certfile_path, 'wb') as f: + f.write(cert.public_bytes(serialization.Encoding.PEM)) + return keyfile_path, certfile_path diff --git a/src/utils/tests/BUILD b/src/utils/tests/BUILD index b95b5d3ca..78372b738 100644 --- a/src/utils/tests/BUILD +++ b/src/utils/tests/BUILD @@ -18,7 +18,7 @@ SPDX-License-Identifier: Apache-2.0 """ load("//bzl:py.bzl", "osmo_py_test") -load("@osmo_python_deps//:requirements.bzl", "requirement") +load("@osmo_python_deps//:requirements.bzl", "requirement") osmo_py_test( name = "test_auth", @@ -31,6 +31,18 @@ osmo_py_test( ] ) +osmo_py_test( + name = "test_ssl_config", + srcs = [ + "test_ssl_config.py" + ], + deps = [ + "//src/utils:ssl_config", + requirement("cryptography"), 
+ requirement("pydantic"), + ] +) + osmo_py_test( name = "test_ssl_init", srcs = [ diff --git a/src/utils/tests/test_ssl_config.py b/src/utils/tests/test_ssl_config.py new file mode 100644 index 000000000..36f1dba1c --- /dev/null +++ b/src/utils/tests/test_ssl_config.py @@ -0,0 +1,157 @@ +""" +SPDX-FileCopyrightText: NVIDIA CORPORATION +Copyright (c) 2026 NVIDIA CORPORATION. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +SPDX-License-Identifier: Apache-2.0 +""" +import datetime +import os +import socket +import ssl +import stat +import unittest + +from cryptography import x509 +from cryptography.hazmat.primitives import serialization +import pydantic + +from src.utils import ssl_config + + +class TestSSLConfigKwargs(unittest.TestCase): + """uvicorn_ssl_kwargs() picks the right mode for the given fields.""" + + def test_no_tls_returns_empty_kwargs(self): + cfg = ssl_config.SSLConfig() + self.assertEqual(cfg.uvicorn_ssl_kwargs(), {}) + + def test_explicit_paths_pass_through(self): + cfg = ssl_config.SSLConfig(ssl_keyfile='/etc/k.pem', ssl_certfile='/etc/c.pem') + self.assertEqual(cfg.uvicorn_ssl_kwargs(), + {'ssl_keyfile': '/etc/k.pem', 'ssl_certfile': '/etc/c.pem'}) + + def test_only_keyfile_set_raises(self): + # Half-configured TLS is the kind of bug that silently degrades a + # production listener to plain HTTP; the validator must fail loudly. 
+ with self.assertRaisesRegex(pydantic.ValidationError, 'ssl_certfile'): + ssl_config.SSLConfig(ssl_keyfile='/etc/k.pem') + + def test_only_certfile_set_raises(self): + with self.assertRaisesRegex(pydantic.ValidationError, 'ssl_keyfile'): + ssl_config.SSLConfig(ssl_certfile='/etc/c.pem') + + def test_self_signed_with_explicit_paths_raises(self): + # Specifying both modes is ambiguous — pick one. Reject early so the + # operator notices instead of guessing which mode wins. + with self.assertRaisesRegex(pydantic.ValidationError, 'ssl_self_signed'): + ssl_config.SSLConfig(ssl_self_signed=True, + ssl_keyfile='/etc/k.pem', + ssl_certfile='/etc/c.pem') + + def test_self_signed_with_just_keyfile_raises(self): + # Self-signed plus a lone keyfile is invalid twice over. The validator + # checks the incomplete pair first, so the error names the missing + # ssl_certfile; either way construction must fail, never pick a mode. + with self.assertRaises(pydantic.ValidationError): + ssl_config.SSLConfig(ssl_self_signed=True, ssl_keyfile='/etc/k.pem') + + def test_self_signed_returns_real_paths(self): + cfg = ssl_config.SSLConfig(ssl_self_signed=True) + kwargs = cfg.uvicorn_ssl_kwargs() + self.assertIn('ssl_keyfile', kwargs) + self.assertIn('ssl_certfile', kwargs) + self.assertTrue(os.path.isfile(kwargs['ssl_keyfile'])) + self.assertTrue(os.path.isfile(kwargs['ssl_certfile'])) + + +class TestEphemeralSelfSigned(unittest.TestCase): + """mint_ephemeral_self_signed produces a usable cert/key pair on disk.""" + + def setUp(self): + self.keyfile, self.certfile = ssl_config.mint_ephemeral_self_signed() + + def test_files_exist_and_are_nonempty(self): + self.assertTrue(os.path.isfile(self.keyfile)) + self.assertTrue(os.path.isfile(self.certfile)) + self.assertGreater(os.path.getsize(self.keyfile), 0) + self.assertGreater(os.path.getsize(self.certfile), 0) + + def test_cert_pem_parses_as_x509(self): + with open(self.certfile, 'rb') as f: + cert = x509.load_pem_x509_certificate(f.read()) + self.assertIsInstance(cert,
x509.Certificate) + + def test_cert_has_expected_sans(self): + with open(self.certfile, 'rb') as f: + cert = x509.load_pem_x509_certificate(f.read()) + san_ext = cert.extensions.get_extension_for_class(x509.SubjectAlternativeName) + dns_names = san_ext.value.get_values_for_type(x509.DNSName) + ip_addrs = [str(ip) for ip in san_ext.value.get_values_for_type(x509.IPAddress)] + # Hostname (CN) and the standard loopback aliases must be present so + # local probes (cluster-internal HTTPS, sidecar self-checks) get a + # name match. Envoy with common_tls_context: {} ignores SANs anyway. + self.assertIn(socket.gethostname() or 'localhost', dns_names) + self.assertIn('localhost', dns_names) + self.assertIn('127.0.0.1', ip_addrs) + + def test_cert_is_not_a_ca(self): + with open(self.certfile, 'rb') as f: + cert = x509.load_pem_x509_certificate(f.read()) + bc = cert.extensions.get_extension_for_class(x509.BasicConstraints) + self.assertFalse(bc.value.ca) + + def test_cert_validity_window_includes_now(self): + with open(self.certfile, 'rb') as f: + cert = x509.load_pem_x509_certificate(f.read()) + now = datetime.datetime.now(datetime.timezone.utc) + self.assertLess(cert.not_valid_before_utc, now) + self.assertGreater(cert.not_valid_after_utc, now) + + def test_keyfile_is_not_world_readable(self): + # Private key must be 0600 (only owner can read). Any group/other + # access on a private key would be a regression. + mode = stat.S_IMODE(os.stat(self.keyfile).st_mode) + self.assertEqual(mode & 0o077, 0, + f'keyfile permissions {oct(mode)} grant access beyond owner') + + def test_cert_loads_into_uvicorn_style_ssl_context(self): + # This is the exact call uvicorn makes internally; if it fails here + # uvicorn would fail at startup. Doubles as an end-to-end sanity check. 
+ ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + ctx.load_cert_chain(certfile=self.certfile, keyfile=self.keyfile) + + def test_each_call_produces_a_unique_cert(self): + # Ephemerality matters — every process start should mint a fresh cert + # rather than reusing one across pods. + keyfile2, certfile2 = ssl_config.mint_ephemeral_self_signed() + self.assertNotEqual(self.keyfile, keyfile2) + self.assertNotEqual(self.certfile, certfile2) + with open(self.certfile, 'rb') as f: + cert1 = x509.load_pem_x509_certificate(f.read()) + with open(certfile2, 'rb') as f: + cert2 = x509.load_pem_x509_certificate(f.read()) + self.assertNotEqual(cert1.serial_number, cert2.serial_number) + # Compare keys via their DER encoding — public_bytes() is on the base + # public-key protocol so this works regardless of which key type + # mint_ephemeral_self_signed picks (avoids mypy union-attr errors + # over the seven-way PublicKeyTypes union). + pub_format = serialization.PublicFormat.SubjectPublicKeyInfo + self.assertNotEqual( + cert1.public_key().public_bytes(serialization.Encoding.DER, pub_format), + cert2.public_key().public_bytes(serialization.Encoding.DER, pub_format)) + + +if __name__ == '__main__': + unittest.main()