Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion charts/kubeflow-trainer/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,10 @@ manager:
| manager.volumeMounts | list | `[]` | Volume mounts for manager containers. |
| manager.resources | object | `{}` | Pod resource requests and limits for manager containers. |
| manager.securityContext | object | `{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]},"runAsNonRoot":true,"seccompProfile":{"type":"RuntimeDefault"}}` | Security context for manager containers. |
| manager.config | object | `{"certManagement":{"enable":true,"webhookSecretName":"","webhookServiceName":""},"controller":{"groupKindConcurrency":{"clusterTrainingRuntime":1,"trainJob":5,"trainingRuntime":1}},"featureGates":{},"health":{"healthProbeBindAddress":":8081","livenessEndpointName":"healthz","readinessEndpointName":"readyz"},"leaderElection":{"leaderElect":true,"leaseDuration":"15s","renewDeadline":"10s","resourceName":"trainer.kubeflow.org","resourceNamespace":"","retryPeriod":"2s"},"metrics":{"bindAddress":":8443","secureServing":true},"statusServer":{"burst":10,"port":10443,"qps":5},"webhook":{"host":"","port":9443}}` | Controller manager configuration. This configuration is used to generate the ConfigMap for the controller manager. |
| manager.config | object | `{"certManagement":{"enable":true,"secretName":"","serviceName":""},"controller":{"groupKindConcurrency":{"clusterTrainingRuntime":1,"trainJob":5,"trainingRuntime":1}},"featureGates":{},"health":{"healthProbeBindAddress":":8081","livenessEndpointName":"healthz","readinessEndpointName":"readyz"},"leaderElection":{"leaderElect":true,"leaseDuration":"15s","renewDeadline":"10s","resourceName":"trainer.kubeflow.org","resourceNamespace":"","retryPeriod":"2s"},"metrics":{"bindAddress":":8443","secureServing":true},"statusServer":{"burst":10,"port":10443,"qps":5},"webhook":{"host":"","port":9443}}` | Controller manager configuration. This configuration is used to generate the ConfigMap for the controller manager. |
| manager.config.certManagement.enable | bool | `true` | Enable automatic TLS certificate management. |
| manager.config.certManagement.serviceName | string | `""` | Service name for TLS certificate DNS SAN. Defaults to controller manager service name if empty. |
| manager.config.certManagement.secretName | string | `""` | Secret name for storing TLS certificates. Defaults to "kubeflow-trainer-webhook-cert" if empty. |
| manager.config.statusServer.port | int | `10443` | Port that the TrainJob status server serves on. |
| manager.config.statusServer.qps | int | `5` | QPS rate limit for the TrainJob status server API client. |
| manager.config.statusServer.burst | int | `10` | Burst rate limit for the TrainJob status server API client. |
Expand Down
4 changes: 2 additions & 2 deletions charts/kubeflow-trainer/templates/manager/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ data:
# Certificate management configuration
certManagement:
enable: {{ .Values.manager.config.certManagement.enable }}
webhookServiceName: {{ if .Values.manager.config.certManagement.webhookServiceName }}{{ .Values.manager.config.certManagement.webhookServiceName }}{{ else }}{{ include "trainer.webhook.service.name" . }}{{ end }}
webhookSecretName: {{ if .Values.manager.config.certManagement.webhookSecretName }}{{ .Values.manager.config.certManagement.webhookSecretName }}{{ else }}{{ include "trainer.webhook.secret.name" . }}{{ end }}
serviceName: {{ if .Values.manager.config.certManagement.serviceName }}{{ .Values.manager.config.certManagement.serviceName }}{{ else }}{{ include "trainer.webhook.service.name" . }}{{ end }}
secretName: {{ if .Values.manager.config.certManagement.secretName }}{{ .Values.manager.config.certManagement.secretName }}{{ else }}{{ include "trainer.webhook.secret.name" . }}{{ end }}

statusServer:
port: {{ .Values.manager.config.statusServer.port }}
Expand Down
16 changes: 8 additions & 8 deletions charts/kubeflow-trainer/tests/manager/configmap_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,29 +28,29 @@ tests:
path: data["controller_manager_config.yaml"]
pattern: "port: 9443"

- it: should use custom webhook service and secret names from config
- it: should use custom service and secret names from config
set:
manager:
config:
certManagement:
webhookServiceName: "custom-webhook-service"
webhookSecretName: "custom-webhook-secret"
serviceName: "custom-webhook-service"
secretName: "custom-webhook-secret"
asserts:
- matchRegex:
path: data["controller_manager_config.yaml"]
pattern: "webhookServiceName: custom-webhook-service"
pattern: "serviceName: custom-webhook-service"
- matchRegex:
path: data["controller_manager_config.yaml"]
pattern: "webhookSecretName: custom-webhook-secret"
pattern: "secretName: custom-webhook-secret"

- it: should auto-generate webhook service and secret names when not provided
- it: should auto-generate service and secret names when not provided
asserts:
- matchRegex:
path: data["controller_manager_config.yaml"]
pattern: "webhookServiceName: kubeflow-trainer-controller-manager"
pattern: "serviceName: kubeflow-trainer-controller-manager"
- matchRegex:
path: data["controller_manager_config.yaml"]
pattern: "webhookSecretName: kubeflow-trainer-webhook-cert"
pattern: "secretName: kubeflow-trainer-webhook-cert"

- it: should enable leader election by default
asserts:
Expand Down
8 changes: 5 additions & 3 deletions charts/kubeflow-trainer/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -134,10 +134,12 @@ manager:
trainingRuntime: 1
clusterTrainingRuntime: 1
certManagement:
# -- Enable automatic TLS certificate management.
enable: true
# webhookServiceName and webhookSecretName are auto-generated if not specified
webhookServiceName: ""
webhookSecretName: ""
# -- Service name for TLS certificate DNS SAN. Defaults to controller manager service name if empty.
serviceName: ""
# -- Secret name for storing TLS certificates. Defaults to "kubeflow-trainer-webhook-cert" if empty.
secretName: ""
statusServer:
# -- Port that the TrainJob status server serves on.
port: 10443
Expand Down
4 changes: 2 additions & 2 deletions cmd/trainer-controller-manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,8 @@ func main() {
if config.IsCertManagementEnabled(&cfg) {
setupLog.Info("Setting up certificate management")
if err = cert.ManageCerts(mgr, cert.Config{
WebhookSecretName: cfg.CertManagement.WebhookSecretName,
WebhookServiceName: cfg.CertManagement.WebhookServiceName,
SecretName: cfg.CertManagement.SecretName,
ServiceName: cfg.CertManagement.ServiceName,
ValidatingWebhookConfigurationName: validatingWebhookConfigurationName,
MutatingWebhookConfigurationName: mutatingWebhookConfigurationName,
}, certsReady); err != nil {
Expand Down
4 changes: 2 additions & 2 deletions manifests/base/manager/controller_manager_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ controller:
# Certificate management configuration
certManagement:
enable: true
webhookServiceName: kubeflow-trainer-controller-manager
webhookSecretName: kubeflow-trainer-webhook-cert
serviceName: kubeflow-trainer-controller-manager
secretName: kubeflow-trainer-webhook-cert

# Client connection configuration
clientConnection:
Expand Down
16 changes: 9 additions & 7 deletions pkg/apis/config/v1alpha1/configuration_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ type Configuration struct {
// +optional
Controller *ControllerConfigurationSpec `json:"controller,omitempty"`

// certManagement is configuration for certificate management used by the webhook server.
// certManagement is configuration for TLS certificate management.
// The certificate is used by webhooks, metrics server, and status server.
// +optional
CertManagement *CertManagement `json:"certManagement,omitempty"`

Expand Down Expand Up @@ -157,7 +158,8 @@ type ControllerConfigurationSpec struct {
GroupKindConcurrency map[string]int32 `json:"groupKindConcurrency,omitempty"`
}

// CertManagement holds configuration related to webhook server certificate generation.
// CertManagement holds configuration related to TLS certificate generation for the controller manager.
// The certificate is used by multiple components: admission webhooks, metrics server, and status server.
type CertManagement struct {
// enable controls whether the cert management is enabled.
// If disabled, certificates must be provided externally.
Expand All @@ -166,22 +168,22 @@ type CertManagement struct {
// +kubebuilder:default=true
Enable *bool `json:"enable,omitempty"`

// webhookServiceName is the name of the Service used as part of the DNSName
// when generating the webhook server certificate.
// serviceName is the name of the Service used as part of the DNSName
// when generating the TLS certificate.
// Defaults to "kubeflow-trainer-controller-manager".
// +optional
// +kubebuilder:default="kubeflow-trainer-controller-manager"
// +kubebuilder:validation:MinLength=1
// +kubebuilder:validation:MaxLength=253
WebhookServiceName string `json:"webhookServiceName,omitempty"`
ServiceName string `json:"serviceName,omitempty"`

// webhookSecretName is the name of the Secret used to store the CA and server certificates.
// secretName is the name of the Secret used to store the CA and server certificates.
// Defaults to "kubeflow-trainer-webhook-cert".
// +optional
// +kubebuilder:default="kubeflow-trainer-webhook-cert"
// +kubebuilder:validation:MinLength=1
// +kubebuilder:validation:MaxLength=253
WebhookSecretName string `json:"webhookSecretName,omitempty"`
SecretName string `json:"secretName,omitempty"`
}

// ClientConnection provides additional configuration options for Kubernetes
Expand Down
8 changes: 4 additions & 4 deletions pkg/apis/config/v1alpha1/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,11 @@ func SetDefaults_Configuration(cfg *Configuration) {
if cfg.CertManagement.Enable == nil {
cfg.CertManagement.Enable = ptr.To(true)
}
if cfg.CertManagement.WebhookServiceName == "" {
cfg.CertManagement.WebhookServiceName = "kubeflow-trainer-controller-manager"
if cfg.CertManagement.ServiceName == "" {
cfg.CertManagement.ServiceName = "kubeflow-trainer-controller-manager"
}
if cfg.CertManagement.WebhookSecretName == "" {
cfg.CertManagement.WebhookSecretName = "kubeflow-trainer-webhook-cert"
if cfg.CertManagement.SecretName == "" {
cfg.CertManagement.SecretName = "kubeflow-trainer-webhook-cert"
}
if cfg.ClientConnection == nil {
cfg.ClientConnection = &ClientConnection{}
Expand Down
26 changes: 13 additions & 13 deletions pkg/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ apiVersion: config.trainer.kubeflow.org/v1alpha1
kind: Configuration
certManagement:
enable: true
webhookServiceName: custom-webhook-service
webhookSecretName: custom-webhook-secret
serviceName: custom-webhook-service
secretName: custom-webhook-secret
`), os.FileMode(0600)); err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -195,8 +195,8 @@ controller:
TrainingRuntime.trainer.kubeflow.org: 1
certManagement:
enable: true
webhookServiceName: kubeflow-trainer-controller-manager
webhookSecretName: kubeflow-trainer-webhook-cert
serviceName: kubeflow-trainer-controller-manager
secretName: kubeflow-trainer-webhook-cert
clientConnection:
qps: 50
burst: 100
Expand Down Expand Up @@ -270,9 +270,9 @@ this is not: valid: yaml: content
}

defaultCertManagement := &configapi.CertManagement{
Enable: ptr.To(true),
WebhookServiceName: "kubeflow-trainer-controller-manager",
WebhookSecretName: "kubeflow-trainer-webhook-cert",
Enable: ptr.To(true),
ServiceName: "kubeflow-trainer-controller-manager",
SecretName: "kubeflow-trainer-webhook-cert",
}

defaultClientConnection := &configapi.ClientConnection{
Expand Down Expand Up @@ -500,9 +500,9 @@ this is not: valid: yaml: content
ClientConnection: defaultClientConnection,
StatusServer: defaultStatusServer,
CertManagement: &configapi.CertManagement{
Enable: ptr.To(true),
WebhookServiceName: "custom-webhook-service",
WebhookSecretName: "custom-webhook-secret",
Enable: ptr.To(true),
ServiceName: "custom-webhook-service",
SecretName: "custom-webhook-secret",
},
},
wantOptions: defaultOptions,
Expand All @@ -518,9 +518,9 @@ this is not: valid: yaml: content
ClientConnection: defaultClientConnection,
StatusServer: defaultStatusServer,
CertManagement: &configapi.CertManagement{
Enable: ptr.To(false),
WebhookServiceName: "kubeflow-trainer-controller-manager",
WebhookSecretName: "kubeflow-trainer-webhook-cert",
Enable: ptr.To(false),
ServiceName: "kubeflow-trainer-controller-manager",
SecretName: "kubeflow-trainer-webhook-cert",
},
},
wantOptions: defaultOptions,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,7 @@ func (p *Status) createEnvVars(trainJob *trainer.TrainJob) ([]corev1ac.EnvVarApp
if p.cfg.StatusServer.Port == nil {
return nil, fmt.Errorf("missing status server port")
}
// TODO: consider renaming the CertManagement.WebhookServiceName name?
svc := fmt.Sprintf("https://%s.%s.svc:%d", p.cfg.CertManagement.WebhookServiceName, cert.GetOperatorNamespace(), *p.cfg.StatusServer.Port)
svc := fmt.Sprintf("https://%s.%s.svc:%d", p.cfg.CertManagement.ServiceName, cert.GetOperatorNamespace(), *p.cfg.StatusServer.Port)
path := statusserver.StatusUrl(trainJob.Namespace, trainJob.Name)
statusURL := svc + path

Expand Down Expand Up @@ -169,15 +168,15 @@ func createTokenVolume(trainJob *trainer.TrainJob) corev1ac.VolumeApplyConfigura
)
}

// buildStatusServerCaCrtConfigMap creates a ConfigMap that will copy the ca.crt from the webhook secret
// buildStatusServerCaCrtConfigMap creates a ConfigMap that will copy the ca.crt from the TLS secret
func (p *Status) buildStatusServerCaCrtConfigMap(ctx context.Context, trainJob *trainer.TrainJob) (*corev1ac.ConfigMapApplyConfiguration, error) {
configMapName := fmt.Sprintf("%s-tls-config", trainJob.Name)

// Get the CA cert from the webhook secret
// Get the CA cert from the TLS secret
secret := &corev1.Secret{}
secretKey := client.ObjectKey{
Namespace: cert.GetOperatorNamespace(),
Name: p.cfg.CertManagement.WebhookSecretName,
Name: p.cfg.CertManagement.SecretName,
}

var caCertData string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -269,8 +269,8 @@ func TestEnforceMLPolicy(t *testing.T) {
cli := utiltesting.NewClientBuilder().Build()
cfg := &configapi.Configuration{
CertManagement: &configapi.CertManagement{
WebhookServiceName: "kubeflow-trainer-controller-manager",
WebhookSecretName: "kubeflow-trainer-webhook-cert",
ServiceName: "kubeflow-trainer-controller-manager",
SecretName: "kubeflow-trainer-webhook-cert",
},
StatusServer: &configapi.StatusServer{
Port: ptr.To[int32](10443),
Expand Down Expand Up @@ -461,8 +461,8 @@ func TestBuild(t *testing.T) {

cfg := &configapi.Configuration{
CertManagement: &configapi.CertManagement{
WebhookServiceName: "kubeflow-trainer-controller-manager",
WebhookSecretName: "kubeflow-trainer-webhook-cert",
ServiceName: "kubeflow-trainer-controller-manager",
SecretName: "kubeflow-trainer-webhook-cert",
},
StatusServer: &configapi.StatusServer{
Port: ptr.To[int32](10443),
Expand Down
8 changes: 4 additions & 4 deletions pkg/util/cert/cert.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ func GetOperatorNamespace() string {
}

type Config struct {
WebhookServiceName string
WebhookSecretName string
ServiceName string
SecretName string
ValidatingWebhookConfigurationName string
MutatingWebhookConfigurationName string
}
Expand All @@ -60,12 +60,12 @@ func ManageCerts(mgr ctrl.Manager, cfg Config, setupFinished chan struct{}) erro

ns := GetOperatorNamespace()
// DNSName is <service name>.<namespace>.svc
dnsName := fmt.Sprintf("%s.%s.svc", cfg.WebhookServiceName, ns)
dnsName := fmt.Sprintf("%s.%s.svc", cfg.ServiceName, ns)

return cert.AddRotator(mgr, &cert.CertRotator{
SecretKey: types.NamespacedName{
Namespace: ns,
Name: cfg.WebhookSecretName,
Name: cfg.SecretName,
},
CertDir: certDir,
CAName: caName,
Expand Down
Loading