diff --git a/api/nvidia/v1/clusterpolicy_types.go b/api/nvidia/v1/clusterpolicy_types.go index d924901e1..ded617bfe 100644 --- a/api/nvidia/v1/clusterpolicy_types.go +++ b/api/nvidia/v1/clusterpolicy_types.go @@ -1077,6 +1077,30 @@ type DCGMExporterSpec struct { // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="HPC Job Mapping Configuration" // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:advanced" HPCJobMapping *DCGMExporterHPCJobMappingConfig `json:"hpcJobMapping,omitempty"` + + // Enable Kubernetes pod labels as Prometheus label dimensions in DCGM exporter metrics. + // (Requires cluster-level read access to pods.) + // +kubebuilder:validation:Optional + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable pod-label enrichment" + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch" + EnablePodLabels *bool `json:"enablePodLabels,omitempty"` + + // Enable Kubernetes pod UID as a Prometheus label dimension in DCGM exporter metrics. + // (Requires cluster-level read access to pods.) + // +kubebuilder:validation:Optional + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable pod-UID enrichment" + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch" + EnablePodUID *bool `json:"enablePodUID,omitempty"` + + // Regex list for filtering which Kubernetes pod labels are included in DCGM exporter metrics. + // Empty means all pod labels are included. 
+ // +kubebuilder:validation:Optional + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Pod label allowlist regex" + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:advanced,urn:alm:descriptor:com.tectonic.ui:text" + PodLabelAllowlistRegex []string `json:"podLabelAllowlistRegex,omitempty"` } // DCGMExporterHPCJobMappingConfig defines HPC job mapping configuration for NVIDIA DCGM Exporter @@ -2238,6 +2262,30 @@ func (e *DCGMExporterSpec) GetHPCJobMappingDirectory() string { return e.HPCJobMapping.Directory } +// IsPodLabelsEnabled reports whether pod-label enrichment is enabled for DCGM Exporter. +func (e *DCGMExporterSpec) IsPodLabelsEnabled() bool { + if e.EnablePodLabels == nil { + // EnablePodLabels was not set by the user; the feature is opt-in, so report disabled. + return false + } + return *e.EnablePodLabels +} + +// IsPodUIDEnabled reports whether pod-UID enrichment is enabled for DCGM Exporter. +func (e *DCGMExporterSpec) IsPodUIDEnabled() bool { + if e.EnablePodUID == nil { + // EnablePodUID was not set by the user; the feature is opt-in, so report disabled. + return false + } + return *e.EnablePodUID +} + +// IsKubernetesPodMetadataEnabled reports whether at least one Kubernetes pod metadata +// enrichment (pod labels or pod UID) is enabled for DCGM Exporter. 
+func (e *DCGMExporterSpec) IsKubernetesPodMetadataEnabled() bool { + return e.IsPodLabelsEnabled() || e.IsPodUIDEnabled() +} + // IsEnabled returns true if gpu-feature-discovery is enabled(default) through gpu-operator func (g *GPUFeatureDiscoverySpec) IsEnabled() bool { if g.Enabled == nil { diff --git a/api/nvidia/v1/zz_generated.deepcopy.go b/api/nvidia/v1/zz_generated.deepcopy.go index e04a3570e..30110a932 100644 --- a/api/nvidia/v1/zz_generated.deepcopy.go +++ b/api/nvidia/v1/zz_generated.deepcopy.go @@ -429,6 +429,21 @@ func (in *DCGMExporterSpec) DeepCopyInto(out *DCGMExporterSpec) { *out = new(DCGMExporterHPCJobMappingConfig) (*in).DeepCopyInto(*out) } + if in.EnablePodLabels != nil { + in, out := &in.EnablePodLabels, &out.EnablePodLabels + *out = new(bool) + **out = **in + } + if in.EnablePodUID != nil { + in, out := &in.EnablePodUID, &out.EnablePodUID + *out = new(bool) + **out = **in + } + if in.PodLabelAllowlistRegex != nil { + in, out := &in.PodLabelAllowlistRegex, &out.PodLabelAllowlistRegex + *out = make([]string, len(*in)) + copy(*out, *in) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DCGMExporterSpec. diff --git a/assets/state-dcgm-exporter/0210_clusterrole.yaml b/assets/state-dcgm-exporter/0210_clusterrole.yaml new file mode 100644 index 000000000..f5380d080 --- /dev/null +++ b/assets/state-dcgm-exporter/0210_clusterrole.yaml @@ -0,0 +1,16 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: nvidia-dcgm-exporter-read-pods + labels: + app: nvidia-dcgm-exporter +# TODO: Add resourceSlices permissions when GPU Operator exposes DRA exporter support. 
+rules: +- apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch diff --git a/assets/state-dcgm-exporter/0310_clusterrolebinding.yaml b/assets/state-dcgm-exporter/0310_clusterrolebinding.yaml new file mode 100644 index 000000000..e59ce4deb --- /dev/null +++ b/assets/state-dcgm-exporter/0310_clusterrolebinding.yaml @@ -0,0 +1,14 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: nvidia-dcgm-exporter-read-pods + labels: + app: nvidia-dcgm-exporter +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: nvidia-dcgm-exporter-read-pods +subjects: +- kind: ServiceAccount + name: nvidia-dcgm-exporter + namespace: "FILLED BY THE OPERATOR" diff --git a/assets/state-dcgm-exporter/0800_daemonset.yaml b/assets/state-dcgm-exporter/0800_daemonset.yaml index c826aafbb..eb0dab163 100644 --- a/assets/state-dcgm-exporter/0800_daemonset.yaml +++ b/assets/state-dcgm-exporter/0800_daemonset.yaml @@ -24,6 +24,7 @@ spec: effect: NoSchedule priorityClassName: system-node-critical serviceAccountName: nvidia-dcgm-exporter + automountServiceAccountToken: false initContainers: - name: toolkit-validation image: "FILLED BY THE OPERATOR" diff --git a/bundle/manifests/nvidia.com_clusterpolicies.yaml b/bundle/manifests/nvidia.com_clusterpolicies.yaml index 868fe9379..ce48355b4 100644 --- a/bundle/manifests/nvidia.com_clusterpolicies.yaml +++ b/bundle/manifests/nvidia.com_clusterpolicies.yaml @@ -575,6 +575,16 @@ spec: metrics to be collected by NVIDIA DCGM Exporter type: string type: object + enablePodLabels: + description: |- + Enable Kubernetes pod labels as Prometheus label dimensions in DCGM exporter metrics. + (Requires cluster-level read access to pods.) + type: boolean + enablePodUID: + description: |- + Enable Kubernetes pod UID as a Prometheus label dimension in DCGM exporter metrics. + (Requires cluster-level read access to pods.) 
+ type: boolean enabled: description: Enabled indicates if deployment of NVIDIA DCGM Exporter through operator is enabled @@ -628,6 +638,13 @@ spec: items: type: string type: array + podLabelAllowlistRegex: + description: |- + Regex list for filtering which Kubernetes pod labels are included in DCGM exporter metrics. + Empty means all pod labels are included. + items: + type: string + type: array repository: description: NVIDIA DCGM Exporter image repository type: string diff --git a/config/crd/bases/nvidia.com_clusterpolicies.yaml b/config/crd/bases/nvidia.com_clusterpolicies.yaml index 868fe9379..ce48355b4 100644 --- a/config/crd/bases/nvidia.com_clusterpolicies.yaml +++ b/config/crd/bases/nvidia.com_clusterpolicies.yaml @@ -575,6 +575,16 @@ spec: metrics to be collected by NVIDIA DCGM Exporter type: string type: object + enablePodLabels: + description: |- + Enable Kubernetes pod labels as Prometheus label dimensions in DCGM exporter metrics. + (Requires cluster-level read access to pods.) + type: boolean + enablePodUID: + description: |- + Enable Kubernetes pod UID as a Prometheus label dimension in DCGM exporter metrics. + (Requires cluster-level read access to pods.) + type: boolean enabled: description: Enabled indicates if deployment of NVIDIA DCGM Exporter through operator is enabled @@ -628,6 +638,13 @@ spec: items: type: string type: array + podLabelAllowlistRegex: + description: |- + Regex list for filtering which Kubernetes pod labels are included in DCGM exporter metrics. + Empty means all pod labels are included. 
+ items: + type: string + type: array repository: description: NVIDIA DCGM Exporter image repository type: string diff --git a/controllers/object_controls.go b/controllers/object_controls.go index 1f8806fcc..c4b04d16e 100644 --- a/controllers/object_controls.go +++ b/controllers/object_controls.go @@ -466,6 +466,19 @@ func ClusterRole(n ClusterPolicyController) (gpuv1.State, error) { return gpuv1.Disabled, nil } + // For state-dcgm-exporter, cluster-scoped pod-read RBAC is only needed + // when pod-metadata enrichment is opted in. + if n.stateNames[n.idx] == "state-dcgm-exporter" && + obj.Name == "nvidia-dcgm-exporter-read-pods" && + !n.singleton.Spec.DCGMExporter.IsKubernetesPodMetadataEnabled() { + err := n.client.Delete(ctx, obj) + if err != nil && !apierrors.IsNotFound(err) { + logger.Error(err, "Couldn't delete") + return gpuv1.NotReady, err + } + return gpuv1.Disabled, nil + } + if err := controllerutil.SetControllerReference(n.singleton, obj, n.scheme); err != nil { return gpuv1.NotReady, err } @@ -507,6 +520,18 @@ func ClusterRoleBinding(n ClusterPolicyController) (gpuv1.State, error) { return gpuv1.Disabled, nil } + // Mirror the ClusterRole gate. + if n.stateNames[n.idx] == "state-dcgm-exporter" && + obj.Name == "nvidia-dcgm-exporter-read-pods" && + !n.singleton.Spec.DCGMExporter.IsKubernetesPodMetadataEnabled() { + err := n.client.Delete(ctx, obj) + if err != nil && !apierrors.IsNotFound(err) { + logger.Error(err, "Couldn't delete") + return gpuv1.NotReady, err + } + return gpuv1.Disabled, nil + } + for idx := range obj.Subjects { obj.Subjects[idx].Namespace = n.operatorNamespace } @@ -1828,6 +1853,26 @@ func TransformDCGMExporter(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpe obj.Spec.Template.Spec.Volumes = append(obj.Spec.Template.Spec.Volumes, jobMappingVol) } + // Inject pod-metadata enrichment env vars; RBAC is provisioned via the + // 0210/0310 assets and the SA token is mounted below. 
+ if config.DCGMExporter.IsPodLabelsEnabled() { + setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "DCGM_EXPORTER_KUBERNETES_ENABLE_POD_LABELS", "true") + } + if config.DCGMExporter.IsPodUIDEnabled() { + setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "DCGM_EXPORTER_KUBERNETES_ENABLE_POD_UID", "true") + } + if len(config.DCGMExporter.PodLabelAllowlistRegex) > 0 { + setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), + "DCGM_EXPORTER_KUBERNETES_POD_LABEL_ALLOWLIST_REGEX", + strings.Join(config.DCGMExporter.PodLabelAllowlistRegex, ",")) + } + + // Override the base asset's automountServiceAccountToken=false when + // enrichment is on so the pod informer has client-go credentials. + if config.DCGMExporter.IsKubernetesPodMetadataEnabled() { + obj.Spec.Template.Spec.AutomountServiceAccountToken = ptr.To(true) + } + // mount configmap for custom metrics if provided by user if config.DCGMExporter.MetricsConfig != nil && config.DCGMExporter.MetricsConfig.Name != "" { metricsConfigVolMount := corev1.VolumeMount{Name: "metrics-config", ReadOnly: true, MountPath: MetricsConfigMountPath, SubPath: MetricsConfigFileName} diff --git a/controllers/object_controls_test.go b/controllers/object_controls_test.go index a25522d16..862a8b257 100644 --- a/controllers/object_controls_test.go +++ b/controllers/object_controls_test.go @@ -1151,6 +1151,10 @@ func getDCGMExporterTestInput(testCase string) *gpuv1.ClusterPolicy { case "standalone-dcgm": dcgmEnabled := true cp.Spec.DCGM.Enabled = &dcgmEnabled + case "pod-labels-enabled": + cp.Spec.DCGMExporter.EnablePodLabels = ptr.To(true) + case "pod-uid-enabled": + cp.Spec.DCGMExporter.EnablePodUID = ptr.To(true) default: return nil } @@ -1166,6 +1170,7 @@ func getDCGMExporterTestOutput(testCase string) map[string]interface{} { "numDaemonsets": 1, "dcgmExporterImage": "nvcr.io/nvidia/k8s/dcgm-exporter:3.3.0-3.2.0-ubuntu22.04", "imagePullSecret": "ngc-secret", + "clusterRoleExists": false, } switch testCase { @@ -1175,6 
+1180,16 @@ func getDCGMExporterTestOutput(testCase string) map[string]interface{} { output["env"] = map[string]string{ "DCGM_REMOTE_HOSTENGINE_INFO": "nvidia-dcgm:5555", } + case "pod-labels-enabled": + output["env"] = map[string]string{ + "DCGM_EXPORTER_KUBERNETES_ENABLE_POD_LABELS": "true", + } + output["clusterRoleExists"] = true + case "pod-uid-enabled": + output["env"] = map[string]string{ + "DCGM_EXPORTER_KUBERNETES_ENABLE_POD_UID": "true", + } + output["clusterRoleExists"] = true default: return nil } @@ -1200,6 +1215,16 @@ func TestDCGMExporter(t *testing.T) { getDCGMExporterTestInput("standalone-dcgm"), getDCGMExporterTestOutput("standalone-dcgm"), }, + { + "PodLabelsEnabled", + getDCGMExporterTestInput("pod-labels-enabled"), + getDCGMExporterTestOutput("pod-labels-enabled"), + }, + { + "PodUIDEnabled", + getDCGMExporterTestInput("pod-uid-enabled"), + getDCGMExporterTestOutput("pod-uid-enabled"), + }, } for _, tc := range testCases { @@ -1220,15 +1245,25 @@ func TestDCGMExporter(t *testing.T) { } } for key, value := range tc.output["env"].(map[string]string) { - envFound := false - for _, envVar := range ds.Spec.Template.Spec.Containers[0].Env { - if envVar.Name == key && envVar.Value == value { - envFound = true - } - } - if !envFound { - t.Fatalf("Expected env is not set for daemonset nvidia-dcgm-exporter %s->%s", key, value) - } + require.Equal(t, value, getContainerEnv(&ds.Spec.Template.Spec.Containers[0], key), "unexpected value for env var %s on daemonset nvidia-dcgm-exporter", key) + } + + // Verify cluster-scoped RBAC and SA token mount match the feature toggle. 
+ clusterRoleKey := client.ObjectKey{Name: "nvidia-dcgm-exporter-read-pods"} + clusterRole := &rbacv1.ClusterRole{} + clusterRoleGetErr := clusterPolicyController.client.Get(context.Background(), clusterRoleKey, clusterRole) + clusterRoleBinding := &rbacv1.ClusterRoleBinding{} + clusterRoleBindingGetErr := clusterPolicyController.client.Get(context.Background(), clusterRoleKey, clusterRoleBinding) + if tc.output["clusterRoleExists"].(bool) { + require.NoError(t, clusterRoleGetErr, "ClusterRole should exist when pod metadata enrichment is enabled") + require.NoError(t, clusterRoleBindingGetErr, "ClusterRoleBinding should exist when pod metadata enrichment is enabled") + require.NotNil(t, ds.Spec.Template.Spec.AutomountServiceAccountToken, "AutomountServiceAccountToken should be set when pod metadata enrichment is enabled") + require.True(t, *ds.Spec.Template.Spec.AutomountServiceAccountToken, "AutomountServiceAccountToken should be true when pod metadata enrichment is enabled") + } else { + require.True(t, apierrors.IsNotFound(clusterRoleGetErr), "ClusterRole should not exist when pod metadata enrichment is disabled (got err=%v)", clusterRoleGetErr) + require.True(t, apierrors.IsNotFound(clusterRoleBindingGetErr), "ClusterRoleBinding should not exist when pod metadata enrichment is disabled (got err=%v)", clusterRoleBindingGetErr) + require.NotNil(t, ds.Spec.Template.Spec.AutomountServiceAccountToken, "AutomountServiceAccountToken should be explicitly set false when pod metadata enrichment is disabled") + require.False(t, *ds.Spec.Template.Spec.AutomountServiceAccountToken, "AutomountServiceAccountToken should be false when pod metadata enrichment is disabled") } require.Equal(t, tc.output["dcgmExporterImage"], dcgmExporterImage, "Unexpected configuration for dcgm-exporter image") diff --git a/controllers/transforms_test.go b/controllers/transforms_test.go index 64af9f93d..bbd09bd51 100644 --- a/controllers/transforms_test.go +++ b/controllers/transforms_test.go @@ 
-188,6 +188,11 @@ func (d Daemonset) WithHostPID(enabled bool) Daemonset { return d } +func (d Daemonset) WithAutomountServiceAccountToken(enabled bool) Daemonset { + d.Spec.Template.Spec.AutomountServiceAccountToken = ptr.To(enabled) + return d +} + func (d Daemonset) WithVolume(volume corev1.Volume) Daemonset { d.Spec.Template.Spec.Volumes = append(d.Spec.Template.Spec.Volumes, volume) return d @@ -1636,6 +1641,43 @@ func TestTransformDCGMExporter(t *testing.T) { }).WithRuntimeClassName("nvidia"). WithHostPathVolume("pod-gpu-resources", "/custom-kubelet/pod-resources", nil), }, + { + description: "transform dcgm exporter with pod metadata enrichment fully configured", + ds: NewDaemonset(). + WithContainer(corev1.Container{Name: "dcgm-exporter"}). + WithContainer(corev1.Container{Name: "dummy"}), + cpSpec: &gpuv1.ClusterPolicySpec{ + DCGMExporter: gpuv1.DCGMExporterSpec{ + Repository: "nvcr.io/nvidia/cloud-native", + Image: "dcgm-exporter", + Version: "v1.0.0", + ImagePullPolicy: "IfNotPresent", + ImagePullSecrets: []string{"pull-secret"}, + Args: []string{"--fail-on-init-error=false"}, + EnablePodLabels: newBoolPtr(true), + EnablePodUID: newBoolPtr(true), + PodLabelAllowlistRegex: []string{"^app$", `^kueue\.x-k8s\.io/.*$`}, + }, + DCGM: gpuv1.DCGMSpec{ + Enabled: newBoolPtr(true), + }, + }, + expectedDs: NewDaemonset().WithContainer(corev1.Container{ + Name: "dcgm-exporter", + Image: "nvcr.io/nvidia/cloud-native/dcgm-exporter:v1.0.0", + ImagePullPolicy: corev1.PullIfNotPresent, + Args: []string{"--fail-on-init-error=false"}, + Env: []corev1.EnvVar{ + {Name: "DCGM_REMOTE_HOSTENGINE_INFO", Value: "nvidia-dcgm:5555"}, + {Name: "DCGM_EXPORTER_KUBERNETES_ENABLE_POD_LABELS", Value: "true"}, + {Name: "DCGM_EXPORTER_KUBERNETES_ENABLE_POD_UID", Value: "true"}, + {Name: "DCGM_EXPORTER_KUBERNETES_POD_LABEL_ALLOWLIST_REGEX", Value: `^app$,^kueue\.x-k8s\.io/.*$`}, + }, + }).WithContainer(corev1.Container{Name: "dummy"}). + WithPullSecret("pull-secret"). 
+ WithRuntimeClassName("nvidia"). + WithAutomountServiceAccountToken(true), + }, } for _, tc := range testCases { diff --git a/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml b/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml index 868fe9379..ce48355b4 100644 --- a/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml +++ b/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml @@ -575,6 +575,16 @@ spec: metrics to be collected by NVIDIA DCGM Exporter type: string type: object + enablePodLabels: + description: |- + Enable Kubernetes pod labels as Prometheus label dimensions in DCGM exporter metrics. + (Requires cluster-level read access to pods.) + type: boolean + enablePodUID: + description: |- + Enable Kubernetes pod UID as a Prometheus label dimension in DCGM exporter metrics. + (Requires cluster-level read access to pods.) + type: boolean enabled: description: Enabled indicates if deployment of NVIDIA DCGM Exporter through operator is enabled @@ -628,6 +638,13 @@ spec: items: type: string type: array + podLabelAllowlistRegex: + description: |- + Regex list for filtering which Kubernetes pod labels are included in DCGM exporter metrics. + Empty means all pod labels are included. 
+ items: + type: string + type: array repository: description: NVIDIA DCGM Exporter image repository type: string diff --git a/deployments/gpu-operator/templates/clusterpolicy.yaml b/deployments/gpu-operator/templates/clusterpolicy.yaml index d96a1f03f..f09ed4bec 100644 --- a/deployments/gpu-operator/templates/clusterpolicy.yaml +++ b/deployments/gpu-operator/templates/clusterpolicy.yaml @@ -595,6 +595,15 @@ spec: {{- if .Values.dcgmExporter.hpcJobMapping }} hpcJobMapping: {{ toYaml .Values.dcgmExporter.hpcJobMapping | nindent 6 }} {{- end }} + {{- if .Values.dcgmExporter.enablePodLabels }} + enablePodLabels: {{ .Values.dcgmExporter.enablePodLabels }} + {{- end }} + {{- if .Values.dcgmExporter.enablePodUID }} + enablePodUID: {{ .Values.dcgmExporter.enablePodUID }} + {{- end }} + {{- if .Values.dcgmExporter.podLabelAllowlistRegex }} + podLabelAllowlistRegex: {{ toYaml .Values.dcgmExporter.podLabelAllowlistRegex | nindent 6 }} + {{- end }} gfd: enabled: {{ .Values.gfd.enabled }} {{- if .Values.gfd.repository }} diff --git a/deployments/gpu-operator/values.yaml b/deployments/gpu-operator/values.yaml index 9dba0c80c..3943bdd9c 100644 --- a/deployments/gpu-operator/values.yaml +++ b/deployments/gpu-operator/values.yaml @@ -309,6 +309,15 @@ dcgmExporter: # hpcJobMapping: # enabled: true # directory: /var/lib/dcgm-exporter/job-mapping + # Enrich GPU metrics with pod metadata (UID, labels). + # Both flags are opt-in. When either is true, the operator creates a + # cluster-scoped ClusterRole/ClusterRoleBinding granting pods get/list/watch + # and mounts the service account token into the exporter pods. Use podLabelAllowlistRegex (a list of regexes; leaving it empty includes all pod labels) to bound Prometheus cardinality. + enablePodLabels: false + enablePodUID: false + # podLabelAllowlistRegex: + # - "^app$" + # - "^kueue\\.x-k8s\\.io/.*$" service: internalTrafficPolicy: Cluster serviceMonitor: