Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 118 additions & 0 deletions api/nvidia/v1/clusterpolicy_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ type ClusterPolicySpec struct {
Toolkit ToolkitSpec `json:"toolkit"`
// DevicePlugin component spec
DevicePlugin DevicePluginSpec `json:"devicePlugin"`
// DRADriver component spec
DRADriver DRADriverSpec `json:"draDriver"`
// DCGMExporter spec
DCGMExporter DCGMExporterSpec `json:"dcgmExporter"`
// DCGM component spec
Expand Down Expand Up @@ -985,6 +987,104 @@ type SandboxDevicePluginSpec struct {
HostNetwork *bool `json:"hostNetwork,omitempty"`
}

// DRADriverSpec defines the properties for the NVIDIA DRA Driver deployment.
//
// The spec carries no top-level enabled switch: DRADriverSpec.IsEnabled reports
// the driver as enabled when either GPUs or ComputeDomains is enabled.
// ImagePath hands Repository, Image and Version to imagePath together with the
// name "DRA_DRIVER_IMAGE".
type DRADriverSpec struct {
// NVIDIA DRA Driver image repository
// +kubebuilder:validation:Optional
Repository string `json:"repository,omitempty"`

// NOTE(review): the pattern below is not anchored with ^ and $, so it appears
// to accept any value that contains at least one matching character — confirm
// whether a full-string match was intended.

// NVIDIA DRA Driver image name
// +kubebuilder:validation:Pattern=[a-zA-Z0-9\-]+
Image string `json:"image,omitempty"`

// NVIDIA DRA Driver image tag
// +kubebuilder:validation:Optional
Version string `json:"version,omitempty"`

// Image pull policy
// +kubebuilder:validation:Optional
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Image Pull Policy"
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:imagePullPolicy"
ImagePullPolicy string `json:"imagePullPolicy,omitempty"`

// Image pull secrets
// +kubebuilder:validation:Optional
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Image pull secrets"
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:io.kubernetes:Secret"
ImagePullSecrets []string `json:"imagePullSecrets,omitempty"`

// GPUs defines configuration for GPUs in the NVIDIA DRA Driver.
// Its Enabled flag is what DRADriverSpec.IsGPUsEnabled reads.
GPUs DRADriverGPUs `json:"gpus,omitempty"`

// ComputeDomains defines configuration for ComputeDomains in the NVIDIA DRA Driver.
// Its Enabled flag is what DRADriverSpec.IsComputeDomainsEnabled reads.
ComputeDomains DRADriverComputeDomains `json:"computeDomains,omitempty"`
}

// DRADriverGPUs defines configuration for GPUs in the NVIDIA DRA Driver.
// It is embedded in DRADriverSpec as the GPUs field.
type DRADriverGPUs struct {
// Enabled indicates if GPUs are enabled in the NVIDIA DRA Driver.
// A nil value is treated as disabled by DRADriverSpec.IsGPUsEnabled.
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable GPUs in the NVIDIA DRA Driver"
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch"
Enabled *bool `json:"enabled,omitempty"`

// KubeletPlugin defines configuration for the NVIDIA DRA Driver kubelet plugin.
// DRADriverComputeDomains has its own, separately configured KubeletPlugin field of the same type.
KubeletPlugin DRADriverKubeletPlugin `json:"kubeletPlugin,omitempty"`
}

// DRADriverComputeDomains defines configuration for ComputeDomains in the NVIDIA DRA Driver.
// It is embedded in DRADriverSpec as the ComputeDomains field.
type DRADriverComputeDomains struct {
// Enabled indicates if ComputeDomains are enabled in the NVIDIA DRA Driver.
// A nil value is treated as disabled by DRADriverSpec.IsComputeDomainsEnabled.
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable ComputeDomains in the NVIDIA DRA Driver"
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch"
Enabled *bool `json:"enabled,omitempty"`

// Controller defines configuration for the NVIDIA DRA Driver controller.
// Only this struct carries a Controller field; DRADriverGPUs has none.
Controller DRADriverController `json:"controller,omitempty"`

// KubeletPlugin defines configuration for the NVIDIA DRA Driver kubelet plugin.
// DRADriverGPUs has its own, separately configured KubeletPlugin field of the same type.
KubeletPlugin DRADriverKubeletPlugin `json:"kubeletPlugin,omitempty"`
}

// DRADriverController defines configuration for the NVIDIA DRA Driver controller.
// It is referenced from DRADriverComputeDomains.Controller. All fields are
// tagged omitempty, so unset values are left out of the serialized spec.
type DRADriverController struct {
// Optional: List of environment variables
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Environment Variables"
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:advanced,urn:alm:descriptor:com.tectonic.ui:text"
Env []EnvVar `json:"env,omitempty"`

// Optional: Define resources requests and limits.
// The field is a pointer, so nil means no resource requirements were specified.
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Resource Requirements"
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:advanced,urn:alm:descriptor:com.tectonic.ui:resourceRequirements"
Resources *ResourceRequirements `json:"resources,omitempty"`

// Optional: Set tolerations.
// DRADriverKubeletPlugin has no equivalent field; tolerations are configurable for the controller only.
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Tolerations"
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:advanced,urn:alm:descriptor:io.kubernetes:Tolerations"
Tolerations []corev1.Toleration `json:"tolerations,omitempty"`
}

// DRADriverKubeletPlugin defines configuration for the NVIDIA DRA Driver kubelet plugin.
// The type is shared by DRADriverGPUs.KubeletPlugin and
// DRADriverComputeDomains.KubeletPlugin. It exposes the same Env and Resources
// fields as DRADriverController, but no Tolerations.
type DRADriverKubeletPlugin struct {
// Optional: List of environment variables
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Environment Variables"
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:advanced,urn:alm:descriptor:com.tectonic.ui:text"
Env []EnvVar `json:"env,omitempty"`

// Optional: Define resources requests and limits.
// The field is a pointer, so nil means no resource requirements were specified.
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Resource Requirements"
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:advanced,urn:alm:descriptor:com.tectonic.ui:resourceRequirements"
Resources *ResourceRequirements `json:"resources,omitempty"`
}

// DCGMExporterSpec defines the properties for NVIDIA DCGM Exporter deployment
type DCGMExporterSpec struct {
// Enabled indicates if deployment of NVIDIA DCGM Exporter through operator is enabled
Expand Down Expand Up @@ -2079,6 +2179,9 @@ func ImagePath(spec interface{}) (string, error) {
case *SandboxDevicePluginSpec:
config := spec.(*SandboxDevicePluginSpec)
return imagePath(config.Repository, config.Image, config.Version, "SANDBOX_DEVICE_PLUGIN_IMAGE")
case *DRADriverSpec:
config := spec.(*DRADriverSpec)
return imagePath(config.Repository, config.Image, config.Version, "DRA_DRIVER_IMAGE")
case *DCGMExporterSpec:
config := spec.(*DCGMExporterSpec)
return imagePath(config.Repository, config.Image, config.Version, "DCGM_EXPORTER_IMAGE")
Expand Down Expand Up @@ -2194,6 +2297,21 @@ func (p *DevicePluginSpec) IsEnabled() bool {
return *p.Enabled
}

// IsEnabled reports whether the DRA Driver is enabled through gpu-operator.
// The driver counts as enabled as soon as one of its resource kinds (GPUs or
// ComputeDomains) is enabled.
func (d *DRADriverSpec) IsEnabled() bool {
	// GPUs are checked first; ComputeDomains is consulted only when GPUs are off.
	if d.IsGPUsEnabled() {
		return true
	}
	return d.IsComputeDomainsEnabled()
}

// IsGPUsEnabled reports whether the GPUs resource is enabled in the DRA Driver.
// An unset (nil) GPUs.Enabled flag is treated as disabled.
func (d *DRADriverSpec) IsGPUsEnabled() bool {
	flag := d.GPUs.Enabled
	if flag == nil {
		return false
	}
	return *flag
}

// IsComputeDomainsEnabled reports whether the ComputeDomains resource is
// enabled in the DRA Driver. An unset (nil) ComputeDomains.Enabled flag is
// treated as disabled.
func (d *DRADriverSpec) IsComputeDomainsEnabled() bool {
	flag := d.ComputeDomains.Enabled
	if flag == nil {
		return false
	}
	return *flag
}

// IsEnabled returns true if dcgm-exporter is enabled(default) through gpu-operator
func (e *DCGMExporterSpec) IsEnabled() bool {
if e.Enabled == nil {
Expand Down
123 changes: 123 additions & 0 deletions api/nvidia/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions assets/state-dra-driver/0100_service_account.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# ServiceAccount "nvidia-dra-driver-controller" in the gpu-operator namespace.
# NOTE(review): presumably the identity the DRA driver controller runs as — confirm
# against the controller workload and role-binding manifests, which are not shown here.
# NOTE(review): the namespace is hard-coded, and the labels below name Helm as the
# manager with a "-dev" chart version; confirm both are intended for this asset.
apiVersion: v1
kind: ServiceAccount
metadata:
name: nvidia-dra-driver-controller
namespace: gpu-operator
labels:
helm.sh/chart: nvidia-dra-driver-26.4.0-dev
app.kubernetes.io/version: 26.4.0-dev
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: nvidia-dra-driver
app.kubernetes.io/instance: nvidia-dra-driver
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# ServiceAccount "compute-domain-daemon-service-account" in the gpu-operator namespace.
# Unlike the other two ServiceAccounts in this change, it carries no labels.
# NOTE(review): presumably used by the ComputeDomain daemon pods — confirm against
# the workload that references it, which is not shown here.
apiVersion: v1
kind: ServiceAccount
metadata:
name: compute-domain-daemon-service-account
namespace: gpu-operator
11 changes: 11 additions & 0 deletions assets/state-dra-driver/0120_kubeletplugin-service_account.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# ServiceAccount "nvidia-dra-driver-kubeletplugin" in the gpu-operator namespace.
# NOTE(review): presumably the identity the DRA driver kubelet plugin runs as — confirm
# against the kubelet plugin workload and role-binding manifests, which are not shown here.
# NOTE(review): the namespace is hard-coded, and the labels below name Helm as the
# manager with a "-dev" chart version; confirm both are intended for this asset.
apiVersion: v1
kind: ServiceAccount
metadata:
name: nvidia-dra-driver-kubeletplugin
namespace: gpu-operator
labels:
helm.sh/chart: nvidia-dra-driver-26.4.0-dev
app.kubernetes.io/version: 26.4.0-dev
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: nvidia-dra-driver
app.kubernetes.io/instance: nvidia-dra-driver
56 changes: 56 additions & 0 deletions assets/state-dra-driver/0200_clusterrole.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# ClusterRole "nvidia-dra-driver-clusterrole-controller": cluster-wide permissions
# for the DRA driver controller.
# NOTE(review): the binding that attaches this role to a ServiceAccount is not shown
# here — presumably nvidia-dra-driver-controller; confirm.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: nvidia-dra-driver-clusterrole-controller
rules:
# Read and update ComputeDomain objects (no create or delete).
- apiGroups:
- resource.nvidia.com
resources:
- computedomains
verbs:
- get
- list
- watch
- update
# Update-only access to the ComputeDomain status subresource.
- apiGroups:
- resource.nvidia.com
resources:
- computedomains/status
verbs:
- update
# Read, create, update and delete ResourceClaimTemplates (no patch).
- apiGroups:
- resource.k8s.io
resources:
- resourceclaimtemplates
verbs:
- get
- list
- watch
- create
- update
- delete
# Get, create and update Leases.
# NOTE(review): presumably for leader election — confirm against the controller.
- apiGroups:
- coordination.k8s.io
resources:
- leases
verbs:
- get
- create
- update
# Read and update Nodes (core API group).
- apiGroups:
- ''
resources:
- nodes
verbs:
- get
- list
- watch
- update
# Read-only access to Pods (core API group).
- apiGroups:
- ''
resources:
- pods
verbs:
- get
- list
- watch
27 changes: 27 additions & 0 deletions assets/state-dra-driver/0210_role.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Role "nvidia-dra-driver-role-controller": permissions for the DRA driver controller
# that apply only inside the gpu-operator namespace.
# NOTE(review): the namespace is hard-coded, and the RoleBinding that attaches this
# role to a ServiceAccount is not shown here; confirm both.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: nvidia-dra-driver-role-controller
namespace: gpu-operator
rules:
# Read, create, update, patch and delete DaemonSets in this namespace.
- apiGroups:
- apps
resources:
- daemonsets
verbs:
- get
- list
- watch
- create
- update
- patch
- delete
# Read and update ComputeDomainClique objects (no create or delete).
- apiGroups:
- resource.nvidia.com
resources:
- computedomaincliques
verbs:
- get
- list
- watch
- update
Loading