Skip to content

Commit b10bd59

Browse files
committed
fix: Ensure that iGPU device nodes includes /dev/nvidia2
Signed-off-by: Evan Lezar <elezar@nvidia.com>
1 parent 6b74d0c commit b10bd59

3 files changed

Lines changed: 97 additions & 43 deletions

File tree

internal/platform-support/tegra/filter.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ type matcherAsFilter struct {
7777
}
7878

7979
type filterByMountSpecType map[csv.MountSpecType]filter
80+
type filterByMountSpecPathsByTyper struct {
81+
MountSpecPathsByTyper
82+
}
8083

8184
type pathPatterns []string
8285
type pathPattern string
@@ -125,6 +128,14 @@ func (p filterByMountSpecType) Apply(input MountSpecPathsByTyper) MountSpecPaths
125128
return ms
126129
}
127130

131+
func (p filterByMountSpecPathsByTyper) Apply(input MountSpecPathsByTyper) MountSpecPathsByTyper {
132+
f := make(filterByMountSpecType)
133+
for t, p := range p.MountSpecPathsByType() {
134+
f[t] = &matcherAsFilter{pathPatterns(p)}
135+
}
136+
return f.Apply(input)
137+
}
138+
128139
// apply uses a matcher to filter an input string.
129140
// Each element in the input that matches is skipped and the remaining elements
130141
// are returned.

internal/platform-support/tegra/mount_specs.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,10 @@ func WithoutDeviceNodes() Transformer {
130130
}
131131
}
132132

133+
func Without(m MountSpecPathsByTyper) Transformer {
134+
return filterByMountSpecPathsByTyper{m}
135+
}
136+
133137
// WithoutRegularDeviceNodes creates a transformer which removes
134138
// regular `/dev/nvidia[0-9]+` device nodes from the source.
135139
func WithoutRegularDeviceNodes() Transformer {

pkg/nvcdi/lib-csv.go

Lines changed: 82 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,6 @@ func (l *csvlib) purecsvDeviceSpecGenerators(ids ...string) (DeviceSpecGenerator
9393
csvlib: l,
9494
index: 0,
9595
uuid: "",
96-
// We set noFilterDeviceNodes to true to ensure that the /dev/nvidia[0-1]
97-
// device nodes in the CSV files on the system are consumed as-is.
98-
noFilterDeviceNodes: true,
9996
}
10097
return g, nil
10198
}
@@ -108,9 +105,9 @@ func (l *csvlib) mixedDeviceSpecGenerators(ids ...string) (DeviceSpecGenerator,
108105
// platform-specific CSV files.
109106
type csvDeviceGenerator struct {
110107
*csvlib
111-
index int
112-
uuid string
113-
noFilterDeviceNodes bool
108+
index int
109+
uuid string
110+
mode string
114111
}
115112

116113
func (l *csvDeviceGenerator) GetUUID() (string, error) {
@@ -154,31 +151,52 @@ func (l *csvDeviceGenerator) GetDeviceSpecs() ([]specs.Device, error) {
154151
// - The device node (i.e. /dev/nvidia{{ .index }}) associated with this
155152
// particular device is added to the set of device nodes to be discovered.
156153
func (l *csvDeviceGenerator) deviceNodeDiscoverer() (discover.Discover, error) {
157-
mountSpecs := tegra.Transform(
158-
tegra.MountSpecsFromCSVFiles(l.logger, l.csvFiles...),
159-
// We remove non-device nodes.
160-
tegra.OnlyDeviceNodes(),
161-
)
162-
if !l.noFilterDeviceNodes {
163-
mountSpecs = tegra.Transform(
164-
mountSpecs,
165-
// We remove the regular (nvidia[0-9]+) device nodes.
166-
tegra.WithoutRegularDeviceNodes(),
167-
)
168-
}
169154
return tegra.New(
170155
tegra.WithLogger(l.logger),
171156
tegra.WithDriverRoot(l.driverRoot),
172157
tegra.WithDevRoot(l.devRoot),
173158
tegra.WithHookCreator(l.hookCreator),
174159
tegra.WithLdconfigPath(l.ldconfigPath),
175160
tegra.WithLibrarySearchPaths(l.librarySearchPaths...),
176-
tegra.WithMountSpecs(
177-
mountSpecs,
161+
tegra.WithMountSpecs(l.deviceNodeMountSpecs()),
162+
)
163+
}
164+
165+
func (l *csvDeviceGenerator) deviceNodeMountSpecs() tegra.MountSpecPathsByTyper {
166+
mountSpecs := tegra.Transform(
167+
tegra.MountSpecsFromCSVFiles(l.logger, l.csvFiles...),
168+
// We remove non-device nodes.
169+
tegra.OnlyDeviceNodes(),
170+
)
171+
switch l.mode {
172+
// For a dGPU we remove all regular device nodes from the list of device
173+
// nodes that we detect and only look for the node associated with the
174+
// index.
175+
case "dgpu":
176+
return tegra.Merge(
177+
tegra.Transform(
178+
mountSpecs,
179+
// We remove the regular (nvidia[0-9]+) device nodes.
180+
tegra.WithoutRegularDeviceNodes(),
181+
),
178182
// We add the specific device node for this device.
179183
tegra.DeviceNodes(fmt.Sprintf("/dev/nvidia%d", l.index)),
180-
),
181-
)
184+
)
185+
case "igpu":
186+
return tegra.Merge(
187+
tegra.Transform(
188+
mountSpecs,
189+
// We remove the /dev/nvidia1 device node.
190+
// TODO: This assumes that the dGPU has the index 1 and removes
191+
// it from the set of device nodes.
192+
tegra.Without(tegra.DeviceNodes("/dev/nvidia1")),
193+
),
194+
// We add the display device from the iGPU.
195+
tegra.DeviceNodes("/dev/nvidia2"),
196+
)
197+
default:
198+
return mountSpecs
199+
}
182200
}
183201

184202
// GetCommonEdits generates a CDI specification that can be used for ANY devices
@@ -272,35 +290,56 @@ func (l *mixedcsvlib) csvDeviceSpecGenerator(index int, uuid string, device nvml
272290
return nil, fmt.Errorf("is-integrated check failed for device (index=%v,uuid=%v)", index, uuid)
273291
}
274292

293+
if isIntegrated {
294+
return l.iGPUDeviceSpecGenerator(index, uuid)
295+
}
296+
297+
return l.dGPUDeviceSpecGenerator(index, uuid, device)
298+
}
299+
300+
func (l *mixedcsvlib) dGPUDeviceSpecGenerator(index int, uuid string, device nvml.Device) (DeviceSpecGenerator, error) {
301+
if index != 1 {
302+
return nil, fmt.Errorf("unexpected device index for dGPU: %d", index)
303+
}
275304
g := &csvDeviceGenerator{
276305
csvlib: (*csvlib)(l),
277306
index: index,
278307
uuid: uuid,
308+
mode: "dgpu",
279309
}
280310

281-
if !isIntegrated {
282-
csvDeviceNodeDiscoverer, err := g.deviceNodeDiscoverer()
283-
if err != nil {
284-
return nil, fmt.Errorf("failed to create discoverer for devices nodes: %w", err)
285-
}
311+
csvDeviceNodeDiscoverer, err := g.deviceNodeDiscoverer()
312+
if err != nil {
313+
return nil, fmt.Errorf("failed to create discoverer for devices nodes: %w", err)
314+
}
315+
316+
// If this is not an integrated GPU, we also create a spec generator for
317+
// the full GPU.
318+
dgpu := (*nvmllib)(l).withInit(&fullGPUDeviceSpecGenerator{
319+
nvmllib: (*nvmllib)(l),
320+
uuid: uuid,
321+
index: index,
322+
// For the CSV case, we include the control device nodes at a
323+
// device level.
324+
additionalDiscoverers: []discover.Discover{
325+
(*nvmllib)(l).controlDeviceNodeDiscoverer(),
326+
csvDeviceNodeDiscoverer,
327+
},
328+
featureFlags: l.featureFlags,
329+
})
330+
return dgpu, nil
331+
}
286332

287-
// If this is not an integrated GPU, we also create a spec generator for
288-
// the full GPU.
289-
dgpu := (*nvmllib)(l).withInit(&fullGPUDeviceSpecGenerator{
290-
nvmllib: (*nvmllib)(l),
291-
uuid: uuid,
292-
index: index,
293-
// For the CSV case, we include the control device nodes at a
294-
// device level.
295-
additionalDiscoverers: []discover.Discover{
296-
(*nvmllib)(l).controlDeviceNodeDiscoverer(),
297-
csvDeviceNodeDiscoverer,
298-
},
299-
featureFlags: l.featureFlags,
300-
})
301-
return dgpu, nil
333+
func (l *mixedcsvlib) iGPUDeviceSpecGenerator(index int, uuid string) (DeviceSpecGenerator, error) {
334+
if index != 0 {
335+
return nil, fmt.Errorf("unexpected device index for iGPU: %d", index)
336+
}
337+
g := &csvDeviceGenerator{
338+
csvlib: (*csvlib)(l),
339+
index: index,
340+
uuid: uuid,
341+
mode: "igpu",
302342
}
303-
304343
return g, nil
305344
}
306345

0 commit comments

Comments
 (0)