Skip to content

Commit 08bf583

Browse files
authored
Merge pull request #1697 from elezar/fix-cuda-compat-check
fix: Don't use driver version in ELF header for compat check
2 parents 722b541 + 35dc2b5 commit 08bf583

2 files changed

Lines changed: 21 additions & 22 deletions

File tree

cmd/nvidia-cdi-hook/cudacompat/cuda-elf-header.go

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,6 @@ import (
2424
"encoding/json"
2525
"fmt"
2626
"os"
27-
"slices"
2827
"strings"
2928

3029
"golang.org/x/mod/semver"
@@ -119,18 +118,13 @@ func getCUDAFwdCompatibilitySection(lib *elf.File) *elf.Section {
119118

120119
// UseCompat checks whether the CUDA compat libraries with the specified elf
121120
// header should be used given the specified host versions.
122-
// If the hostDriverVersion is specified and the ELF header includes a list of
123-
// driver versions, this is checked, otherwise the CUDA version specified in the
124-
// ELF section is checked.
125-
func (h *compatElfHeader) UseCompat(hostDriverMajor int, hostCUDAVersion string) bool {
121+
// This is done by comparing the host CUDA version with the CUDA version
122+
// specified in the ELF header.
123+
func (h *compatElfHeader) UseCompat(hostCUDAVersion string) bool {
126124
if h == nil {
127125
return false
128126
}
129127

130-
if hostDriverMajor != 0 && len(h.Driver) > 0 {
131-
return slices.Contains(h.Driver, hostDriverMajor)
132-
}
133-
134128
return h.CUDAVersion.UseCompat(hostCUDAVersion)
135129
}
136130

cmd/nvidia-cdi-hook/cudacompat/cudacompat.go

Lines changed: 18 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -177,25 +177,30 @@ func (m command) getContainerForwardCompatDir(containerRoot containerRoot, o *op
177177
}
178178

179179
func (m command) useCompatLibraries(libcudaCompatPath string, hostDriverVersion string, hostCUDAVersion string) (bool, error) {
180-
driverMajor, err := extractMajorVersion(hostDriverVersion)
181-
if err != nil {
182-
return false, fmt.Errorf("failed to extract major version from %q: %v", hostDriverVersion, err)
183-
}
184-
185-
// First check the ELF header. If this is present, we use the ELF header to
186-
// determine whether the CUDA compat libraries in the container should be
187-
// used.
188-
cudaCompatHeader, _ := GetCUDACompatElfHeader(libcudaCompatPath)
189-
if cudaCompatHeader != nil {
190-
return cudaCompatHeader.UseCompat(driverMajor, hostCUDAVersion), nil
180+
// If the host CUDA version is specified, we need to inspect the ELF header
181+
// of the compat libraries in the container to determine whether these
182+
// should be used.
183+
if hostCUDAVersion != "" {
184+
cudaCompatHeader, _ := GetCUDACompatElfHeader(libcudaCompatPath)
185+
if cudaCompatHeader != nil {
186+
return cudaCompatHeader.UseCompat(hostCUDAVersion), nil
187+
}
188+
// If we were unable to read the CUDA header, we do not use the compat
189+
// libraries.
190+
return false, nil
191191
}
192192

193-
// If no CUDA Compat ELF header is available, and NO host driver version
194-
// was specified, we don't use the CUDA compat libraries in the container.
193+
// If neither a host driver version nor a host CUDA version is specified,
194+
// we don't use the CUDA compat libraries in the container.
195195
if hostDriverVersion == "" {
196196
return false, nil
197197
}
198198

199+
driverMajor, err := extractMajorVersion(hostDriverVersion)
200+
if err != nil {
201+
return false, fmt.Errorf("failed to extract major version from %q: %v", hostDriverVersion, err)
202+
}
203+
199204
compatDriverVersion := strings.TrimPrefix(filepath.Base(libcudaCompatPath), "libcuda.so.")
200205
compatMajor, err := extractMajorVersion(compatDriverVersion)
201206
if err != nil {

0 commit comments

Comments
 (0)