@@ -20,6 +20,7 @@ import (
2020 "fmt"
2121 "slices"
2222 "strconv"
23+ "strings"
2324
2425 "tags.cncf.io/container-device-interface/pkg/cdi"
2526 "tags.cncf.io/container-device-interface/specs-go"
@@ -101,12 +102,12 @@ func (l *csvDeviceGenerator) GetDeviceSpecs() ([]specs.Device, error) {
101102 }
102103 e , err := edits .FromDiscoverer (deviceNodeDiscoverer )
103104 if err != nil {
104- return nil , fmt .Errorf ("failed to create container edits for CSV files: %v " , err )
105+ return nil , fmt .Errorf ("failed to create container edits for CSV files: %w " , err )
105106 }
106107
107108 names , err := l .deviceNamers .GetDeviceNames (l .index , l )
108109 if err != nil {
109- return nil , fmt .Errorf ("failed to get device name: %v " , err )
110+ return nil , fmt .Errorf ("failed to get device name: %w " , err )
110111 }
111112 var deviceSpecs []specs.Device
112113 for _ , name := range names {
@@ -157,22 +158,7 @@ func (l *csvDeviceGenerator) deviceNodeDiscoverer() (discover.Discover, error) {
157158// GetCommonEdits generates the CDI container edits that can be applied to ANY device.
158159// These edits explicitly do not include any device nodes.
159160func (l * csvlib ) GetCommonEdits () (* cdi.ContainerEdits , error ) {
160- mountSpecs := tegra .Transform (
161- tegra .Transform (
162- tegra .MountSpecsFromCSVFiles (l .logger , l .csvFiles ... ),
163- tegra .WithoutDeviceNodes (),
164- ),
165- tegra .IgnoreSymlinkMountSpecsByPattern (l .csvIgnorePatterns ... ),
166- )
167- driverDiscoverer , err := tegra .New (
168- tegra .WithLogger (l .logger ),
169- tegra .WithDriverRoot (l .driverRoot ),
170- tegra .WithDevRoot (l .devRoot ),
171- tegra .WithHookCreator (l .hookCreator ),
172- tegra .WithLdconfigPath (l .ldconfigPath ),
173- tegra .WithLibrarySearchPaths (l .librarySearchPaths ... ),
174- tegra .WithMountSpecs (mountSpecs ),
175- )
161+ driverDiscoverer , err := l .driverDiscoverer ()
176162 if err != nil {
177163 return nil , fmt .Errorf ("failed to create driver discoverer from CSV files: %w" , err )
178164 }
@@ -321,3 +307,93 @@ func isIntegratedGPU(d nvml.Device) (bool, error) {
321307 }
322308 return pciInfo .Device == 0 , nil
323309}
310+
311+ func (l * csvlib ) driverDiscoverer () (discover.Discover , error ) {
312+ mountSpecs := tegra .Transform (
313+ tegra .Transform (
314+ tegra .MountSpecsFromCSVFiles (l .logger , l .csvFiles ... ),
315+ tegra .WithoutDeviceNodes (),
316+ ),
317+ tegra .IgnoreSymlinkMountSpecsByPattern (l .csvIgnorePatterns ... ),
318+ )
319+ driverDiscoverer , err := tegra .New (
320+ tegra .WithLogger (l .logger ),
321+ tegra .WithDriverRoot (l .driverRoot ),
322+ tegra .WithDevRoot (l .devRoot ),
323+ tegra .WithHookCreator (l .hookCreator ),
324+ tegra .WithLdconfigPath (l .ldconfigPath ),
325+ tegra .WithLibrarySearchPaths (l .librarySearchPaths ... ),
326+ tegra .WithMountSpecs (mountSpecs ),
327+ )
328+ if err != nil {
329+ return nil , fmt .Errorf ("failed to create discoverer from CSV files: %w" , err )
330+ }
331+
332+ cudaCompatDiscoverer := l .cudaCompatDiscoverer ()
333+
334+ ldcacheUpdateHook , err := discover .NewLDCacheUpdateHook (l .logger , driverDiscoverer , l .hookCreator , l .ldconfigPath )
335+ if err != nil {
336+ return nil , fmt .Errorf ("failed to create ldcache update hook discoverer: %w" , err )
337+ }
338+
339+ d := discover .Merge (
340+ driverDiscoverer ,
341+ cudaCompatDiscoverer ,
342+ // The ldcacheUpdateHook is added last to ensure that the created symlinks are included
343+ ldcacheUpdateHook ,
344+ )
345+ return d , nil
346+ }
347+
348+ // cudaCompatDiscoverer returns a discoverer for the CUDA forward compat hook
349+ // on Tegra-based systems.
350+ // If the system has NVML available, this is used to determine the driver
351+ // version to be passed to the hook.
352+ // On Orin-based systems, the compat library root in the container is also set.
353+ func (l * csvlib ) cudaCompatDiscoverer () discover.Discover {
354+ hasNvml , _ := l .infolib .HasNvml ()
355+ if ! hasNvml {
356+ return nil
357+ }
358+
359+ ret := l .nvmllib .Init ()
360+ if ret != nvml .SUCCESS {
361+ l .logger .Warningf ("Failed to initialize NVML: %v" , ret )
362+ return nil
363+ }
364+ defer func () {
365+ _ = l .nvmllib .Shutdown ()
366+ }()
367+
368+ version , ret := l .nvmllib .SystemGetDriverVersion ()
369+ if ret != nvml .SUCCESS {
370+ l .logger .Warningf ("Failed to get driver version: %v" , ret )
371+ return nil
372+ }
373+
374+ var names []string
375+ err := l .devicelib .VisitDevices (func (i int , d device.Device ) error {
376+ name , ret := d .GetName ()
377+ if ret != nvml .SUCCESS {
378+ return fmt .Errorf ("device %v: %v" , i , ret )
379+ }
380+ names = append (names , name )
381+ return nil
382+ })
383+ if err != nil {
384+ l .logger .Warningf ("Failed to get device names: %v" , err )
385+ return nil
386+ }
387+
388+ var cudaCompatContainerRoot string
389+ for _ , name := range names {
390+ // TODO: Should this be overridable through a feature flag / config option?
391+ if strings .Contains (name , "Orin (nvgpu)" ) {
392+ // TODO: This should probably be a constant or configurable.
393+ cudaCompatContainerRoot = "/usr/local/cuda/compat-orin"
394+ break
395+ }
396+ }
397+
398+ return discover .NewCUDACompatHookDiscoverer (l .logger , l .hookCreator , version , cudaCompatContainerRoot )
399+ }
0 commit comments