Skip to content

Commit 5f27af2

Browse files
committed
Add --cuda-compat-container-root
Signed-off-by: Evan Lezar <elezar@nvidia.com>
1 parent 075c741 commit 5f27af2

File tree

6 files changed

+25
-10
lines changed

6 files changed

+25
-10
lines changed

cmd/nvidia-cdi-hook/cudacompat/cudacompat.go

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ import (
3131
)
3232

3333
const (
34-
cudaCompatPath = "/usr/local/cuda/compat"
34+
defaultCudaCompatPath = "/usr/local/cuda/compat"
3535
// cudaCompatLdsoconfdFilenamePattern specifies the pattern for the filename
3636
// in ld.so.conf.d that includes a reference to the CUDA compat path.
3737
// The 00-compat prefix is chosen to ensure that these libraries have a
@@ -44,7 +44,8 @@ type command struct {
4444
}
4545

4646
type options struct {
47-
hostDriverVersion string
47+
cudaCompatContainerRoot string
48+
hostDriverVersion string
4849
// containerSpec allows the path to the container spec to be specified for
4950
// testing.
5051
containerSpec string
@@ -78,6 +79,12 @@ func (m command) build() *cli.Command {
7879
Usage: "Specify the host driver version. If the CUDA compat libraries detected in the container do not have a higher MAJOR version, the hook is a no-op.",
7980
Destination: &cfg.hostDriverVersion,
8081
},
82+
&cli.StringFlag{
83+
Name: "cuda-compat-container-root",
84+
Usage: "Specify the folder in which CUDA compat libraries are located in the container",
85+
Value: defaultCudaCompatPath,
86+
Destination: &cfg.cudaCompatContainerRoot,
87+
},
8188
&cli.StringFlag{
8289
Name: "container-spec",
8390
Hidden: true,
@@ -110,7 +117,7 @@ func (m command) run(_ *cli.Command, cfg *options) error {
110117
return fmt.Errorf("failed to determined container root: %w", err)
111118
}
112119

113-
containerForwardCompatDir, err := m.getContainerForwardCompatDir(containerRoot(containerRootDir), cfg.hostDriverVersion)
120+
containerForwardCompatDir, err := m.getContainerForwardCompatDir(containerRoot(containerRootDir), cfg.cudaCompatContainerRoot, cfg.hostDriverVersion)
114121
if err != nil {
115122
return fmt.Errorf("failed to get container forward compat directory: %w", err)
116123
}
@@ -121,17 +128,17 @@ func (m command) run(_ *cli.Command, cfg *options) error {
121128
return m.createLdsoconfdFile(containerRoot(containerRootDir), cudaCompatLdsoconfdFilenamePattern, containerForwardCompatDir)
122129
}
123130

124-
func (m command) getContainerForwardCompatDir(containerRoot containerRoot, hostDriverVersion string) (string, error) {
131+
func (m command) getContainerForwardCompatDir(containerRoot containerRoot, cudaCompatRoot string, hostDriverVersion string) (string, error) {
125132
if hostDriverVersion == "" {
126133
m.logger.Debugf("Host driver version not specified")
127134
return "", nil
128135
}
129-
if !containerRoot.hasPath(cudaCompatPath) {
136+
if !containerRoot.hasPath(cudaCompatRoot) {
130137
m.logger.Debugf("No CUDA forward compatibility libraries directory in container")
131138
return "", nil
132139
}
133140

134-
libs, err := containerRoot.globFiles(filepath.Join(cudaCompatPath, "libcuda.so.*.*"))
141+
libs, err := containerRoot.globFiles(filepath.Join(cudaCompatRoot, "libcuda.so.*.*"))
135142
if err != nil {
136143
m.logger.Warningf("Failed to find CUDA compat library: %w", err)
137144
return "", nil

cmd/nvidia-cdi-hook/cudacompat/cudacompat_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ func TestCompatLibs(t *testing.T) {
131131
c := command{
132132
logger: logger,
133133
}
134-
containerForwardCompatDir, err := c.getContainerForwardCompatDir(containerRoot(containerRootDir), tc.hostDriverVersion)
134+
containerForwardCompatDir, err := c.getContainerForwardCompatDir(containerRoot(containerRootDir), defaultCudaCompatPath, tc.hostDriverVersion)
135135
require.NoError(t, err)
136136
require.EqualValues(t, tc.expectedContainerForwardCompatDir, containerForwardCompatDir)
137137
})

internal/discover/compat_libs.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,14 @@ import (
88

99
// NewCUDACompatHookDiscoverer creates a discoverer for an enable-cuda-compat hook.
1010
// This hook is responsible for setting up CUDA compatibility in the container and depends on the host driver version.
11-
func NewCUDACompatHookDiscoverer(logger logger.Interface, hookCreator HookCreator, version string) Discover {
11+
func NewCUDACompatHookDiscoverer(logger logger.Interface, hookCreator HookCreator, version string, cudaCompatContainerRoot string) Discover {
1212
var args []string
1313
if version != "" && !strings.Contains(version, "*") {
1414
args = append(args, "--host-driver-version="+version)
1515
}
16+
if cudaCompatContainerRoot != "" {
17+
args = append(args, "--cuda-compat-container-root="+cudaCompatContainerRoot)
18+
}
1619

1720
return hookCreator.Create("enable-cuda-compat", args...)
1821
}

internal/modifier/gated.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ func getCudaCompatModeDiscoverer(logger logger.Interface, cfg *config.Config, dr
107107
return nil, fmt.Errorf("failed to get driver version: %w", err)
108108
}
109109

110-
compatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(logger, hookCreator, version)
110+
compatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(logger, hookCreator, version, "")
111111
// For non-legacy modes we return the hook as is. These modes *should* already include the update-ldcache hook.
112112
if cfg.NVIDIAContainerRuntimeConfig.Mode != "legacy" {
113113
return compatLibHookDiscoverer, nil

internal/platform-support/tegra/tegra.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,10 @@ func New(opts ...Option) (discover.Discover, error) {
8080
return nil, fmt.Errorf("failed to create CSV discoverer: %v", err)
8181
}
8282

83+
// TODO: Set the version here.
84+
version := ""
85+
cudaCompatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(o.logger, o.hookCreator, version)
86+
8387
ldcacheUpdateHook, err := discover.NewLDCacheUpdateHook(o.logger, csvDiscoverer, o.hookCreator, o.ldconfigPath)
8488
if err != nil {
8589
return nil, fmt.Errorf("failed to create ldcach update hook discoverer: %v", err)
@@ -96,6 +100,7 @@ func New(opts ...Option) (discover.Discover, error) {
96100

97101
d := discover.Merge(
98102
csvDiscoverer,
103+
cudaCompatLibHookDiscoverer,
99104
// The ldcacheUpdateHook is added last to ensure that the created symlinks are included
100105
ldcacheUpdateHook,
101106
tegraSystemMounts,

pkg/nvcdi/driver-nvml.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ func (l *nvcdilib) NewDriverLibraryDiscoverer(version string, libcudaSoParentDir
101101
)
102102
discoverers = append(discoverers, driverDotSoSymlinksDiscoverer)
103103

104-
cudaCompatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(l.logger, l.hookCreator, version)
104+
cudaCompatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(l.logger, l.hookCreator, version, "")
105105
discoverers = append(discoverers, cudaCompatLibHookDiscoverer)
106106

107107
updateLDCache, _ := discover.NewLDCacheUpdateHook(l.logger, libraries, l.hookCreator, l.ldconfigPath)

0 commit comments

Comments
 (0)