Skip to content

Commit 7b9c506

Browse files
committed
Add --cuda-compat-container-root
Signed-off-by: Evan Lezar <elezar@nvidia.com>
1 parent df77c43 commit 7b9c506

File tree

6 files changed

+26
-11
lines changed

6 files changed

+26
-11
lines changed

cmd/nvidia-cdi-hook/cudacompat/cudacompat.go

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ import (
3131
)
3232

3333
const (
34-
cudaCompatPath = "/usr/local/cuda/compat"
34+
defaultCudaCompatPath = "/usr/local/cuda/compat"
3535
// cudaCompatLdsoconfdFilenamePattern specifies the pattern for the filename
3636
// in ld.so.conf.d that includes a reference to the CUDA compat path.
3737
// The 00-compat prefix is chosen to ensure that these libraries have a
@@ -44,8 +44,9 @@ type command struct {
4444
}
4545

4646
type options struct {
47-
hostDriverVersion string
48-
containerSpec string
47+
cudaCompatContainerRoot string
48+
hostDriverVersion string
49+
containerSpec string
4950
}
5051

5152
// NewCommand constructs a cuda-compat command with the specified logger
@@ -76,6 +77,12 @@ func (m command) build() *cli.Command {
7677
Usage: "Specify the host driver version. If the CUDA compat libraries detected in the container do not have a higher MAJOR version, the hook is a no-op.",
7778
Destination: &cfg.hostDriverVersion,
7879
},
80+
&cli.StringFlag{
81+
Name: "cuda-compat-container-root",
82+
Usage: "Specify the folder in which CUDA compat libraries are located in the container",
83+
Value: defaultCudaCompatPath,
84+
Destination: &cfg.cudaCompatContainerRoot,
85+
},
7986
&cli.StringFlag{
8087
Name: "container-spec",
8188
Hidden: true,
@@ -108,7 +115,7 @@ func (m command) run(_ *cli.Command, cfg *options) error {
108115
return fmt.Errorf("failed to determined container root: %w", err)
109116
}
110117

111-
containerForwardCompatDir, err := m.getContainerForwardCompatDir(containerRoot(containerRootDir), cfg.hostDriverVersion)
118+
containerForwardCompatDir, err := m.getContainerForwardCompatDir(containerRoot(containerRootDir), cfg.cudaCompatContainerRoot, cfg.hostDriverVersion)
112119
if err != nil {
113120
return fmt.Errorf("failed to get container forward compat directory: %w", err)
114121
}
@@ -119,17 +126,17 @@ func (m command) run(_ *cli.Command, cfg *options) error {
119126
return m.createLdsoconfdFile(containerRoot(containerRootDir), cudaCompatLdsoconfdFilenamePattern, containerForwardCompatDir)
120127
}
121128

122-
func (m command) getContainerForwardCompatDir(containerRoot containerRoot, hostDriverVersion string) (string, error) {
129+
func (m command) getContainerForwardCompatDir(containerRoot containerRoot, cudaCompatRoot string, hostDriverVersion string) (string, error) {
123130
if hostDriverVersion == "" {
124131
m.logger.Debugf("Host driver version not specified")
125132
return "", nil
126133
}
127-
if !containerRoot.hasPath(cudaCompatPath) {
134+
if !containerRoot.hasPath(cudaCompatRoot) {
128135
m.logger.Debugf("No CUDA forward compatibility libraries directory in container")
129136
return "", nil
130137
}
131138

132-
libs, err := containerRoot.globFiles(filepath.Join(cudaCompatPath, "libcuda.so.*.*"))
139+
libs, err := containerRoot.globFiles(filepath.Join(cudaCompatRoot, "libcuda.so.*.*"))
133140
if err != nil {
134141
m.logger.Warningf("Failed to find CUDA compat library: %w", err)
135142
return "", nil

cmd/nvidia-cdi-hook/cudacompat/cudacompat_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ func TestCompatLibs(t *testing.T) {
131131
c := command{
132132
logger: logger,
133133
}
134-
containerForwardCompatDir, err := c.getContainerForwardCompatDir(containerRoot(containerRootDir), tc.hostDriverVersion)
134+
containerForwardCompatDir, err := c.getContainerForwardCompatDir(containerRoot(containerRootDir), defaultCudaCompatPath, tc.hostDriverVersion)
135135
require.NoError(t, err)
136136
require.EqualValues(t, tc.expectedContainerForwardCompatDir, containerForwardCompatDir)
137137
})

internal/discover/compat_libs.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,14 @@ import (
88

99
// NewCUDACompatHookDiscoverer creates a discoverer for an enable-cuda-compat hook.
1010
// This hook is responsible for setting up CUDA compatibility in the container and depends on the host driver version.
11-
func NewCUDACompatHookDiscoverer(logger logger.Interface, hookCreator HookCreator, version string) Discover {
11+
func NewCUDACompatHookDiscoverer(logger logger.Interface, hookCreator HookCreator, version string, cudaCompatContainerRoot string) Discover {
1212
var args []string
1313
if version != "" && !strings.Contains(version, "*") {
1414
args = append(args, "--host-driver-version="+version)
1515
}
16+
if cudaCompatContainerRoot != "" {
17+
args = append(args, "--cuda-compat-container-root="+cudaCompatContainerRoot)
18+
}
1619

1720
return hookCreator.Create("enable-cuda-compat", args...)
1821
}

internal/modifier/gated.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ func getCudaCompatModeDiscoverer(logger logger.Interface, cfg *config.Config, dr
107107
return nil, fmt.Errorf("failed to get driver version: %w", err)
108108
}
109109

110-
compatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(logger, hookCreator, version)
110+
compatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(logger, hookCreator, version, "")
111111
// For non-legacy modes we return the hook as is. These modes *should* already include the update-ldcache hook.
112112
if cfg.NVIDIAContainerRuntimeConfig.Mode != "legacy" {
113113
return compatLibHookDiscoverer, nil

internal/platform-support/tegra/tegra.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,10 @@ func New(opts ...Option) (discover.Discover, error) {
8080
return nil, fmt.Errorf("failed to create CSV discoverer: %v", err)
8181
}
8282

83+
// TODO: Set the version here.
84+
version := ""
85+
cudaCompatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(o.logger, o.hookCreator, version)
86+
8387
ldcacheUpdateHook, err := discover.NewLDCacheUpdateHook(o.logger, csvDiscoverer, o.hookCreator, o.ldconfigPath)
8488
if err != nil {
8589
return nil, fmt.Errorf("failed to create ldcach update hook discoverer: %v", err)
@@ -96,6 +100,7 @@ func New(opts ...Option) (discover.Discover, error) {
96100

97101
d := discover.Merge(
98102
csvDiscoverer,
103+
cudaCompatLibHookDiscoverer,
99104
// The ldcacheUpdateHook is added last to ensure that the created symlinks are included
100105
ldcacheUpdateHook,
101106
tegraSystemMounts,

pkg/nvcdi/driver-nvml.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ func (l *nvcdilib) NewDriverLibraryDiscoverer(version string, libcudaSoParentDir
101101
)
102102
discoverers = append(discoverers, driverDotSoSymlinksDiscoverer)
103103

104-
cudaCompatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(l.logger, l.hookCreator, version)
104+
cudaCompatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(l.logger, l.hookCreator, version, "")
105105
discoverers = append(discoverers, cudaCompatLibHookDiscoverer)
106106

107107
updateLDCache, _ := discover.NewLDCacheUpdateHook(l.logger, libraries, l.hookCreator, l.ldconfigPath)

0 commit comments

Comments
 (0)