diff --git a/cmd/nvidia-cdi-hook/commands/commands.go b/cmd/nvidia-cdi-hook/commands/commands.go
index 3f80ba9b..ea56feab 100644
--- a/cmd/nvidia-cdi-hook/commands/commands.go
+++ b/cmd/nvidia-cdi-hook/commands/commands.go
@@ -23,6 +23,7 @@ import (
 	symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/create-symlinks"
 	"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/cudacompat"
 	ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/update-ldcache"
+	nvidiaparams "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/update-nvidia-params"
 	"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
 )
 
@@ -34,5 +35,6 @@ func New(logger logger.Interface) []*cli.Command {
 		symlinks.NewCommand(logger),
 		chmod.NewCommand(logger),
 		cudacompat.NewCommand(logger),
+		nvidiaparams.NewCommand(logger),
 	}
 }
diff --git a/cmd/nvidia-cdi-hook/update-nvidia-params/mount_linux.go b/cmd/nvidia-cdi-hook/update-nvidia-params/mount_linux.go
new file mode 100644
index 00000000..5aaa4d48
--- /dev/null
+++ b/cmd/nvidia-cdi-hook/update-nvidia-params/mount_linux.go
@@ -0,0 +1,46 @@
+//go:build linux
+// +build linux
+
+/**
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package nvidiaparams
+
+import (
+	"fmt"
+
+	"golang.org/x/sys/unix"
+)
+
+// createTmpFs mounts a new tmpfs at target, passing size as the tmpfs size= option.
+func createTmpFs(target string, size int) error {
+	return unix.Mount("tmpfs", target, "tmpfs", 0, fmt.Sprintf("size=%d", size))
+}
+
+// bindMountReadonly bind-mounts source onto target and then remounts the bind
+// mount read-only.
+func bindMountReadonly(source string, target string) error {
+	if err := unix.Mount(source, target, "", unix.MS_BIND|unix.MS_RDONLY|unix.MS_NOSYMFOLLOW, ""); err != nil {
+		return err
+	}
+	// When creating a bind mount the kernel ignores all mount flags other than
+	// MS_REC, including MS_RDONLY (see mount(2)). The read-only flag therefore
+	// only takes effect once the bind mount itself is remounted.
+	if err := unix.Mount("", target, "", unix.MS_REMOUNT|unix.MS_BIND|unix.MS_RDONLY, ""); err != nil {
+		return fmt.Errorf("failed to remount %v read-only: %w", target, err)
+	}
+	return nil
+}
diff --git a/cmd/nvidia-cdi-hook/update-nvidia-params/mount_other.go b/cmd/nvidia-cdi-hook/update-nvidia-params/mount_other.go
new file mode 100644
index 00000000..db25ad51
--- /dev/null
+++ b/cmd/nvidia-cdi-hook/update-nvidia-params/mount_other.go
@@ -0,0 +1,34 @@
+//go:build !linux
+// +build !linux
+
+/**
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package nvidiaparams
+
+import (
+	"fmt"
+)
+
+// createTmpFs is not supported on non-linux platforms and always returns an error.
+func createTmpFs(target string, size int) error {
+	return fmt.Errorf("not supported")
+}
+
+// bindMountReadonly is not supported on non-linux platforms and always returns an error.
+func bindMountReadonly(source string, target string) error {
+	return fmt.Errorf("not supported")
+}
diff --git a/cmd/nvidia-cdi-hook/update-nvidia-params/update-nvidia-params.go b/cmd/nvidia-cdi-hook/update-nvidia-params/update-nvidia-params.go
new file mode 100644
index 00000000..2be0209f
--- /dev/null
+++ b/cmd/nvidia-cdi-hook/update-nvidia-params/update-nvidia-params.go
@@ -0,0 +1,221 @@
+/**
+# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package nvidiaparams
+
+import (
+	"bufio"
+	"bytes"
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/urfave/cli/v2"
+
+	"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
+	"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
+)
+
+const (
+	// nvidiaDriverParamsPath is the path of the NVIDIA driver params file. It is
+	// opened to read the current settings and is joined with the container root
+	// to form the target of the bind mount.
+	nvidiaDriverParamsPath = "/proc/driver/nvidia/params"
+)
+
+// command implements the update-nvidia-params hook.
+type command struct {
+	logger logger.Interface
+}
+
+// options stores the values of the command line flags.
+type options struct {
+	containerSpec string
+}
+
+// NewCommand constructs an update-nvidia-params command with the specified logger
+func NewCommand(logger logger.Interface) *cli.Command {
+	c := command{
+		logger: logger,
+	}
+	return c.build()
+}
+
+// build the update-nvidia-params command
+func (m command) build() *cli.Command {
+	cfg := options{}
+
+	// Create the 'update-nvidia-params' command
+	c := cli.Command{
+		Name:  "update-nvidia-params",
+		Usage: "Update the /proc/driver/nvidia/params file in the container to disable device node modification.",
+		Before: func(c *cli.Context) error {
+			return m.validateFlags(c, &cfg)
+		},
+		Action: func(c *cli.Context) error {
+			return m.run(c, &cfg)
+		},
+	}
+
+	c.Flags = []cli.Flag{
+		&cli.StringFlag{
+			Name:        "container-spec",
+			Hidden:      true,
+			Usage:       "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
+			Destination: &cfg.containerSpec,
+		},
+	}
+
+	return &c
+}
+
+// validateFlags currently performs no validation and always returns nil.
+func (m command) validateFlags(c *cli.Context, cfg *options) error {
+	return nil
+}
+
+// run reads the driver params file and, if required, replaces it in the
+// container with a copy that has device node modification disabled.
+// A params file that does not exist is not an error and results in a no-op.
+func (m command) run(c *cli.Context, cfg *options) error {
+	// TODO: Do we need to prefix the driver root?
+	hostNvidiaParamsFile, err := os.Open(nvidiaDriverParamsPath)
+	if errors.Is(err, os.ErrNotExist) {
+		return nil
+	}
+	if err != nil {
+		return fmt.Errorf("failed to load params file: %w", err)
+	}
+	defer hostNvidiaParamsFile.Close()
+
+	s, err := oci.LoadContainerState(cfg.containerSpec)
+	if err != nil {
+		return fmt.Errorf("failed to load container state: %w", err)
+	}
+
+	containerRoot, err := s.GetContainerRoot()
+	if err != nil {
+		return fmt.Errorf("failed to determine container root: %w", err)
+	}
+
+	return m.updateNvidiaParamsFromReader(hostNvidiaParamsFile, containerRoot)
+}
+
+// updateNvidiaParamsFromReader generates modified params contents from r and,
+// if a modification is required, mounts them over the params file below
+// containerRoot.
+func (m command) updateNvidiaParamsFromReader(r io.Reader, containerRoot string) error {
+	modifiedContents, err := m.getModifiedParamsFileContentsFromReader(r)
+	if err != nil {
+		return fmt.Errorf("failed to generate modified contents: %w", err)
+	}
+	if len(modifiedContents) == 0 {
+		m.logger.Debugf("No modification required")
+		return nil
+	}
+	return createParamsFileInContainer(containerRoot, modifiedContents)
+}
+
+// getModifiedParamsFileContentsFromReader returns the contents of a modified params file from the specified reader.
+// A line that is exactly "ModifyDeviceFiles: 1" is replaced by "ModifyDeviceFiles: 0"
+// and all other lines are copied unchanged. Nil contents are returned if no
+// such line is found or if a "ModifyDeviceFiles: 0" line is encountered.
+func (m command) getModifiedParamsFileContentsFromReader(r io.Reader) ([]byte, error) {
+	var modified bytes.Buffer
+	scanner := bufio.NewScanner(r)
+
+	var requiresModification bool
+	for scanner.Scan() {
+		line := scanner.Text()
+		if strings.HasPrefix(line, "ModifyDeviceFiles: ") {
+			if line == "ModifyDeviceFiles: 0" {
+				m.logger.Debugf("Device node modification is already disabled")
+				return nil, nil
+			}
+			if line == "ModifyDeviceFiles: 1" {
+				line = "ModifyDeviceFiles: 0"
+				requiresModification = true
+			}
+		}
+		if _, err := modified.WriteString(line + "\n"); err != nil {
+			return nil, fmt.Errorf("failed to create output buffer: %w", err)
+		}
+	}
+	if err := scanner.Err(); err != nil {
+		return nil, fmt.Errorf("failed to read params file: %w", err)
+	}
+
+	if !requiresModification {
+		return nil, nil
+	}
+
+	return modified.Bytes(), nil
+}
+
+// createParamsFileInContainer writes contents to a file in a new tmpfs and
+// bind-mounts that file over the params file below containerRoot.
+// Empty contents result in a no-op.
+func createParamsFileInContainer(containerRoot string, contents []byte) error {
+	if len(contents) == 0 {
+		return nil
+	}
+
+	tempParamsFileName, err := createFileInTempfs("nvct-params", contents, 0o444)
+	if err != nil {
+		return fmt.Errorf("failed to create temporary file: %w", err)
+	}
+
+	if err := bindMountReadonly(tempParamsFileName, filepath.Join(containerRoot, nvidiaDriverParamsPath)); err != nil {
+		return fmt.Errorf("failed to create temporary params file mount: %w", err)
+	}
+
+	return nil
+}
+
+// createFileInTempfs creates a file with the specified name, contents, and mode in a tmpfs.
+// A tmpfs is created at a new nvct-empty-dir* folder in the default temporary directory with a size sufficient for the specified contents.
+// The tmpfs is not unmounted here; the returned path refers to the file inside it.
+func createFileInTempfs(name string, contents []byte, mode os.FileMode) (string, error) {
+	tmpRoot, err := os.MkdirTemp("", "nvct-empty-dir*")
+	if err != nil {
+		return "", fmt.Errorf("failed to create temporary folder: %w", err)
+	}
+	if err := createTmpFs(tmpRoot, len(contents)); err != nil {
+		// Nothing was mounted, so remove the (empty) temporary folder again.
+		// This is best-effort; the mount error is the one worth reporting.
+		_ = os.Remove(tmpRoot)
+		return "", fmt.Errorf("failed to create tmpfs mount for params file: %w", err)
+	}
+
+	filename := filepath.Join(tmpRoot, name)
+	fileInTempfs, err := os.Create(filename)
+	if err != nil {
+		return "", fmt.Errorf("failed to create temporary params file: %w", err)
+	}
+	defer fileInTempfs.Close()
+
+	if _, err := fileInTempfs.Write(contents); err != nil {
+		return "", fmt.Errorf("failed to write temporary params file: %w", err)
+	}
+
+	if err := fileInTempfs.Chmod(mode); err != nil {
+		return "", fmt.Errorf("failed to set permissions on temporary params file: %w", err)
+	}
+	return filename, nil
+}
diff --git a/cmd/nvidia-cdi-hook/update-nvidia-params/update-nvidia-params_test.go b/cmd/nvidia-cdi-hook/update-nvidia-params/update-nvidia-params_test.go
new file mode 100644
index 00000000..47d488fa
--- /dev/null
+++ b/cmd/nvidia-cdi-hook/update-nvidia-params/update-nvidia-params_test.go
@@ -0,0 +1,79 @@
+package nvidiaparams
+
+import (
+	"bytes"
+	"testing"
+
+	testlog "github.com/sirupsen/logrus/hooks/test"
+	"github.com/stretchr/testify/require"
+)
+
+func TestGetModifiedParamsFileContentsFromReader(t *testing.T) {
+	logger, _ := testlog.NewNullLogger()
+	testCases := map[string]struct {
+		contents         []byte
+		expectedError    error
+		expectedContents []byte
+	}{
+		"no contents": {
+			contents:         nil,
+			expectedError:    nil,
+			expectedContents: nil,
+		},
+		"other contents are ignored": {
+			contents: []byte(`# Some other content
+			that we don't care about
+			`),
+			expectedError:    nil,
+			expectedContents: nil,
+		},
+		"already zero requires no modification": {
+			contents:         []byte("ModifyDeviceFiles: 0"),
+			expectedError:    nil,
+			expectedContents: nil,
+		},
+		"leading spaces require no modification": {
+			contents: []byte(" ModifyDeviceFiles: 1"),
+		},
+		"Trailing spaces require no modification": {
+			contents: []byte("ModifyDeviceFiles: 1 "),
+		},
+		"Not 1 require no modification": {
+			contents: []byte("ModifyDeviceFiles: 11"),
+		},
+		"single line requires modification": {
+			contents:         []byte("ModifyDeviceFiles: 1"),
+			expectedError:    nil,
+			expectedContents: []byte("ModifyDeviceFiles: 0\n"),
+		},
+		"single line with trailing newline requires modification": {
+			contents:         []byte("ModifyDeviceFiles: 1\n"),
+			expectedError:    nil,
+			expectedContents: []byte("ModifyDeviceFiles: 0\n"),
+		},
+		"other content is maintained": {
+			contents: []byte(`ModifyDeviceFiles: 1
+			other content
+			that
+			is maintained`),
+			expectedError: nil,
+			expectedContents: []byte(`ModifyDeviceFiles: 0
+			other content
+			that
+			is maintained
+`),
+		},
+	}
+
+	for description, tc := range testCases {
+		t.Run(description, func(t *testing.T) {
+			c := command{
+				logger: logger,
+			}
+			contents, err := c.getModifiedParamsFileContentsFromReader(bytes.NewReader(tc.contents))
+			require.EqualValues(t, tc.expectedError, err)
+			require.EqualValues(t, string(tc.expectedContents), string(contents))
+		})
+	}
+
+}
diff --git a/cmd/nvidia-ctk-installer/container/toolkit/toolkit_test.go b/cmd/nvidia-ctk-installer/container/toolkit/toolkit_test.go
index 4aad36c5..4079a87e 100644
--- a/cmd/nvidia-ctk-installer/container/toolkit/toolkit_test.go
+++ b/cmd/nvidia-ctk-installer/container/toolkit/toolkit_test.go
@@ -102,6 +102,11 @@ containerEdits:
     - update-ldcache
     - --folder
     - /lib/x86_64-linux-gnu
+  - hookName: createContainer
+    path: {{ .toolkitRoot }}/nvidia-cdi-hook
+    args:
+    - nvidia-cdi-hook
+    - update-nvidia-params
   mounts:
   - hostPath: /host/driver/root/lib/x86_64-linux-gnu/libcuda.so.999.88.77
     containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77