Allow cdi mode to work with --gpus flag

This changes ensures that the cdi modifier also removes the NVIDIA
Container Runtime Hook from the incoming spec. This aligns with what is
done for CSV modifications and prevents an error when starting the
container.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar 2025-02-05 11:12:06 +01:00
parent cf026dce9a
commit 03152dba8d
No known key found for this signature in database
5 changed files with 197 additions and 13 deletions

View File

@ -68,20 +68,10 @@ func NewCSVModifier(logger logger.Interface, cfg *config.Config, container image
return nil, fmt.Errorf("failed to get CDI spec: %v", err)
}
cdiModifier, err := cdi.New(
return cdi.New(
cdi.WithLogger(logger),
cdi.WithSpec(spec.Raw()),
)
if err != nil {
return nil, fmt.Errorf("failed to construct CDI modifier: %v", err)
}
modifiers := Merge(
nvidiaContainerRuntimeHookRemover{logger},
cdiModifier,
)
return modifiers, nil
}
func checkRequirements(logger logger.Interface, image image.CUDA) error {

View File

@ -33,6 +33,13 @@ type nvidiaContainerRuntimeHookRemover struct {
var _ oci.SpecModifier = (*nvidiaContainerRuntimeHookRemover)(nil)
// NewNvidiaContainerRuntimeHookRemover creates a modifier that removes any NVIDIA Container Runtime hooks from the provided spec.
func NewNvidiaContainerRuntimeHookRemover(logger logger.Interface) oci.SpecModifier {
return nvidiaContainerRuntimeHookRemover{
logger: logger,
}
}
// Modify removes any NVIDIA Container Runtime hooks from the provided spec
func (m nvidiaContainerRuntimeHookRemover) Modify(spec *specs.Spec) error {
if spec == nil {

View File

@ -85,6 +85,8 @@ func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Sp
switch modifierType {
case "mode":
modifiers = append(modifiers, modeModifier)
case "nvidia-hook-remover":
modifiers = append(modifiers, modifier.NewNvidiaContainerRuntimeHookRemover(logger))
case "graphics":
graphicsModifier, err := modifier.NewGraphicsModifier(logger, cfg, image, driver)
if err != nil {
@ -121,10 +123,10 @@ func supportedModifierTypes(mode string) []string {
switch mode {
case "cdi":
// For CDI mode we make no additional modifications.
return []string{"mode"}
return []string{"nvidia-hook-remover", "mode"}
case "csv":
// For CSV mode we support mode and feature-gated modification.
return []string{"mode", "feature-gated"}
return []string{"nvidia-hook-remover", "mode", "feature-gated"}
default:
return []string{"mode", "graphics", "feature-gated"}
}

View File

@ -30,6 +30,7 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
)
@ -165,3 +166,181 @@ func TestFactoryMethod(t *testing.T) {
})
}
}
func TestNewSpecModifier(t *testing.T) {
logger, _ := testlog.NewNullLogger()
driver := root.New(
root.WithDriverRoot("/nvidia/driver/root"),
)
testCases := []struct {
description string
config *config.Config
spec *specs.Spec
expectedSpec *specs.Spec
}{
{
description: "csv mode removes nvidia-container-runtime-hook",
config: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Mode: "csv",
},
},
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-runtime-hook",
Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"},
},
},
},
},
expectedSpec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: nil,
},
},
},
{
description: "csv mode removes nvidia-container-toolkit",
config: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Mode: "csv",
},
},
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-toolkit",
Args: []string{"/path/to/nvidia-container-toolkit", "prestart"},
},
},
},
},
expectedSpec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: nil,
},
},
},
{
description: "cdi mode removes nvidia-container-runtime-hook",
config: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Mode: "cdi",
},
},
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-runtime-hook",
Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"},
},
},
},
},
expectedSpec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: nil,
},
},
},
{
description: "cdi mode removes nvidia-container-toolkit",
config: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Mode: "cdi",
},
},
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-toolkit",
Args: []string{"/path/to/nvidia-container-toolkit", "prestart"},
},
},
},
},
expectedSpec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: nil,
},
},
},
{
description: "legacy mode keeps nvidia-container-runtime-hook",
config: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Mode: "legacy",
},
},
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-runtime-hook",
Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"},
},
},
},
},
expectedSpec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-runtime-hook",
Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"},
},
},
},
},
},
{
description: "legacy mode keeps nvidia-container-toolkit",
config: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Mode: "legacy",
},
},
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-toolkit",
Args: []string{"/path/to/nvidia-container-toolkit", "prestart"},
},
},
},
},
expectedSpec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-toolkit",
Args: []string{"/path/to/nvidia-container-toolkit", "prestart"},
},
},
},
},
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
spec := &oci.SpecMock{
LoadFunc: func() (*specs.Spec, error) {
return tc.spec, nil
},
}
m, err := newSpecModifier(logger, tc.config, spec, driver)
require.NoError(t, err)
err = m.Modify(tc.spec)
require.NoError(t, err)
require.EqualValues(t, tc.expectedSpec, tc.spec)
})
}
}

View File

@ -74,6 +74,12 @@ var _ = Describe("docker", Ordered, func() {
Expect(containerOutput).To(Equal(hostOutput))
})
It("should support automatic CDI spec generation with the --gpus flag", func(ctx context.Context) {
containerOutput, _, err := r.Run("docker run --rm -i --gpus=all --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
Expect(err).ToNot(HaveOccurred())
Expect(containerOutput).To(Equal(hostOutput))
})
It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all ubuntu nvidia-smi -L")
Expect(err).ToNot(HaveOccurred())