Allow cdi mode to work with --gpus flag

This changes ensures that the cdi modifier also removes the NVIDIA
Container Runtime Hook from the incoming spec. This aligns with what is
done for CSV modifications and prevents an error when starting the
container.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar 2025-02-05 11:12:06 +01:00
parent 6da7af8dfa
commit 9eccc1659d
No known key found for this signature in database
4 changed files with 191 additions and 13 deletions

View File

@ -68,20 +68,10 @@ func NewCSVModifier(logger logger.Interface, cfg *config.Config, container image
return nil, fmt.Errorf("failed to get CDI spec: %v", err)
}
cdiModifier, err := cdi.New(
return cdi.New(
cdi.WithLogger(logger),
cdi.WithSpec(spec.Raw()),
)
if err != nil {
return nil, fmt.Errorf("failed to construct CDI modifier: %v", err)
}
modifiers := Merge(
nvidiaContainerRuntimeHookRemover{logger},
cdiModifier,
)
return modifiers, nil
}
func checkRequirements(logger logger.Interface, image image.CUDA) error {

View File

@ -33,6 +33,13 @@ type nvidiaContainerRuntimeHookRemover struct {
var _ oci.SpecModifier = (*nvidiaContainerRuntimeHookRemover)(nil)
// NewNvidiaContainerRuntimeHookRemover creates a modifier that removes any NVIDIA Container Runtime hooks from the provided spec.
func NewNvidiaContainerRuntimeHookRemover(logger logger.Interface) oci.SpecModifier {
return nvidiaContainerRuntimeHookRemover{
logger: logger,
}
}
// Modify removes any NVIDIA Container Runtime hooks from the provided spec
func (m nvidiaContainerRuntimeHookRemover) Modify(spec *specs.Spec) error {
if spec == nil {

View File

@ -85,6 +85,8 @@ func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Sp
switch modifierType {
case "mode":
modifiers = append(modifiers, modeModifier)
case "nvidia-hook-remover":
modifiers = append(modifiers, modifier.NewNvidiaContainerRuntimeHookRemover(logger))
case "graphics":
graphicsModifier, err := modifier.NewGraphicsModifier(logger, cfg, image, driver)
if err != nil {
@ -121,10 +123,10 @@ func supportedModifierTypes(mode string) []string {
switch mode {
case "cdi":
// For CDI mode we make no additional modifications.
return []string{"mode"}
return []string{"nvidia-hook-remover", "mode"}
case "csv":
// For CSV mode we support mode and feature-gated modification.
return []string{"mode", "feature-gated"}
return []string{"nvidia-hook-remover", "mode", "feature-gated"}
default:
return []string{"mode", "graphics", "feature-gated"}
}

View File

@ -30,6 +30,7 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
)
@ -165,3 +166,181 @@ func TestFactoryMethod(t *testing.T) {
})
}
}
func TestNewSpecModifier(t *testing.T) {
logger, _ := testlog.NewNullLogger()
driver := root.New(
root.WithDriverRoot("/nvidia/driver/root"),
)
testCases := []struct {
description string
config *config.Config
spec *specs.Spec
expectedSpec *specs.Spec
}{
{
description: "csv mode removes nvidia-container-runtime-hook",
config: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Mode: "csv",
},
},
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-runtime-hook",
Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"},
},
},
},
},
expectedSpec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: nil,
},
},
},
{
description: "csv mode removes nvidia-container-toolkit",
config: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Mode: "csv",
},
},
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-toolkit",
Args: []string{"/path/to/nvidia-container-toolkit", "prestart"},
},
},
},
},
expectedSpec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: nil,
},
},
},
{
description: "cdi mode removes nvidia-container-runtime-hook",
config: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Mode: "cdi",
},
},
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-runtime-hook",
Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"},
},
},
},
},
expectedSpec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: nil,
},
},
},
{
description: "cdi mode removes nvidia-container-toolkit",
config: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Mode: "cdi",
},
},
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-toolkit",
Args: []string{"/path/to/nvidia-container-toolkit", "prestart"},
},
},
},
},
expectedSpec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: nil,
},
},
},
{
description: "legacy mode keeps nvidia-container-runtime-hook",
config: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Mode: "legacy",
},
},
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-runtime-hook",
Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"},
},
},
},
},
expectedSpec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-runtime-hook",
Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"},
},
},
},
},
},
{
description: "legacy mode keeps nvidia-container-toolkit",
config: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Mode: "legacy",
},
},
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-toolkit",
Args: []string{"/path/to/nvidia-container-toolkit", "prestart"},
},
},
},
},
expectedSpec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-toolkit",
Args: []string{"/path/to/nvidia-container-toolkit", "prestart"},
},
},
},
},
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
spec := &oci.SpecMock{
LoadFunc: func() (*specs.Spec, error) {
return tc.spec, nil
},
}
m, err := newSpecModifier(logger, tc.config, spec, driver)
require.NoError(t, err)
err = m.Modify(tc.spec)
require.NoError(t, err)
require.EqualValues(t, tc.expectedSpec, tc.spec)
})
}
}