From ae1b7e126c2d0710937143302d2579c63ca87bdf Mon Sep 17 00:00:00 2001 From: Christopher Desiniotis Date: Thu, 30 Nov 2023 14:16:10 -0800 Subject: [PATCH 1/6] Extend the 'runtime.nvidia.com/gpu' CDI device kind to support full-GPUs specified by index or UUID Signed-off-by: Christopher Desiniotis --- internal/modifier/cdi.go | 74 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 69 insertions(+), 5 deletions(-) diff --git a/internal/modifier/cdi.go b/internal/modifier/cdi.go index 76e8dab7..855277c7 100644 --- a/internal/modifier/cdi.go +++ b/internal/modifier/cdi.go @@ -18,9 +18,13 @@ package modifier import ( "fmt" + "strconv" "strings" + nvdevice "github.com/NVIDIA/go-nvlib/pkg/nvlib/device" + "github.com/NVIDIA/go-nvlib/pkg/nvml" "tags.cncf.io/container-device-interface/pkg/parser" + "tags.cncf.io/container-device-interface/specs-go" "github.com/NVIDIA/nvidia-container-toolkit/internal/config" "github.com/NVIDIA/nvidia-container-toolkit/internal/config/image" @@ -152,7 +156,8 @@ func getAnnotationDevices(prefixes []string, annotations map[string]string) ([]s func filterAutomaticDevices(devices []string) []string { var automatic []string for _, device := range devices { - if device == "runtime.nvidia.com/gpu=all" { + vendor, class, _ := parser.ParseDevice(device) + if vendor == "runtime.nvidia.com" && class == "gpu" { automatic = append(automatic, device) } } @@ -176,9 +181,6 @@ func newAutomaticCDISpecModifier(logger logger.Interface, cfg *config.Config, de return cdiModifier, nil } -// TODO: use the requested devices when generating the CDI spec once we add -// automatic CDI generation for more than just the 'runtime.nvidia.com/gpu=all' -// device func generateAutomaticCDISpec(logger logger.Interface, cfg *config.Config, devices []string) (spec.Interface, error) { cdilib, err := nvcdi.New( nvcdi.WithLogger(logger), @@ -191,5 +193,67 @@ func generateAutomaticCDISpec(logger logger.Interface, cfg *config.Config, devic return nil, fmt.Errorf("failed to 
construct CDI library: %w", err) } - return cdilib.GetSpec() + names := []string{} + for _, device := range devices { + _, _, name := parser.ParseDevice(device) + if name == "all" { + return cdilib.GetSpec() + } + names = append(names, name) + } + + // Note: The below code currently only supports generating CDI spec modifications + // for full-GPUs, specified either by index or UUID. MIG devices are not + // supported. + nvmlLib := nvml.New() + ret := nvmlLib.Init() + if ret != nvml.SUCCESS { + return nil, fmt.Errorf("failed to initialized NVML: %w", ret) + } + nvdevice := nvdevice.New(nvdevice.WithNvml(nvmlLib)) + + deviceSpecs := []specs.Device{} + for _, name := range names { + logger.Debugf("Getting CDI spec edits for device %q", name) + // Get a device handle by either index or UUID + var nvmlDevice nvml.Device + if idx, err := strconv.Atoi(name); err == nil { + nvmlDevice, err = nvmlLib.DeviceGetHandleByIndex(idx) + if err != nvml.SUCCESS { + return nil, fmt.Errorf("failed to get device handle for index '%v': %w", idx, err) + } + } else { + nvmlDevice, err = nvmlLib.DeviceGetHandleByUUID(name) + if err != nvml.SUCCESS { + return nil, fmt.Errorf("failed to get device handle for UUID '%v': %w", name, err) + } + } + + nvlibDevice, err := nvdevice.NewDevice(nvmlDevice) + if err != nil { + return nil, fmt.Errorf("failed to construct device: %w", err) + } + + gpuEdits, err := cdilib.GetGPUDeviceEdits(nvlibDevice) + if err != nil { + return nil, fmt.Errorf("failed to get CDI spec edits for GPU %q: %w", name, err) + } + gpuDevice := specs.Device{ + Name: name, + ContainerEdits: *gpuEdits.ContainerEdits, + } + deviceSpecs = append(deviceSpecs, gpuDevice) + } + + commonEdits, err := cdilib.GetCommonEdits() + if err != nil { + return nil, fmt.Errorf("failed to get common CDI spec edits: %w", err) + } + + return spec.New( + spec.WithDeviceSpecs(deviceSpecs), + spec.WithEdits(*commonEdits.ContainerEdits), + spec.WithVendor("runtime.nvidia.com"), + spec.WithClass("gpu"), + 
) } From b9ac54b922e158a1fb014cfb82bdda553be38216 Mon Sep 17 00:00:00 2001 From: Christopher Desiniotis Date: Mon, 4 Dec 2023 12:57:12 -0800 Subject: [PATCH 2/6] Add GetDeviceSpecsByID() API to the nvcdi Interface Signed-off-by: Christopher Desiniotis --- internal/modifier/cdi.go | 57 ++++------------------------------ pkg/nvcdi/api.go | 1 + pkg/nvcdi/gds.go | 7 +++++ pkg/nvcdi/lib-csv.go | 7 +++++ pkg/nvcdi/lib-nvml.go | 67 ++++++++++++++++++++++++++++++++++++++++ pkg/nvcdi/lib-wsl.go | 7 +++++ pkg/nvcdi/management.go | 7 +++++ pkg/nvcdi/mofed.go | 7 +++++ 8 files changed, 109 insertions(+), 51 deletions(-) diff --git a/internal/modifier/cdi.go b/internal/modifier/cdi.go index 855277c7..4a1079ac 100644 --- a/internal/modifier/cdi.go +++ b/internal/modifier/cdi.go @@ -18,13 +18,9 @@ package modifier import ( "fmt" - "strconv" "strings" - nvdevice "github.com/NVIDIA/go-nvlib/pkg/nvlib/device" - "github.com/NVIDIA/go-nvlib/pkg/nvml" "tags.cncf.io/container-device-interface/pkg/parser" - "tags.cncf.io/container-device-interface/specs-go" "github.com/NVIDIA/nvidia-container-toolkit/internal/config" "github.com/NVIDIA/nvidia-container-toolkit/internal/config/image" @@ -193,56 +189,15 @@ func generateAutomaticCDISpec(logger logger.Interface, cfg *config.Config, devic return nil, fmt.Errorf("failed to construct CDI library: %w", err) } - names := []string{} + identifiers := []string{} for _, device := range devices { - _, _, name := parser.ParseDevice(device) - if name == "all" { - return cdilib.GetSpec() - } - names = append(names, name) + _, _, id := parser.ParseDevice(device) + identifiers = append(identifiers, id) } - // Note: The below code currently only supports generating CDI spec modifications - // for full-GPUs, specified either by index or UUID. MIG devices are not - // supported. 
- nvmlLib := nvml.New() - ret := nvmlLib.Init() - if ret != nvml.SUCCESS { - return nil, fmt.Errorf("failed to initialized NVML: %w", ret) - } - nvdevice := nvdevice.New(nvdevice.WithNvml(nvmlLib)) - - deviceSpecs := []specs.Device{} - for _, name := range names { - logger.Debugf("Getting CDI spec edits for device %q", name) - // Get a device handle by either index or UUID - var nvmlDevice nvml.Device - if idx, err := strconv.Atoi(name); err == nil { - nvmlDevice, err = nvmlLib.DeviceGetHandleByIndex(idx) - if err != nvml.SUCCESS { - return nil, fmt.Errorf("failed to get device handle for index '%v': %w", idx, err) - } - } else { - nvmlDevice, err = nvmlLib.DeviceGetHandleByUUID(name) - if err != nvml.SUCCESS { - return nil, fmt.Errorf("failed to get device handle for UUID '%v': %w", name, err) - } - } - - nvlibDevice, err := nvdevice.NewDevice(nvmlDevice) - if err != nil { - return nil, fmt.Errorf("failed to construct device: %w", err) - } - - gpuEdits, err := cdilib.GetGPUDeviceEdits(nvlibDevice) - if err != nil { - return nil, fmt.Errorf("failed to get CDI spec edits for GPU %q: %w", name, err) - } - gpuDevice := specs.Device{ - Name: name, - ContainerEdits: *gpuEdits.ContainerEdits, - } - deviceSpecs = append(deviceSpecs, gpuDevice) + deviceSpecs, err := cdilib.GetDeviceSpecsByID(identifiers...) 
+ if err != nil { + return nil, fmt.Errorf("failed to get CDI device specs: %w", err) } commonEdits, err := cdilib.GetCommonEdits() diff --git a/pkg/nvcdi/api.go b/pkg/nvcdi/api.go index 43aad634..27c264de 100644 --- a/pkg/nvcdi/api.go +++ b/pkg/nvcdi/api.go @@ -51,4 +51,5 @@ type Interface interface { GetGPUDeviceSpecs(int, device.Device) (*specs.Device, error) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.ContainerEdits, error) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) + GetDeviceSpecsByID(...string) ([]specs.Device, error) } diff --git a/pkg/nvcdi/gds.go b/pkg/nvcdi/gds.go index cb1bf760..74a186c1 100644 --- a/pkg/nvcdi/gds.go +++ b/pkg/nvcdi/gds.go @@ -81,3 +81,10 @@ func (l *gdslib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.Contai func (l *gdslib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) { return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported") } + +// GetDeviceSpecsByID returns the CDI device specs for the GPU(s) represented by +// the provided identifiers, where an identifier is an index or UUID of a valid +// GPU device. +func (l *gdslib) GetDeviceSpecsByID(...string) ([]specs.Device, error) { + return nil, fmt.Errorf("GetDeviceSpecsByID is not supported") +} diff --git a/pkg/nvcdi/lib-csv.go b/pkg/nvcdi/lib-csv.go index 86d86f93..31604345 100644 --- a/pkg/nvcdi/lib-csv.go +++ b/pkg/nvcdi/lib-csv.go @@ -94,3 +94,10 @@ func (l *csvlib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.Contai func (l *csvlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) { return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported for CSV files") } + +// GetDeviceSpecsByID returns the CDI device specs for the GPU(s) represented by +// the provided identifiers, where an identifier is an index or UUID of a valid +// GPU device. 
+func (l *csvlib) GetDeviceSpecsByID(...string) ([]specs.Device, error) { + return nil, fmt.Errorf("GetDeviceSpecsByID is not supported for CSV files") +} diff --git a/pkg/nvcdi/lib-nvml.go b/pkg/nvcdi/lib-nvml.go index 3ce68ec9..41ae23ac 100644 --- a/pkg/nvcdi/lib-nvml.go +++ b/pkg/nvcdi/lib-nvml.go @@ -18,6 +18,7 @@ package nvcdi import ( "fmt" + "strconv" "github.com/NVIDIA/go-nvlib/pkg/nvlib/device" "github.com/NVIDIA/go-nvlib/pkg/nvml" @@ -75,6 +76,72 @@ func (l *nvmllib) GetCommonEdits() (*cdi.ContainerEdits, error) { return edits.FromDiscoverer(common) } +// GetDeviceSpecsByID returns the CDI device specs for the GPU(s) represented by +// the provided identifiers, where an identifier is an index or UUID of a valid +// GPU device. +// TODO: support identifiers that correspond to MIG devices +func (l *nvmllib) GetDeviceSpecsByID(identifiers ...string) ([]specs.Device, error) { + for _, id := range identifiers { + if id == "all" { + return l.GetAllDeviceSpecs() + } + } + + var deviceSpecs []specs.Device + + if r := l.nvmllib.Init(); r != nvml.SUCCESS { + return nil, fmt.Errorf("failed to initialize NVML: %w", r) + } + defer func() { + if r := l.nvmllib.Shutdown(); r != nvml.SUCCESS { + l.logger.Warningf("failed to shutdown NVML: %w", r) + } + }() + + nvmlDevices, err := l.getNVMLDevicesByID(identifiers...) 
+ if err != nil { + return nil, fmt.Errorf("failed to get NVML device handles: %w", err) + } + + for i, nvmlDevice := range nvmlDevices { + nvlibDevice, err := l.devicelib.NewDevice(nvmlDevice) + if err != nil { + return nil, fmt.Errorf("failed to construct device: %w", err) + } + deviceEdits, err := l.GetGPUDeviceEdits(nvlibDevice) + if err != nil { + return nil, fmt.Errorf("failed to get CDI device edits for identifier %q: %w", identifiers[i], err) + } + deviceSpec := specs.Device{ + Name: identifiers[i], + ContainerEdits: *deviceEdits.ContainerEdits, + } + deviceSpecs = append(deviceSpecs, deviceSpec) + } + + return deviceSpecs, nil +} + +// TODO: move this to go-nvlib? +func (l *nvmllib) getNVMLDevicesByID(identifiers ...string) ([]nvml.Device, error) { + devices := []nvml.Device{} + for _, id := range identifiers { + if dev, err := l.nvmllib.DeviceGetHandleByUUID(id); err == nvml.SUCCESS { + devices = append(devices, dev) + continue + } + // TODO: check for a MIG device index + if idx, err := strconv.Atoi(id); err == nil { + if dev, err := l.nvmllib.DeviceGetHandleByIndex(idx); err == nvml.SUCCESS { + devices = append(devices, dev) + continue + } + } + return nil, fmt.Errorf("failed to get NVML device handle for identifier %q", id) + } + return devices, nil +} + func (l *nvmllib) getGPUDeviceSpecs() ([]specs.Device, error) { var deviceSpecs []specs.Device err := l.devicelib.VisitDevices(func(i int, d device.Device) error { diff --git a/pkg/nvcdi/lib-wsl.go b/pkg/nvcdi/lib-wsl.go index b01c8268..385007cf 100644 --- a/pkg/nvcdi/lib-wsl.go +++ b/pkg/nvcdi/lib-wsl.go @@ -81,3 +81,10 @@ func (l *wsllib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.Contai func (l *wsllib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) { return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported on WSL") } + +// GetDeviceSpecsByID returns the CDI device specs for the GPU(s) represented by +// the provided identifiers, where an 
identifier is an index or UUID of a valid +// GPU device. +func (l *wsllib) GetDeviceSpecsByID(...string) ([]specs.Device, error) { + return nil, fmt.Errorf("GetDeviceSpecsByID is not supported on WSL") +} diff --git a/pkg/nvcdi/management.go b/pkg/nvcdi/management.go index 36b4b27b..8c3d4b32 100644 --- a/pkg/nvcdi/management.go +++ b/pkg/nvcdi/management.go @@ -188,3 +188,10 @@ func (m *managementlib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi func (m *managementlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) { return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported") } + +// GetDeviceSpecsByID returns the CDI device specs for the GPU(s) represented by +// the provided identifiers, where an identifier is an index or UUID of a valid +// GPU device. +func (l *managementlib) GetDeviceSpecsByID(...string) ([]specs.Device, error) { + return nil, fmt.Errorf("GetDeviceSpecsByID is not supported") +} diff --git a/pkg/nvcdi/mofed.go b/pkg/nvcdi/mofed.go index 3f56b2d5..607b7baf 100644 --- a/pkg/nvcdi/mofed.go +++ b/pkg/nvcdi/mofed.go @@ -81,3 +81,10 @@ func (l *mofedlib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.Cont func (l *mofedlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) { return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported") } + +// GetDeviceSpecsByID returns the CDI device specs for the GPU(s) represented by +// the provided identifiers, where an identifier is an index or UUID of a valid +// GPU device. 
+func (l *mofedlib) GetDeviceSpecsByID(...string) ([]specs.Device, error) { + return nil, fmt.Errorf("GetDeviceSpecsByID is not supported") +} From def7d09f852c9f655254bc4b5576a8fe7a13b144 Mon Sep 17 00:00:00 2001 From: Christopher Desiniotis Date: Tue, 5 Dec 2023 16:58:29 -0800 Subject: [PATCH 3/6] Refactor how device identifiers are parsed before performing automatic CDI spec generation Signed-off-by: Christopher Desiniotis --- go.mod | 1 + go.sum | 2 + pkg/nvcdi/identifier.go | 76 +++++ pkg/nvcdi/identifier_test.go | 90 +++++ pkg/nvcdi/lib-nvml.go | 39 ++- vendor/github.com/google/uuid/CHANGELOG.md | 21 ++ vendor/github.com/google/uuid/CONTRIBUTING.md | 26 ++ vendor/github.com/google/uuid/CONTRIBUTORS | 9 + vendor/github.com/google/uuid/LICENSE | 27 ++ vendor/github.com/google/uuid/README.md | 21 ++ vendor/github.com/google/uuid/dce.go | 80 +++++ vendor/github.com/google/uuid/doc.go | 12 + vendor/github.com/google/uuid/hash.go | 53 +++ vendor/github.com/google/uuid/marshal.go | 38 +++ vendor/github.com/google/uuid/node.go | 90 +++++ vendor/github.com/google/uuid/node_js.go | 12 + vendor/github.com/google/uuid/node_net.go | 33 ++ vendor/github.com/google/uuid/null.go | 118 +++++++ vendor/github.com/google/uuid/sql.go | 59 ++++ vendor/github.com/google/uuid/time.go | 123 +++++++ vendor/github.com/google/uuid/util.go | 43 +++ vendor/github.com/google/uuid/uuid.go | 312 ++++++++++++++++++ vendor/github.com/google/uuid/version1.go | 44 +++ vendor/github.com/google/uuid/version4.go | 76 +++++ vendor/modules.txt | 3 + 25 files changed, 1396 insertions(+), 12 deletions(-) create mode 100644 pkg/nvcdi/identifier.go create mode 100644 pkg/nvcdi/identifier_test.go create mode 100644 vendor/github.com/google/uuid/CHANGELOG.md create mode 100644 vendor/github.com/google/uuid/CONTRIBUTING.md create mode 100644 vendor/github.com/google/uuid/CONTRIBUTORS create mode 100644 vendor/github.com/google/uuid/LICENSE create mode 100644 vendor/github.com/google/uuid/README.md create 
mode 100644 vendor/github.com/google/uuid/dce.go create mode 100644 vendor/github.com/google/uuid/doc.go create mode 100644 vendor/github.com/google/uuid/hash.go create mode 100644 vendor/github.com/google/uuid/marshal.go create mode 100644 vendor/github.com/google/uuid/node.go create mode 100644 vendor/github.com/google/uuid/node_js.go create mode 100644 vendor/github.com/google/uuid/node_net.go create mode 100644 vendor/github.com/google/uuid/null.go create mode 100644 vendor/github.com/google/uuid/sql.go create mode 100644 vendor/github.com/google/uuid/time.go create mode 100644 vendor/github.com/google/uuid/util.go create mode 100644 vendor/github.com/google/uuid/uuid.go create mode 100644 vendor/github.com/google/uuid/version1.go create mode 100644 vendor/github.com/google/uuid/version4.go diff --git a/go.mod b/go.mod index 483d6943..8fa35836 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require ( github.com/NVIDIA/go-nvlib v0.0.0-20231116150931-9fd385bace0d github.com/NVIDIA/go-nvml v0.12.0-1.0.20231020145430-e06766c5e74f github.com/fsnotify/fsnotify v1.5.4 + github.com/google/uuid v1.4.0 github.com/opencontainers/runtime-spec v1.1.0 github.com/pelletier/go-toml v1.9.4 github.com/sirupsen/logrus v1.9.0 diff --git a/go.sum b/go.sum index 4b45a0b1..59d30ef3 100644 --- a/go.sum +++ b/go.sum @@ -15,6 +15,8 @@ github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/fsnotify/fsnotify v1.5.4 h1:jRbGcIw6P2Meqdwuo0H1p6JVLbL5DHKAKlYndzMwVZI= github.com/fsnotify/fsnotify v1.5.4/go.mod h1:OVB6XrOHzAwXMpEM7uPOzcehqUV2UqJxmVXmkdnm1bU= github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.4.0 h1:MtMxsa51/r9yyhkyLsVeVt0B+BGQZzpQiTQ4eHZ8bc4= +github.com/google/uuid v1.4.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/errwrap v1.1.0 
h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= diff --git a/pkg/nvcdi/identifier.go b/pkg/nvcdi/identifier.go new file mode 100644 index 00000000..f46f2b41 --- /dev/null +++ b/pkg/nvcdi/identifier.go @@ -0,0 +1,76 @@ +package nvcdi + +import ( + "strconv" + "strings" + + "github.com/google/uuid" +) + +type identifier string + +// isGpuIndex checks if an identifier is a full GPU index +func (i identifier) isGpuIndex() bool { + if _, err := strconv.ParseUint(string(i), 10, 0); err != nil { + return false + } + return true +} + +// isMigIndex checks if an identifier is a MIG index +func (i identifier) isMigIndex() bool { + split := strings.SplitN(string(i), ":", 2) + if len(split) != 2 { + return false + } + for _, s := range split { + if _, err := strconv.ParseUint(s, 10, 0); err != nil { + return false + } + } + return true +} + +// isUUID checks if an identifier is a UUID +func (i identifier) isUUID() bool { + return i.isGpuUUID() || i.isMigUUID() +} + +// isGpuUUID checks if an identifier is a GPU UUID +// A GPU UUID must be of the form GPU-b1028956-cfa2-0990-bf4a-5da9abb51763 +func (i identifier) isGpuUUID() bool { + if !strings.HasPrefix(string(i), "GPU-") { + return false + } + _, err := uuid.Parse(strings.TrimPrefix(string(i), "GPU-")) + return err == nil +} + +// isMigUUID checks if an identifier is a MIG UUID +// A MIG UUID can be of one of two forms: +// - MIG-b1028956-cfa2-0990-bf4a-5da9abb51763 +// - MIG-GPU-b1028956-cfa2-0990-bf4a-5da9abb51763/3/0 +func (i identifier) isMigUUID() bool { + if !strings.HasPrefix(string(i), "MIG-") { + return false + } + suffix := strings.TrimPrefix(string(i), "MIG-") + _, err := uuid.Parse(suffix) + if err == nil { + return true + } + split := strings.SplitN(suffix, "/", 3) + if len(split) != 3 { + return false + } + if !identifier(split[0]).isGpuUUID() { + return false + } + for _, s := range split[1:] { + _, err := 
strconv.ParseUint(s, 10, 0) + if err != nil { + return false + } + } + return true +} diff --git a/pkg/nvcdi/identifier_test.go b/pkg/nvcdi/identifier_test.go new file mode 100644 index 00000000..c6be05cd --- /dev/null +++ b/pkg/nvcdi/identifier_test.go @@ -0,0 +1,90 @@ +package nvcdi + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestIsGpuIndex(t *testing.T) { + testCases := []struct { + id string + expected bool + }{ + {"", false}, + {"0", true}, + {"1", true}, + {"not an integer", false}, + } + for i, tc := range testCases { + t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) { + actual := identifier(tc.id).isGpuIndex() + require.Equal(t, tc.expected, actual) + }) + } +} + +func TestIsMigIndex(t *testing.T) { + testCases := []struct { + id string + expected bool + }{ + {"", false}, + {"0", false}, + {"not an integer", false}, + {"0:0", true}, + {"0:0:0", false}, + {"0:0.0", false}, + {"0:foo", false}, + {"foo:0", false}, + } + for i, tc := range testCases { + t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) { + actual := identifier(tc.id).isMigIndex() + require.Equal(t, tc.expected, actual) + }) + } +} + +func TestIsGpuUUID(t *testing.T) { + testCases := []struct { + id string + expected bool + }{ + {"", false}, + {"0", false}, + {"not an integer", false}, + {"GPU-foo", false}, + {"GPU-ebd34bdf-1083-eaac-2aff-4b71a022f9bd", true}, + {"MIG-ebd34bdf-1083-eaac-2aff-4b71a022f9bd", false}, + {"ebd34bdf-1083-eaac-2aff-4b71a022f9bd", false}, + } + for i, tc := range testCases { + t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) { + actual := identifier(tc.id).isGpuUUID() + require.Equal(t, tc.expected, actual) + }) + } +} + +func TestIsMigUUID(t *testing.T) { + testCases := []struct { + id string + expected bool + }{ + {"", false}, + {"0", false}, + {"not an integer", false}, + {"MIG-foo", false}, + {"MIG-ebd34bdf-1083-eaac-2aff-4b71a022f9bd", true}, + {"GPU-ebd34bdf-1083-eaac-2aff-4b71a022f9bd", false}, + 
{"ebd34bdf-1083-eaac-2aff-4b71a022f9bd", false}, + } + for i, tc := range testCases { + t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) { + actual := identifier(tc.id).isMigUUID() + require.Equal(t, tc.expected, actual) + }) + } +} diff --git a/pkg/nvcdi/lib-nvml.go b/pkg/nvcdi/lib-nvml.go index 41ae23ac..0946fc04 100644 --- a/pkg/nvcdi/lib-nvml.go +++ b/pkg/nvcdi/lib-nvml.go @@ -124,24 +124,39 @@ func (l *nvmllib) GetDeviceSpecsByID(identifiers ...string) ([]specs.Device, err // TODO: move this to go-nvlib? func (l *nvmllib) getNVMLDevicesByID(identifiers ...string) ([]nvml.Device, error) { - devices := []nvml.Device{} + var devices []nvml.Device for _, id := range identifiers { - if dev, err := l.nvmllib.DeviceGetHandleByUUID(id); err == nvml.SUCCESS { - devices = append(devices, dev) - continue + dev, err := l.getNVMLDeviceByID(id) + if err != nvml.SUCCESS { + return nil, fmt.Errorf("failed to get NVML device handle for identifier %q: %w", id, err) } - // TODO: check for a MIG device index - if idx, err := strconv.Atoi(id); err == nil { - if dev, err := l.nvmllib.DeviceGetHandleByIndex(idx); err == nvml.SUCCESS { - devices = append(devices, dev) - continue - } - } - return nil, fmt.Errorf("failed to get NVML device handle for identifier %q", id) + devices = append(devices, dev) } return devices, nil } +func (l *nvmllib) getNVMLDeviceByID(id string) (nvml.Device, error) { + var err error + devID := identifier(id) + + if devID.isUUID() { + return l.nvmllib.DeviceGetHandleByUUID(id) + } + + if devID.isGpuIndex() { + if idx, err := strconv.Atoi(id); err == nil { + return l.nvmllib.DeviceGetHandleByIndex(idx) + } + return nil, fmt.Errorf("failed to convert device index to an int: %w", err) + } + + if devID.isMigIndex() { + return nil, fmt.Errorf("MIG index is not supported") + } + + return nil, fmt.Errorf("identifier is not a valid UUID or index: %q", id) +} + func (l *nvmllib) getGPUDeviceSpecs() ([]specs.Device, error) { var deviceSpecs []specs.Device err 
:= l.devicelib.VisitDevices(func(i int, d device.Device) error { diff --git a/vendor/github.com/google/uuid/CHANGELOG.md b/vendor/github.com/google/uuid/CHANGELOG.md new file mode 100644 index 00000000..7ed347d3 --- /dev/null +++ b/vendor/github.com/google/uuid/CHANGELOG.md @@ -0,0 +1,21 @@ +# Changelog + +## [1.4.0](https://github.com/google/uuid/compare/v1.3.1...v1.4.0) (2023-10-26) + + +### Features + +* UUIDs slice type with Strings() convenience method ([#133](https://github.com/google/uuid/issues/133)) ([cd5fbbd](https://github.com/google/uuid/commit/cd5fbbdd02f3e3467ac18940e07e062be1f864b4)) + +### Fixes + +* Clarify that Parse's job is to parse but not necessarily validate strings. (Documents current behavior) + +## [1.3.1](https://github.com/google/uuid/compare/v1.3.0...v1.3.1) (2023-08-18) + + +### Bug Fixes + +* Use .EqualFold() to parse urn prefixed UUIDs ([#118](https://github.com/google/uuid/issues/118)) ([574e687](https://github.com/google/uuid/commit/574e6874943741fb99d41764c705173ada5293f0)) + +## Changelog diff --git a/vendor/github.com/google/uuid/CONTRIBUTING.md b/vendor/github.com/google/uuid/CONTRIBUTING.md new file mode 100644 index 00000000..a502fdc5 --- /dev/null +++ b/vendor/github.com/google/uuid/CONTRIBUTING.md @@ -0,0 +1,26 @@ +# How to contribute + +We definitely welcome patches and contribution to this project! + +### Tips + +Commits must be formatted according to the [Conventional Commits Specification](https://www.conventionalcommits.org). + +Always try to include a test case! If it is not possible or not necessary, +please explain why in the pull request description. + +### Releasing + +Commits that would precipitate a SemVer change, as described in the Conventional +Commits Specification, will trigger [`release-please`](https://github.com/google-github-actions/release-please-action) +to create a release candidate pull request. Once submitted, `release-please` +will create a release. 
+ +For tips on how to work with `release-please`, see its documentation. + +### Legal requirements + +In order to protect both you and ourselves, you will need to sign the +[Contributor License Agreement](https://cla.developers.google.com/clas). + +You may have already signed it for other Google projects. diff --git a/vendor/github.com/google/uuid/CONTRIBUTORS b/vendor/github.com/google/uuid/CONTRIBUTORS new file mode 100644 index 00000000..b4bb97f6 --- /dev/null +++ b/vendor/github.com/google/uuid/CONTRIBUTORS @@ -0,0 +1,9 @@ +Paul Borman +bmatsuo +shawnps +theory +jboverfelt +dsymonds +cd1 +wallclockbuilder +dansouza diff --git a/vendor/github.com/google/uuid/LICENSE b/vendor/github.com/google/uuid/LICENSE new file mode 100644 index 00000000..5dc68268 --- /dev/null +++ b/vendor/github.com/google/uuid/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2009,2014 Google Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/google/uuid/README.md b/vendor/github.com/google/uuid/README.md new file mode 100644 index 00000000..3e9a6188 --- /dev/null +++ b/vendor/github.com/google/uuid/README.md @@ -0,0 +1,21 @@ +# uuid +The uuid package generates and inspects UUIDs based on +[RFC 4122](https://datatracker.ietf.org/doc/html/rfc4122) +and DCE 1.1: Authentication and Security Services. + +This package is based on the github.com/pborman/uuid package (previously named +code.google.com/p/go-uuid). It differs from these earlier packages in that +a UUID is a 16 byte array rather than a byte slice. One loss due to this +change is the ability to represent an invalid UUID (vs a NIL UUID). + +###### Install +```sh +go get github.com/google/uuid +``` + +###### Documentation +[![Go Reference](https://pkg.go.dev/badge/github.com/google/uuid.svg)](https://pkg.go.dev/github.com/google/uuid) + +Full `go doc` style documentation for the package can be viewed online without +installing this package by using the GoDoc site here: +http://pkg.go.dev/github.com/google/uuid diff --git a/vendor/github.com/google/uuid/dce.go b/vendor/github.com/google/uuid/dce.go new file mode 100644 index 00000000..fa820b9d --- /dev/null +++ b/vendor/github.com/google/uuid/dce.go @@ -0,0 +1,80 @@ +// Copyright 2016 Google Inc. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package uuid + +import ( + "encoding/binary" + "fmt" + "os" +) + +// A Domain represents a Version 2 domain +type Domain byte + +// Domain constants for DCE Security (Version 2) UUIDs. +const ( + Person = Domain(0) + Group = Domain(1) + Org = Domain(2) +) + +// NewDCESecurity returns a DCE Security (Version 2) UUID. +// +// The domain should be one of Person, Group or Org. +// On a POSIX system the id should be the users UID for the Person +// domain and the users GID for the Group. The meaning of id for +// the domain Org or on non-POSIX systems is site defined. +// +// For a given domain/id pair the same token may be returned for up to +// 7 minutes and 10 seconds. +func NewDCESecurity(domain Domain, id uint32) (UUID, error) { + uuid, err := NewUUID() + if err == nil { + uuid[6] = (uuid[6] & 0x0f) | 0x20 // Version 2 + uuid[9] = byte(domain) + binary.BigEndian.PutUint32(uuid[0:], id) + } + return uuid, err +} + +// NewDCEPerson returns a DCE Security (Version 2) UUID in the person +// domain with the id returned by os.Getuid. +// +// NewDCESecurity(Person, uint32(os.Getuid())) +func NewDCEPerson() (UUID, error) { + return NewDCESecurity(Person, uint32(os.Getuid())) +} + +// NewDCEGroup returns a DCE Security (Version 2) UUID in the group +// domain with the id returned by os.Getgid. +// +// NewDCESecurity(Group, uint32(os.Getgid())) +func NewDCEGroup() (UUID, error) { + return NewDCESecurity(Group, uint32(os.Getgid())) +} + +// Domain returns the domain for a Version 2 UUID. Domains are only defined +// for Version 2 UUIDs. +func (uuid UUID) Domain() Domain { + return Domain(uuid[9]) +} + +// ID returns the id for a Version 2 UUID. IDs are only defined for Version 2 +// UUIDs. 
+func (uuid UUID) ID() uint32 { + return binary.BigEndian.Uint32(uuid[0:4]) +} + +func (d Domain) String() string { + switch d { + case Person: + return "Person" + case Group: + return "Group" + case Org: + return "Org" + } + return fmt.Sprintf("Domain%d", int(d)) +} diff --git a/vendor/github.com/google/uuid/doc.go b/vendor/github.com/google/uuid/doc.go new file mode 100644 index 00000000..5b8a4b9a --- /dev/null +++ b/vendor/github.com/google/uuid/doc.go @@ -0,0 +1,12 @@ +// Copyright 2016 Google Inc. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package uuid generates and inspects UUIDs. +// +// UUIDs are based on RFC 4122 and DCE 1.1: Authentication and Security +// Services. +// +// A UUID is a 16 byte (128 bit) array. UUIDs may be used as keys to +// maps or compared directly. +package uuid diff --git a/vendor/github.com/google/uuid/hash.go b/vendor/github.com/google/uuid/hash.go new file mode 100644 index 00000000..b404f4be --- /dev/null +++ b/vendor/github.com/google/uuid/hash.go @@ -0,0 +1,53 @@ +// Copyright 2016 Google Inc. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package uuid + +import ( + "crypto/md5" + "crypto/sha1" + "hash" +) + +// Well known namespace IDs and UUIDs +var ( + NameSpaceDNS = Must(Parse("6ba7b810-9dad-11d1-80b4-00c04fd430c8")) + NameSpaceURL = Must(Parse("6ba7b811-9dad-11d1-80b4-00c04fd430c8")) + NameSpaceOID = Must(Parse("6ba7b812-9dad-11d1-80b4-00c04fd430c8")) + NameSpaceX500 = Must(Parse("6ba7b814-9dad-11d1-80b4-00c04fd430c8")) + Nil UUID // empty UUID, all zeros +) + +// NewHash returns a new UUID derived from the hash of space concatenated with +// data generated by h. The hash should be at least 16 byte in length. The +// first 16 bytes of the hash are used to form the UUID. The version of the +// UUID will be the lower 4 bits of version. 
NewHash is used to implement +// NewMD5 and NewSHA1. +func NewHash(h hash.Hash, space UUID, data []byte, version int) UUID { + h.Reset() + h.Write(space[:]) //nolint:errcheck + h.Write(data) //nolint:errcheck + s := h.Sum(nil) + var uuid UUID + copy(uuid[:], s) + uuid[6] = (uuid[6] & 0x0f) | uint8((version&0xf)<<4) + uuid[8] = (uuid[8] & 0x3f) | 0x80 // RFC 4122 variant + return uuid +} + +// NewMD5 returns a new MD5 (Version 3) UUID based on the +// supplied name space and data. It is the same as calling: +// +// NewHash(md5.New(), space, data, 3) +func NewMD5(space UUID, data []byte) UUID { + return NewHash(md5.New(), space, data, 3) +} + +// NewSHA1 returns a new SHA1 (Version 5) UUID based on the +// supplied name space and data. It is the same as calling: +// +// NewHash(sha1.New(), space, data, 5) +func NewSHA1(space UUID, data []byte) UUID { + return NewHash(sha1.New(), space, data, 5) +} diff --git a/vendor/github.com/google/uuid/marshal.go b/vendor/github.com/google/uuid/marshal.go new file mode 100644 index 00000000..14bd3407 --- /dev/null +++ b/vendor/github.com/google/uuid/marshal.go @@ -0,0 +1,38 @@ +// Copyright 2016 Google Inc. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package uuid + +import "fmt" + +// MarshalText implements encoding.TextMarshaler. +func (uuid UUID) MarshalText() ([]byte, error) { + var js [36]byte + encodeHex(js[:], uuid) + return js[:], nil +} + +// UnmarshalText implements encoding.TextUnmarshaler. +func (uuid *UUID) UnmarshalText(data []byte) error { + id, err := ParseBytes(data) + if err != nil { + return err + } + *uuid = id + return nil +} + +// MarshalBinary implements encoding.BinaryMarshaler. +func (uuid UUID) MarshalBinary() ([]byte, error) { + return uuid[:], nil +} + +// UnmarshalBinary implements encoding.BinaryUnmarshaler. 
+func (uuid *UUID) UnmarshalBinary(data []byte) error { + if len(data) != 16 { + return fmt.Errorf("invalid UUID (got %d bytes)", len(data)) + } + copy(uuid[:], data) + return nil +} diff --git a/vendor/github.com/google/uuid/node.go b/vendor/github.com/google/uuid/node.go new file mode 100644 index 00000000..d651a2b0 --- /dev/null +++ b/vendor/github.com/google/uuid/node.go @@ -0,0 +1,90 @@ +// Copyright 2016 Google Inc. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package uuid + +import ( + "sync" +) + +var ( + nodeMu sync.Mutex + ifname string // name of interface being used + nodeID [6]byte // hardware for version 1 UUIDs + zeroID [6]byte // nodeID with only 0's +) + +// NodeInterface returns the name of the interface from which the NodeID was +// derived. The interface "user" is returned if the NodeID was set by +// SetNodeID. +func NodeInterface() string { + defer nodeMu.Unlock() + nodeMu.Lock() + return ifname +} + +// SetNodeInterface selects the hardware address to be used for Version 1 UUIDs. +// If name is "" then the first usable interface found will be used or a random +// Node ID will be generated. If a named interface cannot be found then false +// is returned. +// +// SetNodeInterface never fails when name is "". +func SetNodeInterface(name string) bool { + defer nodeMu.Unlock() + nodeMu.Lock() + return setNodeInterface(name) +} + +func setNodeInterface(name string) bool { + iname, addr := getHardwareInterface(name) // null implementation for js + if iname != "" && addr != nil { + ifname = iname + copy(nodeID[:], addr) + return true + } + + // We found no interfaces with a valid hardware address. 
If name + // does not specify a specific interface generate a random Node ID + // (section 4.1.6) + if name == "" { + ifname = "random" + randomBits(nodeID[:]) + return true + } + return false +} + +// NodeID returns a slice of a copy of the current Node ID, setting the Node ID +// if not already set. +func NodeID() []byte { + defer nodeMu.Unlock() + nodeMu.Lock() + if nodeID == zeroID { + setNodeInterface("") + } + nid := nodeID + return nid[:] +} + +// SetNodeID sets the Node ID to be used for Version 1 UUIDs. The first 6 bytes +// of id are used. If id is less than 6 bytes then false is returned and the +// Node ID is not set. +func SetNodeID(id []byte) bool { + if len(id) < 6 { + return false + } + defer nodeMu.Unlock() + nodeMu.Lock() + copy(nodeID[:], id) + ifname = "user" + return true +} + +// NodeID returns the 6 byte node id encoded in uuid. It returns nil if uuid is +// not valid. The NodeID is only well defined for version 1 and 2 UUIDs. +func (uuid UUID) NodeID() []byte { + var node [6]byte + copy(node[:], uuid[10:]) + return node[:] +} diff --git a/vendor/github.com/google/uuid/node_js.go b/vendor/github.com/google/uuid/node_js.go new file mode 100644 index 00000000..b2a0bc87 --- /dev/null +++ b/vendor/github.com/google/uuid/node_js.go @@ -0,0 +1,12 @@ +// Copyright 2017 Google Inc. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build js + +package uuid + +// getHardwareInterface returns nil values for the JS version of the code. +// This removes the "net" dependency, because it is not used in the browser. +// Using the "net" library inflates the size of the transpiled JS code by 673k bytes. 
+func getHardwareInterface(name string) (string, []byte) { return "", nil } diff --git a/vendor/github.com/google/uuid/node_net.go b/vendor/github.com/google/uuid/node_net.go new file mode 100644 index 00000000..0cbbcddb --- /dev/null +++ b/vendor/github.com/google/uuid/node_net.go @@ -0,0 +1,33 @@ +// Copyright 2017 Google Inc. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !js + +package uuid + +import "net" + +var interfaces []net.Interface // cached list of interfaces + +// getHardwareInterface returns the name and hardware address of interface name. +// If name is "" then the name and hardware address of one of the system's +// interfaces is returned. If no interfaces are found (name does not exist or +// there are no interfaces) then "", nil is returned. +// +// Only addresses of at least 6 bytes are returned. +func getHardwareInterface(name string) (string, []byte) { + if interfaces == nil { + var err error + interfaces, err = net.Interfaces() + if err != nil { + return "", nil + } + } + for _, ifs := range interfaces { + if len(ifs.HardwareAddr) >= 6 && (name == "" || name == ifs.Name) { + return ifs.Name, ifs.HardwareAddr + } + } + return "", nil +} diff --git a/vendor/github.com/google/uuid/null.go b/vendor/github.com/google/uuid/null.go new file mode 100644 index 00000000..d7fcbf28 --- /dev/null +++ b/vendor/github.com/google/uuid/null.go @@ -0,0 +1,118 @@ +// Copyright 2021 Google Inc. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package uuid + +import ( + "bytes" + "database/sql/driver" + "encoding/json" + "fmt" +) + +var jsonNull = []byte("null") + +// NullUUID represents a UUID that may be null. 
+// NullUUID implements the SQL driver.Scanner interface so +// it can be used as a scan destination: +// +// var u uuid.NullUUID +// err := db.QueryRow("SELECT name FROM foo WHERE id=?", id).Scan(&u) +// ... +// if u.Valid { +// // use u.UUID +// } else { +// // NULL value +// } +// +type NullUUID struct { + UUID UUID + Valid bool // Valid is true if UUID is not NULL +} + +// Scan implements the SQL driver.Scanner interface. +func (nu *NullUUID) Scan(value interface{}) error { + if value == nil { + nu.UUID, nu.Valid = Nil, false + return nil + } + + err := nu.UUID.Scan(value) + if err != nil { + nu.Valid = false + return err + } + + nu.Valid = true + return nil +} + +// Value implements the driver Valuer interface. +func (nu NullUUID) Value() (driver.Value, error) { + if !nu.Valid { + return nil, nil + } + // Delegate to UUID Value function + return nu.UUID.Value() +} + +// MarshalBinary implements encoding.BinaryMarshaler. +func (nu NullUUID) MarshalBinary() ([]byte, error) { + if nu.Valid { + return nu.UUID[:], nil + } + + return []byte(nil), nil +} + +// UnmarshalBinary implements encoding.BinaryUnmarshaler. +func (nu *NullUUID) UnmarshalBinary(data []byte) error { + if len(data) != 16 { + return fmt.Errorf("invalid UUID (got %d bytes)", len(data)) + } + copy(nu.UUID[:], data) + nu.Valid = true + return nil +} + +// MarshalText implements encoding.TextMarshaler. +func (nu NullUUID) MarshalText() ([]byte, error) { + if nu.Valid { + return nu.UUID.MarshalText() + } + + return jsonNull, nil +} + +// UnmarshalText implements encoding.TextUnmarshaler. +func (nu *NullUUID) UnmarshalText(data []byte) error { + id, err := ParseBytes(data) + if err != nil { + nu.Valid = false + return err + } + nu.UUID = id + nu.Valid = true + return nil +} + +// MarshalJSON implements json.Marshaler. +func (nu NullUUID) MarshalJSON() ([]byte, error) { + if nu.Valid { + return json.Marshal(nu.UUID) + } + + return jsonNull, nil +} + +// UnmarshalJSON implements json.Unmarshaler. 
+func (nu *NullUUID) UnmarshalJSON(data []byte) error { + if bytes.Equal(data, jsonNull) { + *nu = NullUUID{} + return nil // valid null UUID + } + err := json.Unmarshal(data, &nu.UUID) + nu.Valid = err == nil + return err +} diff --git a/vendor/github.com/google/uuid/sql.go b/vendor/github.com/google/uuid/sql.go new file mode 100644 index 00000000..2e02ec06 --- /dev/null +++ b/vendor/github.com/google/uuid/sql.go @@ -0,0 +1,59 @@ +// Copyright 2016 Google Inc. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package uuid + +import ( + "database/sql/driver" + "fmt" +) + +// Scan implements sql.Scanner so UUIDs can be read from databases transparently. +// Currently, database types that map to string and []byte are supported. Please +// consult database-specific driver documentation for matching types. +func (uuid *UUID) Scan(src interface{}) error { + switch src := src.(type) { + case nil: + return nil + + case string: + // if an empty UUID comes from a table, we return a null UUID + if src == "" { + return nil + } + + // see Parse for required string format + u, err := Parse(src) + if err != nil { + return fmt.Errorf("Scan: %v", err) + } + + *uuid = u + + case []byte: + // if an empty UUID comes from a table, we return a null UUID + if len(src) == 0 { + return nil + } + + // assumes a simple slice of bytes if 16 bytes + // otherwise attempts to parse + if len(src) != 16 { + return uuid.Scan(string(src)) + } + copy((*uuid)[:], src) + + default: + return fmt.Errorf("Scan: unable to scan type %T into UUID", src) + } + + return nil +} + +// Value implements sql.Valuer so that UUIDs can be written to databases +// transparently. Currently, UUIDs map to strings. Please consult +// database-specific driver documentation for matching types. 
+func (uuid UUID) Value() (driver.Value, error) { + return uuid.String(), nil +} diff --git a/vendor/github.com/google/uuid/time.go b/vendor/github.com/google/uuid/time.go new file mode 100644 index 00000000..e6ef06cd --- /dev/null +++ b/vendor/github.com/google/uuid/time.go @@ -0,0 +1,123 @@ +// Copyright 2016 Google Inc. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package uuid + +import ( + "encoding/binary" + "sync" + "time" +) + +// A Time represents a time as the number of 100's of nanoseconds since 15 Oct +// 1582. +type Time int64 + +const ( + lillian = 2299160 // Julian day of 15 Oct 1582 + unix = 2440587 // Julian day of 1 Jan 1970 + epoch = unix - lillian // Days between epochs + g1582 = epoch * 86400 // seconds between epochs + g1582ns100 = g1582 * 10000000 // 100s of a nanoseconds between epochs +) + +var ( + timeMu sync.Mutex + lasttime uint64 // last time we returned + clockSeq uint16 // clock sequence for this run + + timeNow = time.Now // for testing +) + +// UnixTime converts t the number of seconds and nanoseconds using the Unix +// epoch of 1 Jan 1970. +func (t Time) UnixTime() (sec, nsec int64) { + sec = int64(t - g1582ns100) + nsec = (sec % 10000000) * 100 + sec /= 10000000 + return sec, nsec +} + +// GetTime returns the current Time (100s of nanoseconds since 15 Oct 1582) and +// clock sequence as well as adjusting the clock sequence as needed. An error +// is returned if the current time cannot be determined. +func GetTime() (Time, uint16, error) { + defer timeMu.Unlock() + timeMu.Lock() + return getTime() +} + +func getTime() (Time, uint16, error) { + t := timeNow() + + // If we don't have a clock sequence already, set one. 
+ if clockSeq == 0 { + setClockSequence(-1) + } + now := uint64(t.UnixNano()/100) + g1582ns100 + + // If time has gone backwards with this clock sequence then we + // increment the clock sequence + if now <= lasttime { + clockSeq = ((clockSeq + 1) & 0x3fff) | 0x8000 + } + lasttime = now + return Time(now), clockSeq, nil +} + +// ClockSequence returns the current clock sequence, generating one if not +// already set. The clock sequence is only used for Version 1 UUIDs. +// +// The uuid package does not use global static storage for the clock sequence or +// the last time a UUID was generated. Unless SetClockSequence is used, a new +// random clock sequence is generated the first time a clock sequence is +// requested by ClockSequence, GetTime, or NewUUID. (section 4.2.1.1) +func ClockSequence() int { + defer timeMu.Unlock() + timeMu.Lock() + return clockSequence() +} + +func clockSequence() int { + if clockSeq == 0 { + setClockSequence(-1) + } + return int(clockSeq & 0x3fff) +} + +// SetClockSequence sets the clock sequence to the lower 14 bits of seq. Setting to +// -1 causes a new sequence to be generated. +func SetClockSequence(seq int) { + defer timeMu.Unlock() + timeMu.Lock() + setClockSequence(seq) +} + +func setClockSequence(seq int) { + if seq == -1 { + var b [2]byte + randomBits(b[:]) // clock sequence + seq = int(b[0])<<8 | int(b[1]) + } + oldSeq := clockSeq + clockSeq = uint16(seq&0x3fff) | 0x8000 // Set our variant + if oldSeq != clockSeq { + lasttime = 0 + } +} + +// Time returns the time in 100s of nanoseconds since 15 Oct 1582 encoded in +// uuid. The time is only defined for version 1 and 2 UUIDs. +func (uuid UUID) Time() Time { + time := int64(binary.BigEndian.Uint32(uuid[0:4])) + time |= int64(binary.BigEndian.Uint16(uuid[4:6])) << 32 + time |= int64(binary.BigEndian.Uint16(uuid[6:8])&0xfff) << 48 + return Time(time) +} + +// ClockSequence returns the clock sequence encoded in uuid. 
+// The clock sequence is only well defined for version 1 and 2 UUIDs. +func (uuid UUID) ClockSequence() int { + return int(binary.BigEndian.Uint16(uuid[8:10])) & 0x3fff +} diff --git a/vendor/github.com/google/uuid/util.go b/vendor/github.com/google/uuid/util.go new file mode 100644 index 00000000..5ea6c737 --- /dev/null +++ b/vendor/github.com/google/uuid/util.go @@ -0,0 +1,43 @@ +// Copyright 2016 Google Inc. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package uuid + +import ( + "io" +) + +// randomBits completely fills slice b with random data. +func randomBits(b []byte) { + if _, err := io.ReadFull(rander, b); err != nil { + panic(err.Error()) // rand should never fail + } +} + +// xvalues returns the value of a byte as a hexadecimal digit or 255. +var xvalues = [256]byte{ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255, 255, 255, 255, 255, + 255, 10, 11, 12, 13, 14, 15, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 10, 11, 12, 13, 14, 15, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +} + +// xtob converts hex characters x1 and x2 into a byte. +func xtob(x1, x2 byte) (byte, bool) { + b1 := xvalues[x1] + b2 := xvalues[x2] + return (b1 << 4) | b2, b1 != 255 && b2 != 255 +} diff --git a/vendor/github.com/google/uuid/uuid.go b/vendor/github.com/google/uuid/uuid.go new file mode 100644 index 00000000..dc75f7d9 --- /dev/null +++ b/vendor/github.com/google/uuid/uuid.go @@ -0,0 +1,312 @@ +// Copyright 2018 Google Inc. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package uuid + +import ( + "bytes" + "crypto/rand" + "encoding/hex" + "errors" + "fmt" + "io" + "strings" + "sync" +) + +// A UUID is a 128 bit (16 byte) Universal Unique IDentifier as defined in RFC +// 4122. +type UUID [16]byte + +// A Version represents a UUID's version. +type Version byte + +// A Variant represents a UUID's variant. +type Variant byte + +// Constants returned by Variant. +const ( + Invalid = Variant(iota) // Invalid UUID + RFC4122 // The variant specified in RFC4122 + Reserved // Reserved, NCS backward compatibility. + Microsoft // Reserved, Microsoft Corporation backward compatibility. + Future // Reserved for future definition. 
+) + +const randPoolSize = 16 * 16 + +var ( + rander = rand.Reader // random function + poolEnabled = false + poolMu sync.Mutex + poolPos = randPoolSize // protected with poolMu + pool [randPoolSize]byte // protected with poolMu +) + +type invalidLengthError struct{ len int } + +func (err invalidLengthError) Error() string { + return fmt.Sprintf("invalid UUID length: %d", err.len) +} + +// IsInvalidLengthError is matcher function for custom error invalidLengthError +func IsInvalidLengthError(err error) bool { + _, ok := err.(invalidLengthError) + return ok +} + +// Parse decodes s into a UUID or returns an error if it cannot be parsed. Both +// the standard UUID forms defined in RFC 4122 +// (xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx and +// urn:uuid:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx) are decoded. In addition, +// Parse accepts non-standard strings such as the raw hex encoding +// xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx and 38 byte "Microsoft style" encodings, +// e.g. {xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx}. Only the middle 36 bytes are +// examined in the latter case. Parse should not be used to validate strings as +// it parses non-standard encodings as indicated above. 
+func Parse(s string) (UUID, error) { + var uuid UUID + switch len(s) { + // xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + case 36: + + // urn:uuid:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + case 36 + 9: + if !strings.EqualFold(s[:9], "urn:uuid:") { + return uuid, fmt.Errorf("invalid urn prefix: %q", s[:9]) + } + s = s[9:] + + // {xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx} + case 36 + 2: + s = s[1:] + + // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + case 32: + var ok bool + for i := range uuid { + uuid[i], ok = xtob(s[i*2], s[i*2+1]) + if !ok { + return uuid, errors.New("invalid UUID format") + } + } + return uuid, nil + default: + return uuid, invalidLengthError{len(s)} + } + // s is now at least 36 bytes long + // it must be of the form xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + if s[8] != '-' || s[13] != '-' || s[18] != '-' || s[23] != '-' { + return uuid, errors.New("invalid UUID format") + } + for i, x := range [16]int{ + 0, 2, 4, 6, + 9, 11, + 14, 16, + 19, 21, + 24, 26, 28, 30, 32, 34, + } { + v, ok := xtob(s[x], s[x+1]) + if !ok { + return uuid, errors.New("invalid UUID format") + } + uuid[i] = v + } + return uuid, nil +} + +// ParseBytes is like Parse, except it parses a byte slice instead of a string. 
+func ParseBytes(b []byte) (UUID, error) { + var uuid UUID + switch len(b) { + case 36: // xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + case 36 + 9: // urn:uuid:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + if !bytes.EqualFold(b[:9], []byte("urn:uuid:")) { + return uuid, fmt.Errorf("invalid urn prefix: %q", b[:9]) + } + b = b[9:] + case 36 + 2: // {xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx} + b = b[1:] + case 32: // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + var ok bool + for i := 0; i < 32; i += 2 { + uuid[i/2], ok = xtob(b[i], b[i+1]) + if !ok { + return uuid, errors.New("invalid UUID format") + } + } + return uuid, nil + default: + return uuid, invalidLengthError{len(b)} + } + // s is now at least 36 bytes long + // it must be of the form xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + if b[8] != '-' || b[13] != '-' || b[18] != '-' || b[23] != '-' { + return uuid, errors.New("invalid UUID format") + } + for i, x := range [16]int{ + 0, 2, 4, 6, + 9, 11, + 14, 16, + 19, 21, + 24, 26, 28, 30, 32, 34, + } { + v, ok := xtob(b[x], b[x+1]) + if !ok { + return uuid, errors.New("invalid UUID format") + } + uuid[i] = v + } + return uuid, nil +} + +// MustParse is like Parse but panics if the string cannot be parsed. +// It simplifies safe initialization of global variables holding compiled UUIDs. +func MustParse(s string) UUID { + uuid, err := Parse(s) + if err != nil { + panic(`uuid: Parse(` + s + `): ` + err.Error()) + } + return uuid +} + +// FromBytes creates a new UUID from a byte slice. Returns an error if the slice +// does not have a length of 16. The bytes are copied from the slice. +func FromBytes(b []byte) (uuid UUID, err error) { + err = uuid.UnmarshalBinary(b) + return uuid, err +} + +// Must returns uuid if err is nil and panics otherwise. +func Must(uuid UUID, err error) UUID { + if err != nil { + panic(err) + } + return uuid +} + +// String returns the string form of uuid, xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx +// , or "" if uuid is invalid. 
+func (uuid UUID) String() string { + var buf [36]byte + encodeHex(buf[:], uuid) + return string(buf[:]) +} + +// URN returns the RFC 2141 URN form of uuid, +// urn:uuid:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx, or "" if uuid is invalid. +func (uuid UUID) URN() string { + var buf [36 + 9]byte + copy(buf[:], "urn:uuid:") + encodeHex(buf[9:], uuid) + return string(buf[:]) +} + +func encodeHex(dst []byte, uuid UUID) { + hex.Encode(dst, uuid[:4]) + dst[8] = '-' + hex.Encode(dst[9:13], uuid[4:6]) + dst[13] = '-' + hex.Encode(dst[14:18], uuid[6:8]) + dst[18] = '-' + hex.Encode(dst[19:23], uuid[8:10]) + dst[23] = '-' + hex.Encode(dst[24:], uuid[10:]) +} + +// Variant returns the variant encoded in uuid. +func (uuid UUID) Variant() Variant { + switch { + case (uuid[8] & 0xc0) == 0x80: + return RFC4122 + case (uuid[8] & 0xe0) == 0xc0: + return Microsoft + case (uuid[8] & 0xe0) == 0xe0: + return Future + default: + return Reserved + } +} + +// Version returns the version of uuid. +func (uuid UUID) Version() Version { + return Version(uuid[6] >> 4) +} + +func (v Version) String() string { + if v > 15 { + return fmt.Sprintf("BAD_VERSION_%d", v) + } + return fmt.Sprintf("VERSION_%d", v) +} + +func (v Variant) String() string { + switch v { + case RFC4122: + return "RFC4122" + case Reserved: + return "Reserved" + case Microsoft: + return "Microsoft" + case Future: + return "Future" + case Invalid: + return "Invalid" + } + return fmt.Sprintf("BadVariant%d", int(v)) +} + +// SetRand sets the random number generator to r, which implements io.Reader. +// If r.Read returns an error when the package requests random data then +// a panic will be issued. +// +// Calling SetRand with nil sets the random number generator to the default +// generator. +func SetRand(r io.Reader) { + if r == nil { + rander = rand.Reader + return + } + rander = r +} + +// EnableRandPool enables internal randomness pool used for Random +// (Version 4) UUID generation. 
The pool contains random bytes read from +// the random number generator on demand in batches. Enabling the pool +// may improve the UUID generation throughput significantly. +// +// Since the pool is stored on the Go heap, this feature may be a bad fit +// for security sensitive applications. +// +// Both EnableRandPool and DisableRandPool are not thread-safe and should +// only be called when there is no possibility that New or any other +// UUID Version 4 generation function will be called concurrently. +func EnableRandPool() { + poolEnabled = true +} + +// DisableRandPool disables the randomness pool if it was previously +// enabled with EnableRandPool. +// +// Both EnableRandPool and DisableRandPool are not thread-safe and should +// only be called when there is no possibility that New or any other +// UUID Version 4 generation function will be called concurrently. +func DisableRandPool() { + poolEnabled = false + defer poolMu.Unlock() + poolMu.Lock() + poolPos = randPoolSize +} + +// UUIDs is a slice of UUID types. +type UUIDs []UUID + +// Strings returns a string slice containing the string form of each UUID in uuids. +func (uuids UUIDs) Strings() []string { + var uuidStrs = make([]string, len(uuids)) + for i, uuid := range uuids { + uuidStrs[i] = uuid.String() + } + return uuidStrs +} diff --git a/vendor/github.com/google/uuid/version1.go b/vendor/github.com/google/uuid/version1.go new file mode 100644 index 00000000..46310962 --- /dev/null +++ b/vendor/github.com/google/uuid/version1.go @@ -0,0 +1,44 @@ +// Copyright 2016 Google Inc. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package uuid + +import ( + "encoding/binary" +) + +// NewUUID returns a Version 1 UUID based on the current NodeID and clock +// sequence, and the current time. If the NodeID has not been set by SetNodeID +// or SetNodeInterface then it will be set automatically. 
If the NodeID cannot +// be set NewUUID returns nil. If clock sequence has not been set by +// SetClockSequence then it will be set automatically. If GetTime fails to +// return the current NewUUID returns nil and an error. +// +// In most cases, New should be used. +func NewUUID() (UUID, error) { + var uuid UUID + now, seq, err := GetTime() + if err != nil { + return uuid, err + } + + timeLow := uint32(now & 0xffffffff) + timeMid := uint16((now >> 32) & 0xffff) + timeHi := uint16((now >> 48) & 0x0fff) + timeHi |= 0x1000 // Version 1 + + binary.BigEndian.PutUint32(uuid[0:], timeLow) + binary.BigEndian.PutUint16(uuid[4:], timeMid) + binary.BigEndian.PutUint16(uuid[6:], timeHi) + binary.BigEndian.PutUint16(uuid[8:], seq) + + nodeMu.Lock() + if nodeID == zeroID { + setNodeInterface("") + } + copy(uuid[10:], nodeID[:]) + nodeMu.Unlock() + + return uuid, nil +} diff --git a/vendor/github.com/google/uuid/version4.go b/vendor/github.com/google/uuid/version4.go new file mode 100644 index 00000000..7697802e --- /dev/null +++ b/vendor/github.com/google/uuid/version4.go @@ -0,0 +1,76 @@ +// Copyright 2016 Google Inc. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package uuid + +import "io" + +// New creates a new random UUID or panics. New is equivalent to +// the expression +// +// uuid.Must(uuid.NewRandom()) +func New() UUID { + return Must(NewRandom()) +} + +// NewString creates a new random UUID and returns it as a string or panics. +// NewString is equivalent to the expression +// +// uuid.New().String() +func NewString() string { + return Must(NewRandom()).String() +} + +// NewRandom returns a Random (Version 4) UUID. +// +// The strength of the UUIDs is based on the strength of the crypto/rand +// package. +// +// Uses the randomness pool if it was enabled with EnableRandPool. 
+// +// A note about uniqueness derived from the UUID Wikipedia entry: +// +// Randomly generated UUIDs have 122 random bits. One's annual risk of being +// hit by a meteorite is estimated to be one chance in 17 billion, that +// means the probability is about 0.00000000006 (6 × 10−11), +// equivalent to the odds of creating a few tens of trillions of UUIDs in a +// year and having one duplicate. +func NewRandom() (UUID, error) { + if !poolEnabled { + return NewRandomFromReader(rander) + } + return newRandomFromPool() +} + +// NewRandomFromReader returns a UUID based on bytes read from a given io.Reader. +func NewRandomFromReader(r io.Reader) (UUID, error) { + var uuid UUID + _, err := io.ReadFull(r, uuid[:]) + if err != nil { + return Nil, err + } + uuid[6] = (uuid[6] & 0x0f) | 0x40 // Version 4 + uuid[8] = (uuid[8] & 0x3f) | 0x80 // Variant is 10 + return uuid, nil +} + +func newRandomFromPool() (UUID, error) { + var uuid UUID + poolMu.Lock() + if poolPos == randPoolSize { + _, err := io.ReadFull(rander, pool[:]) + if err != nil { + poolMu.Unlock() + return Nil, err + } + poolPos = 0 + } + copy(uuid[:], pool[poolPos:(poolPos+16)]) + poolPos += 16 + poolMu.Unlock() + + uuid[6] = (uuid[6] & 0x0f) | 0x40 // Version 4 + uuid[8] = (uuid[8] & 0x3f) | 0x80 // Variant is 10 + return uuid, nil +} diff --git a/vendor/modules.txt b/vendor/modules.txt index be9f82d2..df8d0de6 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -20,6 +20,9 @@ github.com/davecgh/go-spew/spew # github.com/fsnotify/fsnotify v1.5.4 ## explicit; go 1.16 github.com/fsnotify/fsnotify +# github.com/google/uuid v1.4.0 +## explicit +github.com/google/uuid # github.com/hashicorp/errwrap v1.1.0 ## explicit # github.com/kr/pretty v0.3.1 From 31581469468d2d4e631a5984bf561ec0d9f3569e Mon Sep 17 00:00:00 2001 From: Christopher Desiniotis Date: Tue, 5 Dec 2023 18:38:00 -0800 Subject: [PATCH 4/6] Extend the 'runtime.nvidia.com/gpu' CDI device kind to support MIG devices specified by index or UUID 
Signed-off-by: Christopher Desiniotis --- pkg/nvcdi/lib-nvml.go | 62 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 55 insertions(+), 7 deletions(-) diff --git a/pkg/nvcdi/lib-nvml.go b/pkg/nvcdi/lib-nvml.go index 0946fc04..492e90cc 100644 --- a/pkg/nvcdi/lib-nvml.go +++ b/pkg/nvcdi/lib-nvml.go @@ -19,6 +19,7 @@ package nvcdi import ( "fmt" "strconv" + "strings" "github.com/NVIDIA/go-nvlib/pkg/nvlib/device" "github.com/NVIDIA/go-nvlib/pkg/nvml" @@ -79,7 +80,6 @@ func (l *nvmllib) GetCommonEdits() (*cdi.ContainerEdits, error) { // GetDeviceSpecsByID returns the CDI device specs for the GPU(s) represented by // the provided identifiers, where an identifier is an index or UUID of a valid // GPU device. -// TODO: support identifiers that correspond to MIG devices func (l *nvmllib) GetDeviceSpecsByID(identifiers ...string) ([]specs.Device, error) { for _, id := range identifiers { if id == "all" { @@ -104,11 +104,7 @@ func (l *nvmllib) GetDeviceSpecsByID(identifiers ...string) ([]specs.Device, err } for i, nvmlDevice := range nvmlDevices { - nvlibDevice, err := l.devicelib.NewDevice(nvmlDevice) - if err != nil { - return nil, fmt.Errorf("failed to construct device: %w", err) - } - deviceEdits, err := l.GetGPUDeviceEdits(nvlibDevice) + deviceEdits, err := l.getEditsForDevice(nvmlDevice) if err != nil { return nil, fmt.Errorf("failed to get CDI device edits for identifier %q: %w", identifiers[i], err) } @@ -151,12 +147,64 @@ func (l *nvmllib) getNVMLDeviceByID(id string) (nvml.Device, error) { } if devID.isMigIndex() { - return nil, fmt.Errorf("MIG index is not supported") + var gpuIdx, migIdx int + var parent nvml.Device + split := strings.SplitN(id, ":", 2) + if gpuIdx, err = strconv.Atoi(split[0]); err != nil { + return nil, fmt.Errorf("failed to convert device index to an int: %w", err) + } + if migIdx, err = strconv.Atoi(split[1]); err != nil { + return nil, fmt.Errorf("failed to convert device index to an int: %w", err) + } + if parent, err = 
l.nvmllib.DeviceGetHandleByIndex(gpuIdx); err != nvml.SUCCESS { + return nil, fmt.Errorf("failed to get parent device handle: %w", err) + } + return parent.GetMigDeviceHandleByIndex(migIdx) } return nil, fmt.Errorf("identifier is not a valid UUID or index: %q", id) } +func (l *nvmllib) getEditsForDevice(nvmlDevice nvml.Device) (*cdi.ContainerEdits, error) { + mig, err := nvmlDevice.IsMigDeviceHandle() + if err != nvml.SUCCESS { + return nil, fmt.Errorf("failed to determine if device handle is a MIG device: %w", err) + } + if mig { + return l.getEditsForMIGDevice(nvmlDevice) + } + return l.getEditsForGPUDevice(nvmlDevice) +} + +func (l *nvmllib) getEditsForGPUDevice(nvmlDevice nvml.Device) (*cdi.ContainerEdits, error) { + nvlibDevice, err := l.devicelib.NewDevice(nvmlDevice) + if err != nil { + return nil, fmt.Errorf("failed to construct device: %w", err) + } + deviceEdits, err := l.GetGPUDeviceEdits(nvlibDevice) + if err != nil { + return nil, fmt.Errorf("failed to get GPU device edits: %w", err) + } + + return deviceEdits, nil +} + +func (l *nvmllib) getEditsForMIGDevice(nvmlDevice nvml.Device) (*cdi.ContainerEdits, error) { + nvmlParentDevice, ret := nvmlDevice.GetDeviceHandleFromMigDeviceHandle() + if ret != nvml.SUCCESS { + return nil, fmt.Errorf("failed to get parent device handle: %w", ret) + } + nvlibMigDevice, err := l.devicelib.NewMigDevice(nvmlDevice) + if err != nil { + return nil, fmt.Errorf("failed to construct device: %w", err) + } + nvlibParentDevice, err := l.devicelib.NewDevice(nvmlParentDevice) + if err != nil { + return nil, fmt.Errorf("failed to construct parent device: %w", err) + } + return l.GetMIGDeviceEdits(nvlibParentDevice, nvlibMigDevice) +} + func (l *nvmllib) getGPUDeviceSpecs() ([]specs.Device, error) { var deviceSpecs []specs.Device err := l.devicelib.VisitDevices(func(i int, d device.Device) error { From 32c3bd1ded8872c4c5078e7bffb96dcf0a719eed Mon Sep 17 00:00:00 2001 From: Christopher Desiniotis Date: Wed, 6 Dec 2023 07:54:30 
-0800 Subject: [PATCH 5/6] Fallback to standard CDI modifier when creation of automatic CDI modifier fails Signed-off-by: Christopher Desiniotis --- internal/modifier/cdi.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/internal/modifier/cdi.go b/internal/modifier/cdi.go index 4a1079ac..c53cb996 100644 --- a/internal/modifier/cdi.go +++ b/internal/modifier/cdi.go @@ -50,7 +50,12 @@ func NewCDIModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Spe return nil, fmt.Errorf("requesting a CDI device with vendor 'runtime.nvidia.com' is not supported when requesting other CDI devices") } if len(automaticDevices) > 0 { - return newAutomaticCDISpecModifier(logger, cfg, automaticDevices) + automaticModifier, err := newAutomaticCDISpecModifier(logger, cfg, automaticDevices) + if err == nil { + return automaticModifier, nil + } + logger.Warningf("Failed to create the automatic CDI modifier: %v", err) + logger.Debugf("Falling back to the standard CDI modifier") } return cdi.New( From 86d86395ea8edde1bd1be176c3921e3ad7446420 Mon Sep 17 00:00:00 2001 From: Christopher Desiniotis Date: Wed, 6 Dec 2023 08:19:33 -0800 Subject: [PATCH 6/6] Update changelog for the automatic CDI spec generation added for the 'runtime.nvidia.com/gpu' CDI kind Signed-off-by: Christopher Desiniotis --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c4812fde..d7878d2b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # NVIDIA Container Toolkit Changelog +## v1.15.0-rc.2 +* Extend the `runtime.nvidia.com/gpu` CDI kind to support full-GPUs and MIG devices specified by index or UUID. + ## v1.15.0-rc.1 * Skip update of ldcache in containers without ldconfig. The .so.SONAME symlinks are still created. * Normalize ldconfig path on use. This automatically adjust the ldconfig setting applied to ldconfig.real on systems where this exists.
@@ -10,6 +13,7 @@ * Added support for `nvidia-ctk runtime configure --enable-cdi` for the `docker` runtime. Note that this requires Docker >= 25. * Fixed bug in `nvidia-ctk config` command when using `--set`. The types of applied config options are now applied correctly. * Add `--relative-to` option to `nvidia-ctk transform root` command. This controls whether the root transformation is applied to host or container paths. +* Added automatic CDI spec generation when the `runtime.nvidia.com/gpu=all` device is requested by a container. * [libnvidia-container] Fix device permission check when using cgroupv2 (fixes #227)