diff --git a/go.mod b/go.mod
index 971199ab..165bdd97 100644
--- a/go.mod
+++ b/go.mod
@@ -3,7 +3,7 @@ module github.com/NVIDIA/nvidia-container-toolkit
 go 1.22.0
 
 require (
-	github.com/NVIDIA/go-nvlib v0.6.1
+	github.com/NVIDIA/go-nvlib v0.7.0
 	github.com/NVIDIA/go-nvml v0.12.4-1
 	github.com/moby/sys/symlink v0.3.0
 	github.com/opencontainers/runtime-spec v1.2.0
diff --git a/go.sum b/go.sum
index ed1db237..9d2e27bd 100644
--- a/go.sum
+++ b/go.sum
@@ -1,5 +1,5 @@
-github.com/NVIDIA/go-nvlib v0.6.1 h1:0/5FvaKvDJoJeJ+LFlh+NDQMxMlVw9wOXrOVrGXttfE=
-github.com/NVIDIA/go-nvlib v0.6.1/go.mod h1:9UrsLGx/q1OrENygXjOuM5Ey5KCtiZhbvBlbUIxtGWY=
+github.com/NVIDIA/go-nvlib v0.7.0 h1:Z/J7skMdLbTiHvomKVsGYsttfQMZj5FwNYIFXhZ4i/c=
+github.com/NVIDIA/go-nvlib v0.7.0/go.mod h1:9UrsLGx/q1OrENygXjOuM5Ey5KCtiZhbvBlbUIxtGWY=
 github.com/NVIDIA/go-nvml v0.12.4-1 h1:WKUvqshhWSNTfm47ETRhv0A0zJyr1ncCuHiXwoTrBEc=
 github.com/NVIDIA/go-nvml v0.12.4-1/go.mod h1:8Llmj+1Rr+9VGGwZuRer5N/aCjxGuR5nPb/9ebBiIEQ=
 github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
diff --git a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/device/device.go b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/device/device.go
index 5b21fc13..5fac8c11 100644
--- a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/device/device.go
+++ b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/device/device.go
@@ -32,6 +32,7 @@ type Device interface {
 	GetMigDevices() ([]MigDevice, error)
 	GetMigProfiles() ([]MigProfile, error)
 	GetPCIBusID() (string, error)
+	IsFabricAttached() (bool, error)
 	IsMigCapable() (bool, error)
 	IsMigEnabled() (bool, error)
 	VisitMigDevices(func(j int, m MigDevice) error) error
@@ -208,6 +209,47 @@ func (d *device) IsMigEnabled() (bool, error) {
 	return (mode == nvml.DEVICE_MIG_ENABLE), nil
 }
 
+// IsFabricAttached checks if a device is attached to a GPU fabric.
+func (d *device) IsFabricAttached() (bool, error) {
+	if d.lib.hasSymbol("nvmlDeviceGetGpuFabricInfo") {
+		info, ret := d.GetGpuFabricInfo()
+		if ret == nvml.ERROR_NOT_SUPPORTED {
+			return false, nil
+		}
+		if ret != nvml.SUCCESS {
+			return false, fmt.Errorf("error getting GPU Fabric Info: %v", ret)
+		}
+		if info.State != nvml.GPU_FABRIC_STATE_COMPLETED {
+			return false, nil
+		}
+		if nvml.Return(info.Status) != nvml.SUCCESS {
+			return false, nil
+		}
+
+		return true, nil
+	}
+
+	if d.lib.hasSymbol("nvmlDeviceGetGpuFabricInfoV") {
+		info, ret := d.GetGpuFabricInfoV().V2()
+		if ret == nvml.ERROR_NOT_SUPPORTED {
+			return false, nil
+		}
+		if ret != nvml.SUCCESS {
+			return false, fmt.Errorf("error getting GPU Fabric Info: %v", ret)
+		}
+		if info.State != nvml.GPU_FABRIC_STATE_COMPLETED {
+			return false, nil
+		}
+		if nvml.Return(info.Status) != nvml.SUCCESS {
+			return false, nil
+		}
+
+		return true, nil
+	}
+
+	return false, nil
+}
+
 // VisitMigDevices walks a top-level device and invokes a callback function for each MIG device configured on it.
 func (d *device) VisitMigDevices(visit func(int, MigDevice) error) error {
 	capable, err := d.IsMigCapable()
diff --git a/vendor/modules.txt b/vendor/modules.txt
index d3116d29..74a3f93e 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -1,4 +1,4 @@
-# github.com/NVIDIA/go-nvlib v0.6.1
+# github.com/NVIDIA/go-nvlib v0.7.0
 ## explicit; go 1.20
 github.com/NVIDIA/go-nvlib/pkg/nvlib/device
 github.com/NVIDIA/go-nvlib/pkg/nvlib/info