Merge branch 'CNT-4478/fix-unknown-devices' into 'main'

Update go-nvlib dependency to v0.0.0-20230818092907-09424fdc8884

See merge request nvidia/container-toolkit/container-toolkit!470
This commit is contained in:
Evan Lezar 2023-08-21 09:05:58 +00:00
commit f86a5abeb6
7 changed files with 54 additions and 4 deletions

View File

@ -7,6 +7,7 @@
* Added a `nvidia-ctk config` command which supports setting config options using a `--set` flag.
* Added `--library-search-path` option to `nvidia-ctk cdi generate` command in `csv` mode. This allows folders where
libraries are located to be specified explicitly.
* Updated go-nvlib to support devices that are not present in the PCI device database. This allows `/dev/char` symlinks to be created on systems where such devices are installed.
* [toolkit-container] Set `NVIDIA_VISIBLE_DEVICES=void` to prevent injection of NVIDIA devices and drivers into the NVIDIA Container Toolkit container.

View File

@ -73,6 +73,7 @@ func newAllPossible(logger logger.Interface, devRoot string) (nodeLister, error)
func (m allPossible) DeviceNodes() ([]deviceNode, error) {
gpus, err := nvpci.New(
nvpci.WithPCIDevicesRoot(filepath.Join(m.devRoot, nvpci.PCIDevicesRoot)),
nvpci.WithLogger(m.logger),
).GetGPUs()
if err != nil {
return nil, fmt.Errorf("failed to get GPU information: %v", err)

2
go.mod
View File

@ -11,7 +11,7 @@ require (
github.com/sirupsen/logrus v1.9.0
github.com/stretchr/testify v1.8.1
github.com/urfave/cli/v2 v2.3.0
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230613182322-7663cf900f0a
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230818092907-09424fdc8884
golang.org/x/mod v0.5.0
golang.org/x/sys v0.7.0
)

2
go.sum
View File

@ -77,6 +77,8 @@ github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17
github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230613182322-7663cf900f0a h1:lceJVurLqiWFdxK6KMDw+SIwrAsFW/af44XrNlbGw78=
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230613182322-7663cf900f0a/go.mod h1:KYZksBgh18o+uzgnpDazzG4LVYtnfB96VXHMXypEtik=
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230818092907-09424fdc8884 h1:V0LUbfm4kVA1CPG8FgG9AGZqa3ykE5U12Gd3PZgoItA=
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230818092907-09424fdc8884/go.mod h1:/x5Ky1ZJNyCjDkgSL1atII0EFKQF5WaIHKeP5nkaQfk=
golang.org/x/mod v0.5.0 h1:UG21uOlmZabA4fW5i7ZX6bjw1xELEGg/ZLgZq9auk/Q=
golang.org/x/mod v0.5.0/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=

View File

@ -0,0 +1,29 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package nvpci
import "log"
// logger is the minimal logging contract this package depends on: a single
// printf-style method used to report warnings.
type logger interface {
	Warningf(format string, args ...interface{})
}
// simpleLogger is a logger implementation backed by the standard library's
// log package. New installs it when no logger has been supplied.
type simpleLogger struct{}

// Warningf formats a message from format and v, prefixes it with "WARNING: "
// and writes it through log.Printf.
func (l simpleLogger) Warningf(format string, v ...interface{}) {
	// v has to be expanded with "..."; forwarding the slice itself would hand
	// log.Printf a single []interface{} argument and garble every verb.
	log.Printf("WARNING: "+format, v...)
}

View File

@ -39,6 +39,10 @@ const (
PCI3dControllerClass uint32 = 0x030200
// PCINvSwitchClass represents the PCI class for NVSwitches
PCINvSwitchClass uint32 = 0x068000
// UnknownDeviceString is the device name to set for devices not found in the PCI database
UnknownDeviceString = "UNKNOWN_DEVICE"
// UnknownClassString is the class name to set for devices not found in the PCI database
UnknownClassString = "UNKNOWN_CLASS"
)
// Interface allows us to get a list of all NVIDIA PCI devices
@ -64,6 +68,7 @@ type ResourceInterface interface {
}
// nvpci carries the settings that New assembles from its options and
// returns to callers as an Interface.
type nvpci struct {
// logger receives warnings, e.g. when a device or class name cannot be
// looked up in the PCI database. New substitutes a default when it is nil.
logger logger
// pciDevicesRoot is the root path for PCI devices on the system. New falls
// back to PCIDevicesRoot when it is empty.
pciDevicesRoot string
// pcidbPath presumably points at the PCI ID database file to load —
// TODO confirm against the code that reads it.
pcidbPath string
}
@ -130,6 +135,9 @@ func New(opts ...Option) Interface {
for _, opt := range opts {
opt(n)
}
if n.logger == nil {
n.logger = &simpleLogger{}
}
if n.pciDevicesRoot == "" {
n.pciDevicesRoot = PCIDevicesRoot
}
@ -139,6 +147,13 @@ func New(opts ...Option) Interface {
// Option defines a function for passing options to the New() call.
// Each Option receives the nvpci instance under construction and may modify
// it; New applies the supplied options in order before filling in defaults
// for any fields left unset.
type Option func(*nvpci)
// WithLogger returns an Option that installs the supplied logger on the
// nvpci instance being configured.
func WithLogger(logger logger) Option {
	apply := func(p *nvpci) {
		p.logger = logger
	}
	return apply
}
// WithPCIDevicesRoot provides an Option to set the root path
// for PCI devices on the system.
func WithPCIDevicesRoot(root string) Option {
@ -304,11 +319,13 @@ func (p *nvpci) GetGPUByPciBusID(address string) (*NvidiaPCIDevice, error) {
deviceName, err := pciDB.GetDeviceName(uint16(vendorID), uint16(deviceID))
if err != nil {
return nil, fmt.Errorf("unable to get device name: %v", err)
p.logger.Warningf("unable to get device name: %v\n", err)
deviceName = UnknownDeviceString
}
className, err := pciDB.GetClassName(uint32(classID))
if err != nil {
return nil, fmt.Errorf("unable to get class name for device: %v", err)
p.logger.Warningf("unable to get class name for device: %v\n", err)
className = UnknownClassString
}
nvdevice := &NvidiaPCIDevice{

2
vendor/modules.txt vendored
View File

@ -61,7 +61,7 @@ github.com/syndtr/gocapability/capability
github.com/urfave/cli/v2
# github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb
## explicit
# gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230613182322-7663cf900f0a
# gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230818092907-09424fdc8884
## explicit; go 1.20
gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device
gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/info