mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-04-15 13:01:23 +00:00
Merge branch 'missing-dra-devices' into 'main'
Ensure the existence of DRM device nodes is checked. See merge request nvidia/container-toolkit/container-toolkit!260
This commit is contained in:
commit
50d7fb8f41
@ -1,57 +0,0 @@
|
|||||||
/**
|
|
||||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
**/
|
|
||||||
|
|
||||||
package generate
|
|
||||||
|
|
||||||
import (
|
|
||||||
"path/filepath"
|
|
||||||
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
|
||||||
"github.com/sirupsen/logrus"
|
|
||||||
)
|
|
||||||
|
|
||||||
// deviceDiscoverer defines a discoverer for device nodes
|
|
||||||
type deviceDiscoverer struct {
|
|
||||||
logger *logrus.Logger
|
|
||||||
root string
|
|
||||||
deviceNodePaths []string
|
|
||||||
}
|
|
||||||
|
|
||||||
var _ discover.Discover = (*deviceDiscoverer)(nil)
|
|
||||||
|
|
||||||
// Devices returns the device nodes for the full GPU.
|
|
||||||
func (d *deviceDiscoverer) Devices() ([]discover.Device, error) {
|
|
||||||
var deviceNodes []discover.Device
|
|
||||||
for _, dn := range d.deviceNodePaths {
|
|
||||||
deviceNode := discover.Device{
|
|
||||||
HostPath: filepath.Join(d.root, dn),
|
|
||||||
Path: dn,
|
|
||||||
}
|
|
||||||
deviceNodes = append(deviceNodes, deviceNode)
|
|
||||||
}
|
|
||||||
|
|
||||||
return deviceNodes, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Hooks returns no hooks for a device discoverer
|
|
||||||
func (d *deviceDiscoverer) Hooks() ([]discover.Hook, error) {
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Mounts returns no mounts for a device discoverer
|
|
||||||
func (d *deviceDiscoverer) Mounts() ([]discover.Mount, error) {
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
@ -29,14 +29,14 @@ import (
|
|||||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||||
)
|
)
|
||||||
|
|
||||||
// fullGPUDiscoverer wraps a deviceDiscoverer and adds specifics required for discovering full GPUs
|
// byPathHookDiscoverer discovers the entities required for injecting by-path DRM device links
|
||||||
type fullGPUDiscoverer struct {
|
type byPathHookDiscoverer struct {
|
||||||
deviceDiscoverer
|
logger *logrus.Logger
|
||||||
|
root string
|
||||||
pciBusID string
|
pciBusID string
|
||||||
}
|
}
|
||||||
|
|
||||||
var _ discover.Discover = (*fullGPUDiscoverer)(nil)
|
var _ discover.Discover = (*byPathHookDiscoverer)(nil)
|
||||||
|
|
||||||
// NewFullGPUDiscoverer creates a discoverer for the full GPU defined by the specified device.
|
// NewFullGPUDiscoverer creates a discoverer for the full GPU defined by the specified device.
|
||||||
func NewFullGPUDiscoverer(logger *logrus.Logger, root string, d device.Device) (discover.Discover, error) {
|
func NewFullGPUDiscoverer(logger *logrus.Logger, root string, d device.Device) (discover.Discover, error) {
|
||||||
@ -61,22 +61,35 @@ func NewFullGPUDiscoverer(logger *logrus.Logger, root string, d device.Device) (
|
|||||||
|
|
||||||
deviceNodePaths := append([]string{path}, drmDeviceNodes...)
|
deviceNodePaths := append([]string{path}, drmDeviceNodes...)
|
||||||
|
|
||||||
device := fullGPUDiscoverer{
|
deviceNodes := discover.NewCharDeviceDiscoverer(
|
||||||
deviceDiscoverer: deviceDiscoverer{
|
logger,
|
||||||
logger: logger,
|
deviceNodePaths,
|
||||||
root: root,
|
root,
|
||||||
deviceNodePaths: deviceNodePaths,
|
)
|
||||||
},
|
|
||||||
|
byPathHooks := &byPathHookDiscoverer{
|
||||||
|
logger: logger,
|
||||||
|
root: root,
|
||||||
pciBusID: pciBusID,
|
pciBusID: pciBusID,
|
||||||
}
|
}
|
||||||
|
|
||||||
return &device, nil
|
dd := discover.Merge(
|
||||||
|
deviceNodes,
|
||||||
|
byPathHooks,
|
||||||
|
)
|
||||||
|
|
||||||
|
return dd, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Devices returns the empty list for the by-path hook discoverer
|
||||||
|
func (d *byPathHookDiscoverer) Devices() ([]discover.Device, error) {
|
||||||
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Hooks returns the hooks for the GPU device.
|
// Hooks returns the hooks for the GPU device.
|
||||||
// The following hooks are detected:
|
// The following hooks are detected:
|
||||||
// 1. A hook to create /dev/dri/by-path symlinks
|
// 1. A hook to create /dev/dri/by-path symlinks
|
||||||
func (d *fullGPUDiscoverer) Hooks() ([]discover.Hook, error) {
|
func (d *byPathHookDiscoverer) Hooks() ([]discover.Hook, error) {
|
||||||
links, err := d.deviceNodeLinks()
|
links, err := d.deviceNodeLinks()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to discover DRA device links: %v", err)
|
return nil, fmt.Errorf("failed to discover DRA device links: %v", err)
|
||||||
@ -103,11 +116,11 @@ func (d *fullGPUDiscoverer) Hooks() ([]discover.Hook, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Mounts returns an empty slice for a full GPU
|
// Mounts returns an empty slice for a full GPU
|
||||||
func (d *fullGPUDiscoverer) Mounts() ([]discover.Mount, error) {
|
func (d *byPathHookDiscoverer) Mounts() ([]discover.Mount, error) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *fullGPUDiscoverer) deviceNodeLinks() ([]string, error) {
|
func (d *byPathHookDiscoverer) deviceNodeLinks() ([]string, error) {
|
||||||
candidates := []string{
|
candidates := []string{
|
||||||
fmt.Sprintf("/dev/dri/by-path/pci-%s-card", d.pciBusID),
|
fmt.Sprintf("/dev/dri/by-path/pci-%s-card", d.pciBusID),
|
||||||
fmt.Sprintf("/dev/dri/by-path/pci-%s-render", d.pciBusID),
|
fmt.Sprintf("/dev/dri/by-path/pci-%s-render", d.pciBusID),
|
||||||
|
@ -26,13 +26,6 @@ import (
|
|||||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||||
)
|
)
|
||||||
|
|
||||||
// migDeviceDiscoverer wraps a deviceDiscoverer and adds specifics required for discovering MIG devices.
|
|
||||||
type migDeviceDiscoverer struct {
|
|
||||||
deviceDiscoverer
|
|
||||||
}
|
|
||||||
|
|
||||||
var _ discover.Discover = (*migDeviceDiscoverer)(nil)
|
|
||||||
|
|
||||||
// NewMigDeviceDiscoverer creates a discoverer for the specified mig device and its parent.
|
// NewMigDeviceDiscoverer creates a discoverer for the specified mig device and its parent.
|
||||||
func NewMigDeviceDiscoverer(logger *logrus.Logger, root string, parent device.Device, d device.MigDevice) (discover.Discover, error) {
|
func NewMigDeviceDiscoverer(logger *logrus.Logger, root string, parent device.Device, d device.MigDevice) (discover.Discover, error) {
|
||||||
minor, ret := parent.GetMinorNumber()
|
minor, ret := parent.GetMinorNumber()
|
||||||
@ -68,17 +61,15 @@ func NewMigDeviceDiscoverer(logger *logrus.Logger, root string, parent device.De
|
|||||||
return nil, fmt.Errorf("failed to get CI cap device path: %v", err)
|
return nil, fmt.Errorf("failed to get CI cap device path: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
m := migDeviceDiscoverer{
|
deviceNodes := discover.NewCharDeviceDiscoverer(
|
||||||
deviceDiscoverer: deviceDiscoverer{
|
logger,
|
||||||
logger: logger,
|
[]string{
|
||||||
root: root,
|
parentPath,
|
||||||
deviceNodePaths: []string{
|
giCapDevicePath,
|
||||||
parentPath,
|
ciCapDevicePath,
|
||||||
giCapDevicePath,
|
|
||||||
ciCapDevicePath,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
}
|
root,
|
||||||
|
)
|
||||||
|
|
||||||
return &m, nil
|
return deviceNodes, nil
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user