mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-03-25 15:08:59 +00:00
Add discovery of GDS and MOFED devices
Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
parent
7f47a61986
commit
cb34f7c6d1
79
internal/discover/gds.go
Normal file
79
internal/discover/gds.go
Normal file
@ -0,0 +1,79 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type gdsDeviceDiscoverer struct {
|
||||
None
|
||||
logger *logrus.Logger
|
||||
devices Discover
|
||||
mounts Discover
|
||||
}
|
||||
|
||||
// NewGDSDiscoverer creates a discoverer for GPUDirect Storage devices and mounts.
|
||||
func NewGDSDiscoverer(logger *logrus.Logger, root string) (Discover, error) {
|
||||
devices := &mounts{
|
||||
logger: logger,
|
||||
lookup: lookup.NewCharDeviceLocator(logger, root),
|
||||
required: []string{"/dev/nvidia-fs*"},
|
||||
}
|
||||
|
||||
mounts := &mounts{
|
||||
logger: logger,
|
||||
lookup: lookup.NewDirectoryLocator(logger, root),
|
||||
required: []string{"/run/udev"},
|
||||
}
|
||||
|
||||
d := gdsDeviceDiscoverer{
|
||||
logger: logger,
|
||||
devices: devices,
|
||||
mounts: mounts,
|
||||
}
|
||||
|
||||
return &d, nil
|
||||
}
|
||||
|
||||
// Devices discoveres the nvidia-fs device nodes for use with GPUDirect Storage
|
||||
func (d *gdsDeviceDiscoverer) Devices() ([]Device, error) {
|
||||
devicesAsMounts, err := d.devices.Mounts()
|
||||
if err != nil {
|
||||
d.logger.Debugf("Could not locate GDS devices: %v", err)
|
||||
return nil, nil
|
||||
}
|
||||
var devices []Device
|
||||
for _, mount := range devicesAsMounts {
|
||||
devices = append(devices, Device(mount))
|
||||
}
|
||||
|
||||
return devices, nil
|
||||
}
|
||||
|
||||
// Mounts discovers the required mounts for GDS.
|
||||
// If no devices are discovered the discovered mounts are empty
|
||||
func (d *gdsDeviceDiscoverer) Mounts() ([]Mount, error) {
|
||||
devices, err := d.Devices()
|
||||
if err != nil || len(devices) == 0 {
|
||||
d.logger.Debugf("No nvidia-fs devices detected; skipping detection of mounts")
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
return d.mounts.Mounts()
|
||||
}
|
53
internal/discover/mofed.go
Normal file
53
internal/discover/mofed.go
Normal file
@ -0,0 +1,53 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type mofedDeviceDiscoverer mounts
|
||||
|
||||
// NewMOFEDDiscoverer creates a discoverer for MOFED devices.
|
||||
func NewMOFEDDiscoverer(logger *logrus.Logger, root string) (Discover, error) {
|
||||
devices := &mofedDeviceDiscoverer{
|
||||
logger: logger,
|
||||
lookup: lookup.NewCharDeviceLocator(logger, root),
|
||||
required: []string{
|
||||
"/dev/infiniband/uverbs*",
|
||||
"/dev/infiniband/rdma_cm",
|
||||
},
|
||||
}
|
||||
|
||||
return devices, nil
|
||||
}
|
||||
|
||||
// Devices discovers the uverbs* and rdma_cm device nodes for use with GPUDirect Storage and the MOFED stack.
|
||||
func (d *mofedDeviceDiscoverer) Devices() ([]Device, error) {
|
||||
devicesAsMounts, err := (*mounts)(d).Mounts()
|
||||
if err != nil {
|
||||
d.logger.Debugf("Could not locate MOFED devices: %v", err)
|
||||
return nil, nil
|
||||
}
|
||||
var devices []Device
|
||||
for _, mount := range devicesAsMounts {
|
||||
devices = append(devices, Device(mount))
|
||||
}
|
||||
|
||||
return devices, nil
|
||||
}
|
Loading…
Reference in New Issue
Block a user