nvidia-container-toolkit/internal/platform-support/dgpu/nvsandboxutils.go

132 lines
3.5 KiB
Go
Raw Normal View History

/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package dgpu
import (
"fmt"
"path/filepath"
"strings"
"github.com/NVIDIA/go-nvml/pkg/nvml"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvsandboxutils"
)
// nvsandboxutilsDGPU is a discoverer for a single device, identified by its
// UUID, that uses the nvsandboxutils library to look up the device's files.
type nvsandboxutilsDGPU struct {
// lib is the nvsandboxutils handle used to query GPU resources and file content.
lib nvsandboxutils.Interface
// uuid identifies the device whose resources are queried.
uuid string
// devRoot is the configured dev root, cleaned and with a trailing "/dev"
// removed; unless it is "/", it is stripped from host paths to form
// container paths.
devRoot string
// isMig is set from the isMigDevice option; when true, DRI device nodes and
// DRI symlinks are skipped.
isMig bool
// nvidiaCDIHookPath is the hook path handed to discover.CreateNvidiaCDIHook.
nvidiaCDIHookPath string
// deviceLinks holds "<target>::<link>" entries recorded by Devices and turned
// into --link arguments by Hooks.
deviceLinks []string
}
// Compile-time check that *nvsandboxutilsDGPU satisfies discover.Discover.
var _ discover.Discover = (*nvsandboxutilsDGPU)(nil)
// UUIDer is implemented by anything that can report a UUID together with an
// NVML return code indicating whether the lookup succeeded.
type UUIDer interface {
// GetUUID returns the UUID and the nvml.Return status of the query.
GetUUID() (string, nvml.Return)
}
// newNvsandboxutilsDGPUDiscoverer constructs an nvsandboxutils-backed
// discoverer for the device d.
//
// When no nvsandboxutils library is configured on the options, both return
// values are nil. An error is returned if the UUID of d cannot be queried.
func (o *options) newNvsandboxutilsDGPUDiscoverer(d UUIDer) (discover.Discover, error) {
	lib := o.nvsandboxutilslib
	if lib == nil {
		return nil, nil
	}

	deviceUUID, status := d.GetUUID()
	if status != nvml.SUCCESS {
		return nil, fmt.Errorf("failed to get device UUID: %w", status)
	}

	// Normalise the configured dev root and drop a trailing "/dev" component so
	// that the remainder can later be stripped from host device paths.
	root := strings.TrimSuffix(filepath.Clean(o.devRoot), "/dev")

	return &nvsandboxutilsDGPU{
		lib:               lib,
		uuid:              deviceUUID,
		devRoot:           root,
		isMig:             o.isMigDevice,
		nvidiaCDIHookPath: o.nvidiaCDIHookPath,
	}, nil
}
// Devices returns the device nodes that nvsandboxutils reports for the GPU
// identified by d.uuid.
//
// NVIDIA device nodes and NVIDIA capability nodes are always included. DRI
// card and render nodes are included only when the device is not a MIG
// device. For each returned device, HostPath is the path reported by the
// library and Path is that path with d.devRoot stripped (unless d.devRoot is
// "/").
//
// As a side effect, the DRI symlinks reported for a non-MIG device are
// recorded as "<target>::<link>" entries in d.deviceLinks for use by Hooks.
// The recorded links are replaced, not extended, on every successful call so
// that repeated calls do not produce duplicate entries; on error the
// previously recorded links are left untouched.
func (d *nvsandboxutilsDGPU) Devices() ([]discover.Device, error) {
	gpuFileInfos, ret := d.lib.GetGpuResource(d.uuid)
	if ret != nvsandboxutils.SUCCESS {
		return nil, fmt.Errorf("failed to get GPU resource: %w", ret)
	}

	var devices []discover.Device
	// Links are collected locally and only published on success; see the
	// function comment.
	var deviceLinks []string
	for _, info := range gpuFileInfos {
		switch info.SubType {
		case nvsandboxutils.NV_DEV_DRI_CARD, nvsandboxutils.NV_DEV_DRI_RENDERD:
			if d.isMig {
				continue
			}
			// Non-MIG DRI nodes are handled exactly like the NVIDIA nodes below.
			fallthrough
		case nvsandboxutils.NV_DEV_NVIDIA, nvsandboxutils.NV_DEV_NVIDIA_CAPS_NVIDIA_CAP:
			containerPath := info.Path
			if d.devRoot != "/" {
				containerPath = strings.TrimPrefix(containerPath, d.devRoot)
			}
			// TODO: Extend discover.Device with additional information.
			devices = append(devices, discover.Device{
				HostPath: info.Path,
				Path:     containerPath,
			})
		case nvsandboxutils.NV_DEV_DRI_CARD_SYMLINK, nvsandboxutils.NV_DEV_DRI_RENDERD_SYMLINK:
			if d.isMig {
				continue
			}
			// NOTE(review): Flags is compared for equality; if it is a bitmask
			// that may carry additional bits, confirm this is intended.
			if info.Flags != nvsandboxutils.NV_FILE_FLAG_CONTENT {
				continue
			}
			targetPath, ret := d.lib.GetFileContent(info.Path)
			if ret != nvsandboxutils.SUCCESS {
				return nil, fmt.Errorf("failed to get symlink: %w", ret)
			}
			deviceLinks = append(deviceLinks, fmt.Sprintf("%v::%v", targetPath, info.Path))
		}
	}

	d.deviceLinks = deviceLinks
	return devices, nil
}
// Hooks returns a single "create-symlinks" hook carrying one --link argument
// per entry in d.deviceLinks, or no hooks at all when no links are recorded.
// In the code shown here the links are only recorded by Devices, so Devices
// must have been called first for any hook to be produced.
func (d *nvsandboxutilsDGPU) Hooks() ([]discover.Hook, error) {
	links := d.deviceLinks
	if len(links) == 0 {
		return nil, nil
	}

	// Each link contributes a flag and its value.
	args := make([]string, 0, 2*len(links))
	for _, link := range links {
		args = append(args, "--link", link)
	}

	createSymlinks := discover.CreateNvidiaCDIHook(d.nvidiaCDIHookPath, "create-symlinks", args...)
	return []discover.Hook{createSymlinks}, nil
}
// Mounts always returns no mounts and no error for this discoverer.
func (d *nvsandboxutilsDGPU) Mounts() ([]discover.Mount, error) {
	var none []discover.Mount
	return none, nil
}