mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-26 18:18:24 +00:00
Compare commits
56 Commits
v1.12.0
...
v1.13.0-rc
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
882fbb3209 | ||
|
|
2680c45811 | ||
|
|
b76808dbd5 | ||
|
|
ba50b50a15 | ||
|
|
f6d3f8d471 | ||
|
|
d9859d66bf | ||
|
|
4ccb0b9a53 | ||
|
|
f36c775d50 | ||
|
|
b21dc929ef | ||
|
|
d226925fe7 | ||
|
|
20d6e9af04 | ||
|
|
5103adab89 | ||
|
|
7eb435eb73 | ||
|
|
5d011c1333 | ||
|
|
6adb792d57 | ||
|
|
a844749791 | ||
|
|
dd0d43e726 | ||
|
|
25811471fa | ||
|
|
569bc1a889 | ||
|
|
b1756b410a | ||
|
|
7789ac6331 | ||
|
|
7a3aabbbda | ||
|
|
e486095603 | ||
|
|
bf6babe07e | ||
|
|
d5a4d89682 | ||
|
|
5710b9e7e8 | ||
|
|
b4ab95f00c | ||
|
|
a52c9f0ac6 | ||
|
|
b6bab4d3fd | ||
|
|
5b110fba2d | ||
|
|
179133c8ad | ||
|
|
365b6c7bc2 | ||
|
|
dc4887cd44 | ||
|
|
c4836a576f | ||
|
|
98afe0d27a | ||
|
|
fdc759f7c2 | ||
|
|
43448bac11 | ||
|
|
456d2864a6 | ||
|
|
406a5ec76f | ||
|
|
f71c419cfb | ||
|
|
babb73295f | ||
|
|
f3ec5fd329 | ||
|
|
5aca0d147d | ||
|
|
f2b19b6ae9 | ||
|
|
7cb9ed66be | ||
|
|
d578f4598a | ||
|
|
d30e6c23ab | ||
|
|
1c05f2fb9a | ||
|
|
1407ace94a | ||
|
|
97008f2db6 | ||
|
|
076eed7eb4 | ||
|
|
33c7b056ea | ||
|
|
3b8c40c3e6 | ||
|
|
3f70521a63 | ||
|
|
21f5895b5a | ||
|
|
738a2e7343 |
@@ -77,13 +77,6 @@ stages:
|
||||
DIST: debian9
|
||||
PACKAGE_REPO_TYPE: debian
|
||||
|
||||
.dist-fedora35:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: fedora35
|
||||
PACKAGE_REPO_TYPE: rpm
|
||||
|
||||
.dist-opensuse-leap15.1:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1,9 +1,11 @@
|
||||
dist
|
||||
artifacts
|
||||
*.swp
|
||||
*.swo
|
||||
/coverage.out*
|
||||
/test/output/
|
||||
/nvidia-container-runtime
|
||||
/nvidia-container-runtime.*
|
||||
/nvidia-container-runtime-hook
|
||||
/nvidia-container-toolkit
|
||||
/nvidia-ctk
|
||||
|
||||
@@ -158,18 +158,6 @@ package-debian9-amd64:
|
||||
- .dist-debian9
|
||||
- .arch-amd64
|
||||
|
||||
package-fedora35-aarch64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-fedora35
|
||||
- .arch-aarch64
|
||||
|
||||
package-fedora35-x86_64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-fedora35
|
||||
- .arch-x86_64
|
||||
|
||||
package-opensuse-leap15.1-x86_64:
|
||||
extends:
|
||||
- .package-build
|
||||
@@ -300,10 +288,6 @@ image-packaging:
|
||||
optional: true
|
||||
- job: package-debian9-amd64
|
||||
optional: true
|
||||
- job: package-fedora35-aarch64
|
||||
optional: true
|
||||
- job: package-fedora35-x86_64
|
||||
optional: true
|
||||
- job: package-opensuse-leap15.1-x86_64
|
||||
optional: true
|
||||
- job: package-ubuntu16.04-amd64
|
||||
|
||||
@@ -1,5 +1,10 @@
|
||||
# NVIDIA Container Toolkit Changelog
|
||||
|
||||
## v1.13.0-rc.1
|
||||
|
||||
* Discover gsp*.bin files for GSP firmware when generating CDI specification
|
||||
* [libnvidia-container] Inject gsp*.bin files for GSP firmware
|
||||
|
||||
## v1.12.0
|
||||
|
||||
* Promote `v1.12.0-rc.5` to `v1.12.0`
|
||||
|
||||
2
Makefile
2
Makefile
@@ -61,7 +61,7 @@ cmd-%: COMMAND_BUILD_OPTIONS = -o $(PREFIX)/$(*)
|
||||
endif
|
||||
cmds: $(CMD_TARGETS)
|
||||
$(CMD_TARGETS): cmd-%:
|
||||
GOOS=$(GOOS) go build -ldflags "-s -w -X $(CLI_VERSION_PACKAGE).gitCommit=$(GIT_COMMIT) -X $(CLI_VERSION_PACKAGE).version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
|
||||
GOOS=$(GOOS) go build -ldflags "-extldflags=-Wl,-z,lazy -s -w -X $(CLI_VERSION_PACKAGE).gitCommit=$(GIT_COMMIT) -X $(CLI_VERSION_PACKAGE).version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
|
||||
|
||||
build:
|
||||
GOOS=$(GOOS) go build ./...
|
||||
|
||||
@@ -105,24 +105,20 @@ $(BUILD_TARGETS): build-%: $(ARTIFACTS_ROOT)
|
||||
build-ubuntu%: BASE_DIST = $(*)
|
||||
build-ubuntu%: DOCKERFILE_SUFFIX := ubuntu
|
||||
build-ubuntu%: PACKAGE_DIST = ubuntu18.04
|
||||
build-ubuntu%: PACKAGE_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
|
||||
build-ubuntu%: LIBNVIDIA_CONTAINER0_DEPENDENCY=$(LIBNVIDIA_CONTAINER0_VERSION)
|
||||
|
||||
build-ubi8: BASE_DIST := ubi8
|
||||
build-ubi8: DOCKERFILE_SUFFIX := centos
|
||||
build-ubi8: PACKAGE_DIST = centos8
|
||||
build-ubi8: PACKAGE_VERSION := $(LIB_VERSION)-$(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
|
||||
build-centos7: BASE_DIST = $(*)
|
||||
build-centos7: DOCKERFILE_SUFFIX := centos
|
||||
build-centos7: PACKAGE_DIST = $(BASE_DIST)
|
||||
build-centos7: PACKAGE_VERSION := $(LIB_VERSION)-$(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
|
||||
build-packaging: BASE_DIST := ubuntu20.04
|
||||
build-packaging: DOCKERFILE_SUFFIX := packaging
|
||||
build-packaging: PACKAGE_ARCH := amd64
|
||||
build-packaging: PACKAGE_DIST = all
|
||||
build-packaging: PACKAGE_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),-$(LIB_TAG))
|
||||
|
||||
# Test targets
|
||||
test-%: DIST = $(*)
|
||||
|
||||
34
cmd/nvidia-container-runtime.cdi/main.go
Normal file
34
cmd/nvidia-container-runtime.cdi/main.go
Normal file
@@ -0,0 +1,34 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
|
||||
)
|
||||
|
||||
func main() {
|
||||
rt := runtime.New(
|
||||
runtime.WithModeOverride("cdi"),
|
||||
)
|
||||
|
||||
err := rt.Run(os.Args)
|
||||
if err != nil {
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
34
cmd/nvidia-container-runtime.legacy/main.go
Normal file
34
cmd/nvidia-container-runtime.legacy/main.go
Normal file
@@ -0,0 +1,34 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
|
||||
)
|
||||
|
||||
func main() {
|
||||
rt := runtime.New(
|
||||
runtime.WithModeOverride("legacy"),
|
||||
)
|
||||
|
||||
err := rt.Run(os.Args)
|
||||
if err != nil {
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
@@ -1,89 +1,15 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
|
||||
)
|
||||
|
||||
// version must be set by go build's -X main.version= option in the Makefile.
|
||||
var version = "unknown"
|
||||
|
||||
// gitCommit will be the hash that the binary was built from
|
||||
// and will be populated by the Makefile
|
||||
var gitCommit = ""
|
||||
|
||||
var logger = NewLogger()
|
||||
|
||||
func main() {
|
||||
err := run(os.Args)
|
||||
r := runtime.New()
|
||||
err := r.Run(os.Args)
|
||||
if err != nil {
|
||||
logger.Errorf("%v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
// run is an entry point that allows for idiomatic handling of errors
|
||||
// when calling from the main function.
|
||||
func run(argv []string) (rerr error) {
|
||||
printVersion := hasVersionFlag(argv)
|
||||
if printVersion {
|
||||
fmt.Printf("%v version %v\n", "NVIDIA Container Runtime", info.GetVersionString(fmt.Sprintf("spec: %v", specs.Version)))
|
||||
}
|
||||
|
||||
cfg, err := config.GetConfig()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error loading config: %v", err)
|
||||
}
|
||||
|
||||
logger, err = UpdateLogger(
|
||||
cfg.NVIDIAContainerRuntimeConfig.DebugFilePath,
|
||||
cfg.NVIDIAContainerRuntimeConfig.LogLevel,
|
||||
argv,
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to set up logger: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
if rerr != nil {
|
||||
logger.Errorf("%v", rerr)
|
||||
}
|
||||
logger.Reset()
|
||||
}()
|
||||
|
||||
logger.Debugf("Command line arguments: %v", argv)
|
||||
runtime, err := newNVIDIAContainerRuntime(logger.Logger, cfg, argv)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create NVIDIA Container Runtime: %v", err)
|
||||
}
|
||||
|
||||
if printVersion {
|
||||
fmt.Print("\n")
|
||||
}
|
||||
return runtime.Exec(argv)
|
||||
}
|
||||
|
||||
// TODO: This should be refactored / combined with parseArgs in logger.
|
||||
func hasVersionFlag(args []string) bool {
|
||||
for i := 0; i < len(args); i++ {
|
||||
param := args[i]
|
||||
|
||||
parts := strings.SplitN(param, "=", 2)
|
||||
trimmed := strings.TrimLeft(parts[0], "-")
|
||||
// If this is not a flag we continue
|
||||
if parts[0] == trimmed {
|
||||
continue
|
||||
}
|
||||
|
||||
// Check the version flag
|
||||
if trimmed == "version" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
@@ -41,7 +42,7 @@ func TestMain(m *testing.M) {
|
||||
var err error
|
||||
moduleRoot, err := test.GetModuleRoot()
|
||||
if err != nil {
|
||||
logger.Fatalf("error in test setup: could not get module root: %v", err)
|
||||
logrus.Fatalf("error in test setup: could not get module root: %v", err)
|
||||
}
|
||||
testBinPath := filepath.Join(moduleRoot, "test", "bin")
|
||||
testInputPath := filepath.Join(moduleRoot, "test", "input")
|
||||
@@ -53,11 +54,11 @@ func TestMain(m *testing.M) {
|
||||
// Confirm that the environment is configured correctly
|
||||
runcPath, err := exec.LookPath(runcExecutableName)
|
||||
if err != nil || filepath.Join(testBinPath, runcExecutableName) != runcPath {
|
||||
logger.Fatalf("error in test setup: mock runc path set incorrectly in TestMain(): %v", err)
|
||||
logrus.Fatalf("error in test setup: mock runc path set incorrectly in TestMain(): %v", err)
|
||||
}
|
||||
hookPath, err := exec.LookPath(nvidiaHook)
|
||||
if err != nil || filepath.Join(testBinPath, nvidiaHook) != hookPath {
|
||||
logger.Fatalf("error in test setup: mock hook path set incorrectly in TestMain(): %v", err)
|
||||
logrus.Fatalf("error in test setup: mock hook path set incorrectly in TestMain(): %v", err)
|
||||
}
|
||||
|
||||
// Store the root and binary paths in the test Config
|
||||
@@ -77,7 +78,7 @@ func TestMain(m *testing.M) {
|
||||
|
||||
// case 1) nvidia-container-runtime run --bundle
|
||||
// case 2) nvidia-container-runtime create --bundle
|
||||
// - Confirm the runtime handles bad input correctly
|
||||
// - Confirm the runtime handles bad input correctly
|
||||
func TestBadInput(t *testing.T) {
|
||||
err := cfg.generateNewRuntimeSpec()
|
||||
if err != nil {
|
||||
@@ -91,9 +92,10 @@ func TestBadInput(t *testing.T) {
|
||||
}
|
||||
|
||||
// case 1) nvidia-container-runtime run --bundle <bundle-name> <ctr-name>
|
||||
// - Confirm the runtime runs with no errors
|
||||
// - Confirm the runtime runs with no errors
|
||||
//
|
||||
// case 2) nvidia-container-runtime create --bundle <bundle-name> <ctr-name>
|
||||
// - Confirm the runtime inserts the NVIDIA prestart hook correctly
|
||||
// - Confirm the runtime inserts the NVIDIA prestart hook correctly
|
||||
func TestGoodInput(t *testing.T) {
|
||||
err := cfg.generateNewRuntimeSpec()
|
||||
if err != nil {
|
||||
@@ -170,7 +172,7 @@ func TestDuplicateHook(t *testing.T) {
|
||||
// addNVIDIAHook is a basic wrapper for an addHookModifier that is used for
|
||||
// testing.
|
||||
func addNVIDIAHook(spec *specs.Spec) error {
|
||||
m := modifier.NewStableRuntimeModifier(logger.Logger)
|
||||
m := modifier.NewStableRuntimeModifier(logrus.StandardLogger())
|
||||
return m.Modify(spec)
|
||||
}
|
||||
|
||||
|
||||
@@ -17,9 +17,12 @@
|
||||
package generate
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
@@ -33,6 +36,16 @@ type deviceFolderPermissions struct {
|
||||
|
||||
var _ discover.Discover = (*deviceFolderPermissions)(nil)
|
||||
|
||||
// GetDeviceFolderPermissionHookEdits gets the edits required for device folder permissions discoverer
|
||||
func GetDeviceFolderPermissionHookEdits(logger *logrus.Logger, driverRoot string, nvidiaCTKPath string, deviceSpecs []specs.Device) (*cdi.ContainerEdits, error) {
|
||||
deviceFolderPermissionHooks, err := NewDeviceFolderPermissionHookDiscoverer(logger, driverRoot, nvidiaCTKPath, deviceSpecs)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to generated permission hooks for device nodes: %v", err)
|
||||
}
|
||||
|
||||
return edits.FromDiscoverer(deviceFolderPermissionHooks)
|
||||
}
|
||||
|
||||
// NewDeviceFolderPermissionHookDiscoverer creates a discoverer that can be used to update the permissions for the parent folders of nested device nodes from the specified set of device specs.
|
||||
// This works around an issue with rootless podman when using crun as a low-level runtime.
|
||||
// See https://github.com/containers/crun/issues/1047
|
||||
|
||||
@@ -25,6 +25,7 @@ import (
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
specs "github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
@@ -37,6 +38,8 @@ import (
|
||||
const (
|
||||
formatJSON = "json"
|
||||
formatYAML = "yaml"
|
||||
|
||||
allDeviceName = "all"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
@@ -49,6 +52,7 @@ type config struct {
|
||||
deviceNameStrategy string
|
||||
driverRoot string
|
||||
nvidiaCTKPath string
|
||||
discoveryMode string
|
||||
}
|
||||
|
||||
// NewCommand constructs a generate-cdi command with the specified logger
|
||||
@@ -87,10 +91,16 @@ func (m command) build() *cli.Command {
|
||||
Value: formatYAML,
|
||||
Destination: &cfg.format,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "discovery-mode",
|
||||
Usage: "The mode to use when discovering the available entities. One of [auto | nvml | wsl]. If mode is set to 'auto' the mode will be determined based on the system configuration.",
|
||||
Value: nvcdi.ModeAuto,
|
||||
Destination: &cfg.discoveryMode,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "device-name-strategy",
|
||||
Usage: "Specify the strategy for generating device names. One of [index | uuid | type-index]",
|
||||
Value: deviceNameStrategyIndex,
|
||||
Value: nvcdi.DeviceNameStrategyIndex,
|
||||
Destination: &cfg.deviceNameStrategy,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
@@ -117,25 +127,27 @@ func (m command) validateFlags(r *cli.Context, cfg *config) error {
|
||||
return fmt.Errorf("invalid output format: %v", cfg.format)
|
||||
}
|
||||
|
||||
_, err := newDeviceNamer(cfg.deviceNameStrategy)
|
||||
cfg.discoveryMode = strings.ToLower(cfg.discoveryMode)
|
||||
switch cfg.discoveryMode {
|
||||
case nvcdi.ModeAuto:
|
||||
case nvcdi.ModeNvml:
|
||||
case nvcdi.ModeWsl:
|
||||
default:
|
||||
return fmt.Errorf("invalid discovery mode: %v", cfg.discoveryMode)
|
||||
}
|
||||
|
||||
_, err := nvcdi.NewDeviceNamer(cfg.deviceNameStrategy)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cfg.nvidiaCTKPath = discover.FindNvidiaCTK(m.logger, cfg.nvidiaCTKPath)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m command) run(c *cli.Context, cfg *config) error {
|
||||
deviceNamer, err := newDeviceNamer(cfg.deviceNameStrategy)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create device namer: %v", err)
|
||||
}
|
||||
|
||||
spec, err := m.generateSpec(
|
||||
cfg.driverRoot,
|
||||
discover.FindNvidiaCTK(m.logger, cfg.nvidiaCTKPath),
|
||||
deviceNamer,
|
||||
)
|
||||
spec, err := m.generateSpec(cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to generate CDI spec: %v", err)
|
||||
}
|
||||
@@ -214,7 +226,12 @@ func writeToOutput(format string, data []byte, output io.Writer) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m command) generateSpec(driverRoot string, nvidiaCTKPath string, namer deviceNamer) (*specs.Spec, error) {
|
||||
func (m command) generateSpec(cfg *config) (*specs.Spec, error) {
|
||||
deviceNamer, err := nvcdi.NewDeviceNamer(cfg.deviceNameStrategy)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create device namer: %v", err)
|
||||
}
|
||||
|
||||
nvmllib := nvml.New()
|
||||
if r := nvmllib.Init(); r != nvml.SUCCESS {
|
||||
return nil, r
|
||||
@@ -223,56 +240,52 @@ func (m command) generateSpec(driverRoot string, nvidiaCTKPath string, namer dev
|
||||
|
||||
devicelib := device.New(device.WithNvml(nvmllib))
|
||||
|
||||
deviceSpecs, err := m.generateDeviceSpecs(devicelib, driverRoot, nvidiaCTKPath, namer)
|
||||
cdilib := nvcdi.New(
|
||||
nvcdi.WithLogger(m.logger),
|
||||
nvcdi.WithDriverRoot(cfg.driverRoot),
|
||||
nvcdi.WithNVIDIACTKPath(cfg.nvidiaCTKPath),
|
||||
nvcdi.WithDeviceNamer(deviceNamer),
|
||||
nvcdi.WithDeviceLib(devicelib),
|
||||
nvcdi.WithNvmlLib(nvmllib),
|
||||
nvcdi.WithMode(string(cfg.discoveryMode)),
|
||||
)
|
||||
|
||||
deviceSpecs, err := cdilib.GetAllDeviceSpecs()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create device CDI specs: %v", err)
|
||||
}
|
||||
var hasAll bool
|
||||
for _, deviceSpec := range deviceSpecs {
|
||||
if deviceSpec.Name == allDeviceName {
|
||||
hasAll = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !hasAll {
|
||||
allDevice, err := MergeDeviceSpecs(deviceSpecs, allDeviceName)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create CDI specification for %q device: %v", allDeviceName, err)
|
||||
}
|
||||
deviceSpecs = append(deviceSpecs, allDevice)
|
||||
}
|
||||
|
||||
allDevice := createAllDevice(deviceSpecs)
|
||||
|
||||
deviceSpecs = append(deviceSpecs, allDevice)
|
||||
|
||||
allEdits := edits.NewContainerEdits()
|
||||
|
||||
ipcs, err := NewIPCDiscoverer(m.logger, driverRoot)
|
||||
commonEdits, err := cdilib.GetCommonEdits()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create discoverer for IPC sockets: %v", err)
|
||||
return nil, fmt.Errorf("failed to create edits common for entities: %v", err)
|
||||
}
|
||||
|
||||
ipcEdits, err := edits.FromDiscoverer(ipcs)
|
||||
deviceFolderPermissionEdits, err := GetDeviceFolderPermissionHookEdits(m.logger, cfg.driverRoot, cfg.nvidiaCTKPath, deviceSpecs)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create container edits for IPC sockets: %v", err)
|
||||
}
|
||||
// TODO: We should not have to update this after the fact
|
||||
for _, s := range ipcEdits.Mounts {
|
||||
s.Options = append(s.Options, "noexec")
|
||||
return nil, fmt.Errorf("failed to generated edits for device folder permissions: %v", err)
|
||||
}
|
||||
|
||||
allEdits.Append(ipcEdits)
|
||||
|
||||
common, err := NewCommonDiscoverer(m.logger, driverRoot, nvidiaCTKPath, nvmllib)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create discoverer for common entities: %v", err)
|
||||
}
|
||||
|
||||
deviceFolderPermissionHooks, err := NewDeviceFolderPermissionHookDiscoverer(m.logger, driverRoot, nvidiaCTKPath, deviceSpecs)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to generated permission hooks for device nodes: %v", err)
|
||||
}
|
||||
|
||||
commonEdits, err := edits.FromDiscoverer(discover.Merge(common, deviceFolderPermissionHooks))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create container edits for common entities: %v", err)
|
||||
}
|
||||
|
||||
allEdits.Append(commonEdits)
|
||||
commonEdits.Append(deviceFolderPermissionEdits)
|
||||
|
||||
// We construct the spec and determine the minimum required version based on the specification.
|
||||
spec := specs.Spec{
|
||||
Version: "NOT_SET",
|
||||
Kind: "nvidia.com/gpu",
|
||||
Devices: deviceSpecs,
|
||||
ContainerEdits: *allEdits.ContainerEdits,
|
||||
ContainerEdits: *commonEdits.ContainerEdits,
|
||||
}
|
||||
|
||||
minVersion, err := cdi.MinimumRequiredVersion(&spec)
|
||||
@@ -286,89 +299,32 @@ func (m command) generateSpec(driverRoot string, nvidiaCTKPath string, namer dev
|
||||
return &spec, nil
|
||||
}
|
||||
|
||||
func (m command) generateDeviceSpecs(devicelib device.Interface, driverRoot string, nvidiaCTKPath string, namer deviceNamer) ([]specs.Device, error) {
|
||||
var deviceSpecs []specs.Device
|
||||
|
||||
err := devicelib.VisitDevices(func(i int, d device.Device) error {
|
||||
isMigEnabled, err := d.IsMigEnabled()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to check whether device is MIG device: %v", err)
|
||||
// MergeDeviceSpecs creates a device with the specified name which combines the edits from the previous devices.
|
||||
// If a device of the specified name already exists, an error is returned.
|
||||
func MergeDeviceSpecs(deviceSpecs []specs.Device, mergedDeviceName string) (specs.Device, error) {
|
||||
if err := cdi.ValidateDeviceName(mergedDeviceName); err != nil {
|
||||
return specs.Device{}, fmt.Errorf("invalid device name %q: %v", mergedDeviceName, err)
|
||||
}
|
||||
for _, d := range deviceSpecs {
|
||||
if d.Name == mergedDeviceName {
|
||||
return specs.Device{}, fmt.Errorf("device %q already exists", mergedDeviceName)
|
||||
}
|
||||
if isMigEnabled {
|
||||
return nil
|
||||
}
|
||||
device, err := NewFullGPUDiscoverer(m.logger, driverRoot, nvidiaCTKPath, d)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create device: %v", err)
|
||||
}
|
||||
|
||||
deviceEdits, err := edits.FromDiscoverer(device)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create container edits for device: %v", err)
|
||||
}
|
||||
|
||||
deviceName, err := namer.GetDeviceName(i, d)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get device name: %v", err)
|
||||
}
|
||||
deviceSpec := specs.Device{
|
||||
Name: deviceName,
|
||||
ContainerEdits: *deviceEdits.ContainerEdits,
|
||||
}
|
||||
|
||||
deviceSpecs = append(deviceSpecs, deviceSpec)
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to generate CDI spec for GPU devices: %v", err)
|
||||
}
|
||||
|
||||
err = devicelib.VisitMigDevices(func(i int, d device.Device, j int, mig device.MigDevice) error {
|
||||
device, err := NewMigDeviceDiscoverer(m.logger, "", d, mig)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create MIG device: %v", err)
|
||||
}
|
||||
|
||||
deviceEdits, err := edits.FromDiscoverer(device)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create container edits for MIG device: %v", err)
|
||||
}
|
||||
|
||||
deviceName, err := namer.GetMigDeviceName(i, j, mig)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get device name: %v", err)
|
||||
}
|
||||
deviceSpec := specs.Device{
|
||||
Name: deviceName,
|
||||
ContainerEdits: *deviceEdits.ContainerEdits,
|
||||
}
|
||||
|
||||
deviceSpecs = append(deviceSpecs, deviceSpec)
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("falied to generate CDI spec for MIG devices: %v", err)
|
||||
}
|
||||
|
||||
return deviceSpecs, nil
|
||||
}
|
||||
|
||||
// createAllDevice creates an 'all' device which combines the edits from the previous devices
|
||||
func createAllDevice(deviceSpecs []specs.Device) specs.Device {
|
||||
edits := edits.NewContainerEdits()
|
||||
mergedEdits := edits.NewContainerEdits()
|
||||
|
||||
for _, d := range deviceSpecs {
|
||||
edit := cdi.ContainerEdits{
|
||||
ContainerEdits: &d.ContainerEdits,
|
||||
}
|
||||
edits.Append(&edit)
|
||||
mergedEdits.Append(&edit)
|
||||
}
|
||||
|
||||
all := specs.Device{
|
||||
Name: "all",
|
||||
ContainerEdits: *edits.ContainerEdits,
|
||||
merged := specs.Device{
|
||||
Name: mergedDeviceName,
|
||||
ContainerEdits: *mergedEdits.ContainerEdits,
|
||||
}
|
||||
return all
|
||||
return merged, nil
|
||||
}
|
||||
|
||||
// createParentDirsIfRequired creates the parent folders of the specified path if required.
|
||||
|
||||
117
cmd/nvidia-ctk/cdi/generate/generate_test.go
Normal file
117
cmd/nvidia-ctk/cdi/generate/generate_test.go
Normal file
@@ -0,0 +1,117 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package generate
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestMergeDeviceSpecs(t *testing.T) {
|
||||
testCases := []struct {
|
||||
description string
|
||||
deviceSpecs []specs.Device
|
||||
mergedDeviceName string
|
||||
expectedError error
|
||||
expected specs.Device
|
||||
}{
|
||||
{
|
||||
description: "no devices",
|
||||
mergedDeviceName: "all",
|
||||
expected: specs.Device{
|
||||
Name: "all",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "one device",
|
||||
mergedDeviceName: "all",
|
||||
deviceSpecs: []specs.Device{
|
||||
{
|
||||
Name: "gpu0",
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
Env: []string{"GPU=0"},
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: specs.Device{
|
||||
Name: "all",
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
Env: []string{"GPU=0"},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "two devices",
|
||||
mergedDeviceName: "all",
|
||||
deviceSpecs: []specs.Device{
|
||||
{
|
||||
Name: "gpu0",
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
Env: []string{"GPU=0"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "gpu1",
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
Env: []string{"GPU=1"},
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: specs.Device{
|
||||
Name: "all",
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
Env: []string{"GPU=0", "GPU=1"},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "has merged device",
|
||||
mergedDeviceName: "gpu0",
|
||||
deviceSpecs: []specs.Device{
|
||||
{
|
||||
Name: "gpu0",
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
Env: []string{"GPU=0"},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedError: fmt.Errorf("device %q already exists", "gpu0"),
|
||||
},
|
||||
{
|
||||
description: "invalid merged device name",
|
||||
mergedDeviceName: ".-not-valid",
|
||||
expectedError: fmt.Errorf("invalid device name %q", ".-not-valid"),
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
mergedDevice, err := MergeDeviceSpecs(tc.deviceSpecs, tc.mergedDeviceName)
|
||||
|
||||
if tc.expectedError != nil {
|
||||
require.Error(t, err)
|
||||
return
|
||||
}
|
||||
|
||||
require.NoError(t, err)
|
||||
require.EqualValues(t, tc.expected, mergedDevice)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,75 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package generate
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
|
||||
"github.com/sirupsen/logrus"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||
)
|
||||
|
||||
// NewMigDeviceDiscoverer creates a discoverer for the specified mig device and its parent.
|
||||
func NewMigDeviceDiscoverer(logger *logrus.Logger, driverRoot string, parent device.Device, d device.MigDevice) (discover.Discover, error) {
|
||||
minor, ret := parent.GetMinorNumber()
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting GPU device minor number: %v", ret)
|
||||
}
|
||||
parentPath := fmt.Sprintf("/dev/nvidia%d", minor)
|
||||
|
||||
migCaps, err := nvcaps.NewMigCaps()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting MIG capability device paths: %v", err)
|
||||
}
|
||||
|
||||
gi, ret := d.GetGpuInstanceId()
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting GPU Instance ID: %v", ret)
|
||||
}
|
||||
|
||||
ci, ret := d.GetComputeInstanceId()
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting Compute Instance ID: %v", ret)
|
||||
}
|
||||
|
||||
giCap := nvcaps.NewGPUInstanceCap(minor, gi)
|
||||
giCapDevicePath, err := migCaps.GetCapDevicePath(giCap)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get GI cap device path: %v", err)
|
||||
}
|
||||
|
||||
ciCap := nvcaps.NewComputeInstanceCap(minor, gi, ci)
|
||||
ciCapDevicePath, err := migCaps.GetCapDevicePath(ciCap)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get CI cap device path: %v", err)
|
||||
}
|
||||
|
||||
deviceNodes := discover.NewCharDeviceDiscoverer(
|
||||
logger,
|
||||
[]string{
|
||||
parentPath,
|
||||
giCapDevicePath,
|
||||
ciCapDevicePath,
|
||||
},
|
||||
driverRoot,
|
||||
)
|
||||
|
||||
return deviceNodes, nil
|
||||
}
|
||||
@@ -88,30 +88,22 @@ docker-all: $(AMD64_TARGETS) $(X86_64_TARGETS) \
|
||||
LIBNVIDIA_CONTAINER_VERSION ?= $(LIB_VERSION)
|
||||
LIBNVIDIA_CONTAINER_TAG ?= $(LIB_TAG)
|
||||
|
||||
LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)$(if $(LIBNVIDIA_CONTAINER_TAG),~$(LIBNVIDIA_CONTAINER_TAG))-1
|
||||
|
||||
# private ubuntu target
|
||||
--ubuntu%: OS := ubuntu
|
||||
--ubuntu%: LIB_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
|
||||
--ubuntu%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)$(if $(LIBNVIDIA_CONTAINER_TAG),~$(LIBNVIDIA_CONTAINER_TAG))-1
|
||||
--ubuntu%: PKG_REV := 1
|
||||
|
||||
# private debian target
|
||||
--debian%: OS := debian
|
||||
--debian%: LIB_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
|
||||
--debian%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)$(if $(LIBNVIDIA_CONTAINER_TAG),~$(LIBNVIDIA_CONTAINER_TAG))-1
|
||||
--debian%: PKG_REV := 1
|
||||
|
||||
# private centos target
|
||||
--centos%: OS := centos
|
||||
--centos%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
--centos%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
|
||||
--centos%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
|
||||
--centos%: CONFIG_TOML_SUFFIX := rpm-yum
|
||||
--centos8%: BASEIMAGE = quay.io/centos/centos:stream8
|
||||
|
||||
# private fedora target
|
||||
--fedora%: OS := fedora
|
||||
--fedora%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
--fedora%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
|
||||
--fedora%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
|
||||
--fedora%: CONFIG_TOML_SUFFIX := rpm-yum
|
||||
# The fedora(35) base image has very slow performance when building aarch64 packages.
|
||||
@@ -120,21 +112,15 @@ LIBNVIDIA_CONTAINER_TAG ?= $(LIB_TAG)
|
||||
|
||||
# private amazonlinux target
|
||||
--amazonlinux%: OS := amazonlinux
|
||||
--amazonlinux%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
|
||||
--amazonlinux%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
--amazonlinux%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
|
||||
--amazonlinux%: CONFIG_TOML_SUFFIX := rpm-yum
|
||||
|
||||
# private opensuse-leap target
|
||||
--opensuse-leap%: OS = opensuse-leap
|
||||
--opensuse-leap%: BASEIMAGE = opensuse/leap:$(VERSION)
|
||||
--opensuse-leap%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
|
||||
--opensuse-leap%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
|
||||
# private rhel target (actually built on centos)
|
||||
--rhel%: OS := centos
|
||||
--rhel%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
|
||||
--rhel%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
--rhel%: VERSION = $(patsubst rhel%-$(ARCH),%,$(TARGET_PLATFORM))
|
||||
--rhel%: ARTIFACTS_DIR = $(DIST_DIR)/rhel$(VERSION)/$(ARCH)
|
||||
--rhel%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
|
||||
@@ -155,8 +141,8 @@ docker-build-%:
|
||||
--build-arg BASEIMAGE="$(BASEIMAGE)" \
|
||||
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
|
||||
--build-arg PKG_NAME="$(LIB_NAME)" \
|
||||
--build-arg PKG_VERS="$(LIB_VERSION)" \
|
||||
--build-arg PKG_REV="$(PKG_REV)" \
|
||||
--build-arg PKG_VERS="$(PACKAGE_VERSION)" \
|
||||
--build-arg PKG_REV="$(PACKAGE_REVISION)" \
|
||||
--build-arg LIBNVIDIA_CONTAINER_TOOLS_VERSION="$(LIBNVIDIA_CONTAINER_TOOLS_VERSION)" \
|
||||
--build-arg CONFIG_TOML_SUFFIX="$(CONFIG_TOML_SUFFIX)" \
|
||||
--build-arg GIT_COMMIT="$(GIT_COMMIT)" \
|
||||
|
||||
4
go.mod
4
go.mod
@@ -4,7 +4,7 @@ go 1.18
|
||||
|
||||
require (
|
||||
github.com/BurntSushi/toml v1.0.0
|
||||
github.com/NVIDIA/go-nvml v0.11.6-0.0.20220823120812-7e2082095e82
|
||||
github.com/NVIDIA/go-nvml v0.12.0-0
|
||||
github.com/container-orchestrated-devices/container-device-interface v0.5.4-0.20230111111500-5b3b5d81179a
|
||||
github.com/fsnotify/fsnotify v1.5.4
|
||||
github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb
|
||||
@@ -12,7 +12,7 @@ require (
|
||||
github.com/sirupsen/logrus v1.9.0
|
||||
github.com/stretchr/testify v1.7.0
|
||||
github.com/urfave/cli/v2 v2.3.0
|
||||
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230119114711-6fe07bb33342
|
||||
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230209143738-95328d8c4438
|
||||
golang.org/x/mod v0.5.0
|
||||
golang.org/x/sys v0.0.0-20220927170352-d9d178bc13c6
|
||||
sigs.k8s.io/yaml v1.3.0
|
||||
|
||||
4
go.sum
4
go.sum
@@ -3,6 +3,8 @@ github.com/BurntSushi/toml v1.0.0 h1:dtDWrepsVPfW9H/4y7dDgFc2MBUSeJhlaDtK13CxFlU
|
||||
github.com/BurntSushi/toml v1.0.0/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
|
||||
github.com/NVIDIA/go-nvml v0.11.6-0.0.20220823120812-7e2082095e82 h1:x751Xx1tdxkiA/sdkv2J769n21UbYKzVOpe9S/h1M3k=
|
||||
github.com/NVIDIA/go-nvml v0.11.6-0.0.20220823120812-7e2082095e82/go.mod h1:hy7HYeQy335x6nEss0Ne3PYqleRa6Ct+VKD9RQ4nyFs=
|
||||
github.com/NVIDIA/go-nvml v0.12.0-0 h1:eHYNHbzAsMgWYshf6dEmTY66/GCXnORJFnzm3TNH4mc=
|
||||
github.com/NVIDIA/go-nvml v0.12.0-0/go.mod h1:hy7HYeQy335x6nEss0Ne3PYqleRa6Ct+VKD9RQ4nyFs=
|
||||
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
|
||||
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
|
||||
github.com/checkpoint-restore/go-criu/v5 v5.3.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E=
|
||||
@@ -88,6 +90,8 @@ github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17
|
||||
github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
|
||||
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230119114711-6fe07bb33342 h1:083n9fJt2dWOpJd/X/q9Xgl5XtQLL22uSFYbzVqJssg=
|
||||
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230119114711-6fe07bb33342/go.mod h1:GStidGxhaqJhYFW1YpOnLvYCbL2EsM0od7IW4u7+JgU=
|
||||
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230209143738-95328d8c4438 h1:+qRai7XRl8omFQVCeHcaWzL542Yw64vfmuXG+79ZCIc=
|
||||
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230209143738-95328d8c4438/go.mod h1:GStidGxhaqJhYFW1YpOnLvYCbL2EsM0od7IW4u7+JgU=
|
||||
golang.org/x/mod v0.5.0 h1:UG21uOlmZabA4fW5i7ZX6bjw1xELEGg/ZLgZq9auk/Q=
|
||||
golang.org/x/mod v0.5.0/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro=
|
||||
golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||
|
||||
@@ -58,7 +58,11 @@ func (d *charDevices) Devices() ([]Device, error) {
|
||||
}
|
||||
var devices []Device
|
||||
for _, mount := range devicesAsMounts {
|
||||
devices = append(devices, Device(mount))
|
||||
device := Device{
|
||||
HostPath: mount.HostPath,
|
||||
Path: mount.Path,
|
||||
}
|
||||
devices = append(devices, device)
|
||||
}
|
||||
|
||||
return devices, nil
|
||||
|
||||
@@ -32,6 +32,7 @@ type Device struct {
|
||||
type Mount struct {
|
||||
HostPath string
|
||||
Path string
|
||||
Options []string
|
||||
}
|
||||
|
||||
// Hook represents a discovered hook.
|
||||
|
||||
@@ -29,12 +29,47 @@ const (
|
||||
nvidiaCTKDefaultFilePath = "/usr/bin/nvidia-ctk"
|
||||
)
|
||||
|
||||
var _ Discover = (*Hook)(nil)
|
||||
|
||||
// Devices returns an empty list of devices for a Hook discoverer.
|
||||
func (h Hook) Devices() ([]Device, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Mounts returns an empty list of mounts for a Hook discoverer.
|
||||
func (h Hook) Mounts() ([]Mount, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Hooks allows the Hook type to also implement the Discoverer interface.
|
||||
// It returns a single hook
|
||||
func (h Hook) Hooks() ([]Hook, error) {
|
||||
return []Hook{h}, nil
|
||||
}
|
||||
|
||||
// CreateCreateSymlinkHook creates a hook which creates a symlink from link -> target.
|
||||
func CreateCreateSymlinkHook(nvidiaCTKPath string, links []string) Discover {
|
||||
if len(links) == 0 {
|
||||
return None{}
|
||||
}
|
||||
|
||||
var args []string
|
||||
for _, link := range links {
|
||||
args = append(args, "--link", link)
|
||||
}
|
||||
return CreateNvidiaCTKHook(
|
||||
nvidiaCTKPath,
|
||||
"create-symlinks",
|
||||
args...,
|
||||
)
|
||||
}
|
||||
|
||||
// CreateNvidiaCTKHook creates a hook which invokes the NVIDIA Container Toolkit CLI hook subcommand.
|
||||
func CreateNvidiaCTKHook(executable string, hookName string, additionalArgs ...string) Hook {
|
||||
func CreateNvidiaCTKHook(nvidiaCTKPath string, hookName string, additionalArgs ...string) Hook {
|
||||
return Hook{
|
||||
Lifecycle: cdi.CreateContainerHook,
|
||||
Path: executable,
|
||||
Args: append([]string{filepath.Base(executable), "hook", hookName}, additionalArgs...),
|
||||
Path: nvidiaCTKPath,
|
||||
Args: append([]string{filepath.Base(nvidiaCTKPath), "hook", hookName}, additionalArgs...),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -47,6 +82,9 @@ func FindNvidiaCTK(logger *logrus.Logger, nvidiaCTKPath string) string {
|
||||
return nvidiaCTKPath
|
||||
}
|
||||
|
||||
if nvidiaCTKPath == "" {
|
||||
nvidiaCTKPath = nvidiaCTKExecutable
|
||||
}
|
||||
logger.Debugf("Locating NVIDIA Container Toolkit CLI as %v", nvidiaCTKPath)
|
||||
lookup := lookup.NewExecutableLocator(logger, "")
|
||||
hookPath := nvidiaCTKDefaultFilePath
|
||||
|
||||
60
internal/discover/icp_test.go
Normal file
60
internal/discover/icp_test.go
Normal file
@@ -0,0 +1,60 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestIPCMounts(t *testing.T) {
|
||||
l := ipcMounts(
|
||||
mounts{
|
||||
logger: logrus.New(),
|
||||
lookup: &lookup.LocatorMock{
|
||||
LocateFunc: func(path string) ([]string, error) {
|
||||
return []string{"/host/path"}, nil
|
||||
},
|
||||
},
|
||||
required: []string{"target"},
|
||||
},
|
||||
)
|
||||
|
||||
mounts, err := l.Mounts()
|
||||
require.NoError(t, err)
|
||||
|
||||
require.EqualValues(
|
||||
t,
|
||||
[]Mount{
|
||||
{
|
||||
HostPath: "/host/path",
|
||||
Path: "/host/path",
|
||||
Options: []string{
|
||||
"ro",
|
||||
"nosuid",
|
||||
"nodev",
|
||||
"bind",
|
||||
"noexec",
|
||||
},
|
||||
},
|
||||
},
|
||||
mounts,
|
||||
)
|
||||
}
|
||||
@@ -14,17 +14,18 @@
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package generate
|
||||
package discover
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type ipcMounts mounts
|
||||
|
||||
// NewIPCDiscoverer creates a discoverer for NVIDIA IPC sockets.
|
||||
func NewIPCDiscoverer(logger *logrus.Logger, driverRoot string) (discover.Discover, error) {
|
||||
d := discover.NewMounts(
|
||||
func NewIPCDiscoverer(logger *logrus.Logger, driverRoot string) (Discover, error) {
|
||||
d := newMounts(
|
||||
logger,
|
||||
lookup.NewFileLocator(
|
||||
lookup.WithLogger(logger),
|
||||
@@ -38,5 +39,22 @@ func NewIPCDiscoverer(logger *logrus.Logger, driverRoot string) (discover.Discov
|
||||
},
|
||||
)
|
||||
|
||||
return d, nil
|
||||
return (*ipcMounts)(d), nil
|
||||
}
|
||||
|
||||
// Mounts returns the discovered mounts with "noexec" added to the mount options.
|
||||
func (d *ipcMounts) Mounts() ([]Mount, error) {
|
||||
mounts, err := (*mounts)(d).Mounts()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var modifiedMounts []Mount
|
||||
for _, m := range mounts {
|
||||
mount := m
|
||||
mount.Options = append(m.Options, "noexec")
|
||||
modifiedMounts = append(modifiedMounts, mount)
|
||||
}
|
||||
|
||||
return modifiedMounts, nil
|
||||
}
|
||||
@@ -43,6 +43,11 @@ var _ Discover = (*mounts)(nil)
|
||||
|
||||
// NewMounts creates a discoverer for the required mounts using the specified locator.
|
||||
func NewMounts(logger *logrus.Logger, lookup lookup.Locator, root string, required []string) Discover {
|
||||
return newMounts(logger, lookup, root, required)
|
||||
}
|
||||
|
||||
// newMounts creates a discoverer for the required mounts using the specified locator.
|
||||
func newMounts(logger *logrus.Logger, lookup lookup.Locator, root string, required []string) *mounts {
|
||||
return &mounts{
|
||||
logger: logger,
|
||||
lookup: lookup,
|
||||
@@ -93,6 +98,12 @@ func (d *mounts) Mounts() ([]Mount, error) {
|
||||
uniqueMounts[p] = Mount{
|
||||
HostPath: p,
|
||||
Path: r,
|
||||
Options: []string{
|
||||
"ro",
|
||||
"nosuid",
|
||||
"nodev",
|
||||
"bind",
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,6 +35,14 @@ func TestMountsReturnsEmptyDevices(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestMounts(t *testing.T) {
|
||||
|
||||
mountOptions := []string{
|
||||
"ro",
|
||||
"nosuid",
|
||||
"nodev",
|
||||
"bind",
|
||||
}
|
||||
|
||||
logger, logHook := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
@@ -70,7 +78,7 @@ func TestMounts(t *testing.T) {
|
||||
},
|
||||
required: []string{"required"},
|
||||
},
|
||||
expectedMounts: []Mount{{Path: "located", HostPath: "located"}},
|
||||
expectedMounts: []Mount{{Path: "located", HostPath: "located", Options: mountOptions}},
|
||||
},
|
||||
{
|
||||
description: "mounts removes located duplicates",
|
||||
@@ -83,7 +91,7 @@ func TestMounts(t *testing.T) {
|
||||
},
|
||||
required: []string{"required0", "required1"},
|
||||
},
|
||||
expectedMounts: []Mount{{Path: "located", HostPath: "located"}},
|
||||
expectedMounts: []Mount{{Path: "located", HostPath: "located", Options: mountOptions}},
|
||||
},
|
||||
{
|
||||
description: "mounts skips located errors",
|
||||
@@ -98,7 +106,7 @@ func TestMounts(t *testing.T) {
|
||||
},
|
||||
required: []string{"required0", "error", "required1"},
|
||||
},
|
||||
expectedMounts: []Mount{{Path: "required0", HostPath: "required0"}, {Path: "required1", HostPath: "required1"}},
|
||||
expectedMounts: []Mount{{Path: "required0", HostPath: "required0", Options: mountOptions}, {Path: "required1", HostPath: "required1", Options: mountOptions}},
|
||||
},
|
||||
{
|
||||
description: "mounts skips unlocated",
|
||||
@@ -113,7 +121,7 @@ func TestMounts(t *testing.T) {
|
||||
},
|
||||
required: []string{"required0", "empty", "required1"},
|
||||
},
|
||||
expectedMounts: []Mount{{Path: "required0", HostPath: "required0"}, {Path: "required1", HostPath: "required1"}},
|
||||
expectedMounts: []Mount{{Path: "required0", HostPath: "required0", Options: mountOptions}, {Path: "required1", HostPath: "required1", Options: mountOptions}},
|
||||
},
|
||||
{
|
||||
description: "mounts adds multiple",
|
||||
@@ -129,10 +137,10 @@ func TestMounts(t *testing.T) {
|
||||
required: []string{"required0", "multiple", "required1"},
|
||||
},
|
||||
expectedMounts: []Mount{
|
||||
{Path: "required0", HostPath: "required0"},
|
||||
{Path: "multiple0", HostPath: "multiple0"},
|
||||
{Path: "multiple1", HostPath: "multiple1"},
|
||||
{Path: "required1", HostPath: "required1"},
|
||||
{Path: "required0", HostPath: "required0", Options: mountOptions},
|
||||
{Path: "multiple0", HostPath: "multiple0", Options: mountOptions},
|
||||
{Path: "multiple1", HostPath: "multiple1", Options: mountOptions},
|
||||
{Path: "required1", HostPath: "required1", Options: mountOptions},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -147,7 +155,7 @@ func TestMounts(t *testing.T) {
|
||||
required: []string{"required0", "multiple", "required1"},
|
||||
},
|
||||
expectedMounts: []Mount{
|
||||
{Path: "/located", HostPath: "/some/root/located"},
|
||||
{Path: "/located", HostPath: "/some/root/located", Options: mountOptions},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
58
internal/dxcore/api.go
Normal file
58
internal/dxcore/api.go
Normal file
@@ -0,0 +1,58 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package dxcore
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/go-nvml/pkg/dl"
|
||||
)
|
||||
|
||||
const (
|
||||
libraryName = "libdxcore.so"
|
||||
libraryLoadFlags = dl.RTLD_LAZY | dl.RTLD_GLOBAL
|
||||
)
|
||||
|
||||
// dxcore stores a reference to the dxcore dynamic library
|
||||
var dxcore *context
|
||||
|
||||
// Init initializes the dxcore dynamic library
|
||||
func Init() error {
|
||||
c, err := initContext()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dxcore = c
|
||||
return nil
|
||||
}
|
||||
|
||||
// Shutdown closes the dxcore dynamic library
|
||||
func Shutdown() error {
|
||||
if dxcore != nil && dxcore.initialized != 0 {
|
||||
dxcore.deinitContext()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetDriverStorePaths returns the list of driver store paths
|
||||
func GetDriverStorePaths() []string {
|
||||
var paths []string
|
||||
for i := 0; i < dxcore.getAdapterCount(); i++ {
|
||||
adapter := dxcore.getAdapter(i)
|
||||
paths = append(paths, adapter.getDriverStorePath())
|
||||
}
|
||||
|
||||
return paths
|
||||
}
|
||||
334
internal/dxcore/dxcore.c
Normal file
334
internal/dxcore/dxcore.c
Normal file
@@ -0,0 +1,334 @@
|
||||
/*
|
||||
* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <dlfcn.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "dxcore.h"
|
||||
|
||||
// We define log_write as an empty macro to allow dxcore to remain unchanged.
|
||||
#define log_write(...)
|
||||
|
||||
// We define the following macros to allow dxcore to remain largely unchanged.
|
||||
#define log_info(msg) log_write('I', __FILE__, __LINE__, msg)
|
||||
#define log_warn(msg) log_write('W', __FILE__, __LINE__, msg)
|
||||
#define log_err(msg) log_write('E', __FILE__, __LINE__, msg)
|
||||
#define log_infof(fmt, ...) log_write('I', __FILE__, __LINE__, fmt, __VA_ARGS__)
|
||||
#define log_warnf(fmt, ...) log_write('W', __FILE__, __LINE__, fmt, __VA_ARGS__)
|
||||
#define log_errf(fmt, ...) log_write('E', __FILE__, __LINE__, fmt, __VA_ARGS__)
|
||||
|
||||
|
||||
#define DXCORE_MAX_PATH 260
|
||||
|
||||
/*
|
||||
* List of components we expect to find in the driver store that we need to mount
|
||||
*/
|
||||
static const char * const dxcore_nvidia_driver_store_components[] = {
|
||||
"libcuda.so.1.1", /* Core library for cuda support */
|
||||
"libcuda_loader.so", /* Core library for cuda support on WSL */
|
||||
"libnvidia-ptxjitcompiler.so.1", /* Core library for PTX Jit support */
|
||||
"libnvidia-ml.so.1", /* Core library for nvml */
|
||||
"libnvidia-ml_loader.so", /* Core library for nvml on WSL */
|
||||
"nvidia-smi", /* nvidia-smi binary*/
|
||||
"nvcubins.bin", /* Binary containing GPU code for cuda */
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* List of functions and structures we need to communicate with libdxcore.
|
||||
* Documentation on these functions can be found on docs.microsoft.com in d3dkmthk.
|
||||
*/
|
||||
|
||||
struct dxcore_enumAdapters2;
|
||||
struct dxcore_queryAdapterInfo;
|
||||
|
||||
typedef int(*pfnDxcoreEnumAdapters2)(struct dxcore_enumAdapters2* pParams);
|
||||
typedef int(*pfnDxcoreQueryAdapterInfo)(struct dxcore_queryAdapterInfo* pParams);
|
||||
|
||||
struct dxcore_lib {
|
||||
void* hDxcoreLib;
|
||||
pfnDxcoreEnumAdapters2 pDxcoreEnumAdapters2;
|
||||
pfnDxcoreQueryAdapterInfo pDxcoreQueryAdapterInfo;
|
||||
};
|
||||
|
||||
struct dxcore_adapterInfo
|
||||
{
|
||||
unsigned int hAdapter;
|
||||
struct dxcore_luid AdapterLuid;
|
||||
unsigned int NumOfSources;
|
||||
unsigned int bPresentMoveRegionsPreferred;
|
||||
};
|
||||
|
||||
struct dxcore_enumAdapters2
|
||||
{
|
||||
unsigned int NumAdapters;
|
||||
struct dxcore_adapterInfo *pAdapters;
|
||||
};
|
||||
|
||||
enum dxcore_kmtqueryAdapterInfoType
|
||||
{
|
||||
DXCORE_QUERYDRIVERVERSION = 13,
|
||||
DXCORE_QUERYREGISTRY = 48,
|
||||
};
|
||||
|
||||
enum dxcore_queryregistry_type {
|
||||
DXCORE_QUERYREGISTRY_DRIVERSTOREPATH = 2,
|
||||
DXCORE_QUERYREGISTRY_DRIVERIMAGEPATH = 3,
|
||||
};
|
||||
|
||||
enum dxcore_queryregistry_status {
|
||||
DXCORE_QUERYREGISTRY_STATUS_SUCCESS = 0,
|
||||
DXCORE_QUERYREGISTRY_STATUS_BUFFER_OVERFLOW = 1,
|
||||
DXCORE_QUERYREGISTRY_STATUS_FAIL = 2,
|
||||
};
|
||||
|
||||
struct dxcore_queryregistry_info {
|
||||
enum dxcore_queryregistry_type QueryType;
|
||||
unsigned int QueryFlags;
|
||||
wchar_t ValueName[DXCORE_MAX_PATH];
|
||||
unsigned int ValueType;
|
||||
unsigned int PhysicalAdapterIndex;
|
||||
unsigned int OutputValueSize;
|
||||
enum dxcore_queryregistry_status Status;
|
||||
union {
|
||||
unsigned long long OutputQword;
|
||||
wchar_t Output;
|
||||
};
|
||||
};
|
||||
|
||||
struct dxcore_queryAdapterInfo
|
||||
{
|
||||
unsigned int hAdapter;
|
||||
enum dxcore_kmtqueryAdapterInfoType Type;
|
||||
void *pPrivateDriverData;
|
||||
unsigned int PrivateDriverDataSize;
|
||||
};
|
||||
|
||||
static int dxcore_query_adapter_info_helper(struct dxcore_lib* pLib,
|
||||
unsigned int hAdapter,
|
||||
enum dxcore_kmtqueryAdapterInfoType type,
|
||||
void* pPrivateDriverDate,
|
||||
unsigned int privateDriverDataSize)
|
||||
{
|
||||
struct dxcore_queryAdapterInfo queryAdapterInfo = { 0 };
|
||||
|
||||
queryAdapterInfo.hAdapter = hAdapter;
|
||||
queryAdapterInfo.Type = type;
|
||||
queryAdapterInfo.pPrivateDriverData = pPrivateDriverDate;
|
||||
queryAdapterInfo.PrivateDriverDataSize = privateDriverDataSize;
|
||||
|
||||
return pLib->pDxcoreQueryAdapterInfo(&queryAdapterInfo);
|
||||
}
|
||||
|
||||
static int dxcore_query_adapter_wddm_version(struct dxcore_lib* pLib, unsigned int hAdapter, unsigned int* version)
|
||||
{
|
||||
return dxcore_query_adapter_info_helper(pLib,
|
||||
hAdapter,
|
||||
DXCORE_QUERYDRIVERVERSION,
|
||||
(void*)version,
|
||||
sizeof(*version));
|
||||
}
|
||||
|
||||
static int dxcore_query_adapter_driverstore(struct dxcore_lib* pLib, unsigned int hAdapter, char** ppDriverStorePath)
|
||||
{
|
||||
struct dxcore_queryregistry_info params = {0};
|
||||
struct dxcore_queryregistry_info* pValue = NULL;
|
||||
wchar_t* pOutput;
|
||||
size_t outputSizeInBytes;
|
||||
size_t outputSize;
|
||||
|
||||
params.QueryType = DXCORE_QUERYREGISTRY_DRIVERSTOREPATH;
|
||||
|
||||
if (dxcore_query_adapter_info_helper(pLib,
|
||||
hAdapter,
|
||||
DXCORE_QUERYREGISTRY,
|
||||
(void*)¶ms,
|
||||
sizeof(params)))
|
||||
{
|
||||
log_err("Failed to query driver store path size for the WDDM Adapter");
|
||||
return (-1);
|
||||
}
|
||||
|
||||
if (params.OutputValueSize > DXCORE_MAX_PATH * sizeof(wchar_t)) {
|
||||
log_err("The driver store path size returned by dxcore is not valid");
|
||||
return (-1);
|
||||
}
|
||||
|
||||
outputSizeInBytes = (size_t)params.OutputValueSize;
|
||||
outputSize = outputSizeInBytes / sizeof(wchar_t);
|
||||
|
||||
pValue = calloc(sizeof(struct dxcore_queryregistry_info) + outputSizeInBytes + sizeof(wchar_t), 1);
|
||||
if (!pValue) {
|
||||
log_err("Out of memory while allocating temp buffer to query adapter info");
|
||||
return (-1);
|
||||
}
|
||||
|
||||
pValue->QueryType = DXCORE_QUERYREGISTRY_DRIVERSTOREPATH;
|
||||
pValue->OutputValueSize = (unsigned int)outputSizeInBytes;
|
||||
|
||||
if (dxcore_query_adapter_info_helper(pLib,
|
||||
hAdapter,
|
||||
DXCORE_QUERYREGISTRY,
|
||||
(void*)pValue,
|
||||
(unsigned int)(sizeof(struct dxcore_queryregistry_info) + outputSizeInBytes)))
|
||||
{
|
||||
log_err("Failed to query driver store path data for the WDDM Adapter");
|
||||
free(pValue);
|
||||
return (-1);
|
||||
}
|
||||
pOutput = (wchar_t*)(&pValue->Output);
|
||||
|
||||
// Make sure no matter what happened the wchar_t string is null terminated
|
||||
pOutput[outputSize] = L'\0';
|
||||
|
||||
// Convert the output into a regular c string
|
||||
*ppDriverStorePath = (char*)calloc(outputSize + 1, sizeof(char));
|
||||
if (!*ppDriverStorePath) {
|
||||
log_err("Out of memory while allocating the buffer for the driver store path");
|
||||
free(pValue);
|
||||
return (-1);
|
||||
}
|
||||
wcstombs(*ppDriverStorePath, pOutput, outputSize);
|
||||
|
||||
free(pValue);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void dxcore_add_adapter(struct dxcore_context* pCtx, struct dxcore_lib* pLib, struct dxcore_adapterInfo *pAdapterInfo)
|
||||
{
|
||||
unsigned int wddmVersion = 0;
|
||||
char* driverStorePath = NULL;
|
||||
|
||||
log_infof("Creating a new WDDM Adapter for hAdapter:%x luid:%llx", pAdapterInfo->hAdapter, *((unsigned long long*)&pAdapterInfo->AdapterLuid));
|
||||
|
||||
if (dxcore_query_adapter_wddm_version(pLib, pAdapterInfo->hAdapter, &wddmVersion)) {
|
||||
log_err("Failed to query the WDDM version for the specified adapter. Skipping it.");
|
||||
return;
|
||||
}
|
||||
|
||||
if (wddmVersion < 2700) {
|
||||
log_err("Found a WDDM adapter running a driver with pre-WDDM 2.7 . Skipping it.");
|
||||
return;
|
||||
}
|
||||
|
||||
if (dxcore_query_adapter_driverstore(pLib, pAdapterInfo->hAdapter, &driverStorePath)) {
|
||||
log_err("Failed to query driver store path for the WDDM Adapter . Skipping it.");
|
||||
return;
|
||||
}
|
||||
|
||||
// We got all the info we needed. Adding it to the tracking structure.
|
||||
{
|
||||
struct dxcore_adapter* newList;
|
||||
newList = realloc(pCtx->adapterList, sizeof(struct dxcore_adapter) * (pCtx->adapterCount + 1));
|
||||
if (!newList) {
|
||||
log_err("Out of memory when trying to add a new WDDM Adapter to the list of valid adapters");
|
||||
free(driverStorePath);
|
||||
return;
|
||||
}
|
||||
|
||||
pCtx->adapterList = newList;
|
||||
|
||||
pCtx->adapterList[pCtx->adapterCount].hAdapter = pAdapterInfo->hAdapter;
|
||||
pCtx->adapterList[pCtx->adapterCount].pDriverStorePath = driverStorePath;
|
||||
pCtx->adapterList[pCtx->adapterCount].wddmVersion = wddmVersion;
|
||||
pCtx->adapterCount++;
|
||||
}
|
||||
|
||||
log_infof("Adding new adapter via dxcore hAdapter:%x luid:%llx wddm version:%d", pAdapterInfo->hAdapter, *((unsigned long long*)&pAdapterInfo->AdapterLuid), wddmVersion);
|
||||
}
|
||||
|
||||
static void dxcore_enum_adapters(struct dxcore_context* pCtx, struct dxcore_lib* pLib)
|
||||
{
|
||||
struct dxcore_enumAdapters2 params = {0};
|
||||
unsigned int adapterIndex = 0;
|
||||
|
||||
params.NumAdapters = 0;
|
||||
params.pAdapters = NULL;
|
||||
|
||||
if (pLib->pDxcoreEnumAdapters2(¶ms)) {
|
||||
log_err("Failed to enumerate adapters via dxcore");
|
||||
return;
|
||||
}
|
||||
|
||||
params.pAdapters = malloc(sizeof(struct dxcore_adapterInfo) * params.NumAdapters);
|
||||
if (pLib->pDxcoreEnumAdapters2(¶ms)) {
|
||||
free(params.pAdapters);
|
||||
log_err("Failed to enumerate adapters via dxcore");
|
||||
return;
|
||||
}
|
||||
|
||||
for (adapterIndex = 0; adapterIndex < params.NumAdapters; adapterIndex++) {
|
||||
dxcore_add_adapter(pCtx, pLib, ¶ms.pAdapters[adapterIndex]);
|
||||
}
|
||||
|
||||
free(params.pAdapters);
|
||||
}
|
||||
|
||||
int dxcore_init_context(struct dxcore_context* pCtx)
|
||||
{
|
||||
struct dxcore_lib lib = {0};
|
||||
|
||||
pCtx->initialized = 0;
|
||||
pCtx->adapterCount = 0;
|
||||
pCtx->adapterList = NULL;
|
||||
|
||||
lib.hDxcoreLib = dlopen("libdxcore.so", RTLD_LAZY);
|
||||
if (!lib.hDxcoreLib) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
lib.pDxcoreEnumAdapters2 = (pfnDxcoreEnumAdapters2)dlsym(lib.hDxcoreLib, "D3DKMTEnumAdapters2");
|
||||
if (!lib.pDxcoreEnumAdapters2) {
|
||||
log_err("dxcore library is present but the symbol D3DKMTEnumAdapters2 is missing");
|
||||
goto error;
|
||||
}
|
||||
|
||||
lib.pDxcoreQueryAdapterInfo = (pfnDxcoreQueryAdapterInfo)dlsym(lib.hDxcoreLib, "D3DKMTQueryAdapterInfo");
|
||||
if (!lib.pDxcoreQueryAdapterInfo) {
|
||||
log_err("dxcore library is present but the symbol D3DKMTQueryAdapterInfo is missing");
|
||||
goto error;
|
||||
}
|
||||
|
||||
dxcore_enum_adapters(pCtx, &lib);
|
||||
|
||||
log_info("dxcore layer initialized successfully");
|
||||
pCtx->initialized = 1;
|
||||
|
||||
dlclose(lib.hDxcoreLib);
|
||||
|
||||
return 0;
|
||||
|
||||
error:
|
||||
dxcore_deinit_context(pCtx);
|
||||
|
||||
if (lib.hDxcoreLib)
|
||||
dlclose(lib.hDxcoreLib);
|
||||
|
||||
return (-1);
|
||||
}
|
||||
|
||||
static void dxcore_deinit_adapter(struct dxcore_adapter* pAdapter)
|
||||
{
|
||||
if (!pAdapter)
|
||||
return;
|
||||
|
||||
free(pAdapter->pDriverStorePath);
|
||||
}
|
||||
|
||||
void dxcore_deinit_context(struct dxcore_context* pCtx)
|
||||
{
|
||||
unsigned int adapterIndex = 0;
|
||||
|
||||
if (!pCtx)
|
||||
return;
|
||||
|
||||
for (adapterIndex = 0; adapterIndex < pCtx->adapterCount; adapterIndex++) {
|
||||
dxcore_deinit_adapter(&pCtx->adapterList[adapterIndex]);
|
||||
}
|
||||
|
||||
free(pCtx->adapterList);
|
||||
|
||||
pCtx->initialized = 0;
|
||||
}
|
||||
59
internal/dxcore/dxcore.go
Normal file
59
internal/dxcore/dxcore.go
Normal file
@@ -0,0 +1,59 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package dxcore
|
||||
|
||||
/*
|
||||
#cgo LDFLAGS: -Wl,--unresolved-symbols=ignore-in-object-files
|
||||
#include <dxcore.h>
|
||||
*/
|
||||
import "C"
|
||||
import (
|
||||
"fmt"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
type context C.struct_dxcore_context
|
||||
type adapter C.struct_dxcore_adapter
|
||||
|
||||
// initContext initializes the dxcore context and populates the list of adapters.
|
||||
func initContext() (*context, error) {
|
||||
cContext := C.struct_dxcore_context{}
|
||||
if C.dxcore_init_context(&cContext) != 0 {
|
||||
return nil, fmt.Errorf("failed to initialize dxcore context")
|
||||
}
|
||||
c := (*context)(&cContext)
|
||||
return c, nil
|
||||
}
|
||||
|
||||
// deinitContext deinitializes the dxcore context and frees the list of adapters.
// NOTE(review): the receiver is a value, so the C call is made on a copy of the
// struct. The adapterList pointer is shared with the caller's value, but any
// field the C side resets is reset on the copy only; confirm that callers do
// not reuse the context after this call.
|
||||
func (c context) deinitContext() {
|
||||
cContext := C.struct_dxcore_context(c)
|
||||
C.dxcore_deinit_context(&cContext)
|
||||
}
|
||||
|
||||
// getAdapterCount returns the adapterCount field of the context as an int.
func (c context) getAdapterCount() int {
|
||||
return int(c.adapterCount)
|
||||
}
|
||||
|
||||
func (c context) getAdapter(index int) adapter {
|
||||
arrayPointer := (*[1 << 30]C.struct_dxcore_adapter)(unsafe.Pointer(c.adapterList))
|
||||
return adapter(arrayPointer[index])
|
||||
}
|
||||
|
||||
// getDriverStorePath returns the adapter's pDriverStorePath C string converted
// to a Go string.
func (a adapter) getDriverStorePath() string {
|
||||
return C.GoString(a.pDriverStorePath)
|
||||
}
|
||||
39
internal/dxcore/dxcore.h
Normal file
39
internal/dxcore/dxcore.h
Normal file
@@ -0,0 +1,39 @@
|
||||
/*
|
||||
* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef HEADER_DXCORE_H_
|
||||
#define HEADER_DXCORE_H_
|
||||
|
||||
#define MAX_DXCORE_DRIVERSTORE_LIBRAIRIES (16)
|
||||
|
||||
/*
 * dxcore_luid is an identifier stored as an unsigned low part and a signed
 * high part.
 * NOTE(review): presumably mirrors the Windows LUID layout; confirm.
 */
struct dxcore_luid
|
||||
{
|
||||
unsigned int lowPart;
|
||||
int highPart;
|
||||
};
|
||||
|
||||
/*
 * dxcore_adapter describes a single adapter held in a dxcore_context's
 * adapterList.
 */
struct dxcore_adapter
|
||||
{
|
||||
/* NOTE(review): presumably the adapter handle returned by dxcore; confirm. */
unsigned int hAdapter;
|
||||
/* NOTE(review): presumably the WDDM version reported for the adapter; confirm. */
unsigned int wddmVersion;
|
||||
/* Driver store path as a C string; the Go bindings read it with C.GoString. */
char* pDriverStorePath;
|
||||
/* NOTE(review): presumably the number of used entries in pDriverStoreComponents; confirm. */
unsigned int driverStoreComponentCount;
|
||||
/* Fixed-size table with room for MAX_DXCORE_DRIVERSTORE_LIBRAIRIES entries. */
const char* pDriverStoreComponents[MAX_DXCORE_DRIVERSTORE_LIBRAIRIES];
|
||||
/* NOTE(review): presumably a back-pointer to the owning context; confirm. */
struct dxcore_context *pContext;
|
||||
};
|
||||
|
||||
/*
 * dxcore_context holds the adapter list managed by dxcore_init_context and
 * dxcore_deinit_context.
 */
struct dxcore_context
|
||||
{
|
||||
/* Number of entries in adapterList; dxcore_deinit_context iterates this many. */
unsigned int adapterCount;
|
||||
/* Array of adapters; released with free() by dxcore_deinit_context. */
struct dxcore_adapter *adapterList;
|
||||
|
||||
/* Set to 0 by dxcore_deinit_context. */
int initialized;
|
||||
};
|
||||
|
||||
|
||||
|
||||
/*
 * Initializes the context pointed to by pDxcore_context. The Go bindings in
 * this package treat any non-zero return value as a failure.
 */
int dxcore_init_context(struct dxcore_context* pDxcore_context);
|
||||
/*
 * Deinitializes every adapter in the context, frees the adapter list and marks
 * the context as uninitialized. A NULL pointer is ignored.
 */
void dxcore_deinit_context(struct dxcore_context* pDxcore_context);
|
||||
|
||||
#endif // HEADER_DXCORE_H_
|
||||
@@ -40,12 +40,7 @@ func (d mount) toSpec() *specs.Mount {
|
||||
s := specs.Mount{
|
||||
HostPath: d.HostPath,
|
||||
ContainerPath: d.Path,
|
||||
Options: []string{
|
||||
"ro",
|
||||
"nosuid",
|
||||
"nodev",
|
||||
"bind",
|
||||
},
|
||||
Options: d.Options,
|
||||
}
|
||||
|
||||
return &s
|
||||
|
||||
@@ -14,27 +14,26 @@
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package runtime
|
||||
package oci
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type modifyingRuntimeWrapper struct {
|
||||
logger *log.Logger
|
||||
runtime oci.Runtime
|
||||
ociSpec oci.Spec
|
||||
modifier oci.SpecModifier
|
||||
runtime Runtime
|
||||
ociSpec Spec
|
||||
modifier SpecModifier
|
||||
}
|
||||
|
||||
var _ oci.Runtime = (*modifyingRuntimeWrapper)(nil)
|
||||
var _ Runtime = (*modifyingRuntimeWrapper)(nil)
|
||||
|
||||
// NewModifyingRuntimeWrapper creates a runtime wrapper that applies the specified modifier to the OCI specification
|
||||
// before invoking the wrapped runtime. If the modifier is nil, the input runtime is returned.
|
||||
func NewModifyingRuntimeWrapper(logger *log.Logger, runtime oci.Runtime, spec oci.Spec, modifier oci.SpecModifier) oci.Runtime {
|
||||
func NewModifyingRuntimeWrapper(logger *log.Logger, runtime Runtime, spec Spec, modifier SpecModifier) Runtime {
|
||||
if modifier == nil {
|
||||
logger.Infof("Using low-level runtime with no modification")
|
||||
return runtime
|
||||
@@ -52,7 +51,7 @@ func NewModifyingRuntimeWrapper(logger *log.Logger, runtime oci.Runtime, spec oc
|
||||
// Exec checks whether a modification of the OCI specification is required and modifies it accordingly before exec-ing
|
||||
// into the wrapped runtime.
|
||||
func (r *modifyingRuntimeWrapper) Exec(args []string) error {
|
||||
if oci.HasCreateSubcommand(args) {
|
||||
if HasCreateSubcommand(args) {
|
||||
err := r.modify()
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not apply required modification to OCI specification: %v", err)
|
||||
@@ -14,13 +14,12 @@
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package runtime
|
||||
package oci
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
@@ -38,7 +37,7 @@ func TestExec(t *testing.T) {
|
||||
args []string
|
||||
modifyError error
|
||||
writeError error
|
||||
modifer oci.SpecModifier
|
||||
modifer SpecModifier
|
||||
}{
|
||||
{
|
||||
description: "no args forwards",
|
||||
@@ -92,9 +91,9 @@ func TestExec(t *testing.T) {
|
||||
hook.Reset()
|
||||
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
runtimeMock := &oci.RuntimeMock{}
|
||||
specMock := &oci.SpecMock{
|
||||
ModifyFunc: func(specModifier oci.SpecModifier) error {
|
||||
runtimeMock := &RuntimeMock{}
|
||||
specMock := &SpecMock{
|
||||
ModifyFunc: func(specModifier SpecModifier) error {
|
||||
return tc.modifyError
|
||||
},
|
||||
FlushFunc: func() error {
|
||||
@@ -144,8 +143,8 @@ func TestExec(t *testing.T) {
|
||||
func TestNilModiferReturnsRuntime(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
runtimeMock := &oci.RuntimeMock{}
|
||||
specMock := &oci.SpecMock{}
|
||||
runtimeMock := &RuntimeMock{}
|
||||
specMock := &SpecMock{}
|
||||
|
||||
shim := NewModifyingRuntimeWrapper(
|
||||
logger,
|
||||
33
internal/runtime/api.go
Normal file
33
internal/runtime/api.go
Normal file
@@ -0,0 +1,33 @@
|
||||
package runtime
|
||||
|
||||
// rt is the concrete runtime that New returns as an Interface; its fields are
// set through Option functions.
type rt struct {
|
||||
// logger is the logger used by the runtime. New installs a default logger
// when no option has set one.
logger *Logger
|
||||
// modeOverride is set by WithModeOverride.
modeOverride string
|
||||
}
|
||||
|
||||
// Interface is the interface for the runtime library.
|
||||
type Interface interface {
|
||||
// Run takes a list of string arguments and returns an error on failure.
// NOTE(review): presumably these are the command line arguments; confirm against callers.
Run([]string) error
|
||||
}
|
||||
|
||||
// Option is a function that configures the runtime.
|
||||
type Option func(*rt)
|
||||
|
||||
// New creates a runtime with the specified options.
|
||||
func New(opts ...Option) Interface {
|
||||
r := rt{}
|
||||
for _, opt := range opts {
|
||||
opt(&r)
|
||||
}
|
||||
if r.logger == nil {
|
||||
r.logger = NewLogger()
|
||||
}
|
||||
return &r
|
||||
}
|
||||
|
||||
// WithModeOverride allows for overriding the mode specified in the config.
|
||||
func WithModeOverride(mode string) Option {
|
||||
return func(r *rt) {
|
||||
r.modeOverride = mode
|
||||
}
|
||||
}
|
||||
@@ -14,7 +14,7 @@
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package main
|
||||
package runtime
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
@@ -42,11 +42,17 @@ func NewLogger() *Logger {
|
||||
}
|
||||
}
|
||||
|
||||
// UpdateLogger constructs a Logger with a predefined formatter
|
||||
func UpdateLogger(filename string, logLevel string, argv []string) (*Logger, error) {
|
||||
// Update constructs a Logger with a predefined formatter
|
||||
func (l *Logger) Update(filename string, logLevel string, argv []string) error {
|
||||
|
||||
configFromArgs := parseArgs(argv)
|
||||
|
||||
level, logLevelError := configFromArgs.getLevel(logLevel)
|
||||
defer func() {
|
||||
if logLevelError != nil {
|
||||
l.Warn(logLevelError)
|
||||
}
|
||||
}()
|
||||
|
||||
var logFiles []*os.File
|
||||
var argLogFileError error
|
||||
@@ -55,7 +61,7 @@ func UpdateLogger(filename string, logLevel string, argv []string) (*Logger, err
|
||||
if !configFromArgs.version {
|
||||
configLogFile, err := createLogFile(filename)
|
||||
if err != nil {
|
||||
return logger, fmt.Errorf("error opening debug log file: %v", err)
|
||||
return fmt.Errorf("error opening debug log file: %v", err)
|
||||
}
|
||||
if configLogFile != nil {
|
||||
logFiles = append(logFiles, configLogFile)
|
||||
@@ -67,14 +73,15 @@ func UpdateLogger(filename string, logLevel string, argv []string) (*Logger, err
|
||||
}
|
||||
argLogFileError = err
|
||||
}
|
||||
defer func() {
|
||||
if argLogFileError != nil {
|
||||
l.Warnf("Failed to open log file: %v", argLogFileError)
|
||||
}
|
||||
}()
|
||||
|
||||
l := &Logger{
|
||||
Logger: logrus.New(),
|
||||
previousLogger: logger.Logger,
|
||||
logFiles: logFiles,
|
||||
}
|
||||
newLogger := logrus.New()
|
||||
|
||||
l.SetLevel(level)
|
||||
newLogger.SetLevel(level)
|
||||
if level == logrus.DebugLevel {
|
||||
logrus.SetReportCaller(true)
|
||||
// Shorten function and file names reported by the logger, by
|
||||
@@ -92,30 +99,28 @@ func UpdateLogger(filename string, logLevel string, argv []string) (*Logger, err
|
||||
}
|
||||
|
||||
if configFromArgs.format == "json" {
|
||||
l.SetFormatter(new(logrus.JSONFormatter))
|
||||
newLogger.SetFormatter(new(logrus.JSONFormatter))
|
||||
}
|
||||
|
||||
if len(logFiles) == 0 {
|
||||
l.SetOutput(io.Discard)
|
||||
newLogger.SetOutput(io.Discard)
|
||||
} else if len(logFiles) == 1 {
|
||||
l.SetOutput(logFiles[0])
|
||||
newLogger.SetOutput(logFiles[0])
|
||||
} else if len(logFiles) > 1 {
|
||||
var writers []io.Writer
|
||||
for _, f := range logFiles {
|
||||
writers = append(writers, f)
|
||||
}
|
||||
l.SetOutput(io.MultiWriter(writers...))
|
||||
newLogger.SetOutput(io.MultiWriter(writers...))
|
||||
}
|
||||
|
||||
if logLevelError != nil {
|
||||
l.Warn(logLevelError)
|
||||
*l = Logger{
|
||||
Logger: newLogger,
|
||||
previousLogger: l.Logger,
|
||||
logFiles: logFiles,
|
||||
}
|
||||
|
||||
if argLogFileError != nil {
|
||||
l.Warnf("Failed to open log file: %v", argLogFileError)
|
||||
}
|
||||
|
||||
return l, nil
|
||||
return nil
|
||||
}
|
||||
|
||||
// Reset closes the log file (if any) and resets the logger output to what it
|
||||
@@ -126,7 +131,9 @@ func (l *Logger) Reset() error {
|
||||
if previous == nil {
|
||||
previous = logrus.New()
|
||||
}
|
||||
logger = &Logger{Logger: previous}
|
||||
l.Logger = previous
|
||||
l.previousLogger = nil
|
||||
l.logFiles = nil
|
||||
}()
|
||||
|
||||
var errs []error
|
||||
34
internal/runtime/logger_test.go
Normal file
34
internal/runtime/logger_test.go
Normal file
@@ -0,0 +1,34 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package runtime
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestLogger(t *testing.T) {
|
||||
l := NewLogger()
|
||||
|
||||
l.Update("", "debug", nil)
|
||||
|
||||
require.Equal(t, logrus.DebugLevel, l.Logger.Level)
|
||||
require.Equal(t, logrus.InfoLevel, l.previousLogger.Level)
|
||||
|
||||
}
|
||||
109
internal/runtime/runtime.go
Normal file
109
internal/runtime/runtime.go
Normal file
@@ -0,0 +1,109 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package runtime
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
// Run is an entry point that allows for idiomatic handling of errors
|
||||
// when calling from the main function.
|
||||
func (r rt) Run(argv []string) (rerr error) {
|
||||
defer func() {
|
||||
if rerr != nil {
|
||||
r.logger.Errorf("%v", rerr)
|
||||
}
|
||||
}()
|
||||
|
||||
printVersion := hasVersionFlag(argv)
|
||||
if printVersion {
|
||||
fmt.Printf("%v version %v\n", "NVIDIA Container Runtime", info.GetVersionString(fmt.Sprintf("spec: %v", specs.Version)))
|
||||
}
|
||||
|
||||
cfg, err := config.GetConfig()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error loading config: %v", err)
|
||||
}
|
||||
if r.modeOverride != "" {
|
||||
cfg.NVIDIAContainerRuntimeConfig.Mode = r.modeOverride
|
||||
}
|
||||
|
||||
err = r.logger.Update(
|
||||
cfg.NVIDIAContainerRuntimeConfig.DebugFilePath,
|
||||
cfg.NVIDIAContainerRuntimeConfig.LogLevel,
|
||||
argv,
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to set up logger: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
if rerr != nil {
|
||||
r.logger.Errorf("%v", rerr)
|
||||
}
|
||||
r.logger.Reset()
|
||||
}()
|
||||
|
||||
// Print the config to the output.
|
||||
configJSON, err := json.MarshalIndent(cfg, "", " ")
|
||||
if err == nil {
|
||||
r.logger.Infof("Running with config:\n%v", string(configJSON))
|
||||
} else {
|
||||
r.logger.Infof("Running with config:\n%+v", cfg)
|
||||
}
|
||||
|
||||
r.logger.Debugf("Command line arguments: %v", argv)
|
||||
runtime, err := newNVIDIAContainerRuntime(r.logger.Logger, cfg, argv)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create NVIDIA Container Runtime: %v", err)
|
||||
}
|
||||
|
||||
if printVersion {
|
||||
fmt.Print("\n")
|
||||
}
|
||||
return runtime.Exec(argv)
|
||||
}
|
||||
|
||||
// Errorf forwards the format string and arguments to the Errorf method of the
// runtime's logger.
func (r rt) Errorf(format string, args ...interface{}) {
|
||||
r.logger.Errorf(format, args...)
|
||||
}
|
||||
|
||||
// hasVersionFlag reports whether any argument is a "version" flag, i.e. one or
// more leading dashes followed by exactly "version", optionally followed by
// "=value". Arguments without a leading dash are ignored.
//
// TODO: This should be refactored / combined with parseArgs in logger.
func hasVersionFlag(args []string) bool {
	for _, arg := range args {
		// Only the part before the first "=" names the flag.
		name := arg
		if eq := strings.Index(arg, "="); eq >= 0 {
			name = arg[:eq]
		}

		// Positional arguments carry no leading dash and are skipped.
		if !strings.HasPrefix(name, "-") {
			continue
		}

		if strings.TrimLeft(name, "-") == "version" {
			return true
		}
	}

	return false
}
|
||||
@@ -14,7 +14,7 @@
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package main
|
||||
package runtime
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
@@ -23,7 +23,6 @@ import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
@@ -50,7 +49,7 @@ func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config.Config, argv [
|
||||
}
|
||||
|
||||
// Create the wrapping runtime with the specified modifier
|
||||
r := runtime.NewModifyingRuntimeWrapper(
|
||||
r := oci.NewModifyingRuntimeWrapper(
|
||||
logger,
|
||||
lowLevelRuntime,
|
||||
ociSpec,
|
||||
@@ -14,20 +14,52 @@
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package main
|
||||
package runtime
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
const (
|
||||
runcExecutableName = "runc"
|
||||
)
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
// TEST SETUP
|
||||
// Determine the module root and the test binary path
|
||||
var err error
|
||||
moduleRoot, err := test.GetModuleRoot()
|
||||
if err != nil {
|
||||
logrus.Fatalf("error in test setup: could not get module root: %v", err)
|
||||
}
|
||||
testBinPath := filepath.Join(moduleRoot, "test", "bin")
|
||||
|
||||
// Set the environment variables for the test
|
||||
os.Setenv("PATH", test.PrependToPath(testBinPath, moduleRoot))
|
||||
|
||||
// Confirm that the environment is configured correctly
|
||||
runcPath, err := exec.LookPath(runcExecutableName)
|
||||
if err != nil || filepath.Join(testBinPath, runcExecutableName) != runcPath {
|
||||
logrus.Fatalf("error in test setup: mock runc path set incorrectly in TestMain(): %v", err)
|
||||
}
|
||||
|
||||
// RUN TESTS
|
||||
exitCode := m.Run()
|
||||
|
||||
os.Exit(exitCode)
|
||||
}
|
||||
|
||||
func TestFactoryMethod(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
@@ -23,3 +23,9 @@ Breaks: nvidia-container-runtime (<= 3.5.0-1), nvidia-container-runtime-hook, nv
|
||||
Replaces: nvidia-container-runtime (<= 3.5.0-1), nvidia-container-runtime-hook
|
||||
Description: NVIDIA Container Toolkit Base
|
||||
Provides tools such as the NVIDIA Container Runtime and NVIDIA Container Toolkit CLI to enable GPU support in containers.
|
||||
|
||||
Package: nvidia-container-toolkit-operator-extensions
|
||||
Architecture: any
|
||||
Depends: ${misc:Depends}, nvidia-container-toolkit-base (= @VERSION@)
|
||||
Description: NVIDIA Container Toolkit Operator Extensions
|
||||
Provides tools for using the NVIDIA Container Toolkit with the GPU Operator
|
||||
|
||||
@@ -0,0 +1,2 @@
|
||||
nvidia-container-runtime.cdi /usr/bin
|
||||
nvidia-container-runtime.legacy /usr/bin
|
||||
@@ -11,12 +11,14 @@ URL: https://github.com/NVIDIA/nvidia-container-toolkit
|
||||
License: Apache-2.0
|
||||
|
||||
Source0: nvidia-container-runtime-hook
|
||||
Source1: nvidia-container-runtime
|
||||
Source2: nvidia-ctk
|
||||
Source3: config.toml
|
||||
Source4: oci-nvidia-hook
|
||||
Source5: oci-nvidia-hook.json
|
||||
Source6: LICENSE
|
||||
Source1: nvidia-ctk
|
||||
Source2: config.toml
|
||||
Source3: oci-nvidia-hook
|
||||
Source4: oci-nvidia-hook.json
|
||||
Source5: LICENSE
|
||||
Source6: nvidia-container-runtime
|
||||
Source7: nvidia-container-runtime.cdi
|
||||
Source8: nvidia-container-runtime.legacy
|
||||
|
||||
Obsoletes: nvidia-container-runtime <= 3.5.0-1, nvidia-container-runtime-hook <= 1.4.0-2
|
||||
Provides: nvidia-container-runtime
|
||||
@@ -35,12 +37,14 @@ Requires: libseccomp
|
||||
Provides tools and utilities to enable GPU support in containers.
|
||||
|
||||
%prep
|
||||
cp %{SOURCE0} %{SOURCE1} %{SOURCE2} %{SOURCE3} %{SOURCE4} %{SOURCE5} %{SOURCE6} .
|
||||
cp %{SOURCE0} %{SOURCE1} %{SOURCE2} %{SOURCE3} %{SOURCE4} %{SOURCE5} %{SOURCE6} %{SOURCE7} %{SOURCE8} .
|
||||
|
||||
%install
|
||||
mkdir -p %{buildroot}%{_bindir}
|
||||
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime-hook
|
||||
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime
|
||||
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime.cdi
|
||||
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime.legacy
|
||||
install -m 755 -t %{buildroot}%{_bindir} nvidia-ctk
|
||||
|
||||
mkdir -p %{buildroot}/etc/nvidia-container-runtime
|
||||
@@ -57,10 +61,10 @@ mkdir -p %{_localstatedir}/lib/rpm-state/nvidia-container-toolkit
|
||||
cp -af %{_bindir}/nvidia-container-runtime-hook %{_localstatedir}/lib/rpm-state/nvidia-container-toolkit
|
||||
|
||||
%posttrans
|
||||
if [ ! -e %{_bindir}/nvidia-container-runtime-hook ]; then
|
||||
if [ ! -e %{_bindir}/nvidia-container-runtime-hook ]; then
|
||||
# repairing lost file nvidia-container-runtime-hook
|
||||
cp -avf %{_localstatedir}/lib/rpm-state/nvidia-container-toolkit/nvidia-container-runtime-hook %{_bindir}
|
||||
fi
|
||||
cp -avf %{_localstatedir}/lib/rpm-state/nvidia-container-toolkit/nvidia-container-runtime-hook %{_bindir}
|
||||
fi
|
||||
rm -rf %{_localstatedir}/lib/rpm-state/nvidia-container-toolkit
|
||||
ln -sf %{_bindir}/nvidia-container-runtime-hook %{_bindir}/nvidia-container-toolkit
|
||||
|
||||
@@ -97,3 +101,17 @@ Provides tools such as the NVIDIA Container Runtime and NVIDIA Container Toolkit
|
||||
%config /etc/nvidia-container-runtime/config.toml
|
||||
%{_bindir}/nvidia-container-runtime
|
||||
%{_bindir}/nvidia-ctk
|
||||
|
||||
# The OPERATOR EXTENSIONS package consists of components that are required to enable GPU support in Kubernetes.
|
||||
# This package is not distributed as part of the NVIDIA Container Toolkit RPMs.
|
||||
%package operator-extensions
|
||||
Summary: NVIDIA Container Toolkit Operator Extensions
|
||||
Requires: nvidia-container-toolkit-base == %{version}-%{release}
|
||||
|
||||
%description operator-extensions
|
||||
Provides tools for using the NVIDIA Container Toolkit with the GPU Operator
|
||||
|
||||
%files operator-extensions
|
||||
%license LICENSE
|
||||
%{_bindir}/nvidia-container-runtime.cdi
|
||||
%{_bindir}/nvidia-container-runtime.legacy
|
||||
|
||||
42
pkg/nvcdi/api.go
Normal file
42
pkg/nvcdi/api.go
Normal file
@@ -0,0 +1,42 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package nvcdi
|
||||
|
||||
import (
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
||||
)
|
||||
|
||||
const (
|
||||
// ModeAuto configures the CDI spec generator to automatically detect the system configuration
|
||||
ModeAuto = "auto"
|
||||
// ModeNvml configures the CDI spec generator to use the NVML library.
|
||||
ModeNvml = "nvml"
|
||||
// ModeWsl configures the CDI spec generator to generate a WSL spec.
|
||||
ModeWsl = "wsl"
|
||||
)
|
||||
|
||||
// Interface defines the API for the nvcdi package
|
||||
type Interface interface {
|
||||
// GetCommonEdits returns a set of CDI container edits.
// NOTE(review): presumably the edits that are not tied to a specific device; confirm against the implementations.
GetCommonEdits() (*cdi.ContainerEdits, error)
|
||||
// GetAllDeviceSpecs returns a list of CDI device specs.
// NOTE(review): presumably one per discovered device; confirm against the implementations.
GetAllDeviceSpecs() ([]specs.Device, error)
|
||||
// GetGPUDeviceEdits returns the CDI edits for the full GPU represented by the device.
GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error)
|
||||
// GetGPUDeviceSpecs returns the CDI device specs for the full GPU represented by the device.
// The int is passed to the device namer together with the device.
GetGPUDeviceSpecs(int, device.Device) (*specs.Device, error)
|
||||
// GetMIGDeviceEdits returns CDI edits for a MIG device and the device it is passed with.
// NOTE(review): presumably the first argument is the parent GPU; confirm.
GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.ContainerEdits, error)
|
||||
// GetMIGDeviceSpecs returns the CDI device spec for a MIG device.
// NOTE(review): presumably the ints are the indices of the parent GPU and the MIG device; confirm.
GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error)
|
||||
}
|
||||
@@ -14,20 +14,21 @@
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package generate
|
||||
package nvcdi
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||
)
|
||||
|
||||
// NewCommonDiscoverer returns a discoverer for entities that are not associated with a specific CDI device.
|
||||
// newCommonNVMLDiscoverer returns a discoverer for entities that are not associated with a specific CDI device.
|
||||
// This includes driver libraries and meta devices, for example.
|
||||
func NewCommonDiscoverer(logger *logrus.Logger, driverRoot string, nvidiaCTKPath string, nvmllib nvml.Interface) (discover.Discover, error) {
|
||||
func newCommonNVMLDiscoverer(logger *logrus.Logger, driverRoot string, nvidiaCTKPath string, nvmllib nvml.Interface) (discover.Discover, error) {
|
||||
metaDevices := discover.NewDeviceDiscoverer(
|
||||
logger,
|
||||
lookup.NewCharDeviceLocator(
|
||||
37
pkg/nvcdi/device-wsl.go
Normal file
37
pkg/nvcdi/device-wsl.go
Normal file
@@ -0,0 +1,37 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package nvcdi
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const (
|
||||
dxgDeviceNode = "/dev/dxg"
|
||||
)
|
||||
|
||||
// newDXGDeviceDiscoverer returns a Discoverer for DXG devices under WSL2.
|
||||
func newDXGDeviceDiscoverer(logger *logrus.Logger, driverRoot string) discover.Discover {
|
||||
deviceNodes := discover.NewCharDeviceDiscoverer(
|
||||
logger,
|
||||
[]string{dxgDeviceNode},
|
||||
driverRoot,
|
||||
)
|
||||
|
||||
return deviceNodes
|
||||
}
|
||||
@@ -14,7 +14,7 @@
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package generate
|
||||
package nvcdi
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
@@ -41,12 +41,18 @@ func NewDriverDiscoverer(logger *logrus.Logger, driverRoot string, nvidiaCTKPath
|
||||
return nil, fmt.Errorf("failed to create discoverer for driver libraries: %v", err)
|
||||
}
|
||||
|
||||
ipcs, err := discover.NewIPCDiscoverer(logger, driverRoot)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create discoverer for IPC sockets: %v", err)
|
||||
}
|
||||
|
||||
firmwares := NewDriverFirmwareDiscoverer(logger, driverRoot, version)
|
||||
|
||||
binaries := NewDriverBinariesDiscoverer(logger, driverRoot)
|
||||
|
||||
d := discover.Merge(
|
||||
libraries,
|
||||
ipcs,
|
||||
firmwares,
|
||||
binaries,
|
||||
)
|
||||
@@ -87,7 +93,7 @@ func NewDriverLibraryDiscoverer(logger *logrus.Logger, driverRoot string, nvidia
|
||||
|
||||
// NewDriverFirmwareDiscoverer creates a discoverer for GSP firmware associated with the specified driver version.
|
||||
func NewDriverFirmwareDiscoverer(logger *logrus.Logger, driverRoot string, version string) discover.Discover {
|
||||
gspFirmwarePath := filepath.Join("/lib/firmware/nvidia", version, "gsp.bin")
|
||||
gspFirmwarePath := filepath.Join("/lib/firmware/nvidia", version, "gsp*.bin")
|
||||
return discover.NewMounts(
|
||||
logger,
|
||||
lookup.NewFileLocator(
|
||||
106
pkg/nvcdi/driver-wsl.go
Normal file
106
pkg/nvcdi/driver-wsl.go
Normal file
@@ -0,0 +1,106 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package nvcdi
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/dxcore"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// requiredDriverStoreFiles lists the file names that newWSLDriverStoreDiscoverer
// asks the file locator to find in the driver store search paths.
var requiredDriverStoreFiles = []string{
|
||||
"libcuda.so.1.1", /* Core library for cuda support */
|
||||
"libcuda_loader.so", /* Core library for cuda support on WSL */
|
||||
"libnvidia-ptxjitcompiler.so.1", /* Core library for PTX Jit support */
|
||||
"libnvidia-ml.so.1", /* Core library for nvml */
|
||||
"libnvidia-ml_loader.so", /* Core library for nvml on WSL */
|
||||
"libdxcore.so", /* Core library for dxcore support */
|
||||
"nvcubins.bin", /* Binary containing GPU code for cuda */
|
||||
"nvidia-smi", /* nvidia-smi binary*/
|
||||
}
|
||||
|
||||
// newWSLDriverDiscoverer returns a Discoverer for WSL2 drivers.
|
||||
func newWSLDriverDiscoverer(logger *logrus.Logger, driverRoot string, nvidiaCTKPath string) (discover.Discover, error) {
|
||||
err := dxcore.Init()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to initialize dxcore: %v", err)
|
||||
}
|
||||
defer dxcore.Shutdown()
|
||||
|
||||
driverStorePaths := dxcore.GetDriverStorePaths()
|
||||
if len(driverStorePaths) == 0 {
|
||||
return nil, fmt.Errorf("no driver store paths found")
|
||||
}
|
||||
logger.Infof("Using WSL driver store paths: %v", driverStorePaths)
|
||||
|
||||
return newWSLDriverStoreDiscoverer(logger, driverRoot, nvidiaCTKPath, driverStorePaths)
|
||||
}
|
||||
|
||||
// newWSLDriverStoreDiscoverer returns a Discoverer for WSL2 drivers in the driver store associated with a dxcore adapter.
|
||||
func newWSLDriverStoreDiscoverer(logger *logrus.Logger, driverRoot string, nvidiaCTKPath string, driverStorePaths []string) (discover.Discover, error) {
|
||||
var searchPaths []string
|
||||
seen := make(map[string]bool)
|
||||
for _, path := range driverStorePaths {
|
||||
if seen[path] {
|
||||
continue
|
||||
}
|
||||
searchPaths = append(searchPaths, path)
|
||||
}
|
||||
if len(searchPaths) > 1 {
|
||||
logger.Warnf("Found multiple driver store paths: %v", searchPaths)
|
||||
}
|
||||
driverStorePath := searchPaths[0]
|
||||
searchPaths = append(searchPaths, "/usr/lib/wsl/lib")
|
||||
|
||||
libraries := discover.NewMounts(
|
||||
logger,
|
||||
lookup.NewFileLocator(
|
||||
lookup.WithLogger(logger),
|
||||
lookup.WithSearchPaths(
|
||||
searchPaths...,
|
||||
),
|
||||
lookup.WithCount(1),
|
||||
),
|
||||
driverRoot,
|
||||
requiredDriverStoreFiles,
|
||||
)
|
||||
|
||||
// On WSL2 the driver store location is used unchanged.
|
||||
// For this reason we need to create a symlink from /usr/bin/nvidia-smi to the nvidia-smi binary in the driver store.
|
||||
target := filepath.Join(driverStorePath, "nvidia-smi")
|
||||
link := "/usr/bin/nvidia-smi"
|
||||
links := []string{fmt.Sprintf("%s::%s", target, link)}
|
||||
symlinkHook := discover.CreateCreateSymlinkHook(nvidiaCTKPath, links)
|
||||
|
||||
cfg := &discover.Config{
|
||||
DriverRoot: driverRoot,
|
||||
NvidiaCTKPath: nvidiaCTKPath,
|
||||
}
|
||||
ldcacheHook, _ := discover.NewLDCacheUpdateHook(logger, libraries, cfg)
|
||||
|
||||
d := discover.Merge(
|
||||
libraries,
|
||||
symlinkHook,
|
||||
ldcacheHook,
|
||||
)
|
||||
|
||||
return d, nil
|
||||
}
|
||||
@@ -14,7 +14,7 @@
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package generate
|
||||
package nvcdi
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
@@ -23,12 +23,50 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/drm"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||
)
|
||||
|
||||
// GetGPUDeviceSpecs returns the CDI device specs for the full GPU represented by 'device'.
|
||||
func (l *nvmllib) GetGPUDeviceSpecs(i int, d device.Device) (*specs.Device, error) {
|
||||
edits, err := l.GetGPUDeviceEdits(d)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get edits for device: %v", err)
|
||||
}
|
||||
|
||||
name, err := l.deviceNamer.GetDeviceName(i, d)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get device name: %v", err)
|
||||
}
|
||||
|
||||
spec := specs.Device{
|
||||
Name: name,
|
||||
ContainerEdits: *edits.ContainerEdits,
|
||||
}
|
||||
|
||||
return &spec, nil
|
||||
}
|
||||
|
||||
// GetGPUDeviceEdits returns the CDI edits for the full GPU represented by 'device'.
|
||||
func (l *nvmllib) GetGPUDeviceEdits(d device.Device) (*cdi.ContainerEdits, error) {
|
||||
device, err := newFullGPUDiscoverer(l.logger, l.driverRoot, l.nvidiaCTKPath, d)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create device discoverer: %v", err)
|
||||
}
|
||||
|
||||
editsForDevice, err := edits.FromDiscoverer(device)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create container edits for device: %v", err)
|
||||
}
|
||||
|
||||
return editsForDevice, nil
|
||||
}
|
||||
|
||||
// byPathHookDiscoverer discovers the entities required for injecting by-path DRM device links
|
||||
type byPathHookDiscoverer struct {
|
||||
logger *logrus.Logger
|
||||
@@ -39,8 +77,8 @@ type byPathHookDiscoverer struct {
|
||||
|
||||
var _ discover.Discover = (*byPathHookDiscoverer)(nil)
|
||||
|
||||
// NewFullGPUDiscoverer creates a discoverer for the full GPU defined by the specified device.
|
||||
func NewFullGPUDiscoverer(logger *logrus.Logger, driverRoot string, nvidiaCTKPath string, d device.Device) (discover.Discover, error) {
|
||||
// newFullGPUDiscoverer creates a discoverer for the full GPU defined by the specified device.
|
||||
func newFullGPUDiscoverer(logger *logrus.Logger, driverRoot string, nvidiaCTKPath string, d device.Device) (discover.Discover, error) {
|
||||
// TODO: The functionality to get device paths should be integrated into the go-nvlib/pkg/device.Device interface.
|
||||
// This will allow reuse here and in other code where the paths are queried such as the NVIDIA device plugin.
|
||||
minor, ret := d.GetMinorNumber()
|
||||
93
pkg/nvcdi/lib-nvml.go
Normal file
93
pkg/nvcdi/lib-nvml.go
Normal file
@@ -0,0 +1,93 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package nvcdi
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
||||
)
|
||||
|
||||
// nvmllib is the nvcdilib variant whose methods generate CDI specs using NVML.
// New converts the configured nvcdilib to this type when the resolved mode is ModeNvml.
type nvmllib nvcdilib
|
||||
|
||||
var _ Interface = (*nvmllib)(nil)
|
||||
|
||||
// GetAllDeviceSpecs returns the device specs for all available devices.
|
||||
func (l *nvmllib) GetAllDeviceSpecs() ([]specs.Device, error) {
|
||||
var deviceSpecs []specs.Device
|
||||
|
||||
gpuDeviceSpecs, err := l.getGPUDeviceSpecs()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
deviceSpecs = append(deviceSpecs, gpuDeviceSpecs...)
|
||||
|
||||
migDeviceSpecs, err := l.getMigDeviceSpecs()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
deviceSpecs = append(deviceSpecs, migDeviceSpecs...)
|
||||
|
||||
return deviceSpecs, nil
|
||||
}
|
||||
|
||||
// GetCommonEdits generates a CDI specification that can be used for ANY devices
|
||||
func (l *nvmllib) GetCommonEdits() (*cdi.ContainerEdits, error) {
|
||||
common, err := newCommonNVMLDiscoverer(l.logger, l.driverRoot, l.nvidiaCTKPath, l.nvmllib)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create discoverer for common entities: %v", err)
|
||||
}
|
||||
|
||||
return edits.FromDiscoverer(common)
|
||||
}
|
||||
|
||||
func (l *nvmllib) getGPUDeviceSpecs() ([]specs.Device, error) {
|
||||
var deviceSpecs []specs.Device
|
||||
err := l.devicelib.VisitDevices(func(i int, d device.Device) error {
|
||||
deviceSpec, err := l.GetGPUDeviceSpecs(i, d)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
deviceSpecs = append(deviceSpecs, *deviceSpec)
|
||||
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to generate CDI edits for GPU devices: %v", err)
|
||||
}
|
||||
return deviceSpecs, err
|
||||
}
|
||||
|
||||
func (l *nvmllib) getMigDeviceSpecs() ([]specs.Device, error) {
|
||||
var deviceSpecs []specs.Device
|
||||
err := l.devicelib.VisitMigDevices(func(i int, d device.Device, j int, mig device.MigDevice) error {
|
||||
deviceSpec, err := l.GetMIGDeviceSpecs(i, d, j, mig)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
deviceSpecs = append(deviceSpecs, *deviceSpec)
|
||||
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to generate CDI edits for GPU devices: %v", err)
|
||||
}
|
||||
return deviceSpecs, err
|
||||
}
|
||||
76
pkg/nvcdi/lib-wsl.go
Normal file
76
pkg/nvcdi/lib-wsl.go
Normal file
@@ -0,0 +1,76 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package nvcdi
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
||||
)
|
||||
|
||||
// wsllib is the nvcdilib variant whose methods generate CDI specs for WSL systems.
// New converts the configured nvcdilib to this type when the resolved mode is ModeWsl.
type wsllib nvcdilib
|
||||
|
||||
var _ Interface = (*wsllib)(nil)
|
||||
|
||||
// GetAllDeviceSpecs returns the device specs for all available devices.
|
||||
func (l *wsllib) GetAllDeviceSpecs() ([]specs.Device, error) {
|
||||
device := newDXGDeviceDiscoverer(l.logger, l.driverRoot)
|
||||
deviceEdits, err := edits.FromDiscoverer(device)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create container edits for DXG device: %v", err)
|
||||
}
|
||||
|
||||
deviceSpec := specs.Device{
|
||||
Name: "all",
|
||||
ContainerEdits: *deviceEdits.ContainerEdits,
|
||||
}
|
||||
|
||||
return []specs.Device{deviceSpec}, nil
|
||||
}
|
||||
|
||||
// GetCommonEdits generates a CDI specification that can be used for ANY devices
|
||||
func (l *wsllib) GetCommonEdits() (*cdi.ContainerEdits, error) {
|
||||
driver, err := newWSLDriverDiscoverer(l.logger, l.driverRoot, l.nvidiaCTKPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create discoverer for WSL driver: %v", err)
|
||||
}
|
||||
|
||||
return edits.FromDiscoverer(driver)
|
||||
}
|
||||
|
||||
// GetGPUDeviceEdits generates a CDI specification that can be used for GPU devices
|
||||
func (l *wsllib) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error) {
|
||||
return nil, fmt.Errorf("GetGPUDeviceEdits is not supported on WSL")
|
||||
}
|
||||
|
||||
// GetGPUDeviceSpecs returns the CDI device specs for the full GPU represented by 'device'.
|
||||
func (l *wsllib) GetGPUDeviceSpecs(i int, d device.Device) (*specs.Device, error) {
|
||||
return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported on WSL")
|
||||
}
|
||||
|
||||
// GetMIGDeviceEdits generates a CDI specification that can be used for MIG devices
|
||||
func (l *wsllib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.ContainerEdits, error) {
|
||||
return nil, fmt.Errorf("GetMIGDeviceEdits is not supported on WSL")
|
||||
}
|
||||
|
||||
// GetMIGDeviceSpecs returns the CDI device specs for the full MIG represented by 'device'.
|
||||
func (l *wsllib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) {
|
||||
return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported on WSL")
|
||||
}
|
||||
98
pkg/nvcdi/lib.go
Normal file
98
pkg/nvcdi/lib.go
Normal file
@@ -0,0 +1,98 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package nvcdi
|
||||
|
||||
import (
|
||||
"github.com/sirupsen/logrus"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/info"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||
)
|
||||
|
||||
type nvcdilib struct {
|
||||
logger *logrus.Logger
|
||||
nvmllib nvml.Interface
|
||||
mode string
|
||||
devicelib device.Interface
|
||||
deviceNamer DeviceNamer
|
||||
driverRoot string
|
||||
nvidiaCTKPath string
|
||||
|
||||
infolib info.Interface
|
||||
}
|
||||
|
||||
// New creates a new nvcdi library
|
||||
func New(opts ...Option) Interface {
|
||||
l := &nvcdilib{}
|
||||
for _, opt := range opts {
|
||||
opt(l)
|
||||
}
|
||||
if l.mode == "" {
|
||||
l.mode = ModeAuto
|
||||
}
|
||||
if l.logger == nil {
|
||||
l.logger = logrus.StandardLogger()
|
||||
}
|
||||
if l.deviceNamer == nil {
|
||||
l.deviceNamer, _ = NewDeviceNamer(DeviceNameStrategyIndex)
|
||||
}
|
||||
if l.driverRoot == "" {
|
||||
l.driverRoot = "/"
|
||||
}
|
||||
if l.nvidiaCTKPath == "" {
|
||||
l.nvidiaCTKPath = "/usr/bin/nvidia-ctk"
|
||||
}
|
||||
if l.infolib == nil {
|
||||
l.infolib = info.New()
|
||||
}
|
||||
|
||||
switch l.resolveMode() {
|
||||
case ModeNvml:
|
||||
if l.nvmllib == nil {
|
||||
l.nvmllib = nvml.New()
|
||||
}
|
||||
if l.devicelib == nil {
|
||||
l.devicelib = device.New(device.WithNvml(l.nvmllib))
|
||||
}
|
||||
|
||||
return (*nvmllib)(l)
|
||||
case ModeWsl:
|
||||
return (*wsllib)(l)
|
||||
}
|
||||
|
||||
// TODO: We want an error here.
|
||||
return nil
|
||||
}
|
||||
|
||||
// resolveMode resolves the mode for CDI spec generation based on the current system.
|
||||
func (l *nvcdilib) resolveMode() (rmode string) {
|
||||
if l.mode != ModeAuto {
|
||||
return l.mode
|
||||
}
|
||||
defer func() {
|
||||
l.logger.Infof("Auto-detected mode as %q", rmode)
|
||||
}()
|
||||
|
||||
isWSL, reason := l.infolib.HasDXCore()
|
||||
l.logger.Debugf("Is WSL-based system? %v: %v", isWSL, reason)
|
||||
|
||||
if isWSL {
|
||||
return ModeWsl
|
||||
}
|
||||
|
||||
return ModeNvml
|
||||
}
|
||||
88
pkg/nvcdi/lib_test.go
Normal file
88
pkg/nvcdi/lib_test.go
Normal file
@@ -0,0 +1,88 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package nvcdi
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestResolveMode(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
mode string
|
||||
// TODO: This should be a proper mock
|
||||
hasDXCore bool
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
mode: "auto",
|
||||
hasDXCore: true,
|
||||
expected: "wsl",
|
||||
},
|
||||
{
|
||||
mode: "auto",
|
||||
hasDXCore: false,
|
||||
expected: "nvml",
|
||||
},
|
||||
{
|
||||
mode: "nvml",
|
||||
hasDXCore: true,
|
||||
expected: "nvml",
|
||||
},
|
||||
{
|
||||
mode: "wsl",
|
||||
hasDXCore: false,
|
||||
expected: "wsl",
|
||||
},
|
||||
{
|
||||
mode: "not-auto",
|
||||
hasDXCore: true,
|
||||
expected: "not-auto",
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
|
||||
l := nvcdilib{
|
||||
logger: logger,
|
||||
mode: tc.mode,
|
||||
infolib: infoMock(tc.hasDXCore),
|
||||
}
|
||||
|
||||
require.Equal(t, tc.expected, l.resolveMode())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// infoMock is a minimal stand-in for the info library used by the tests.
// Its boolean value is what HasDXCore reports.
type infoMock bool

// HasDXCore reports the mock's underlying boolean value with an empty reason.
func (m infoMock) HasDXCore() (bool, string) {
	available := bool(m)
	return available, ""
}

// HasNvml panics because the code under test is not expected to call it.
func (infoMock) HasNvml() (bool, string) {
	panic("should not be called")
}

// IsTegraSystem panics because the code under test is not expected to call it.
func (infoMock) IsTegraSystem() (bool, string) {
	panic("should not be called")
}
|
||||
124
pkg/nvcdi/mig-device-nvml.go
Normal file
124
pkg/nvcdi/mig-device-nvml.go
Normal file
@@ -0,0 +1,124 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package nvcdi
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||
)
|
||||
|
||||
// GetMIGDeviceSpecs returns the CDI device specs for the full GPU represented by 'device'.
|
||||
func (l *nvmllib) GetMIGDeviceSpecs(i int, d device.Device, j int, mig device.MigDevice) (*specs.Device, error) {
|
||||
edits, err := l.GetMIGDeviceEdits(d, mig)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get edits for device: %v", err)
|
||||
}
|
||||
|
||||
name, err := l.deviceNamer.GetMigDeviceName(i, d, j, mig)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get device name: %v", err)
|
||||
}
|
||||
|
||||
spec := specs.Device{
|
||||
Name: name,
|
||||
ContainerEdits: *edits.ContainerEdits,
|
||||
}
|
||||
|
||||
return &spec, nil
|
||||
}
|
||||
|
||||
// GetMIGDeviceEdits returns the CDI edits for the MIG device represented by 'mig' on 'parent'.
|
||||
func (l *nvmllib) GetMIGDeviceEdits(parent device.Device, mig device.MigDevice) (*cdi.ContainerEdits, error) {
|
||||
gpu, ret := parent.GetMinorNumber()
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting GPU minor: %v", ret)
|
||||
}
|
||||
|
||||
gi, ret := mig.GetGpuInstanceId()
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting GPU Instance ID: %v", ret)
|
||||
}
|
||||
|
||||
ci, ret := mig.GetComputeInstanceId()
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting Compute Instance ID: %v", ret)
|
||||
}
|
||||
|
||||
editsForDevice, err := GetEditsForComputeInstance(l.logger, l.driverRoot, gpu, gi, ci)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create container edits for MIG device: %v", err)
|
||||
}
|
||||
|
||||
return editsForDevice, nil
|
||||
}
|
||||
|
||||
// GetEditsForComputeInstance returns the CDI edits for a particular compute instance defined by the (gpu, gi, ci) tuple
|
||||
func GetEditsForComputeInstance(logger *logrus.Logger, driverRoot string, gpu int, gi int, ci int) (*cdi.ContainerEdits, error) {
|
||||
computeInstance, err := newComputeInstanceDiscoverer(logger, driverRoot, gpu, gi, ci)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create discoverer for Compute Instance: %v", err)
|
||||
}
|
||||
|
||||
editsForDevice, err := edits.FromDiscoverer(computeInstance)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create container edits for Compute Instance: %v", err)
|
||||
}
|
||||
|
||||
return editsForDevice, nil
|
||||
}
|
||||
|
||||
// newComputeInstanceDiscoverer returns a discoverer for the specified compute instance
|
||||
func newComputeInstanceDiscoverer(logger *logrus.Logger, driverRoot string, gpu int, gi int, ci int) (discover.Discover, error) {
|
||||
parentPath := fmt.Sprintf("/dev/nvidia%d", gpu)
|
||||
|
||||
migCaps, err := nvcaps.NewMigCaps()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting MIG capability device paths: %v", err)
|
||||
}
|
||||
|
||||
giCap := nvcaps.NewGPUInstanceCap(gpu, gi)
|
||||
giCapDevicePath, err := migCaps.GetCapDevicePath(giCap)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get GI cap device path: %v", err)
|
||||
}
|
||||
|
||||
ciCap := nvcaps.NewComputeInstanceCap(gpu, gi, ci)
|
||||
ciCapDevicePath, err := migCaps.GetCapDevicePath(ciCap)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get CI cap device path: %v", err)
|
||||
}
|
||||
|
||||
deviceNodes := discover.NewCharDeviceDiscoverer(
|
||||
logger,
|
||||
[]string{
|
||||
parentPath,
|
||||
giCapDevicePath,
|
||||
ciCapDevicePath,
|
||||
},
|
||||
driverRoot,
|
||||
)
|
||||
|
||||
return deviceNodes, nil
|
||||
}
|
||||
@@ -14,7 +14,7 @@
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package generate
|
||||
package nvcdi
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
@@ -23,15 +23,20 @@ import (
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||
)
|
||||
|
||||
type deviceNamer interface {
|
||||
// DeviceNamer is an interface for getting device names
|
||||
type DeviceNamer interface {
|
||||
GetDeviceName(int, device.Device) (string, error)
|
||||
GetMigDeviceName(int, int, device.MigDevice) (string, error)
|
||||
GetMigDeviceName(int, device.Device, int, device.MigDevice) (string, error)
|
||||
}
|
||||
|
||||
// Supported device naming strategies
|
||||
const (
|
||||
deviceNameStrategyIndex = "index"
|
||||
deviceNameStrategyTypeIndex = "type-index"
|
||||
deviceNameStrategyUUID = "uuid"
|
||||
// DeviceNameStrategyIndex generates device names such as 0 or 1:0
|
||||
DeviceNameStrategyIndex = "index"
|
||||
// DeviceNameStrategyTypeIndex generates device names such as gpu0 or mig1:0
|
||||
DeviceNameStrategyTypeIndex = "type-index"
|
||||
// DeviceNameStrategyUUID uses the device UUID as the name
|
||||
DeviceNameStrategyUUID = "uuid"
|
||||
)
|
||||
|
||||
type deviceNameIndex struct {
|
||||
@@ -40,15 +45,15 @@ type deviceNameIndex struct {
|
||||
}
|
||||
type deviceNameUUID struct{}
|
||||
|
||||
// newDeviceNamer creates a Device Namer based on the supplied strategy.
|
||||
// NewDeviceNamer creates a Device Namer based on the supplied strategy.
|
||||
// This namer can be used to construct the names for MIG and GPU devices when generating the CDI spec.
|
||||
func newDeviceNamer(strategy string) (deviceNamer, error) {
|
||||
func NewDeviceNamer(strategy string) (DeviceNamer, error) {
|
||||
switch strategy {
|
||||
case deviceNameStrategyIndex:
|
||||
case DeviceNameStrategyIndex:
|
||||
return deviceNameIndex{}, nil
|
||||
case deviceNameStrategyTypeIndex:
|
||||
case DeviceNameStrategyTypeIndex:
|
||||
return deviceNameIndex{gpuPrefix: "gpu", migPrefix: "mig"}, nil
|
||||
case deviceNameStrategyUUID:
|
||||
case DeviceNameStrategyUUID:
|
||||
return deviceNameUUID{}, nil
|
||||
}
|
||||
|
||||
@@ -61,7 +66,7 @@ func (s deviceNameIndex) GetDeviceName(i int, d device.Device) (string, error) {
|
||||
}
|
||||
|
||||
// GetMigDeviceName returns the name for the specified device based on the naming strategy
|
||||
func (s deviceNameIndex) GetMigDeviceName(i int, j int, d device.MigDevice) (string, error) {
|
||||
func (s deviceNameIndex) GetMigDeviceName(i int, d device.Device, j int, mig device.MigDevice) (string, error) {
|
||||
return fmt.Sprintf("%s%d:%d", s.migPrefix, i, j), nil
|
||||
}
|
||||
|
||||
@@ -75,8 +80,8 @@ func (s deviceNameUUID) GetDeviceName(i int, d device.Device) (string, error) {
|
||||
}
|
||||
|
||||
// GetMigDeviceName returns the name for the specified device based on the naming strategy
|
||||
func (s deviceNameUUID) GetMigDeviceName(i int, j int, d device.MigDevice) (string, error) {
|
||||
uuid, ret := d.GetUUID()
|
||||
func (s deviceNameUUID) GetMigDeviceName(i int, d device.Device, j int, mig device.MigDevice) (string, error) {
|
||||
uuid, ret := mig.GetUUID()
|
||||
if ret != nvml.SUCCESS {
|
||||
return "", fmt.Errorf("failed to get device UUID: %v", ret)
|
||||
}
|
||||
75
pkg/nvcdi/options.go
Normal file
75
pkg/nvcdi/options.go
Normal file
@@ -0,0 +1,75 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package nvcdi
|
||||
|
||||
import (
|
||||
"github.com/sirupsen/logrus"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||
)
|
||||
|
||||
// Option is a function that configures the nvcdilib
|
||||
// Options are applied by New, in the order given, before defaults are filled in.
type Option func(*nvcdilib)
|
||||
|
||||
// WithDeviceLib sets the device library for the library
|
||||
func WithDeviceLib(devicelib device.Interface) Option {
|
||||
return func(l *nvcdilib) {
|
||||
l.devicelib = devicelib
|
||||
}
|
||||
}
|
||||
|
||||
// WithDeviceNamer sets the device namer for the library
|
||||
func WithDeviceNamer(namer DeviceNamer) Option {
|
||||
return func(l *nvcdilib) {
|
||||
l.deviceNamer = namer
|
||||
}
|
||||
}
|
||||
|
||||
// WithDriverRoot sets the driver root for the library
|
||||
func WithDriverRoot(root string) Option {
|
||||
return func(l *nvcdilib) {
|
||||
l.driverRoot = root
|
||||
}
|
||||
}
|
||||
|
||||
// WithLogger sets the logger for the library
|
||||
func WithLogger(logger *logrus.Logger) Option {
|
||||
return func(l *nvcdilib) {
|
||||
l.logger = logger
|
||||
}
|
||||
}
|
||||
|
||||
// WithNVIDIACTKPath sets the path to the NVIDIA Container Toolkit CLI path for the library
|
||||
func WithNVIDIACTKPath(path string) Option {
|
||||
return func(l *nvcdilib) {
|
||||
l.nvidiaCTKPath = path
|
||||
}
|
||||
}
|
||||
|
||||
// WithNvmlLib sets the nvml library for the library
|
||||
func WithNvmlLib(nvmllib nvml.Interface) Option {
|
||||
return func(l *nvcdilib) {
|
||||
l.nvmllib = nvmllib
|
||||
}
|
||||
}
|
||||
|
||||
// WithMode sets the discovery mode for the library
|
||||
func WithMode(mode string) Option {
|
||||
return func(l *nvcdilib) {
|
||||
l.mode = mode
|
||||
}
|
||||
}
|
||||
@@ -40,6 +40,12 @@ PACKAGE_IMAGE=$1
|
||||
# For example, we don't release release candidates of nvidia-container-runtime and nvidia-docker2
|
||||
# since these only bump the nvidia-container-toolkit dependency.
|
||||
function skip-for-release-candidate() {
|
||||
# We always skip nvidia-container-toolkit-operator-extensions packages
|
||||
if [[ "${package_name/"nvidia-container-toolkit-operator-extensions"/}" != "${package_name}" ]]; then
|
||||
return 0
|
||||
fi
|
||||
|
||||
# We allow all other packages for non-rc versions.
|
||||
if [[ "${VERSION/rc./}" == "${VERSION}" ]]; then
|
||||
return 1
|
||||
fi
|
||||
|
||||
@@ -36,9 +36,9 @@ NVIDIA_DOCKER_ROOT=${PROJECT_ROOT}/third_party/nvidia-docker
|
||||
# Get version for libnvidia-container
|
||||
libnvidia_container_version_tag=$(grep "#define NVC_VERSION" ${LIBNVIDIA_CONTAINER_ROOT}/src/nvc.h \
|
||||
| sed -e 's/#define NVC_VERSION[[:space:]]"\(.*\)"/\1/')
|
||||
libnvidia_container_version=${libnvidia_container_version_tag%%~*}
|
||||
libnvidia_container_version=${libnvidia_container_version_tag%%-*}
|
||||
libnvidia_container_tag=${libnvidia_container_version_tag##${libnvidia_container_version}}
|
||||
libnvidia_container_tag=${libnvidia_container_tag##\~}
|
||||
libnvidia_container_tag=${libnvidia_container_tag##\-}
|
||||
|
||||
versions_makefile=${NVIDIA_CONTAINER_TOOLKIT_ROOT}/versions.mk
|
||||
# Get version for nvidia-container-toolkit
|
||||
|
||||
@@ -120,6 +120,12 @@ function sync() {
|
||||
mkdir -p ${dst}
|
||||
|
||||
for f in $(ls ${src}/libnvidia-container*.${pkg_type} ${src}/nvidia-container-toolkit*.${pkg_type}); do
|
||||
# We never release nvidia-container-toolkit-operator-extensions packages
|
||||
if [[ "${f/"nvidia-container-toolkit-operator-extensions"/}" != "${f}" ]]; then
|
||||
echo "Skipping ${f}"
|
||||
continue
|
||||
fi
|
||||
|
||||
df=${dst}/$(basename ${f})
|
||||
df_stable=${df//"/experimental/"/"/stable/"}
|
||||
if [[ -f "${df}" ]]; then
|
||||
|
||||
2
third_party/libnvidia-container
vendored
2
third_party/libnvidia-container
vendored
Submodule third_party/libnvidia-container updated: 7678e1af09...06977272ca
@@ -41,6 +41,7 @@ const (
|
||||
|
||||
type options struct {
|
||||
DriverRoot string
|
||||
ContainerRuntimeMode string
|
||||
ContainerRuntimeDebug string
|
||||
ContainerRuntimeLogLevel string
|
||||
ContainerCLIDebug string
|
||||
@@ -108,6 +109,11 @@ func main() {
|
||||
Destination: &opts.ContainerRuntimeLogLevel,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_LOG_LEVEL"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "nvidia-container-runtime-mode",
|
||||
Destination: &opts.ContainerRuntimeMode,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_MODE"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "nvidia-container-cli-debug",
|
||||
Usage: "Specify the location of the debug log file for the NVIDIA Container CLI",
|
||||
@@ -299,6 +305,7 @@ func installToolkitConfig(toolkitConfigPath string, nvidiaContainerCliExecutable
|
||||
debugOptions := map[string]string{
|
||||
"nvidia-container-runtime.debug": opts.ContainerRuntimeDebug,
|
||||
"nvidia-container-runtime.log-level": opts.ContainerRuntimeLogLevel,
|
||||
"nvidia-container-runtime.mode": opts.ContainerRuntimeMode,
|
||||
"nvidia-container-cli.debug": opts.ContainerCLIDebug,
|
||||
}
|
||||
for key, value := range debugOptions {
|
||||
|
||||
320
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/const.go
generated
vendored
320
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/const.go
generated
vendored
@@ -44,11 +44,13 @@ const (
|
||||
// DEVICE_PCI_BUS_ID_FMT as defined in nvml/nvml.h
|
||||
DEVICE_PCI_BUS_ID_FMT = "%08X:%02X:%02X.0"
|
||||
// NVLINK_MAX_LINKS as defined in nvml/nvml.h
|
||||
NVLINK_MAX_LINKS = 12
|
||||
NVLINK_MAX_LINKS = 18
|
||||
// TOPOLOGY_CPU as defined in nvml/nvml.h
|
||||
TOPOLOGY_CPU = 0
|
||||
// MAX_PHYSICAL_BRIDGE as defined in nvml/nvml.h
|
||||
MAX_PHYSICAL_BRIDGE = 128
|
||||
// MAX_THERMAL_SENSORS_PER_GPU as defined in nvml/nvml.h
|
||||
MAX_THERMAL_SENSORS_PER_GPU = 3
|
||||
// FlagDefault as defined in nvml/nvml.h
|
||||
FlagDefault = 0
|
||||
// FlagForce as defined in nvml/nvml.h
|
||||
@@ -57,6 +59,8 @@ const (
|
||||
SINGLE_BIT_ECC = 0
|
||||
// DOUBLE_BIT_ECC as defined in nvml/nvml.h
|
||||
DOUBLE_BIT_ECC = 0
|
||||
// MAX_GPU_PERF_PSTATES as defined in nvml/nvml.h
|
||||
MAX_GPU_PERF_PSTATES = 16
|
||||
// GRID_LICENSE_EXPIRY_NOT_AVAILABLE as defined in nvml/nvml.h
|
||||
GRID_LICENSE_EXPIRY_NOT_AVAILABLE = 0
|
||||
// GRID_LICENSE_EXPIRY_INVALID as defined in nvml/nvml.h
|
||||
@@ -73,6 +77,18 @@ const (
|
||||
VGPU_NAME_BUFFER_SIZE = 64
|
||||
// GRID_LICENSE_FEATURE_MAX_COUNT as defined in nvml/nvml.h
|
||||
GRID_LICENSE_FEATURE_MAX_COUNT = 3
|
||||
// VGPU_SCHEDULER_POLICY_UNKNOWN as defined in nvml/nvml.h
|
||||
VGPU_SCHEDULER_POLICY_UNKNOWN = 0
|
||||
// VGPU_SCHEDULER_POLICY_BEST_EFFORT as defined in nvml/nvml.h
|
||||
VGPU_SCHEDULER_POLICY_BEST_EFFORT = 1
|
||||
// VGPU_SCHEDULER_POLICY_EQUAL_SHARE as defined in nvml/nvml.h
|
||||
VGPU_SCHEDULER_POLICY_EQUAL_SHARE = 2
|
||||
// VGPU_SCHEDULER_POLICY_FIXED_SHARE as defined in nvml/nvml.h
|
||||
VGPU_SCHEDULER_POLICY_FIXED_SHARE = 3
|
||||
// SUPPORTED_VGPU_SCHEDULER_POLICY_COUNT as defined in nvml/nvml.h
|
||||
SUPPORTED_VGPU_SCHEDULER_POLICY_COUNT = 3
|
||||
// SCHEDULER_SW_MAX_LOG_ENTRIES as defined in nvml/nvml.h
|
||||
SCHEDULER_SW_MAX_LOG_ENTRIES = 200
|
||||
// GRID_LICENSE_STATE_UNKNOWN as defined in nvml/nvml.h
|
||||
GRID_LICENSE_STATE_UNKNOWN = 0
|
||||
// GRID_LICENSE_STATE_UNINITIALIZED as defined in nvml/nvml.h
|
||||
@@ -85,6 +101,8 @@ const (
|
||||
GRID_LICENSE_STATE_UNLICENSED = 4
|
||||
// GRID_LICENSE_STATE_LICENSED as defined in nvml/nvml.h
|
||||
GRID_LICENSE_STATE_LICENSED = 5
|
||||
// GSP_FIRMWARE_VERSION_BUF_SIZE as defined in nvml/nvml.h
|
||||
GSP_FIRMWARE_VERSION_BUF_SIZE = 64
|
||||
// DEVICE_ARCH_KEPLER as defined in nvml/nvml.h
|
||||
DEVICE_ARCH_KEPLER = 2
|
||||
// DEVICE_ARCH_MAXWELL as defined in nvml/nvml.h
|
||||
@@ -97,6 +115,10 @@ const (
|
||||
DEVICE_ARCH_TURING = 6
|
||||
// DEVICE_ARCH_AMPERE as defined in nvml/nvml.h
|
||||
DEVICE_ARCH_AMPERE = 7
|
||||
// DEVICE_ARCH_ADA as defined in nvml/nvml.h
|
||||
DEVICE_ARCH_ADA = 8
|
||||
// DEVICE_ARCH_HOPPER as defined in nvml/nvml.h
|
||||
DEVICE_ARCH_HOPPER = 9
|
||||
// DEVICE_ARCH_UNKNOWN as defined in nvml/nvml.h
|
||||
DEVICE_ARCH_UNKNOWN = 4294967295
|
||||
// BUS_TYPE_UNKNOWN as defined in nvml/nvml.h
|
||||
@@ -109,6 +131,10 @@ const (
|
||||
BUS_TYPE_FPCI = 3
|
||||
// BUS_TYPE_AGP as defined in nvml/nvml.h
|
||||
BUS_TYPE_AGP = 4
|
||||
// FAN_POLICY_TEMPERATURE_CONTINOUS_SW as defined in nvml/nvml.h
|
||||
FAN_POLICY_TEMPERATURE_CONTINOUS_SW = 0
|
||||
// FAN_POLICY_MANUAL as defined in nvml/nvml.h
|
||||
FAN_POLICY_MANUAL = 1
|
||||
// POWER_SOURCE_AC as defined in nvml/nvml.h
|
||||
POWER_SOURCE_AC = 0
|
||||
// POWER_SOURCE_BATTERY as defined in nvml/nvml.h
|
||||
@@ -125,10 +151,14 @@ const (
|
||||
PCIE_LINK_MAX_SPEED_16000MBPS = 4
|
||||
// PCIE_LINK_MAX_SPEED_32000MBPS as defined in nvml/nvml.h
|
||||
PCIE_LINK_MAX_SPEED_32000MBPS = 5
|
||||
// PCIE_LINK_MAX_SPEED_64000MBPS as defined in nvml/nvml.h
|
||||
PCIE_LINK_MAX_SPEED_64000MBPS = 6
|
||||
// ADAPTIVE_CLOCKING_INFO_STATUS_DISABLED as defined in nvml/nvml.h
|
||||
ADAPTIVE_CLOCKING_INFO_STATUS_DISABLED = 0
|
||||
// ADAPTIVE_CLOCKING_INFO_STATUS_ENABLED as defined in nvml/nvml.h
|
||||
ADAPTIVE_CLOCKING_INFO_STATUS_ENABLED = 1
|
||||
// MAX_GPU_UTILIZATIONS as defined in nvml/nvml.h
|
||||
MAX_GPU_UTILIZATIONS = 8
|
||||
// FI_DEV_ECC_CURRENT as defined in nvml/nvml.h
|
||||
FI_DEV_ECC_CURRENT = 1
|
||||
// FI_DEV_ECC_PENDING as defined in nvml/nvml.h
|
||||
@@ -449,8 +479,26 @@ const (
|
||||
FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_L11 = 159
|
||||
// FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_TOTAL as defined in nvml/nvml.h
|
||||
FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_TOTAL = 160
|
||||
// FI_DEV_NVLINK_ERROR_DL_REPLAY as defined in nvml/nvml.h
|
||||
FI_DEV_NVLINK_ERROR_DL_REPLAY = 161
|
||||
// FI_DEV_NVLINK_ERROR_DL_RECOVERY as defined in nvml/nvml.h
|
||||
FI_DEV_NVLINK_ERROR_DL_RECOVERY = 162
|
||||
// FI_DEV_NVLINK_ERROR_DL_CRC as defined in nvml/nvml.h
|
||||
FI_DEV_NVLINK_ERROR_DL_CRC = 163
|
||||
// FI_DEV_NVLINK_GET_SPEED as defined in nvml/nvml.h
|
||||
FI_DEV_NVLINK_GET_SPEED = 164
|
||||
// FI_DEV_NVLINK_GET_STATE as defined in nvml/nvml.h
|
||||
FI_DEV_NVLINK_GET_STATE = 165
|
||||
// FI_DEV_NVLINK_GET_VERSION as defined in nvml/nvml.h
|
||||
FI_DEV_NVLINK_GET_VERSION = 166
|
||||
// FI_DEV_NVLINK_GET_POWER_STATE as defined in nvml/nvml.h
|
||||
FI_DEV_NVLINK_GET_POWER_STATE = 167
|
||||
// FI_DEV_NVLINK_GET_POWER_THRESHOLD as defined in nvml/nvml.h
|
||||
FI_DEV_NVLINK_GET_POWER_THRESHOLD = 168
|
||||
// FI_DEV_PCIE_L0_TO_RECOVERY_COUNTER as defined in nvml/nvml.h
|
||||
FI_DEV_PCIE_L0_TO_RECOVERY_COUNTER = 169
|
||||
// FI_MAX as defined in nvml/nvml.h
|
||||
FI_MAX = 161
|
||||
FI_MAX = 170
|
||||
// EventTypeSingleBitEccError as defined in nvml/nvml.h
|
||||
EventTypeSingleBitEccError = 1
|
||||
// EventTypeDoubleBitEccError as defined in nvml/nvml.h
|
||||
@@ -503,6 +551,16 @@ const (
|
||||
NVFBC_SESSION_FLAG_CAPTURE_WITH_WAIT_INFINITE = 8
|
||||
// NVFBC_SESSION_FLAG_CAPTURE_WITH_WAIT_TIMEOUT as defined in nvml/nvml.h
|
||||
NVFBC_SESSION_FLAG_CAPTURE_WITH_WAIT_TIMEOUT = 16
|
||||
// GPU_FABRIC_UUID_LEN as defined in nvml/nvml.h
|
||||
GPU_FABRIC_UUID_LEN = 16
|
||||
// GPU_FABRIC_STATE_NOT_SUPPORTED as defined in nvml/nvml.h
|
||||
GPU_FABRIC_STATE_NOT_SUPPORTED = 0
|
||||
// GPU_FABRIC_STATE_NOT_STARTED as defined in nvml/nvml.h
|
||||
GPU_FABRIC_STATE_NOT_STARTED = 1
|
||||
// GPU_FABRIC_STATE_IN_PROGRESS as defined in nvml/nvml.h
|
||||
GPU_FABRIC_STATE_IN_PROGRESS = 2
|
||||
// GPU_FABRIC_STATE_COMPLETED as defined in nvml/nvml.h
|
||||
GPU_FABRIC_STATE_COMPLETED = 3
|
||||
// INIT_FLAG_NO_GPUS as defined in nvml/nvml.h
|
||||
INIT_FLAG_NO_GPUS = 1
|
||||
// INIT_FLAG_NO_ATTACH as defined in nvml/nvml.h
|
||||
@@ -551,8 +609,12 @@ const (
|
||||
GPU_INSTANCE_PROFILE_6_SLICE = 6
|
||||
// GPU_INSTANCE_PROFILE_1_SLICE_REV1 as defined in nvml/nvml.h
|
||||
GPU_INSTANCE_PROFILE_1_SLICE_REV1 = 7
|
||||
// GPU_INSTANCE_PROFILE_2_SLICE_REV1 as defined in nvml/nvml.h
|
||||
GPU_INSTANCE_PROFILE_2_SLICE_REV1 = 8
|
||||
// GPU_INSTANCE_PROFILE_1_SLICE_REV2 as defined in nvml/nvml.h
|
||||
GPU_INSTANCE_PROFILE_1_SLICE_REV2 = 9
|
||||
// GPU_INSTANCE_PROFILE_COUNT as defined in nvml/nvml.h
|
||||
GPU_INSTANCE_PROFILE_COUNT = 8
|
||||
GPU_INSTANCE_PROFILE_COUNT = 10
|
||||
// COMPUTE_INSTANCE_PROFILE_1_SLICE as defined in nvml/nvml.h
|
||||
COMPUTE_INSTANCE_PROFILE_1_SLICE = 0
|
||||
// COMPUTE_INSTANCE_PROFILE_2_SLICE as defined in nvml/nvml.h
|
||||
@@ -567,12 +629,32 @@ const (
|
||||
COMPUTE_INSTANCE_PROFILE_8_SLICE = 5
|
||||
// COMPUTE_INSTANCE_PROFILE_6_SLICE as defined in nvml/nvml.h
|
||||
COMPUTE_INSTANCE_PROFILE_6_SLICE = 6
|
||||
// COMPUTE_INSTANCE_PROFILE_1_SLICE_REV1 as defined in nvml/nvml.h
|
||||
COMPUTE_INSTANCE_PROFILE_1_SLICE_REV1 = 7
|
||||
// COMPUTE_INSTANCE_PROFILE_COUNT as defined in nvml/nvml.h
|
||||
COMPUTE_INSTANCE_PROFILE_COUNT = 7
|
||||
COMPUTE_INSTANCE_PROFILE_COUNT = 8
|
||||
// COMPUTE_INSTANCE_ENGINE_PROFILE_SHARED as defined in nvml/nvml.h
|
||||
COMPUTE_INSTANCE_ENGINE_PROFILE_SHARED = 0
|
||||
// COMPUTE_INSTANCE_ENGINE_PROFILE_COUNT as defined in nvml/nvml.h
|
||||
COMPUTE_INSTANCE_ENGINE_PROFILE_COUNT = 1
|
||||
// GPM_METRICS_GET_VERSION as defined in nvml/nvml.h
|
||||
GPM_METRICS_GET_VERSION = 1
|
||||
// GPM_SUPPORT_VERSION as defined in nvml/nvml.h
|
||||
GPM_SUPPORT_VERSION = 1
|
||||
// COUNTER_COLLECTION_UNIT_STREAM_STATE_DISABLE as defined in nvml/nvml.h
|
||||
COUNTER_COLLECTION_UNIT_STREAM_STATE_DISABLE = 0
|
||||
// COUNTER_COLLECTION_UNIT_STREAM_STATE_ENABLE as defined in nvml/nvml.h
|
||||
COUNTER_COLLECTION_UNIT_STREAM_STATE_ENABLE = 1
|
||||
// NVLINK_POWER_STATE_HIGH_SPEED as defined in nvml/nvml.h
|
||||
NVLINK_POWER_STATE_HIGH_SPEED = 0
|
||||
// NVLINK_POWER_STATE_LOW as defined in nvml/nvml.h
|
||||
NVLINK_POWER_STATE_LOW = 1
|
||||
// NVLINK_LOW_POWER_THRESHOLD_MIN as defined in nvml/nvml.h
|
||||
NVLINK_LOW_POWER_THRESHOLD_MIN = 1
|
||||
// NVLINK_LOW_POWER_THRESHOLD_MAX as defined in nvml/nvml.h
|
||||
NVLINK_LOW_POWER_THRESHOLD_MAX = 8191
|
||||
// NVLINK_LOW_POWER_THRESHOLD_RESET as defined in nvml/nvml.h
|
||||
NVLINK_LOW_POWER_THRESHOLD_RESET = 4294967295
|
||||
)
|
||||
|
||||
// BridgeChipType as declared in nvml/nvml.h
|
||||
@@ -918,32 +1000,34 @@ type Return int32
|
||||
|
||||
// Return enumeration from nvml/nvml.h
|
||||
const (
|
||||
SUCCESS Return = iota
|
||||
ERROR_UNINITIALIZED Return = 1
|
||||
ERROR_INVALID_ARGUMENT Return = 2
|
||||
ERROR_NOT_SUPPORTED Return = 3
|
||||
ERROR_NO_PERMISSION Return = 4
|
||||
ERROR_ALREADY_INITIALIZED Return = 5
|
||||
ERROR_NOT_FOUND Return = 6
|
||||
ERROR_INSUFFICIENT_SIZE Return = 7
|
||||
ERROR_INSUFFICIENT_POWER Return = 8
|
||||
ERROR_DRIVER_NOT_LOADED Return = 9
|
||||
ERROR_TIMEOUT Return = 10
|
||||
ERROR_IRQ_ISSUE Return = 11
|
||||
ERROR_LIBRARY_NOT_FOUND Return = 12
|
||||
ERROR_FUNCTION_NOT_FOUND Return = 13
|
||||
ERROR_CORRUPTED_INFOROM Return = 14
|
||||
ERROR_GPU_IS_LOST Return = 15
|
||||
ERROR_RESET_REQUIRED Return = 16
|
||||
ERROR_OPERATING_SYSTEM Return = 17
|
||||
ERROR_LIB_RM_VERSION_MISMATCH Return = 18
|
||||
ERROR_IN_USE Return = 19
|
||||
ERROR_MEMORY Return = 20
|
||||
ERROR_NO_DATA Return = 21
|
||||
ERROR_VGPU_ECC_NOT_SUPPORTED Return = 22
|
||||
ERROR_INSUFFICIENT_RESOURCES Return = 23
|
||||
ERROR_FREQ_NOT_SUPPORTED Return = 24
|
||||
ERROR_UNKNOWN Return = 999
|
||||
SUCCESS Return = iota
|
||||
ERROR_UNINITIALIZED Return = 1
|
||||
ERROR_INVALID_ARGUMENT Return = 2
|
||||
ERROR_NOT_SUPPORTED Return = 3
|
||||
ERROR_NO_PERMISSION Return = 4
|
||||
ERROR_ALREADY_INITIALIZED Return = 5
|
||||
ERROR_NOT_FOUND Return = 6
|
||||
ERROR_INSUFFICIENT_SIZE Return = 7
|
||||
ERROR_INSUFFICIENT_POWER Return = 8
|
||||
ERROR_DRIVER_NOT_LOADED Return = 9
|
||||
ERROR_TIMEOUT Return = 10
|
||||
ERROR_IRQ_ISSUE Return = 11
|
||||
ERROR_LIBRARY_NOT_FOUND Return = 12
|
||||
ERROR_FUNCTION_NOT_FOUND Return = 13
|
||||
ERROR_CORRUPTED_INFOROM Return = 14
|
||||
ERROR_GPU_IS_LOST Return = 15
|
||||
ERROR_RESET_REQUIRED Return = 16
|
||||
ERROR_OPERATING_SYSTEM Return = 17
|
||||
ERROR_LIB_RM_VERSION_MISMATCH Return = 18
|
||||
ERROR_IN_USE Return = 19
|
||||
ERROR_MEMORY Return = 20
|
||||
ERROR_NO_DATA Return = 21
|
||||
ERROR_VGPU_ECC_NOT_SUPPORTED Return = 22
|
||||
ERROR_INSUFFICIENT_RESOURCES Return = 23
|
||||
ERROR_FREQ_NOT_SUPPORTED Return = 24
|
||||
ERROR_ARGUMENT_VERSION_MISMATCH Return = 25
|
||||
ERROR_DEPRECATED Return = 26
|
||||
ERROR_UNKNOWN Return = 999
|
||||
)
|
||||
|
||||
// MemoryLocation as declared in nvml/nvml.h
|
||||
@@ -983,18 +1067,6 @@ const (
|
||||
RESTRICTED_API_COUNT RestrictedAPI = 2
|
||||
)
|
||||
|
||||
// NvLinkEccLaneErrorCounter mirrors the enum of the same name declared in
// nvml/nvml.h.
type NvLinkEccLaneErrorCounter int32

// Values of NvLinkEccLaneErrorCounter, taken from nvml/nvml.h.
const (
	NVLINK_ERROR_DL_ECC_LANE0 NvLinkEccLaneErrorCounter = 0
	NVLINK_ERROR_DL_ECC_LANE1 NvLinkEccLaneErrorCounter = 1
	NVLINK_ERROR_DL_ECC_LANE2 NvLinkEccLaneErrorCounter = 2
	NVLINK_ERROR_DL_ECC_LANE3 NvLinkEccLaneErrorCounter = 3
	NVLINK_ERROR_DL_ECC_COUNT NvLinkEccLaneErrorCounter = 4
)
|
||||
|
||||
// GpuVirtualizationMode as declared in nvml/nvml.h
|
||||
type GpuVirtualizationMode int32
|
||||
|
||||
@@ -1034,6 +1106,50 @@ const (
|
||||
VGPU_INSTANCE_GUEST_INFO_STATE_INITIALIZED VgpuGuestInfoState = 1
|
||||
)
|
||||
|
||||
// VgpuCapability mirrors the enum of the same name declared in nvml/nvml.h.
type VgpuCapability int32

// Values of VgpuCapability, taken from nvml/nvml.h.
const (
	VGPU_CAP_NVLINK_P2P           VgpuCapability = 0
	VGPU_CAP_GPUDIRECT            VgpuCapability = 1
	VGPU_CAP_MULTI_VGPU_EXCLUSIVE VgpuCapability = 2
	VGPU_CAP_EXCLUSIVE_TYPE       VgpuCapability = 3
	VGPU_CAP_EXCLUSIVE_SIZE       VgpuCapability = 4
	VGPU_CAP_COUNT                VgpuCapability = 5
)
|
||||
|
||||
// VgpuDriverCapability mirrors the enum of the same name declared in
// nvml/nvml.h.
type VgpuDriverCapability int32

// Values of VgpuDriverCapability, taken from nvml/nvml.h.
const (
	VGPU_DRIVER_CAP_HETEROGENEOUS_MULTI_VGPU VgpuDriverCapability = 0
	VGPU_DRIVER_CAP_COUNT                    VgpuDriverCapability = 1
)
|
||||
|
||||
// DeviceVgpuCapability mirrors the enum of the same name declared in
// nvml/nvml.h.
type DeviceVgpuCapability int32

// Values of DeviceVgpuCapability, taken from nvml/nvml.h.
const (
	DEVICE_VGPU_CAP_FRACTIONAL_MULTI_VGPU            DeviceVgpuCapability = 0
	DEVICE_VGPU_CAP_HETEROGENEOUS_TIMESLICE_PROFILES DeviceVgpuCapability = 1
	DEVICE_VGPU_CAP_HETEROGENEOUS_TIMESLICE_SIZES    DeviceVgpuCapability = 2
	DEVICE_VGPU_CAP_COUNT                            DeviceVgpuCapability = 3
)
|
||||
|
||||
// GpuUtilizationDomainId mirrors the enum of the same name declared in
// nvml/nvml.h.
type GpuUtilizationDomainId int32

// Values of GpuUtilizationDomainId, taken from nvml/nvml.h.
const (
	GPU_UTILIZATION_DOMAIN_GPU GpuUtilizationDomainId = 0
	GPU_UTILIZATION_DOMAIN_FB  GpuUtilizationDomainId = 1
	GPU_UTILIZATION_DOMAIN_VID GpuUtilizationDomainId = 2
	GPU_UTILIZATION_DOMAIN_BUS GpuUtilizationDomainId = 3
)
|
||||
|
||||
// FanState as declared in nvml/nvml.h
|
||||
type FanState int32
|
||||
|
||||
@@ -1125,6 +1241,49 @@ const (
|
||||
VGPU_COMPATIBILITY_LIMIT_OTHER VgpuPgpuCompatibilityLimitCode = -2147483648
|
||||
)
|
||||
|
||||
// ThermalTarget mirrors the enum of the same name declared in nvml/nvml.h.
type ThermalTarget int32

// Values of ThermalTarget, taken from nvml/nvml.h. The numbering is not
// contiguous, and THERMAL_TARGET_UNKNOWN is negative.
const (
	THERMAL_TARGET_NONE         ThermalTarget = 0
	THERMAL_TARGET_GPU          ThermalTarget = 1
	THERMAL_TARGET_MEMORY       ThermalTarget = 2
	THERMAL_TARGET_POWER_SUPPLY ThermalTarget = 4
	THERMAL_TARGET_BOARD        ThermalTarget = 8
	THERMAL_TARGET_VCD_BOARD    ThermalTarget = 9
	THERMAL_TARGET_VCD_INLET    ThermalTarget = 10
	THERMAL_TARGET_VCD_OUTLET   ThermalTarget = 11
	THERMAL_TARGET_ALL          ThermalTarget = 15
	THERMAL_TARGET_UNKNOWN      ThermalTarget = -1
)
|
||||
|
||||
// ThermalController mirrors the enum of the same name declared in
// nvml/nvml.h.
type ThermalController int32

// Values of ThermalController, taken from nvml/nvml.h.
// THERMAL_CONTROLLER_UNKNOWN is the only negative value.
const (
	THERMAL_CONTROLLER_NONE            ThermalController = 0
	THERMAL_CONTROLLER_GPU_INTERNAL    ThermalController = 1
	THERMAL_CONTROLLER_ADM1032         ThermalController = 2
	THERMAL_CONTROLLER_ADT7461         ThermalController = 3
	THERMAL_CONTROLLER_MAX6649         ThermalController = 4
	THERMAL_CONTROLLER_MAX1617         ThermalController = 5
	THERMAL_CONTROLLER_LM99            ThermalController = 6
	THERMAL_CONTROLLER_LM89            ThermalController = 7
	THERMAL_CONTROLLER_LM64            ThermalController = 8
	THERMAL_CONTROLLER_G781            ThermalController = 9
	THERMAL_CONTROLLER_ADT7473         ThermalController = 10
	THERMAL_CONTROLLER_SBMAX6649       ThermalController = 11
	THERMAL_CONTROLLER_VBIOSEVT        ThermalController = 12
	THERMAL_CONTROLLER_OS              ThermalController = 13
	THERMAL_CONTROLLER_NVSYSCON_CANOAS ThermalController = 14
	THERMAL_CONTROLLER_NVSYSCON_E551   ThermalController = 15
	THERMAL_CONTROLLER_MAX6649R        ThermalController = 16
	THERMAL_CONTROLLER_ADT7473S        ThermalController = 17
	THERMAL_CONTROLLER_UNKNOWN         ThermalController = -1
)
|
||||
|
||||
// GridLicenseFeatureCode as declared in nvml/nvml.h
|
||||
type GridLicenseFeatureCode int32
|
||||
|
||||
@@ -1137,3 +1296,80 @@ const (
|
||||
GRID_LICENSE_FEATURE_CODE_GAMING GridLicenseFeatureCode = 3
|
||||
GRID_LICENSE_FEATURE_CODE_COMPUTE GridLicenseFeatureCode = 4
|
||||
)
|
||||
|
||||
// GpmMetricId mirrors the enum of the same name declared in nvml/nvml.h.
type GpmMetricId int32

// Values of GpmMetricId, taken from nvml/nvml.h. The IDs are sparse: they
// are laid out in numbered bands with gaps between them, and ID 8 is unused.
const (
	// IDs 1-13: *_UTIL and occupancy metrics.
	GPM_METRIC_GRAPHICS_UTIL    GpmMetricId = 1
	GPM_METRIC_SM_UTIL          GpmMetricId = 2
	GPM_METRIC_SM_OCCUPANCY     GpmMetricId = 3
	GPM_METRIC_INTEGER_UTIL     GpmMetricId = 4
	GPM_METRIC_ANY_TENSOR_UTIL  GpmMetricId = 5
	GPM_METRIC_DFMA_TENSOR_UTIL GpmMetricId = 6
	GPM_METRIC_HMMA_TENSOR_UTIL GpmMetricId = 7
	GPM_METRIC_IMMA_TENSOR_UTIL GpmMetricId = 9
	GPM_METRIC_DRAM_BW_UTIL     GpmMetricId = 10
	GPM_METRIC_FP64_UTIL        GpmMetricId = 11
	GPM_METRIC_FP32_UTIL        GpmMetricId = 12
	GPM_METRIC_FP16_UTIL        GpmMetricId = 13

	// IDs 20-21: PCIE_* metrics.
	GPM_METRIC_PCIE_TX_PER_SEC GpmMetricId = 20
	GPM_METRIC_PCIE_RX_PER_SEC GpmMetricId = 21

	// IDs 30-37: NVDEC_* metrics.
	GPM_METRIC_NVDEC_0_UTIL GpmMetricId = 30
	GPM_METRIC_NVDEC_1_UTIL GpmMetricId = 31
	GPM_METRIC_NVDEC_2_UTIL GpmMetricId = 32
	GPM_METRIC_NVDEC_3_UTIL GpmMetricId = 33
	GPM_METRIC_NVDEC_4_UTIL GpmMetricId = 34
	GPM_METRIC_NVDEC_5_UTIL GpmMetricId = 35
	GPM_METRIC_NVDEC_6_UTIL GpmMetricId = 36
	GPM_METRIC_NVDEC_7_UTIL GpmMetricId = 37

	// IDs 40-47: NVJPG_* metrics.
	GPM_METRIC_NVJPG_0_UTIL GpmMetricId = 40
	GPM_METRIC_NVJPG_1_UTIL GpmMetricId = 41
	GPM_METRIC_NVJPG_2_UTIL GpmMetricId = 42
	GPM_METRIC_NVJPG_3_UTIL GpmMetricId = 43
	GPM_METRIC_NVJPG_4_UTIL GpmMetricId = 44
	GPM_METRIC_NVJPG_5_UTIL GpmMetricId = 45
	GPM_METRIC_NVJPG_6_UTIL GpmMetricId = 46
	GPM_METRIC_NVJPG_7_UTIL GpmMetricId = 47

	// ID 50: NVOFA_* metric.
	GPM_METRIC_NVOFA_0_UTIL GpmMetricId = 50

	// IDs 60-97: NVLINK_* metrics (totals, then RX/TX pairs for L0-L17).
	GPM_METRIC_NVLINK_TOTAL_RX_PER_SEC GpmMetricId = 60
	GPM_METRIC_NVLINK_TOTAL_TX_PER_SEC GpmMetricId = 61
	GPM_METRIC_NVLINK_L0_RX_PER_SEC    GpmMetricId = 62
	GPM_METRIC_NVLINK_L0_TX_PER_SEC    GpmMetricId = 63
	GPM_METRIC_NVLINK_L1_RX_PER_SEC    GpmMetricId = 64
	GPM_METRIC_NVLINK_L1_TX_PER_SEC    GpmMetricId = 65
	GPM_METRIC_NVLINK_L2_RX_PER_SEC    GpmMetricId = 66
	GPM_METRIC_NVLINK_L2_TX_PER_SEC    GpmMetricId = 67
	GPM_METRIC_NVLINK_L3_RX_PER_SEC    GpmMetricId = 68
	GPM_METRIC_NVLINK_L3_TX_PER_SEC    GpmMetricId = 69
	GPM_METRIC_NVLINK_L4_RX_PER_SEC    GpmMetricId = 70
	GPM_METRIC_NVLINK_L4_TX_PER_SEC    GpmMetricId = 71
	GPM_METRIC_NVLINK_L5_RX_PER_SEC    GpmMetricId = 72
	GPM_METRIC_NVLINK_L5_TX_PER_SEC    GpmMetricId = 73
	GPM_METRIC_NVLINK_L6_RX_PER_SEC    GpmMetricId = 74
	GPM_METRIC_NVLINK_L6_TX_PER_SEC    GpmMetricId = 75
	GPM_METRIC_NVLINK_L7_RX_PER_SEC    GpmMetricId = 76
	GPM_METRIC_NVLINK_L7_TX_PER_SEC    GpmMetricId = 77
	GPM_METRIC_NVLINK_L8_RX_PER_SEC    GpmMetricId = 78
	GPM_METRIC_NVLINK_L8_TX_PER_SEC    GpmMetricId = 79
	GPM_METRIC_NVLINK_L9_RX_PER_SEC    GpmMetricId = 80
	GPM_METRIC_NVLINK_L9_TX_PER_SEC    GpmMetricId = 81
	GPM_METRIC_NVLINK_L10_RX_PER_SEC   GpmMetricId = 82
	GPM_METRIC_NVLINK_L10_TX_PER_SEC   GpmMetricId = 83
	GPM_METRIC_NVLINK_L11_RX_PER_SEC   GpmMetricId = 84
	GPM_METRIC_NVLINK_L11_TX_PER_SEC   GpmMetricId = 85
	GPM_METRIC_NVLINK_L12_RX_PER_SEC   GpmMetricId = 86
	GPM_METRIC_NVLINK_L12_TX_PER_SEC   GpmMetricId = 87
	GPM_METRIC_NVLINK_L13_RX_PER_SEC   GpmMetricId = 88
	GPM_METRIC_NVLINK_L13_TX_PER_SEC   GpmMetricId = 89
	GPM_METRIC_NVLINK_L14_RX_PER_SEC   GpmMetricId = 90
	GPM_METRIC_NVLINK_L14_TX_PER_SEC   GpmMetricId = 91
	GPM_METRIC_NVLINK_L15_RX_PER_SEC   GpmMetricId = 92
	GPM_METRIC_NVLINK_L15_TX_PER_SEC   GpmMetricId = 93
	GPM_METRIC_NVLINK_L16_RX_PER_SEC   GpmMetricId = 94
	GPM_METRIC_NVLINK_L16_TX_PER_SEC   GpmMetricId = 95
	GPM_METRIC_NVLINK_L17_RX_PER_SEC   GpmMetricId = 96
	GPM_METRIC_NVLINK_L17_TX_PER_SEC   GpmMetricId = 97

	// One past the highest metric ID declared above.
	GPM_METRIC_MAX GpmMetricId = 98
)
|
||||
|
||||
363
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/device.go
generated
vendored
363
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/device.go
generated
vendored
@@ -38,21 +38,21 @@ func DeviceGetHandleByIndex(Index int) (Device, Return) {
|
||||
// nvml.DeviceGetHandleBySerial()
|
||||
func DeviceGetHandleBySerial(Serial string) (Device, Return) {
|
||||
var Device Device
|
||||
ret := nvmlDeviceGetHandleBySerial(Serial + string(rune(0)), &Device)
|
||||
ret := nvmlDeviceGetHandleBySerial(Serial+string(rune(0)), &Device)
|
||||
return Device, ret
|
||||
}
|
||||
|
||||
// nvml.DeviceGetHandleByUUID()
|
||||
func DeviceGetHandleByUUID(Uuid string) (Device, Return) {
|
||||
var Device Device
|
||||
ret := nvmlDeviceGetHandleByUUID(Uuid + string(rune(0)), &Device)
|
||||
ret := nvmlDeviceGetHandleByUUID(Uuid+string(rune(0)), &Device)
|
||||
return Device, ret
|
||||
}
|
||||
|
||||
// nvml.DeviceGetHandleByPciBusId()
|
||||
func DeviceGetHandleByPciBusId(PciBusId string) (Device, Return) {
|
||||
var Device Device
|
||||
ret := nvmlDeviceGetHandleByPciBusId(PciBusId + string(rune(0)), &Device)
|
||||
ret := nvmlDeviceGetHandleByPciBusId(PciBusId+string(rune(0)), &Device)
|
||||
return Device, ret
|
||||
}
|
||||
|
||||
@@ -2286,3 +2286,360 @@ func DeviceGetBusType(Device Device) (BusType, Return) {
|
||||
func (Device Device) GetBusType() (BusType, Return) {
|
||||
return DeviceGetBusType(Device)
|
||||
}
|
||||
|
||||
// nvml.DeviceSetDefaultFanSpeed_v2()
|
||||
func DeviceSetDefaultFanSpeed_v2(Device Device, Fan int) Return {
|
||||
return nvmlDeviceSetDefaultFanSpeed_v2(Device, uint32(Fan))
|
||||
}
|
||||
|
||||
func (Device Device) SetDefaultFanSpeed_v2(Fan int) Return {
|
||||
return DeviceSetDefaultFanSpeed_v2(Device, Fan)
|
||||
}
|
||||
|
||||
// nvml.DeviceGetMinMaxFanSpeed()
|
||||
func DeviceGetMinMaxFanSpeed(Device Device) (int, int, Return) {
|
||||
var MinSpeed, MaxSpeed uint32
|
||||
ret := nvmlDeviceGetMinMaxFanSpeed(Device, &MinSpeed, &MaxSpeed)
|
||||
return int(MinSpeed), int(MaxSpeed), ret
|
||||
}
|
||||
|
||||
func (Device Device) GetMinMaxFanSpeed() (int, int, Return) {
|
||||
return DeviceGetMinMaxFanSpeed(Device)
|
||||
}
|
||||
|
||||
// nvml.DeviceGetThermalSettings()
|
||||
func DeviceGetThermalSettings(Device Device, SensorIndex uint32) (GpuThermalSettings, Return) {
|
||||
var PThermalSettings GpuThermalSettings
|
||||
ret := nvmlDeviceGetThermalSettings(Device, SensorIndex, &PThermalSettings)
|
||||
return PThermalSettings, ret
|
||||
}
|
||||
|
||||
func (Device Device) GetThermalSettings(SensorIndex uint32) (GpuThermalSettings, Return) {
|
||||
return DeviceGetThermalSettings(Device, SensorIndex)
|
||||
}
|
||||
|
||||
// nvml.DeviceGetDefaultEccMode()
|
||||
func DeviceGetDefaultEccMode(Device Device) (EnableState, Return) {
|
||||
var DefaultMode EnableState
|
||||
ret := nvmlDeviceGetDefaultEccMode(Device, &DefaultMode)
|
||||
return DefaultMode, ret
|
||||
}
|
||||
|
||||
func (Device Device) GetDefaultEccMode() (EnableState, Return) {
|
||||
return DeviceGetDefaultEccMode(Device)
|
||||
}
|
||||
|
||||
// nvml.DeviceGetPcieSpeed()
|
||||
func DeviceGetPcieSpeed(Device Device) (int, Return) {
|
||||
var PcieSpeed uint32
|
||||
ret := nvmlDeviceGetPcieSpeed(Device, &PcieSpeed)
|
||||
return int(PcieSpeed), ret
|
||||
}
|
||||
|
||||
func (Device Device) GetPcieSpeed() (int, Return) {
|
||||
return DeviceGetPcieSpeed(Device)
|
||||
}
|
||||
|
||||
// nvml.DeviceGetGspFirmwareVersion()
|
||||
func DeviceGetGspFirmwareVersion(Device Device) (string, Return) {
|
||||
Version := make([]byte, GSP_FIRMWARE_VERSION_BUF_SIZE)
|
||||
ret := nvmlDeviceGetGspFirmwareVersion(Device, &Version[0])
|
||||
return string(Version[:clen(Version)]), ret
|
||||
}
|
||||
|
||||
func (Device Device) GetGspFirmwareVersion() (string, Return) {
|
||||
return DeviceGetGspFirmwareVersion(Device)
|
||||
}
|
||||
|
||||
// nvml.DeviceGetGspFirmwareMode()
|
||||
func DeviceGetGspFirmwareMode(Device Device) (bool, bool, Return) {
|
||||
var IsEnabled, DefaultMode uint32
|
||||
ret := nvmlDeviceGetGspFirmwareMode(Device, &IsEnabled, &DefaultMode)
|
||||
return (IsEnabled != 0), (DefaultMode != 0), ret
|
||||
}
|
||||
|
||||
func (Device Device) GetGspFirmwareMode() (bool, bool, Return) {
|
||||
return DeviceGetGspFirmwareMode(Device)
|
||||
}
|
||||
|
||||
// nvml.DeviceGetDynamicPstatesInfo()
|
||||
func DeviceGetDynamicPstatesInfo(Device Device) (GpuDynamicPstatesInfo, Return) {
|
||||
var PDynamicPstatesInfo GpuDynamicPstatesInfo
|
||||
ret := nvmlDeviceGetDynamicPstatesInfo(Device, &PDynamicPstatesInfo)
|
||||
return PDynamicPstatesInfo, ret
|
||||
}
|
||||
|
||||
func (Device Device) GetDynamicPstatesInfo() (GpuDynamicPstatesInfo, Return) {
|
||||
return DeviceGetDynamicPstatesInfo(Device)
|
||||
}
|
||||
|
||||
// nvml.DeviceSetFanSpeed_v2()
|
||||
func DeviceSetFanSpeed_v2(Device Device, Fan int, Speed int) Return {
|
||||
return nvmlDeviceSetFanSpeed_v2(Device, uint32(Fan), uint32(Speed))
|
||||
}
|
||||
|
||||
func (Device Device) SetFanSpeed_v2(Fan int, Speed int) Return {
|
||||
return DeviceSetFanSpeed_v2(Device, Fan, Speed)
|
||||
}
|
||||
|
||||
// nvml.DeviceGetGpcClkVfOffset()
|
||||
func DeviceGetGpcClkVfOffset(Device Device) (int, Return) {
|
||||
var Offset int32
|
||||
ret := nvmlDeviceGetGpcClkVfOffset(Device, &Offset)
|
||||
return int(Offset), ret
|
||||
}
|
||||
|
||||
func (Device Device) GetGpcClkVfOffset() (int, Return) {
|
||||
return DeviceGetGpcClkVfOffset(Device)
|
||||
}
|
||||
|
||||
// nvml.DeviceSetGpcClkVfOffset()
|
||||
func DeviceSetGpcClkVfOffset(Device Device, Offset int) Return {
|
||||
return nvmlDeviceSetGpcClkVfOffset(Device, int32(Offset))
|
||||
}
|
||||
|
||||
func (Device Device) SetGpcClkVfOffset(Offset int) Return {
|
||||
return DeviceSetGpcClkVfOffset(Device, Offset)
|
||||
}
|
||||
|
||||
// nvml.DeviceGetMinMaxClockOfPState()
|
||||
func DeviceGetMinMaxClockOfPState(Device Device, _type ClockType, Pstate Pstates) (uint32, uint32, Return) {
|
||||
var MinClockMHz, MaxClockMHz uint32
|
||||
ret := nvmlDeviceGetMinMaxClockOfPState(Device, _type, Pstate, &MinClockMHz, &MaxClockMHz)
|
||||
return MinClockMHz, MaxClockMHz, ret
|
||||
}
|
||||
|
||||
func (Device Device) GetMinMaxClockOfPState(_type ClockType, Pstate Pstates) (uint32, uint32, Return) {
|
||||
return DeviceGetMinMaxClockOfPState(Device, _type, Pstate)
|
||||
}
|
||||
|
||||
// nvml.DeviceGetSupportedPerformanceStates()
|
||||
func DeviceGetSupportedPerformanceStates(Device Device) ([]Pstates, Return) {
|
||||
Pstates := make([]Pstates, MAX_GPU_PERF_PSTATES)
|
||||
ret := nvmlDeviceGetSupportedPerformanceStates(Device, &Pstates[0], MAX_GPU_PERF_PSTATES)
|
||||
for i := 0; i < MAX_GPU_PERF_PSTATES; i++ {
|
||||
if Pstates[i] == PSTATE_UNKNOWN {
|
||||
return Pstates[0:i], ret
|
||||
}
|
||||
}
|
||||
return Pstates, ret
|
||||
}
|
||||
|
||||
func (Device Device) GetSupportedPerformanceStates() ([]Pstates, Return) {
|
||||
return DeviceGetSupportedPerformanceStates(Device)
|
||||
}
|
||||
|
||||
// nvml.DeviceGetTargetFanSpeed()
|
||||
func DeviceGetTargetFanSpeed(Device Device, Fan int) (int, Return) {
|
||||
var TargetSpeed uint32
|
||||
ret := nvmlDeviceGetTargetFanSpeed(Device, uint32(Fan), &TargetSpeed)
|
||||
return int(TargetSpeed), ret
|
||||
}
|
||||
|
||||
func (Device Device) GetTargetFanSpeed(Fan int) (int, Return) {
|
||||
return DeviceGetTargetFanSpeed(Device, Fan)
|
||||
}
|
||||
|
||||
// nvml.DeviceGetMemClkVfOffset()
|
||||
func DeviceGetMemClkVfOffset(Device Device) (int, Return) {
|
||||
var Offset int32
|
||||
ret := nvmlDeviceGetMemClkVfOffset(Device, &Offset)
|
||||
return int(Offset), ret
|
||||
}
|
||||
|
||||
func (Device Device) GetMemClkVfOffset() (int, Return) {
|
||||
return DeviceGetMemClkVfOffset(Device)
|
||||
}
|
||||
|
||||
// nvml.DeviceSetMemClkVfOffset()
|
||||
func DeviceSetMemClkVfOffset(Device Device, Offset int) Return {
|
||||
return nvmlDeviceSetMemClkVfOffset(Device, int32(Offset))
|
||||
}
|
||||
|
||||
func (Device Device) SetMemClkVfOffset(Offset int) Return {
|
||||
return DeviceSetMemClkVfOffset(Device, Offset)
|
||||
}
|
||||
|
||||
// nvml.DeviceGetGpcClkMinMaxVfOffset()
|
||||
func DeviceGetGpcClkMinMaxVfOffset(Device Device) (int, int, Return) {
|
||||
var MinOffset, MaxOffset int32
|
||||
ret := nvmlDeviceGetGpcClkMinMaxVfOffset(Device, &MinOffset, &MaxOffset)
|
||||
return int(MinOffset), int(MaxOffset), ret
|
||||
}
|
||||
|
||||
func (Device Device) GetGpcClkMinMaxVfOffset() (int, int, Return) {
|
||||
return DeviceGetGpcClkMinMaxVfOffset(Device)
|
||||
}
|
||||
|
||||
// nvml.DeviceGetMemClkMinMaxVfOffset()
|
||||
func DeviceGetMemClkMinMaxVfOffset(Device Device) (int, int, Return) {
|
||||
var MinOffset, MaxOffset int32
|
||||
ret := nvmlDeviceGetMemClkMinMaxVfOffset(Device, &MinOffset, &MaxOffset)
|
||||
return int(MinOffset), int(MaxOffset), ret
|
||||
}
|
||||
|
||||
func (Device Device) GetMemClkMinMaxVfOffset() (int, int, Return) {
|
||||
return DeviceGetMemClkMinMaxVfOffset(Device)
|
||||
}
|
||||
|
||||
// nvml.DeviceGetGpuMaxPcieLinkGeneration()
|
||||
func DeviceGetGpuMaxPcieLinkGeneration(Device Device) (int, Return) {
|
||||
var MaxLinkGenDevice uint32
|
||||
ret := nvmlDeviceGetGpuMaxPcieLinkGeneration(Device, &MaxLinkGenDevice)
|
||||
return int(MaxLinkGenDevice), ret
|
||||
}
|
||||
|
||||
func (Device Device) GetGpuMaxPcieLinkGeneration() (int, Return) {
|
||||
return DeviceGetGpuMaxPcieLinkGeneration(Device)
|
||||
}
|
||||
|
||||
// nvml.DeviceGetFanControlPolicy_v2()
|
||||
func DeviceGetFanControlPolicy_v2(Device Device, Fan int) (FanControlPolicy, Return) {
|
||||
var Policy FanControlPolicy
|
||||
ret := nvmlDeviceGetFanControlPolicy_v2(Device, uint32(Fan), &Policy)
|
||||
return Policy, ret
|
||||
}
|
||||
|
||||
func (Device Device) GetFanControlPolicy_v2(Fan int) (FanControlPolicy, Return) {
|
||||
return DeviceGetFanControlPolicy_v2(Device, Fan)
|
||||
}
|
||||
|
||||
// nvml.DeviceSetFanControlPolicy()
|
||||
func DeviceSetFanControlPolicy(Device Device, Fan int, Policy FanControlPolicy) Return {
|
||||
return nvmlDeviceSetFanControlPolicy(Device, uint32(Fan), Policy)
|
||||
}
|
||||
|
||||
func (Device Device) SetFanControlPolicy(Fan int, Policy FanControlPolicy) Return {
|
||||
return DeviceSetFanControlPolicy(Device, Fan, Policy)
|
||||
}
|
||||
|
||||
// nvml.DeviceClearFieldValues()
|
||||
func DeviceClearFieldValues(Device Device, Values []FieldValue) Return {
|
||||
ValuesCount := len(Values)
|
||||
return nvmlDeviceClearFieldValues(Device, int32(ValuesCount), &Values[0])
|
||||
}
|
||||
|
||||
func (Device Device) ClearFieldValues(Values []FieldValue) Return {
|
||||
return DeviceClearFieldValues(Device, Values)
|
||||
}
|
||||
|
||||
// nvml.DeviceGetVgpuCapabilities()
|
||||
func DeviceGetVgpuCapabilities(Device Device, Capability DeviceVgpuCapability) (bool, Return) {
|
||||
var CapResult uint32
|
||||
ret := nvmlDeviceGetVgpuCapabilities(Device, Capability, &CapResult)
|
||||
return (CapResult != 0), ret
|
||||
}
|
||||
|
||||
func (Device Device) GetVgpuCapabilities(Capability DeviceVgpuCapability) (bool, Return) {
|
||||
return DeviceGetVgpuCapabilities(Device, Capability)
|
||||
}
|
||||
|
||||
// nvml.DeviceGetVgpuSchedulerLog()
|
||||
func DeviceGetVgpuSchedulerLog(Device Device) (VgpuSchedulerLog, Return) {
|
||||
var PSchedulerLog VgpuSchedulerLog
|
||||
ret := nvmlDeviceGetVgpuSchedulerLog(Device, &PSchedulerLog)
|
||||
return PSchedulerLog, ret
|
||||
}
|
||||
|
||||
func (Device Device) GetVgpuSchedulerLog() (VgpuSchedulerLog, Return) {
|
||||
return DeviceGetVgpuSchedulerLog(Device)
|
||||
}
|
||||
|
||||
// nvml.DeviceGetVgpuSchedulerState()
|
||||
func DeviceGetVgpuSchedulerState(Device Device) (VgpuSchedulerGetState, Return) {
|
||||
var PSchedulerState VgpuSchedulerGetState
|
||||
ret := nvmlDeviceGetVgpuSchedulerState(Device, &PSchedulerState)
|
||||
return PSchedulerState, ret
|
||||
}
|
||||
|
||||
func (Device Device) GetVgpuSchedulerState() (VgpuSchedulerGetState, Return) {
|
||||
return DeviceGetVgpuSchedulerState(Device)
|
||||
}
|
||||
|
||||
// nvml.DeviceSetVgpuSchedulerState()
|
||||
func DeviceSetVgpuSchedulerState(Device Device, PSchedulerState *VgpuSchedulerSetState) Return {
|
||||
return nvmlDeviceSetVgpuSchedulerState(Device, PSchedulerState)
|
||||
}
|
||||
|
||||
func (Device Device) SetVgpuSchedulerState(PSchedulerState *VgpuSchedulerSetState) Return {
|
||||
return DeviceSetVgpuSchedulerState(Device, PSchedulerState)
|
||||
}
|
||||
|
||||
// nvml.DeviceGetVgpuSchedulerCapabilities()
|
||||
func DeviceGetVgpuSchedulerCapabilities(Device Device) (VgpuSchedulerCapabilities, Return) {
|
||||
var PCapabilities VgpuSchedulerCapabilities
|
||||
ret := nvmlDeviceGetVgpuSchedulerCapabilities(Device, &PCapabilities)
|
||||
return PCapabilities, ret
|
||||
}
|
||||
|
||||
func (Device Device) GetVgpuSchedulerCapabilities() (VgpuSchedulerCapabilities, Return) {
|
||||
return DeviceGetVgpuSchedulerCapabilities(Device)
|
||||
}
|
||||
|
||||
// nvml.GpuInstanceGetComputeInstancePossiblePlacements()
|
||||
func GpuInstanceGetComputeInstancePossiblePlacements(GpuInstance GpuInstance, ProfileId int) ([]ComputeInstancePlacement, Return) {
|
||||
var Count uint32
|
||||
ret := nvmlGpuInstanceGetComputeInstancePossiblePlacements(GpuInstance, uint32(ProfileId), nil, &Count)
|
||||
if ret != SUCCESS {
|
||||
return nil, ret
|
||||
}
|
||||
if Count == 0 {
|
||||
return []ComputeInstancePlacement{}, ret
|
||||
}
|
||||
PlacementArray := make([]ComputeInstancePlacement, Count)
|
||||
ret = nvmlGpuInstanceGetComputeInstancePossiblePlacements(GpuInstance, uint32(ProfileId), &PlacementArray[0], &Count)
|
||||
return PlacementArray, ret
|
||||
}
|
||||
|
||||
func (GpuInstance GpuInstance) GetComputeInstancePossiblePlacements(ProfileId int) ([]ComputeInstancePlacement, Return) {
|
||||
return GpuInstanceGetComputeInstancePossiblePlacements(GpuInstance, ProfileId)
|
||||
}
|
||||
|
||||
// nvml.GpuInstanceCreateComputeInstanceWithPlacement()
|
||||
func GpuInstanceCreateComputeInstanceWithPlacement(GpuInstance GpuInstance, ProfileId int, Placement *ComputeInstancePlacement, ComputeInstance *ComputeInstance) Return {
|
||||
return nvmlGpuInstanceCreateComputeInstanceWithPlacement(GpuInstance, uint32(ProfileId), Placement, ComputeInstance)
|
||||
}
|
||||
|
||||
func (GpuInstance GpuInstance) CreateComputeInstanceWithPlacement(ProfileId int, Placement *ComputeInstancePlacement, ComputeInstance *ComputeInstance) Return {
|
||||
return GpuInstanceCreateComputeInstanceWithPlacement(GpuInstance, ProfileId, Placement, ComputeInstance)
|
||||
}
|
||||
|
||||
// nvml.DeviceGetGpuFabricInfo()
|
||||
func DeviceGetGpuFabricInfo(Device Device) (GpuFabricInfo, Return) {
|
||||
var GpuFabricInfo GpuFabricInfo
|
||||
ret := nvmlDeviceGetGpuFabricInfo(Device, &GpuFabricInfo)
|
||||
return GpuFabricInfo, ret
|
||||
}
|
||||
|
||||
func (Device Device) GetGpuFabricInfo() (GpuFabricInfo, Return) {
|
||||
return DeviceGetGpuFabricInfo(Device)
|
||||
}
|
||||
|
||||
// nvml.DeviceCcuGetStreamState()
|
||||
func DeviceCcuGetStreamState(Device Device) (int, Return) {
|
||||
var State uint32
|
||||
ret := nvmlDeviceCcuGetStreamState(Device, &State)
|
||||
return int(State), ret
|
||||
}
|
||||
|
||||
func (Device Device) CcuGetStreamState() (int, Return) {
|
||||
return DeviceCcuGetStreamState(Device)
|
||||
}
|
||||
|
||||
// nvml.DeviceCcuSetStreamState()
|
||||
func DeviceCcuSetStreamState(Device Device, State int) Return {
|
||||
return nvmlDeviceCcuSetStreamState(Device, uint32(State))
|
||||
}
|
||||
|
||||
func (Device Device) CcuSetStreamState(State int) Return {
|
||||
return DeviceCcuSetStreamState(Device, State)
|
||||
}
|
||||
|
||||
// nvml.DeviceSetNvLinkDeviceLowPowerThreshold()
|
||||
func DeviceSetNvLinkDeviceLowPowerThreshold(Device Device, Info *NvLinkPowerThres) Return {
|
||||
return nvmlDeviceSetNvLinkDeviceLowPowerThreshold(Device, Info)
|
||||
}
|
||||
|
||||
func (Device Device) SetNvLinkDeviceLowPowerThreshold(Info *NvLinkPowerThres) Return {
|
||||
return DeviceSetNvLinkDeviceLowPowerThreshold(Device, Info)
|
||||
}
|
||||
|
||||
387
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/nvml.go
generated
vendored
387
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/nvml.go
generated
vendored
@@ -486,6 +486,15 @@ func nvmlDeviceGetMaxPcieLinkGeneration(Device Device, MaxLinkGen *uint32) Retur
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceGetGpuMaxPcieLinkGeneration function as declared in nvml/nvml.h
|
||||
func nvmlDeviceGetGpuMaxPcieLinkGeneration(Device Device, MaxLinkGenDevice *uint32) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cMaxLinkGenDevice, _ := (*C.uint)(unsafe.Pointer(MaxLinkGenDevice)), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceGetGpuMaxPcieLinkGeneration(cDevice, cMaxLinkGenDevice)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceGetMaxPcieLinkWidth function as declared in nvml/nvml.h
|
||||
func nvmlDeviceGetMaxPcieLinkWidth(Device Device, MaxLinkWidth *uint32) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
@@ -670,6 +679,55 @@ func nvmlDeviceGetFanSpeed_v2(Device Device, Fan uint32, Speed *uint32) Return {
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceGetTargetFanSpeed function as declared in nvml/nvml.h
|
||||
func nvmlDeviceGetTargetFanSpeed(Device Device, Fan uint32, TargetSpeed *uint32) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cFan, _ := (C.uint)(Fan), cgoAllocsUnknown
|
||||
cTargetSpeed, _ := (*C.uint)(unsafe.Pointer(TargetSpeed)), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceGetTargetFanSpeed(cDevice, cFan, cTargetSpeed)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceSetDefaultFanSpeed_v2 function as declared in nvml/nvml.h
|
||||
func nvmlDeviceSetDefaultFanSpeed_v2(Device Device, Fan uint32) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cFan, _ := (C.uint)(Fan), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceSetDefaultFanSpeed_v2(cDevice, cFan)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceGetMinMaxFanSpeed function as declared in nvml/nvml.h
|
||||
func nvmlDeviceGetMinMaxFanSpeed(Device Device, MinSpeed *uint32, MaxSpeed *uint32) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cMinSpeed, _ := (*C.uint)(unsafe.Pointer(MinSpeed)), cgoAllocsUnknown
|
||||
cMaxSpeed, _ := (*C.uint)(unsafe.Pointer(MaxSpeed)), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceGetMinMaxFanSpeed(cDevice, cMinSpeed, cMaxSpeed)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceGetFanControlPolicy_v2 function as declared in nvml/nvml.h
|
||||
func nvmlDeviceGetFanControlPolicy_v2(Device Device, Fan uint32, Policy *FanControlPolicy) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cFan, _ := (C.uint)(Fan), cgoAllocsUnknown
|
||||
cPolicy, _ := (*C.nvmlFanControlPolicy_t)(unsafe.Pointer(Policy)), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceGetFanControlPolicy_v2(cDevice, cFan, cPolicy)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceSetFanControlPolicy function as declared in nvml/nvml.h
|
||||
func nvmlDeviceSetFanControlPolicy(Device Device, Fan uint32, Policy FanControlPolicy) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cFan, _ := (C.uint)(Fan), cgoAllocsUnknown
|
||||
cPolicy, _ := (C.nvmlFanControlPolicy_t)(Policy), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceSetFanControlPolicy(cDevice, cFan, cPolicy)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceGetNumFans function as declared in nvml/nvml.h
|
||||
func nvmlDeviceGetNumFans(Device Device, NumFans *uint32) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
@@ -709,6 +767,16 @@ func nvmlDeviceSetTemperatureThreshold(Device Device, ThresholdType TemperatureT
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceGetThermalSettings function as declared in nvml/nvml.h
|
||||
func nvmlDeviceGetThermalSettings(Device Device, SensorIndex uint32, PThermalSettings *GpuThermalSettings) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cSensorIndex, _ := (C.uint)(SensorIndex), cgoAllocsUnknown
|
||||
cPThermalSettings, _ := (*C.nvmlGpuThermalSettings_t)(unsafe.Pointer(PThermalSettings)), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceGetThermalSettings(cDevice, cSensorIndex, cPThermalSettings)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceGetPerformanceState function as declared in nvml/nvml.h
|
||||
func nvmlDeviceGetPerformanceState(Device Device, PState *Pstates) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
@@ -866,6 +934,15 @@ func nvmlDeviceGetEccMode(Device Device, Current *EnableState, Pending *EnableSt
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceGetDefaultEccMode function as declared in nvml/nvml.h
|
||||
func nvmlDeviceGetDefaultEccMode(Device Device, DefaultMode *EnableState) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cDefaultMode, _ := (*C.nvmlEnableState_t)(unsafe.Pointer(DefaultMode)), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceGetDefaultEccMode(cDevice, cDefaultMode)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceGetBoardId function as declared in nvml/nvml.h
|
||||
func nvmlDeviceGetBoardId(Device Device, BoardId *uint32) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
@@ -1153,6 +1230,15 @@ func nvmlDeviceGetPcieLinkMaxSpeed(Device Device, MaxSpeed *uint32) Return {
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceGetPcieSpeed function as declared in nvml/nvml.h
|
||||
func nvmlDeviceGetPcieSpeed(Device Device, PcieSpeed *uint32) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cPcieSpeed, _ := (*C.uint)(unsafe.Pointer(PcieSpeed)), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceGetPcieSpeed(cDevice, cPcieSpeed)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceGetAdaptiveClockInfoStatus function as declared in nvml/nvml.h
|
||||
func nvmlDeviceGetAdaptiveClockInfoStatus(Device Device, AdaptiveClockStatus *uint32) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
@@ -1635,6 +1721,16 @@ func nvmlDeviceGetFieldValues(Device Device, ValuesCount int32, Values *FieldVal
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceClearFieldValues function as declared in nvml/nvml.h
|
||||
func nvmlDeviceClearFieldValues(Device Device, ValuesCount int32, Values *FieldValue) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cValuesCount, _ := (C.int)(ValuesCount), cgoAllocsUnknown
|
||||
cValues, _ := (*C.nvmlFieldValue_t)(unsafe.Pointer(Values)), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceClearFieldValues(cDevice, cValuesCount, cValues)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceGetVirtualizationMode function as declared in nvml/nvml.h
|
||||
func nvmlDeviceGetVirtualizationMode(Device Device, PVirtualMode *GpuVirtualizationMode) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
@@ -1682,6 +1778,44 @@ func nvmlDeviceGetProcessUtilization(Device Device, Utilization *ProcessUtilizat
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceGetGspFirmwareVersion function as declared in nvml/nvml.h
|
||||
func nvmlDeviceGetGspFirmwareVersion(Device Device, Version *byte) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cVersion, _ := (*C.char)(unsafe.Pointer(Version)), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceGetGspFirmwareVersion(cDevice, cVersion)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceGetGspFirmwareMode function as declared in nvml/nvml.h
|
||||
func nvmlDeviceGetGspFirmwareMode(Device Device, IsEnabled *uint32, DefaultMode *uint32) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cIsEnabled, _ := (*C.uint)(unsafe.Pointer(IsEnabled)), cgoAllocsUnknown
|
||||
cDefaultMode, _ := (*C.uint)(unsafe.Pointer(DefaultMode)), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceGetGspFirmwareMode(cDevice, cIsEnabled, cDefaultMode)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlGetVgpuDriverCapabilities function as declared in nvml/nvml.h
|
||||
func nvmlGetVgpuDriverCapabilities(Capability VgpuDriverCapability, CapResult *uint32) Return {
|
||||
cCapability, _ := (C.nvmlVgpuDriverCapability_t)(Capability), cgoAllocsUnknown
|
||||
cCapResult, _ := (*C.uint)(unsafe.Pointer(CapResult)), cgoAllocsUnknown
|
||||
__ret := C.nvmlGetVgpuDriverCapabilities(cCapability, cCapResult)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceGetVgpuCapabilities function as declared in nvml/nvml.h
|
||||
func nvmlDeviceGetVgpuCapabilities(Device Device, Capability DeviceVgpuCapability, CapResult *uint32) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cCapability, _ := (C.nvmlDeviceVgpuCapability_t)(Capability), cgoAllocsUnknown
|
||||
cCapResult, _ := (*C.uint)(unsafe.Pointer(CapResult)), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceGetVgpuCapabilities(cDevice, cCapability, cCapResult)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceGetSupportedVgpus function as declared in nvml/nvml.h
|
||||
func nvmlDeviceGetSupportedVgpus(Device Device, VgpuCount *uint32, VgpuTypeIds *VgpuTypeId) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
@@ -1971,6 +2105,16 @@ func nvmlVgpuInstanceGetGpuPciId(VgpuInstance VgpuInstance, VgpuPciId *byte, Len
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlVgpuTypeGetCapabilities function as declared in nvml/nvml.h
|
||||
func nvmlVgpuTypeGetCapabilities(VgpuTypeId VgpuTypeId, Capability VgpuCapability, CapResult *uint32) Return {
|
||||
cVgpuTypeId, _ := (C.nvmlVgpuTypeId_t)(VgpuTypeId), cgoAllocsUnknown
|
||||
cCapability, _ := (C.nvmlVgpuCapability_t)(Capability), cgoAllocsUnknown
|
||||
cCapResult, _ := (*C.uint)(unsafe.Pointer(CapResult)), cgoAllocsUnknown
|
||||
__ret := C.nvmlVgpuTypeGetCapabilities(cVgpuTypeId, cCapability, cCapResult)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlVgpuInstanceGetMetadata function as declared in nvml/nvml.h
|
||||
func nvmlVgpuInstanceGetMetadata(VgpuInstance VgpuInstance, nvmlVgpuMetadata *nvmlVgpuMetadata, BufferSize *uint32) Return {
|
||||
cVgpuInstance, _ := (C.nvmlVgpuInstance_t)(VgpuInstance), cgoAllocsUnknown
|
||||
@@ -2011,6 +2155,42 @@ func nvmlDeviceGetPgpuMetadataString(Device Device, PgpuMetadata *byte, BufferSi
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceGetVgpuSchedulerLog function as declared in nvml/nvml.h
|
||||
func nvmlDeviceGetVgpuSchedulerLog(Device Device, PSchedulerLog *VgpuSchedulerLog) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cPSchedulerLog, _ := (*C.nvmlVgpuSchedulerLog_t)(unsafe.Pointer(PSchedulerLog)), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceGetVgpuSchedulerLog(cDevice, cPSchedulerLog)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceGetVgpuSchedulerState function as declared in nvml/nvml.h
|
||||
func nvmlDeviceGetVgpuSchedulerState(Device Device, PSchedulerState *VgpuSchedulerGetState) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cPSchedulerState, _ := (*C.nvmlVgpuSchedulerGetState_t)(unsafe.Pointer(PSchedulerState)), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceGetVgpuSchedulerState(cDevice, cPSchedulerState)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceSetVgpuSchedulerState function as declared in nvml/nvml.h
|
||||
func nvmlDeviceSetVgpuSchedulerState(Device Device, PSchedulerState *VgpuSchedulerSetState) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cPSchedulerState, _ := (*C.nvmlVgpuSchedulerSetState_t)(unsafe.Pointer(PSchedulerState)), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceSetVgpuSchedulerState(cDevice, cPSchedulerState)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceGetVgpuSchedulerCapabilities function as declared in nvml/nvml.h
|
||||
func nvmlDeviceGetVgpuSchedulerCapabilities(Device Device, PCapabilities *VgpuSchedulerCapabilities) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cPCapabilities, _ := (*C.nvmlVgpuSchedulerCapabilities_t)(unsafe.Pointer(PCapabilities)), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceGetVgpuSchedulerCapabilities(cDevice, cPCapabilities)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlGetVgpuVersion function as declared in nvml/nvml.h
|
||||
func nvmlGetVgpuVersion(Supported *VgpuVersion, Current *VgpuVersion) Return {
|
||||
cSupported, _ := (*C.nvmlVgpuVersion_t)(unsafe.Pointer(Supported)), cgoAllocsUnknown
|
||||
@@ -2266,6 +2446,17 @@ func nvmlGpuInstanceGetComputeInstanceRemainingCapacity(GpuInstance GpuInstance,
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlGpuInstanceGetComputeInstancePossiblePlacements function as declared in nvml/nvml.h
|
||||
func nvmlGpuInstanceGetComputeInstancePossiblePlacements(GpuInstance GpuInstance, ProfileId uint32, Placements *ComputeInstancePlacement, Count *uint32) Return {
|
||||
cGpuInstance, _ := *(*C.nvmlGpuInstance_t)(unsafe.Pointer(&GpuInstance)), cgoAllocsUnknown
|
||||
cProfileId, _ := (C.uint)(ProfileId), cgoAllocsUnknown
|
||||
cPlacements, _ := (*C.nvmlComputeInstancePlacement_t)(unsafe.Pointer(Placements)), cgoAllocsUnknown
|
||||
cCount, _ := (*C.uint)(unsafe.Pointer(Count)), cgoAllocsUnknown
|
||||
__ret := C.nvmlGpuInstanceGetComputeInstancePossiblePlacements(cGpuInstance, cProfileId, cPlacements, cCount)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlGpuInstanceCreateComputeInstance function as declared in nvml/nvml.h
|
||||
func nvmlGpuInstanceCreateComputeInstance(GpuInstance GpuInstance, ProfileId uint32, ComputeInstance *ComputeInstance) Return {
|
||||
cGpuInstance, _ := *(*C.nvmlGpuInstance_t)(unsafe.Pointer(&GpuInstance)), cgoAllocsUnknown
|
||||
@@ -2276,6 +2467,17 @@ func nvmlGpuInstanceCreateComputeInstance(GpuInstance GpuInstance, ProfileId uin
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlGpuInstanceCreateComputeInstanceWithPlacement function as declared in nvml/nvml.h
|
||||
func nvmlGpuInstanceCreateComputeInstanceWithPlacement(GpuInstance GpuInstance, ProfileId uint32, Placement *ComputeInstancePlacement, ComputeInstance *ComputeInstance) Return {
|
||||
cGpuInstance, _ := *(*C.nvmlGpuInstance_t)(unsafe.Pointer(&GpuInstance)), cgoAllocsUnknown
|
||||
cProfileId, _ := (C.uint)(ProfileId), cgoAllocsUnknown
|
||||
cPlacement, _ := (*C.nvmlComputeInstancePlacement_t)(unsafe.Pointer(Placement)), cgoAllocsUnknown
|
||||
cComputeInstance, _ := (*C.nvmlComputeInstance_t)(unsafe.Pointer(ComputeInstance)), cgoAllocsUnknown
|
||||
__ret := C.nvmlGpuInstanceCreateComputeInstanceWithPlacement(cGpuInstance, cProfileId, cPlacement, cComputeInstance)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlComputeInstanceDestroy function as declared in nvml/nvml.h
|
||||
func nvmlComputeInstanceDestroy(ComputeInstance ComputeInstance) Return {
|
||||
cComputeInstance, _ := *(*C.nvmlComputeInstance_t)(unsafe.Pointer(&ComputeInstance)), cgoAllocsUnknown
|
||||
@@ -2378,6 +2580,191 @@ func nvmlDeviceGetBusType(Device Device, _type *BusType) Return {
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceGetDynamicPstatesInfo function as declared in nvml/nvml.h
|
||||
func nvmlDeviceGetDynamicPstatesInfo(Device Device, PDynamicPstatesInfo *GpuDynamicPstatesInfo) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cPDynamicPstatesInfo, _ := (*C.nvmlGpuDynamicPstatesInfo_t)(unsafe.Pointer(PDynamicPstatesInfo)), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceGetDynamicPstatesInfo(cDevice, cPDynamicPstatesInfo)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceSetFanSpeed_v2 function as declared in nvml/nvml.h
|
||||
func nvmlDeviceSetFanSpeed_v2(Device Device, Fan uint32, Speed uint32) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cFan, _ := (C.uint)(Fan), cgoAllocsUnknown
|
||||
cSpeed, _ := (C.uint)(Speed), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceSetFanSpeed_v2(cDevice, cFan, cSpeed)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceGetGpcClkVfOffset function as declared in nvml/nvml.h
|
||||
func nvmlDeviceGetGpcClkVfOffset(Device Device, Offset *int32) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cOffset, _ := (*C.int)(unsafe.Pointer(Offset)), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceGetGpcClkVfOffset(cDevice, cOffset)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceSetGpcClkVfOffset function as declared in nvml/nvml.h
|
||||
func nvmlDeviceSetGpcClkVfOffset(Device Device, Offset int32) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cOffset, _ := (C.int)(Offset), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceSetGpcClkVfOffset(cDevice, cOffset)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceGetMemClkVfOffset function as declared in nvml/nvml.h
|
||||
func nvmlDeviceGetMemClkVfOffset(Device Device, Offset *int32) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cOffset, _ := (*C.int)(unsafe.Pointer(Offset)), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceGetMemClkVfOffset(cDevice, cOffset)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceSetMemClkVfOffset function as declared in nvml/nvml.h
|
||||
func nvmlDeviceSetMemClkVfOffset(Device Device, Offset int32) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cOffset, _ := (C.int)(Offset), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceSetMemClkVfOffset(cDevice, cOffset)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceGetMinMaxClockOfPState function as declared in nvml/nvml.h
|
||||
func nvmlDeviceGetMinMaxClockOfPState(Device Device, _type ClockType, Pstate Pstates, MinClockMHz *uint32, MaxClockMHz *uint32) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
c_type, _ := (C.nvmlClockType_t)(_type), cgoAllocsUnknown
|
||||
cPstate, _ := (C.nvmlPstates_t)(Pstate), cgoAllocsUnknown
|
||||
cMinClockMHz, _ := (*C.uint)(unsafe.Pointer(MinClockMHz)), cgoAllocsUnknown
|
||||
cMaxClockMHz, _ := (*C.uint)(unsafe.Pointer(MaxClockMHz)), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceGetMinMaxClockOfPState(cDevice, c_type, cPstate, cMinClockMHz, cMaxClockMHz)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceGetSupportedPerformanceStates function as declared in nvml/nvml.h
|
||||
func nvmlDeviceGetSupportedPerformanceStates(Device Device, Pstates *Pstates, Size uint32) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cPstates, _ := (*C.nvmlPstates_t)(unsafe.Pointer(Pstates)), cgoAllocsUnknown
|
||||
cSize, _ := (C.uint)(Size), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceGetSupportedPerformanceStates(cDevice, cPstates, cSize)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceGetGpcClkMinMaxVfOffset function as declared in nvml/nvml.h
|
||||
func nvmlDeviceGetGpcClkMinMaxVfOffset(Device Device, MinOffset *int32, MaxOffset *int32) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cMinOffset, _ := (*C.int)(unsafe.Pointer(MinOffset)), cgoAllocsUnknown
|
||||
cMaxOffset, _ := (*C.int)(unsafe.Pointer(MaxOffset)), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceGetGpcClkMinMaxVfOffset(cDevice, cMinOffset, cMaxOffset)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceGetMemClkMinMaxVfOffset function as declared in nvml/nvml.h
|
||||
func nvmlDeviceGetMemClkMinMaxVfOffset(Device Device, MinOffset *int32, MaxOffset *int32) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cMinOffset, _ := (*C.int)(unsafe.Pointer(MinOffset)), cgoAllocsUnknown
|
||||
cMaxOffset, _ := (*C.int)(unsafe.Pointer(MaxOffset)), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceGetMemClkMinMaxVfOffset(cDevice, cMinOffset, cMaxOffset)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceGetGpuFabricInfo function as declared in nvml/nvml.h
|
||||
func nvmlDeviceGetGpuFabricInfo(Device Device, GpuFabricInfo *GpuFabricInfo) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cGpuFabricInfo, _ := (*C.nvmlGpuFabricInfo_t)(unsafe.Pointer(GpuFabricInfo)), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceGetGpuFabricInfo(cDevice, cGpuFabricInfo)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlGpmMetricsGet function as declared in nvml/nvml.h
|
||||
func nvmlGpmMetricsGet(MetricsGet *GpmMetricsGetType) Return {
|
||||
cMetricsGet, _ := (*C.nvmlGpmMetricsGet_t)(unsafe.Pointer(MetricsGet)), cgoAllocsUnknown
|
||||
__ret := C.nvmlGpmMetricsGet(cMetricsGet)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlGpmSampleFree function as declared in nvml/nvml.h
|
||||
func nvmlGpmSampleFree(GpmSample GpmSample) Return {
|
||||
cGpmSample, _ := *(*C.nvmlGpmSample_t)(unsafe.Pointer(&GpmSample)), cgoAllocsUnknown
|
||||
__ret := C.nvmlGpmSampleFree(cGpmSample)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlGpmSampleAlloc function as declared in nvml/nvml.h
|
||||
func nvmlGpmSampleAlloc(GpmSample *GpmSample) Return {
|
||||
cGpmSample, _ := (*C.nvmlGpmSample_t)(unsafe.Pointer(GpmSample)), cgoAllocsUnknown
|
||||
__ret := C.nvmlGpmSampleAlloc(cGpmSample)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlGpmSampleGet function as declared in nvml/nvml.h
|
||||
func nvmlGpmSampleGet(Device Device, GpmSample GpmSample) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cGpmSample, _ := *(*C.nvmlGpmSample_t)(unsafe.Pointer(&GpmSample)), cgoAllocsUnknown
|
||||
__ret := C.nvmlGpmSampleGet(cDevice, cGpmSample)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlGpmMigSampleGet function as declared in nvml/nvml.h
|
||||
func nvmlGpmMigSampleGet(Device Device, GpuInstanceId uint32, GpmSample GpmSample) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cGpuInstanceId, _ := (C.uint)(GpuInstanceId), cgoAllocsUnknown
|
||||
cGpmSample, _ := *(*C.nvmlGpmSample_t)(unsafe.Pointer(&GpmSample)), cgoAllocsUnknown
|
||||
__ret := C.nvmlGpmMigSampleGet(cDevice, cGpuInstanceId, cGpmSample)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlGpmQueryDeviceSupport function as declared in nvml/nvml.h
|
||||
func nvmlGpmQueryDeviceSupport(Device Device, GpmSupport *GpmSupport) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cGpmSupport, _ := (*C.nvmlGpmSupport_t)(unsafe.Pointer(GpmSupport)), cgoAllocsUnknown
|
||||
__ret := C.nvmlGpmQueryDeviceSupport(cDevice, cGpmSupport)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceCcuGetStreamState function as declared in nvml/nvml.h
|
||||
func nvmlDeviceCcuGetStreamState(Device Device, State *uint32) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cState, _ := (*C.uint)(unsafe.Pointer(State)), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceCcuGetStreamState(cDevice, cState)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceCcuSetStreamState function as declared in nvml/nvml.h
|
||||
func nvmlDeviceCcuSetStreamState(Device Device, State uint32) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cState, _ := (C.uint)(State), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceCcuSetStreamState(cDevice, cState)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlDeviceSetNvLinkDeviceLowPowerThreshold function as declared in nvml/nvml.h
|
||||
func nvmlDeviceSetNvLinkDeviceLowPowerThreshold(Device Device, Info *NvLinkPowerThres) Return {
|
||||
cDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device)), cgoAllocsUnknown
|
||||
cInfo, _ := (*C.nvmlNvLinkPowerThres_t)(unsafe.Pointer(Info)), cgoAllocsUnknown
|
||||
__ret := C.nvmlDeviceSetNvLinkDeviceLowPowerThreshold(cDevice, cInfo)
|
||||
__v := (Return)(__ret)
|
||||
return __v
|
||||
}
|
||||
|
||||
// nvmlInit_v1 function as declared in nvml/nvml.h
|
||||
func nvmlInit_v1() Return {
|
||||
__ret := C.nvmlInit()
|
||||
|
||||
1336
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/nvml.h
generated
vendored
1336
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/nvml.h
generated
vendored
File diff suppressed because it is too large
Load Diff
138
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/types_gen.go
generated
vendored
138
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/types_gen.go
generated
vendored
@@ -119,6 +119,19 @@ type ViolationTime struct {
|
||||
ViolationTime uint64
|
||||
}
|
||||
|
||||
// GpuThermalSettingsSensor is the element type of GpuThermalSettings.Sensor.
// It consists of five int32 fields; their order must not change because the
// enclosing struct is handed to C through an unsafe pointer cast.
type GpuThermalSettingsSensor struct {
	Controller                     int32
	DefaultMinTemp, DefaultMaxTemp int32
	CurrentTemp, Target            int32
}
|
||||
|
||||
type GpuThermalSettings struct {
|
||||
Count uint32
|
||||
Sensor [3]GpuThermalSettingsSensor
|
||||
}
|
||||
|
||||
type ClkMonFaultInfo struct {
|
||||
ClkApiDomain uint32
|
||||
ClkDomainFaultMask uint32
|
||||
@@ -154,6 +167,73 @@ type VgpuProcessUtilizationSample struct {
|
||||
DecUtil uint32
|
||||
}
|
||||
|
||||
// VgpuSchedulerParamsVgpuSchedDataWithARR holds two uint32 values, AvgFactor
// followed by Timeslice (8 bytes in total, the same size as
// VgpuSchedulerParams).
type VgpuSchedulerParamsVgpuSchedDataWithARR struct {
	AvgFactor, Timeslice uint32
}
|
||||
|
||||
// VgpuSchedulerParamsVgpuSchedData holds a single uint32 Timeslice value.
type VgpuSchedulerParamsVgpuSchedData struct{ Timeslice uint32 }
|
||||
|
||||
// sizeofVgpuSchedulerParams is the byte length of VgpuSchedulerParams,
// expressed as the size of an 8-byte array.
const sizeofVgpuSchedulerParams = unsafe.Sizeof([8]byte{})
|
||||
|
||||
// VgpuSchedulerParams is an opaque byte array of sizeofVgpuSchedulerParams
// bytes. NOTE(review): presumably the raw storage of a C union whose variants
// are the VgpuSchedulerParamsVgpuSchedData* structs; confirm against nvml.h.
type VgpuSchedulerParams [sizeofVgpuSchedulerParams]byte
|
||||
|
||||
// VgpuSchedulerLogEntry is the element type of VgpuSchedulerLog.LogEntries.
// Field order must not change because the enclosing struct is handed to C
// through an unsafe pointer cast.
type VgpuSchedulerLogEntry struct {
	Timestamp, TimeRunTotal, TimeRun          uint64
	SwRunlistId                               uint32
	TargetTimeSlice, CumulativePreemptionTime uint64
}
|
||||
|
||||
type VgpuSchedulerLog struct {
|
||||
EngineId uint32
|
||||
SchedulerPolicy uint32
|
||||
IsEnabledARR uint32
|
||||
SchedulerParams [8]byte
|
||||
EntriesCount uint32
|
||||
LogEntries [200]VgpuSchedulerLogEntry
|
||||
}
|
||||
|
||||
// VgpuSchedulerGetState is the Go-side layout that
// nvmlDeviceGetVgpuSchedulerState reinterprets as
// C.nvmlVgpuSchedulerGetState_t, so field order and sizes must stay as
// declared.
type VgpuSchedulerGetState struct {
	SchedulerPolicy, IsEnabledARR uint32
	// SchedulerParams is 8 raw bytes, the same length as VgpuSchedulerParams.
	SchedulerParams [8]byte
}
|
||||
|
||||
// VgpuSchedulerSetParamsVgpuSchedDataWithARR holds two uint32 values,
// AvgFactor followed by Frequency (8 bytes in total, the same size as
// VgpuSchedulerSetParams).
type VgpuSchedulerSetParamsVgpuSchedDataWithARR struct {
	AvgFactor, Frequency uint32
}
|
||||
|
||||
// VgpuSchedulerSetParamsVgpuSchedData holds a single uint32 Timeslice value.
type VgpuSchedulerSetParamsVgpuSchedData struct{ Timeslice uint32 }
|
||||
|
||||
// sizeofVgpuSchedulerSetParams is the byte length of VgpuSchedulerSetParams,
// expressed as the size of an 8-byte array.
const sizeofVgpuSchedulerSetParams = unsafe.Sizeof([8]byte{})
|
||||
|
||||
// VgpuSchedulerSetParams is an opaque byte array of
// sizeofVgpuSchedulerSetParams bytes. NOTE(review): presumably the raw storage
// of a C union whose variants are the VgpuSchedulerSetParamsVgpuSchedData*
// structs; confirm against nvml.h.
type VgpuSchedulerSetParams [sizeofVgpuSchedulerSetParams]byte
|
||||
|
||||
// VgpuSchedulerSetState is the Go-side layout that
// nvmlDeviceSetVgpuSchedulerState reinterprets as
// C.nvmlVgpuSchedulerSetState_t, so field order and sizes must stay as
// declared.
type VgpuSchedulerSetState struct {
	SchedulerPolicy, EnableARRMode uint32
	// SchedulerParams is 8 raw bytes, the same length as
	// VgpuSchedulerSetParams.
	SchedulerParams [8]byte
}
|
||||
|
||||
// VgpuSchedulerCapabilities is the Go-side layout that
// nvmlDeviceGetVgpuSchedulerCapabilities reinterprets as
// C.nvmlVgpuSchedulerCapabilities_t, so field order and sizes must stay as
// declared.
type VgpuSchedulerCapabilities struct {
	SupportedSchedulers                    [3]uint32
	MaxTimeslice, MinTimeslice             uint32
	IsArrModeSupported                     uint32
	MaxFrequencyForARR, MinFrequencyForARR uint32
	MaxAvgFactorForARR, MinAvgFactorForARR uint32
}
|
||||
|
||||
type VgpuLicenseExpiry struct {
|
||||
Year uint32
|
||||
Month uint16
|
||||
@@ -210,8 +290,22 @@ type DeviceArchitecture uint32
|
||||
|
||||
// BusType is a uint32-based named type; nvmlDeviceGetBusType takes a *BusType
// output parameter.
type BusType uint32
|
||||
|
||||
// FanControlPolicy is a uint32-based named type. The fan-control bindings
// convert it to and from C.nvmlFanControlPolicy_t.
type FanControlPolicy uint32
|
||||
|
||||
// PowerSource is a uint32-based named type.
type PowerSource uint32
|
||||
|
||||
// GpuDynamicPstatesInfoUtilization is the element type of
// GpuDynamicPstatesInfo.Utilization. It consists of four uint32 fields; their
// order must not change because the enclosing struct is handed to C through
// an unsafe pointer cast.
type GpuDynamicPstatesInfoUtilization struct {
	BIsPresent, Percentage     uint32
	IncThreshold, DecThreshold uint32
}
|
||||
|
||||
type GpuDynamicPstatesInfo struct {
|
||||
Flags uint32
|
||||
Utilization [8]GpuDynamicPstatesInfoUtilization
|
||||
}
|
||||
|
||||
type FieldValue struct {
|
||||
FieldId uint32
|
||||
ScopeId uint32
|
||||
@@ -314,6 +408,16 @@ type FBCSessionInfo struct {
|
||||
AverageLatency uint32
|
||||
}
|
||||
|
||||
// GpuFabricState is a byte-based named type. Note that GpuFabricInfo.State is
// declared as plain uint8 rather than as GpuFabricState.
type GpuFabricState byte
|
||||
|
||||
// GpuFabricInfo is the Go-side layout that nvmlDeviceGetGpuFabricInfo
// reinterprets as C.nvmlGpuFabricInfo_t, so field order and sizes must stay
// as declared.
type GpuFabricInfo struct {
	ClusterUuid         [16]int8
	Status, PartitionId uint32
	State               uint8
	// Pad_cgo_0 is three bytes of explicit padding after State.
	Pad_cgo_0 [3]byte
}
|
||||
|
||||
// AffinityScope is a uint32-based named type.
type AffinityScope uint32
|
||||
|
||||
type VgpuVersion struct {
|
||||
@@ -443,3 +547,37 @@ type ComputeInstanceInfo struct {
|
||||
type ComputeInstance struct {
|
||||
Handle *_Ctype_struct_nvmlComputeInstance_st
|
||||
}
|
||||
|
||||
type GpmSample struct {
|
||||
Handle *_Ctype_struct_nvmlGpmSample_st
|
||||
}
|
||||
|
||||
// GpmMetricMetricInfo is the type of GpmMetric.MetricInfo and consists of
// three *int8 fields. NOTE(review): these are presumably C strings (char*)
// owned by the C side; confirm against nvml.h before dereferencing.
type GpmMetricMetricInfo struct {
	ShortName, LongName, Unit *int8
}
|
||||
|
||||
type GpmMetric struct {
|
||||
MetricId uint32
|
||||
NvmlReturn uint32
|
||||
Value float64
|
||||
MetricInfo GpmMetricMetricInfo
|
||||
}
|
||||
|
||||
type GpmMetricsGetType struct {
|
||||
Version uint32
|
||||
NumMetrics uint32
|
||||
Sample1 GpmSample
|
||||
Sample2 GpmSample
|
||||
Metrics [98]GpmMetric
|
||||
}
|
||||
|
||||
// GpmSupport is the Go-side layout that nvmlGpmQueryDeviceSupport
// reinterprets as C.nvmlGpmSupport_t, so field order and sizes must stay as
// declared.
type GpmSupport struct {
	Version, IsSupportedDevice uint32
}
|
||||
|
||||
// NvLinkPowerThres is the Go-side layout that
// nvmlDeviceSetNvLinkDeviceLowPowerThreshold reinterprets as
// C.nvmlNvLinkPowerThres_t. It holds a single uint32 LowPwrThreshold value.
type NvLinkPowerThres struct{ LowPwrThreshold uint32 }
|
||||
|
||||
18
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/vgpu.go
generated
vendored
18
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/vgpu.go
generated
vendored
@@ -460,3 +460,21 @@ func VgpuInstanceGetMdevUUID(VgpuInstance VgpuInstance) (string, Return) {
|
||||
func (VgpuInstance VgpuInstance) GetMdevUUID() (string, Return) {
|
||||
return VgpuInstanceGetMdevUUID(VgpuInstance)
|
||||
}
|
||||
|
||||
// nvml.VgpuTypeGetCapabilities()
|
||||
func VgpuTypeGetCapabilities(VgpuTypeId VgpuTypeId, Capability VgpuCapability) (bool, Return) {
|
||||
var CapResult uint32
|
||||
ret := nvmlVgpuTypeGetCapabilities(VgpuTypeId, Capability, &CapResult)
|
||||
return (CapResult != 0), ret
|
||||
}
|
||||
|
||||
func (VgpuTypeId VgpuTypeId) GetCapabilities(Capability VgpuCapability) (bool, Return) {
|
||||
return VgpuTypeGetCapabilities(VgpuTypeId, Capability)
|
||||
}
|
||||
|
||||
// nvml.GetVgpuDriverCapabilities()
|
||||
func GetVgpuDriverCapabilities(Capability VgpuDriverCapability) (bool, Return) {
|
||||
var CapResult uint32
|
||||
ret := nvmlGetVgpuDriverCapabilities(Capability, &CapResult)
|
||||
return (CapResult != 0), ret
|
||||
}
|
||||
|
||||
36
vendor/gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/info/info.go
generated
vendored
36
vendor/gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/info/info.go
generated
vendored
@@ -27,6 +27,7 @@ import (
|
||||
|
||||
// Interface provides the API to the info package
|
||||
type Interface interface {
|
||||
HasDXCore() (bool, string)
|
||||
HasNvml() (bool, string)
|
||||
IsTegraSystem() (bool, string)
|
||||
}
|
||||
@@ -37,17 +38,26 @@ type infolib struct {
|
||||
|
||||
var _ Interface = &infolib{}
|
||||
|
||||
// HasDXCore returns true if DXCore is detected on the system.
|
||||
func (i *infolib) HasDXCore() (bool, string) {
|
||||
const (
|
||||
libraryName = "libdxcore.so"
|
||||
)
|
||||
if err := assertHasLibrary(libraryName); err != nil {
|
||||
return false, fmt.Sprintf("could not load DXCore library: %v", err)
|
||||
}
|
||||
|
||||
return true, "found DXCore library"
|
||||
}
|
||||
|
||||
// HasNvml returns true if NVML is detected on the system
|
||||
func (i *infolib) HasNvml() (bool, string) {
|
||||
const (
|
||||
nvmlLibraryName = "libnvidia-ml.so.1"
|
||||
nvmlLibraryLoadFlags = dl.RTLD_LAZY
|
||||
libraryName = "libnvidia-ml.so.1"
|
||||
)
|
||||
lib := dl.New(nvmlLibraryName, nvmlLibraryLoadFlags)
|
||||
if err := lib.Open(); err != nil {
|
||||
return false, fmt.Sprintf("could not load NVML: %v", err)
|
||||
if err := assertHasLibrary(libraryName); err != nil {
|
||||
return false, fmt.Sprintf("could not load NVML library: %v", err)
|
||||
}
|
||||
defer lib.Close()
|
||||
|
||||
return true, "found NVML library"
|
||||
}
|
||||
@@ -76,3 +86,17 @@ func (i *infolib) IsTegraSystem() (bool, string) {
|
||||
|
||||
return false, fmt.Sprintf("%v has no 'tegra' prefix", tegraFamilyFile)
|
||||
}
|
||||
|
||||
// assertHasLibrary returns an error if the specified library cannot be loaded
|
||||
func assertHasLibrary(libraryName string) error {
|
||||
const (
|
||||
libraryLoadFlags = dl.RTLD_LAZY
|
||||
)
|
||||
lib := dl.New(libraryName, libraryLoadFlags)
|
||||
if err := lib.Open(); err != nil {
|
||||
return err
|
||||
}
|
||||
defer lib.Close()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
4
vendor/modules.txt
vendored
4
vendor/modules.txt
vendored
@@ -2,7 +2,7 @@
|
||||
## explicit; go 1.16
|
||||
github.com/BurntSushi/toml
|
||||
github.com/BurntSushi/toml/internal
|
||||
# github.com/NVIDIA/go-nvml v0.11.6-0.0.20220823120812-7e2082095e82
|
||||
# github.com/NVIDIA/go-nvml v0.12.0-0
|
||||
## explicit; go 1.15
|
||||
github.com/NVIDIA/go-nvml/pkg/dl
|
||||
github.com/NVIDIA/go-nvml/pkg/nvml
|
||||
@@ -62,7 +62,7 @@ github.com/syndtr/gocapability/capability
|
||||
github.com/urfave/cli/v2
|
||||
# github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb
|
||||
## explicit
|
||||
# gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230119114711-6fe07bb33342
|
||||
# gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230209143738-95328d8c4438
|
||||
## explicit; go 1.16
|
||||
gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device
|
||||
gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/info
|
||||
|
||||
@@ -13,7 +13,13 @@
|
||||
# limitations under the License.
|
||||
|
||||
LIB_NAME := nvidia-container-toolkit
|
||||
LIB_VERSION := 1.12.0
|
||||
LIB_VERSION := 1.13.0
|
||||
LIB_TAG := rc.1
|
||||
|
||||
# The package version is the combination of the library version and tag.
|
||||
# If the tag is specified the two components are joined with a tilde (~).
|
||||
PACKAGE_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
|
||||
PACKAGE_REVISION := 1
|
||||
|
||||
# Specify the nvidia-docker2 and nvidia-container-runtime package versions.
|
||||
# Note: The build tooling uses `LIB_TAG` above as the version tag.
|
||||
|
||||
Reference in New Issue
Block a user