mirror of
				https://github.com/NVIDIA/nvidia-container-toolkit
				synced 2025-06-26 18:18:24 +00:00 
			
		
		
		
	Add discovery of GDS and MOFED devices
Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
		
							parent
							
								
									7f47a61986
								
							
						
					
					
						commit
						cb34f7c6d1
					
				
							
								
								
									
										79
									
								
								internal/discover/gds.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										79
									
								
								internal/discover/gds.go
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,79 @@ | |||||||
|  | /** | ||||||
|  | # Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved. | ||||||
|  | # | ||||||
|  | # Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
|  | # you may not use this file except in compliance with the License. | ||||||
|  | # You may obtain a copy of the License at | ||||||
|  | # | ||||||
|  | #     http://www.apache.org/licenses/LICENSE-2.0
 | ||||||
|  | # | ||||||
|  | # Unless required by applicable law or agreed to in writing, software | ||||||
|  | # distributed under the License is distributed on an "AS IS" BASIS, | ||||||
|  | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
|  | # See the License for the specific language governing permissions and | ||||||
|  | # limitations under the License. | ||||||
|  | **/ | ||||||
|  | 
 | ||||||
|  | package discover | ||||||
|  | 
 | ||||||
|  | import ( | ||||||
|  | 	"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup" | ||||||
|  | 	"github.com/sirupsen/logrus" | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | type gdsDeviceDiscoverer struct { | ||||||
|  | 	None | ||||||
|  | 	logger  *logrus.Logger | ||||||
|  | 	devices Discover | ||||||
|  | 	mounts  Discover | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // NewGDSDiscoverer creates a discoverer for GPUDirect Storage devices and mounts.
 | ||||||
|  | func NewGDSDiscoverer(logger *logrus.Logger, root string) (Discover, error) { | ||||||
|  | 	devices := &mounts{ | ||||||
|  | 		logger:   logger, | ||||||
|  | 		lookup:   lookup.NewCharDeviceLocator(logger, root), | ||||||
|  | 		required: []string{"/dev/nvidia-fs*"}, | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	mounts := &mounts{ | ||||||
|  | 		logger:   logger, | ||||||
|  | 		lookup:   lookup.NewDirectoryLocator(logger, root), | ||||||
|  | 		required: []string{"/run/udev"}, | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	d := gdsDeviceDiscoverer{ | ||||||
|  | 		logger:  logger, | ||||||
|  | 		devices: devices, | ||||||
|  | 		mounts:  mounts, | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return &d, nil | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Devices discoveres the nvidia-fs device nodes for use with GPUDirect Storage
 | ||||||
|  | func (d *gdsDeviceDiscoverer) Devices() ([]Device, error) { | ||||||
|  | 	devicesAsMounts, err := d.devices.Mounts() | ||||||
|  | 	if err != nil { | ||||||
|  | 		d.logger.Debugf("Could not locate GDS devices: %v", err) | ||||||
|  | 		return nil, nil | ||||||
|  | 	} | ||||||
|  | 	var devices []Device | ||||||
|  | 	for _, mount := range devicesAsMounts { | ||||||
|  | 		devices = append(devices, Device(mount)) | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return devices, nil | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Mounts discovers the required mounts for GDS.
 | ||||||
|  | // If no devices are discovered the discovered mounts are empty
 | ||||||
|  | func (d *gdsDeviceDiscoverer) Mounts() ([]Mount, error) { | ||||||
|  | 	devices, err := d.Devices() | ||||||
|  | 	if err != nil || len(devices) == 0 { | ||||||
|  | 		d.logger.Debugf("No nvidia-fs devices detected; skipping detection of mounts") | ||||||
|  | 		return nil, nil | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return d.mounts.Mounts() | ||||||
|  | } | ||||||
							
								
								
									
										53
									
								
								internal/discover/mofed.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								internal/discover/mofed.go
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,53 @@ | |||||||
|  | /** | ||||||
|  | # Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved. | ||||||
|  | # | ||||||
|  | # Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
|  | # you may not use this file except in compliance with the License. | ||||||
|  | # You may obtain a copy of the License at | ||||||
|  | # | ||||||
|  | #     http://www.apache.org/licenses/LICENSE-2.0
 | ||||||
|  | # | ||||||
|  | # Unless required by applicable law or agreed to in writing, software | ||||||
|  | # distributed under the License is distributed on an "AS IS" BASIS, | ||||||
|  | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
|  | # See the License for the specific language governing permissions and | ||||||
|  | # limitations under the License. | ||||||
|  | **/ | ||||||
|  | 
 | ||||||
|  | package discover | ||||||
|  | 
 | ||||||
|  | import ( | ||||||
|  | 	"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup" | ||||||
|  | 	"github.com/sirupsen/logrus" | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | type mofedDeviceDiscoverer mounts | ||||||
|  | 
 | ||||||
|  | // NewMOFEDDiscoverer creates a discoverer for MOFED devices.
 | ||||||
|  | func NewMOFEDDiscoverer(logger *logrus.Logger, root string) (Discover, error) { | ||||||
|  | 	devices := &mofedDeviceDiscoverer{ | ||||||
|  | 		logger: logger, | ||||||
|  | 		lookup: lookup.NewCharDeviceLocator(logger, root), | ||||||
|  | 		required: []string{ | ||||||
|  | 			"/dev/infiniband/uverbs*", | ||||||
|  | 			"/dev/infiniband/rdma_cm", | ||||||
|  | 		}, | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return devices, nil | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Devices discovers the uverbs* and rdma_cm device nodes for use with GPUDirect Storage and the MOFED stack.
 | ||||||
|  | func (d *mofedDeviceDiscoverer) Devices() ([]Device, error) { | ||||||
|  | 	devicesAsMounts, err := (*mounts)(d).Mounts() | ||||||
|  | 	if err != nil { | ||||||
|  | 		d.logger.Debugf("Could not locate MOFED devices: %v", err) | ||||||
|  | 		return nil, nil | ||||||
|  | 	} | ||||||
|  | 	var devices []Device | ||||||
|  | 	for _, mount := range devicesAsMounts { | ||||||
|  | 		devices = append(devices, Device(mount)) | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return devices, nil | ||||||
|  | } | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user