/*** NVML VERSION: 11.6.55 ***/ /*** From https://api.anaconda.org/download/nvidia/cuda-nvml-dev/11.6.55/linux-64/cuda-nvml-dev-11.6.55-haa9ef22_0.tar.bz2 ***/ /* * Copyright 1993-2021 NVIDIA Corporation. All rights reserved. * * NOTICE TO USER: * * This source code is subject to NVIDIA ownership rights under U.S. and * international Copyright laws. Users and possessors of this source code * are hereby granted a nonexclusive, royalty-free license to use this code * in individual and commercial software. * * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE * OR PERFORMANCE OF THIS SOURCE CODE. * * U.S. Government End Users. This source code is a "commercial item" as * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of * "commercial computer software" and "commercial computer software * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) * and is provided to the U.S. Government only as a commercial end item. * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the * source code with only those rights set forth herein. * * Any use of this source code in individual and commercial software must * include, in the user documentation and internal comments to the code, * the above Disclaimer and U.S. Government End Users Notice. */ /* NVML API Reference The NVIDIA Management Library (NVML) is a C-based programmatic interface for monitoring and managing various states within NVIDIA Tesla &tm; GPUs. It is intended to be a platform for building 3rd party applications, and is also the underlying library for the NVIDIA-supported nvidia-smi tool. NVML is thread-safe so it is safe to make simultaneous NVML calls from multiple threads. API Documentation Supported platforms: - Windows: Windows Server 2008 R2 64bit, Windows Server 2012 R2 64bit, Windows 7 64bit, Windows 8 64bit, Windows 10 64bit - Linux: 32-bit and 64-bit - Hypervisors: Windows Server 2008R2/2012 Hyper-V 64bit, Citrix XenServer 6.2 SP1+, VMware ESX 5.1/5.5 Supported products: - Full Support - All Tesla products, starting with the Fermi architecture - All Quadro products, starting with the Fermi architecture - All vGPU Software products, starting with the Kepler architecture - Selected GeForce Titan products - Limited Support - All Geforce products, starting with the Fermi architecture The NVML library can be found at \%ProgramW6432\%\\"NVIDIA Corporation"\\NVSMI\\ on Windows. It is not be added to the system path by default. To dynamically link to NVML, add this path to the PATH environmental variable. To dynamically load NVML, call LoadLibrary with this path. On Linux the NVML library will be found on the standard library path. For 64 bit Linux, both the 32 bit and 64 bit NVML libraries will be installed. Online documentation for this library is available at http://docs.nvidia.com/deploy/nvml-api/index.html */ #ifndef __nvml_nvml_h__ #define __nvml_nvml_h__ #ifdef __cplusplus extern "C" { #endif /* * On Windows, set up methods for DLL export * define NVML_STATIC_IMPORT when using nvml_loader library */ #if defined _WINDOWS #if !defined NVML_STATIC_IMPORT #if defined NVML_LIB_EXPORT #define DECLDIR __declspec(dllexport) #else #define DECLDIR __declspec(dllimport) #endif #else #define DECLDIR #endif #else #define DECLDIR #endif /** * NVML API versioning support */ #define NVML_API_VERSION 11 #define NVML_API_VERSION_STR "11" /** * Defining NVML_NO_UNVERSIONED_FUNC_DEFS will disable "auto upgrading" of APIs. * e.g. the user will have to call nvmlInit_v2 instead of nvmlInit. Enable this * guard if you need to support older versions of the API */ #ifndef NVML_NO_UNVERSIONED_FUNC_DEFS #define nvmlInit nvmlInit_v2 #define nvmlDeviceGetPciInfo nvmlDeviceGetPciInfo_v3 #define nvmlDeviceGetCount nvmlDeviceGetCount_v2 #define nvmlDeviceGetHandleByIndex nvmlDeviceGetHandleByIndex_v2 #define nvmlDeviceGetHandleByPciBusId nvmlDeviceGetHandleByPciBusId_v2 #define nvmlDeviceGetNvLinkRemotePciInfo nvmlDeviceGetNvLinkRemotePciInfo_v2 #define nvmlDeviceRemoveGpu nvmlDeviceRemoveGpu_v2 #define nvmlDeviceGetGridLicensableFeatures nvmlDeviceGetGridLicensableFeatures_v4 #define nvmlEventSetWait nvmlEventSetWait_v2 #define nvmlDeviceGetAttributes nvmlDeviceGetAttributes_v2 #define nvmlComputeInstanceGetInfo nvmlComputeInstanceGetInfo_v2 #define nvmlDeviceGetComputeRunningProcesses nvmlDeviceGetComputeRunningProcesses_v3 #define nvmlDeviceGetGraphicsRunningProcesses nvmlDeviceGetGraphicsRunningProcesses_v3 #define nvmlDeviceGetMPSComputeRunningProcesses nvmlDeviceGetMPSComputeRunningProcesses_v3 #define nvmlBlacklistDeviceInfo_t nvmlExcludedDeviceInfo_t #define nvmlGetBlacklistDeviceCount nvmlGetExcludedDeviceCount #define nvmlGetBlacklistDeviceInfoByIndex nvmlGetExcludedDeviceInfoByIndex #define nvmlDeviceGetGpuInstancePossiblePlacements nvmlDeviceGetGpuInstancePossiblePlacements_v2 #define nvmlVgpuInstanceGetLicenseInfo nvmlVgpuInstanceGetLicenseInfo_v2 #endif // #ifndef NVML_NO_UNVERSIONED_FUNC_DEFS #define NVML_STRUCT_VERSION(data, ver) (unsigned int)(sizeof(nvml ## data ## _v ## ver ## _t) | \ (ver << 24U)) /***************************************************************************************************/ /** @defgroup nvmlDeviceStructs Device Structs * @{ */ /***************************************************************************************************/ /** * Special constant that some fields take when they are not available. * Used when only part of the struct is not available. * * Each structure explicitly states when to check for this value. */ #define NVML_VALUE_NOT_AVAILABLE (-1) typedef struct { struct nvmlDevice_st* handle; } nvmlDevice_t; /** * Buffer size guaranteed to be large enough for pci bus id */ #define NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE 32 /** * Buffer size guaranteed to be large enough for pci bus id for ::busIdLegacy */ #define NVML_DEVICE_PCI_BUS_ID_BUFFER_V2_SIZE 16 /** * PCI information about a GPU device. */ typedef struct nvmlPciInfo_st { char busIdLegacy[NVML_DEVICE_PCI_BUS_ID_BUFFER_V2_SIZE]; //!< The legacy tuple domain:bus:device.function PCI identifier (& NULL terminator) unsigned int domain; //!< The PCI domain on which the device's bus resides, 0 to 0xffffffff unsigned int bus; //!< The bus on which the device resides, 0 to 0xff unsigned int device; //!< The device's id on the bus, 0 to 31 unsigned int pciDeviceId; //!< The combined 16-bit device id and 16-bit vendor id // Added in NVML 2.285 API unsigned int pciSubSystemId; //!< The 32-bit Sub System Device ID char busId[NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE]; //!< The tuple domain:bus:device.function PCI identifier (& NULL terminator) } nvmlPciInfo_t; /** * PCI format string for ::busIdLegacy */ #define NVML_DEVICE_PCI_BUS_ID_LEGACY_FMT "%04X:%02X:%02X.0" /** * PCI format string for ::busId */ #define NVML_DEVICE_PCI_BUS_ID_FMT "%08X:%02X:%02X.0" /** * Utility macro for filling the pci bus id format from a nvmlPciInfo_t */ #define NVML_DEVICE_PCI_BUS_ID_FMT_ARGS(pciInfo) (pciInfo)->domain, \ (pciInfo)->bus, \ (pciInfo)->device /** * Detailed ECC error counts for a device. * * @deprecated Different GPU families can have different memory error counters * See \ref nvmlDeviceGetMemoryErrorCounter */ typedef struct nvmlEccErrorCounts_st { unsigned long long l1Cache; //!< L1 cache errors unsigned long long l2Cache; //!< L2 cache errors unsigned long long deviceMemory; //!< Device memory errors unsigned long long registerFile; //!< Register file errors } nvmlEccErrorCounts_t; /** * Utilization information for a device. * Each sample period may be between 1 second and 1/6 second, depending on the product being queried. */ typedef struct nvmlUtilization_st { unsigned int gpu; //!< Percent of time over the past sample period during which one or more kernels was executing on the GPU unsigned int memory; //!< Percent of time over the past sample period during which global (device) memory was being read or written } nvmlUtilization_t; /** * Memory allocation information for a device (v1). * The total amount is equal to the sum of the amounts of free and used memory. */ typedef struct nvmlMemory_st { unsigned long long total; //!< Total physical device memory (in bytes) unsigned long long free; //!< Unallocated device memory (in bytes) unsigned long long used; //!< Sum of Reserved and Allocated device memory (in bytes). //!< Note that the driver/GPU always sets aside a small amount of memory for bookkeeping } nvmlMemory_t; /** * Memory allocation information for a device (v2). * * Version 2 adds versioning for the struct and the amount of system-reserved memory as an output. * @note The \ref nvmlMemory_v2_t.used amount also includes the \ref nvmlMemory_v2_t.reserved amount. */ typedef struct nvmlMemory_v2_st { unsigned int version; //!< Structure format version (must be 2) unsigned long long total; //!< Total physical device memory (in bytes) unsigned long long reserved; //!< Device memory (in bytes) reserved for system use (driver or firmware) unsigned long long free; //!< Unallocated device memory (in bytes) unsigned long long used; //!< Allocated device memory (in bytes). Note that the driver/GPU always sets aside a small amount of memory for bookkeeping } nvmlMemory_v2_t; #define nvmlMemory_v2 NVML_STRUCT_VERSION(Memory, 2) /** * BAR1 Memory allocation Information for a device */ typedef struct nvmlBAR1Memory_st { unsigned long long bar1Total; //!< Total BAR1 Memory (in bytes) unsigned long long bar1Free; //!< Unallocated BAR1 Memory (in bytes) unsigned long long bar1Used; //!< Allocated Used Memory (in bytes) }nvmlBAR1Memory_t; /** * Information about running compute processes on the GPU, legacy version * for older versions of the API. */ typedef struct nvmlProcessInfo_v1_st { unsigned int pid; //!< Process ID unsigned long long usedGpuMemory; //!< Amount of used GPU memory in bytes. //! Under WDDM, \ref NVML_VALUE_NOT_AVAILABLE is always reported //! because Windows KMD manages all the memory and not the NVIDIA driver } nvmlProcessInfo_v1_t; /** * Information about running compute processes on the GPU */ typedef struct nvmlProcessInfo_v2_st { unsigned int pid; //!< Process ID unsigned long long usedGpuMemory; //!< Amount of used GPU memory in bytes. //! Under WDDM, \ref NVML_VALUE_NOT_AVAILABLE is always reported //! because Windows KMD manages all the memory and not the NVIDIA driver unsigned int gpuInstanceId; //!< If MIG is enabled, stores a valid GPU instance ID. gpuInstanceId is set to // 0xFFFFFFFF otherwise. unsigned int computeInstanceId; //!< If MIG is enabled, stores a valid compute instance ID. computeInstanceId is set to // 0xFFFFFFFF otherwise. } nvmlProcessInfo_v2_t; /** * Information about running compute processes on the GPU * Version 2 adds versioning for the struct and the conf compute protected memory in output. */ typedef struct nvmlProcessInfo_st { unsigned int pid; //!< Process ID unsigned long long usedGpuMemory; //!< Amount of used GPU memory in bytes. //! Under WDDM, \ref NVML_VALUE_NOT_AVAILABLE is always reported //! because Windows KMD manages all the memory and not the NVIDIA driver unsigned int gpuInstanceId; //!< If MIG is enabled, stores a valid GPU instance ID. gpuInstanceId is set to // 0xFFFFFFFF otherwise. unsigned int computeInstanceId; //!< If MIG is enabled, stores a valid compute instance ID. computeInstanceId is set to // 0xFFFFFFFF otherwise. } nvmlProcessInfo_t; typedef struct nvmlDeviceAttributes_st { unsigned int multiprocessorCount; //!< Streaming Multiprocessor count unsigned int sharedCopyEngineCount; //!< Shared Copy Engine count unsigned int sharedDecoderCount; //!< Shared Decoder Engine count unsigned int sharedEncoderCount; //!< Shared Encoder Engine count unsigned int sharedJpegCount; //!< Shared JPEG Engine count unsigned int sharedOfaCount; //!< Shared OFA Engine count unsigned int gpuInstanceSliceCount; //!< GPU instance slice count unsigned int computeInstanceSliceCount; //!< Compute instance slice count unsigned long long memorySizeMB; //!< Device memory size (in MiB) } nvmlDeviceAttributes_t; /** * Possible values that classify the remap availability for each bank. The max * field will contain the number of banks that have maximum remap availability * (all reserved rows are available). None means that there are no reserved * rows available. */ typedef struct nvmlRowRemapperHistogramValues_st { unsigned int max; unsigned int high; unsigned int partial; unsigned int low; unsigned int none; } nvmlRowRemapperHistogramValues_t; /** * Enum to represent type of bridge chip */ typedef enum nvmlBridgeChipType_enum { NVML_BRIDGE_CHIP_PLX = 0, NVML_BRIDGE_CHIP_BRO4 = 1 }nvmlBridgeChipType_t; /** * Maximum number of NvLink links supported */ #define NVML_NVLINK_MAX_LINKS 12 /** * Enum to represent the NvLink utilization counter packet units */ typedef enum nvmlNvLinkUtilizationCountUnits_enum { NVML_NVLINK_COUNTER_UNIT_CYCLES = 0, // count by cycles NVML_NVLINK_COUNTER_UNIT_PACKETS = 1, // count by packets NVML_NVLINK_COUNTER_UNIT_BYTES = 2, // count by bytes NVML_NVLINK_COUNTER_UNIT_RESERVED = 3, // count reserved for internal use // this must be last NVML_NVLINK_COUNTER_UNIT_COUNT } nvmlNvLinkUtilizationCountUnits_t; /** * Enum to represent the NvLink utilization counter packet types to count * ** this is ONLY applicable with the units as packets or bytes * ** as specified in \a nvmlNvLinkUtilizationCountUnits_t * ** all packet filter descriptions are target GPU centric * ** these can be "OR'd" together */ typedef enum nvmlNvLinkUtilizationCountPktTypes_enum { NVML_NVLINK_COUNTER_PKTFILTER_NOP = 0x1, // no operation packets NVML_NVLINK_COUNTER_PKTFILTER_READ = 0x2, // read packets NVML_NVLINK_COUNTER_PKTFILTER_WRITE = 0x4, // write packets NVML_NVLINK_COUNTER_PKTFILTER_RATOM = 0x8, // reduction atomic requests NVML_NVLINK_COUNTER_PKTFILTER_NRATOM = 0x10, // non-reduction atomic requests NVML_NVLINK_COUNTER_PKTFILTER_FLUSH = 0x20, // flush requests NVML_NVLINK_COUNTER_PKTFILTER_RESPDATA = 0x40, // responses with data NVML_NVLINK_COUNTER_PKTFILTER_RESPNODATA = 0x80, // responses without data NVML_NVLINK_COUNTER_PKTFILTER_ALL = 0xFF // all packets } nvmlNvLinkUtilizationCountPktTypes_t; /** * Struct to define the NVLINK counter controls */ typedef struct nvmlNvLinkUtilizationControl_st { nvmlNvLinkUtilizationCountUnits_t units; nvmlNvLinkUtilizationCountPktTypes_t pktfilter; } nvmlNvLinkUtilizationControl_t; /** * Enum to represent NvLink queryable capabilities */ typedef enum nvmlNvLinkCapability_enum { NVML_NVLINK_CAP_P2P_SUPPORTED = 0, // P2P over NVLink is supported NVML_NVLINK_CAP_SYSMEM_ACCESS = 1, // Access to system memory is supported NVML_NVLINK_CAP_P2P_ATOMICS = 2, // P2P atomics are supported NVML_NVLINK_CAP_SYSMEM_ATOMICS= 3, // System memory atomics are supported NVML_NVLINK_CAP_SLI_BRIDGE = 4, // SLI is supported over this link NVML_NVLINK_CAP_VALID = 5, // Link is supported on this device // should be last NVML_NVLINK_CAP_COUNT } nvmlNvLinkCapability_t; /** * Enum to represent NvLink queryable error counters */ typedef enum nvmlNvLinkErrorCounter_enum { NVML_NVLINK_ERROR_DL_REPLAY = 0, // Data link transmit replay error counter NVML_NVLINK_ERROR_DL_RECOVERY = 1, // Data link transmit recovery error counter NVML_NVLINK_ERROR_DL_CRC_FLIT = 2, // Data link receive flow control digit CRC error counter NVML_NVLINK_ERROR_DL_CRC_DATA = 3, // Data link receive data CRC error counter NVML_NVLINK_ERROR_DL_ECC_DATA = 4, // Data link receive data ECC error counter // this must be last NVML_NVLINK_ERROR_COUNT } nvmlNvLinkErrorCounter_t; /** * Enum to represent NvLink's remote device type */ typedef enum nvmlIntNvLinkDeviceType_enum { NVML_NVLINK_DEVICE_TYPE_GPU = 0x00, NVML_NVLINK_DEVICE_TYPE_IBMNPU = 0x01, NVML_NVLINK_DEVICE_TYPE_SWITCH = 0x02, NVML_NVLINK_DEVICE_TYPE_UNKNOWN = 0xFF } nvmlIntNvLinkDeviceType_t; /** * Represents level relationships within a system between two GPUs * The enums are spaced to allow for future relationships */ typedef enum nvmlGpuLevel_enum { NVML_TOPOLOGY_INTERNAL = 0, // e.g. Tesla K80 NVML_TOPOLOGY_SINGLE = 10, // all devices that only need traverse a single PCIe switch NVML_TOPOLOGY_MULTIPLE = 20, // all devices that need not traverse a host bridge NVML_TOPOLOGY_HOSTBRIDGE = 30, // all devices that are connected to the same host bridge NVML_TOPOLOGY_NODE = 40, // all devices that are connected to the same NUMA node but possibly multiple host bridges NVML_TOPOLOGY_SYSTEM = 50 // all devices in the system // there is purposefully no COUNT here because of the need for spacing above } nvmlGpuTopologyLevel_t; /* Compatibility for CPU->NODE renaming */ #define NVML_TOPOLOGY_CPU NVML_TOPOLOGY_NODE /* P2P Capability Index Status*/ typedef enum nvmlGpuP2PStatus_enum { NVML_P2P_STATUS_OK = 0, NVML_P2P_STATUS_CHIPSET_NOT_SUPPORED, NVML_P2P_STATUS_GPU_NOT_SUPPORTED, NVML_P2P_STATUS_IOH_TOPOLOGY_NOT_SUPPORTED, NVML_P2P_STATUS_DISABLED_BY_REGKEY, NVML_P2P_STATUS_NOT_SUPPORTED, NVML_P2P_STATUS_UNKNOWN } nvmlGpuP2PStatus_t; /* P2P Capability Index*/ typedef enum nvmlGpuP2PCapsIndex_enum { NVML_P2P_CAPS_INDEX_READ = 0, NVML_P2P_CAPS_INDEX_WRITE, NVML_P2P_CAPS_INDEX_NVLINK, NVML_P2P_CAPS_INDEX_ATOMICS, NVML_P2P_CAPS_INDEX_PROP, NVML_P2P_CAPS_INDEX_UNKNOWN }nvmlGpuP2PCapsIndex_t; /** * Maximum limit on Physical Bridges per Board */ #define NVML_MAX_PHYSICAL_BRIDGE (128) /** * Information about the Bridge Chip Firmware */ typedef struct nvmlBridgeChipInfo_st { nvmlBridgeChipType_t type; //!< Type of Bridge Chip unsigned int fwVersion; //!< Firmware Version. 0=Version is unavailable }nvmlBridgeChipInfo_t; /** * This structure stores the complete Hierarchy of the Bridge Chip within the board. The immediate * bridge is stored at index 0 of bridgeInfoList, parent to immediate bridge is at index 1 and so forth. */ typedef struct nvmlBridgeChipHierarchy_st { unsigned char bridgeCount; //!< Number of Bridge Chips on the Board nvmlBridgeChipInfo_t bridgeChipInfo[NVML_MAX_PHYSICAL_BRIDGE]; //!< Hierarchy of Bridge Chips on the board }nvmlBridgeChipHierarchy_t; /** * Represents Type of Sampling Event */ typedef enum nvmlSamplingType_enum { NVML_TOTAL_POWER_SAMPLES = 0, //!< To represent total power drawn by GPU NVML_GPU_UTILIZATION_SAMPLES = 1, //!< To represent percent of time during which one or more kernels was executing on the GPU NVML_MEMORY_UTILIZATION_SAMPLES = 2, //!< To represent percent of time during which global (device) memory was being read or written NVML_ENC_UTILIZATION_SAMPLES = 3, //!< To represent percent of time during which NVENC remains busy NVML_DEC_UTILIZATION_SAMPLES = 4, //!< To represent percent of time during which NVDEC remains busy NVML_PROCESSOR_CLK_SAMPLES = 5, //!< To represent processor clock samples NVML_MEMORY_CLK_SAMPLES = 6, //!< To represent memory clock samples // Keep this last NVML_SAMPLINGTYPE_COUNT }nvmlSamplingType_t; /** * Represents the queryable PCIe utilization counters */ typedef enum nvmlPcieUtilCounter_enum { NVML_PCIE_UTIL_TX_BYTES = 0, // 1KB granularity NVML_PCIE_UTIL_RX_BYTES = 1, // 1KB granularity // Keep this last NVML_PCIE_UTIL_COUNT } nvmlPcieUtilCounter_t; /** * Represents the type for sample value returned */ typedef enum nvmlValueType_enum { NVML_VALUE_TYPE_DOUBLE = 0, NVML_VALUE_TYPE_UNSIGNED_INT = 1, NVML_VALUE_TYPE_UNSIGNED_LONG = 2, NVML_VALUE_TYPE_UNSIGNED_LONG_LONG = 3, NVML_VALUE_TYPE_SIGNED_LONG_LONG = 4, // Keep this last NVML_VALUE_TYPE_COUNT }nvmlValueType_t; /** * Union to represent different types of Value */ typedef union nvmlValue_st { double dVal; //!< If the value is double unsigned int uiVal; //!< If the value is unsigned int unsigned long ulVal; //!< If the value is unsigned long unsigned long long ullVal; //!< If the value is unsigned long long signed long long sllVal; //!< If the value is signed long long }nvmlValue_t; /** * Information for Sample */ typedef struct nvmlSample_st { unsigned long long timeStamp; //!< CPU Timestamp in microseconds nvmlValue_t sampleValue; //!< Sample Value }nvmlSample_t; /** * Represents type of perf policy for which violation times can be queried */ typedef enum nvmlPerfPolicyType_enum { NVML_PERF_POLICY_POWER = 0, //!< How long did power violations cause the GPU to be below application clocks NVML_PERF_POLICY_THERMAL = 1, //!< How long did thermal violations cause the GPU to be below application clocks NVML_PERF_POLICY_SYNC_BOOST = 2, //!< How long did sync boost cause the GPU to be below application clocks NVML_PERF_POLICY_BOARD_LIMIT = 3, //!< How long did the board limit cause the GPU to be below application clocks NVML_PERF_POLICY_LOW_UTILIZATION = 4, //!< How long did low utilization cause the GPU to be below application clocks NVML_PERF_POLICY_RELIABILITY = 5, //!< How long did the board reliability limit cause the GPU to be below application clocks NVML_PERF_POLICY_TOTAL_APP_CLOCKS = 10, //!< Total time the GPU was held below application clocks by any limiter (0 - 5 above) NVML_PERF_POLICY_TOTAL_BASE_CLOCKS = 11, //!< Total time the GPU was held below base clocks // Keep this last NVML_PERF_POLICY_COUNT }nvmlPerfPolicyType_t; /** * Struct to hold perf policy violation status data */ typedef struct nvmlViolationTime_st { unsigned long long referenceTime; //!< referenceTime represents CPU timestamp in microseconds unsigned long long violationTime; //!< violationTime in Nanoseconds }nvmlViolationTime_t; /** @} */ /***************************************************************************************************/ /** @defgroup nvmlDeviceEnumvs Device Enums * @{ */ /***************************************************************************************************/ /** * Generic enable/disable enum. */ typedef enum nvmlEnableState_enum { NVML_FEATURE_DISABLED = 0, //!< Feature disabled NVML_FEATURE_ENABLED = 1 //!< Feature enabled } nvmlEnableState_t; //! Generic flag used to specify the default behavior of some functions. See description of particular functions for details. #define nvmlFlagDefault 0x00 //! Generic flag used to force some behavior. See description of particular functions for details. #define nvmlFlagForce 0x01 /** * * The Brand of the GPU * */ typedef enum nvmlBrandType_enum { NVML_BRAND_UNKNOWN = 0, NVML_BRAND_QUADRO = 1, NVML_BRAND_TESLA = 2, NVML_BRAND_NVS = 3, NVML_BRAND_GRID = 4, // Deprecated from API reporting. Keeping definition for backward compatibility. NVML_BRAND_GEFORCE = 5, NVML_BRAND_TITAN = 6, NVML_BRAND_NVIDIA_VAPPS = 7, // NVIDIA Virtual Applications NVML_BRAND_NVIDIA_VPC = 8, // NVIDIA Virtual PC NVML_BRAND_NVIDIA_VCS = 9, // NVIDIA Virtual Compute Server NVML_BRAND_NVIDIA_VWS = 10, // NVIDIA RTX Virtual Workstation NVML_BRAND_NVIDIA_CLOUD_GAMING = 11, // NVIDIA Cloud Gaming NVML_BRAND_NVIDIA_VGAMING = NVML_BRAND_NVIDIA_CLOUD_GAMING, // Deprecated from API reporting. Keeping definition for backward compatibility. NVML_BRAND_QUADRO_RTX = 12, NVML_BRAND_NVIDIA_RTX = 13, NVML_BRAND_NVIDIA = 14, NVML_BRAND_GEFORCE_RTX = 15, // Unused NVML_BRAND_TITAN_RTX = 16, // Unused // Keep this last NVML_BRAND_COUNT } nvmlBrandType_t; /** * Temperature thresholds. */ typedef enum nvmlTemperatureThresholds_enum { NVML_TEMPERATURE_THRESHOLD_SHUTDOWN = 0, // Temperature at which the GPU will // shut down for HW protection NVML_TEMPERATURE_THRESHOLD_SLOWDOWN = 1, // Temperature at which the GPU will // begin HW slowdown NVML_TEMPERATURE_THRESHOLD_MEM_MAX = 2, // Memory Temperature at which the GPU will // begin SW slowdown NVML_TEMPERATURE_THRESHOLD_GPU_MAX = 3, // GPU Temperature at which the GPU // can be throttled below base clock NVML_TEMPERATURE_THRESHOLD_ACOUSTIC_MIN = 4, // Minimum GPU Temperature that can be // set as acoustic threshold NVML_TEMPERATURE_THRESHOLD_ACOUSTIC_CURR = 5, // Current temperature that is set as // acoustic threshold. NVML_TEMPERATURE_THRESHOLD_ACOUSTIC_MAX = 6, // Maximum GPU temperature that can be // set as acoustic threshold. // Keep this last NVML_TEMPERATURE_THRESHOLD_COUNT } nvmlTemperatureThresholds_t; /** * Temperature sensors. */ typedef enum nvmlTemperatureSensors_enum { NVML_TEMPERATURE_GPU = 0, //!< Temperature sensor for the GPU die // Keep this last NVML_TEMPERATURE_COUNT } nvmlTemperatureSensors_t; /** * Compute mode. * * NVML_COMPUTEMODE_EXCLUSIVE_PROCESS was added in CUDA 4.0. * Earlier CUDA versions supported a single exclusive mode, * which is equivalent to NVML_COMPUTEMODE_EXCLUSIVE_THREAD in CUDA 4.0 and beyond. */ typedef enum nvmlComputeMode_enum { NVML_COMPUTEMODE_DEFAULT = 0, //!< Default compute mode -- multiple contexts per device NVML_COMPUTEMODE_EXCLUSIVE_THREAD = 1, //!< Support Removed NVML_COMPUTEMODE_PROHIBITED = 2, //!< Compute-prohibited mode -- no contexts per device NVML_COMPUTEMODE_EXCLUSIVE_PROCESS = 3, //!< Compute-exclusive-process mode -- only one context per device, usable from multiple threads at a time // Keep this last NVML_COMPUTEMODE_COUNT } nvmlComputeMode_t; /** * Max Clock Monitors available */ #define MAX_CLK_DOMAINS 32 /** * Clock Monitor error types */ typedef struct nvmlClkMonFaultInfo_struct { /** * The Domain which faulted */ unsigned int clkApiDomain; /** * Faults Information */ unsigned int clkDomainFaultMask; } nvmlClkMonFaultInfo_t; /** * Clock Monitor Status */ typedef struct nvmlClkMonStatus_status { /** * Fault status Indicator */ unsigned int bGlobalStatus; /** * Total faulted domain numbers */ unsigned int clkMonListSize; /** * The fault Information structure */ nvmlClkMonFaultInfo_t clkMonList[MAX_CLK_DOMAINS]; } nvmlClkMonStatus_t; /** * ECC bit types. * * @deprecated See \ref nvmlMemoryErrorType_t for a more flexible type */ #define nvmlEccBitType_t nvmlMemoryErrorType_t /** * Single bit ECC errors * * @deprecated Mapped to \ref NVML_MEMORY_ERROR_TYPE_CORRECTED */ #define NVML_SINGLE_BIT_ECC NVML_MEMORY_ERROR_TYPE_CORRECTED /** * Double bit ECC errors * * @deprecated Mapped to \ref NVML_MEMORY_ERROR_TYPE_UNCORRECTED */ #define NVML_DOUBLE_BIT_ECC NVML_MEMORY_ERROR_TYPE_UNCORRECTED /** * Memory error types */ typedef enum nvmlMemoryErrorType_enum { /** * A memory error that was corrected * * For ECC errors, these are single bit errors * For Texture memory, these are errors fixed by resend */ NVML_MEMORY_ERROR_TYPE_CORRECTED = 0, /** * A memory error that was not corrected * * For ECC errors, these are double bit errors * For Texture memory, these are errors where the resend fails */ NVML_MEMORY_ERROR_TYPE_UNCORRECTED = 1, // Keep this last NVML_MEMORY_ERROR_TYPE_COUNT //!< Count of memory error types } nvmlMemoryErrorType_t; /** * ECC counter types. * * Note: Volatile counts are reset each time the driver loads. On Windows this is once per boot. On Linux this can be more frequent. * On Linux the driver unloads when no active clients exist. If persistence mode is enabled or there is always a driver * client active (e.g. X11), then Linux also sees per-boot behavior. If not, volatile counts are reset each time a compute app * is run. */ typedef enum nvmlEccCounterType_enum { NVML_VOLATILE_ECC = 0, //!< Volatile counts are reset each time the driver loads. NVML_AGGREGATE_ECC = 1, //!< Aggregate counts persist across reboots (i.e. for the lifetime of the device) // Keep this last NVML_ECC_COUNTER_TYPE_COUNT //!< Count of memory counter types } nvmlEccCounterType_t; /** * Clock types. * * All speeds are in Mhz. */ typedef enum nvmlClockType_enum { NVML_CLOCK_GRAPHICS = 0, //!< Graphics clock domain NVML_CLOCK_SM = 1, //!< SM clock domain NVML_CLOCK_MEM = 2, //!< Memory clock domain NVML_CLOCK_VIDEO = 3, //!< Video encoder/decoder clock domain // Keep this last NVML_CLOCK_COUNT //!< Count of clock types } nvmlClockType_t; /** * Clock Ids. These are used in combination with nvmlClockType_t * to specify a single clock value. */ typedef enum nvmlClockId_enum { NVML_CLOCK_ID_CURRENT = 0, //!< Current actual clock value NVML_CLOCK_ID_APP_CLOCK_TARGET = 1, //!< Target application clock NVML_CLOCK_ID_APP_CLOCK_DEFAULT = 2, //!< Default application clock target NVML_CLOCK_ID_CUSTOMER_BOOST_MAX = 3, //!< OEM-defined maximum clock rate //Keep this last NVML_CLOCK_ID_COUNT //!< Count of Clock Ids. } nvmlClockId_t; /** * Driver models. * * Windows only. */ typedef enum nvmlDriverModel_enum { NVML_DRIVER_WDDM = 0, //!< WDDM driver model -- GPU treated as a display device NVML_DRIVER_WDM = 1 //!< WDM (TCC) model (recommended) -- GPU treated as a generic device } nvmlDriverModel_t; /** * Allowed PStates. */ typedef enum nvmlPStates_enum { NVML_PSTATE_0 = 0, //!< Performance state 0 -- Maximum Performance NVML_PSTATE_1 = 1, //!< Performance state 1 NVML_PSTATE_2 = 2, //!< Performance state 2 NVML_PSTATE_3 = 3, //!< Performance state 3 NVML_PSTATE_4 = 4, //!< Performance state 4 NVML_PSTATE_5 = 5, //!< Performance state 5 NVML_PSTATE_6 = 6, //!< Performance state 6 NVML_PSTATE_7 = 7, //!< Performance state 7 NVML_PSTATE_8 = 8, //!< Performance state 8 NVML_PSTATE_9 = 9, //!< Performance state 9 NVML_PSTATE_10 = 10, //!< Performance state 10 NVML_PSTATE_11 = 11, //!< Performance state 11 NVML_PSTATE_12 = 12, //!< Performance state 12 NVML_PSTATE_13 = 13, //!< Performance state 13 NVML_PSTATE_14 = 14, //!< Performance state 14 NVML_PSTATE_15 = 15, //!< Performance state 15 -- Minimum Performance NVML_PSTATE_UNKNOWN = 32 //!< Unknown performance state } nvmlPstates_t; /** * GPU Operation Mode * * GOM allows to reduce power usage and optimize GPU throughput by disabling GPU features. * * Each GOM is designed to meet specific user needs. */ typedef enum nvmlGom_enum { NVML_GOM_ALL_ON = 0, //!< Everything is enabled and running at full speed NVML_GOM_COMPUTE = 1, //!< Designed for running only compute tasks. Graphics operations //!< are not allowed NVML_GOM_LOW_DP = 2 //!< Designed for running graphics applications that don't require //!< high bandwidth double precision } nvmlGpuOperationMode_t; /** * Available infoROM objects. */ typedef enum nvmlInforomObject_enum { NVML_INFOROM_OEM = 0, //!< An object defined by OEM NVML_INFOROM_ECC = 1, //!< The ECC object determining the level of ECC support NVML_INFOROM_POWER = 2, //!< The power management object // Keep this last NVML_INFOROM_COUNT //!< This counts the number of infoROM objects the driver knows about } nvmlInforomObject_t; /** * Return values for NVML API calls. */ typedef enum nvmlReturn_enum { // cppcheck-suppress * NVML_SUCCESS = 0, //!< The operation was successful NVML_ERROR_UNINITIALIZED = 1, //!< NVML was not first initialized with nvmlInit() NVML_ERROR_INVALID_ARGUMENT = 2, //!< A supplied argument is invalid NVML_ERROR_NOT_SUPPORTED = 3, //!< The requested operation is not available on target device NVML_ERROR_NO_PERMISSION = 4, //!< The current user does not have permission for operation NVML_ERROR_ALREADY_INITIALIZED = 5, //!< Deprecated: Multiple initializations are now allowed through ref counting NVML_ERROR_NOT_FOUND = 6, //!< A query to find an object was unsuccessful NVML_ERROR_INSUFFICIENT_SIZE = 7, //!< An input argument is not large enough NVML_ERROR_INSUFFICIENT_POWER = 8, //!< A device's external power cables are not properly attached NVML_ERROR_DRIVER_NOT_LOADED = 9, //!< NVIDIA driver is not loaded NVML_ERROR_TIMEOUT = 10, //!< User provided timeout passed NVML_ERROR_IRQ_ISSUE = 11, //!< NVIDIA Kernel detected an interrupt issue with a GPU NVML_ERROR_LIBRARY_NOT_FOUND = 12, //!< NVML Shared Library couldn't be found or loaded NVML_ERROR_FUNCTION_NOT_FOUND = 13, //!< Local version of NVML doesn't implement this function NVML_ERROR_CORRUPTED_INFOROM = 14, //!< infoROM is corrupted NVML_ERROR_GPU_IS_LOST = 15, //!< The GPU has fallen off the bus or has otherwise become inaccessible NVML_ERROR_RESET_REQUIRED = 16, //!< The GPU requires a reset before it can be used again NVML_ERROR_OPERATING_SYSTEM = 17, //!< The GPU control device has been blocked by the operating system/cgroups NVML_ERROR_LIB_RM_VERSION_MISMATCH = 18, //!< RM detects a driver/library version mismatch NVML_ERROR_IN_USE = 19, //!< An operation cannot be performed because the GPU is currently in use NVML_ERROR_MEMORY = 20, //!< Insufficient memory NVML_ERROR_NO_DATA = 21, //!< No data NVML_ERROR_VGPU_ECC_NOT_SUPPORTED = 22, //!< The requested vgpu operation is not available on target device, becasue ECC is enabled NVML_ERROR_INSUFFICIENT_RESOURCES = 23, //!< Ran out of critical resources, other than memory NVML_ERROR_FREQ_NOT_SUPPORTED = 24, //!< Ran out of critical resources, other than memory NVML_ERROR_UNKNOWN = 999 //!< An internal driver error occurred } nvmlReturn_t; /** * See \ref nvmlDeviceGetMemoryErrorCounter */ typedef enum nvmlMemoryLocation_enum { NVML_MEMORY_LOCATION_L1_CACHE = 0, //!< GPU L1 Cache NVML_MEMORY_LOCATION_L2_CACHE = 1, //!< GPU L2 Cache NVML_MEMORY_LOCATION_DRAM = 2, //!< Turing+ DRAM NVML_MEMORY_LOCATION_DEVICE_MEMORY = 2, //!< GPU Device Memory NVML_MEMORY_LOCATION_REGISTER_FILE = 3, //!< GPU Register File NVML_MEMORY_LOCATION_TEXTURE_MEMORY = 4, //!< GPU Texture Memory NVML_MEMORY_LOCATION_TEXTURE_SHM = 5, //!< Shared memory NVML_MEMORY_LOCATION_CBU = 6, //!< CBU NVML_MEMORY_LOCATION_SRAM = 7, //!< Turing+ SRAM // Keep this last NVML_MEMORY_LOCATION_COUNT //!< This counts the number of memory locations the driver knows about } nvmlMemoryLocation_t; /** * Causes for page retirement */ typedef enum nvmlPageRetirementCause_enum { NVML_PAGE_RETIREMENT_CAUSE_MULTIPLE_SINGLE_BIT_ECC_ERRORS = 0, //!< Page was retired due to multiple single bit ECC error NVML_PAGE_RETIREMENT_CAUSE_DOUBLE_BIT_ECC_ERROR = 1, //!< Page was retired due to double bit ECC error // Keep this last NVML_PAGE_RETIREMENT_CAUSE_COUNT } nvmlPageRetirementCause_t; /** * API types that allow changes to default permission restrictions */ typedef enum nvmlRestrictedAPI_enum { NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS = 0, //!< APIs that change application clocks, see nvmlDeviceSetApplicationsClocks //!< and see nvmlDeviceResetApplicationsClocks NVML_RESTRICTED_API_SET_AUTO_BOOSTED_CLOCKS = 1, //!< APIs that enable/disable Auto Boosted clocks //!< see nvmlDeviceSetAutoBoostedClocksEnabled // Keep this last NVML_RESTRICTED_API_COUNT } nvmlRestrictedAPI_t; /** * Enum to represent NvLink ECC per-lane error counts */ typedef enum nvmlNvLinkEccLaneErrorCounter_enum { NVML_NVLINK_ERROR_DL_ECC_LANE0 = 0, // Data link receive ECC error counter lane 0 NVML_NVLINK_ERROR_DL_ECC_LANE1 = 1, // Data link receive ECC error counter lane 1 NVML_NVLINK_ERROR_DL_ECC_LANE2 = 2, // Data link receive ECC error counter lane 2 NVML_NVLINK_ERROR_DL_ECC_LANE3 = 3, // Data link receive ECC error counter lane 3 // this must be last NVML_NVLINK_ERROR_DL_ECC_COUNT } nvmlNvLinkEccLaneErrorCounter_t; /** @} */ /***************************************************************************************************/ /** @addtogroup virtualGPU * @{ */ /***************************************************************************************************/ /** @defgroup nvmlVirtualGpuEnums vGPU Enums * @{ */ /***************************************************************************************************/ /*! * GPU virtualization mode types. */ typedef enum nvmlGpuVirtualizationMode { NVML_GPU_VIRTUALIZATION_MODE_NONE = 0, //!< Represents Bare Metal GPU NVML_GPU_VIRTUALIZATION_MODE_PASSTHROUGH = 1, //!< Device is associated with GPU-Passthorugh NVML_GPU_VIRTUALIZATION_MODE_VGPU = 2, //!< Device is associated with vGPU inside virtual machine. NVML_GPU_VIRTUALIZATION_MODE_HOST_VGPU = 3, //!< Device is associated with VGX hypervisor in vGPU mode NVML_GPU_VIRTUALIZATION_MODE_HOST_VSGA = 4 //!< Device is associated with VGX hypervisor in vSGA mode } nvmlGpuVirtualizationMode_t; /** * Host vGPU modes */ typedef enum nvmlHostVgpuMode_enum { NVML_HOST_VGPU_MODE_NON_SRIOV = 0, //!< Non SR-IOV mode NVML_HOST_VGPU_MODE_SRIOV = 1 //!< SR-IOV mode } nvmlHostVgpuMode_t; /*! * Types of VM identifiers */ typedef enum nvmlVgpuVmIdType { NVML_VGPU_VM_ID_DOMAIN_ID = 0, //!< VM ID represents DOMAIN ID NVML_VGPU_VM_ID_UUID = 1 //!< VM ID represents UUID } nvmlVgpuVmIdType_t; /** * vGPU GUEST info state. */ typedef enum nvmlVgpuGuestInfoState_enum { NVML_VGPU_INSTANCE_GUEST_INFO_STATE_UNINITIALIZED = 0, //!< Guest-dependent fields uninitialized NVML_VGPU_INSTANCE_GUEST_INFO_STATE_INITIALIZED = 1 //!< Guest-dependent fields initialized } nvmlVgpuGuestInfoState_t; /** * vGPU software licensable features */ typedef enum { NVML_GRID_LICENSE_FEATURE_CODE_UNKNOWN = 0, //!< Unknown NVML_GRID_LICENSE_FEATURE_CODE_VGPU = 1, //!< Virtual GPU NVML_GRID_LICENSE_FEATURE_CODE_NVIDIA_RTX = 2, //!< Nvidia RTX NVML_GRID_LICENSE_FEATURE_CODE_VWORKSTATION = NVML_GRID_LICENSE_FEATURE_CODE_NVIDIA_RTX, //!< Deprecated, do not use. NVML_GRID_LICENSE_FEATURE_CODE_GAMING = 3, //!< Gaming NVML_GRID_LICENSE_FEATURE_CODE_COMPUTE = 4 //!< Compute } nvmlGridLicenseFeatureCode_t; /** * Status codes for license expiry */ #define NVML_GRID_LICENSE_EXPIRY_NOT_AVAILABLE 0 //!< Expiry information not available #define NVML_GRID_LICENSE_EXPIRY_INVALID 1 //!< Invalid expiry or error fetching expiry #define NVML_GRID_LICENSE_EXPIRY_VALID 2 //!< Valid expiry #define NVML_GRID_LICENSE_EXPIRY_NOT_APPLICABLE 3 //!< Expiry not applicable #define NVML_GRID_LICENSE_EXPIRY_PERMANENT 4 //!< Permanent expiry /** @} */ /***************************************************************************************************/ /** @defgroup nvmlVgpuConstants vGPU Constants * @{ */ /***************************************************************************************************/ /** * Buffer size guaranteed to be large enough for \ref nvmlVgpuTypeGetLicense */ #define NVML_GRID_LICENSE_BUFFER_SIZE 128 #define NVML_VGPU_NAME_BUFFER_SIZE 64 #define NVML_GRID_LICENSE_FEATURE_MAX_COUNT 3 #define INVALID_GPU_INSTANCE_PROFILE_ID 0xFFFFFFFF #define INVALID_GPU_INSTANCE_ID 0xFFFFFFFF /*! * Macros for vGPU instance's virtualization capabilities bitfield. */ #define NVML_VGPU_VIRTUALIZATION_CAP_MIGRATION 0:0 #define NVML_VGPU_VIRTUALIZATION_CAP_MIGRATION_NO 0x0 #define NVML_VGPU_VIRTUALIZATION_CAP_MIGRATION_YES 0x1 /*! * Macros for pGPU's virtualization capabilities bitfield. */ #define NVML_VGPU_PGPU_VIRTUALIZATION_CAP_MIGRATION 0:0 #define NVML_VGPU_PGPU_VIRTUALIZATION_CAP_MIGRATION_NO 0x0 #define NVML_VGPU_PGPU_VIRTUALIZATION_CAP_MIGRATION_YES 0x1 /** @} */ /***************************************************************************************************/ /** @defgroup nvmlVgpuStructs vGPU Structs * @{ */ /***************************************************************************************************/ typedef unsigned int nvmlVgpuTypeId_t; typedef unsigned int nvmlVgpuInstance_t; /** * Structure to store Utilization Value and vgpuInstance */ typedef struct nvmlVgpuInstanceUtilizationSample_st { nvmlVgpuInstance_t vgpuInstance; //!< vGPU Instance unsigned long long timeStamp; //!< CPU Timestamp in microseconds nvmlValue_t smUtil; //!< SM (3D/Compute) Util Value nvmlValue_t memUtil; //!< Frame Buffer Memory Util Value nvmlValue_t encUtil; //!< Encoder Util Value nvmlValue_t decUtil; //!< Decoder Util Value } nvmlVgpuInstanceUtilizationSample_t; /** * Structure to store Utilization Value, vgpuInstance and subprocess information */ typedef struct nvmlVgpuProcessUtilizationSample_st { nvmlVgpuInstance_t vgpuInstance; //!< vGPU Instance unsigned int pid; //!< PID of process running within the vGPU VM char processName[NVML_VGPU_NAME_BUFFER_SIZE]; //!< Name of process running within the vGPU VM unsigned long long timeStamp; //!< CPU Timestamp in microseconds unsigned int smUtil; //!< SM (3D/Compute) Util Value unsigned int memUtil; //!< Frame Buffer Memory Util Value unsigned int encUtil; //!< Encoder Util Value unsigned int decUtil; //!< Decoder Util Value } nvmlVgpuProcessUtilizationSample_t; /** * Structure to store the vGPU license expiry details */ typedef struct nvmlVgpuLicenseExpiry_st { unsigned int year; //!< Year of license expiry unsigned short month; //!< Month of license expiry unsigned short day; //!< Day of license expiry unsigned short hour; //!< Hour of license expiry unsigned short min; //!< Minutes of license expiry unsigned short sec; //!< Seconds of license expiry unsigned char status; //!< License expiry status } nvmlVgpuLicenseExpiry_t; /** * vGPU license state */ #define NVML_GRID_LICENSE_STATE_UNKNOWN 0 //!< Unknown state #define NVML_GRID_LICENSE_STATE_UNINITIALIZED 1 //!< Uninitialized state #define NVML_GRID_LICENSE_STATE_UNLICENSED_UNRESTRICTED 2 //!< Unlicensed unrestricted state #define NVML_GRID_LICENSE_STATE_UNLICENSED_RESTRICTED 3 //!< Unlicensed restricted state #define NVML_GRID_LICENSE_STATE_UNLICENSED 4 //!< Unlicensed state #define NVML_GRID_LICENSE_STATE_LICENSED 5 //!< Licensed state typedef struct nvmlVgpuLicenseInfo_st { unsigned char isLicensed; //!< License status nvmlVgpuLicenseExpiry_t licenseExpiry; //!< License expiry information unsigned int currentState; //!< Current license state } nvmlVgpuLicenseInfo_t; /** * Structure to store utilization value and process Id */ typedef struct nvmlProcessUtilizationSample_st { unsigned int pid; //!< PID of process unsigned long long timeStamp; //!< CPU Timestamp in microseconds unsigned int smUtil; //!< SM (3D/Compute) Util Value unsigned int memUtil; //!< Frame Buffer Memory Util Value unsigned int encUtil; //!< Encoder Util Value unsigned int decUtil; //!< Decoder Util Value } nvmlProcessUtilizationSample_t; /** * Structure to store license expiry date and time values */ typedef struct nvmlGridLicenseExpiry_st { unsigned int year; //!< Year value of license expiry unsigned short month; //!< Month value of license expiry unsigned short day; //!< Day value of license expiry unsigned short hour; //!< Hour value of license expiry unsigned short min; //!< Minutes value of license expiry unsigned short sec; //!< Seconds value of license expiry unsigned char status; //!< License expiry status } nvmlGridLicenseExpiry_t; /** * Structure containing vGPU software licensable feature information */ typedef struct nvmlGridLicensableFeature_st { nvmlGridLicenseFeatureCode_t featureCode; //!< Licensed feature code unsigned int featureState; //!< Non-zero if feature is currently licensed, otherwise zero char licenseInfo[NVML_GRID_LICENSE_BUFFER_SIZE]; //!< Deprecated. char productName[NVML_GRID_LICENSE_BUFFER_SIZE]; //!< Product name of feature unsigned int featureEnabled; //!< Non-zero if feature is enabled, otherwise zero nvmlGridLicenseExpiry_t licenseExpiry; //!< License expiry structure containing date and time } nvmlGridLicensableFeature_t; /** * Structure to store vGPU software licensable features */ typedef struct nvmlGridLicensableFeatures_st { int isGridLicenseSupported; //!< Non-zero if vGPU Software Licensing is supported on the system, otherwise zero unsigned int licensableFeaturesCount; //!< Entries returned in \a gridLicensableFeatures array nvmlGridLicensableFeature_t gridLicensableFeatures[NVML_GRID_LICENSE_FEATURE_MAX_COUNT]; //!< Array of vGPU software licensable features. } nvmlGridLicensableFeatures_t; /** * Simplified chip architecture */ #define NVML_DEVICE_ARCH_KEPLER 2 // Devices based on the NVIDIA Kepler architecture #define NVML_DEVICE_ARCH_MAXWELL 3 // Devices based on the NVIDIA Maxwell architecture #define NVML_DEVICE_ARCH_PASCAL 4 // Devices based on the NVIDIA Pascal architecture #define NVML_DEVICE_ARCH_VOLTA 5 // Devices based on the NVIDIA Volta architecture #define NVML_DEVICE_ARCH_TURING 6 // Devices based on the NVIDIA Turing architecture #define NVML_DEVICE_ARCH_AMPERE 7 // Devices based on the NVIDIA Ampere architecture #define NVML_DEVICE_ARCH_UNKNOWN 0xffffffff // Anything else, presumably something newer typedef unsigned int nvmlDeviceArchitecture_t; /** * PCI bus types */ #define NVML_BUS_TYPE_UNKNOWN 0 #define NVML_BUS_TYPE_PCI 1 #define NVML_BUS_TYPE_PCIE 2 #define NVML_BUS_TYPE_FPCI 3 #define NVML_BUS_TYPE_AGP 4 typedef unsigned int nvmlBusType_t; /** * Device Power Source */ #define NVML_POWER_SOURCE_AC 0x00000000 #define NVML_POWER_SOURCE_BATTERY 0x00000001 typedef unsigned int nvmlPowerSource_t; /* * Device PCIE link Max Speed */ #define NVML_PCIE_LINK_MAX_SPEED_INVALID 0x00000000 #define NVML_PCIE_LINK_MAX_SPEED_2500MBPS 0x00000001 #define NVML_PCIE_LINK_MAX_SPEED_5000MBPS 0x00000002 #define NVML_PCIE_LINK_MAX_SPEED_8000MBPS 0x00000003 #define NVML_PCIE_LINK_MAX_SPEED_16000MBPS 0x00000004 #define NVML_PCIE_LINK_MAX_SPEED_32000MBPS 0x00000005 /* * Adaptive clocking status */ #define NVML_ADAPTIVE_CLOCKING_INFO_STATUS_DISABLED 0x00000000 #define NVML_ADAPTIVE_CLOCKING_INFO_STATUS_ENABLED 0x00000001 /** @} */ /** @} */ /***************************************************************************************************/ /** @defgroup nvmlFieldValueEnums Field Value Enums * @{ */ /***************************************************************************************************/ /** * Field Identifiers. * * All Identifiers pertain to a device. Each ID is only used once and is guaranteed never to change. */ #define NVML_FI_DEV_ECC_CURRENT 1 //!< Current ECC mode. 1=Active. 0=Inactive #define NVML_FI_DEV_ECC_PENDING 2 //!< Pending ECC mode. 1=Active. 0=Inactive /* ECC Count Totals */ #define NVML_FI_DEV_ECC_SBE_VOL_TOTAL 3 //!< Total single bit volatile ECC errors #define NVML_FI_DEV_ECC_DBE_VOL_TOTAL 4 //!< Total double bit volatile ECC errors #define NVML_FI_DEV_ECC_SBE_AGG_TOTAL 5 //!< Total single bit aggregate (persistent) ECC errors #define NVML_FI_DEV_ECC_DBE_AGG_TOTAL 6 //!< Total double bit aggregate (persistent) ECC errors /* Individual ECC locations */ #define NVML_FI_DEV_ECC_SBE_VOL_L1 7 //!< L1 cache single bit volatile ECC errors #define NVML_FI_DEV_ECC_DBE_VOL_L1 8 //!< L1 cache double bit volatile ECC errors #define NVML_FI_DEV_ECC_SBE_VOL_L2 9 //!< L2 cache single bit volatile ECC errors #define NVML_FI_DEV_ECC_DBE_VOL_L2 10 //!< L2 cache double bit volatile ECC errors #define NVML_FI_DEV_ECC_SBE_VOL_DEV 11 //!< Device memory single bit volatile ECC errors #define NVML_FI_DEV_ECC_DBE_VOL_DEV 12 //!< Device memory double bit volatile ECC errors #define NVML_FI_DEV_ECC_SBE_VOL_REG 13 //!< Register file single bit volatile ECC errors #define NVML_FI_DEV_ECC_DBE_VOL_REG 14 //!< Register file double bit volatile ECC errors #define NVML_FI_DEV_ECC_SBE_VOL_TEX 15 //!< Texture memory single bit volatile ECC errors #define NVML_FI_DEV_ECC_DBE_VOL_TEX 16 //!< Texture memory double bit volatile ECC errors #define NVML_FI_DEV_ECC_DBE_VOL_CBU 17 //!< CBU double bit volatile ECC errors #define NVML_FI_DEV_ECC_SBE_AGG_L1 18 //!< L1 cache single bit aggregate (persistent) ECC errors #define NVML_FI_DEV_ECC_DBE_AGG_L1 19 //!< L1 cache double bit aggregate (persistent) ECC errors #define NVML_FI_DEV_ECC_SBE_AGG_L2 20 //!< L2 cache single bit aggregate (persistent) ECC errors #define NVML_FI_DEV_ECC_DBE_AGG_L2 21 //!< L2 cache double bit aggregate (persistent) ECC errors #define NVML_FI_DEV_ECC_SBE_AGG_DEV 22 //!< Device memory single bit aggregate (persistent) ECC errors #define NVML_FI_DEV_ECC_DBE_AGG_DEV 23 //!< Device memory double bit aggregate (persistent) ECC errors #define NVML_FI_DEV_ECC_SBE_AGG_REG 24 //!< Register File single bit aggregate (persistent) ECC errors #define NVML_FI_DEV_ECC_DBE_AGG_REG 25 //!< Register File double bit aggregate (persistent) ECC errors #define NVML_FI_DEV_ECC_SBE_AGG_TEX 26 //!< Texture memory single bit aggregate (persistent) ECC errors #define NVML_FI_DEV_ECC_DBE_AGG_TEX 27 //!< Texture memory double bit aggregate (persistent) ECC errors #define NVML_FI_DEV_ECC_DBE_AGG_CBU 28 //!< CBU double bit aggregate ECC errors /* Page Retirement */ #define NVML_FI_DEV_RETIRED_SBE 29 //!< Number of retired pages because of single bit errors #define NVML_FI_DEV_RETIRED_DBE 30 //!< Number of retired pages because of double bit errors #define NVML_FI_DEV_RETIRED_PENDING 31 //!< If any pages are pending retirement. 1=yes. 0=no. /* NvLink Flit Error Counters */ #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L0 32 //!< NVLink flow control CRC Error Counter for Lane 0 #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L1 33 //!< NVLink flow control CRC Error Counter for Lane 1 #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L2 34 //!< NVLink flow control CRC Error Counter for Lane 2 #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L3 35 //!< NVLink flow control CRC Error Counter for Lane 3 #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L4 36 //!< NVLink flow control CRC Error Counter for Lane 4 #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L5 37 //!< NVLink flow control CRC Error Counter for Lane 5 #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_TOTAL 38 //!< NVLink flow control CRC Error Counter total for all Lanes /* NvLink CRC Data Error Counters */ #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L0 39 //!< NVLink data CRC Error Counter for Lane 0 #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L1 40 //!< NVLink data CRC Error Counter for Lane 1 #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L2 41 //!< NVLink data CRC Error Counter for Lane 2 #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L3 42 //!< NVLink data CRC Error Counter for Lane 3 #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L4 43 //!< NVLink data CRC Error Counter for Lane 4 #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L5 44 //!< NVLink data CRC Error Counter for Lane 5 #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_TOTAL 45 //!< NvLink data CRC Error Counter total for all Lanes /* NvLink Replay Error Counters */ #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L0 46 //!< NVLink Replay Error Counter for Lane 0 #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L1 47 //!< NVLink Replay Error Counter for Lane 1 #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L2 48 //!< NVLink Replay Error Counter for Lane 2 #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L3 49 //!< NVLink Replay Error Counter for Lane 3 #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L4 50 //!< NVLink Replay Error Counter for Lane 4 #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L5 51 //!< NVLink Replay Error Counter for Lane 5 #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_TOTAL 52 //!< NVLink Replay Error Counter total for all Lanes /* NvLink Recovery Error Counters */ #define NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L0 53 //!< NVLink Recovery Error Counter for Lane 0 #define NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L1 54 //!< NVLink Recovery Error Counter for Lane 1 #define NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L2 55 //!< NVLink Recovery Error Counter for Lane 2 #define NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L3 56 //!< NVLink Recovery Error Counter for Lane 3 #define NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L4 57 //!< NVLink Recovery Error Counter for Lane 4 #define NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L5 58 //!< NVLink Recovery Error Counter for Lane 5 #define NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_TOTAL 59 //!< NVLink Recovery Error Counter total for all Lanes /* NvLink Bandwidth Counters */ /* * NVML_FI_DEV_NVLINK_BANDWIDTH_* field values are now deprecated. * Please use the following field values instead: * NVML_FI_DEV_NVLINK_THROUGHPUT_DATA_TX * NVML_FI_DEV_NVLINK_THROUGHPUT_DATA_RX * NVML_FI_DEV_NVLINK_THROUGHPUT_RAW_TX * NVML_FI_DEV_NVLINK_THROUGHPUT_RAW_RX */ #define NVML_FI_DEV_NVLINK_BANDWIDTH_C0_L0 60 //!< NVLink Bandwidth Counter for Counter Set 0, Lane 0 #define NVML_FI_DEV_NVLINK_BANDWIDTH_C0_L1 61 //!< NVLink Bandwidth Counter for Counter Set 0, Lane 1 #define NVML_FI_DEV_NVLINK_BANDWIDTH_C0_L2 62 //!< NVLink Bandwidth Counter for Counter Set 0, Lane 2 #define NVML_FI_DEV_NVLINK_BANDWIDTH_C0_L3 63 //!< NVLink Bandwidth Counter for Counter Set 0, Lane 3 #define NVML_FI_DEV_NVLINK_BANDWIDTH_C0_L4 64 //!< NVLink Bandwidth Counter for Counter Set 0, Lane 4 #define NVML_FI_DEV_NVLINK_BANDWIDTH_C0_L5 65 //!< NVLink Bandwidth Counter for Counter Set 0, Lane 5 #define NVML_FI_DEV_NVLINK_BANDWIDTH_C0_TOTAL 66 //!< NVLink Bandwidth Counter Total for Counter Set 0, All Lanes /* NvLink Bandwidth Counters */ #define NVML_FI_DEV_NVLINK_BANDWIDTH_C1_L0 67 //!< NVLink Bandwidth Counter for Counter Set 1, Lane 0 #define NVML_FI_DEV_NVLINK_BANDWIDTH_C1_L1 68 //!< NVLink Bandwidth Counter for Counter Set 1, Lane 1 #define NVML_FI_DEV_NVLINK_BANDWIDTH_C1_L2 69 //!< NVLink Bandwidth Counter for Counter Set 1, Lane 2 #define NVML_FI_DEV_NVLINK_BANDWIDTH_C1_L3 70 //!< NVLink Bandwidth Counter for Counter Set 1, Lane 3 #define NVML_FI_DEV_NVLINK_BANDWIDTH_C1_L4 71 //!< NVLink Bandwidth Counter for Counter Set 1, Lane 4 #define NVML_FI_DEV_NVLINK_BANDWIDTH_C1_L5 72 //!< NVLink Bandwidth Counter for Counter Set 1, Lane 5 #define NVML_FI_DEV_NVLINK_BANDWIDTH_C1_TOTAL 73 //!< NVLink Bandwidth Counter Total for Counter Set 1, All Lanes /* NVML Perf Policy Counters */ #define NVML_FI_DEV_PERF_POLICY_POWER 74 //!< Perf Policy Counter for Power Policy #define NVML_FI_DEV_PERF_POLICY_THERMAL 75 //!< Perf Policy Counter for Thermal Policy #define NVML_FI_DEV_PERF_POLICY_SYNC_BOOST 76 //!< Perf Policy Counter for Sync boost Policy #define NVML_FI_DEV_PERF_POLICY_BOARD_LIMIT 77 //!< Perf Policy Counter for Board Limit #define NVML_FI_DEV_PERF_POLICY_LOW_UTILIZATION 78 //!< Perf Policy Counter for Low GPU Utilization Policy #define NVML_FI_DEV_PERF_POLICY_RELIABILITY 79 //!< Perf Policy Counter for Reliability Policy #define NVML_FI_DEV_PERF_POLICY_TOTAL_APP_CLOCKS 80 //!< Perf Policy Counter for Total App Clock Policy #define NVML_FI_DEV_PERF_POLICY_TOTAL_BASE_CLOCKS 81 //!< Perf Policy Counter for Total Base Clocks Policy /* Memory temperatures */ #define NVML_FI_DEV_MEMORY_TEMP 82 //!< Memory temperature for the device /* Energy Counter */ #define NVML_FI_DEV_TOTAL_ENERGY_CONSUMPTION 83 //!< Total energy consumption for the GPU in mJ since the driver was last reloaded /* NVLink Speed */ #define NVML_FI_DEV_NVLINK_SPEED_MBPS_L0 84 //!< NVLink Speed in MBps for Link 0 #define NVML_FI_DEV_NVLINK_SPEED_MBPS_L1 85 //!< NVLink Speed in MBps for Link 1 #define NVML_FI_DEV_NVLINK_SPEED_MBPS_L2 86 //!< NVLink Speed in MBps for Link 2 #define NVML_FI_DEV_NVLINK_SPEED_MBPS_L3 87 //!< NVLink Speed in MBps for Link 3 #define NVML_FI_DEV_NVLINK_SPEED_MBPS_L4 88 //!< NVLink Speed in MBps for Link 4 #define NVML_FI_DEV_NVLINK_SPEED_MBPS_L5 89 //!< NVLink Speed in MBps for Link 5 #define NVML_FI_DEV_NVLINK_SPEED_MBPS_COMMON 90 //!< Common NVLink Speed in MBps for active links #define NVML_FI_DEV_NVLINK_LINK_COUNT 91 //!< Number of NVLinks present on the device #define NVML_FI_DEV_RETIRED_PENDING_SBE 92 //!< If any pages are pending retirement due to SBE. 1=yes. 0=no. #define NVML_FI_DEV_RETIRED_PENDING_DBE 93 //!< If any pages are pending retirement due to DBE. 1=yes. 0=no. #define NVML_FI_DEV_PCIE_REPLAY_COUNTER 94 //!< PCIe replay counter #define NVML_FI_DEV_PCIE_REPLAY_ROLLOVER_COUNTER 95 //!< PCIe replay rollover counter /* NvLink Flit Error Counters */ #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L6 96 //!< NVLink flow control CRC Error Counter for Lane 6 #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L7 97 //!< NVLink flow control CRC Error Counter for Lane 7 #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L8 98 //!< NVLink flow control CRC Error Counter for Lane 8 #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L9 99 //!< NVLink flow control CRC Error Counter for Lane 9 #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L10 100 //!< NVLink flow control CRC Error Counter for Lane 10 #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L11 101 //!< NVLink flow control CRC Error Counter for Lane 11 /* NvLink CRC Data Error Counters */ #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L6 102 //!< NVLink data CRC Error Counter for Lane 6 #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L7 103 //!< NVLink data CRC Error Counter for Lane 7 #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L8 104 //!< NVLink data CRC Error Counter for Lane 8 #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L9 105 //!< NVLink data CRC Error Counter for Lane 9 #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L10 106 //!< NVLink data CRC Error Counter for Lane 10 #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L11 107 //!< NVLink data CRC Error Counter for Lane 11 /* NvLink Replay Error Counters */ #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L6 108 //!< NVLink Replay Error Counter for Lane 6 #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L7 109 //!< NVLink Replay Error Counter for Lane 7 #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L8 110 //!< NVLink Replay Error Counter for Lane 8 #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L9 111 //!< NVLink Replay Error Counter for Lane 9 #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L10 112 //!< NVLink Replay Error Counter for Lane 10 #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L11 113 //!< NVLink Replay Error Counter for Lane 11 /* NvLink Recovery Error Counters */ #define NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L6 114 //!< NVLink Recovery Error Counter for Lane 6 #define NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L7 115 //!< NVLink Recovery Error Counter for Lane 7 #define NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L8 116 //!< NVLink Recovery Error Counter for Lane 8 #define NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L9 117 //!< NVLink Recovery Error Counter for Lane 9 #define NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L10 118 //!< NVLink Recovery Error Counter for Lane 10 #define NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L11 119 //!< NVLink Recovery Error Counter for Lane 11 /* NvLink Bandwidth Counters */ /* * NVML_FI_DEV_NVLINK_BANDWIDTH_* field values are now deprecated. * Please use the following field values instead: * NVML_FI_DEV_NVLINK_THROUGHPUT_DATA_TX * NVML_FI_DEV_NVLINK_THROUGHPUT_DATA_RX * NVML_FI_DEV_NVLINK_THROUGHPUT_RAW_TX * NVML_FI_DEV_NVLINK_THROUGHPUT_RAW_RX */ #define NVML_FI_DEV_NVLINK_BANDWIDTH_C0_L6 120 //!< NVLink Bandwidth Counter for Counter Set 0, Lane 6 #define NVML_FI_DEV_NVLINK_BANDWIDTH_C0_L7 121 //!< NVLink Bandwidth Counter for Counter Set 0, Lane 7 #define NVML_FI_DEV_NVLINK_BANDWIDTH_C0_L8 122 //!< NVLink Bandwidth Counter for Counter Set 0, Lane 8 #define NVML_FI_DEV_NVLINK_BANDWIDTH_C0_L9 123 //!< NVLink Bandwidth Counter for Counter Set 0, Lane 9 #define NVML_FI_DEV_NVLINK_BANDWIDTH_C0_L10 124 //!< NVLink Bandwidth Counter for Counter Set 0, Lane 10 #define NVML_FI_DEV_NVLINK_BANDWIDTH_C0_L11 125 //!< NVLink Bandwidth Counter for Counter Set 0, Lane 11 /* NvLink Bandwidth Counters */ #define NVML_FI_DEV_NVLINK_BANDWIDTH_C1_L6 126 //!< NVLink Bandwidth Counter for Counter Set 1, Lane 6 #define NVML_FI_DEV_NVLINK_BANDWIDTH_C1_L7 127 //!< NVLink Bandwidth Counter for Counter Set 1, Lane 7 #define NVML_FI_DEV_NVLINK_BANDWIDTH_C1_L8 128 //!< NVLink Bandwidth Counter for Counter Set 1, Lane 8 #define NVML_FI_DEV_NVLINK_BANDWIDTH_C1_L9 129 //!< NVLink Bandwidth Counter for Counter Set 1, Lane 9 #define NVML_FI_DEV_NVLINK_BANDWIDTH_C1_L10 130 //!< NVLink Bandwidth Counter for Counter Set 1, Lane 10 #define NVML_FI_DEV_NVLINK_BANDWIDTH_C1_L11 131 //!< NVLink Bandwidth Counter for Counter Set 1, Lane 11 /* NVLink Speed */ #define NVML_FI_DEV_NVLINK_SPEED_MBPS_L6 132 //!< NVLink Speed in MBps for Link 6 #define NVML_FI_DEV_NVLINK_SPEED_MBPS_L7 133 //!< NVLink Speed in MBps for Link 7 #define NVML_FI_DEV_NVLINK_SPEED_MBPS_L8 134 //!< NVLink Speed in MBps for Link 8 #define NVML_FI_DEV_NVLINK_SPEED_MBPS_L9 135 //!< NVLink Speed in MBps for Link 9 #define NVML_FI_DEV_NVLINK_SPEED_MBPS_L10 136 //!< NVLink Speed in MBps for Link 10 #define NVML_FI_DEV_NVLINK_SPEED_MBPS_L11 137 //!< NVLink Speed in MBps for Link 11 /** * NVLink throughput counters field values * * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. * A scopeId of UINT_MAX returns aggregate value summed up across all links * for the specified counter type in fieldId. */ #define NVML_FI_DEV_NVLINK_THROUGHPUT_DATA_TX 138 //!< NVLink TX Data throughput in KiB #define NVML_FI_DEV_NVLINK_THROUGHPUT_DATA_RX 139 //!< NVLink RX Data throughput in KiB #define NVML_FI_DEV_NVLINK_THROUGHPUT_RAW_TX 140 //!< NVLink TX Data + protocol overhead in KiB #define NVML_FI_DEV_NVLINK_THROUGHPUT_RAW_RX 141 //!< NVLink RX Data + protocol overhead in KiB /* Row Remapper */ #define NVML_FI_DEV_REMAPPED_COR 142 //!< Number of remapped rows due to correctable errors #define NVML_FI_DEV_REMAPPED_UNC 143 //!< Number of remapped rows due to uncorrectable errors #define NVML_FI_DEV_REMAPPED_PENDING 144 //!< If any rows are pending remapping. 1=yes 0=no #define NVML_FI_DEV_REMAPPED_FAILURE 145 //!< If any rows failed to be remapped 1=yes 0=no /** * Remote device NVLink ID * * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. */ #define NVML_FI_DEV_NVLINK_REMOTE_NVLINK_ID 146 //!< Remote device NVLink ID /** * NVSwitch: connected NVLink count */ #define NVML_FI_DEV_NVSWITCH_CONNECTED_LINK_COUNT 147 //!< Number of NVLinks connected to NVSwitch /* NvLink ECC Data Error Counters * * Lane ID needs to be specified in the scopeId field in nvmlFieldValue_t. * */ #define NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_L0 148 //!< NVLink data ECC Error Counter for Link 0 #define NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_L1 149 //!< NVLink data ECC Error Counter for Link 1 #define NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_L2 150 //!< NVLink data ECC Error Counter for Link 2 #define NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_L3 151 //!< NVLink data ECC Error Counter for Link 3 #define NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_L4 152 //!< NVLink data ECC Error Counter for Link 4 #define NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_L5 153 //!< NVLink data ECC Error Counter for Link 5 #define NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_L6 154 //!< NVLink data ECC Error Counter for Link 6 #define NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_L7 155 //!< NVLink data ECC Error Counter for Link 7 #define NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_L8 156 //!< NVLink data ECC Error Counter for Link 8 #define NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_L9 157 //!< NVLink data ECC Error Counter for Link 9 #define NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_L10 158 //!< NVLink data ECC Error Counter for Link 10 #define NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_L11 159 //!< NVLink data ECC Error Counter for Link 11 #define NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_TOTAL 160 //!< NvLink data ECC Error Counter total for all Links #define NVML_FI_MAX 161 //!< One greater than the largest field ID defined above /** * Information for a Field Value Sample */ typedef struct nvmlFieldValue_st { unsigned int fieldId; //!< ID of the NVML field to retrieve. This must be set before any call that uses this struct. See the constants starting with NVML_FI_ above. unsigned int scopeId; //!< Scope ID can represent data used by NVML depending on fieldId's context. For example, for NVLink throughput counter data, scopeId can represent linkId. long long timestamp; //!< CPU Timestamp of this value in microseconds since 1970 long long latencyUsec; //!< How long this field value took to update (in usec) within NVML. This may be averaged across several fields that are serviced by the same driver call. nvmlValueType_t valueType; //!< Type of the value stored in value nvmlReturn_t nvmlReturn; //!< Return code for retrieving this value. This must be checked before looking at value, as value is undefined if nvmlReturn != NVML_SUCCESS nvmlValue_t value; //!< Value for this field. This is only valid if nvmlReturn == NVML_SUCCESS } nvmlFieldValue_t; /** @} */ /***************************************************************************************************/ /** @defgroup nvmlUnitStructs Unit Structs * @{ */ /***************************************************************************************************/ typedef struct { struct nvmlUnit_st* handle; } nvmlUnit_t; /** * Description of HWBC entry */ typedef struct nvmlHwbcEntry_st { unsigned int hwbcId; char firmwareVersion[32]; } nvmlHwbcEntry_t; /** * Fan state enum. */ typedef enum nvmlFanState_enum { NVML_FAN_NORMAL = 0, //!< Fan is working properly NVML_FAN_FAILED = 1 //!< Fan has failed } nvmlFanState_t; /** * Led color enum. */ typedef enum nvmlLedColor_enum { NVML_LED_COLOR_GREEN = 0, //!< GREEN, indicates good health NVML_LED_COLOR_AMBER = 1 //!< AMBER, indicates problem } nvmlLedColor_t; /** * LED states for an S-class unit. */ typedef struct nvmlLedState_st { char cause[256]; //!< If amber, a text description of the cause nvmlLedColor_t color; //!< GREEN or AMBER } nvmlLedState_t; /** * Static S-class unit info. */ typedef struct nvmlUnitInfo_st { char name[96]; //!< Product name char id[96]; //!< Product identifier char serial[96]; //!< Product serial number char firmwareVersion[96]; //!< Firmware version } nvmlUnitInfo_t; /** * Power usage information for an S-class unit. * The power supply state is a human readable string that equals "Normal" or contains * a combination of "Abnormal" plus one or more of the following: * * - High voltage * - Fan failure * - Heatsink temperature * - Current limit * - Voltage below UV alarm threshold * - Low-voltage * - SI2C remote off command * - MOD_DISABLE input * - Short pin transition */ typedef struct nvmlPSUInfo_st { char state[256]; //!< The power supply state unsigned int current; //!< PSU current (A) unsigned int voltage; //!< PSU voltage (V) unsigned int power; //!< PSU power draw (W) } nvmlPSUInfo_t; /** * Fan speed reading for a single fan in an S-class unit. */ typedef struct nvmlUnitFanInfo_st { unsigned int speed; //!< Fan speed (RPM) nvmlFanState_t state; //!< Flag that indicates whether fan is working properly } nvmlUnitFanInfo_t; /** * Fan speed readings for an entire S-class unit. */ typedef struct nvmlUnitFanSpeeds_st { nvmlUnitFanInfo_t fans[24]; //!< Fan speed data for each fan unsigned int count; //!< Number of fans in unit } nvmlUnitFanSpeeds_t; /** @} */ /***************************************************************************************************/ /** @addtogroup nvmlEvents * @{ */ /***************************************************************************************************/ /** * Handle to an event set */ typedef struct { struct nvmlEventSet_st* handle; } nvmlEventSet_t; /** @defgroup nvmlEventType Event Types * @{ * Event Types which user can be notified about. * See description of particular functions for details. * * See \ref nvmlDeviceRegisterEvents and \ref nvmlDeviceGetSupportedEventTypes to check which devices * support each event. * * Types can be combined with bitwise or operator '|' when passed to \ref nvmlDeviceRegisterEvents */ //! Event about single bit ECC errors /** * \note A corrected texture memory error is not an ECC error, so it does not generate a single bit event */ #define nvmlEventTypeSingleBitEccError 0x0000000000000001LL //! Event about double bit ECC errors /** * \note An uncorrected texture memory error is not an ECC error, so it does not generate a double bit event */ #define nvmlEventTypeDoubleBitEccError 0x0000000000000002LL //! Event about PState changes /** * \note On Fermi architecture PState changes are also an indicator that GPU is throttling down due to * no work being executed on the GPU, power capping or thermal capping. In a typical situation, * Fermi-based GPU should stay in P0 for the duration of the execution of the compute process. */ #define nvmlEventTypePState 0x0000000000000004LL //! Event that Xid critical error occurred #define nvmlEventTypeXidCriticalError 0x0000000000000008LL //! Event about clock changes /** * Kepler only */ #define nvmlEventTypeClock 0x0000000000000010LL //! Event about AC/Battery power source changes #define nvmlEventTypePowerSourceChange 0x0000000000000080LL //! Event about MIG configuration changes #define nvmlEventMigConfigChange 0x0000000000000100LL //! Mask with no events #define nvmlEventTypeNone 0x0000000000000000LL //! Mask of all events #define nvmlEventTypeAll (nvmlEventTypeNone \ | nvmlEventTypeSingleBitEccError \ | nvmlEventTypeDoubleBitEccError \ | nvmlEventTypePState \ | nvmlEventTypeClock \ | nvmlEventTypeXidCriticalError \ | nvmlEventTypePowerSourceChange \ | nvmlEventMigConfigChange \ ) /** @} */ /** * Information about occurred event */ typedef struct nvmlEventData_st { nvmlDevice_t device; //!< Specific device where the event occurred unsigned long long eventType; //!< Information about what specific event occurred unsigned long long eventData; //!< Stores XID error for the device in the event of nvmlEventTypeXidCriticalError, // eventData is 0 for any other event. eventData is set as 999 for unknown xid error. unsigned int gpuInstanceId; //!< If MIG is enabled and nvmlEventTypeXidCriticalError event is attributable to a GPU // instance, stores a valid GPU instance ID. gpuInstanceId is set to 0xFFFFFFFF // otherwise. unsigned int computeInstanceId; //!< If MIG is enabled and nvmlEventTypeXidCriticalError event is attributable to a // compute instance, stores a valid compute instance ID. computeInstanceId is set to // 0xFFFFFFFF otherwise. } nvmlEventData_t; /** @} */ /***************************************************************************************************/ /** @addtogroup nvmlClocksThrottleReasons * @{ */ /***************************************************************************************************/ /** Nothing is running on the GPU and the clocks are dropping to Idle state * \note This limiter may be removed in a later release */ #define nvmlClocksThrottleReasonGpuIdle 0x0000000000000001LL /** GPU clocks are limited by current setting of applications clocks * * @see nvmlDeviceSetApplicationsClocks * @see nvmlDeviceGetApplicationsClock */ #define nvmlClocksThrottleReasonApplicationsClocksSetting 0x0000000000000002LL /** * @deprecated Renamed to \ref nvmlClocksThrottleReasonApplicationsClocksSetting * as the name describes the situation more accurately. */ #define nvmlClocksThrottleReasonUserDefinedClocks nvmlClocksThrottleReasonApplicationsClocksSetting /** SW Power Scaling algorithm is reducing the clocks below requested clocks * * @see nvmlDeviceGetPowerUsage * @see nvmlDeviceSetPowerManagementLimit * @see nvmlDeviceGetPowerManagementLimit */ #define nvmlClocksThrottleReasonSwPowerCap 0x0000000000000004LL /** HW Slowdown (reducing the core clocks by a factor of 2 or more) is engaged * * This is an indicator of: * - temperature being too high * - External Power Brake Assertion is triggered (e.g. by the system power supply) * - Power draw is too high and Fast Trigger protection is reducing the clocks * - May be also reported during PState or clock change * - This behavior may be removed in a later release. * * @see nvmlDeviceGetTemperature * @see nvmlDeviceGetTemperatureThreshold * @see nvmlDeviceGetPowerUsage */ #define nvmlClocksThrottleReasonHwSlowdown 0x0000000000000008LL /** Sync Boost * * This GPU has been added to a Sync boost group with nvidia-smi or DCGM in * order to maximize performance per watt. All GPUs in the sync boost group * will boost to the minimum possible clocks across the entire group. Look at * the throttle reasons for other GPUs in the system to see why those GPUs are * holding this one at lower clocks. * */ #define nvmlClocksThrottleReasonSyncBoost 0x0000000000000010LL /** SW Thermal Slowdown * * This is an indicator of one or more of the following: * - Current GPU temperature above the GPU Max Operating Temperature * - Current memory temperature above the Memory Max Operating Temperature * */ #define nvmlClocksThrottleReasonSwThermalSlowdown 0x0000000000000020LL /** HW Thermal Slowdown (reducing the core clocks by a factor of 2 or more) is engaged * * This is an indicator of: * - temperature being too high * * @see nvmlDeviceGetTemperature * @see nvmlDeviceGetTemperatureThreshold * @see nvmlDeviceGetPowerUsage */ #define nvmlClocksThrottleReasonHwThermalSlowdown 0x0000000000000040LL /** HW Power Brake Slowdown (reducing the core clocks by a factor of 2 or more) is engaged * * This is an indicator of: * - External Power Brake Assertion being triggered (e.g. by the system power supply) * * @see nvmlDeviceGetTemperature * @see nvmlDeviceGetTemperatureThreshold * @see nvmlDeviceGetPowerUsage */ #define nvmlClocksThrottleReasonHwPowerBrakeSlowdown 0x0000000000000080LL /** GPU clocks are limited by current setting of Display clocks * * @see bug 1997531 */ #define nvmlClocksThrottleReasonDisplayClockSetting 0x0000000000000100LL /** Bit mask representing no clocks throttling * * Clocks are as high as possible. * */ #define nvmlClocksThrottleReasonNone 0x0000000000000000LL /** Bit mask representing all supported clocks throttling reasons * New reasons might be added to this list in the future */ #define nvmlClocksThrottleReasonAll (nvmlClocksThrottleReasonNone \ | nvmlClocksThrottleReasonGpuIdle \ | nvmlClocksThrottleReasonApplicationsClocksSetting \ | nvmlClocksThrottleReasonSwPowerCap \ | nvmlClocksThrottleReasonHwSlowdown \ | nvmlClocksThrottleReasonSyncBoost \ | nvmlClocksThrottleReasonSwThermalSlowdown \ | nvmlClocksThrottleReasonHwThermalSlowdown \ | nvmlClocksThrottleReasonHwPowerBrakeSlowdown \ | nvmlClocksThrottleReasonDisplayClockSetting \ ) /** @} */ /***************************************************************************************************/ /** @defgroup nvmlAccountingStats Accounting Statistics * @{ * * Set of APIs designed to provide per process information about usage of GPU. * * @note All accounting statistics and accounting mode live in nvidia driver and reset * to default (Disabled) when driver unloads. * It is advised to run with persistence mode enabled. * * @note Enabling accounting mode has no negative impact on the GPU performance. */ /***************************************************************************************************/ /** * Describes accounting statistics of a process. */ typedef struct nvmlAccountingStats_st { unsigned int gpuUtilization; //!< Percent of time over the process's lifetime during which one or more kernels was executing on the GPU. //! Utilization stats just like returned by \ref nvmlDeviceGetUtilizationRates but for the life time of a //! process (not just the last sample period). //! Set to NVML_VALUE_NOT_AVAILABLE if nvmlDeviceGetUtilizationRates is not supported unsigned int memoryUtilization; //!< Percent of time over the process's lifetime during which global (device) memory was being read or written. //! Set to NVML_VALUE_NOT_AVAILABLE if nvmlDeviceGetUtilizationRates is not supported unsigned long long maxMemoryUsage; //!< Maximum total memory in bytes that was ever allocated by the process. //! Set to NVML_VALUE_NOT_AVAILABLE if nvmlProcessInfo_t->usedGpuMemory is not supported unsigned long long time; //!< Amount of time in ms during which the compute context was active. The time is reported as 0 if //!< the process is not terminated unsigned long long startTime; //!< CPU Timestamp in usec representing start time for the process unsigned int isRunning; //!< Flag to represent if the process is running (1 for running, 0 for terminated) unsigned int reserved[5]; //!< Reserved for future use } nvmlAccountingStats_t; /** @} */ /***************************************************************************************************/ /** @defgroup nvmlEncoderStructs Encoder Structs * @{ */ /***************************************************************************************************/ /** * Represents type of encoder for capacity can be queried */ typedef enum nvmlEncoderQueryType_enum { NVML_ENCODER_QUERY_H264 = 0, //!< H264 encoder NVML_ENCODER_QUERY_HEVC = 1 //!< HEVC encoder }nvmlEncoderType_t; /** * Structure to hold encoder session data */ typedef struct nvmlEncoderSessionInfo_st { unsigned int sessionId; //!< Unique session ID unsigned int pid; //!< Owning process ID nvmlVgpuInstance_t vgpuInstance; //!< Owning vGPU instance ID (only valid on vGPU hosts, otherwise zero) nvmlEncoderType_t codecType; //!< Video encoder type unsigned int hResolution; //!< Current encode horizontal resolution unsigned int vResolution; //!< Current encode vertical resolution unsigned int averageFps; //!< Moving average encode frames per second unsigned int averageLatency; //!< Moving average encode latency in microseconds }nvmlEncoderSessionInfo_t; /** @} */ /***************************************************************************************************/ /** @defgroup nvmlFBCStructs Frame Buffer Capture Structures * @{ */ /***************************************************************************************************/ /** * Represents frame buffer capture session type */ typedef enum nvmlFBCSessionType_enum { NVML_FBC_SESSION_TYPE_UNKNOWN = 0, //!< Unknwon NVML_FBC_SESSION_TYPE_TOSYS, //!< ToSys NVML_FBC_SESSION_TYPE_CUDA, //!< Cuda NVML_FBC_SESSION_TYPE_VID, //!< Vid NVML_FBC_SESSION_TYPE_HWENC //!< HEnc } nvmlFBCSessionType_t; /** * Structure to hold frame buffer capture sessions stats */ typedef struct nvmlFBCStats_st { unsigned int sessionsCount; //!< Total no of sessions unsigned int averageFPS; //!< Moving average new frames captured per second unsigned int averageLatency; //!< Moving average new frame capture latency in microseconds } nvmlFBCStats_t; #define NVML_NVFBC_SESSION_FLAG_DIFFMAP_ENABLED 0x00000001 //!< Bit specifying differential map state. #define NVML_NVFBC_SESSION_FLAG_CLASSIFICATIONMAP_ENABLED 0x00000002 //!< Bit specifying classification map state. #define NVML_NVFBC_SESSION_FLAG_CAPTURE_WITH_WAIT_NO_WAIT 0x00000004 //!< Bit specifying if capture was requested as non-blocking call. #define NVML_NVFBC_SESSION_FLAG_CAPTURE_WITH_WAIT_INFINITE 0x00000008 //!< Bit specifying if capture was requested as blocking call. #define NVML_NVFBC_SESSION_FLAG_CAPTURE_WITH_WAIT_TIMEOUT 0x00000010 //!< Bit specifying if capture was requested as blocking call with timeout period. /** * Structure to hold FBC session data */ typedef struct nvmlFBCSessionInfo_st { unsigned int sessionId; //!< Unique session ID unsigned int pid; //!< Owning process ID nvmlVgpuInstance_t vgpuInstance; //!< Owning vGPU instance ID (only valid on vGPU hosts, otherwise zero) unsigned int displayOrdinal; //!< Display identifier nvmlFBCSessionType_t sessionType; //!< Type of frame buffer capture session unsigned int sessionFlags; //!< Session flags (one or more of NVML_NVFBC_SESSION_FLAG_XXX). unsigned int hMaxResolution; //!< Max horizontal resolution supported by the capture session unsigned int vMaxResolution; //!< Max vertical resolution supported by the capture session unsigned int hResolution; //!< Horizontal resolution requested by caller in capture call unsigned int vResolution; //!< Vertical resolution requested by caller in capture call unsigned int averageFPS; //!< Moving average new frames captured per second unsigned int averageLatency; //!< Moving average new frame capture latency in microseconds } nvmlFBCSessionInfo_t; /** @} */ /***************************************************************************************************/ /** @defgroup nvmlDrainDefs definitions related to the drain state * @{ */ /***************************************************************************************************/ /** * Is the GPU device to be removed from the kernel by nvmlDeviceRemoveGpu() */ typedef enum nvmlDetachGpuState_enum { NVML_DETACH_GPU_KEEP = 0, NVML_DETACH_GPU_REMOVE } nvmlDetachGpuState_t; /** * Parent bridge PCIe link state requested by nvmlDeviceRemoveGpu() */ typedef enum nvmlPcieLinkState_enum { NVML_PCIE_LINK_KEEP = 0, NVML_PCIE_LINK_SHUT_DOWN } nvmlPcieLinkState_t; /** @} */ /***************************************************************************************************/ /** @defgroup nvmlInitializationAndCleanup Initialization and Cleanup * This chapter describes the methods that handle NVML initialization and cleanup. * It is the user's responsibility to call \ref nvmlInit_v2() before calling any other methods, and * nvmlShutdown() once NVML is no longer being used. * @{ */ /***************************************************************************************************/ #define NVML_INIT_FLAG_NO_GPUS 1 //!< Don't fail nvmlInit() when no GPUs are found #define NVML_INIT_FLAG_NO_ATTACH 2 //!< Don't attach GPUs /** * Initialize NVML, but don't initialize any GPUs yet. * * \note nvmlInit_v3 introduces a "flags" argument, that allows passing boolean values * modifying the behaviour of nvmlInit(). * \note In NVML 5.319 new nvmlInit_v2 has replaced nvmlInit"_v1" (default in NVML 4.304 and older) that * did initialize all GPU devices in the system. * * This allows NVML to communicate with a GPU * when other GPUs in the system are unstable or in a bad state. When using this API, GPUs are * discovered and initialized in nvmlDeviceGetHandleBy* functions instead. * * \note To contrast nvmlInit_v2 with nvmlInit"_v1", NVML 4.304 nvmlInit"_v1" will fail when any detected GPU is in * a bad or unstable state. * * For all products. * * This method, should be called once before invoking any other methods in the library. * A reference count of the number of initializations is maintained. Shutdown only occurs * when the reference count reaches zero. * * @return * - \ref NVML_SUCCESS if NVML has been properly initialized * - \ref NVML_ERROR_DRIVER_NOT_LOADED if NVIDIA driver is not running * - \ref NVML_ERROR_NO_PERMISSION if NVML does not have permission to talk to the driver * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlInit_v2(void); /** * nvmlInitWithFlags is a variant of nvmlInit(), that allows passing a set of boolean values * modifying the behaviour of nvmlInit(). * Other than the "flags" parameter it is completely similar to \ref nvmlInit_v2. * * For all products. * * @param flags behaviour modifier flags * * @return * - \ref NVML_SUCCESS if NVML has been properly initialized * - \ref NVML_ERROR_DRIVER_NOT_LOADED if NVIDIA driver is not running * - \ref NVML_ERROR_NO_PERMISSION if NVML does not have permission to talk to the driver * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlInitWithFlags(unsigned int flags); /** * Shut down NVML by releasing all GPU resources previously allocated with \ref nvmlInit_v2(). * * For all products. * * This method should be called after NVML work is done, once for each call to \ref nvmlInit_v2() * A reference count of the number of initializations is maintained. Shutdown only occurs * when the reference count reaches zero. For backwards compatibility, no error is reported if * nvmlShutdown() is called more times than nvmlInit(). * * @return * - \ref NVML_SUCCESS if NVML has been properly shut down * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlShutdown(void); /** @} */ /***************************************************************************************************/ /** @defgroup nvmlErrorReporting Error reporting * This chapter describes helper functions for error reporting routines. * @{ */ /***************************************************************************************************/ /** * Helper method for converting NVML error codes into readable strings. * * For all products. * * @param result NVML error code to convert * * @return String representation of the error. * */ const DECLDIR char* nvmlErrorString(nvmlReturn_t result); /** @} */ /***************************************************************************************************/ /** @defgroup nvmlConstants Constants * @{ */ /***************************************************************************************************/ /** * Buffer size guaranteed to be large enough for \ref nvmlDeviceGetInforomVersion and \ref nvmlDeviceGetInforomImageVersion */ #define NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE 16 /** * Buffer size guaranteed to be large enough for storing GPU identifiers. */ #define NVML_DEVICE_UUID_BUFFER_SIZE 80 /** * Buffer size guaranteed to be large enough for \ref nvmlDeviceGetUUID */ #define NVML_DEVICE_UUID_V2_BUFFER_SIZE 96 /** * Buffer size guaranteed to be large enough for \ref nvmlDeviceGetBoardPartNumber */ #define NVML_DEVICE_PART_NUMBER_BUFFER_SIZE 80 /** * Buffer size guaranteed to be large enough for \ref nvmlSystemGetDriverVersion */ #define NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE 80 /** * Buffer size guaranteed to be large enough for \ref nvmlSystemGetNVMLVersion */ #define NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE 80 /** * Buffer size guaranteed to be large enough for storing GPU device names. */ #define NVML_DEVICE_NAME_BUFFER_SIZE 64 /** * Buffer size guaranteed to be large enough for \ref nvmlDeviceGetName */ #define NVML_DEVICE_NAME_V2_BUFFER_SIZE 96 /** * Buffer size guaranteed to be large enough for \ref nvmlDeviceGetSerial */ #define NVML_DEVICE_SERIAL_BUFFER_SIZE 30 /** * Buffer size guaranteed to be large enough for \ref nvmlDeviceGetVbiosVersion */ #define NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE 32 /** @} */ /***************************************************************************************************/ /** @defgroup nvmlSystemQueries System Queries * This chapter describes the queries that NVML can perform against the local system. These queries * are not device-specific. * @{ */ /***************************************************************************************************/ /** * Retrieves the version of the system's graphics driver. * * For all products. * * The version identifier is an alphanumeric string. It will not exceed 80 characters in length * (including the NULL terminator). See \ref nvmlConstants::NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE. * * @param version Reference in which to return the version identifier * @param length The maximum allowed length of the string returned in \a version * * @return * - \ref NVML_SUCCESS if \a version has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a version is NULL * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a length is too small */ nvmlReturn_t DECLDIR nvmlSystemGetDriverVersion(char *version, unsigned int length); /** * Retrieves the version of the NVML library. * * For all products. * * The version identifier is an alphanumeric string. It will not exceed 80 characters in length * (including the NULL terminator). See \ref nvmlConstants::NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE. * * @param version Reference in which to return the version identifier * @param length The maximum allowed length of the string returned in \a version * * @return * - \ref NVML_SUCCESS if \a version has been set * - \ref NVML_ERROR_INVALID_ARGUMENT if \a version is NULL * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a length is too small */ nvmlReturn_t DECLDIR nvmlSystemGetNVMLVersion(char *version, unsigned int length); /** * Retrieves the version of the CUDA driver. * * For all products. * * The CUDA driver version returned will be retreived from the currently installed version of CUDA. * If the cuda library is not found, this function will return a known supported version number. * * @param cudaDriverVersion Reference in which to return the version identifier * * @return * - \ref NVML_SUCCESS if \a cudaDriverVersion has been set * - \ref NVML_ERROR_INVALID_ARGUMENT if \a cudaDriverVersion is NULL */ nvmlReturn_t DECLDIR nvmlSystemGetCudaDriverVersion(int *cudaDriverVersion); /** * Retrieves the version of the CUDA driver from the shared library. * * For all products. * * The returned CUDA driver version by calling cuDriverGetVersion() * * @param cudaDriverVersion Reference in which to return the version identifier * * @return * - \ref NVML_SUCCESS if \a cudaDriverVersion has been set * - \ref NVML_ERROR_INVALID_ARGUMENT if \a cudaDriverVersion is NULL * - \ref NVML_ERROR_LIBRARY_NOT_FOUND if \a libcuda.so.1 or libcuda.dll is not found * - \ref NVML_ERROR_FUNCTION_NOT_FOUND if \a cuDriverGetVersion() is not found in the shared library */ nvmlReturn_t DECLDIR nvmlSystemGetCudaDriverVersion_v2(int *cudaDriverVersion); /** * Macros for converting the CUDA driver version number to Major and Minor version numbers. */ #define NVML_CUDA_DRIVER_VERSION_MAJOR(v) ((v)/1000) #define NVML_CUDA_DRIVER_VERSION_MINOR(v) (((v)%1000)/10) /** * Gets name of the process with provided process id * * For all products. * * Returned process name is cropped to provided length. * name string is encoded in ANSI. * * @param pid The identifier of the process * @param name Reference in which to return the process name * @param length The maximum allowed length of the string returned in \a name * * @return * - \ref NVML_SUCCESS if \a name has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a name is NULL or \a length is 0. * - \ref NVML_ERROR_NOT_FOUND if process doesn't exists * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlSystemGetProcessName(unsigned int pid, char *name, unsigned int length); /** @} */ /***************************************************************************************************/ /** @defgroup nvmlUnitQueries Unit Queries * This chapter describes that queries that NVML can perform against each unit. For S-class systems only. * In each case the device is identified with an nvmlUnit_t handle. This handle is obtained by * calling \ref nvmlUnitGetHandleByIndex(). * @{ */ /***************************************************************************************************/ /** * Retrieves the number of units in the system. * * For S-class products. * * @param unitCount Reference in which to return the number of units * * @return * - \ref NVML_SUCCESS if \a unitCount has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a unitCount is NULL * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlUnitGetCount(unsigned int *unitCount); /** * Acquire the handle for a particular unit, based on its index. * * For S-class products. * * Valid indices are derived from the \a unitCount returned by \ref nvmlUnitGetCount(). * For example, if \a unitCount is 2 the valid indices are 0 and 1, corresponding to UNIT 0 and UNIT 1. * * The order in which NVML enumerates units has no guarantees of consistency between reboots. * * @param index The index of the target unit, >= 0 and < \a unitCount * @param unit Reference in which to return the unit handle * * @return * - \ref NVML_SUCCESS if \a unit has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a index is invalid or \a unit is NULL * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlUnitGetHandleByIndex(unsigned int index, nvmlUnit_t *unit); /** * Retrieves the static information associated with a unit. * * For S-class products. * * See \ref nvmlUnitInfo_t for details on available unit info. * * @param unit The identifier of the target unit * @param info Reference in which to return the unit information * * @return * - \ref NVML_SUCCESS if \a info has been populated * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a unit is invalid or \a info is NULL */ nvmlReturn_t DECLDIR nvmlUnitGetUnitInfo(nvmlUnit_t unit, nvmlUnitInfo_t *info); /** * Retrieves the LED state associated with this unit. * * For S-class products. * * See \ref nvmlLedState_t for details on allowed states. * * @param unit The identifier of the target unit * @param state Reference in which to return the current LED state * * @return * - \ref NVML_SUCCESS if \a state has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a unit is invalid or \a state is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if this is not an S-class product * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlUnitSetLedState() */ nvmlReturn_t DECLDIR nvmlUnitGetLedState(nvmlUnit_t unit, nvmlLedState_t *state); /** * Retrieves the PSU stats for the unit. * * For S-class products. * * See \ref nvmlPSUInfo_t for details on available PSU info. * * @param unit The identifier of the target unit * @param psu Reference in which to return the PSU information * * @return * - \ref NVML_SUCCESS if \a psu has been populated * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a unit is invalid or \a psu is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if this is not an S-class product * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlUnitGetPsuInfo(nvmlUnit_t unit, nvmlPSUInfo_t *psu); /** * Retrieves the temperature readings for the unit, in degrees C. * * For S-class products. * * Depending on the product, readings may be available for intake (type=0), * exhaust (type=1) and board (type=2). * * @param unit The identifier of the target unit * @param type The type of reading to take * @param temp Reference in which to return the intake temperature * * @return * - \ref NVML_SUCCESS if \a temp has been populated * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a unit or \a type is invalid or \a temp is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if this is not an S-class product * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlUnitGetTemperature(nvmlUnit_t unit, unsigned int type, unsigned int *temp); /** * Retrieves the fan speed readings for the unit. * * For S-class products. * * See \ref nvmlUnitFanSpeeds_t for details on available fan speed info. * * @param unit The identifier of the target unit * @param fanSpeeds Reference in which to return the fan speed information * * @return * - \ref NVML_SUCCESS if \a fanSpeeds has been populated * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a unit is invalid or \a fanSpeeds is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if this is not an S-class product * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlUnitGetFanSpeedInfo(nvmlUnit_t unit, nvmlUnitFanSpeeds_t *fanSpeeds); /** * Retrieves the set of GPU devices that are attached to the specified unit. * * For S-class products. * * The \a deviceCount argument is expected to be set to the size of the input \a devices array. * * @param unit The identifier of the target unit * @param deviceCount Reference in which to provide the \a devices array size, and * to return the number of attached GPU devices * @param devices Reference in which to return the references to the attached GPU devices * * @return * - \ref NVML_SUCCESS if \a deviceCount and \a devices have been populated * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a deviceCount indicates that the \a devices array is too small * - \ref NVML_ERROR_INVALID_ARGUMENT if \a unit is invalid, either of \a deviceCount or \a devices is NULL * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlUnitGetDevices(nvmlUnit_t unit, unsigned int *deviceCount, nvmlDevice_t *devices); /** * Retrieves the IDs and firmware versions for any Host Interface Cards (HICs) in the system. * * For S-class products. * * The \a hwbcCount argument is expected to be set to the size of the input \a hwbcEntries array. * The HIC must be connected to an S-class system for it to be reported by this function. * * @param hwbcCount Size of hwbcEntries array * @param hwbcEntries Array holding information about hwbc * * @return * - \ref NVML_SUCCESS if \a hwbcCount and \a hwbcEntries have been populated * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if either \a hwbcCount or \a hwbcEntries is NULL * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a hwbcCount indicates that the \a hwbcEntries array is too small */ nvmlReturn_t DECLDIR nvmlSystemGetHicVersion(unsigned int *hwbcCount, nvmlHwbcEntry_t *hwbcEntries); /** @} */ /***************************************************************************************************/ /** @defgroup nvmlDeviceQueries Device Queries * This chapter describes that queries that NVML can perform against each device. * In each case the device is identified with an nvmlDevice_t handle. This handle is obtained by * calling one of \ref nvmlDeviceGetHandleByIndex_v2(), \ref nvmlDeviceGetHandleBySerial(), * \ref nvmlDeviceGetHandleByPciBusId_v2(). or \ref nvmlDeviceGetHandleByUUID(). * @{ */ /***************************************************************************************************/ /** * Retrieves the number of compute devices in the system. A compute device is a single GPU. * * For all products. * * Note: New nvmlDeviceGetCount_v2 (default in NVML 5.319) returns count of all devices in the system * even if nvmlDeviceGetHandleByIndex_v2 returns NVML_ERROR_NO_PERMISSION for such device. * Update your code to handle this error, or use NVML 4.304 or older nvml header file. * For backward binary compatibility reasons _v1 version of the API is still present in the shared * library. * Old _v1 version of nvmlDeviceGetCount doesn't count devices that NVML has no permission to talk to. * * @param deviceCount Reference in which to return the number of accessible devices * * @return * - \ref NVML_SUCCESS if \a deviceCount has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a deviceCount is NULL * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetCount_v2(unsigned int *deviceCount); /** * Get attributes (engine counts etc.) for the given NVML device handle. * * @note This API currently only supports MIG device handles. * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. * * @param device NVML device handle * @param attributes Device attributes * * @return * - \ref NVML_SUCCESS if \a device attributes were successfully retrieved * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device handle is invalid * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetAttributes_v2(nvmlDevice_t device, nvmlDeviceAttributes_t *attributes); /** * Acquire the handle for a particular device, based on its index. * * For all products. * * Valid indices are derived from the \a accessibleDevices count returned by * \ref nvmlDeviceGetCount_v2(). For example, if \a accessibleDevices is 2 the valid indices * are 0 and 1, corresponding to GPU 0 and GPU 1. * * The order in which NVML enumerates devices has no guarantees of consistency between reboots. For that reason it * is recommended that devices be looked up by their PCI ids or UUID. See * \ref nvmlDeviceGetHandleByUUID() and \ref nvmlDeviceGetHandleByPciBusId_v2(). * * Note: The NVML index may not correlate with other APIs, such as the CUDA device index. * * Starting from NVML 5, this API causes NVML to initialize the target GPU * NVML may initialize additional GPUs if: * - The target GPU is an SLI slave * * Note: New nvmlDeviceGetCount_v2 (default in NVML 5.319) returns count of all devices in the system * even if nvmlDeviceGetHandleByIndex_v2 returns NVML_ERROR_NO_PERMISSION for such device. * Update your code to handle this error, or use NVML 4.304 or older nvml header file. * For backward binary compatibility reasons _v1 version of the API is still present in the shared * library. * Old _v1 version of nvmlDeviceGetCount doesn't count devices that NVML has no permission to talk to. * * This means that nvmlDeviceGetHandleByIndex_v2 and _v1 can return different devices for the same index. * If you don't touch macros that map old (_v1) versions to _v2 versions at the top of the file you don't * need to worry about that. * * @param index The index of the target GPU, >= 0 and < \a accessibleDevices * @param device Reference in which to return the device handle * * @return * - \ref NVML_SUCCESS if \a device has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a index is invalid or \a device is NULL * - \ref NVML_ERROR_INSUFFICIENT_POWER if any attached devices have improperly attached external power cables * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to talk to this device * - \ref NVML_ERROR_IRQ_ISSUE if NVIDIA kernel detected an interrupt issue with the attached GPUs * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlDeviceGetIndex * @see nvmlDeviceGetCount */ nvmlReturn_t DECLDIR nvmlDeviceGetHandleByIndex_v2(unsigned int index, nvmlDevice_t *device); /** * Acquire the handle for a particular device, based on its board serial number. * * For Fermi &tm; or newer fully supported devices. * * This number corresponds to the value printed directly on the board, and to the value returned by * \ref nvmlDeviceGetSerial(). * * @deprecated Since more than one GPU can exist on a single board this function is deprecated in favor * of \ref nvmlDeviceGetHandleByUUID. * For dual GPU boards this function will return NVML_ERROR_INVALID_ARGUMENT. * * Starting from NVML 5, this API causes NVML to initialize the target GPU * NVML may initialize additional GPUs as it searches for the target GPU * * @param serial The board serial number of the target GPU * @param device Reference in which to return the device handle * * @return * - \ref NVML_SUCCESS if \a device has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a serial is invalid, \a device is NULL or more than one * device has the same serial (dual GPU boards) * - \ref NVML_ERROR_NOT_FOUND if \a serial does not match a valid device on the system * - \ref NVML_ERROR_INSUFFICIENT_POWER if any attached devices have improperly attached external power cables * - \ref NVML_ERROR_IRQ_ISSUE if NVIDIA kernel detected an interrupt issue with the attached GPUs * - \ref NVML_ERROR_GPU_IS_LOST if any GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlDeviceGetSerial * @see nvmlDeviceGetHandleByUUID */ nvmlReturn_t DECLDIR nvmlDeviceGetHandleBySerial(const char *serial, nvmlDevice_t *device); /** * Acquire the handle for a particular device, based on its globally unique immutable UUID associated with each device. * * For all products. * * @param uuid The UUID of the target GPU or MIG instance * @param device Reference in which to return the device handle or MIG device handle * * Starting from NVML 5, this API causes NVML to initialize the target GPU * NVML may initialize additional GPUs as it searches for the target GPU * * @return * - \ref NVML_SUCCESS if \a device has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a uuid is invalid or \a device is null * - \ref NVML_ERROR_NOT_FOUND if \a uuid does not match a valid device on the system * - \ref NVML_ERROR_INSUFFICIENT_POWER if any attached devices have improperly attached external power cables * - \ref NVML_ERROR_IRQ_ISSUE if NVIDIA kernel detected an interrupt issue with the attached GPUs * - \ref NVML_ERROR_GPU_IS_LOST if any GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlDeviceGetUUID */ nvmlReturn_t DECLDIR nvmlDeviceGetHandleByUUID(const char *uuid, nvmlDevice_t *device); /** * Acquire the handle for a particular device, based on its PCI bus id. * * For all products. * * This value corresponds to the nvmlPciInfo_t::busId returned by \ref nvmlDeviceGetPciInfo_v3(). * * Starting from NVML 5, this API causes NVML to initialize the target GPU * NVML may initialize additional GPUs if: * - The target GPU is an SLI slave * * \note NVML 4.304 and older version of nvmlDeviceGetHandleByPciBusId"_v1" returns NVML_ERROR_NOT_FOUND * instead of NVML_ERROR_NO_PERMISSION. * * @param pciBusId The PCI bus id of the target GPU * @param device Reference in which to return the device handle * * @return * - \ref NVML_SUCCESS if \a device has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a pciBusId is invalid or \a device is NULL * - \ref NVML_ERROR_NOT_FOUND if \a pciBusId does not match a valid device on the system * - \ref NVML_ERROR_INSUFFICIENT_POWER if the attached device has improperly attached external power cables * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to talk to this device * - \ref NVML_ERROR_IRQ_ISSUE if NVIDIA kernel detected an interrupt issue with the attached GPUs * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetHandleByPciBusId_v2(const char *pciBusId, nvmlDevice_t *device); /** * Retrieves the name of this device. * * For all products. * * The name is an alphanumeric string that denotes a particular product, e.g. Tesla &tm; C2070. It will not * exceed 96 characters in length (including the NULL terminator). See \ref * nvmlConstants::NVML_DEVICE_NAME_V2_BUFFER_SIZE. * * When used with MIG device handles the API returns MIG device names which can be used to identify devices * based on their attributes. * * @param device The identifier of the target device * @param name Reference in which to return the product name * @param length The maximum allowed length of the string returned in \a name * * @return * - \ref NVML_SUCCESS if \a name has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a name is NULL * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a length is too small * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetName(nvmlDevice_t device, char *name, unsigned int length); /** * Retrieves the brand of this device. * * For all products. * * The type is a member of \ref nvmlBrandType_t defined above. * * @param device The identifier of the target device * @param type Reference in which to return the product brand type * * @return * - \ref NVML_SUCCESS if \a name has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a type is NULL * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetBrand(nvmlDevice_t device, nvmlBrandType_t *type); /** * Retrieves the NVML index of this device. * * For all products. * * Valid indices are derived from the \a accessibleDevices count returned by * \ref nvmlDeviceGetCount_v2(). For example, if \a accessibleDevices is 2 the valid indices * are 0 and 1, corresponding to GPU 0 and GPU 1. * * The order in which NVML enumerates devices has no guarantees of consistency between reboots. For that reason it * is recommended that devices be looked up by their PCI ids or GPU UUID. See * \ref nvmlDeviceGetHandleByPciBusId_v2() and \ref nvmlDeviceGetHandleByUUID(). * * When used with MIG device handles this API returns indices that can be * passed to \ref nvmlDeviceGetMigDeviceHandleByIndex to retrieve an identical handle. * MIG device indices are unique within a device. * * Note: The NVML index may not correlate with other APIs, such as the CUDA device index. * * @param device The identifier of the target device * @param index Reference in which to return the NVML index of the device * * @return * - \ref NVML_SUCCESS if \a index has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a index is NULL * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlDeviceGetHandleByIndex() * @see nvmlDeviceGetCount() */ nvmlReturn_t DECLDIR nvmlDeviceGetIndex(nvmlDevice_t device, unsigned int *index); /** * Retrieves the globally unique board serial number associated with this device's board. * * For all products with an inforom. * * The serial number is an alphanumeric string that will not exceed 30 characters (including the NULL terminator). * This number matches the serial number tag that is physically attached to the board. See \ref * nvmlConstants::NVML_DEVICE_SERIAL_BUFFER_SIZE. * * @param device The identifier of the target device * @param serial Reference in which to return the board/module serial number * @param length The maximum allowed length of the string returned in \a serial * * @return * - \ref NVML_SUCCESS if \a serial has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a serial is NULL * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a length is too small * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetSerial(nvmlDevice_t device, char *serial, unsigned int length); /***************************************************************************************************/ /** @defgroup nvmlAffinity CPU and Memory Affinity * This chapter describes NVML operations that are associated with CPU and memory * affinity. * @{ */ /***************************************************************************************************/ //! Scope of NUMA node for affinity queries #define NVML_AFFINITY_SCOPE_NODE 0 //! Scope of processor socket for affinity queries #define NVML_AFFINITY_SCOPE_SOCKET 1 typedef unsigned int nvmlAffinityScope_t; /** * Retrieves an array of unsigned ints (sized to nodeSetSize) of bitmasks with * the ideal memory affinity within node or socket for the device. * For example, if NUMA node 0, 1 are ideal within the socket for the device and nodeSetSize == 1, * result[0] = 0x3 * * \note If requested scope is not applicable to the target topology, the API * will fall back to reporting the memory affinity for the immediate non-I/O * ancestor of the device. * * For Kepler &tm; or newer fully supported devices. * Supported on Linux only. * * @param device The identifier of the target device * @param nodeSetSize The size of the nodeSet array that is safe to access * @param nodeSet Array reference in which to return a bitmask of NODEs, 64 NODEs per * unsigned long on 64-bit machines, 32 on 32-bit machines * @param scope Scope that change the default behavior * * @return * - \ref NVML_SUCCESS if \a NUMA node Affinity has been filled * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, nodeSetSize == 0, nodeSet is NULL or scope is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetMemoryAffinity(nvmlDevice_t device, unsigned int nodeSetSize, unsigned long *nodeSet, nvmlAffinityScope_t scope); /** * Retrieves an array of unsigned ints (sized to cpuSetSize) of bitmasks with the * ideal CPU affinity within node or socket for the device. * For example, if processors 0, 1, 32, and 33 are ideal for the device and cpuSetSize == 2, * result[0] = 0x3, result[1] = 0x3 * * \note If requested scope is not applicable to the target topology, the API * will fall back to reporting the CPU affinity for the immediate non-I/O * ancestor of the device. * * For Kepler &tm; or newer fully supported devices. * Supported on Linux only. * * @param device The identifier of the target device * @param cpuSetSize The size of the cpuSet array that is safe to access * @param cpuSet Array reference in which to return a bitmask of CPUs, 64 CPUs per * unsigned long on 64-bit machines, 32 on 32-bit machines * @param scope Scope that change the default behavior * * @return * - \ref NVML_SUCCESS if \a cpuAffinity has been filled * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, cpuSetSize == 0, cpuSet is NULL or sope is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetCpuAffinityWithinScope(nvmlDevice_t device, unsigned int cpuSetSize, unsigned long *cpuSet, nvmlAffinityScope_t scope); /** * Retrieves an array of unsigned ints (sized to cpuSetSize) of bitmasks with the ideal CPU affinity for the device * For example, if processors 0, 1, 32, and 33 are ideal for the device and cpuSetSize == 2, * result[0] = 0x3, result[1] = 0x3 * This is equivalent to calling \ref nvmlDeviceGetCpuAffinityWithinScope with \ref NVML_AFFINITY_SCOPE_NODE. * * For Kepler &tm; or newer fully supported devices. * Supported on Linux only. * * @param device The identifier of the target device * @param cpuSetSize The size of the cpuSet array that is safe to access * @param cpuSet Array reference in which to return a bitmask of CPUs, 64 CPUs per * unsigned long on 64-bit machines, 32 on 32-bit machines * * @return * - \ref NVML_SUCCESS if \a cpuAffinity has been filled * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, cpuSetSize == 0, or cpuSet is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetCpuAffinity(nvmlDevice_t device, unsigned int cpuSetSize, unsigned long *cpuSet); /** * Sets the ideal affinity for the calling thread and device using the guidelines * given in nvmlDeviceGetCpuAffinity(). Note, this is a change as of version 8.0. * Older versions set the affinity for a calling process and all children. * Currently supports up to 1024 processors. * * For Kepler &tm; or newer fully supported devices. * Supported on Linux only. * * @param device The identifier of the target device * * @return * - \ref NVML_SUCCESS if the calling process has been successfully bound * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceSetCpuAffinity(nvmlDevice_t device); /** * Clear all affinity bindings for the calling thread. Note, this is a change as of version * 8.0 as older versions cleared the affinity for a calling process and all children. * * For Kepler &tm; or newer fully supported devices. * Supported on Linux only. * * @param device The identifier of the target device * * @return * - \ref NVML_SUCCESS if the calling process has been successfully unbound * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceClearCpuAffinity(nvmlDevice_t device); /** * Retrieve the common ancestor for two devices * For all products. * Supported on Linux only. * * @param device1 The identifier of the first device * @param device2 The identifier of the second device * @param pathInfo A \ref nvmlGpuTopologyLevel_t that gives the path type * * @return * - \ref NVML_SUCCESS if \a pathInfo has been set * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device1, or \a device2 is invalid, or \a pathInfo is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device or OS does not support this feature * - \ref NVML_ERROR_UNKNOWN an error has occurred in underlying topology discovery */ /** @} */ nvmlReturn_t DECLDIR nvmlDeviceGetTopologyCommonAncestor(nvmlDevice_t device1, nvmlDevice_t device2, nvmlGpuTopologyLevel_t *pathInfo); /** * Retrieve the set of GPUs that are nearest to a given device at a specific interconnectivity level * For all products. * Supported on Linux only. * * @param device The identifier of the first device * @param level The \ref nvmlGpuTopologyLevel_t level to search for other GPUs * @param count When zero, is set to the number of matching GPUs such that \a deviceArray * can be malloc'd. When non-zero, \a deviceArray will be filled with \a count * number of device handles. * @param deviceArray An array of device handles for GPUs found at \a level * * @return * - \ref NVML_SUCCESS if \a deviceArray or \a count (if initially zero) has been set * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a level, or \a count is invalid, or \a deviceArray is NULL with a non-zero \a count * - \ref NVML_ERROR_NOT_SUPPORTED if the device or OS does not support this feature * - \ref NVML_ERROR_UNKNOWN an error has occurred in underlying topology discovery */ nvmlReturn_t DECLDIR nvmlDeviceGetTopologyNearestGpus(nvmlDevice_t device, nvmlGpuTopologyLevel_t level, unsigned int *count, nvmlDevice_t *deviceArray); /** * Retrieve the set of GPUs that have a CPU affinity with the given CPU number * For all products. * Supported on Linux only. * * @param cpuNumber The CPU number * @param count When zero, is set to the number of matching GPUs such that \a deviceArray * can be malloc'd. When non-zero, \a deviceArray will be filled with \a count * number of device handles. * @param deviceArray An array of device handles for GPUs found with affinity to \a cpuNumber * * @return * - \ref NVML_SUCCESS if \a deviceArray or \a count (if initially zero) has been set * - \ref NVML_ERROR_INVALID_ARGUMENT if \a cpuNumber, or \a count is invalid, or \a deviceArray is NULL with a non-zero \a count * - \ref NVML_ERROR_NOT_SUPPORTED if the device or OS does not support this feature * - \ref NVML_ERROR_UNKNOWN an error has occurred in underlying topology discovery */ nvmlReturn_t DECLDIR nvmlSystemGetTopologyGpuSet(unsigned int cpuNumber, unsigned int *count, nvmlDevice_t *deviceArray); /** * Retrieve the status for a given p2p capability index between a given pair of GPU * * @param device1 The first device * @param device2 The second device * @param p2pIndex p2p Capability Index being looked for between \a device1 and \a device2 * @param p2pStatus Reference in which to return the status of the \a p2pIndex * between \a device1 and \a device2 * @return * - \ref NVML_SUCCESS if \a p2pStatus has been populated * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device1 or \a device2 or \a p2pIndex is invalid or \a p2pStatus is NULL * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetP2PStatus(nvmlDevice_t device1, nvmlDevice_t device2, nvmlGpuP2PCapsIndex_t p2pIndex,nvmlGpuP2PStatus_t *p2pStatus); /** * Retrieves the globally unique immutable UUID associated with this device, as a 5 part hexadecimal string, * that augments the immutable, board serial identifier. * * For all products. * * The UUID is a globally unique identifier. It is the only available identifier for pre-Fermi-architecture products. * It does NOT correspond to any identifier printed on the board. It will not exceed 96 characters in length * (including the NULL terminator). See \ref nvmlConstants::NVML_DEVICE_UUID_V2_BUFFER_SIZE. * * When used with MIG device handles the API returns globally unique UUIDs which can be used to identify MIG * devices across both GPU and MIG devices. UUIDs are immutable for the lifetime of a MIG device. * * @param device The identifier of the target device * @param uuid Reference in which to return the GPU UUID * @param length The maximum allowed length of the string returned in \a uuid * * @return * - \ref NVML_SUCCESS if \a uuid has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a uuid is NULL * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a length is too small * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetUUID(nvmlDevice_t device, char *uuid, unsigned int length); /** * Retrieve the MDEV UUID of a vGPU instance. * * The MDEV UUID is a globally unique identifier of the mdev device assigned to the VM, and is returned as a 5-part hexadecimal string, * not exceeding 80 characters in length (including the NULL terminator). * MDEV UUID is displayed only on KVM platform. * See \ref nvmlConstants::NVML_DEVICE_UUID_BUFFER_SIZE. * * For Maxwell &tm; or newer fully supported devices. * * @param vgpuInstance Identifier of the target vGPU instance * @param mdevUuid Pointer to caller-supplied buffer to hold MDEV UUID * @param size Size of buffer in bytes * * @return * - \ref NVML_SUCCESS successful completion * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_NOT_SUPPORTED on any hypervisor other than KVM * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuInstance is 0, or \a mdevUuid is NULL * - \ref NVML_ERROR_NOT_FOUND if \a vgpuInstance does not match a valid active vGPU instance on the system * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a size is too small * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetMdevUUID(nvmlVgpuInstance_t vgpuInstance, char *mdevUuid, unsigned int size); /** * Retrieves minor number for the device. The minor number for the device is such that the Nvidia device node file for * each GPU will have the form /dev/nvidia[minor number]. * * For all products. * Supported only for Linux * * @param device The identifier of the target device * @param minorNumber Reference in which to return the minor number for the device * @return * - \ref NVML_SUCCESS if the minor number is successfully retrieved * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a minorNumber is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetMinorNumber(nvmlDevice_t device, unsigned int *minorNumber); /** * Retrieves the the device board part number which is programmed into the board's InfoROM * * For all products. * * @param device Identifier of the target device * @param partNumber Reference to the buffer to return * @param length Length of the buffer reference * * @return * - \ref NVML_SUCCESS if \a partNumber has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_NOT_SUPPORTED if the needed VBIOS fields have not been filled * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a serial is NULL * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetBoardPartNumber(nvmlDevice_t device, char* partNumber, unsigned int length); /** * Retrieves the version information for the device's infoROM object. * * For all products with an inforom. * * Fermi and higher parts have non-volatile on-board memory for persisting device info, such as aggregate * ECC counts. The version of the data structures in this memory may change from time to time. It will not * exceed 16 characters in length (including the NULL terminator). * See \ref nvmlConstants::NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE. * * See \ref nvmlInforomObject_t for details on the available infoROM objects. * * @param device The identifier of the target device * @param object The target infoROM object * @param version Reference in which to return the infoROM version * @param length The maximum allowed length of the string returned in \a version * * @return * - \ref NVML_SUCCESS if \a version has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a version is NULL * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a length is too small * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not have an infoROM * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlDeviceGetInforomImageVersion */ nvmlReturn_t DECLDIR nvmlDeviceGetInforomVersion(nvmlDevice_t device, nvmlInforomObject_t object, char *version, unsigned int length); /** * Retrieves the global infoROM image version * * For all products with an inforom. * * Image version just like VBIOS version uniquely describes the exact version of the infoROM flashed on the board * in contrast to infoROM object version which is only an indicator of supported features. * Version string will not exceed 16 characters in length (including the NULL terminator). * See \ref nvmlConstants::NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE. * * @param device The identifier of the target device * @param version Reference in which to return the infoROM image version * @param length The maximum allowed length of the string returned in \a version * * @return * - \ref NVML_SUCCESS if \a version has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a version is NULL * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a length is too small * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not have an infoROM * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlDeviceGetInforomVersion */ nvmlReturn_t DECLDIR nvmlDeviceGetInforomImageVersion(nvmlDevice_t device, char *version, unsigned int length); /** * Retrieves the checksum of the configuration stored in the device's infoROM. * * For all products with an inforom. * * Can be used to make sure that two GPUs have the exact same configuration. * Current checksum takes into account configuration stored in PWR and ECC infoROM objects. * Checksum can change between driver releases or when user changes configuration (e.g. disable/enable ECC) * * @param device The identifier of the target device * @param checksum Reference in which to return the infoROM configuration checksum * * @return * - \ref NVML_SUCCESS if \a checksum has been set * - \ref NVML_ERROR_CORRUPTED_INFOROM if the device's checksum couldn't be retrieved due to infoROM corruption * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a checksum is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetInforomConfigurationChecksum(nvmlDevice_t device, unsigned int *checksum); /** * Reads the infoROM from the flash and verifies the checksums. * * For all products with an inforom. * * @param device The identifier of the target device * * @return * - \ref NVML_SUCCESS if infoROM is not corrupted * - \ref NVML_ERROR_CORRUPTED_INFOROM if the device's infoROM is corrupted * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceValidateInforom(nvmlDevice_t device); /** * Retrieves the display mode for the device. * * For all products. * * This method indicates whether a physical display (e.g. monitor) is currently connected to * any of the device's connectors. * * See \ref nvmlEnableState_t for details on allowed modes. * * @param device The identifier of the target device * @param display Reference in which to return the display mode * * @return * - \ref NVML_SUCCESS if \a display has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a display is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetDisplayMode(nvmlDevice_t device, nvmlEnableState_t *display); /** * Retrieves the display active state for the device. * * For all products. * * This method indicates whether a display is initialized on the device. * For example whether X Server is attached to this device and has allocated memory for the screen. * * Display can be active even when no monitor is physically attached. * * See \ref nvmlEnableState_t for details on allowed modes. * * @param device The identifier of the target device * @param isActive Reference in which to return the display active state * * @return * - \ref NVML_SUCCESS if \a isActive has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a isActive is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetDisplayActive(nvmlDevice_t device, nvmlEnableState_t *isActive); /** * Retrieves the persistence mode associated with this device. * * For all products. * For Linux only. * * When driver persistence mode is enabled the driver software state is not torn down when the last * client disconnects. By default this feature is disabled. * * See \ref nvmlEnableState_t for details on allowed modes. * * @param device The identifier of the target device * @param mode Reference in which to return the current driver persistence mode * * @return * - \ref NVML_SUCCESS if \a mode has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a mode is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlDeviceSetPersistenceMode() */ nvmlReturn_t DECLDIR nvmlDeviceGetPersistenceMode(nvmlDevice_t device, nvmlEnableState_t *mode); /** * Retrieves the PCI attributes of this device. * * For all products. * * See \ref nvmlPciInfo_t for details on the available PCI info. * * @param device The identifier of the target device * @param pci Reference in which to return the PCI info * * @return * - \ref NVML_SUCCESS if \a pci has been populated * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a pci is NULL * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetPciInfo_v3(nvmlDevice_t device, nvmlPciInfo_t *pci); /** * Retrieves the maximum PCIe link generation possible with this device and system * * I.E. for a generation 2 PCIe device attached to a generation 1 PCIe bus the max link generation this function will * report is generation 1. * * For Fermi &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param maxLinkGen Reference in which to return the max PCIe link generation * * @return * - \ref NVML_SUCCESS if \a maxLinkGen has been populated * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a maxLinkGen is null * - \ref NVML_ERROR_NOT_SUPPORTED if PCIe link information is not available * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetMaxPcieLinkGeneration(nvmlDevice_t device, unsigned int *maxLinkGen); /** * Retrieves the maximum PCIe link width possible with this device and system * * I.E. for a device with a 16x PCIe bus width attached to a 8x PCIe system bus this function will report * a max link width of 8. * * For Fermi &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param maxLinkWidth Reference in which to return the max PCIe link generation * * @return * - \ref NVML_SUCCESS if \a maxLinkWidth has been populated * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a maxLinkWidth is null * - \ref NVML_ERROR_NOT_SUPPORTED if PCIe link information is not available * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetMaxPcieLinkWidth(nvmlDevice_t device, unsigned int *maxLinkWidth); /** * Retrieves the current PCIe link generation * * For Fermi &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param currLinkGen Reference in which to return the current PCIe link generation * * @return * - \ref NVML_SUCCESS if \a currLinkGen has been populated * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a currLinkGen is null * - \ref NVML_ERROR_NOT_SUPPORTED if PCIe link information is not available * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetCurrPcieLinkGeneration(nvmlDevice_t device, unsigned int *currLinkGen); /** * Retrieves the current PCIe link width * * For Fermi &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param currLinkWidth Reference in which to return the current PCIe link generation * * @return * - \ref NVML_SUCCESS if \a currLinkWidth has been populated * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a currLinkWidth is null * - \ref NVML_ERROR_NOT_SUPPORTED if PCIe link information is not available * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetCurrPcieLinkWidth(nvmlDevice_t device, unsigned int *currLinkWidth); /** * Retrieve PCIe utilization information. * This function is querying a byte counter over a 20ms interval and thus is the * PCIe throughput over that interval. * * For Maxwell &tm; or newer fully supported devices. * * This method is not supported in virtual machines running virtual GPU (vGPU). * * @param device The identifier of the target device * @param counter The specific counter that should be queried \ref nvmlPcieUtilCounter_t * @param value Reference in which to return throughput in KB/s * * @return * - \ref NVML_SUCCESS if \a value has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device or \a counter is invalid, or \a value is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetPcieThroughput(nvmlDevice_t device, nvmlPcieUtilCounter_t counter, unsigned int *value); /** * Retrieve the PCIe replay counter. * * For Kepler &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param value Reference in which to return the counter's value * * @return * - \ref NVML_SUCCESS if \a value has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a value is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetPcieReplayCounter(nvmlDevice_t device, unsigned int *value); /** * Retrieves the current clock speeds for the device. * * For Fermi &tm; or newer fully supported devices. * * See \ref nvmlClockType_t for details on available clock information. * * @param device The identifier of the target device * @param type Identify which clock domain to query * @param clock Reference in which to return the clock speed in MHz * * @return * - \ref NVML_SUCCESS if \a clock has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a clock is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device cannot report the specified clock * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetClockInfo(nvmlDevice_t device, nvmlClockType_t type, unsigned int *clock); /** * Retrieves the maximum clock speeds for the device. * * For Fermi &tm; or newer fully supported devices. * * See \ref nvmlClockType_t for details on available clock information. * * \note On GPUs from Fermi family current P0 clocks (reported by \ref nvmlDeviceGetClockInfo) can differ from max clocks * by few MHz. * * @param device The identifier of the target device * @param type Identify which clock domain to query * @param clock Reference in which to return the clock speed in MHz * * @return * - \ref NVML_SUCCESS if \a clock has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a clock is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device cannot report the specified clock * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetMaxClockInfo(nvmlDevice_t device, nvmlClockType_t type, unsigned int *clock); /** * Retrieves the current setting of a clock that applications will use unless an overspec situation occurs. * Can be changed using \ref nvmlDeviceSetApplicationsClocks. * * For Kepler &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param clockType Identify which clock domain to query * @param clockMHz Reference in which to return the clock in MHz * * @return * - \ref NVML_SUCCESS if \a clockMHz has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a clockMHz is NULL or \a clockType is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetApplicationsClock(nvmlDevice_t device, nvmlClockType_t clockType, unsigned int *clockMHz); /** * Retrieves the default applications clock that GPU boots with or * defaults to after \ref nvmlDeviceResetApplicationsClocks call. * * For Kepler &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param clockType Identify which clock domain to query * @param clockMHz Reference in which to return the default clock in MHz * * @return * - \ref NVML_SUCCESS if \a clockMHz has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a clockMHz is NULL or \a clockType is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * \see nvmlDeviceGetApplicationsClock */ nvmlReturn_t DECLDIR nvmlDeviceGetDefaultApplicationsClock(nvmlDevice_t device, nvmlClockType_t clockType, unsigned int *clockMHz); /** * Resets the application clock to the default value * * This is the applications clock that will be used after system reboot or driver reload. * Default value is constant, but the current value an be changed using \ref nvmlDeviceSetApplicationsClocks. * * On Pascal and newer hardware, if clocks were previously locked with \ref nvmlDeviceSetApplicationsClocks, * this call will unlock clocks. This returns clocks their default behavior ofautomatically boosting above * base clocks as thermal limits allow. * * @see nvmlDeviceGetApplicationsClock * @see nvmlDeviceSetApplicationsClocks * * For Fermi &tm; or newer non-GeForce fully supported devices and Maxwell or newer GeForce devices. * * @param device The identifier of the target device * * @return * - \ref NVML_SUCCESS if new settings were successfully set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceResetApplicationsClocks(nvmlDevice_t device); /** * Retrieves the clock speed for the clock specified by the clock type and clock ID. * * For Kepler &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param clockType Identify which clock domain to query * @param clockId Identify which clock in the domain to query * @param clockMHz Reference in which to return the clock in MHz * * @return * - \ref NVML_SUCCESS if \a clockMHz has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a clockMHz is NULL or \a clockType is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetClock(nvmlDevice_t device, nvmlClockType_t clockType, nvmlClockId_t clockId, unsigned int *clockMHz); /** * Retrieves the customer defined maximum boost clock speed specified by the given clock type. * * For Pascal &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param clockType Identify which clock domain to query * @param clockMHz Reference in which to return the clock in MHz * * @return * - \ref NVML_SUCCESS if \a clockMHz has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a clockMHz is NULL or \a clockType is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if the device or the \a clockType on this device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetMaxCustomerBoostClock(nvmlDevice_t device, nvmlClockType_t clockType, unsigned int *clockMHz); /** * Retrieves the list of possible memory clocks that can be used as an argument for \ref nvmlDeviceSetApplicationsClocks. * * For Kepler &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param count Reference in which to provide the \a clocksMHz array size, and * to return the number of elements * @param clocksMHz Reference in which to return the clock in MHz * * @return * - \ref NVML_SUCCESS if \a count and \a clocksMHz have been populated * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a count is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a count is too small (\a count is set to the number of * required elements) * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlDeviceSetApplicationsClocks * @see nvmlDeviceGetSupportedGraphicsClocks */ nvmlReturn_t DECLDIR nvmlDeviceGetSupportedMemoryClocks(nvmlDevice_t device, unsigned int *count, unsigned int *clocksMHz); /** * Retrieves the list of possible graphics clocks that can be used as an argument for \ref nvmlDeviceSetApplicationsClocks. * * For Kepler &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param memoryClockMHz Memory clock for which to return possible graphics clocks * @param count Reference in which to provide the \a clocksMHz array size, and * to return the number of elements * @param clocksMHz Reference in which to return the clocks in MHz * * @return * - \ref NVML_SUCCESS if \a count and \a clocksMHz have been populated * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_NOT_FOUND if the specified \a memoryClockMHz is not a supported frequency * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a clock is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a count is too small * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlDeviceSetApplicationsClocks * @see nvmlDeviceGetSupportedMemoryClocks */ nvmlReturn_t DECLDIR nvmlDeviceGetSupportedGraphicsClocks(nvmlDevice_t device, unsigned int memoryClockMHz, unsigned int *count, unsigned int *clocksMHz); /** * Retrieve the current state of Auto Boosted clocks on a device and store it in \a isEnabled * * For Kepler &tm; or newer fully supported devices. * * Auto Boosted clocks are enabled by default on some hardware, allowing the GPU to run at higher clock rates * to maximize performance as thermal limits allow. * * On Pascal and newer hardware, Auto Aoosted clocks are controlled through application clocks. * Use \ref nvmlDeviceSetApplicationsClocks and \ref nvmlDeviceResetApplicationsClocks to control Auto Boost * behavior. * * @param device The identifier of the target device * @param isEnabled Where to store the current state of Auto Boosted clocks of the target device * @param defaultIsEnabled Where to store the default Auto Boosted clocks behavior of the target device that the device will * revert to when no applications are using the GPU * * @return * - \ref NVML_SUCCESS If \a isEnabled has been been set with the Auto Boosted clocks state of \a device * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a isEnabled is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support Auto Boosted clocks * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * */ nvmlReturn_t DECLDIR nvmlDeviceGetAutoBoostedClocksEnabled(nvmlDevice_t device, nvmlEnableState_t *isEnabled, nvmlEnableState_t *defaultIsEnabled); /** * Try to set the current state of Auto Boosted clocks on a device. * * For Kepler &tm; or newer fully supported devices. * * Auto Boosted clocks are enabled by default on some hardware, allowing the GPU to run at higher clock rates * to maximize performance as thermal limits allow. Auto Boosted clocks should be disabled if fixed clock * rates are desired. * * Non-root users may use this API by default but can be restricted by root from using this API by calling * \ref nvmlDeviceSetAPIRestriction with apiType=NVML_RESTRICTED_API_SET_AUTO_BOOSTED_CLOCKS. * Note: Persistence Mode is required to modify current Auto Boost settings, therefore, it must be enabled. * * On Pascal and newer hardware, Auto Boosted clocks are controlled through application clocks. * Use \ref nvmlDeviceSetApplicationsClocks and \ref nvmlDeviceResetApplicationsClocks to control Auto Boost * behavior. * * @param device The identifier of the target device * @param enabled What state to try to set Auto Boosted clocks of the target device to * * @return * - \ref NVML_SUCCESS If the Auto Boosted clocks were successfully set to the state specified by \a enabled * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support Auto Boosted clocks * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * */ nvmlReturn_t DECLDIR nvmlDeviceSetAutoBoostedClocksEnabled(nvmlDevice_t device, nvmlEnableState_t enabled); /** * Try to set the default state of Auto Boosted clocks on a device. This is the default state that Auto Boosted clocks will * return to when no compute running processes (e.g. CUDA application which have an active context) are running * * For Kepler &tm; or newer non-GeForce fully supported devices and Maxwell or newer GeForce devices. * Requires root/admin permissions. * * Auto Boosted clocks are enabled by default on some hardware, allowing the GPU to run at higher clock rates * to maximize performance as thermal limits allow. Auto Boosted clocks should be disabled if fixed clock * rates are desired. * * On Pascal and newer hardware, Auto Boosted clocks are controlled through application clocks. * Use \ref nvmlDeviceSetApplicationsClocks and \ref nvmlDeviceResetApplicationsClocks to control Auto Boost * behavior. * * @param device The identifier of the target device * @param enabled What state to try to set default Auto Boosted clocks of the target device to * @param flags Flags that change the default behavior. Currently Unused. * * @return * - \ref NVML_SUCCESS If the Auto Boosted clock's default state was successfully set to the state specified by \a enabled * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_NO_PERMISSION If the calling user does not have permission to change Auto Boosted clock's default state. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support Auto Boosted clocks * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * */ nvmlReturn_t DECLDIR nvmlDeviceSetDefaultAutoBoostedClocksEnabled(nvmlDevice_t device, nvmlEnableState_t enabled, unsigned int flags); /** * Retrieves the intended operating speed of the device's fan. * * Note: The reported speed is the intended fan speed. If the fan is physically blocked and unable to spin, the * output will not match the actual fan speed. * * For all discrete products with dedicated fans. * * The fan speed is expressed as a percentage of the product's maximum noise tolerance fan speed. * This value may exceed 100% in certain cases. * * @param device The identifier of the target device * @param speed Reference in which to return the fan speed percentage * * @return * - \ref NVML_SUCCESS if \a speed has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a speed is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not have a fan * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetFanSpeed(nvmlDevice_t device, unsigned int *speed); /** * Retrieves the intended operating speed of the device's specified fan. * * Note: The reported speed is the intended fan speed. If the fan is physically blocked and unable to spin, the * output will not match the actual fan speed. * * For all discrete products with dedicated fans. * * The fan speed is expressed as a percentage of the product's maximum noise tolerance fan speed. * This value may exceed 100% in certain cases. * * @param device The identifier of the target device * @param fan The index of the target fan, zero indexed. * @param speed Reference in which to return the fan speed percentage * * @return * - \ref NVML_SUCCESS if \a speed has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a fan is not an acceptable index, or \a speed is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not have a fan or is newer than Maxwell * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetFanSpeed_v2(nvmlDevice_t device, unsigned int fan, unsigned int * speed); /** * Retrieves the number of fans on the device. * * For all discrete products with dedicated fans. * * @param device The identifier of the target device * @param numFans The number of fans * * @return * - \ref NVML_SUCCESS if \a fan number query was successful * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a numFans is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not have a fan * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetNumFans(nvmlDevice_t device, unsigned int *numFans); /** * Retrieves the current temperature readings for the device, in degrees C. * * For all products. * * See \ref nvmlTemperatureSensors_t for details on available temperature sensors. * * @param device The identifier of the target device * @param sensorType Flag that indicates which sensor reading to retrieve * @param temp Reference in which to return the temperature reading * * @return * - \ref NVML_SUCCESS if \a temp has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a sensorType is invalid or \a temp is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not have the specified sensor * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetTemperature(nvmlDevice_t device, nvmlTemperatureSensors_t sensorType, unsigned int *temp); /** * Retrieves the temperature threshold for the GPU with the specified threshold type in degrees C. * * For Kepler &tm; or newer fully supported devices. * * See \ref nvmlTemperatureThresholds_t for details on available temperature thresholds. * * @param device The identifier of the target device * @param thresholdType The type of threshold value queried * @param temp Reference in which to return the temperature reading * @return * - \ref NVML_SUCCESS if \a temp has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a thresholdType is invalid or \a temp is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not have a temperature sensor or is unsupported * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetTemperatureThreshold(nvmlDevice_t device, nvmlTemperatureThresholds_t thresholdType, unsigned int *temp); /** * Sets the temperature threshold for the GPU with the specified threshold type in degrees C. * * For Maxwell &tm; or newer fully supported devices. * * See \ref nvmlTemperatureThresholds_t for details on available temperature thresholds. * * @param device The identifier of the target device * @param thresholdType The type of threshold value to be set * @param temp Reference which hold the value to be set * @return * - \ref NVML_SUCCESS if \a temp has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a thresholdType is invalid or \a temp is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not have a temperature sensor or is unsupported * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceSetTemperatureThreshold(nvmlDevice_t device, nvmlTemperatureThresholds_t thresholdType, int *temp); /** * Retrieves the current performance state for the device. * * For Fermi &tm; or newer fully supported devices. * * See \ref nvmlPstates_t for details on allowed performance states. * * @param device The identifier of the target device * @param pState Reference in which to return the performance state reading * * @return * - \ref NVML_SUCCESS if \a pState has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a pState is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetPerformanceState(nvmlDevice_t device, nvmlPstates_t *pState); /** * Retrieves current clocks throttling reasons. * * For all fully supported products. * * \note More than one bit can be enabled at the same time. Multiple reasons can be affecting clocks at once. * * @param device The identifier of the target device * @param clocksThrottleReasons Reference in which to return bitmask of active clocks throttle * reasons * * @return * - \ref NVML_SUCCESS if \a clocksThrottleReasons has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a clocksThrottleReasons is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlClocksThrottleReasons * @see nvmlDeviceGetSupportedClocksThrottleReasons */ nvmlReturn_t DECLDIR nvmlDeviceGetCurrentClocksThrottleReasons(nvmlDevice_t device, unsigned long long *clocksThrottleReasons); /** * Retrieves bitmask of supported clocks throttle reasons that can be returned by * \ref nvmlDeviceGetCurrentClocksThrottleReasons * * For all fully supported products. * * This method is not supported in virtual machines running virtual GPU (vGPU). * * @param device The identifier of the target device * @param supportedClocksThrottleReasons Reference in which to return bitmask of supported * clocks throttle reasons * * @return * - \ref NVML_SUCCESS if \a supportedClocksThrottleReasons has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a supportedClocksThrottleReasons is NULL * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlClocksThrottleReasons * @see nvmlDeviceGetCurrentClocksThrottleReasons */ nvmlReturn_t DECLDIR nvmlDeviceGetSupportedClocksThrottleReasons(nvmlDevice_t device, unsigned long long *supportedClocksThrottleReasons); /** * Deprecated: Use \ref nvmlDeviceGetPerformanceState. This function exposes an incorrect generalization. * * Retrieve the current performance state for the device. * * For Fermi &tm; or newer fully supported devices. * * See \ref nvmlPstates_t for details on allowed performance states. * * @param device The identifier of the target device * @param pState Reference in which to return the performance state reading * * @return * - \ref NVML_SUCCESS if \a pState has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a pState is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetPowerState(nvmlDevice_t device, nvmlPstates_t *pState); /** * This API has been deprecated. * * Retrieves the power management mode associated with this device. * * For products from the Fermi family. * - Requires \a NVML_INFOROM_POWER version 3.0 or higher. * * For from the Kepler or newer families. * - Does not require \a NVML_INFOROM_POWER object. * * This flag indicates whether any power management algorithm is currently active on the device. An * enabled state does not necessarily mean the device is being actively throttled -- only that * that the driver will do so if the appropriate conditions are met. * * See \ref nvmlEnableState_t for details on allowed modes. * * @param device The identifier of the target device * @param mode Reference in which to return the current power management mode * * @return * - \ref NVML_SUCCESS if \a mode has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a mode is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetPowerManagementMode(nvmlDevice_t device, nvmlEnableState_t *mode); /** * Retrieves the power management limit associated with this device. * * For Fermi &tm; or newer fully supported devices. * * The power limit defines the upper boundary for the card's power draw. If * the card's total power draw reaches this limit the power management algorithm kicks in. * * This reading is only available if power management mode is supported. * See \ref nvmlDeviceGetPowerManagementMode. * * @param device The identifier of the target device * @param limit Reference in which to return the power management limit in milliwatts * * @return * - \ref NVML_SUCCESS if \a limit has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a limit is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetPowerManagementLimit(nvmlDevice_t device, unsigned int *limit); /** * Retrieves information about possible values of power management limits on this device. * * For Kepler &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param minLimit Reference in which to return the minimum power management limit in milliwatts * @param maxLimit Reference in which to return the maximum power management limit in milliwatts * * @return * - \ref NVML_SUCCESS if \a minLimit and \a maxLimit have been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a minLimit or \a maxLimit is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlDeviceSetPowerManagementLimit */ nvmlReturn_t DECLDIR nvmlDeviceGetPowerManagementLimitConstraints(nvmlDevice_t device, unsigned int *minLimit, unsigned int *maxLimit); /** * Retrieves default power management limit on this device, in milliwatts. * Default power management limit is a power management limit that the device boots with. * * For Kepler &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param defaultLimit Reference in which to return the default power management limit in milliwatts * * @return * - \ref NVML_SUCCESS if \a defaultLimit has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a defaultLimit is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetPowerManagementDefaultLimit(nvmlDevice_t device, unsigned int *defaultLimit); /** * Retrieves power usage for this GPU in milliwatts and its associated circuitry (e.g. memory) * * For Fermi &tm; or newer fully supported devices. * * On Fermi and Kepler GPUs the reading is accurate to within +/- 5% of current power draw. * * It is only available if power management mode is supported. See \ref nvmlDeviceGetPowerManagementMode. * * @param device The identifier of the target device * @param power Reference in which to return the power usage information * * @return * - \ref NVML_SUCCESS if \a power has been populated * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a power is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support power readings * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetPowerUsage(nvmlDevice_t device, unsigned int *power); /** * Retrieves total energy consumption for this GPU in millijoules (mJ) since the driver was last reloaded * * For Volta &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param energy Reference in which to return the energy consumption information * * @return * - \ref NVML_SUCCESS if \a energy has been populated * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a energy is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support energy readings * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetTotalEnergyConsumption(nvmlDevice_t device, unsigned long long *energy); /** * Get the effective power limit that the driver enforces after taking into account all limiters * * Note: This can be different from the \ref nvmlDeviceGetPowerManagementLimit if other limits are set elsewhere * This includes the out of band power limit interface * * For Kepler &tm; or newer fully supported devices. * * @param device The device to communicate with * @param limit Reference in which to return the power management limit in milliwatts * * @return * - \ref NVML_SUCCESS if \a limit has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a limit is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetEnforcedPowerLimit(nvmlDevice_t device, unsigned int *limit); /** * Retrieves the current GOM and pending GOM (the one that GPU will switch to after reboot). * * For GK110 M-class and X-class Tesla &tm; products from the Kepler family. * Modes \ref NVML_GOM_LOW_DP and \ref NVML_GOM_ALL_ON are supported on fully supported GeForce products. * Not supported on Quadro ® and Tesla &tm; C-class products. * * @param device The identifier of the target device * @param current Reference in which to return the current GOM * @param pending Reference in which to return the pending GOM * * @return * - \ref NVML_SUCCESS if \a mode has been populated * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a current or \a pending is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlGpuOperationMode_t * @see nvmlDeviceSetGpuOperationMode */ nvmlReturn_t DECLDIR nvmlDeviceGetGpuOperationMode(nvmlDevice_t device, nvmlGpuOperationMode_t *current, nvmlGpuOperationMode_t *pending); /** * Retrieves the amount of used, free, reserved and total memory available on the device, in bytes. * The reserved amount is supported on version 2 only. * * For all products. * * Enabling ECC reduces the amount of total available memory, due to the extra required parity bits. * Under WDDM most device memory is allocated and managed on startup by Windows. * * Under Linux and Windows TCC, the reported amount of used memory is equal to the sum of memory allocated * by all active channels on the device. * * See \ref nvmlMemory_v2_t for details on available memory info. * * @note In MIG mode, if device handle is provided, the API returns aggregate * information, only if the caller has appropriate privileges. Per-instance * information can be queried by using specific MIG device handles. * * @note nvmlDeviceGetMemoryInfo_v2 adds additional memory information. * * @param device The identifier of the target device * @param memory Reference in which to return the memory information * * @return * - \ref NVML_SUCCESS if \a memory has been populated * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a memory is NULL * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetMemoryInfo(nvmlDevice_t device, nvmlMemory_t *memory); nvmlReturn_t DECLDIR nvmlDeviceGetMemoryInfo_v2(nvmlDevice_t device, nvmlMemory_v2_t *memory); /** * Retrieves the current compute mode for the device. * * For all products. * * See \ref nvmlComputeMode_t for details on allowed compute modes. * * @param device The identifier of the target device * @param mode Reference in which to return the current compute mode * * @return * - \ref NVML_SUCCESS if \a mode has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a mode is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlDeviceSetComputeMode() */ nvmlReturn_t DECLDIR nvmlDeviceGetComputeMode(nvmlDevice_t device, nvmlComputeMode_t *mode); /** * Retrieves the CUDA compute capability of the device. * * For all products. * * Returns the major and minor compute capability version numbers of the * device. The major and minor versions are equivalent to the * CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR and * CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR attributes that would be * returned by CUDA's cuDeviceGetAttribute(). * * @param device The identifier of the target device * @param major Reference in which to return the major CUDA compute capability * @param minor Reference in which to return the minor CUDA compute capability * * @return * - \ref NVML_SUCCESS if \a major and \a minor have been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a major or \a minor are NULL * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetCudaComputeCapability(nvmlDevice_t device, int *major, int *minor); /** * Retrieves the current and pending ECC modes for the device. * * For Fermi &tm; or newer fully supported devices. * Only applicable to devices with ECC. * Requires \a NVML_INFOROM_ECC version 1.0 or higher. * * Changing ECC modes requires a reboot. The "pending" ECC mode refers to the target mode following * the next reboot. * * See \ref nvmlEnableState_t for details on allowed modes. * * @param device The identifier of the target device * @param current Reference in which to return the current ECC mode * @param pending Reference in which to return the pending ECC mode * * @return * - \ref NVML_SUCCESS if \a current and \a pending have been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or either \a current or \a pending is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlDeviceSetEccMode() */ nvmlReturn_t DECLDIR nvmlDeviceGetEccMode(nvmlDevice_t device, nvmlEnableState_t *current, nvmlEnableState_t *pending); /** * Retrieves the device boardId from 0-N. * Devices with the same boardId indicate GPUs connected to the same PLX. Use in conjunction with * \ref nvmlDeviceGetMultiGpuBoard() to decide if they are on the same board as well. * The boardId returned is a unique ID for the current configuration. Uniqueness and ordering across * reboots and system configurations is not guaranteed (i.e. if a Tesla K40c returns 0x100 and * the two GPUs on a Tesla K10 in the same system returns 0x200 it is not guaranteed they will * always return those values but they will always be different from each other). * * * For Fermi &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param boardId Reference in which to return the device's board ID * * @return * - \ref NVML_SUCCESS if \a boardId has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a boardId is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetBoardId(nvmlDevice_t device, unsigned int *boardId); /** * Retrieves whether the device is on a Multi-GPU Board * Devices that are on multi-GPU boards will set \a multiGpuBool to a non-zero value. * * For Fermi &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param multiGpuBool Reference in which to return a zero or non-zero value * to indicate whether the device is on a multi GPU board * * @return * - \ref NVML_SUCCESS if \a multiGpuBool has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a multiGpuBool is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetMultiGpuBoard(nvmlDevice_t device, unsigned int *multiGpuBool); /** * Retrieves the total ECC error counts for the device. * * For Fermi &tm; or newer fully supported devices. * Only applicable to devices with ECC. * Requires \a NVML_INFOROM_ECC version 1.0 or higher. * Requires ECC Mode to be enabled. * * The total error count is the sum of errors across each of the separate memory systems, i.e. the total set of * errors across the entire device. * * See \ref nvmlMemoryErrorType_t for a description of available error types.\n * See \ref nvmlEccCounterType_t for a description of available counter types. * * @param device The identifier of the target device * @param errorType Flag that specifies the type of the errors. * @param counterType Flag that specifies the counter-type of the errors. * @param eccCounts Reference in which to return the specified ECC errors * * @return * - \ref NVML_SUCCESS if \a eccCounts has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a errorType or \a counterType is invalid, or \a eccCounts is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlDeviceClearEccErrorCounts() */ nvmlReturn_t DECLDIR nvmlDeviceGetTotalEccErrors(nvmlDevice_t device, nvmlMemoryErrorType_t errorType, nvmlEccCounterType_t counterType, unsigned long long *eccCounts); /** * Retrieves the detailed ECC error counts for the device. * * @deprecated This API supports only a fixed set of ECC error locations * On different GPU architectures different locations are supported * See \ref nvmlDeviceGetMemoryErrorCounter * * For Fermi &tm; or newer fully supported devices. * Only applicable to devices with ECC. * Requires \a NVML_INFOROM_ECC version 2.0 or higher to report aggregate location-based ECC counts. * Requires \a NVML_INFOROM_ECC version 1.0 or higher to report all other ECC counts. * Requires ECC Mode to be enabled. * * Detailed errors provide separate ECC counts for specific parts of the memory system. * * Reports zero for unsupported ECC error counters when a subset of ECC error counters are supported. * * See \ref nvmlMemoryErrorType_t for a description of available bit types.\n * See \ref nvmlEccCounterType_t for a description of available counter types.\n * See \ref nvmlEccErrorCounts_t for a description of provided detailed ECC counts. * * @param device The identifier of the target device * @param errorType Flag that specifies the type of the errors. * @param counterType Flag that specifies the counter-type of the errors. * @param eccCounts Reference in which to return the specified ECC errors * * @return * - \ref NVML_SUCCESS if \a eccCounts has been populated * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a errorType or \a counterType is invalid, or \a eccCounts is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlDeviceClearEccErrorCounts() */ nvmlReturn_t DECLDIR nvmlDeviceGetDetailedEccErrors(nvmlDevice_t device, nvmlMemoryErrorType_t errorType, nvmlEccCounterType_t counterType, nvmlEccErrorCounts_t *eccCounts); /** * Retrieves the requested memory error counter for the device. * * For Fermi &tm; or newer fully supported devices. * Requires \a NVML_INFOROM_ECC version 2.0 or higher to report aggregate location-based memory error counts. * Requires \a NVML_INFOROM_ECC version 1.0 or higher to report all other memory error counts. * * Only applicable to devices with ECC. * * Requires ECC Mode to be enabled. * * @note On MIG-enabled GPUs, per instance information can be queried using specific * MIG device handles. Per instance information is currently only supported for * non-DRAM uncorrectable volatile errors. Querying volatile errors using device * handles is currently not supported. * * See \ref nvmlMemoryErrorType_t for a description of available memory error types.\n * See \ref nvmlEccCounterType_t for a description of available counter types.\n * See \ref nvmlMemoryLocation_t for a description of available counter locations.\n * * @param device The identifier of the target device * @param errorType Flag that specifies the type of error. * @param counterType Flag that specifies the counter-type of the errors. * @param locationType Specifies the location of the counter. * @param count Reference in which to return the ECC counter * * @return * - \ref NVML_SUCCESS if \a count has been populated * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a bitTyp,e \a counterType or \a locationType is * invalid, or \a count is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support ECC error reporting in the specified memory * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetMemoryErrorCounter(nvmlDevice_t device, nvmlMemoryErrorType_t errorType, nvmlEccCounterType_t counterType, nvmlMemoryLocation_t locationType, unsigned long long *count); /** * Retrieves the current utilization rates for the device's major subsystems. * * For Fermi &tm; or newer fully supported devices. * * See \ref nvmlUtilization_t for details on available utilization rates. * * \note During driver initialization when ECC is enabled one can see high GPU and Memory Utilization readings. * This is caused by ECC Memory Scrubbing mechanism that is performed during driver initialization. * * @note On MIG-enabled GPUs, querying device utilization rates is not currently supported. * * @param device The identifier of the target device * @param utilization Reference in which to return the utilization information * * @return * - \ref NVML_SUCCESS if \a utilization has been populated * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a utilization is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetUtilizationRates(nvmlDevice_t device, nvmlUtilization_t *utilization); /** * Retrieves the current utilization and sampling size in microseconds for the Encoder * * For Kepler &tm; or newer fully supported devices. * * @note On MIG-enabled GPUs, querying encoder utilization is not currently supported. * * @param device The identifier of the target device * @param utilization Reference to an unsigned int for encoder utilization info * @param samplingPeriodUs Reference to an unsigned int for the sampling period in US * * @return * - \ref NVML_SUCCESS if \a utilization has been populated * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a utilization is NULL, or \a samplingPeriodUs is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetEncoderUtilization(nvmlDevice_t device, unsigned int *utilization, unsigned int *samplingPeriodUs); /** * Retrieves the current capacity of the device's encoder, as a percentage of maximum encoder capacity with valid values in the range 0-100. * * For Maxwell &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param encoderQueryType Type of encoder to query * @param encoderCapacity Reference to an unsigned int for the encoder capacity * * @return * - \ref NVML_SUCCESS if \a encoderCapacity is fetched * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a encoderCapacity is NULL, or \a device or \a encoderQueryType * are invalid * - \ref NVML_ERROR_NOT_SUPPORTED if device does not support the encoder specified in \a encodeQueryType * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetEncoderCapacity (nvmlDevice_t device, nvmlEncoderType_t encoderQueryType, unsigned int *encoderCapacity); /** * Retrieves the current encoder statistics for a given device. * * For Maxwell &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param sessionCount Reference to an unsigned int for count of active encoder sessions * @param averageFps Reference to an unsigned int for trailing average FPS of all active sessions * @param averageLatency Reference to an unsigned int for encode latency in microseconds * * @return * - \ref NVML_SUCCESS if \a sessionCount, \a averageFps and \a averageLatency is fetched * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a sessionCount, or \a device or \a averageFps, * or \a averageLatency is NULL * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetEncoderStats (nvmlDevice_t device, unsigned int *sessionCount, unsigned int *averageFps, unsigned int *averageLatency); /** * Retrieves information about active encoder sessions on a target device. * * An array of active encoder sessions is returned in the caller-supplied buffer pointed at by \a sessionInfos. The * array elememt count is passed in \a sessionCount, and \a sessionCount is used to return the number of sessions * written to the buffer. * * If the supplied buffer is not large enough to accomodate the active session array, the function returns * NVML_ERROR_INSUFFICIENT_SIZE, with the element count of nvmlEncoderSessionInfo_t array required in \a sessionCount. * To query the number of active encoder sessions, call this function with *sessionCount = 0. The code will return * NVML_SUCCESS with number of active encoder sessions updated in *sessionCount. * * For Maxwell &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param sessionCount Reference to caller supplied array size, and returns the number of sessions. * @param sessionInfos Reference in which to return the session information * * @return * - \ref NVML_SUCCESS if \a sessionInfos is fetched * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a sessionCount is too small, array element count is returned in \a sessionCount * - \ref NVML_ERROR_INVALID_ARGUMENT if \a sessionCount is NULL. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by \a device * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetEncoderSessions(nvmlDevice_t device, unsigned int *sessionCount, nvmlEncoderSessionInfo_t *sessionInfos); /** * Retrieves the current utilization and sampling size in microseconds for the Decoder * * For Kepler &tm; or newer fully supported devices. * * @note On MIG-enabled GPUs, querying decoder utilization is not currently supported. * * @param device The identifier of the target device * @param utilization Reference to an unsigned int for decoder utilization info * @param samplingPeriodUs Reference to an unsigned int for the sampling period in US * * @return * - \ref NVML_SUCCESS if \a utilization has been populated * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a utilization is NULL, or \a samplingPeriodUs is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetDecoderUtilization(nvmlDevice_t device, unsigned int *utilization, unsigned int *samplingPeriodUs); /** * Retrieves the active frame buffer capture sessions statistics for a given device. * * For Maxwell &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param fbcStats Reference to nvmlFBCStats_t structure contianing NvFBC stats * * @return * - \ref NVML_SUCCESS if \a fbcStats is fetched * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a fbcStats is NULL * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetFBCStats(nvmlDevice_t device, nvmlFBCStats_t *fbcStats); /** * Retrieves information about active frame buffer capture sessions on a target device. * * An array of active FBC sessions is returned in the caller-supplied buffer pointed at by \a sessionInfo. The * array element count is passed in \a sessionCount, and \a sessionCount is used to return the number of sessions * written to the buffer. * * If the supplied buffer is not large enough to accomodate the active session array, the function returns * NVML_ERROR_INSUFFICIENT_SIZE, with the element count of nvmlFBCSessionInfo_t array required in \a sessionCount. * To query the number of active FBC sessions, call this function with *sessionCount = 0. The code will return * NVML_SUCCESS with number of active FBC sessions updated in *sessionCount. * * For Maxwell &tm; or newer fully supported devices. * * @note hResolution, vResolution, averageFPS and averageLatency data for a FBC session returned in \a sessionInfo may * be zero if there are no new frames captured since the session started. * * @param device The identifier of the target device * @param sessionCount Reference to caller supplied array size, and returns the number of sessions. * @param sessionInfo Reference in which to return the session information * * @return * - \ref NVML_SUCCESS if \a sessionInfo is fetched * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a sessionCount is too small, array element count is returned in \a sessionCount * - \ref NVML_ERROR_INVALID_ARGUMENT if \a sessionCount is NULL. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetFBCSessions(nvmlDevice_t device, unsigned int *sessionCount, nvmlFBCSessionInfo_t *sessionInfo); /** * Retrieves the current and pending driver model for the device. * * For Fermi &tm; or newer fully supported devices. * For windows only. * * On Windows platforms the device driver can run in either WDDM or WDM (TCC) mode. If a display is attached * to the device it must run in WDDM mode. TCC mode is preferred if a display is not attached. * * See \ref nvmlDriverModel_t for details on available driver models. * * @param device The identifier of the target device * @param current Reference in which to return the current driver model * @param pending Reference in which to return the pending driver model * * @return * - \ref NVML_SUCCESS if either \a current and/or \a pending have been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or both \a current and \a pending are NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the platform is not windows * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlDeviceSetDriverModel() */ nvmlReturn_t DECLDIR nvmlDeviceGetDriverModel(nvmlDevice_t device, nvmlDriverModel_t *current, nvmlDriverModel_t *pending); /** * Get VBIOS version of the device. * * For all products. * * The VBIOS version may change from time to time. It will not exceed 32 characters in length * (including the NULL terminator). See \ref nvmlConstants::NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE. * * @param device The identifier of the target device * @param version Reference to which to return the VBIOS version * @param length The maximum allowed length of the string returned in \a version * * @return * - \ref NVML_SUCCESS if \a version has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a version is NULL * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a length is too small * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetVbiosVersion(nvmlDevice_t device, char *version, unsigned int length); /** * Get Bridge Chip Information for all the bridge chips on the board. * * For all fully supported products. * Only applicable to multi-GPU products. * * @param device The identifier of the target device * @param bridgeHierarchy Reference to the returned bridge chip Hierarchy * * @return * - \ref NVML_SUCCESS if bridge chip exists * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a bridgeInfo is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if bridge chip not supported on the device * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * */ nvmlReturn_t DECLDIR nvmlDeviceGetBridgeChipInfo(nvmlDevice_t device, nvmlBridgeChipHierarchy_t *bridgeHierarchy); /** * Get information about processes with a compute context on a device * * For Fermi &tm; or newer fully supported devices. * * This function returns information only about compute running processes (e.g. CUDA application which have * active context). Any graphics applications (e.g. using OpenGL, DirectX) won't be listed by this function. * * To query the current number of running compute processes, call this function with *infoCount = 0. The * return code will be NVML_ERROR_INSUFFICIENT_SIZE, or NVML_SUCCESS if none are running. For this call * \a infos is allowed to be NULL. * * The usedGpuMemory field returned is all of the memory used by the application. * * Keep in mind that information returned by this call is dynamic and the number of elements might change in * time. Allocate more space for \a infos table in case new compute processes are spawned. * * @note In MIG mode, if device handle is provided, the API returns aggregate information, only if * the caller has appropriate privileges. Per-instance information can be queried by using * specific MIG device handles. * Querying per-instance information using MIG device handles is not supported if the device is in vGPU Host virtualization mode. * * @param device The device handle or MIG device handle * @param infoCount Reference in which to provide the \a infos array size, and * to return the number of returned elements * @param infos Reference in which to return the process information * * @return * - \ref NVML_SUCCESS if \a infoCount and \a infos have been populated * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a infoCount indicates that the \a infos array is too small * \a infoCount will contain minimal amount of space necessary for * the call to complete * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, either of \a infoCount or \a infos is NULL * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by \a device * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see \ref nvmlSystemGetProcessName */ nvmlReturn_t DECLDIR nvmlDeviceGetComputeRunningProcesses_v3(nvmlDevice_t device, unsigned int *infoCount, nvmlProcessInfo_t *infos); /** * Get information about processes with a graphics context on a device * * For Kepler &tm; or newer fully supported devices. * * This function returns information only about graphics based processes * (eg. applications using OpenGL, DirectX) * * To query the current number of running graphics processes, call this function with *infoCount = 0. The * return code will be NVML_ERROR_INSUFFICIENT_SIZE, or NVML_SUCCESS if none are running. For this call * \a infos is allowed to be NULL. * * The usedGpuMemory field returned is all of the memory used by the application. * * Keep in mind that information returned by this call is dynamic and the number of elements might change in * time. Allocate more space for \a infos table in case new graphics processes are spawned. * * @note In MIG mode, if device handle is provided, the API returns aggregate information, only if * the caller has appropriate privileges. Per-instance information can be queried by using * specific MIG device handles. * Querying per-instance information using MIG device handles is not supported if the device is in vGPU Host virtualization mode. * * @param device The device handle or MIG device handle * @param infoCount Reference in which to provide the \a infos array size, and * to return the number of returned elements * @param infos Reference in which to return the process information * * @return * - \ref NVML_SUCCESS if \a infoCount and \a infos have been populated * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a infoCount indicates that the \a infos array is too small * \a infoCount will contain minimal amount of space necessary for * the call to complete * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, either of \a infoCount or \a infos is NULL * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by \a device * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see \ref nvmlSystemGetProcessName */ nvmlReturn_t DECLDIR nvmlDeviceGetGraphicsRunningProcesses_v3(nvmlDevice_t device, unsigned int *infoCount, nvmlProcessInfo_t *infos); /** * Get information about processes with a MPS compute context on a device * * For Volta &tm; or newer fully supported devices. * * This function returns information only about compute running processes (e.g. CUDA application which have * active context) utilizing MPS. Any graphics applications (e.g. using OpenGL, DirectX) won't be listed by * this function. * * To query the current number of running compute processes, call this function with *infoCount = 0. The * return code will be NVML_ERROR_INSUFFICIENT_SIZE, or NVML_SUCCESS if none are running. For this call * \a infos is allowed to be NULL. * * The usedGpuMemory field returned is all of the memory used by the application. * * Keep in mind that information returned by this call is dynamic and the number of elements might change in * time. Allocate more space for \a infos table in case new compute processes are spawned. * * @note In MIG mode, if device handle is provided, the API returns aggregate information, only if * the caller has appropriate privileges. Per-instance information can be queried by using * specific MIG device handles. * Querying per-instance information using MIG device handles is not supported if the device is in vGPU Host virtualization mode. * * @param device The device handle or MIG device handle * @param infoCount Reference in which to provide the \a infos array size, and * to return the number of returned elements * @param infos Reference in which to return the process information * * @return * - \ref NVML_SUCCESS if \a infoCount and \a infos have been populated * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a infoCount indicates that the \a infos array is too small * \a infoCount will contain minimal amount of space necessary for * the call to complete * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, either of \a infoCount or \a infos is NULL * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by \a device * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see \ref nvmlSystemGetProcessName */ nvmlReturn_t DECLDIR nvmlDeviceGetMPSComputeRunningProcesses_v3(nvmlDevice_t device, unsigned int *infoCount, nvmlProcessInfo_t *infos); /** * Check if the GPU devices are on the same physical board. * * For all fully supported products. * * @param device1 The first GPU device * @param device2 The second GPU device * @param onSameBoard Reference in which to return the status. * Non-zero indicates that the GPUs are on the same board. * * @return * - \ref NVML_SUCCESS if \a onSameBoard has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a dev1 or \a dev2 are invalid or \a onSameBoard is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if this check is not supported by the device * - \ref NVML_ERROR_GPU_IS_LOST if the either GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceOnSameBoard(nvmlDevice_t device1, nvmlDevice_t device2, int *onSameBoard); /** * Retrieves the root/admin permissions on the target API. See \a nvmlRestrictedAPI_t for the list of supported APIs. * If an API is restricted only root users can call that API. See \a nvmlDeviceSetAPIRestriction to change current permissions. * * For all fully supported products. * * @param device The identifier of the target device * @param apiType Target API type for this operation * @param isRestricted Reference in which to return the current restriction * NVML_FEATURE_ENABLED indicates that the API is root-only * NVML_FEATURE_DISABLED indicates that the API is accessible to all users * * @return * - \ref NVML_SUCCESS if \a isRestricted has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a apiType incorrect or \a isRestricted is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device or the device does not support * the feature that is being queried (E.G. Enabling/disabling Auto Boosted clocks is * not supported by the device) * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlRestrictedAPI_t */ nvmlReturn_t DECLDIR nvmlDeviceGetAPIRestriction(nvmlDevice_t device, nvmlRestrictedAPI_t apiType, nvmlEnableState_t *isRestricted); /** * Gets recent samples for the GPU. * * For Kepler &tm; or newer fully supported devices. * * Based on type, this method can be used to fetch the power, utilization or clock samples maintained in the buffer by * the driver. * * Power, Utilization and Clock samples are returned as type "unsigned int" for the union nvmlValue_t. * * To get the size of samples that user needs to allocate, the method is invoked with samples set to NULL. * The returned samplesCount will provide the number of samples that can be queried. The user needs to * allocate the buffer with size as samplesCount * sizeof(nvmlSample_t). * * lastSeenTimeStamp represents CPU timestamp in microseconds. Set it to 0 to fetch all the samples maintained by the * underlying buffer. Set lastSeenTimeStamp to one of the timeStamps retrieved from the date of the previous query * to get more recent samples. * * This method fetches the number of entries which can be accommodated in the provided samples array, and the * reference samplesCount is updated to indicate how many samples were actually retrieved. The advantage of using this * method for samples in contrast to polling via existing methods is to get get higher frequency data at lower polling cost. * * @note On MIG-enabled GPUs, querying the following sample types, NVML_GPU_UTILIZATION_SAMPLES, NVML_MEMORY_UTILIZATION_SAMPLES * NVML_ENC_UTILIZATION_SAMPLES and NVML_DEC_UTILIZATION_SAMPLES, is not currently supported. * * @param device The identifier for the target device * @param type Type of sampling event * @param lastSeenTimeStamp Return only samples with timestamp greater than lastSeenTimeStamp. * @param sampleValType Output parameter to represent the type of sample value as described in nvmlSampleVal_t * @param sampleCount Reference to provide the number of elements which can be queried in samples array * @param samples Reference in which samples are returned * @return * - \ref NVML_SUCCESS if samples are successfully retrieved * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a samplesCount is NULL or * reference to \a sampleCount is 0 for non null \a samples * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_NOT_FOUND if sample entries are not found * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetSamples(nvmlDevice_t device, nvmlSamplingType_t type, unsigned long long lastSeenTimeStamp, nvmlValueType_t *sampleValType, unsigned int *sampleCount, nvmlSample_t *samples); /** * Gets Total, Available and Used size of BAR1 memory. * * BAR1 is used to map the FB (device memory) so that it can be directly accessed by the CPU or by 3rd party * devices (peer-to-peer on the PCIE bus). * * @note In MIG mode, if device handle is provided, the API returns aggregate * information, only if the caller has appropriate privileges. Per-instance * information can be queried by using specific MIG device handles. * * For Kepler &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param bar1Memory Reference in which BAR1 memory * information is returned. * * @return * - \ref NVML_SUCCESS if BAR1 memory is successfully retrieved * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a bar1Memory is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * */ nvmlReturn_t DECLDIR nvmlDeviceGetBAR1MemoryInfo(nvmlDevice_t device, nvmlBAR1Memory_t *bar1Memory); /** * Gets the duration of time during which the device was throttled (lower than requested clocks) due to power * or thermal constraints. * * The method is important to users who are tying to understand if their GPUs throttle at any point during their applications. The * difference in violation times at two different reference times gives the indication of GPU throttling event. * * Violation for thermal capping is not supported at this time. * * For Kepler &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param perfPolicyType Represents Performance policy which can trigger GPU throttling * @param violTime Reference to which violation time related information is returned * * * @return * - \ref NVML_SUCCESS if violation time is successfully retrieved * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a perfPolicyType is invalid, or \a violTime is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * */ nvmlReturn_t DECLDIR nvmlDeviceGetViolationStatus(nvmlDevice_t device, nvmlPerfPolicyType_t perfPolicyType, nvmlViolationTime_t *violTime); /** * Gets the device's interrupt number * * @param device The identifier of the target device * @param irqNum The interrupt number associated with the specified device * * @return * - \ref NVML_SUCCESS if irq number is successfully retrieved * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a irqNum is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * */ nvmlReturn_t DECLDIR nvmlDeviceGetIrqNum(nvmlDevice_t device, unsigned int *irqNum); /** * Gets the device's core count * * @param device The identifier of the target device * @param numCores The number of cores for the specified device * * @return * - \ref NVML_SUCCESS if Gpu core count is successfully retrieved * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a numCores is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * */ nvmlReturn_t DECLDIR nvmlDeviceGetNumGpuCores(nvmlDevice_t device, unsigned int *numCores); /** * Gets the devices power source * * @param device The identifier of the target device * @param powerSource The power source of the device * * @return * - \ref NVML_SUCCESS if the current power source was successfully retrieved * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a powerSource is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * */ nvmlReturn_t DECLDIR nvmlDeviceGetPowerSource(nvmlDevice_t device, nvmlPowerSource_t *powerSource); /** * Gets the device's memory bus width * * @param device The identifier of the target device * @param maxSpeed The devices's memory bus width * * @return * - \ref NVML_SUCCESS if the memory bus width is successfully retrieved * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a busWidth is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * */ nvmlReturn_t DECLDIR nvmlDeviceGetMemoryBusWidth(nvmlDevice_t device, unsigned int *busWidth); /** * Gets the device's PCIE Max Link speed in MBPS * * @param device The identifier of the target device * @param maxSpeed The devices's PCIE Max Link speed in MBPS * * @return * - \ref NVML_SUCCESS if Pcie Max Link Speed is successfully retrieved * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a maxSpeed is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * */ nvmlReturn_t DECLDIR nvmlDeviceGetPcieLinkMaxSpeed(nvmlDevice_t device, unsigned int *maxSpeed); /** * Gets the device's Adaptive Clock status * * @param device The identifier of the target device * @param adaptiveClockStatus The current adaptive clocking status * * @return * - \ref NVML_SUCCESS if the current adaptive clocking status is successfully retrieved * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a adaptiveClockStatus is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * */ nvmlReturn_t DECLDIR nvmlDeviceGetAdaptiveClockInfoStatus(nvmlDevice_t device, unsigned int *adaptiveClockStatus); /** * @} */ /** @addtogroup nvmlAccountingStats * @{ */ /** * Queries the state of per process accounting mode. * * For Kepler &tm; or newer fully supported devices. * * See \ref nvmlDeviceGetAccountingStats for more details. * See \ref nvmlDeviceSetAccountingMode * * @param device The identifier of the target device * @param mode Reference in which to return the current accounting mode * * @return * - \ref NVML_SUCCESS if the mode has been successfully retrieved * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a mode are NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetAccountingMode(nvmlDevice_t device, nvmlEnableState_t *mode); /** * Queries process's accounting stats. * * For Kepler &tm; or newer fully supported devices. * * Accounting stats capture GPU utilization and other statistics across the lifetime of a process. * Accounting stats can be queried during life time of the process and after its termination. * The time field in \ref nvmlAccountingStats_t is reported as 0 during the lifetime of the process and * updated to actual running time after its termination. * Accounting stats are kept in a circular buffer, newly created processes overwrite information about old * processes. * * See \ref nvmlAccountingStats_t for description of each returned metric. * List of processes that can be queried can be retrieved from \ref nvmlDeviceGetAccountingPids. * * @note Accounting Mode needs to be on. See \ref nvmlDeviceGetAccountingMode. * @note Only compute and graphics applications stats can be queried. Monitoring applications stats can't be * queried since they don't contribute to GPU utilization. * @note In case of pid collision stats of only the latest process (that terminated last) will be reported * * @warning On Kepler devices per process statistics are accurate only if there's one process running on a GPU. * * @param device The identifier of the target device * @param pid Process Id of the target process to query stats for * @param stats Reference in which to return the process's accounting stats * * @return * - \ref NVML_SUCCESS if stats have been successfully retrieved * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a stats are NULL * - \ref NVML_ERROR_NOT_FOUND if process stats were not found * - \ref NVML_ERROR_NOT_SUPPORTED if \a device doesn't support this feature or accounting mode is disabled * or on vGPU host. * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlDeviceGetAccountingBufferSize */ nvmlReturn_t DECLDIR nvmlDeviceGetAccountingStats(nvmlDevice_t device, unsigned int pid, nvmlAccountingStats_t *stats); /** * Queries list of processes that can be queried for accounting stats. The list of processes returned * can be in running or terminated state. * * For Kepler &tm; or newer fully supported devices. * * To just query the number of processes ready to be queried, call this function with *count = 0 and * pids=NULL. The return code will be NVML_ERROR_INSUFFICIENT_SIZE, or NVML_SUCCESS if list is empty. * * For more details see \ref nvmlDeviceGetAccountingStats. * * @note In case of PID collision some processes might not be accessible before the circular buffer is full. * * @param device The identifier of the target device * @param count Reference in which to provide the \a pids array size, and * to return the number of elements ready to be queried * @param pids Reference in which to return list of process ids * * @return * - \ref NVML_SUCCESS if pids were successfully retrieved * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a count is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if \a device doesn't support this feature or accounting mode is disabled * or on vGPU host. * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a count is too small (\a count is set to * expected value) * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlDeviceGetAccountingBufferSize */ nvmlReturn_t DECLDIR nvmlDeviceGetAccountingPids(nvmlDevice_t device, unsigned int *count, unsigned int *pids); /** * Returns the number of processes that the circular buffer with accounting pids can hold. * * For Kepler &tm; or newer fully supported devices. * * This is the maximum number of processes that accounting information will be stored for before information * about oldest processes will get overwritten by information about new processes. * * @param device The identifier of the target device * @param bufferSize Reference in which to provide the size (in number of elements) * of the circular buffer for accounting stats. * * @return * - \ref NVML_SUCCESS if buffer size was successfully retrieved * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a bufferSize is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature or accounting mode is disabled * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlDeviceGetAccountingStats * @see nvmlDeviceGetAccountingPids */ nvmlReturn_t DECLDIR nvmlDeviceGetAccountingBufferSize(nvmlDevice_t device, unsigned int *bufferSize); /** @} */ /** @addtogroup nvmlDeviceQueries * @{ */ /** * Returns the list of retired pages by source, including pages that are pending retirement * The address information provided from this API is the hardware address of the page that was retired. Note * that this does not match the virtual address used in CUDA, but will match the address information in XID 63 * * For Kepler &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param cause Filter page addresses by cause of retirement * @param pageCount Reference in which to provide the \a addresses buffer size, and * to return the number of retired pages that match \a cause * Set to 0 to query the size without allocating an \a addresses buffer * @param addresses Buffer to write the page addresses into * * @return * - \ref NVML_SUCCESS if \a pageCount was populated and \a addresses was filled * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a pageCount indicates the buffer is not large enough to store all the * matching page addresses. \a pageCount is set to the needed size. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a pageCount is NULL, \a cause is invalid, or * \a addresses is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetRetiredPages(nvmlDevice_t device, nvmlPageRetirementCause_t cause, unsigned int *pageCount, unsigned long long *addresses); /** * Returns the list of retired pages by source, including pages that are pending retirement * The address information provided from this API is the hardware address of the page that was retired. Note * that this does not match the virtual address used in CUDA, but will match the address information in XID 63 * * \note nvmlDeviceGetRetiredPages_v2 adds an additional timestamps paramter to return the time of each page's * retirement. * * For Kepler &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param cause Filter page addresses by cause of retirement * @param pageCount Reference in which to provide the \a addresses buffer size, and * to return the number of retired pages that match \a cause * Set to 0 to query the size without allocating an \a addresses buffer * @param addresses Buffer to write the page addresses into * @param timestamps Buffer to write the timestamps of page retirement, additional for _v2 * * @return * - \ref NVML_SUCCESS if \a pageCount was populated and \a addresses was filled * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a pageCount indicates the buffer is not large enough to store all the * matching page addresses. \a pageCount is set to the needed size. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a pageCount is NULL, \a cause is invalid, or * \a addresses is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetRetiredPages_v2(nvmlDevice_t device, nvmlPageRetirementCause_t cause, unsigned int *pageCount, unsigned long long *addresses, unsigned long long *timestamps); /** * Check if any pages are pending retirement and need a reboot to fully retire. * * For Kepler &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param isPending Reference in which to return the pending status * * @return * - \ref NVML_SUCCESS if \a isPending was populated * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a isPending is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetRetiredPagesPendingStatus(nvmlDevice_t device, nvmlEnableState_t *isPending); /** * Get number of remapped rows. The number of rows reported will be based on * the cause of the remapping. isPending indicates whether or not there are * pending remappings. A reset will be required to actually remap the row. * failureOccurred will be set if a row remapping ever failed in the past. A * pending remapping won't affect future work on the GPU since * error-containment and dynamic page blacklisting will take care of that. * * @note On MIG-enabled GPUs with active instances, querying the number of * remapped rows is not supported * * For Ampere &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param corrRows Reference for number of rows remapped due to correctable errors * @param uncRows Reference for number of rows remapped due to uncorrectable errors * @param isPending Reference for whether or not remappings are pending * @param failureOccurred Reference that is set when a remapping has failed in the past * * @return * - \ref NVML_SUCCESS Upon success * - \ref NVML_ERROR_INVALID_ARGUMENT If \a corrRows, \a uncRows, \a isPending or \a failureOccurred is invalid * - \ref NVML_ERROR_NOT_SUPPORTED If MIG is enabled or if the device doesn't support this feature * - \ref NVML_ERROR_UNKNOWN Unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetRemappedRows(nvmlDevice_t device, unsigned int *corrRows, unsigned int *uncRows, unsigned int *isPending, unsigned int *failureOccurred); /** * Get the row remapper histogram. Returns the remap availability for each bank * on the GPU. * * @param device Device handle * @param values Histogram values * * @return * - \ref NVML_SUCCESS On success * - \ref NVML_ERROR_UNKNOWN On any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetRowRemapperHistogram(nvmlDevice_t device, nvmlRowRemapperHistogramValues_t *values); /** * Get architecture for device * * @param device The identifier of the target device * @param arch Reference where architecture is returned, if call successful. * Set to NVML_DEVICE_ARCH_* upon success * * @return * - \ref NVML_SUCCESS Upon success * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device or \a arch (output refererence) are invalid */ nvmlReturn_t DECLDIR nvmlDeviceGetArchitecture(nvmlDevice_t device, nvmlDeviceArchitecture_t *arch); /** @} */ /***************************************************************************************************/ /** @defgroup nvmlUnitCommands Unit Commands * This chapter describes NVML operations that change the state of the unit. For S-class products. * Each of these requires root/admin access. Non-admin users will see an NVML_ERROR_NO_PERMISSION * error code when invoking any of these methods. * @{ */ /***************************************************************************************************/ /** * Set the LED state for the unit. The LED can be either green (0) or amber (1). * * For S-class products. * Requires root/admin permissions. * * This operation takes effect immediately. * * * Current S-Class products don't provide unique LEDs for each unit. As such, both front * and back LEDs will be toggled in unison regardless of which unit is specified with this command. * * See \ref nvmlLedColor_t for available colors. * * @param unit The identifier of the target unit * @param color The target LED color * * @return * - \ref NVML_SUCCESS if the LED color has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a unit or \a color is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if this is not an S-class product * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlUnitGetLedState() */ nvmlReturn_t DECLDIR nvmlUnitSetLedState(nvmlUnit_t unit, nvmlLedColor_t color); /** @} */ /***************************************************************************************************/ /** @defgroup nvmlDeviceCommands Device Commands * This chapter describes NVML operations that change the state of the device. * Each of these requires root/admin access. Non-admin users will see an NVML_ERROR_NO_PERMISSION * error code when invoking any of these methods. * @{ */ /***************************************************************************************************/ /** * Set the persistence mode for the device. * * For all products. * For Linux only. * Requires root/admin permissions. * * The persistence mode determines whether the GPU driver software is torn down after the last client * exits. * * This operation takes effect immediately. It is not persistent across reboots. After each reboot the * persistence mode is reset to "Disabled". * * See \ref nvmlEnableState_t for available modes. * * After calling this API with mode set to NVML_FEATURE_DISABLED on a device that has its own NUMA * memory, the given device handle will no longer be valid, and to continue to interact with this * device, a new handle should be obtained from one of the nvmlDeviceGetHandleBy*() APIs. This * limitation is currently only applicable to devices that have a coherent NVLink connection to * system memory. * * @param device The identifier of the target device * @param mode The target persistence mode * * @return * - \ref NVML_SUCCESS if the persistence mode was set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a mode is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlDeviceGetPersistenceMode() */ nvmlReturn_t DECLDIR nvmlDeviceSetPersistenceMode(nvmlDevice_t device, nvmlEnableState_t mode); /** * Set the compute mode for the device. * * For all products. * Requires root/admin permissions. * * The compute mode determines whether a GPU can be used for compute operations and whether it can * be shared across contexts. * * This operation takes effect immediately. Under Linux it is not persistent across reboots and * always resets to "Default". Under windows it is persistent. * * Under windows compute mode may only be set to DEFAULT when running in WDDM * * @note On MIG-enabled GPUs, compute mode would be set to DEFAULT and changing it is not supported. * * See \ref nvmlComputeMode_t for details on available compute modes. * * @param device The identifier of the target device * @param mode The target compute mode * * @return * - \ref NVML_SUCCESS if the compute mode was set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a mode is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlDeviceGetComputeMode() */ nvmlReturn_t DECLDIR nvmlDeviceSetComputeMode(nvmlDevice_t device, nvmlComputeMode_t mode); /** * Set the ECC mode for the device. * * For Kepler &tm; or newer fully supported devices. * Only applicable to devices with ECC. * Requires \a NVML_INFOROM_ECC version 1.0 or higher. * Requires root/admin permissions. * * The ECC mode determines whether the GPU enables its ECC support. * * This operation takes effect after the next reboot. * * See \ref nvmlEnableState_t for details on available modes. * * @param device The identifier of the target device * @param ecc The target ECC mode * * @return * - \ref NVML_SUCCESS if the ECC mode was set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a ecc is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlDeviceGetEccMode() */ nvmlReturn_t DECLDIR nvmlDeviceSetEccMode(nvmlDevice_t device, nvmlEnableState_t ecc); /** * Clear the ECC error and other memory error counts for the device. * * For Kepler &tm; or newer fully supported devices. * Only applicable to devices with ECC. * Requires \a NVML_INFOROM_ECC version 2.0 or higher to clear aggregate location-based ECC counts. * Requires \a NVML_INFOROM_ECC version 1.0 or higher to clear all other ECC counts. * Requires root/admin permissions. * Requires ECC Mode to be enabled. * * Sets all of the specified ECC counters to 0, including both detailed and total counts. * * This operation takes effect immediately. * * See \ref nvmlMemoryErrorType_t for details on available counter types. * * @param device The identifier of the target device * @param counterType Flag that indicates which type of errors should be cleared. * * @return * - \ref NVML_SUCCESS if the error counts were cleared * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a counterType is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see * - nvmlDeviceGetDetailedEccErrors() * - nvmlDeviceGetTotalEccErrors() */ nvmlReturn_t DECLDIR nvmlDeviceClearEccErrorCounts(nvmlDevice_t device, nvmlEccCounterType_t counterType); /** * Set the driver model for the device. * * For Fermi &tm; or newer fully supported devices. * For windows only. * Requires root/admin permissions. * * On Windows platforms the device driver can run in either WDDM or WDM (TCC) mode. If a display is attached * to the device it must run in WDDM mode. * * It is possible to force the change to WDM (TCC) while the display is still attached with a force flag (nvmlFlagForce). * This should only be done if the host is subsequently powered down and the display is detached from the device * before the next reboot. * * This operation takes effect after the next reboot. * * Windows driver model may only be set to WDDM when running in DEFAULT compute mode. * * Change driver model to WDDM is not supported when GPU doesn't support graphics acceleration or * will not support it after reboot. See \ref nvmlDeviceSetGpuOperationMode. * * See \ref nvmlDriverModel_t for details on available driver models. * See \ref nvmlFlagDefault and \ref nvmlFlagForce * * @param device The identifier of the target device * @param driverModel The target driver model * @param flags Flags that change the default behavior * * @return * - \ref NVML_SUCCESS if the driver model has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a driverModel is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if the platform is not windows or the device does not support this feature * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlDeviceGetDriverModel() */ nvmlReturn_t DECLDIR nvmlDeviceSetDriverModel(nvmlDevice_t device, nvmlDriverModel_t driverModel, unsigned int flags); typedef enum nvmlClockLimitId_enum { NVML_CLOCK_LIMIT_ID_RANGE_START = 0xffffff00, NVML_CLOCK_LIMIT_ID_TDP, NVML_CLOCK_LIMIT_ID_UNLIMITED } nvmlClockLimitId_t; /** * Set clocks that device will lock to. * * Sets the clocks that the device will be running at to the value in the range of minGpuClockMHz to maxGpuClockMHz. * Setting this will supercede application clock values and take effect regardless if a cuda app is running. * See /ref nvmlDeviceSetApplicationsClocks * * Can be used as a setting to request constant performance. * * This can be called with a pair of integer clock frequencies in MHz, or a pair of /ref nvmlClockLimitId_t values. * See the table below for valid combinations of these values. * * minGpuClock | maxGpuClock | Effect * ------------+-------------+-------------------------------------------------- * tdp | tdp | Lock clock to TDP * unlimited | tdp | Upper bound is TDP but clock may drift below this * tdp | unlimited | Lower bound is TDP but clock may boost above this * unlimited | unlimited | Unlocked (== nvmlDeviceResetGpuLockedClocks) * * If one arg takes one of these values, the other must be one of these values as * well. Mixed numeric and symbolic calls return NVML_ERROR_INVALID_ARGUMENT. * * Requires root/admin permissions. * * After system reboot or driver reload applications clocks go back to their default value. * See \ref nvmlDeviceResetGpuLockedClocks. * * For Volta &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param minGpuClockMHz Requested minimum gpu clock in MHz * @param maxGpuClockMHz Requested maximum gpu clock in MHz * * @return * - \ref NVML_SUCCESS if new settings were successfully set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a minGpuClockMHz and \a maxGpuClockMHz * is not a valid clock combination * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceSetGpuLockedClocks(nvmlDevice_t device, unsigned int minGpuClockMHz, unsigned int maxGpuClockMHz); /** * Resets the gpu clock to the default value * * This is the gpu clock that will be used after system reboot or driver reload. * Default values are idle clocks, but the current values can be changed using \ref nvmlDeviceSetApplicationsClocks. * * @see nvmlDeviceSetGpuLockedClocks * * For Volta &tm; or newer fully supported devices. * * @param device The identifier of the target device * * @return * - \ref NVML_SUCCESS if new settings were successfully set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceResetGpuLockedClocks(nvmlDevice_t device); /** * Set memory clocks that device will lock to. * * Sets the device's memory clocks to the value in the range of minMemClockMHz to maxMemClockMHz. * Setting this will supersede application clock values and take effect regardless of whether a cuda app is running. * See /ref nvmlDeviceSetApplicationsClocks * * Can be used as a setting to request constant performance. * * Requires root/admin permissions. * * After system reboot or driver reload applications clocks go back to their default value. * See \ref nvmlDeviceResetMemoryLockedClocks. * * For Ampere &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param minMemClockMHz Requested minimum memory clock in MHz * @param maxMemClockMHz Requested maximum memory clock in MHz * * @return * - \ref NVML_SUCCESS if new settings were successfully set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a minGpuClockMHz and \a maxGpuClockMHz * is not a valid clock combination * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceSetMemoryLockedClocks(nvmlDevice_t device, unsigned int minMemClockMHz, unsigned int maxMemClockMHz); /** * Resets the memory clock to the default value * * This is the memory clock that will be used after system reboot or driver reload. * Default values are idle clocks, but the current values can be changed using \ref nvmlDeviceSetApplicationsClocks. * * @see nvmlDeviceSetMemoryLockedClocks * * For Ampere &tm; or newer fully supported devices. * * @param device The identifier of the target device * * @return * - \ref NVML_SUCCESS if new settings were successfully set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceResetMemoryLockedClocks(nvmlDevice_t device); /** * Set clocks that applications will lock to. * * Sets the clocks that compute and graphics applications will be running at. * e.g. CUDA driver requests these clocks during context creation which means this property * defines clocks at which CUDA applications will be running unless some overspec event * occurs (e.g. over power, over thermal or external HW brake). * * Can be used as a setting to request constant performance. * * On Pascal and newer hardware, this will automatically disable automatic boosting of clocks. * * On K80 and newer Kepler and Maxwell GPUs, users desiring fixed performance should also call * \ref nvmlDeviceSetAutoBoostedClocksEnabled to prevent clocks from automatically boosting * above the clock value being set. * * For Kepler &tm; or newer non-GeForce fully supported devices and Maxwell or newer GeForce devices. * Requires root/admin permissions. * * See \ref nvmlDeviceGetSupportedMemoryClocks and \ref nvmlDeviceGetSupportedGraphicsClocks * for details on how to list available clocks combinations. * * After system reboot or driver reload applications clocks go back to their default value. * See \ref nvmlDeviceResetApplicationsClocks. * * @param device The identifier of the target device * @param memClockMHz Requested memory clock in MHz * @param graphicsClockMHz Requested graphics clock in MHz * * @return * - \ref NVML_SUCCESS if new settings were successfully set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a memClockMHz and \a graphicsClockMHz * is not a valid clock combination * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceSetApplicationsClocks(nvmlDevice_t device, unsigned int memClockMHz, unsigned int graphicsClockMHz); /** * Retrieves the frequency monitor fault status for the device. * * For Ampere &tm; or newer fully supported devices. * Requires root user. * * See \ref nvmlClkMonStatus_t for details on decoding the status output. * * @param device The identifier of the target device * @param status Reference in which to return the clkmon fault status * * @return * - \ref NVML_SUCCESS if \a status has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a status is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlDeviceGetClkMonStatus() */ nvmlReturn_t DECLDIR nvmlDeviceGetClkMonStatus(nvmlDevice_t device, nvmlClkMonStatus_t *status); /** * Set new power limit of this device. * * For Kepler &tm; or newer fully supported devices. * Requires root/admin permissions. * * See \ref nvmlDeviceGetPowerManagementLimitConstraints to check the allowed ranges of values. * * \note Limit is not persistent across reboots or driver unloads. * Enable persistent mode to prevent driver from unloading when no application is using the device. * * @param device The identifier of the target device * @param limit Power management limit in milliwatts to set * * @return * - \ref NVML_SUCCESS if \a limit has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a defaultLimit is out of range * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlDeviceGetPowerManagementLimitConstraints * @see nvmlDeviceGetPowerManagementDefaultLimit */ nvmlReturn_t DECLDIR nvmlDeviceSetPowerManagementLimit(nvmlDevice_t device, unsigned int limit); /** * Sets new GOM. See \a nvmlGpuOperationMode_t for details. * * For GK110 M-class and X-class Tesla &tm; products from the Kepler family. * Modes \ref NVML_GOM_LOW_DP and \ref NVML_GOM_ALL_ON are supported on fully supported GeForce products. * Not supported on Quadro ® and Tesla &tm; C-class products. * Requires root/admin permissions. * * Changing GOMs requires a reboot. * The reboot requirement might be removed in the future. * * Compute only GOMs don't support graphics acceleration. Under windows switching to these GOMs when * pending driver model is WDDM is not supported. See \ref nvmlDeviceSetDriverModel. * * @param device The identifier of the target device * @param mode Target GOM * * @return * - \ref NVML_SUCCESS if \a mode has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a mode incorrect * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support GOM or specific mode * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlGpuOperationMode_t * @see nvmlDeviceGetGpuOperationMode */ nvmlReturn_t DECLDIR nvmlDeviceSetGpuOperationMode(nvmlDevice_t device, nvmlGpuOperationMode_t mode); /** * Changes the root/admin restructions on certain APIs. See \a nvmlRestrictedAPI_t for the list of supported APIs. * This method can be used by a root/admin user to give non-root/admin access to certain otherwise-restricted APIs. * The new setting lasts for the lifetime of the NVIDIA driver; it is not persistent. See \a nvmlDeviceGetAPIRestriction * to query the current restriction settings. * * For Kepler &tm; or newer fully supported devices. * Requires root/admin permissions. * * @param device The identifier of the target device * @param apiType Target API type for this operation * @param isRestricted The target restriction * * @return * - \ref NVML_SUCCESS if \a isRestricted has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a apiType incorrect * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support changing API restrictions or the device does not support * the feature that api restrictions are being set for (E.G. Enabling/disabling auto * boosted clocks is not supported by the device) * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlRestrictedAPI_t */ nvmlReturn_t DECLDIR nvmlDeviceSetAPIRestriction(nvmlDevice_t device, nvmlRestrictedAPI_t apiType, nvmlEnableState_t isRestricted); /** * @} */ /** @addtogroup nvmlAccountingStats * @{ */ /** * Enables or disables per process accounting. * * For Kepler &tm; or newer fully supported devices. * Requires root/admin permissions. * * @note This setting is not persistent and will default to disabled after driver unloads. * Enable persistence mode to be sure the setting doesn't switch off to disabled. * * @note Enabling accounting mode has no negative impact on the GPU performance. * * @note Disabling accounting clears all accounting pids information. * * @note On MIG-enabled GPUs, accounting mode would be set to DISABLED and changing it is not supported. * * See \ref nvmlDeviceGetAccountingMode * See \ref nvmlDeviceGetAccountingStats * See \ref nvmlDeviceClearAccountingPids * * @param device The identifier of the target device * @param mode The target accounting mode * * @return * - \ref NVML_SUCCESS if the new mode has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device or \a mode are invalid * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceSetAccountingMode(nvmlDevice_t device, nvmlEnableState_t mode); /** * Clears accounting information about all processes that have already terminated. * * For Kepler &tm; or newer fully supported devices. * Requires root/admin permissions. * * See \ref nvmlDeviceGetAccountingMode * See \ref nvmlDeviceGetAccountingStats * See \ref nvmlDeviceSetAccountingMode * * @param device The identifier of the target device * * @return * - \ref NVML_SUCCESS if accounting information has been cleared * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device are invalid * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceClearAccountingPids(nvmlDevice_t device); /** @} */ /***************************************************************************************************/ /** @defgroup NvLink NvLink Methods * This chapter describes methods that NVML can perform on NVLINK enabled devices. * @{ */ /***************************************************************************************************/ /** * Retrieves the state of the device's NvLink for the link specified * * For Pascal &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param link Specifies the NvLink link to be queried * @param isActive \a nvmlEnableState_t where NVML_FEATURE_ENABLED indicates that * the link is active and NVML_FEATURE_DISABLED indicates it * is inactive * * @return * - \ref NVML_SUCCESS if \a isActive has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device or \a link is invalid or \a isActive is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkState(nvmlDevice_t device, unsigned int link, nvmlEnableState_t *isActive); /** * Retrieves the version of the device's NvLink for the link specified * * For Pascal &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param link Specifies the NvLink link to be queried * @param version Requested NvLink version * * @return * - \ref NVML_SUCCESS if \a version has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device or \a link is invalid or \a version is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkVersion(nvmlDevice_t device, unsigned int link, unsigned int *version); /** * Retrieves the requested capability from the device's NvLink for the link specified * Please refer to the \a nvmlNvLinkCapability_t structure for the specific caps that can be queried * The return value should be treated as a boolean. * * For Pascal &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param link Specifies the NvLink link to be queried * @param capability Specifies the \a nvmlNvLinkCapability_t to be queried * @param capResult A boolean for the queried capability indicating that feature is available * * @return * - \ref NVML_SUCCESS if \a capResult has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a link, or \a capability is invalid or \a capResult is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkCapability(nvmlDevice_t device, unsigned int link, nvmlNvLinkCapability_t capability, unsigned int *capResult); /** * Retrieves the PCI information for the remote node on a NvLink link * Note: pciSubSystemId is not filled in this function and is indeterminate * * For Pascal &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param link Specifies the NvLink link to be queried * @param pci \a nvmlPciInfo_t of the remote node for the specified link * * @return * - \ref NVML_SUCCESS if \a pci has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device or \a link is invalid or \a pci is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkRemotePciInfo_v2(nvmlDevice_t device, unsigned int link, nvmlPciInfo_t *pci); /** * Retrieves the specified error counter value * Please refer to \a nvmlNvLinkErrorCounter_t for error counters that are available * * For Pascal &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param link Specifies the NvLink link to be queried * @param counter Specifies the NvLink counter to be queried * @param counterValue Returned counter value * * @return * - \ref NVML_SUCCESS if \a counter has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a link, or \a counter is invalid or \a counterValue is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkErrorCounter(nvmlDevice_t device, unsigned int link, nvmlNvLinkErrorCounter_t counter, unsigned long long *counterValue); /** * Resets all error counters to zero * Please refer to \a nvmlNvLinkErrorCounter_t for the list of error counters that are reset * * For Pascal &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param link Specifies the NvLink link to be queried * * @return * - \ref NVML_SUCCESS if the reset is successful * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device or \a link is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceResetNvLinkErrorCounters(nvmlDevice_t device, unsigned int link); /** * Deprecated: Setting utilization counter control is no longer supported. * * Set the NVLINK utilization counter control information for the specified counter, 0 or 1. * Please refer to \a nvmlNvLinkUtilizationControl_t for the structure definition. Performs a reset * of the counters if the reset parameter is non-zero. * * For Pascal &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param counter Specifies the counter that should be set (0 or 1). * @param link Specifies the NvLink link to be queried * @param control A reference to the \a nvmlNvLinkUtilizationControl_t to set * @param reset Resets the counters on set if non-zero * * @return * - \ref NVML_SUCCESS if the control has been set successfully * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a counter, \a link, or \a control is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceSetNvLinkUtilizationControl(nvmlDevice_t device, unsigned int link, unsigned int counter, nvmlNvLinkUtilizationControl_t *control, unsigned int reset); /** * Deprecated: Getting utilization counter control is no longer supported. * * Get the NVLINK utilization counter control information for the specified counter, 0 or 1. * Please refer to \a nvmlNvLinkUtilizationControl_t for the structure definition * * For Pascal &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param counter Specifies the counter that should be set (0 or 1). * @param link Specifies the NvLink link to be queried * @param control A reference to the \a nvmlNvLinkUtilizationControl_t to place information * * @return * - \ref NVML_SUCCESS if the control has been set successfully * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a counter, \a link, or \a control is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkUtilizationControl(nvmlDevice_t device, unsigned int link, unsigned int counter, nvmlNvLinkUtilizationControl_t *control); /** * Deprecated: Use \ref nvmlDeviceGetFieldValues with NVML_FI_DEV_NVLINK_THROUGHPUT_* as field values instead. * * Retrieve the NVLINK utilization counter based on the current control for a specified counter. * In general it is good practice to use \a nvmlDeviceSetNvLinkUtilizationControl * before reading the utilization counters as they have no default state * * For Pascal &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param link Specifies the NvLink link to be queried * @param counter Specifies the counter that should be read (0 or 1). * @param rxcounter Receive counter return value * @param txcounter Transmit counter return value * * @return * - \ref NVML_SUCCESS if \a rxcounter and \a txcounter have been successfully set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a counter, or \a link is invalid or \a rxcounter or \a txcounter are NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkUtilizationCounter(nvmlDevice_t device, unsigned int link, unsigned int counter, unsigned long long *rxcounter, unsigned long long *txcounter); /** * Deprecated: Freezing NVLINK utilization counters is no longer supported. * * Freeze the NVLINK utilization counters * Both the receive and transmit counters are operated on by this function * * For Pascal &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param link Specifies the NvLink link to be queried * @param counter Specifies the counter that should be frozen (0 or 1). * @param freeze NVML_FEATURE_ENABLED = freeze the receive and transmit counters * NVML_FEATURE_DISABLED = unfreeze the receive and transmit counters * * @return * - \ref NVML_SUCCESS if counters were successfully frozen or unfrozen * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a link, \a counter, or \a freeze is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceFreezeNvLinkUtilizationCounter (nvmlDevice_t device, unsigned int link, unsigned int counter, nvmlEnableState_t freeze); /** * Deprecated: Resetting NVLINK utilization counters is no longer supported. * * Reset the NVLINK utilization counters * Both the receive and transmit counters are operated on by this function * * For Pascal &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param link Specifies the NvLink link to be reset * @param counter Specifies the counter that should be reset (0 or 1) * * @return * - \ref NVML_SUCCESS if counters were successfully reset * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a link, or \a counter is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceResetNvLinkUtilizationCounter (nvmlDevice_t device, unsigned int link, unsigned int counter); /** * Get the NVLink device type of the remote device connected over the given link. * * @param device The device handle of the target GPU * @param link The NVLink link index on the target GPU * @param pNvLinkDeviceType Pointer in which the output remote device type is returned * * @return * - \ref NVML_SUCCESS if \a pNvLinkDeviceType has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_NOT_SUPPORTED if NVLink is not supported * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device or \a link is invalid, or * \a pNvLinkDeviceType is NULL * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is * otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkRemoteDeviceType(nvmlDevice_t device, unsigned int link, nvmlIntNvLinkDeviceType_t *pNvLinkDeviceType); /** @} */ /***************************************************************************************************/ /** @defgroup nvmlEvents Event Handling Methods * This chapter describes methods that NVML can perform against each device to register and wait for * some event to occur. * @{ */ /***************************************************************************************************/ /** * Create an empty set of events. * Event set should be freed by \ref nvmlEventSetFree * * For Fermi &tm; or newer fully supported devices. * @param set Reference in which to return the event handle * * @return * - \ref NVML_SUCCESS if the event has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a set is NULL * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlEventSetFree */ nvmlReturn_t DECLDIR nvmlEventSetCreate(nvmlEventSet_t *set); /** * Starts recording of events on a specified devices and add the events to specified \ref nvmlEventSet_t * * For Fermi &tm; or newer fully supported devices. * Ecc events are available only on ECC enabled devices (see \ref nvmlDeviceGetTotalEccErrors) * Power capping events are available only on Power Management enabled devices (see \ref nvmlDeviceGetPowerManagementMode) * * For Linux only. * * \b IMPORTANT: Operations on \a set are not thread safe * * This call starts recording of events on specific device. * All events that occurred before this call are not recorded. * Checking if some event occurred can be done with \ref nvmlEventSetWait_v2 * * If function reports NVML_ERROR_UNKNOWN, event set is in undefined state and should be freed. * If function reports NVML_ERROR_NOT_SUPPORTED, event set can still be used. None of the requested eventTypes * are registered in that case. * * @param device The identifier of the target device * @param eventTypes Bitmask of \ref nvmlEventType to record * @param set Set to which add new event types * * @return * - \ref NVML_SUCCESS if the event has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a eventTypes is invalid or \a set is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the platform does not support this feature or some of requested event types * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlEventType * @see nvmlDeviceGetSupportedEventTypes * @see nvmlEventSetWait * @see nvmlEventSetFree */ nvmlReturn_t DECLDIR nvmlDeviceRegisterEvents(nvmlDevice_t device, unsigned long long eventTypes, nvmlEventSet_t set); /** * Returns information about events supported on device * * For Fermi &tm; or newer fully supported devices. * * Events are not supported on Windows. So this function returns an empty mask in \a eventTypes on Windows. * * @param device The identifier of the target device * @param eventTypes Reference in which to return bitmask of supported events * * @return * - \ref NVML_SUCCESS if the eventTypes has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a eventType is NULL * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlEventType * @see nvmlDeviceRegisterEvents */ nvmlReturn_t DECLDIR nvmlDeviceGetSupportedEventTypes(nvmlDevice_t device, unsigned long long *eventTypes); /** * Waits on events and delivers events * * For Fermi &tm; or newer fully supported devices. * * If some events are ready to be delivered at the time of the call, function returns immediately. * If there are no events ready to be delivered, function sleeps till event arrives * but not longer than specified timeout. This function in certain conditions can return before * specified timeout passes (e.g. when interrupt arrives) * * On Windows, in case of xid error, the function returns the most recent xid error type seen by the system. * If there are multiple xid errors generated before nvmlEventSetWait is invoked then the last seen xid error * type is returned for all xid error events. * * On Linux, every xid error event would return the associated event data and other information if applicable. * * In MIG mode, if device handle is provided, the API reports all the events for the available instances, * only if the caller has appropriate privileges. In absence of required privileges, only the events which * affect all the instances (i.e. whole device) are reported. * * This API does not currently support per-instance event reporting using MIG device handles. * * @param set Reference to set of events to wait on * @param data Reference in which to return event data * @param timeoutms Maximum amount of wait time in milliseconds for registered event * * @return * - \ref NVML_SUCCESS if the data has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a data is NULL * - \ref NVML_ERROR_TIMEOUT if no event arrived in specified timeout or interrupt arrived * - \ref NVML_ERROR_GPU_IS_LOST if a GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlEventType * @see nvmlDeviceRegisterEvents */ nvmlReturn_t DECLDIR nvmlEventSetWait_v2(nvmlEventSet_t set, nvmlEventData_t * data, unsigned int timeoutms); /** * Releases events in the set * * For Fermi &tm; or newer fully supported devices. * * @param set Reference to events to be released * * @return * - \ref NVML_SUCCESS if the event has been successfully released * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlDeviceRegisterEvents */ nvmlReturn_t DECLDIR nvmlEventSetFree(nvmlEventSet_t set); /** @} */ /***************************************************************************************************/ /** @defgroup nvmlZPI Drain states * This chapter describes methods that NVML can perform against each device to control their drain state * and recognition by NVML and NVIDIA kernel driver. These methods can be used with out-of-band tools to * power on/off GPUs, enable robust reset scenarios, etc. * @{ */ /***************************************************************************************************/ /** * Modify the drain state of a GPU. This method forces a GPU to no longer accept new incoming requests. * Any new NVML process will no longer see this GPU. Persistence mode for this GPU must be turned off before * this call is made. * Must be called as administrator. * For Linux only. * * For Pascal &tm; or newer fully supported devices. * Some Kepler devices supported. * * @param pciInfo The PCI address of the GPU drain state to be modified * @param newState The drain state that should be entered, see \ref nvmlEnableState_t * * @return * - \ref NVML_SUCCESS if counters were successfully reset * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a nvmlIndex or \a newState is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_NO_PERMISSION if the calling process has insufficient permissions to perform operation * - \ref NVML_ERROR_IN_USE if the device has persistence mode turned on * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceModifyDrainState (nvmlPciInfo_t *pciInfo, nvmlEnableState_t newState); /** * Query the drain state of a GPU. This method is used to check if a GPU is in a currently draining * state. * For Linux only. * * For Pascal &tm; or newer fully supported devices. * Some Kepler devices supported. * * @param pciInfo The PCI address of the GPU drain state to be queried * @param currentState The current drain state for this GPU, see \ref nvmlEnableState_t * * @return * - \ref NVML_SUCCESS if counters were successfully reset * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a nvmlIndex or \a currentState is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceQueryDrainState (nvmlPciInfo_t *pciInfo, nvmlEnableState_t *currentState); /** * This method will remove the specified GPU from the view of both NVML and the NVIDIA kernel driver * as long as no other processes are attached. If other processes are attached, this call will return * NVML_ERROR_IN_USE and the GPU will be returned to its original "draining" state. Note: the * only situation where a process can still be attached after nvmlDeviceModifyDrainState() is called * to initiate the draining state is if that process was using, and is still using, a GPU before the * call was made. Also note, persistence mode counts as an attachment to the GPU thus it must be disabled * prior to this call. * * For long-running NVML processes please note that this will change the enumeration of current GPUs. * For example, if there are four GPUs present and GPU1 is removed, the new enumeration will be 0-2. * Also, device handles after the removed GPU will not be valid and must be re-established. * Must be run as administrator. * For Linux only. * * For Pascal &tm; or newer fully supported devices. * Some Kepler devices supported. * * @param pciInfo The PCI address of the GPU to be removed * @param gpuState Whether the GPU is to be removed, from the OS * see \ref nvmlDetachGpuState_t * @param linkState Requested upstream PCIe link state, see \ref nvmlPcieLinkState_t * * @return * - \ref NVML_SUCCESS if counters were successfully reset * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a nvmlIndex is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_IN_USE if the device is still in use and cannot be removed */ nvmlReturn_t DECLDIR nvmlDeviceRemoveGpu_v2(nvmlPciInfo_t *pciInfo, nvmlDetachGpuState_t gpuState, nvmlPcieLinkState_t linkState); /** * Request the OS and the NVIDIA kernel driver to rediscover a portion of the PCI subsystem looking for GPUs that * were previously removed. The portion of the PCI tree can be narrowed by specifying a domain, bus, and device. * If all are zeroes then the entire PCI tree will be searched. Please note that for long-running NVML processes * the enumeration will change based on how many GPUs are discovered and where they are inserted in bus order. * * In addition, all newly discovered GPUs will be initialized and their ECC scrubbed which may take several seconds * per GPU. Also, all device handles are no longer guaranteed to be valid post discovery. * * Must be run as administrator. * For Linux only. * * For Pascal &tm; or newer fully supported devices. * Some Kepler devices supported. * * @param pciInfo The PCI tree to be searched. Only the domain, bus, and device * fields are used in this call. * * @return * - \ref NVML_SUCCESS if counters were successfully reset * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a pciInfo is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if the operating system does not support this feature * - \ref NVML_ERROR_OPERATING_SYSTEM if the operating system is denying this feature * - \ref NVML_ERROR_NO_PERMISSION if the calling process has insufficient permissions to perform operation * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceDiscoverGpus (nvmlPciInfo_t *pciInfo); /** @} */ /***************************************************************************************************/ /** @defgroup nvmlFieldValueQueries Field Value Queries * This chapter describes NVML operations that are associated with retrieving Field Values from NVML * @{ */ /***************************************************************************************************/ /** * Request values for a list of fields for a device. This API allows multiple fields to be queried at once. * If any of the underlying fieldIds are populated by the same driver call, the results for those field IDs * will be populated from a single call rather than making a driver call for each fieldId. * * @param device The device handle of the GPU to request field values for * @param valuesCount Number of entries in values that should be retrieved * @param values Array of \a valuesCount structures to hold field values. * Each value's fieldId must be populated prior to this call * * @return * - \ref NVML_SUCCESS if any values in \a values were populated. Note that you must * check the nvmlReturn field of each value for each individual * status * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a values is NULL */ nvmlReturn_t DECLDIR nvmlDeviceGetFieldValues(nvmlDevice_t device, int valuesCount, nvmlFieldValue_t *values); /** @} */ /***************************************************************************************************/ /** @defgroup vGPU Enums, Constants and Structs * @{ */ /** @} */ /***************************************************************************************************/ /***************************************************************************************************/ /** @defgroup nvmlVirtualGpuQueries vGPU APIs * This chapter describes operations that are associated with NVIDIA vGPU Software products. * @{ */ /***************************************************************************************************/ /** * This method is used to get the virtualization mode corresponding to the GPU. * * For Kepler &tm; or newer fully supported devices. * * @param device Identifier of the target device * @param pVirtualMode Reference to virtualization mode. One of NVML_GPU_VIRTUALIZATION_? * * @return * - \ref NVML_SUCCESS if \a pVirtualMode is fetched * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a pVirtualMode is NULL * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetVirtualizationMode(nvmlDevice_t device, nvmlGpuVirtualizationMode_t *pVirtualMode); /** * Queries if SR-IOV host operation is supported on a vGPU supported device. * * Checks whether SR-IOV host capability is supported by the device and the * driver, and indicates device is in SR-IOV mode if both of these conditions * are true. * * @param device The identifier of the target device * @param pHostVgpuMode Reference in which to return the current vGPU mode * * @return * - \ref NVML_SUCCESS if device's vGPU mode has been successfully retrieved * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device handle is 0 or \a pVgpuMode is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if \a device doesn't support this feature. * - \ref NVML_ERROR_UNKNOWN if any unexpected error occurred */ nvmlReturn_t DECLDIR nvmlDeviceGetHostVgpuMode(nvmlDevice_t device, nvmlHostVgpuMode_t *pHostVgpuMode); /** * This method is used to set the virtualization mode corresponding to the GPU. * * For Kepler &tm; or newer fully supported devices. * * @param device Identifier of the target device * @param virtualMode virtualization mode. One of NVML_GPU_VIRTUALIZATION_? * * @return * - \ref NVML_SUCCESS if \a pVirtualMode is set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a pVirtualMode is NULL * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_NOT_SUPPORTED if setting of virtualization mode is not supported. * - \ref NVML_ERROR_NO_PERMISSION if setting of virtualization mode is not allowed for this client. */ nvmlReturn_t DECLDIR nvmlDeviceSetVirtualizationMode(nvmlDevice_t device, nvmlGpuVirtualizationMode_t virtualMode); /** * Retrieve the vGPU Software licensable features. * * Identifies whether the system supports vGPU Software Licensing. If it does, return the list of licensable feature(s) * and their current license status. * * @param device Identifier of the target device * @param pGridLicensableFeatures Pointer to structure in which vGPU software licensable features are returned * * @return * - \ref NVML_SUCCESS if licensable features are successfully retrieved * - \ref NVML_ERROR_INVALID_ARGUMENT if \a pGridLicensableFeatures is NULL * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetGridLicensableFeatures_v4(nvmlDevice_t device, nvmlGridLicensableFeatures_t *pGridLicensableFeatures); /** * Retrieves the current utilization and process ID * * For Maxwell &tm; or newer fully supported devices. * * Reads recent utilization of GPU SM (3D/Compute), framebuffer, video encoder, and video decoder for processes running. * Utilization values are returned as an array of utilization sample structures in the caller-supplied buffer pointed at * by \a utilization. One utilization sample structure is returned per process running, that had some non-zero utilization * during the last sample period. It includes the CPU timestamp at which the samples were recorded. Individual utilization values * are returned as "unsigned int" values. * * To read utilization values, first determine the size of buffer required to hold the samples by invoking the function with * \a utilization set to NULL. The caller should allocate a buffer of size * processSamplesCount * sizeof(nvmlProcessUtilizationSample_t). Invoke the function again with the allocated buffer passed * in \a utilization, and \a processSamplesCount set to the number of entries the buffer is sized for. * * On successful return, the function updates \a processSamplesCount with the number of process utilization sample * structures that were actually written. This may differ from a previously read value as instances are created or * destroyed. * * lastSeenTimeStamp represents the CPU timestamp in microseconds at which utilization samples were last read. Set it to 0 * to read utilization based on all the samples maintained by the driver's internal sample buffer. Set lastSeenTimeStamp * to a timeStamp retrieved from a previous query to read utilization since the previous query. * * @note On MIG-enabled GPUs, querying process utilization is not currently supported. * * @param device The identifier of the target device * @param utilization Pointer to caller-supplied buffer in which guest process utilization samples are returned * @param processSamplesCount Pointer to caller-supplied array size, and returns number of processes running * @param lastSeenTimeStamp Return only samples with timestamp greater than lastSeenTimeStamp. * @return * - \ref NVML_SUCCESS if \a utilization has been populated * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a utilization is NULL, or \a samplingPeriodUs is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature * - \ref NVML_ERROR_NOT_FOUND if sample entries are not found * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetProcessUtilization(nvmlDevice_t device, nvmlProcessUtilizationSample_t *utilization, unsigned int *processSamplesCount, unsigned long long lastSeenTimeStamp); /** @} */ /***************************************************************************************************/ /** @defgroup nvmlVgpu vGPU Management * @{ * * This chapter describes APIs supporting NVIDIA vGPU. */ /***************************************************************************************************/ /** * Retrieve the supported vGPU types on a physical GPU (device). * * An array of supported vGPU types for the physical GPU indicated by \a device is returned in the caller-supplied buffer * pointed at by \a vgpuTypeIds. The element count of nvmlVgpuTypeId_t array is passed in \a vgpuCount, and \a vgpuCount * is used to return the number of vGPU types written to the buffer. * * If the supplied buffer is not large enough to accomodate the vGPU type array, the function returns * NVML_ERROR_INSUFFICIENT_SIZE, with the element count of nvmlVgpuTypeId_t array required in \a vgpuCount. * To query the number of vGPU types supported for the GPU, call this function with *vgpuCount = 0. * The code will return NVML_ERROR_INSUFFICIENT_SIZE, or NVML_SUCCESS if no vGPU types are supported. * * @param device The identifier of the target device * @param vgpuCount Pointer to caller-supplied array size, and returns number of vGPU types * @param vgpuTypeIds Pointer to caller-supplied array in which to return list of vGPU types * * @return * - \ref NVML_SUCCESS successful completion * - \ref NVML_ERROR_INSUFFICIENT_SIZE \a vgpuTypeIds buffer is too small, array element count is returned in \a vgpuCount * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuCount is NULL or \a device is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if vGPU is not supported by the device * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetSupportedVgpus(nvmlDevice_t device, unsigned int *vgpuCount, nvmlVgpuTypeId_t *vgpuTypeIds); /** * Retrieve the currently creatable vGPU types on a physical GPU (device). * * An array of creatable vGPU types for the physical GPU indicated by \a device is returned in the caller-supplied buffer * pointed at by \a vgpuTypeIds. The element count of nvmlVgpuTypeId_t array is passed in \a vgpuCount, and \a vgpuCount * is used to return the number of vGPU types written to the buffer. * * The creatable vGPU types for a device may differ over time, as there may be restrictions on what type of vGPU types * can concurrently run on a device. For example, if only one vGPU type is allowed at a time on a device, then the creatable * list will be restricted to whatever vGPU type is already running on the device. * * If the supplied buffer is not large enough to accomodate the vGPU type array, the function returns * NVML_ERROR_INSUFFICIENT_SIZE, with the element count of nvmlVgpuTypeId_t array required in \a vgpuCount. * To query the number of vGPU types createable for the GPU, call this function with *vgpuCount = 0. * The code will return NVML_ERROR_INSUFFICIENT_SIZE, or NVML_SUCCESS if no vGPU types are creatable. * * @param device The identifier of the target device * @param vgpuCount Pointer to caller-supplied array size, and returns number of vGPU types * @param vgpuTypeIds Pointer to caller-supplied array in which to return list of vGPU types * * @return * - \ref NVML_SUCCESS successful completion * - \ref NVML_ERROR_INSUFFICIENT_SIZE \a vgpuTypeIds buffer is too small, array element count is returned in \a vgpuCount * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuCount is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if vGPU is not supported by the device * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetCreatableVgpus(nvmlDevice_t device, unsigned int *vgpuCount, nvmlVgpuTypeId_t *vgpuTypeIds); /** * Retrieve the class of a vGPU type. It will not exceed 64 characters in length (including the NUL terminator). * See \ref nvmlConstants::NVML_DEVICE_NAME_BUFFER_SIZE. * * For Kepler &tm; or newer fully supported devices. * * @param vgpuTypeId Handle to vGPU type * @param vgpuTypeClass Pointer to string array to return class in * @param size Size of string * * @return * - \ref NVML_SUCCESS successful completion * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuTypeId is invalid, or \a vgpuTypeClass is NULL * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a size is too small * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuTypeGetClass(nvmlVgpuTypeId_t vgpuTypeId, char *vgpuTypeClass, unsigned int *size); /** * Retrieve the vGPU type name. * * The name is an alphanumeric string that denotes a particular vGPU, e.g. GRID M60-2Q. It will not * exceed 64 characters in length (including the NUL terminator). See \ref * nvmlConstants::NVML_DEVICE_NAME_BUFFER_SIZE. * * For Kepler &tm; or newer fully supported devices. * * @param vgpuTypeId Handle to vGPU type * @param vgpuTypeName Pointer to buffer to return name * @param size Size of buffer * * @return * - \ref NVML_SUCCESS successful completion * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuTypeId is invalid, or \a name is NULL * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a size is too small * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuTypeGetName(nvmlVgpuTypeId_t vgpuTypeId, char *vgpuTypeName, unsigned int *size); /** * Retrieve the GPU Instance Profile ID for the given vGPU type ID. * The API will return a valid GPU Instance Profile ID for the MIG capable vGPU types, else INVALID_GPU_INSTANCE_PROFILE_ID is * returned. * * For Kepler &tm; or newer fully supported devices. * * @param vgpuTypeId Handle to vGPU type * @param gpuInstanceProfileId GPU Instance Profile ID * * @return * - \ref NVML_SUCCESS successful completion * - \ref NVML_ERROR_NOT_SUPPORTED if \a device is not in vGPU Host virtualization mode * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuTypeId is invalid, or \a gpuInstanceProfileId is NULL * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuTypeGetGpuInstanceProfileId(nvmlVgpuTypeId_t vgpuTypeId, unsigned int *gpuInstanceProfileId); /** * Retrieve the device ID of a vGPU type. * * For Kepler &tm; or newer fully supported devices. * * @param vgpuTypeId Handle to vGPU type * @param deviceID Device ID and vendor ID of the device contained in single 32 bit value * @param subsystemID Subsytem ID and subsytem vendor ID of the device contained in single 32 bit value * * @return * - \ref NVML_SUCCESS successful completion * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuTypeId is invalid, or \a deviceId or \a subsystemID are NULL * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuTypeGetDeviceID(nvmlVgpuTypeId_t vgpuTypeId, unsigned long long *deviceID, unsigned long long *subsystemID); /** * Retrieve the vGPU framebuffer size in bytes. * * For Kepler &tm; or newer fully supported devices. * * @param vgpuTypeId Handle to vGPU type * @param fbSize Pointer to framebuffer size in bytes * * @return * - \ref NVML_SUCCESS successful completion * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuTypeId is invalid, or \a fbSize is NULL * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuTypeGetFramebufferSize(nvmlVgpuTypeId_t vgpuTypeId, unsigned long long *fbSize); /** * Retrieve count of vGPU's supported display heads. * * For Kepler &tm; or newer fully supported devices. * * @param vgpuTypeId Handle to vGPU type * @param numDisplayHeads Pointer to number of display heads * * @return * - \ref NVML_SUCCESS successful completion * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuTypeId is invalid, or \a numDisplayHeads is NULL * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuTypeGetNumDisplayHeads(nvmlVgpuTypeId_t vgpuTypeId, unsigned int *numDisplayHeads); /** * Retrieve vGPU display head's maximum supported resolution. * * For Kepler &tm; or newer fully supported devices. * * @param vgpuTypeId Handle to vGPU type * @param displayIndex Zero-based index of display head * @param xdim Pointer to maximum number of pixels in X dimension * @param ydim Pointer to maximum number of pixels in Y dimension * * @return * - \ref NVML_SUCCESS successful completion * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuTypeId is invalid, or \a xdim or \a ydim are NULL, or \a displayIndex * is out of range. * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuTypeGetResolution(nvmlVgpuTypeId_t vgpuTypeId, unsigned int displayIndex, unsigned int *xdim, unsigned int *ydim); /** * Retrieve license requirements for a vGPU type * * The license type and version required to run the specified vGPU type is returned as an alphanumeric string, in the form * ",", for example "GRID-Virtual-PC,2.0". If a vGPU is runnable with* more than one type of license, * the licenses are delimited by a semicolon, for example "GRID-Virtual-PC,2.0;GRID-Virtual-WS,2.0;GRID-Virtual-WS-Ext,2.0". * * The total length of the returned string will not exceed 128 characters, including the NUL terminator. * See \ref nvmlVgpuConstants::NVML_GRID_LICENSE_BUFFER_SIZE. * * For Kepler &tm; or newer fully supported devices. * * @param vgpuTypeId Handle to vGPU type * @param vgpuTypeLicenseString Pointer to buffer to return license info * @param size Size of \a vgpuTypeLicenseString buffer * * @return * - \ref NVML_SUCCESS successful completion * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuTypeId is invalid, or \a vgpuTypeLicenseString is NULL * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a size is too small * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuTypeGetLicense(nvmlVgpuTypeId_t vgpuTypeId, char *vgpuTypeLicenseString, unsigned int size); /** * Retrieve the static frame rate limit value of the vGPU type * * For Kepler &tm; or newer fully supported devices. * * @param vgpuTypeId Handle to vGPU type * @param frameRateLimit Reference to return the frame rate limit value * @return * - \ref NVML_SUCCESS successful completion * - \ref NVML_ERROR_NOT_SUPPORTED if frame rate limiter is turned off for the vGPU type * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuTypeId is invalid, or \a frameRateLimit is NULL * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuTypeGetFrameRateLimit(nvmlVgpuTypeId_t vgpuTypeId, unsigned int *frameRateLimit); /** * Retrieve the maximum number of vGPU instances creatable on a device for given vGPU type * * For Kepler &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param vgpuTypeId Handle to vGPU type * @param vgpuInstanceCount Pointer to get the max number of vGPU instances * that can be created on a deicve for given vgpuTypeId * @return * - \ref NVML_SUCCESS successful completion * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuTypeId is invalid or is not supported on target device, * or \a vgpuInstanceCount is NULL * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuTypeGetMaxInstances(nvmlDevice_t device, nvmlVgpuTypeId_t vgpuTypeId, unsigned int *vgpuInstanceCount); /** * Retrieve the maximum number of vGPU instances supported per VM for given vGPU type * * For Kepler &tm; or newer fully supported devices. * * @param vgpuTypeId Handle to vGPU type * @param vgpuInstanceCountPerVm Pointer to get the max number of vGPU instances supported per VM for given \a vgpuTypeId * @return * - \ref NVML_SUCCESS successful completion * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuTypeId is invalid, or \a vgpuInstanceCountPerVm is NULL * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuTypeGetMaxInstancesPerVm(nvmlVgpuTypeId_t vgpuTypeId, unsigned int *vgpuInstanceCountPerVm); /** * Retrieve the active vGPU instances on a device. * * An array of active vGPU instances is returned in the caller-supplied buffer pointed at by \a vgpuInstances. The * array elememt count is passed in \a vgpuCount, and \a vgpuCount is used to return the number of vGPU instances * written to the buffer. * * If the supplied buffer is not large enough to accomodate the vGPU instance array, the function returns * NVML_ERROR_INSUFFICIENT_SIZE, with the element count of nvmlVgpuInstance_t array required in \a vgpuCount. * To query the number of active vGPU instances, call this function with *vgpuCount = 0. The code will return * NVML_ERROR_INSUFFICIENT_SIZE, or NVML_SUCCESS if no vGPU Types are supported. * * For Kepler &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param vgpuCount Pointer which passes in the array size as well as get * back the number of types * @param vgpuInstances Pointer to array in which to return list of vGPU instances * * @return * - \ref NVML_SUCCESS successful completion * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a vgpuCount is NULL * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a size is too small * - \ref NVML_ERROR_NOT_SUPPORTED if vGPU is not supported by the device * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetActiveVgpus(nvmlDevice_t device, unsigned int *vgpuCount, nvmlVgpuInstance_t *vgpuInstances); /** * Retrieve the VM ID associated with a vGPU instance. * * The VM ID is returned as a string, not exceeding 80 characters in length (including the NUL terminator). * See \ref nvmlConstants::NVML_DEVICE_UUID_BUFFER_SIZE. * * The format of the VM ID varies by platform, and is indicated by the type identifier returned in \a vmIdType. * * For Kepler &tm; or newer fully supported devices. * * @param vgpuInstance Identifier of the target vGPU instance * @param vmId Pointer to caller-supplied buffer to hold VM ID * @param size Size of buffer in bytes * @param vmIdType Pointer to hold VM ID type * * @return * - \ref NVML_SUCCESS successful completion * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vmId or \a vmIdType is NULL, or \a vgpuInstance is 0 * - \ref NVML_ERROR_NOT_FOUND if \a vgpuInstance does not match a valid active vGPU instance on the system * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a size is too small * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetVmID(nvmlVgpuInstance_t vgpuInstance, char *vmId, unsigned int size, nvmlVgpuVmIdType_t *vmIdType); /** * Retrieve the UUID of a vGPU instance. * * The UUID is a globally unique identifier associated with the vGPU, and is returned as a 5-part hexadecimal string, * not exceeding 80 characters in length (including the NULL terminator). * See \ref nvmlConstants::NVML_DEVICE_UUID_BUFFER_SIZE. * * For Kepler &tm; or newer fully supported devices. * * @param vgpuInstance Identifier of the target vGPU instance * @param uuid Pointer to caller-supplied buffer to hold vGPU UUID * @param size Size of buffer in bytes * * @return * - \ref NVML_SUCCESS successful completion * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuInstance is 0, or \a uuid is NULL * - \ref NVML_ERROR_NOT_FOUND if \a vgpuInstance does not match a valid active vGPU instance on the system * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a size is too small * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetUUID(nvmlVgpuInstance_t vgpuInstance, char *uuid, unsigned int size); /** * Retrieve the NVIDIA driver version installed in the VM associated with a vGPU. * * The version is returned as an alphanumeric string in the caller-supplied buffer \a version. The length of the version * string will not exceed 80 characters in length (including the NUL terminator). * See \ref nvmlConstants::NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE. * * nvmlVgpuInstanceGetVmDriverVersion() may be called at any time for a vGPU instance. The guest VM driver version is * returned as "Not Available" if no NVIDIA driver is installed in the VM, or the VM has not yet booted to the point where the * NVIDIA driver is loaded and initialized. * * For Kepler &tm; or newer fully supported devices. * * @param vgpuInstance Identifier of the target vGPU instance * @param version Caller-supplied buffer to return driver version string * @param length Size of \a version buffer * * @return * - \ref NVML_SUCCESS if \a version has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuInstance is 0 * - \ref NVML_ERROR_NOT_FOUND if \a vgpuInstance does not match a valid active vGPU instance on the system * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a length is too small * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetVmDriverVersion(nvmlVgpuInstance_t vgpuInstance, char* version, unsigned int length); /** * Retrieve the framebuffer usage in bytes. * * Framebuffer usage is the amont of vGPU framebuffer memory that is currently in use by the VM. * * For Kepler &tm; or newer fully supported devices. * * @param vgpuInstance The identifier of the target instance * @param fbUsage Pointer to framebuffer usage in bytes * * @return * - \ref NVML_SUCCESS successful completion * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuInstance is 0, or \a fbUsage is NULL * - \ref NVML_ERROR_NOT_FOUND if \a vgpuInstance does not match a valid active vGPU instance on the system * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetFbUsage(nvmlVgpuInstance_t vgpuInstance, unsigned long long *fbUsage); /** * @deprecated Use \ref nvmlVgpuInstanceGetLicenseInfo_v2. * * Retrieve the current licensing state of the vGPU instance. * * If the vGPU is currently licensed, \a licensed is set to 1, otherwise it is set to 0. * * For Kepler &tm; or newer fully supported devices. * * @param vgpuInstance Identifier of the target vGPU instance * @param licensed Reference to return the licensing status * * @return * - \ref NVML_SUCCESS if \a licensed has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuInstance is 0, or \a licensed is NULL * - \ref NVML_ERROR_NOT_FOUND if \a vgpuInstance does not match a valid active vGPU instance on the system * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetLicenseStatus(nvmlVgpuInstance_t vgpuInstance, unsigned int *licensed); /** * Retrieve the vGPU type of a vGPU instance. * * Returns the vGPU type ID of vgpu assigned to the vGPU instance. * * For Kepler &tm; or newer fully supported devices. * * @param vgpuInstance Identifier of the target vGPU instance * @param vgpuTypeId Reference to return the vgpuTypeId * * @return * - \ref NVML_SUCCESS if \a vgpuTypeId has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuInstance is 0, or \a vgpuTypeId is NULL * - \ref NVML_ERROR_NOT_FOUND if \a vgpuInstance does not match a valid active vGPU instance on the system * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetType(nvmlVgpuInstance_t vgpuInstance, nvmlVgpuTypeId_t *vgpuTypeId); /** * Retrieve the frame rate limit set for the vGPU instance. * * Returns the value of the frame rate limit set for the vGPU instance * * For Kepler &tm; or newer fully supported devices. * * @param vgpuInstance Identifier of the target vGPU instance * @param frameRateLimit Reference to return the frame rate limit * * @return * - \ref NVML_SUCCESS if \a frameRateLimit has been set * - \ref NVML_ERROR_NOT_SUPPORTED if frame rate limiter is turned off for the vGPU type * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuInstance is 0, or \a frameRateLimit is NULL * - \ref NVML_ERROR_NOT_FOUND if \a vgpuInstance does not match a valid active vGPU instance on the system * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetFrameRateLimit(nvmlVgpuInstance_t vgpuInstance, unsigned int *frameRateLimit); /** * Retrieve the current ECC mode of vGPU instance. * * @param vgpuInstance The identifier of the target vGPU instance * @param eccMode Reference in which to return the current ECC mode * * @return * - \ref NVML_SUCCESS if the vgpuInstance's ECC mode has been successfully retrieved * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuInstance is 0, or \a mode is NULL * - \ref NVML_ERROR_NOT_FOUND if \a vgpuInstance does not match a valid active vGPU instance on the system * - \ref NVML_ERROR_NOT_SUPPORTED if the vGPU doesn't support this feature * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetEccMode(nvmlVgpuInstance_t vgpuInstance, nvmlEnableState_t *eccMode); /** * Retrieve the encoder capacity of a vGPU instance, as a percentage of maximum encoder capacity with valid values in the range 0-100. * * For Maxwell &tm; or newer fully supported devices. * * @param vgpuInstance Identifier of the target vGPU instance * @param encoderCapacity Reference to an unsigned int for the encoder capacity * * @return * - \ref NVML_SUCCESS if \a encoderCapacity has been retrived * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuInstance is 0, or \a encoderQueryType is invalid * - \ref NVML_ERROR_NOT_FOUND if \a vgpuInstance does not match a valid active vGPU instance on the system * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetEncoderCapacity(nvmlVgpuInstance_t vgpuInstance, unsigned int *encoderCapacity); /** * Set the encoder capacity of a vGPU instance, as a percentage of maximum encoder capacity with valid values in the range 0-100. * * For Maxwell &tm; or newer fully supported devices. * * @param vgpuInstance Identifier of the target vGPU instance * @param encoderCapacity Unsigned int for the encoder capacity value * * @return * - \ref NVML_SUCCESS if \a encoderCapacity has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuInstance is 0, or \a encoderCapacity is out of range of 0-100. * - \ref NVML_ERROR_NOT_FOUND if \a vgpuInstance does not match a valid active vGPU instance on the system * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuInstanceSetEncoderCapacity(nvmlVgpuInstance_t vgpuInstance, unsigned int encoderCapacity); /** * Retrieves the current encoder statistics of a vGPU Instance * * For Maxwell &tm; or newer fully supported devices. * * @param vgpuInstance Identifier of the target vGPU instance * @param sessionCount Reference to an unsigned int for count of active encoder sessions * @param averageFps Reference to an unsigned int for trailing average FPS of all active sessions * @param averageLatency Reference to an unsigned int for encode latency in microseconds * * @return * - \ref NVML_SUCCESS if \a sessionCount, \a averageFps and \a averageLatency is fetched * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a sessionCount , or \a averageFps or \a averageLatency is NULL * or \a vgpuInstance is 0. * - \ref NVML_ERROR_NOT_FOUND if \a vgpuInstance does not match a valid active vGPU instance on the system * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetEncoderStats(nvmlVgpuInstance_t vgpuInstance, unsigned int *sessionCount, unsigned int *averageFps, unsigned int *averageLatency); /** * Retrieves information about all active encoder sessions on a vGPU Instance. * * An array of active encoder sessions is returned in the caller-supplied buffer pointed at by \a sessionInfo. The * array element count is passed in \a sessionCount, and \a sessionCount is used to return the number of sessions * written to the buffer. * * If the supplied buffer is not large enough to accomodate the active session array, the function returns * NVML_ERROR_INSUFFICIENT_SIZE, with the element count of nvmlEncoderSessionInfo_t array required in \a sessionCount. * To query the number of active encoder sessions, call this function with *sessionCount = 0. The code will return * NVML_SUCCESS with number of active encoder sessions updated in *sessionCount. * * For Maxwell &tm; or newer fully supported devices. * * @param vgpuInstance Identifier of the target vGPU instance * @param sessionCount Reference to caller supplied array size, and returns * the number of sessions. * @param sessionInfo Reference to caller supplied array in which the list * of session information us returned. * * @return * - \ref NVML_SUCCESS if \a sessionInfo is fetched * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a sessionCount is too small, array element count is returned in \a sessionCount * - \ref NVML_ERROR_INVALID_ARGUMENT if \a sessionCount is NULL, or \a vgpuInstance is 0. * - \ref NVML_ERROR_NOT_FOUND if \a vgpuInstance does not match a valid active vGPU instance on the system * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetEncoderSessions(nvmlVgpuInstance_t vgpuInstance, unsigned int *sessionCount, nvmlEncoderSessionInfo_t *sessionInfo); /** * Retrieves the active frame buffer capture sessions statistics of a vGPU Instance * * For Maxwell &tm; or newer fully supported devices. * * @param vgpuInstance Identifier of the target vGPU instance * @param fbcStats Reference to nvmlFBCStats_t structure contianing NvFBC stats * * @return * - \ref NVML_SUCCESS if \a fbcStats is fetched * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuInstance is 0, or \a fbcStats is NULL * - \ref NVML_ERROR_NOT_FOUND if \a vgpuInstance does not match a valid active vGPU instance on the system * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetFBCStats(nvmlVgpuInstance_t vgpuInstance, nvmlFBCStats_t *fbcStats); /** * Retrieves information about active frame buffer capture sessions on a vGPU Instance. * * An array of active FBC sessions is returned in the caller-supplied buffer pointed at by \a sessionInfo. The * array element count is passed in \a sessionCount, and \a sessionCount is used to return the number of sessions * written to the buffer. * * If the supplied buffer is not large enough to accomodate the active session array, the function returns * NVML_ERROR_INSUFFICIENT_SIZE, with the element count of nvmlFBCSessionInfo_t array required in \a sessionCount. * To query the number of active FBC sessions, call this function with *sessionCount = 0. The code will return * NVML_SUCCESS with number of active FBC sessions updated in *sessionCount. * * For Maxwell &tm; or newer fully supported devices. * * @note hResolution, vResolution, averageFPS and averageLatency data for a FBC session returned in \a sessionInfo may * be zero if there are no new frames captured since the session started. * * @param vgpuInstance Identifier of the target vGPU instance * @param sessionCount Reference to caller supplied array size, and returns the number of sessions. * @param sessionInfo Reference in which to return the session information * * @return * - \ref NVML_SUCCESS if \a sessionInfo is fetched * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuInstance is 0, or \a sessionCount is NULL. * - \ref NVML_ERROR_NOT_FOUND if \a vgpuInstance does not match a valid active vGPU instance on the system * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a sessionCount is too small, array element count is returned in \a sessionCount * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetFBCSessions(nvmlVgpuInstance_t vgpuInstance, unsigned int *sessionCount, nvmlFBCSessionInfo_t *sessionInfo); /** * Retrieve the GPU Instance ID for the given vGPU Instance. * The API will return a valid GPU Instance ID for MIG backed vGPU Instance, else INVALID_GPU_INSTANCE_ID is returned. * * For Kepler &tm; or newer fully supported devices. * * @param vgpuInstance Identifier of the target vGPU instance * @param gpuInstanceId GPU Instance ID * * @return * - \ref NVML_SUCCESS successful completion * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuInstance is 0, or \a gpuInstanceId is NULL. * - \ref NVML_ERROR_NOT_FOUND if \a vgpuInstance does not match a valid active vGPU instance on the system * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetGpuInstanceId(nvmlVgpuInstance_t vgpuInstance, unsigned int *gpuInstanceId); /** * Retrieves the PCI Id of the given vGPU Instance i.e. the PCI Id of the GPU as seen inside the VM. * * The vGPU PCI id is returned as "00000000:00:00.0" if NVIDIA driver is not installed on the vGPU instance. * * @param vgpuInstance Identifier of the target vGPU instance * @param vgpuPciId Caller-supplied buffer to return vGPU PCI Id string * @param length Size of the vgpuPciId buffer * * @return * - \ref NVML_SUCCESS if vGPU PCI Id is sucessfully retrieved * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuInstance is 0, or \a vgpuPciId is NULL * - \ref NVML_ERROR_NOT_FOUND if \a vgpuInstance does not match a valid active vGPU instance on the system * - \ref NVML_ERROR_DRIVER_NOT_LOADED if NVIDIA driver is not running on the vGPU instance * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a length is too small, \a length is set to required length * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetGpuPciId(nvmlVgpuInstance_t vgpuInstance, char *vgpuPciId, unsigned int *length); /** @} */ /***************************************************************************************************/ /** @defgroup nvml vGPU Migration * This chapter describes operations that are associated with vGPU Migration. * @{ */ /***************************************************************************************************/ /** * Structure representing range of vGPU versions. */ typedef struct nvmlVgpuVersion_st { unsigned int minVersion; //!< Minimum vGPU version. unsigned int maxVersion; //!< Maximum vGPU version. } nvmlVgpuVersion_t; /** * vGPU metadata structure. */ typedef struct nvmlVgpuMetadata_st { unsigned int version; //!< Current version of the structure unsigned int revision; //!< Current revision of the structure nvmlVgpuGuestInfoState_t guestInfoState; //!< Current state of Guest-dependent fields char guestDriverVersion[NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE]; //!< Version of driver installed in guest char hostDriverVersion[NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE]; //!< Version of driver installed in host unsigned int reserved[6]; //!< Reserved for internal use unsigned int vgpuVirtualizationCaps; //!< vGPU virtualizaion capabilities bitfileld unsigned int guestVgpuVersion; //!< vGPU version of guest driver unsigned int opaqueDataSize; //!< Size of opaque data field in bytes char opaqueData[4]; //!< Opaque data } nvmlVgpuMetadata_t; /** * Physical GPU metadata structure */ typedef struct nvmlVgpuPgpuMetadata_st { unsigned int version; //!< Current version of the structure unsigned int revision; //!< Current revision of the structure char hostDriverVersion[NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE]; //!< Host driver version unsigned int pgpuVirtualizationCaps; //!< Pgpu virtualizaion capabilities bitfileld unsigned int reserved[5]; //!< Reserved for internal use nvmlVgpuVersion_t hostSupportedVgpuRange; //!< vGPU version range supported by host driver unsigned int opaqueDataSize; //!< Size of opaque data field in bytes char opaqueData[4]; //!< Opaque data } nvmlVgpuPgpuMetadata_t; /** * vGPU VM compatibility codes */ typedef enum nvmlVgpuVmCompatibility_enum { NVML_VGPU_VM_COMPATIBILITY_NONE = 0x0, //!< vGPU is not runnable NVML_VGPU_VM_COMPATIBILITY_COLD = 0x1, //!< vGPU is runnable from a cold / powered-off state (ACPI S5) NVML_VGPU_VM_COMPATIBILITY_HIBERNATE = 0x2, //!< vGPU is runnable from a hibernated state (ACPI S4) NVML_VGPU_VM_COMPATIBILITY_SLEEP = 0x4, //!< vGPU is runnable from a sleeped state (ACPI S3) NVML_VGPU_VM_COMPATIBILITY_LIVE = 0x8 //!< vGPU is runnable from a live/paused (ACPI S0) } nvmlVgpuVmCompatibility_t; /** * vGPU-pGPU compatibility limit codes */ typedef enum nvmlVgpuPgpuCompatibilityLimitCode_enum { NVML_VGPU_COMPATIBILITY_LIMIT_NONE = 0x0, //!< Compatibility is not limited. NVML_VGPU_COMPATIBILITY_LIMIT_HOST_DRIVER = 0x1, //!< ompatibility is limited by host driver version. NVML_VGPU_COMPATIBILITY_LIMIT_GUEST_DRIVER = 0x2, //!< Compatibility is limited by guest driver version. NVML_VGPU_COMPATIBILITY_LIMIT_GPU = 0x4, //!< Compatibility is limited by GPU hardware. NVML_VGPU_COMPATIBILITY_LIMIT_OTHER = 0x80000000 //!< Compatibility is limited by an undefined factor. } nvmlVgpuPgpuCompatibilityLimitCode_t; /** * vGPU-pGPU compatibility structure */ typedef struct nvmlVgpuPgpuCompatibility_st { nvmlVgpuVmCompatibility_t vgpuVmCompatibility; //!< Compatibility of vGPU VM. See \ref nvmlVgpuVmCompatibility_t nvmlVgpuPgpuCompatibilityLimitCode_t compatibilityLimitCode; //!< Limiting factor for vGPU-pGPU compatibility. See \ref nvmlVgpuPgpuCompatibilityLimitCode_t } nvmlVgpuPgpuCompatibility_t; /** * Returns vGPU metadata structure for a running vGPU. The structure contains information about the vGPU and its associated VM * such as the currently installed NVIDIA guest driver version, together with host driver version and an opaque data section * containing internal state. * * nvmlVgpuInstanceGetMetadata() may be called at any time for a vGPU instance. Some fields in the returned structure are * dependent on information obtained from the guest VM, which may not yet have reached a state where that information * is available. The current state of these dependent fields is reflected in the info structure's \ref nvmlVgpuGuestInfoState_t field. * * The VMM may choose to read and save the vGPU's VM info as persistent metadata associated with the VM, and provide * it to Virtual GPU Manager when creating a vGPU for subsequent instances of the VM. * * The caller passes in a buffer via \a vgpuMetadata, with the size of the buffer in \a bufferSize. If the vGPU Metadata structure * is too large to fit in the supplied buffer, the function returns NVML_ERROR_INSUFFICIENT_SIZE with the size needed * in \a bufferSize. * * @param vgpuInstance vGPU instance handle * @param vgpuMetadata Pointer to caller-supplied buffer into which vGPU metadata is written * @param bufferSize Size of vgpuMetadata buffer * * @return * - \ref NVML_SUCCESS vGPU metadata structure was successfully returned * - \ref NVML_ERROR_INSUFFICIENT_SIZE vgpuMetadata buffer is too small, required size is returned in \a bufferSize * - \ref NVML_ERROR_INVALID_ARGUMENT if \a bufferSize is NULL or \a vgpuInstance is 0; if \a vgpuMetadata is NULL and the value of \a bufferSize is not 0. * - \ref NVML_ERROR_NOT_FOUND if \a vgpuInstance does not match a valid active vGPU instance on the system * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetMetadata(nvmlVgpuInstance_t vgpuInstance, nvmlVgpuMetadata_t *vgpuMetadata, unsigned int *bufferSize); /** * Returns a vGPU metadata structure for the physical GPU indicated by \a device. The structure contains information about * the GPU and the currently installed NVIDIA host driver version that's controlling it, together with an opaque data section * containing internal state. * * The caller passes in a buffer via \a pgpuMetadata, with the size of the buffer in \a bufferSize. If the \a pgpuMetadata * structure is too large to fit in the supplied buffer, the function returns NVML_ERROR_INSUFFICIENT_SIZE with the size needed * in \a bufferSize. * * @param device The identifier of the target device * @param pgpuMetadata Pointer to caller-supplied buffer into which \a pgpuMetadata is written * @param bufferSize Pointer to size of \a pgpuMetadata buffer * * @return * - \ref NVML_SUCCESS GPU metadata structure was successfully returned * - \ref NVML_ERROR_INSUFFICIENT_SIZE pgpuMetadata buffer is too small, required size is returned in \a bufferSize * - \ref NVML_ERROR_INVALID_ARGUMENT if \a bufferSize is NULL or \a device is invalid; if \a pgpuMetadata is NULL and the value of \a bufferSize is not 0. * - \ref NVML_ERROR_NOT_SUPPORTED vGPU is not supported by the system * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetVgpuMetadata(nvmlDevice_t device, nvmlVgpuPgpuMetadata_t *pgpuMetadata, unsigned int *bufferSize); /** * Takes a vGPU instance metadata structure read from \ref nvmlVgpuInstanceGetMetadata(), and a vGPU metadata structure for a * physical GPU read from \ref nvmlDeviceGetVgpuMetadata(), and returns compatibility information of the vGPU instance and the * physical GPU. * * The caller passes in a buffer via \a compatibilityInfo, into which a compatibility information structure is written. The * structure defines the states in which the vGPU / VM may be booted on the physical GPU. If the vGPU / VM compatibility * with the physical GPU is limited, a limit code indicates the factor limiting compability. * (see \ref nvmlVgpuPgpuCompatibilityLimitCode_t for details). * * Note: vGPU compatibility does not take into account dynamic capacity conditions that may limit a system's ability to * boot a given vGPU or associated VM. * * @param vgpuMetadata Pointer to caller-supplied vGPU metadata structure * @param pgpuMetadata Pointer to caller-supplied GPU metadata structure * @param compatibilityInfo Pointer to caller-supplied buffer to hold compatibility info * * @return * - \ref NVML_SUCCESS vGPU metadata structure was successfully returned * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuMetadata or \a pgpuMetadata or \a bufferSize are NULL * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlGetVgpuCompatibility(nvmlVgpuMetadata_t *vgpuMetadata, nvmlVgpuPgpuMetadata_t *pgpuMetadata, nvmlVgpuPgpuCompatibility_t *compatibilityInfo); /** * Returns the properties of the physical GPU indicated by the device in an ascii-encoded string format. * * The caller passes in a buffer via \a pgpuMetadata, with the size of the buffer in \a bufferSize. If the * string is too large to fit in the supplied buffer, the function returns NVML_ERROR_INSUFFICIENT_SIZE with the size needed * in \a bufferSize. * * @param device The identifier of the target device * @param pgpuMetadata Pointer to caller-supplied buffer into which \a pgpuMetadata is written * @param bufferSize Pointer to size of \a pgpuMetadata buffer * * @return * - \ref NVML_SUCCESS GPU metadata structure was successfully returned * - \ref NVML_ERROR_INSUFFICIENT_SIZE \a pgpuMetadata buffer is too small, required size is returned in \a bufferSize * - \ref NVML_ERROR_INVALID_ARGUMENT if \a bufferSize is NULL or \a device is invalid; if \a pgpuMetadata is NULL and the value of \a bufferSize is not 0. * - \ref NVML_ERROR_NOT_SUPPORTED if vGPU is not supported by the system * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetPgpuMetadataString(nvmlDevice_t device, char *pgpuMetadata, unsigned int *bufferSize); /* * Virtual GPU (vGPU) version * * The NVIDIA vGPU Manager and the guest drivers are tagged with a range of supported vGPU versions. This determines the range of NVIDIA guest driver versions that * are compatible for vGPU feature support with a given NVIDIA vGPU Manager. For vGPU feature support, the range of supported versions for the NVIDIA vGPU Manager * and the guest driver must overlap. Otherwise, the guest driver fails to load in the VM. * * When the NVIDIA guest driver loads, either when the VM is booted or when the driver is installed or upgraded, a negotiation occurs between the guest driver * and the NVIDIA vGPU Manager to select the highest mutually compatible vGPU version. The negotiated vGPU version stays the same across VM migration. */ /** * Query the ranges of supported vGPU versions. * * This function gets the linear range of supported vGPU versions that is preset for the NVIDIA vGPU Manager and the range set by an administrator. * If the preset range has not been overridden by \ref nvmlSetVgpuVersion, both ranges are the same. * * The caller passes pointers to the following \ref nvmlVgpuVersion_t structures, into which the NVIDIA vGPU Manager writes the ranges: * 1. \a supported structure that represents the preset range of vGPU versions supported by the NVIDIA vGPU Manager. * 2. \a current structure that represents the range of supported vGPU versions set by an administrator. By default, this range is the same as the preset range. * * @param supported Pointer to the structure in which the preset range of vGPU versions supported by the NVIDIA vGPU Manager is written * @param current Pointer to the structure in which the range of supported vGPU versions set by an administrator is written * * @return * - \ref NVML_SUCCESS The vGPU version range structures were successfully obtained. * - \ref NVML_ERROR_NOT_SUPPORTED The API is not supported. * - \ref NVML_ERROR_INVALID_ARGUMENT The \a supported parameter or the \a current parameter is NULL. * - \ref NVML_ERROR_UNKNOWN An error occurred while the data was being fetched. */ nvmlReturn_t DECLDIR nvmlGetVgpuVersion(nvmlVgpuVersion_t *supported, nvmlVgpuVersion_t *current); /** * Override the preset range of vGPU versions supported by the NVIDIA vGPU Manager with a range set by an administrator. * * This function configures the NVIDIA vGPU Manager with a range of supported vGPU versions set by an administrator. This range must be a subset of the * preset range that the NVIDIA vGPU Manager supports. The custom range set by an administrator takes precedence over the preset range and is advertised to * the guest VM for negotiating the vGPU version. See \ref nvmlGetVgpuVersion for details of how to query the preset range of versions supported. * * This function takes a pointer to vGPU version range structure \ref nvmlVgpuVersion_t as input to override the preset vGPU version range that the NVIDIA vGPU Manager supports. * * After host system reboot or driver reload, the range of supported versions reverts to the range that is preset for the NVIDIA vGPU Manager. * * @note 1. The range set by the administrator must be a subset of the preset range that the NVIDIA vGPU Manager supports. Otherwise, an error is returned. * 2. If the range of supported guest driver versions does not overlap the range set by the administrator, the guest driver fails to load. * 3. If the range of supported guest driver versions overlaps the range set by the administrator, the guest driver will load with a negotiated * vGPU version that is the maximum value in the overlapping range. * 4. No VMs must be running on the host when this function is called. If a VM is running on the host, the call to this function fails. * * @param vgpuVersion Pointer to a caller-supplied range of supported vGPU versions. * * @return * - \ref NVML_SUCCESS The preset range of supported vGPU versions was successfully overridden. * - \ref NVML_ERROR_NOT_SUPPORTED The API is not supported. * - \ref NVML_ERROR_IN_USE The range was not overridden because a VM is running on the host. * - \ref NVML_ERROR_INVALID_ARGUMENT The \a vgpuVersion parameter specifies a range that is outside the range supported by the NVIDIA vGPU Manager or if \a vgpuVersion is NULL. */ nvmlReturn_t DECLDIR nvmlSetVgpuVersion(nvmlVgpuVersion_t *vgpuVersion); /** @} */ /***************************************************************************************************/ /** @defgroup nvmlUtil vGPU Utilization and Accounting * This chapter describes operations that are associated with vGPU Utilization and Accounting. * @{ */ /***************************************************************************************************/ /** * Retrieves current utilization for vGPUs on a physical GPU (device). * * For Kepler &tm; or newer fully supported devices. * * Reads recent utilization of GPU SM (3D/Compute), framebuffer, video encoder, and video decoder for vGPU instances running * on a device. Utilization values are returned as an array of utilization sample structures in the caller-supplied buffer * pointed at by \a utilizationSamples. One utilization sample structure is returned per vGPU instance, and includes the * CPU timestamp at which the samples were recorded. Individual utilization values are returned as "unsigned int" values * in nvmlValue_t unions. The function sets the caller-supplied \a sampleValType to NVML_VALUE_TYPE_UNSIGNED_INT to * indicate the returned value type. * * To read utilization values, first determine the size of buffer required to hold the samples by invoking the function with * \a utilizationSamples set to NULL. The function will return NVML_ERROR_INSUFFICIENT_SIZE, with the current vGPU instance * count in \a vgpuInstanceSamplesCount, or NVML_SUCCESS if the current vGPU instance count is zero. The caller should allocate * a buffer of size vgpuInstanceSamplesCount * sizeof(nvmlVgpuInstanceUtilizationSample_t). Invoke the function again with * the allocated buffer passed in \a utilizationSamples, and \a vgpuInstanceSamplesCount set to the number of entries the * buffer is sized for. * * On successful return, the function updates \a vgpuInstanceSampleCount with the number of vGPU utilization sample * structures that were actually written. This may differ from a previously read value as vGPU instances are created or * destroyed. * * lastSeenTimeStamp represents the CPU timestamp in microseconds at which utilization samples were last read. Set it to 0 * to read utilization based on all the samples maintained by the driver's internal sample buffer. Set lastSeenTimeStamp * to a timeStamp retrieved from a previous query to read utilization since the previous query. * * @param device The identifier for the target device * @param lastSeenTimeStamp Return only samples with timestamp greater than lastSeenTimeStamp. * @param sampleValType Pointer to caller-supplied buffer to hold the type of returned sample values * @param vgpuInstanceSamplesCount Pointer to caller-supplied array size, and returns number of vGPU instances * @param utilizationSamples Pointer to caller-supplied buffer in which vGPU utilization samples are returned * @return * - \ref NVML_SUCCESS if utilization samples are successfully retrieved * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a vgpuInstanceSamplesCount or \a sampleValType is * NULL, or a sample count of 0 is passed with a non-NULL \a utilizationSamples * - \ref NVML_ERROR_INSUFFICIENT_SIZE if supplied \a vgpuInstanceSamplesCount is too small to return samples for all * vGPU instances currently executing on the device * - \ref NVML_ERROR_NOT_SUPPORTED if vGPU is not supported by the device * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_NOT_FOUND if sample entries are not found * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetVgpuUtilization(nvmlDevice_t device, unsigned long long lastSeenTimeStamp, nvmlValueType_t *sampleValType, unsigned int *vgpuInstanceSamplesCount, nvmlVgpuInstanceUtilizationSample_t *utilizationSamples); /** * Retrieves current utilization for processes running on vGPUs on a physical GPU (device). * * For Maxwell &tm; or newer fully supported devices. * * Reads recent utilization of GPU SM (3D/Compute), framebuffer, video encoder, and video decoder for processes running on * vGPU instances active on a device. Utilization values are returned as an array of utilization sample structures in the * caller-supplied buffer pointed at by \a utilizationSamples. One utilization sample structure is returned per process running * on vGPU instances, that had some non-zero utilization during the last sample period. It includes the CPU timestamp at which * the samples were recorded. Individual utilization values are returned as "unsigned int" values. * * To read utilization values, first determine the size of buffer required to hold the samples by invoking the function with * \a utilizationSamples set to NULL. The function will return NVML_ERROR_INSUFFICIENT_SIZE, with the current vGPU instance * count in \a vgpuProcessSamplesCount. The caller should allocate a buffer of size * vgpuProcessSamplesCount * sizeof(nvmlVgpuProcessUtilizationSample_t). Invoke the function again with * the allocated buffer passed in \a utilizationSamples, and \a vgpuProcessSamplesCount set to the number of entries the * buffer is sized for. * * On successful return, the function updates \a vgpuSubProcessSampleCount with the number of vGPU sub process utilization sample * structures that were actually written. This may differ from a previously read value depending on the number of processes that are active * in any given sample period. * * lastSeenTimeStamp represents the CPU timestamp in microseconds at which utilization samples were last read. Set it to 0 * to read utilization based on all the samples maintained by the driver's internal sample buffer. Set lastSeenTimeStamp * to a timeStamp retrieved from a previous query to read utilization since the previous query. * * @param device The identifier for the target device * @param lastSeenTimeStamp Return only samples with timestamp greater than lastSeenTimeStamp. * @param vgpuProcessSamplesCount Pointer to caller-supplied array size, and returns number of processes running on vGPU instances * @param utilizationSamples Pointer to caller-supplied buffer in which vGPU sub process utilization samples are returned * @return * - \ref NVML_SUCCESS if utilization samples are successfully retrieved * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a vgpuProcessSamplesCount or a sample count of 0 is * passed with a non-NULL \a utilizationSamples * - \ref NVML_ERROR_INSUFFICIENT_SIZE if supplied \a vgpuProcessSamplesCount is too small to return samples for all * vGPU instances currently executing on the device * - \ref NVML_ERROR_NOT_SUPPORTED if vGPU is not supported by the device * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_NOT_FOUND if sample entries are not found * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetVgpuProcessUtilization(nvmlDevice_t device, unsigned long long lastSeenTimeStamp, unsigned int *vgpuProcessSamplesCount, nvmlVgpuProcessUtilizationSample_t *utilizationSamples); /** * Queries the state of per process accounting mode on vGPU. * * For Maxwell &tm; or newer fully supported devices. * * @param vgpuInstance The identifier of the target vGPU instance * @param mode Reference in which to return the current accounting mode * * @return * - \ref NVML_SUCCESS if the mode has been successfully retrieved * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuInstance is 0, or \a mode is NULL * - \ref NVML_ERROR_NOT_FOUND if \a vgpuInstance does not match a valid active vGPU instance on the system * - \ref NVML_ERROR_NOT_SUPPORTED if the vGPU doesn't support this feature * - \ref NVML_ERROR_DRIVER_NOT_LOADED if NVIDIA driver is not running on the vGPU instance * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetAccountingMode(nvmlVgpuInstance_t vgpuInstance, nvmlEnableState_t *mode); /** * Queries list of processes running on vGPU that can be queried for accounting stats. The list of processes * returned can be in running or terminated state. * * For Maxwell &tm; or newer fully supported devices. * * To just query the maximum number of processes that can be queried, call this function with *count = 0 and * pids=NULL. The return code will be NVML_ERROR_INSUFFICIENT_SIZE, or NVML_SUCCESS if list is empty. * * For more details see \ref nvmlVgpuInstanceGetAccountingStats. * * @note In case of PID collision some processes might not be accessible before the circular buffer is full. * * @param vgpuInstance The identifier of the target vGPU instance * @param count Reference in which to provide the \a pids array size, and * to return the number of elements ready to be queried * @param pids Reference in which to return list of process ids * * @return * - \ref NVML_SUCCESS if pids were successfully retrieved * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuInstance is 0, or \a count is NULL * - \ref NVML_ERROR_NOT_FOUND if \a vgpuInstance does not match a valid active vGPU instance on the system * - \ref NVML_ERROR_NOT_SUPPORTED if the vGPU doesn't support this feature or accounting mode is disabled * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a count is too small (\a count is set to expected value) * - \ref NVML_ERROR_UNKNOWN on any unexpected error * * @see nvmlVgpuInstanceGetAccountingPids */ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetAccountingPids(nvmlVgpuInstance_t vgpuInstance, unsigned int *count, unsigned int *pids); /** * Queries process's accounting stats. * * For Maxwell &tm; or newer fully supported devices. * * Accounting stats capture GPU utilization and other statistics across the lifetime of a process, and * can be queried during life time of the process or after its termination. * The time field in \ref nvmlAccountingStats_t is reported as 0 during the lifetime of the process and * updated to actual running time after its termination. * Accounting stats are kept in a circular buffer, newly created processes overwrite information about old * processes. * * See \ref nvmlAccountingStats_t for description of each returned metric. * List of processes that can be queried can be retrieved from \ref nvmlVgpuInstanceGetAccountingPids. * * @note Accounting Mode needs to be on. See \ref nvmlVgpuInstanceGetAccountingMode. * @note Only compute and graphics applications stats can be queried. Monitoring applications stats can't be * queried since they don't contribute to GPU utilization. * @note In case of pid collision stats of only the latest process (that terminated last) will be reported * * @param vgpuInstance The identifier of the target vGPU instance * @param pid Process Id of the target process to query stats for * @param stats Reference in which to return the process's accounting stats * * @return * - \ref NVML_SUCCESS if stats have been successfully retrieved * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuInstance is 0, or \a stats is NULL * - \ref NVML_ERROR_NOT_FOUND if \a vgpuInstance does not match a valid active vGPU instance on the system * or \a stats is not found * - \ref NVML_ERROR_NOT_SUPPORTED if the vGPU doesn't support this feature or accounting mode is disabled * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetAccountingStats(nvmlVgpuInstance_t vgpuInstance, unsigned int pid, nvmlAccountingStats_t *stats); /** * Clears accounting information of the vGPU instance that have already terminated. * * For Maxwell &tm; or newer fully supported devices. * Requires root/admin permissions. * * @note Accounting Mode needs to be on. See \ref nvmlVgpuInstanceGetAccountingMode. * @note Only compute and graphics applications stats are reported and can be cleared since monitoring applications * stats don't contribute to GPU utilization. * * @param vgpuInstance The identifier of the target vGPU instance * * @return * - \ref NVML_SUCCESS if accounting information has been cleared * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuInstance is invalid * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation * - \ref NVML_ERROR_NOT_SUPPORTED if the vGPU doesn't support this feature or accounting mode is disabled * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuInstanceClearAccountingPids(nvmlVgpuInstance_t vgpuInstance); /** * Query the license information of the vGPU instance. * * For Maxwell &tm; or newer fully supported devices. * * @param vgpuInstance Identifier of the target vGPU instance * @param licenseInfo Pointer to vGPU license information structure * * @return * - \ref NVML_SUCCESS if information is successfully retrieved * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuInstance is 0, or \a licenseInfo is NULL * - \ref NVML_ERROR_NOT_FOUND if \a vgpuInstance does not match a valid active vGPU instance on the system * - \ref NVML_ERROR_DRIVER_NOT_LOADED if NVIDIA driver is not running on the vGPU instance * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetLicenseInfo_v2(nvmlVgpuInstance_t vgpuInstance, nvmlVgpuLicenseInfo_t *licenseInfo); /** @} */ /***************************************************************************************************/ /** @defgroup nvmlExcludedGpuQueries Excluded GPU Queries * This chapter describes NVML operations that are associated with excluded GPUs. * @{ */ /***************************************************************************************************/ /** * Excluded GPU device information **/ typedef struct nvmlExcludedDeviceInfo_st { nvmlPciInfo_t pciInfo; //!< The PCI information for the excluded GPU char uuid[NVML_DEVICE_UUID_BUFFER_SIZE]; //!< The ASCII string UUID for the excluded GPU } nvmlExcludedDeviceInfo_t; /** * Retrieves the number of excluded GPU devices in the system. * * For all products. * * @param deviceCount Reference in which to return the number of excluded devices * * @return * - \ref NVML_SUCCESS if \a deviceCount has been set * - \ref NVML_ERROR_INVALID_ARGUMENT if \a deviceCount is NULL */ nvmlReturn_t DECLDIR nvmlGetExcludedDeviceCount(unsigned int *deviceCount); /** * Acquire the device information for an excluded GPU device, based on its index. * * For all products. * * Valid indices are derived from the \a deviceCount returned by * \ref nvmlGetExcludedDeviceCount(). For example, if \a deviceCount is 2 the valid indices * are 0 and 1, corresponding to GPU 0 and GPU 1. * * @param index The index of the target GPU, >= 0 and < \a deviceCount * @param info Reference in which to return the device information * * @return * - \ref NVML_SUCCESS if \a device has been set * - \ref NVML_ERROR_INVALID_ARGUMENT if \a index is invalid or \a info is NULL * * @see nvmlGetExcludedDeviceCount */ nvmlReturn_t DECLDIR nvmlGetExcludedDeviceInfoByIndex(unsigned int index, nvmlExcludedDeviceInfo_t *info); /** @} */ /***************************************************************************************************/ /** @defgroup nvmlMultiInstanceGPU Multi Instance GPU Management * This chapter describes NVML operations that are associated with Multi Instance GPU management. * @{ */ /***************************************************************************************************/ /** * Disable Multi Instance GPU mode. */ #define NVML_DEVICE_MIG_DISABLE 0x0 /** * Enable Multi Instance GPU mode. */ #define NVML_DEVICE_MIG_ENABLE 0x1 /** * GPU instance profiles. * * These macros should be passed to \ref nvmlDeviceGetGpuInstanceProfileInfo to retrieve the * detailed information about a GPU instance such as profile ID, engine counts. */ #define NVML_GPU_INSTANCE_PROFILE_1_SLICE 0x0 #define NVML_GPU_INSTANCE_PROFILE_2_SLICE 0x1 #define NVML_GPU_INSTANCE_PROFILE_3_SLICE 0x2 #define NVML_GPU_INSTANCE_PROFILE_4_SLICE 0x3 #define NVML_GPU_INSTANCE_PROFILE_7_SLICE 0x4 #define NVML_GPU_INSTANCE_PROFILE_8_SLICE 0x5 #define NVML_GPU_INSTANCE_PROFILE_6_SLICE 0x6 #define NVML_GPU_INSTANCE_PROFILE_1_SLICE_REV1 0x7 #define NVML_GPU_INSTANCE_PROFILE_COUNT 0x8 typedef struct nvmlGpuInstancePlacement_st { unsigned int start; //!< Index of first occupied memory slice unsigned int size; //!< Number of memory slices occupied } nvmlGpuInstancePlacement_t; /** * GPU instance profile information. */ typedef struct nvmlGpuInstanceProfileInfo_st { unsigned int id; //!< Unique profile ID within the device unsigned int isP2pSupported; //!< Peer-to-Peer support unsigned int sliceCount; //!< GPU Slice count unsigned int instanceCount; //!< GPU instance count unsigned int multiprocessorCount; //!< Streaming Multiprocessor count unsigned int copyEngineCount; //!< Copy Engine count unsigned int decoderCount; //!< Decoder Engine count unsigned int encoderCount; //!< Encoder Engine count unsigned int jpegCount; //!< JPEG Engine count unsigned int ofaCount; //!< OFA Engine count unsigned long long memorySizeMB; //!< Memory size in MBytes } nvmlGpuInstanceProfileInfo_t; /** * GPU instance profile information (v2). * * Version 2 adds the \ref nvmlGpuInstanceProfileInfo_v2_t.version field * to the start of the structure, and the \ref nvmlGpuInstanceProfileInfo_v2_t.name * field to the end. This structure is not backwards-compatible with * \ref nvmlGpuInstanceProfileInfo_t. */ typedef struct nvmlGpuInstanceProfileInfo_v2_st { unsigned int version; //!< Structure version identifier (set to \ref nvmlGpuInstanceProfileInfo_v2) unsigned int id; //!< Unique profile ID within the device unsigned int isP2pSupported; //!< Peer-to-Peer support unsigned int sliceCount; //!< GPU Slice count unsigned int instanceCount; //!< GPU instance count unsigned int multiprocessorCount; //!< Streaming Multiprocessor count unsigned int copyEngineCount; //!< Copy Engine count unsigned int decoderCount; //!< Decoder Engine count unsigned int encoderCount; //!< Encoder Engine count unsigned int jpegCount; //!< JPEG Engine count unsigned int ofaCount; //!< OFA Engine count unsigned long long memorySizeMB; //!< Memory size in MBytes char name[NVML_DEVICE_NAME_V2_BUFFER_SIZE]; //!< Profile name } nvmlGpuInstanceProfileInfo_v2_t; /** * Version identifier value for \ref nvmlGpuInstanceProfileInfo_v2_t.version. */ #define nvmlGpuInstanceProfileInfo_v2 NVML_STRUCT_VERSION(GpuInstanceProfileInfo, 2) typedef struct nvmlGpuInstanceInfo_st { nvmlDevice_t device; //!< Parent device unsigned int id; //!< Unique instance ID within the device unsigned int profileId; //!< Unique profile ID within the device nvmlGpuInstancePlacement_t placement; //!< Placement for this instance } nvmlGpuInstanceInfo_t; typedef struct { struct nvmlGpuInstance_st* handle; } nvmlGpuInstance_t; /** * Compute instance profiles. * * These macros should be passed to \ref nvmlGpuInstanceGetComputeInstanceProfileInfo to retrieve the * detailed information about a compute instance such as profile ID, engine counts */ #define NVML_COMPUTE_INSTANCE_PROFILE_1_SLICE 0x0 #define NVML_COMPUTE_INSTANCE_PROFILE_2_SLICE 0x1 #define NVML_COMPUTE_INSTANCE_PROFILE_3_SLICE 0x2 #define NVML_COMPUTE_INSTANCE_PROFILE_4_SLICE 0x3 #define NVML_COMPUTE_INSTANCE_PROFILE_7_SLICE 0x4 #define NVML_COMPUTE_INSTANCE_PROFILE_8_SLICE 0x5 #define NVML_COMPUTE_INSTANCE_PROFILE_6_SLICE 0x6 #define NVML_COMPUTE_INSTANCE_PROFILE_COUNT 0x7 #define NVML_COMPUTE_INSTANCE_ENGINE_PROFILE_SHARED 0x0 //!< All the engines except multiprocessors would be shared #define NVML_COMPUTE_INSTANCE_ENGINE_PROFILE_COUNT 0x1 typedef struct nvmlComputeInstancePlacement_st { unsigned int start; //!< Index of first occupied compute slice unsigned int size; //!< Number of compute slices occupied } nvmlComputeInstancePlacement_t; /** * Compute instance profile information. */ typedef struct nvmlComputeInstanceProfileInfo_st { unsigned int id; //!< Unique profile ID within the GPU instance unsigned int sliceCount; //!< GPU Slice count unsigned int instanceCount; //!< Compute instance count unsigned int multiprocessorCount; //!< Streaming Multiprocessor count unsigned int sharedCopyEngineCount; //!< Shared Copy Engine count unsigned int sharedDecoderCount; //!< Shared Decoder Engine count unsigned int sharedEncoderCount; //!< Shared Encoder Engine count unsigned int sharedJpegCount; //!< Shared JPEG Engine count unsigned int sharedOfaCount; //!< Shared OFA Engine count } nvmlComputeInstanceProfileInfo_t; /** * Compute instance profile information (v2). * * Version 2 adds the \ref nvmlComputeInstanceProfileInfo_v2_t.version field * to the start of the structure, and the \ref nvmlComputeInstanceProfileInfo_v2_t.name * field to the end. This structure is not backwards-compatible with * \ref nvmlComputeInstanceProfileInfo_t. */ typedef struct nvmlComputeInstanceProfileInfo_v2_st { unsigned int version; //!< Structure version identifier (set to \ref nvmlComputeInstanceProfileInfo_v2) unsigned int id; //!< Unique profile ID within the GPU instance unsigned int sliceCount; //!< GPU Slice count unsigned int instanceCount; //!< Compute instance count unsigned int multiprocessorCount; //!< Streaming Multiprocessor count unsigned int sharedCopyEngineCount; //!< Shared Copy Engine count unsigned int sharedDecoderCount; //!< Shared Decoder Engine count unsigned int sharedEncoderCount; //!< Shared Encoder Engine count unsigned int sharedJpegCount; //!< Shared JPEG Engine count unsigned int sharedOfaCount; //!< Shared OFA Engine count char name[NVML_DEVICE_NAME_V2_BUFFER_SIZE]; //!< Profile name } nvmlComputeInstanceProfileInfo_v2_t; /** * Version identifier value for \ref nvmlComputeInstanceProfileInfo_v2_t.version. */ #define nvmlComputeInstanceProfileInfo_v2 NVML_STRUCT_VERSION(ComputeInstanceProfileInfo, 2) typedef struct nvmlComputeInstanceInfo_st { nvmlDevice_t device; //!< Parent device nvmlGpuInstance_t gpuInstance; //!< Parent GPU instance unsigned int id; //!< Unique instance ID within the GPU instance unsigned int profileId; //!< Unique profile ID within the GPU instance nvmlComputeInstancePlacement_t placement; //!< Placement for this instance within the GPU instance's compute slice range {0, sliceCount} } nvmlComputeInstanceInfo_t; typedef struct { struct nvmlComputeInstance_st* handle; } nvmlComputeInstance_t; /** * Set MIG mode for the device. * * For Ampere &tm; or newer fully supported devices. * Requires root user. * * This mode determines whether a GPU instance can be created. * * This API may unbind or reset the device to activate the requested mode. Thus, the attributes associated with the * device, such as minor number, might change. The caller of this API is expected to query such attributes again. * * On certain platforms like pass-through virtualization, where reset functionality may not be exposed directly, VM * reboot is required. \a activationStatus would return \ref NVML_ERROR_RESET_REQUIRED for such cases. * * \a activationStatus would return the appropriate error code upon unsuccessful activation. For example, if device * unbind fails because the device isn't idle, \ref NVML_ERROR_IN_USE would be returned. The caller of this API * is expected to idle the device and retry setting the \a mode. * * @note On Windows, only disabling MIG mode is supported. \a activationStatus would return \ref * NVML_ERROR_NOT_SUPPORTED as GPU reset is not supported on Windows through this API. * * @param device The identifier of the target device * @param mode The mode to be set, \ref NVML_DEVICE_MIG_DISABLE or * \ref NVML_DEVICE_MIG_ENABLE * @param activationStatus The activationStatus status * * @return * - \ref NVML_SUCCESS Upon success * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device,\a mode or \a activationStatus are invalid * - \ref NVML_ERROR_NO_PERMISSION If user doesn't have permission to perform the operation * - \ref NVML_ERROR_NOT_SUPPORTED If \a device doesn't support MIG mode */ nvmlReturn_t DECLDIR nvmlDeviceSetMigMode(nvmlDevice_t device, unsigned int mode, nvmlReturn_t *activationStatus); /** * Get MIG mode for the device. * * For Ampere &tm; or newer fully supported devices. * * Changing MIG modes may require device unbind or reset. The "pending" MIG mode refers to the target mode following the * next activation trigger. * * @param device The identifier of the target device * @param currentMode Returns the current mode, \ref NVML_DEVICE_MIG_DISABLE or * \ref NVML_DEVICE_MIG_ENABLE * @param pendingMode Returns the pending mode, \ref NVML_DEVICE_MIG_DISABLE or * \ref NVML_DEVICE_MIG_ENABLE * * @return * - \ref NVML_SUCCESS Upon success * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device, \a currentMode or \a pendingMode are invalid * - \ref NVML_ERROR_NOT_SUPPORTED If \a device doesn't support MIG mode */ nvmlReturn_t DECLDIR nvmlDeviceGetMigMode(nvmlDevice_t device, unsigned int *currentMode, unsigned int *pendingMode); /** * Get GPU instance profile information. * * Information provided by this API is immutable throughout the lifetime of a MIG mode. * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. * * @param device The identifier of the target device * @param profile One of the NVML_GPU_INSTANCE_PROFILE_* * @param info Returns detailed profile information * * @return * - \ref NVML_SUCCESS Upon success * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device, \a profile or \a info are invalid * - \ref NVML_ERROR_NOT_SUPPORTED If \a device doesn't have MIG mode enabled or \a profile isn't supported * - \ref NVML_ERROR_NO_PERMISSION If user doesn't have permission to perform the operation */ nvmlReturn_t DECLDIR nvmlDeviceGetGpuInstanceProfileInfo(nvmlDevice_t device, unsigned int profile, nvmlGpuInstanceProfileInfo_t *info); /** * Versioned wrapper around \ref nvmlDeviceGetGpuInstanceProfileInfo that accepts a versioned * \ref nvmlGpuInstanceProfileInfo_v2_t or later output structure. * * @note The caller must set the \ref nvmlGpuInstanceProfileInfo_v2_t.version field to the * appropriate version prior to calling this function. For example: * \code * nvmlGpuInstanceProfileInfo_v2_t profileInfo = * { .version = nvmlGpuInstanceProfileInfo_v2 }; * nvmlReturn_t result = nvmlDeviceGetGpuInstanceProfileInfoV(device, * profile, * &profileInfo); * \endcode * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. * * @param device The identifier of the target device * @param profile One of the NVML_GPU_INSTANCE_PROFILE_* * @param info Returns detailed profile information * * @return * - \ref NVML_SUCCESS Upon success * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device, \a profile, \a info, or \a info->version are invalid * - \ref NVML_ERROR_NOT_SUPPORTED If \a device doesn't have MIG mode enabled or \a profile isn't supported * - \ref NVML_ERROR_NO_PERMISSION If user doesn't have permission to perform the operation */ nvmlReturn_t DECLDIR nvmlDeviceGetGpuInstanceProfileInfoV(nvmlDevice_t device, unsigned int profile, nvmlGpuInstanceProfileInfo_v2_t *info); /** * Get GPU instance placements. * * A placement represents the location of a GPU instance within a device. This API only returns all the possible * placements for the given profile. * A created GPU instance occupies memory slices described by its placement. Creation of new GPU instance will * fail if there is overlap with the already occupied memory slices. * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. * Requires privileged user. * * @param device The identifier of the target device * @param profileId The GPU instance profile ID. See \ref nvmlDeviceGetGpuInstanceProfileInfo * @param placements Returns placements allowed for the profile. Can be NULL to discover number * of allowed placements for this profile. If non-NULL must be large enough * to accommodate the placements supported by the profile. * @param count Returns number of allowed placemenets for the profile. * * @return * - \ref NVML_SUCCESS Upon success * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device, \a profileId or \a count are invalid * - \ref NVML_ERROR_NOT_SUPPORTED If \a device doesn't have MIG mode enabled or \a profileId isn't supported * - \ref NVML_ERROR_NO_PERMISSION If user doesn't have permission to perform the operation */ nvmlReturn_t DECLDIR nvmlDeviceGetGpuInstancePossiblePlacements_v2(nvmlDevice_t device, unsigned int profileId, nvmlGpuInstancePlacement_t *placements, unsigned int *count); /** * Get GPU instance profile capacity. * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. * Requires privileged user. * * @param device The identifier of the target device * @param profileId The GPU instance profile ID. See \ref nvmlDeviceGetGpuInstanceProfileInfo * @param count Returns remaining instance count for the profile ID * * @return * - \ref NVML_SUCCESS Upon success * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device, \a profileId or \a count are invalid * - \ref NVML_ERROR_NOT_SUPPORTED If \a device doesn't have MIG mode enabled or \a profileId isn't supported * - \ref NVML_ERROR_NO_PERMISSION If user doesn't have permission to perform the operation */ nvmlReturn_t DECLDIR nvmlDeviceGetGpuInstanceRemainingCapacity(nvmlDevice_t device, unsigned int profileId, unsigned int *count); /** * Create GPU instance. * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. * Requires privileged user. * * If the parent device is unbound, reset or the GPU instance is destroyed explicitly, the GPU instance handle would * become invalid. The GPU instance must be recreated to acquire a valid handle. * * @param device The identifier of the target device * @param profileId The GPU instance profile ID. See \ref nvmlDeviceGetGpuInstanceProfileInfo * @param gpuInstance Returns the GPU instance handle * * @return * - \ref NVML_SUCCESS Upon success * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device, \a profile, \a profileId or \a gpuInstance are invalid * - \ref NVML_ERROR_NOT_SUPPORTED If \a device doesn't have MIG mode enabled or in vGPU guest * - \ref NVML_ERROR_NO_PERMISSION If user doesn't have permission to perform the operation * - \ref NVML_ERROR_INSUFFICIENT_RESOURCES If the requested GPU instance could not be created */ nvmlReturn_t DECLDIR nvmlDeviceCreateGpuInstance(nvmlDevice_t device, unsigned int profileId, nvmlGpuInstance_t *gpuInstance); /** * Create GPU instance with the specified placement. * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. * Requires privileged user. * * If the parent device is unbound, reset or the GPU instance is destroyed explicitly, the GPU instance handle would * become invalid. The GPU instance must be recreated to acquire a valid handle. * * @param device The identifier of the target device * @param profileId The GPU instance profile ID. See \ref nvmlDeviceGetGpuInstanceProfileInfo * @param placement The requested placement. See \ref nvmlDeviceGetGpuInstancePossiblePlacements_v2 * @param gpuInstance Returns the GPU instance handle * * @return * - \ref NVML_SUCCESS Upon success * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device, \a profile, \a profileId, \a placement or \a gpuInstance * are invalid * - \ref NVML_ERROR_NOT_SUPPORTED If \a device doesn't have MIG mode enabled or in vGPU guest * - \ref NVML_ERROR_NO_PERMISSION If user doesn't have permission to perform the operation * - \ref NVML_ERROR_INSUFFICIENT_RESOURCES If the requested GPU instance could not be created */ nvmlReturn_t DECLDIR nvmlDeviceCreateGpuInstanceWithPlacement(nvmlDevice_t device, unsigned int profileId, const nvmlGpuInstancePlacement_t *placement, nvmlGpuInstance_t *gpuInstance); /** * Destroy GPU instance. * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. * Requires privileged user. * * @param gpuInstance The GPU instance handle * * @return * - \ref NVML_SUCCESS Upon success * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT If \a gpuInstance is invalid * - \ref NVML_ERROR_NOT_SUPPORTED If \a device doesn't have MIG mode enabled or in vGPU guest * - \ref NVML_ERROR_NO_PERMISSION If user doesn't have permission to perform the operation * - \ref NVML_ERROR_IN_USE If the GPU instance is in use. This error would be returned if processes * (e.g. CUDA application) or compute instances are active on the * GPU instance. */ nvmlReturn_t DECLDIR nvmlGpuInstanceDestroy(nvmlGpuInstance_t gpuInstance); /** * Get GPU instances for given profile ID. * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. * Requires privileged user. * * @param device The identifier of the target device * @param profileId The GPU instance profile ID. See \ref nvmlDeviceGetGpuInstanceProfileInfo * @param gpuInstances Returns pre-exiting GPU instances, the buffer must be large enough to * accommodate the instances supported by the profile. * See \ref nvmlDeviceGetGpuInstanceProfileInfo * @param count The count of returned GPU instances * * @return * - \ref NVML_SUCCESS Upon success * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device, \a profileId, \a gpuInstances or \a count are invalid * - \ref NVML_ERROR_NOT_SUPPORTED If \a device doesn't have MIG mode enabled * - \ref NVML_ERROR_NO_PERMISSION If user doesn't have permission to perform the operation */ nvmlReturn_t DECLDIR nvmlDeviceGetGpuInstances(nvmlDevice_t device, unsigned int profileId, nvmlGpuInstance_t *gpuInstances, unsigned int *count); /** * Get GPU instances for given instance ID. * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. * Requires privileged user. * * @param device The identifier of the target device * @param id The GPU instance ID * @param gpuInstance Returns GPU instance * * @return * - \ref NVML_SUCCESS Upon success * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device, \a id or \a gpuInstance are invalid * - \ref NVML_ERROR_NOT_SUPPORTED If \a device doesn't have MIG mode enabled * - \ref NVML_ERROR_NO_PERMISSION If user doesn't have permission to perform the operation * - \ref NVML_ERROR_NOT_FOUND If the GPU instance is not found. */ nvmlReturn_t DECLDIR nvmlDeviceGetGpuInstanceById(nvmlDevice_t device, unsigned int id, nvmlGpuInstance_t *gpuInstance); /** * Get GPU instance information. * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. * * @param gpuInstance The GPU instance handle * @param info Return GPU instance information * * @return * - \ref NVML_SUCCESS Upon success * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT If \a gpuInstance or \a info are invalid * - \ref NVML_ERROR_NO_PERMISSION If user doesn't have permission to perform the operation */ nvmlReturn_t DECLDIR nvmlGpuInstanceGetInfo(nvmlGpuInstance_t gpuInstance, nvmlGpuInstanceInfo_t *info); /** * Get compute instance profile information. * * Information provided by this API is immutable throughout the lifetime of a MIG mode. * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. * * @param gpuInstance The identifier of the target GPU instance * @param profile One of the NVML_COMPUTE_INSTANCE_PROFILE_* * @param engProfile One of the NVML_COMPUTE_INSTANCE_ENGINE_PROFILE_* * @param info Returns detailed profile information * * @return * - \ref NVML_SUCCESS Upon success * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT If \a gpuInstance, \a profile, \a engProfile or \a info are invalid * - \ref NVML_ERROR_NOT_SUPPORTED If \a profile isn't supported * - \ref NVML_ERROR_NO_PERMISSION If user doesn't have permission to perform the operation */ nvmlReturn_t DECLDIR nvmlGpuInstanceGetComputeInstanceProfileInfo(nvmlGpuInstance_t gpuInstance, unsigned int profile, unsigned int engProfile, nvmlComputeInstanceProfileInfo_t *info); /** * Versioned wrapper around \ref nvmlGpuInstanceGetComputeInstanceProfileInfo that accepts a versioned * \ref nvmlComputeInstanceProfileInfo_v2_t or later output structure. * * @note The caller must set the \ref nvmlGpuInstanceProfileInfo_v2_t.version field to the * appropriate version prior to calling this function. For example: * \code * nvmlComputeInstanceProfileInfo_v2_t profileInfo = * { .version = nvmlComputeInstanceProfileInfo_v2 }; * nvmlReturn_t result = nvmlGpuInstanceGetComputeInstanceProfileInfoV(gpuInstance, * profile, * engProfile, * &profileInfo); * \endcode * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. * * @param gpuInstance The identifier of the target GPU instance * @param profile One of the NVML_COMPUTE_INSTANCE_PROFILE_* * @param engProfile One of the NVML_COMPUTE_INSTANCE_ENGINE_PROFILE_* * @param info Returns detailed profile information * * @return * - \ref NVML_SUCCESS Upon success * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT If \a gpuInstance, \a profile, \a engProfile, \a info, or \a info->version are invalid * - \ref NVML_ERROR_NOT_SUPPORTED If \a profile isn't supported * - \ref NVML_ERROR_NO_PERMISSION If user doesn't have permission to perform the operation */ nvmlReturn_t DECLDIR nvmlGpuInstanceGetComputeInstanceProfileInfoV(nvmlGpuInstance_t gpuInstance, unsigned int profile, unsigned int engProfile, nvmlComputeInstanceProfileInfo_v2_t *info); /** * Get compute instance profile capacity. * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. * Requires privileged user. * * @param gpuInstance The identifier of the target GPU instance * @param profileId The compute instance profile ID. * See \ref nvmlGpuInstanceGetComputeInstanceProfileInfo * @param count Returns remaining instance count for the profile ID * * @return * - \ref NVML_SUCCESS Upon success * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT If \a gpuInstance, \a profileId or \a availableCount are invalid * - \ref NVML_ERROR_NOT_SUPPORTED If \a profileId isn't supported * - \ref NVML_ERROR_NO_PERMISSION If user doesn't have permission to perform the operation */ nvmlReturn_t DECLDIR nvmlGpuInstanceGetComputeInstanceRemainingCapacity(nvmlGpuInstance_t gpuInstance, unsigned int profileId, unsigned int *count); /** * Create compute instance. * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. * Requires privileged user. * * If the parent device is unbound, reset or the parent GPU instance is destroyed or the compute instance is destroyed * explicitly, the compute instance handle would become invalid. The compute instance must be recreated to acquire * a valid handle. * * @param gpuInstance The identifier of the target GPU instance * @param profileId The compute instance profile ID. * See \ref nvmlGpuInstanceGetComputeInstanceProfileInfo * @param computeInstance Returns the compute instance handle * * @return * - \ref NVML_SUCCESS Upon success * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT If \a gpuInstance, \a profile, \a profileId or \a computeInstance * are invalid * - \ref NVML_ERROR_NOT_SUPPORTED If \a profileId isn't supported * - \ref NVML_ERROR_NO_PERMISSION If user doesn't have permission to perform the operation * - \ref NVML_ERROR_INSUFFICIENT_RESOURCES If the requested compute instance could not be created */ nvmlReturn_t DECLDIR nvmlGpuInstanceCreateComputeInstance(nvmlGpuInstance_t gpuInstance, unsigned int profileId, nvmlComputeInstance_t *computeInstance); /** * Destroy compute instance. * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. * Requires privileged user. * * @param computeInstance The compute instance handle * * @return * - \ref NVML_SUCCESS Upon success * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT If \a computeInstance is invalid * - \ref NVML_ERROR_NO_PERMISSION If user doesn't have permission to perform the operation * - \ref NVML_ERROR_IN_USE If the compute instance is in use. This error would be returned if * processes (e.g. CUDA application) are active on the compute instance. */ nvmlReturn_t DECLDIR nvmlComputeInstanceDestroy(nvmlComputeInstance_t computeInstance); /** * Get compute instances for given profile ID. * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. * Requires privileged user. * * @param gpuInstance The identifier of the target GPU instance * @param profileId The compute instance profile ID. * See \ref nvmlGpuInstanceGetComputeInstanceProfileInfo * @param computeInstances Returns pre-exiting compute instances, the buffer must be large enough to * accommodate the instances supported by the profile. * See \ref nvmlGpuInstanceGetComputeInstanceProfileInfo * @param count The count of returned compute instances * * @return * - \ref NVML_SUCCESS Upon success * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT If \a gpuInstance, \a profileId, \a computeInstances or \a count * are invalid * - \ref NVML_ERROR_NOT_SUPPORTED If \a profileId isn't supported * - \ref NVML_ERROR_NO_PERMISSION If user doesn't have permission to perform the operation */ nvmlReturn_t DECLDIR nvmlGpuInstanceGetComputeInstances(nvmlGpuInstance_t gpuInstance, unsigned int profileId, nvmlComputeInstance_t *computeInstances, unsigned int *count); /** * Get compute instance for given instance ID. * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. * Requires privileged user. * * @param gpuInstance The identifier of the target GPU instance * @param id The compute instance ID * @param computeInstance Returns compute instance * * @return * - \ref NVML_SUCCESS Upon success * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device, \a ID or \a computeInstance are invalid * - \ref NVML_ERROR_NOT_SUPPORTED If \a device doesn't have MIG mode enabled * - \ref NVML_ERROR_NO_PERMISSION If user doesn't have permission to perform the operation * - \ref NVML_ERROR_NOT_FOUND If the compute instance is not found. */ nvmlReturn_t DECLDIR nvmlGpuInstanceGetComputeInstanceById(nvmlGpuInstance_t gpuInstance, unsigned int id, nvmlComputeInstance_t *computeInstance); /** * Get compute instance information. * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. * * @param computeInstance The compute instance handle * @param info Return compute instance information * * @return * - \ref NVML_SUCCESS Upon success * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT If \a computeInstance or \a info are invalid * - \ref NVML_ERROR_NO_PERMISSION If user doesn't have permission to perform the operation */ nvmlReturn_t DECLDIR nvmlComputeInstanceGetInfo_v2(nvmlComputeInstance_t computeInstance, nvmlComputeInstanceInfo_t *info); /** * Test if the given handle refers to a MIG device. * * A MIG device handle is an NVML abstraction which maps to a MIG compute instance. * These overloaded references can be used (with some restrictions) interchangeably * with a GPU device handle to execute queries at a per-compute instance granularity. * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. * * @param device NVML handle to test * @param isMigDevice True when handle refers to a MIG device * * @return * - \ref NVML_SUCCESS if \a device status was successfully retrieved * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device handle or \a isMigDevice reference is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if this check is not supported by the device * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceIsMigDeviceHandle(nvmlDevice_t device, unsigned int *isMigDevice); /** * Get GPU instance ID for the given MIG device handle. * * GPU instance IDs are unique per device and remain valid until the GPU instance is destroyed. * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. * * @param device Target MIG device handle * @param id GPU instance ID * * @return * - \ref NVML_SUCCESS if instance ID was successfully retrieved * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device or \a id reference is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetGpuInstanceId(nvmlDevice_t device, unsigned int *id); /** * Get compute instance ID for the given MIG device handle. * * Compute instance IDs are unique per GPU instance and remain valid until the compute instance * is destroyed. * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. * * @param device Target MIG device handle * @param id Compute instance ID * * @return * - \ref NVML_SUCCESS if instance ID was successfully retrieved * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device or \a id reference is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetComputeInstanceId(nvmlDevice_t device, unsigned int *id); /** * Get the maximum number of MIG devices that can exist under a given parent NVML device. * * Returns zero if MIG is not supported or enabled. * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. * * @param device Target device handle * @param count Count of MIG devices * * @return * - \ref NVML_SUCCESS if \a count was successfully retrieved * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device or \a count reference is invalid * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetMaxMigDeviceCount(nvmlDevice_t device, unsigned int *count); /** * Get MIG device handle for the given index under its parent NVML device. * * If the compute instance is destroyed either explicitly or by destroying, * resetting or unbinding the parent GPU instance or the GPU device itself * the MIG device handle would remain invalid and must be requested again * using this API. Handles may be reused and their properties can change in * the process. * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. * * @param device Reference to the parent GPU device handle * @param index Index of the MIG device * @param migDevice Reference to the MIG device handle * * @return * - \ref NVML_SUCCESS if \a migDevice handle was successfully created * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a index or \a migDevice reference is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device * - \ref NVML_ERROR_NOT_FOUND if no valid MIG device was found at \a index * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetMigDeviceHandleByIndex(nvmlDevice_t device, unsigned int index, nvmlDevice_t *migDevice); /** * Get parent device handle from a MIG device handle. * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. * * @param migDevice MIG device handle * @param device Device handle * * @return * - \ref NVML_SUCCESS if \a device handle was successfully created * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a migDevice or \a device is invalid * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetDeviceHandleFromMigDeviceHandle(nvmlDevice_t migDevice, nvmlDevice_t *device); /** * Get the type of the GPU Bus (PCIe, PCI, ...) * * @param device The identifier of the target device * @param type The PCI Bus type * * return * - \ref NVML_SUCCESS if the bus \a type is successfully retreived * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \device is invalid or \type is NULL * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetBusType(nvmlDevice_t device, nvmlBusType_t *type); /** @} */ /** * NVML API versioning support */ #ifdef NVML_NO_UNVERSIONED_FUNC_DEFS nvmlReturn_t DECLDIR nvmlInit(void); nvmlReturn_t DECLDIR nvmlDeviceGetCount(unsigned int *deviceCount); nvmlReturn_t DECLDIR nvmlDeviceGetHandleByIndex(unsigned int index, nvmlDevice_t *device); nvmlReturn_t DECLDIR nvmlDeviceGetHandleByPciBusId(const char *pciBusId, nvmlDevice_t *device); nvmlReturn_t DECLDIR nvmlDeviceGetPciInfo(nvmlDevice_t device, nvmlPciInfo_t *pci); nvmlReturn_t DECLDIR nvmlDeviceGetPciInfo_v2(nvmlDevice_t device, nvmlPciInfo_t *pci); nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkRemotePciInfo(nvmlDevice_t device, unsigned int link, nvmlPciInfo_t *pci); nvmlReturn_t DECLDIR nvmlDeviceGetGridLicensableFeatures(nvmlDevice_t device, nvmlGridLicensableFeatures_t *pGridLicensableFeatures); nvmlReturn_t DECLDIR nvmlDeviceGetGridLicensableFeatures_v2(nvmlDevice_t device, nvmlGridLicensableFeatures_t *pGridLicensableFeatures); nvmlReturn_t DECLDIR nvmlDeviceGetGridLicensableFeatures_v3(nvmlDevice_t device, nvmlGridLicensableFeatures_t *pGridLicensableFeatures); nvmlReturn_t DECLDIR nvmlDeviceRemoveGpu(nvmlPciInfo_t *pciInfo); nvmlReturn_t DECLDIR nvmlEventSetWait(nvmlEventSet_t set, nvmlEventData_t * data, unsigned int timeoutms); nvmlReturn_t DECLDIR nvmlDeviceGetAttributes(nvmlDevice_t device, nvmlDeviceAttributes_t *attributes); nvmlReturn_t DECLDIR nvmlComputeInstanceGetInfo(nvmlComputeInstance_t computeInstance, nvmlComputeInstanceInfo_t *info); nvmlReturn_t DECLDIR nvmlDeviceGetComputeRunningProcesses(nvmlDevice_t device, unsigned int *infoCount, nvmlProcessInfo_v1_t *infos); nvmlReturn_t DECLDIR nvmlDeviceGetComputeRunningProcesses_v2(nvmlDevice_t device, unsigned int *infoCount, nvmlProcessInfo_v2_t *infos); nvmlReturn_t DECLDIR nvmlDeviceGetGraphicsRunningProcesses(nvmlDevice_t device, unsigned int *infoCount, nvmlProcessInfo_v1_t *infos); nvmlReturn_t DECLDIR nvmlDeviceGetGraphicsRunningProcesses_v2(nvmlDevice_t device, unsigned int *infoCount, nvmlProcessInfo_v2_t *infos); nvmlReturn_t DECLDIR nvmlDeviceGetMPSComputeRunningProcesses(nvmlDevice_t device, unsigned int *infoCount, nvmlProcessInfo_v1_t *infos); nvmlReturn_t DECLDIR nvmlDeviceGetMPSComputeRunningProcesses_v2(nvmlDevice_t device, unsigned int *infoCount, nvmlProcessInfo_v2_t *infos); nvmlReturn_t DECLDIR nvmlDeviceGetGpuInstancePossiblePlacements(nvmlDevice_t device, unsigned int profileId, nvmlGpuInstancePlacement_t *placements, unsigned int *count); nvmlReturn_t DECLDIR nvmlVgpuInstanceGetLicenseInfo(nvmlVgpuInstance_t vgpuInstance, nvmlVgpuLicenseInfo_t *licenseInfo); #endif // #ifdef NVML_NO_UNVERSIONED_FUNC_DEFS #if defined(NVML_NO_UNVERSIONED_FUNC_DEFS) // We don't define APIs to run new versions if this guard is present so there is // no need to undef #elif defined(__NVML_API_VERSION_INTERNAL) #undef nvmlDeviceGetGraphicsRunningProcesses #undef nvmlDeviceGetComputeRunningProcesses #undef nvmlDeviceGetMPSComputeRunningProcesses #undef nvmlDeviceGetAttributes #undef nvmlComputeInstanceGetInfo #undef nvmlEventSetWait #undef nvmlDeviceGetGridLicensableFeatures #undef nvmlDeviceRemoveGpu #undef nvmlDeviceGetNvLinkRemotePciInfo #undef nvmlDeviceGetPciInfo #undef nvmlDeviceGetCount #undef nvmlDeviceGetHandleByIndex #undef nvmlDeviceGetHandleByPciBusId #undef nvmlInit #undef nvmlBlacklistDeviceInfo_t #undef nvmlGetBlacklistDeviceCount #undef nvmlGetBlacklistDeviceInfoByIndex #undef nvmlDeviceGetGpuInstancePossiblePlacements #undef nvmlVgpuInstanceGetLicenseInfo #endif #ifdef __cplusplus } #endif #endif