From 7eb435eb730f28e2cd95baeb6e827e18400051e9 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 1 Feb 2023 16:17:57 +0100 Subject: [PATCH] Add basic dxcore bindings This change copies dxcore.h and dxcore.c from libnvidia-container to allow for the driver store path to be queried. Modifications are made to dxcore to remove the code associated with checking the components in the driver store path. Signed-off-by: Evan Lezar --- internal/dxcore/api.go | 58 +++++++ internal/dxcore/dxcore.c | 334 ++++++++++++++++++++++++++++++++++++++ internal/dxcore/dxcore.go | 59 +++++++ internal/dxcore/dxcore.h | 39 +++++ 4 files changed, 490 insertions(+) create mode 100644 internal/dxcore/api.go create mode 100644 internal/dxcore/dxcore.c create mode 100644 internal/dxcore/dxcore.go create mode 100644 internal/dxcore/dxcore.h diff --git a/internal/dxcore/api.go b/internal/dxcore/api.go new file mode 100644 index 00000000..4408c29a --- /dev/null +++ b/internal/dxcore/api.go @@ -0,0 +1,58 @@ +/** +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+**/ + +package dxcore + +import ( + "github.com/NVIDIA/go-nvml/pkg/dl" +) + +const ( + libraryName = "libdxcore.so" + libraryLoadFlags = dl.RTLD_LAZY | dl.RTLD_GLOBAL +) + +// dxcore stores a reference to the dxcore context; it is nil until Init succeeds and again after Shutdown +var dxcore *context + +// Init initializes the dxcore dynamic library +func Init() error { + c, err := initContext() + if err != nil { + return err + } + dxcore = c + return nil +} + +// Shutdown releases the dxcore context (if one is initialized) and clears the package-level reference so that it cannot be released twice +func Shutdown() error { + if dxcore != nil && dxcore.initialized != 0 { + dxcore.deinitContext() + } + dxcore = nil + return nil +} + +// GetDriverStorePaths returns the list of driver store paths; the result is nil when no context is available +func GetDriverStorePaths() []string { + var paths []string + for i := 0; dxcore != nil && i < dxcore.getAdapterCount(); i++ { + paths = append(paths, dxcore.getAdapter(i).getDriverStorePath()) + } + + return paths +} diff --git a/internal/dxcore/dxcore.c b/internal/dxcore/dxcore.c new file mode 100644 index 00000000..0b61143f --- /dev/null +++ b/internal/dxcore/dxcore.c @@ -0,0 +1,334 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + */ + +#include <dlfcn.h> +#include <stdlib.h> + +#include "dxcore.h" + +// We define log_write as an empty macro to allow dxcore to remain unchanged. +#define log_write(...) + +// We define the following macros to allow dxcore to remain largely unchanged. +#define log_info(msg) log_write('I', __FILE__, __LINE__, msg) +#define log_warn(msg) log_write('W', __FILE__, __LINE__, msg) +#define log_err(msg) log_write('E', __FILE__, __LINE__, msg) +#define log_infof(fmt, ...) log_write('I', __FILE__, __LINE__, fmt, __VA_ARGS__) +#define log_warnf(fmt, ...) log_write('W', __FILE__, __LINE__, fmt, __VA_ARGS__) +#define log_errf(fmt, ...)
log_write('E', __FILE__, __LINE__, fmt, __VA_ARGS__) + + +#define DXCORE_MAX_PATH 260 + +/* + * List of components we expect to find in the driver store that we need to mount + */ +static const char * const dxcore_nvidia_driver_store_components[] = { + "libcuda.so.1.1", /* Core library for cuda support */ + "libcuda_loader.so", /* Core library for cuda support on WSL */ + "libnvidia-ptxjitcompiler.so.1", /* Core library for PTX Jit support */ + "libnvidia-ml.so.1", /* Core library for nvml */ + "libnvidia-ml_loader.so", /* Core library for nvml on WSL */ + "nvidia-smi", /* nvidia-smi binary*/ + "nvcubins.bin", /* Binary containing GPU code for cuda */ +}; + + +/* + * List of functions and structures we need to communicate with libdxcore. + * Documentation on these functions can be found on docs.microsoft.com in d3dkmthk. + */ + +struct dxcore_enumAdapters2; +struct dxcore_queryAdapterInfo; + +typedef int(*pfnDxcoreEnumAdapters2)(struct dxcore_enumAdapters2* pParams); +typedef int(*pfnDxcoreQueryAdapterInfo)(struct dxcore_queryAdapterInfo* pParams); + +struct dxcore_lib { + void* hDxcoreLib; + pfnDxcoreEnumAdapters2 pDxcoreEnumAdapters2; + pfnDxcoreQueryAdapterInfo pDxcoreQueryAdapterInfo; +}; + +struct dxcore_adapterInfo +{ + unsigned int hAdapter; + struct dxcore_luid AdapterLuid; + unsigned int NumOfSources; + unsigned int bPresentMoveRegionsPreferred; +}; + +struct dxcore_enumAdapters2 +{ + unsigned int NumAdapters; + struct dxcore_adapterInfo *pAdapters; +}; + +enum dxcore_kmtqueryAdapterInfoType +{ + DXCORE_QUERYDRIVERVERSION = 13, + DXCORE_QUERYREGISTRY = 48, +}; + +enum dxcore_queryregistry_type { + DXCORE_QUERYREGISTRY_DRIVERSTOREPATH = 2, + DXCORE_QUERYREGISTRY_DRIVERIMAGEPATH = 3, +}; + +enum dxcore_queryregistry_status { + DXCORE_QUERYREGISTRY_STATUS_SUCCESS = 0, + DXCORE_QUERYREGISTRY_STATUS_BUFFER_OVERFLOW = 1, + DXCORE_QUERYREGISTRY_STATUS_FAIL = 2, +}; + +struct dxcore_queryregistry_info { + enum dxcore_queryregistry_type QueryType; + unsigned int
+ QueryFlags; + wchar_t ValueName[DXCORE_MAX_PATH]; + unsigned int ValueType; + unsigned int PhysicalAdapterIndex; + unsigned int OutputValueSize; + enum dxcore_queryregistry_status Status; + union { + unsigned long long OutputQword; + wchar_t Output; + }; +}; + +struct dxcore_queryAdapterInfo +{ + unsigned int hAdapter; + enum dxcore_kmtqueryAdapterInfoType Type; + void *pPrivateDriverData; + unsigned int PrivateDriverDataSize; +}; + +static int dxcore_query_adapter_info_helper(struct dxcore_lib* pLib, + unsigned int hAdapter, + enum dxcore_kmtqueryAdapterInfoType type, + void* pPrivateDriverDate, + unsigned int privateDriverDataSize) +{ + struct dxcore_queryAdapterInfo queryAdapterInfo = { 0 }; + + queryAdapterInfo.hAdapter = hAdapter; + queryAdapterInfo.Type = type; + queryAdapterInfo.pPrivateDriverData = pPrivateDriverDate; + queryAdapterInfo.PrivateDriverDataSize = privateDriverDataSize; + + return pLib->pDxcoreQueryAdapterInfo(&queryAdapterInfo); +} + +static int dxcore_query_adapter_wddm_version(struct dxcore_lib* pLib, unsigned int hAdapter, unsigned int* version) +{ + return dxcore_query_adapter_info_helper(pLib, + hAdapter, + DXCORE_QUERYDRIVERVERSION, + (void*)version, + sizeof(*version)); +} + +static int dxcore_query_adapter_driverstore(struct dxcore_lib* pLib, unsigned int hAdapter, char** ppDriverStorePath) +{ + struct dxcore_queryregistry_info params = {0}; + struct dxcore_queryregistry_info* pValue = NULL; + wchar_t* pOutput; + size_t outputSizeInBytes; + size_t outputSize; + + params.QueryType = DXCORE_QUERYREGISTRY_DRIVERSTOREPATH; + + if (dxcore_query_adapter_info_helper(pLib, + hAdapter, + DXCORE_QUERYREGISTRY, + (void*)&params, + sizeof(params))) + { + log_err("Failed to query driver store path size for the WDDM Adapter"); + return (-1); + } + + if (params.OutputValueSize > DXCORE_MAX_PATH * sizeof(wchar_t)) { + log_err("The driver store path size returned by dxcore is not valid"); + return (-1); + } + + outputSizeInBytes =
+ (size_t)params.OutputValueSize; + outputSize = outputSizeInBytes / sizeof(wchar_t); + + pValue = calloc(sizeof(struct dxcore_queryregistry_info) + outputSizeInBytes + sizeof(wchar_t), 1); + if (!pValue) { + log_err("Out of memory while allocating temp buffer to query adapter info"); + return (-1); + } + + pValue->QueryType = DXCORE_QUERYREGISTRY_DRIVERSTOREPATH; + pValue->OutputValueSize = (unsigned int)outputSizeInBytes; + + if (dxcore_query_adapter_info_helper(pLib, + hAdapter, + DXCORE_QUERYREGISTRY, + (void*)pValue, + (unsigned int)(sizeof(struct dxcore_queryregistry_info) + outputSizeInBytes))) + { + log_err("Failed to query driver store path data for the WDDM Adapter"); + free(pValue); + return (-1); + } + pOutput = (wchar_t*)(&pValue->Output); + + // Make sure no matter what happened the wchar_t string is null terminated + pOutput[outputSize] = L'\0'; + + // Convert the output into a regular c string + *ppDriverStorePath = (char*)calloc(outputSize + 1, sizeof(char)); + if (!*ppDriverStorePath) { + log_err("Out of memory while allocating the buffer for the driver store path"); + free(pValue); + return (-1); + } + wcstombs(*ppDriverStorePath, pOutput, outputSize); + + free(pValue); + + return 0; +} + +static void dxcore_add_adapter(struct dxcore_context* pCtx, struct dxcore_lib* pLib, struct dxcore_adapterInfo *pAdapterInfo) +{ + unsigned int wddmVersion = 0; + char* driverStorePath = NULL; + + log_infof("Creating a new WDDM Adapter for hAdapter:%x luid:%llx", pAdapterInfo->hAdapter, *((unsigned long long*)&pAdapterInfo->AdapterLuid)); + + if (dxcore_query_adapter_wddm_version(pLib, pAdapterInfo->hAdapter, &wddmVersion)) { + log_err("Failed to query the WDDM version for the specified adapter. Skipping it."); + return; + } + + if (wddmVersion < 2700) { + log_err("Found a WDDM adapter running a driver with pre-WDDM 2.7 .
Skipping it."); + return; + } + + if (dxcore_query_adapter_driverstore(pLib, pAdapterInfo->hAdapter, &driverStorePath)) { + log_err("Failed to query driver store path for the WDDM Adapter . Skipping it."); + return; + } + + // We got all the info we needed. Adding it to the tracking structure. + { + struct dxcore_adapter* newList; + newList = realloc(pCtx->adapterList, sizeof(struct dxcore_adapter) * (pCtx->adapterCount + 1)); + if (!newList) { + log_err("Out of memory when trying to add a new WDDM Adapter to the list of valid adapters"); + free(driverStorePath); + return; + } + + pCtx->adapterList = newList; + pCtx->adapterList[pCtx->adapterCount] = (struct dxcore_adapter){ 0 }; /* realloc does not zero the new slot; clear the fields that are not assigned below */ + pCtx->adapterList[pCtx->adapterCount].hAdapter = pAdapterInfo->hAdapter; + pCtx->adapterList[pCtx->adapterCount].pDriverStorePath = driverStorePath; + pCtx->adapterList[pCtx->adapterCount].wddmVersion = wddmVersion; + pCtx->adapterCount++; + } + + log_infof("Adding new adapter via dxcore hAdapter:%x luid:%llx wddm version:%d", pAdapterInfo->hAdapter, *((unsigned long long*)&pAdapterInfo->AdapterLuid), wddmVersion); +} + +static void dxcore_enum_adapters(struct dxcore_context* pCtx, struct dxcore_lib* pLib) +{ + struct dxcore_enumAdapters2 params = {0}; + unsigned int adapterIndex = 0; + + params.NumAdapters = 0; + params.pAdapters = NULL; + + if (pLib->pDxcoreEnumAdapters2(&params)) { + log_err("Failed to enumerate adapters via dxcore"); + return; + } + + params.pAdapters = malloc(sizeof(struct dxcore_adapterInfo) * params.NumAdapters); + if (!params.pAdapters || pLib->pDxcoreEnumAdapters2(&params)) { /* also stop when the allocation failed; free(NULL) below is a no-op */ + free(params.pAdapters); + log_err("Failed to enumerate adapters via dxcore"); + return; + } + + for (adapterIndex = 0; adapterIndex < params.NumAdapters; adapterIndex++) { + dxcore_add_adapter(pCtx, pLib, &params.pAdapters[adapterIndex]); + } + + free(params.pAdapters); +} + +int dxcore_init_context(struct dxcore_context* pCtx) +{ + struct dxcore_lib lib = {0}; + + pCtx->initialized = 0; + pCtx->adapterCount = 0; + pCtx->adapterList = NULL; + + lib.hDxcoreLib = dlopen("libdxcore.so",
RTLD_LAZY); + if (!lib.hDxcoreLib) { + goto error; + } + + lib.pDxcoreEnumAdapters2 = (pfnDxcoreEnumAdapters2)dlsym(lib.hDxcoreLib, "D3DKMTEnumAdapters2"); + if (!lib.pDxcoreEnumAdapters2) { + log_err("dxcore library is present but the symbol D3DKMTEnumAdapters2 is missing"); + goto error; + } + + lib.pDxcoreQueryAdapterInfo = (pfnDxcoreQueryAdapterInfo)dlsym(lib.hDxcoreLib, "D3DKMTQueryAdapterInfo"); + if (!lib.pDxcoreQueryAdapterInfo) { + log_err("dxcore library is present but the symbol D3DKMTQueryAdapterInfo is missing"); + goto error; + } + + dxcore_enum_adapters(pCtx, &lib); + + log_info("dxcore layer initialized successfully"); + pCtx->initialized = 1; + + dlclose(lib.hDxcoreLib); + + return 0; + +error: + dxcore_deinit_context(pCtx); + + if (lib.hDxcoreLib) + dlclose(lib.hDxcoreLib); + + return (-1); +} + +static void dxcore_deinit_adapter(struct dxcore_adapter* pAdapter) +{ + if (!pAdapter) + return; + + free(pAdapter->pDriverStorePath); +} + +void dxcore_deinit_context(struct dxcore_context* pCtx) +{ + unsigned int adapterIndex = 0; + + if (!pCtx) + return; + + for (adapterIndex = 0; adapterIndex < pCtx->adapterCount; adapterIndex++) { + dxcore_deinit_adapter(&pCtx->adapterList[adapterIndex]); + } + free(pCtx->adapterList); + pCtx->adapterList = NULL; /* drop the dangling pointer so that a repeated deinit frees nothing */ + pCtx->adapterCount = 0; + pCtx->initialized = 0; +} diff --git a/internal/dxcore/dxcore.go b/internal/dxcore/dxcore.go new file mode 100644 index 00000000..76cc53f8 --- /dev/null +++ b/internal/dxcore/dxcore.go @@ -0,0 +1,59 @@ +/** +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package dxcore + +/* +#cgo LDFLAGS: -Wl,--unresolved-symbols=ignore-in-object-files +#include <dxcore.h> +*/ +import "C" +import ( + "fmt" + "unsafe" +) + +type context C.struct_dxcore_context +type adapter C.struct_dxcore_adapter + +// initContext initializes the dxcore context and populates the list of adapters. +func initContext() (*context, error) { + cContext := C.struct_dxcore_context{} + if C.dxcore_init_context(&cContext) != 0 { + return nil, fmt.Errorf("failed to initialize dxcore context") + } + c := (*context)(&cContext) + return c, nil +} + +// deinitContext frees the list of adapters through dxcore_deinit_context and resets the receiver in place; the pointer receiver ensures the C call does not operate on a copy of the context. +func (c *context) deinitContext() { + C.dxcore_deinit_context((*C.struct_dxcore_context)(c)) + c.adapterCount, c.adapterList = 0, nil +} + +func (c *context) getAdapterCount() int { // number of entries in the C adapter list + return int(c.adapterCount) +} + +func (c *context) getAdapter(index int) adapter { // copy of the adapter at index; index is not range-checked here + arrayPointer := (*[1 << 30]C.struct_dxcore_adapter)(unsafe.Pointer(c.adapterList)) + return adapter(arrayPointer[index]) +} + +func (a adapter) getDriverStorePath() string { // Go copy of the C driver store path string + return C.GoString(a.pDriverStorePath) +} diff --git a/internal/dxcore/dxcore.h b/internal/dxcore/dxcore.h new file mode 100644 index 00000000..9c044fee --- /dev/null +++ b/internal/dxcore/dxcore.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+ */ + +#ifndef HEADER_DXCORE_H_ +#define HEADER_DXCORE_H_ + +#define MAX_DXCORE_DRIVERSTORE_LIBRAIRIES (16) /* capacity of dxcore_adapter.pDriverStoreComponents */ + +struct dxcore_luid /* used as the AdapterLuid member of dxcore_adapterInfo in dxcore.c */ +{ + unsigned int lowPart; + int highPart; +}; + +struct dxcore_adapter /* one entry of dxcore_context.adapterList */ +{ + unsigned int hAdapter; /* copied from dxcore_adapterInfo.hAdapter */ + unsigned int wddmVersion; /* result of the DXCORE_QUERYDRIVERVERSION query; adapters reporting less than 2700 are skipped */ + char* pDriverStorePath; /* heap-allocated C string; released by dxcore_deinit_context */ + unsigned int driverStoreComponentCount; /* not populated by dxcore.c in this patch */ + const char* pDriverStoreComponents[MAX_DXCORE_DRIVERSTORE_LIBRAIRIES]; /* not populated by dxcore.c in this patch */ + struct dxcore_context *pContext; /* not populated by dxcore.c in this patch */ +}; + +struct dxcore_context +{ + unsigned int adapterCount; /* number of entries in adapterList */ + struct dxcore_adapter *adapterList; /* grown with realloc as adapters are added; freed by dxcore_deinit_context */ + + int initialized; /* set to 1 at the end of a successful dxcore_init_context, 0 by dxcore_deinit_context */ +}; + + + +int dxcore_init_context(struct dxcore_context* pDxcore_context); /* returns 0 on success, -1 on failure */ +void dxcore_deinit_context(struct dxcore_context* pDxcore_context); /* frees every driver store path and the adapter list; a NULL argument is ignored */ + +#endif // HEADER_DXCORE_H_