From ff736a76c9cbbc9b292fa8afd83b4dd73555af55 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Sat, 8 Aug 2020 12:40:32 +0300 Subject: [PATCH] Fix GPU stats on Windows machines (issue #177) --- trains/utilities/gpu/gpustat.py | 53 +++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/trains/utilities/gpu/gpustat.py b/trains/utilities/gpu/gpustat.py index f4967e58..0c82c1fc 100644 --- a/trains/utilities/gpu/gpustat.py +++ b/trains/utilities/gpu/gpustat.py @@ -200,24 +200,30 @@ class GPUStatCollection(object): GPUStatCollection.global_processes[nv_process.pid] = \ psutil.Process(pid=nv_process.pid) ps_process = GPUStatCollection.global_processes[nv_process.pid] - process['username'] = ps_process.username() - # cmdline returns full path; - # as in `ps -o comm`, get short cmdnames. - _cmdline = ps_process.cmdline() - if not _cmdline: - # sometimes, zombie or unknown (e.g. [kworker/8:2H]) - process['command'] = '?' - process['full_command'] = ['?'] - else: - process['command'] = os.path.basename(_cmdline[0]) - process['full_command'] = _cmdline - # Bytes to MBytes - process['gpu_memory_usage'] = nv_process.usedGpuMemory // MB - process['cpu_percent'] = ps_process.cpu_percent() - process['cpu_memory_usage'] = \ - round((ps_process.memory_percent() / 100.0) * - psutil.virtual_memory().total) process['pid'] = nv_process.pid + # noinspection PyBroadException + try: + # we do not actually use these, so no point in collecting them + # process['username'] = ps_process.username() + # # cmdline returns full path; + # # as in `ps -o comm`, get short cmdnames. + # _cmdline = ps_process.cmdline() + # if not _cmdline: + # # sometimes, zombie or unknown (e.g. [kworker/8:2H]) + # process['command'] = '?' + # process['full_command'] = ['?'] + # else: + # process['command'] = os.path.basename(_cmdline[0]) + # process['full_command'] = _cmdline + # process['cpu_percent'] = ps_process.cpu_percent() + # process['cpu_memory_usage'] = \ + # round((ps_process.memory_percent() / 100.0) * + # psutil.virtual_memory().total) + # Bytes to MBytes + process['gpu_memory_usage'] = nv_process.usedGpuMemory // MB + except Exception: + # insufficient permissions + pass return process if not GPUStatCollection._gpu_device_info.get(index): @@ -285,12 +291,13 @@ class GPUStatCollection(object): # e.g. nvidia-smi reset or reboot the system pass - # TODO: Do not block if full process info is not requested - time.sleep(0.1) - for process in processes: - pid = process['pid'] - cache_process = GPUStatCollection.global_processes[pid] - process['cpu_percent'] = cache_process.cpu_percent() + # we do not actually use these, so no point in collecting them + # # TODO: Do not block if full process info is not requested + # time.sleep(0.1) + # for process in processes: + # pid = process['pid'] + # cache_process = GPUStatCollection.global_processes[pid] + # process['cpu_percent'] = cache_process.cpu_percent() index = N.nvmlDeviceGetIndex(handle) gpu_info = {