From 15afc479c672299f79e926dc1c90a4f6268dbab0 Mon Sep 17 00:00:00 2001 From: Guillaume Theaud Date: Wed, 15 Jul 2020 11:16:05 -0400 Subject: [PATCH] Robustify GPU monitoring --- trains/utilities/resource_monitor.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/trains/utilities/resource_monitor.py b/trains/utilities/resource_monitor.py index 351fe684..46b03f46 100644 --- a/trains/utilities/resource_monitor.py +++ b/trains/utilities/resource_monitor.py @@ -299,6 +299,9 @@ class ResourceMonitor(object): gpu_stat = self._gpustat.new_query(per_process_stats=True) gpu_mem = {} for i, g in enumerate(gpu_stat.gpus): + # only monitor the active GPUs; if none were selected, monitor everything + if self._active_gpus and i not in self._active_gpus: + continue gpu_mem[i] = 0 for p in g.processes: if p['pid'] in self._last_process_id_list: