From cc99077c92d53c90184cfd2c4fa8ceae72673dd4 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Sun, 31 May 2020 14:01:14 +0300 Subject: [PATCH] Do not monitor GPU when running with --cpu-only --- trains_agent/helper/resource_monitor.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/trains_agent/helper/resource_monitor.py b/trains_agent/helper/resource_monitor.py index 91dee74..56a25fa 100644 --- a/trains_agent/helper/resource_monitor.py +++ b/trains_agent/helper/resource_monitor.py @@ -75,9 +75,15 @@ class ResourceMonitor(object): self._exit_event = Event() self._gpustat_fail = 0 self._gpustat = gpustat - if not self._gpustat: + self._active_gpus = None + if os.environ.get('NVIDIA_VISIBLE_DEVICES') == 'none': + # NVIDIA_VISIBLE_DEVICES set to none, marks cpu_only flag + # active_gpus == False means no GPU reporting + self._active_gpus = False + elif not self._gpustat: log.warning('Trains-Agent Resource Monitor: GPU monitoring is not available') else: + # None means no filtering, report all gpus self._active_gpus = None try: active_gpus = os.environ.get('NVIDIA_VISIBLE_DEVICES', '') or \ @@ -244,8 +250,8 @@ class ResourceMonitor(object): stats["io_read_mbs"] = BytesSizes.megabytes(io_stats.read_bytes) stats["io_write_mbs"] = BytesSizes.megabytes(io_stats.write_bytes) - # check if we can access the gpu statistics - if self._gpustat: + # check if we need to monitor gpus and if we can access the gpu statistics + if self._active_gpus is not False and self._gpustat: try: gpu_stat = self._gpustat.new_query() for i, g in enumerate(gpu_stat.gpus):