mirror of
https://github.com/clearml/clearml
synced 2025-04-16 05:22:11 +00:00
Fix GPU info such as gpu_memory and gpu_type not being collected in some cases
This commit is contained in:
parent
1320927fdf
commit
c600b05386
@ -52,7 +52,7 @@ class ResourceMonitor(BackgroundMonitor):
|
||||
try:
|
||||
active_gpus = os.environ.get('NVIDIA_VISIBLE_DEVICES', '') or \
|
||||
os.environ.get('CUDA_VISIBLE_DEVICES', '')
|
||||
if active_gpus != "all":
|
||||
if active_gpus and active_gpus != "all":
|
||||
self._active_gpus = [g.strip() for g in active_gpus.split(',')]
|
||||
except Exception:
|
||||
pass
|
||||
@ -389,7 +389,7 @@ class ResourceMonitor(BackgroundMonitor):
|
||||
if self._gpustat:
|
||||
gpu_stat = self._gpustat.new_query(shutdown=True, get_driver_info=True)
|
||||
if gpu_stat.gpus:
|
||||
gpus = [g for i, g in enumerate(gpu_stat.gpus) if not self._active_gpus or i in self._active_gpus]
|
||||
gpus = [g for i, g in enumerate(gpu_stat.gpus) if not self._skip_nonactive_gpu(i, g)]
|
||||
specs.update(
|
||||
gpu_count=int(len(gpus)),
|
||||
gpu_type=', '.join(g.name for g in gpus),
|
||||
|
Loading…
Reference in New Issue
Block a user