Fix issue with A100 GPU monitoring
Some checks failed
CodeQL / Analyze (python) (push) Has been cancelled

This commit is contained in:
clearml 2025-02-18 15:30:27 +02:00
parent c46f254839
commit 342e1b35f8

View File

@@ -486,6 +486,7 @@ class ResourceMonitor(BackgroundMonitor):
# only monitor the active gpu's, if none were selected, monitor everything # only monitor the active gpu's, if none were selected, monitor everything
if self._skip_nonactive_gpu(g): if self._skip_nonactive_gpu(g):
continue continue
if g["temperature.gpu"] is not None:
stats["gpu_%d_temperature" % i] = float(g["temperature.gpu"]) stats["gpu_%d_temperature" % i] = float(g["temperature.gpu"])
if g["utilization.gpu"] is not None: if g["utilization.gpu"] is not None:
stats["gpu_%d_utilization" % i] = float(g["utilization.gpu"]) stats["gpu_%d_utilization" % i] = float(g["utilization.gpu"])