mirror of
https://github.com/clearml/clearml
synced 2025-05-24 13:54:16 +00:00
Fix GPU reporting when NVIDIA_VISIBLE_DEVICES contains a directory reference
This commit is contained in:
parent
dc4241eb0c
commit
2486dd7b9c
@@ -108,6 +108,14 @@ class ResourceMonitor(BackgroundMonitor):
|
|||||||
try:
|
try:
|
||||||
active_gpus = os.environ.get("NVIDIA_VISIBLE_DEVICES", "") or os.environ.get("CUDA_VISIBLE_DEVICES", "")
|
active_gpus = os.environ.get("NVIDIA_VISIBLE_DEVICES", "") or os.environ.get("CUDA_VISIBLE_DEVICES", "")
|
||||||
if active_gpus and active_gpus != "all":
|
if active_gpus and active_gpus != "all":
|
||||||
|
if os.path.isdir(active_gpus):
|
||||||
|
try:
|
||||||
|
self._active_gpus = os.listdir(active_gpus)
|
||||||
|
except OSError as e:
|
||||||
|
logging.getLogger("clearml.resource_monitor").warning(
|
||||||
|
"Failed listing {}: {}".format(active_gpus, e)
|
||||||
|
)
|
||||||
|
else:
|
||||||
self._active_gpus = [g.strip() for g in active_gpus.split(",")]
|
self._active_gpus = [g.strip() for g in active_gpus.split(",")]
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
Loading…
Reference in New Issue
Block a user