mirror of
				https://github.com/clearml/clearml-agent
				synced 2025-06-26 18:16:15 +00:00 
			
		
		
		
	Support NVIDIA_VISIBLE_DEVICES using volume mounts
This commit is contained in:
		
							parent
							
								
									768ee3d2cf
								
							
						
					
					
						commit
						553c72e06a
					
				| @@ -2,6 +2,7 @@ from __future__ import unicode_literals, division | |||||||
| 
 | 
 | ||||||
| import logging | import logging | ||||||
| import re | import re | ||||||
|  | import os | ||||||
| import shlex | import shlex | ||||||
| from collections import deque | from collections import deque | ||||||
| from itertools import starmap | from itertools import starmap | ||||||
| @@ -112,7 +113,15 @@ class ResourceMonitor(object): | |||||||
|                 active_gpus = Session.get_nvidia_visible_env() |                 active_gpus = Session.get_nvidia_visible_env() | ||||||
|                 # None means no filtering, report all gpus |                 # None means no filtering, report all gpus | ||||||
|                 if active_gpus and active_gpus != "all": |                 if active_gpus and active_gpus != "all": | ||||||
|                     self._active_gpus = [g.strip() for g in str(active_gpus).split(',')] |                     if os.path.isdir(active_gpus): | ||||||
|  |                         try: | ||||||
|  |                             self._active_gpus = os.listdir(active_gpus) | ||||||
|  |                         except OSError as e: | ||||||
|  |                             log.warning( | ||||||
|  |                                 "Failed listing {}: {}".format(active_gpus, e) | ||||||
|  |                             ) | ||||||
|  |                     else: | ||||||
|  |                         self._active_gpus = [g.strip() for g in active_gpus.split(",")] | ||||||
|             except Exception: |             except Exception: | ||||||
|                 pass |                 pass | ||||||
|         self._cluster_report_interval_sec = int(session.config.get( |         self._cluster_report_interval_sec = int(session.config.get( | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 clearml
						clearml