mirror of
https://github.com/clearml/clearml-agent
synced 2025-06-26 18:16:15 +00:00
Optimize dynamic GPU to query only relevant workers (requires clearml-server >=v2.0.0, otherwise the selection argument is ignored)
This commit is contained in:
parent
e551ee1eb5
commit
c76dfe7ce6
@ -1730,16 +1730,24 @@ class Worker(ServiceCommandSection):
|
|||||||
def _dynamic_gpu_get_available(self, gpu_indexes):
|
def _dynamic_gpu_get_available(self, gpu_indexes):
|
||||||
# key: cast to string, value: 1 (i.e. full GPU)
|
# key: cast to string, value: 1 (i.e. full GPU)
|
||||||
gpu_indexes = {str(g): 1 for g in gpu_indexes}
|
gpu_indexes = {str(g): 1 for g in gpu_indexes}
|
||||||
|
worker_name = self._session.config.get("agent.worker_name", "") + ':gpu'
|
||||||
|
|
||||||
|
# only return "Our" workers (requires server API +2, otherwise the selector pattern is ignored)
|
||||||
# noinspection PyBroadException
|
# noinspection PyBroadException
|
||||||
try:
|
try:
|
||||||
response = self._session.send_api(workers_api.GetAllRequest(last_seen=600))
|
response = self._session.send_api(workers_api.GetAllRequest(
|
||||||
|
last_seen=600,
|
||||||
|
worker_pattern="{}*".format(worker_name),
|
||||||
|
_allow_extra_fields_=True
|
||||||
|
))
|
||||||
except Exception:
|
except Exception:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
worker_name = self._session.config.get("agent.worker_name", "") + ':gpu'
|
# filter only our workers, in case the selector pattern above was ignored by an older-version API server
|
||||||
our_workers = [
|
our_workers = [
|
||||||
w.id for w in response.workers
|
w.id for w in response.workers
|
||||||
if w.id.startswith(worker_name) and w.id != self.worker_id]
|
if w.id.startswith(worker_name) and w.id != self.worker_id
|
||||||
|
]
|
||||||
gpus = {}
|
gpus = {}
|
||||||
allocated_gpus = {}
|
allocated_gpus = {}
|
||||||
gpu_pattern = re.compile(r"\d+[.]?\d*[a-z]?")
|
gpu_pattern = re.compile(r"\d+[.]?\d*[a-z]?")
|
||||||
@ -2025,7 +2033,7 @@ class Worker(ServiceCommandSection):
|
|||||||
columns = ("id", "name", "tags")
|
columns = ("id", "name", "tags")
|
||||||
print("Listening to queues:")
|
print("Listening to queues:")
|
||||||
if dynamic_gpus:
|
if dynamic_gpus:
|
||||||
columns = ("id", "name", "tags", "gpus")
|
columns = ("id", "name", "tags", "gpus (min, max)")
|
||||||
for q in queues_info:
|
for q in queues_info:
|
||||||
q['gpus'] = str(dict(dynamic_gpus).get(q['id']) or '')
|
q['gpus'] = str(dict(dynamic_gpus).get(q['id']) or '')
|
||||||
print_table(queues_info, columns=columns, titles=columns)
|
print_table(queues_info, columns=columns, titles=columns)
|
||||||
|
Loading…
Reference in New Issue
Block a user