mirror of
https://github.com/clearml/clearml
synced 2025-01-31 00:56:57 +00:00
Fix autoscaler should recheck that the worker is still IDLE before shutting it down (#1240)
This commit is contained in:
parent
90998403ee
commit
d4e136307c
@ -198,6 +198,13 @@ class AutoScaler(object):
|
|||||||
instance_type=resource_conf["instance_type"],
|
instance_type=resource_conf["instance_type"],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def is_worker_still_idle(self, worker_id):
    """
    Re-check, against a fresh worker listing, whether a worker is idle.

    The listing is fetched via ``self.api_client.workers.get_all()`` and the
    first entry whose ``id`` equals ``worker_id`` decides the answer.

    :param worker_id: ID of the worker to look up in the listing.
    :return: True when the matching entry has no ``task`` attribute or its
        ``task`` is None; also True when no entry matches ``worker_id``.
        False when the matching entry carries a task.
    """
    self.logger.info("Checking if worker %r is still idle", worker_id)

    # Only the first entry with a matching id is considered.
    match = next(
        (entry for entry in self.api_client.workers.get_all() if entry.id == worker_id),
        None,
    )
    if match is None:
        # Worker is absent from the listing: reported as idle.
        return True

    assigned_task = getattr(match, 'task', None)
    return assigned_task is None
|
||||||
|
|
||||||
def supervisor(self):
|
def supervisor(self):
|
||||||
"""
|
"""
|
||||||
Spin up or down resources as necessary.
|
Spin up or down resources as necessary.
|
||||||
@ -323,6 +330,9 @@ class AutoScaler(object):
|
|||||||
continue
|
continue
|
||||||
# Remove from both cloud and clearml all instances that are idle for longer than MAX_IDLE_TIME_MIN
|
# Remove from both cloud and clearml all instances that are idle for longer than MAX_IDLE_TIME_MIN
|
||||||
if time() - timestamp > self.max_idle_time_min * MINUTE:
|
if time() - timestamp > self.max_idle_time_min * MINUTE:
|
||||||
|
if not self.is_worker_still_idle(worker_id):
|
||||||
|
# Skip worker if no more idle
|
||||||
|
continue
|
||||||
wid = WorkerId(worker_id)
|
wid = WorkerId(worker_id)
|
||||||
cloud_id = wid.cloud_id
|
cloud_id = wid.cloud_id
|
||||||
self.driver.spin_down_worker(cloud_id)
|
self.driver.spin_down_worker(cloud_id)
|
||||||
|
Loading…
Reference in New Issue
Block a user