mirror of
https://github.com/clearml/clearml-agent
synced 2025-06-26 18:16:15 +00:00
Fix detached mode to correctly use cache folder slots
This commit is contained in:
parent
860ff8911c
commit
22c5f043aa
@ -737,6 +737,8 @@ class Worker(ServiceCommandSection):
|
|||||||
# in detached mode
|
# in detached mode
|
||||||
# fully detach stdin.stdout/stderr and leave main process, running in the background
|
# fully detach stdin.stdout/stderr and leave main process, running in the background
|
||||||
daemonize_process(out_file.fileno())
|
daemonize_process(out_file.fileno())
|
||||||
|
# make sure we update the singleton lock file to the new pid
|
||||||
|
Singleton.update_pid_file()
|
||||||
# reprint headers to std file (we are now inside the daemon process)
|
# reprint headers to std file (we are now inside the daemon process)
|
||||||
print("Worker \"{}\" :".format(self.worker_id))
|
print("Worker \"{}\" :".format(self.worker_id))
|
||||||
self._session.print_configuration()
|
self._session.print_configuration()
|
||||||
@ -2277,8 +2279,9 @@ class Worker(ServiceCommandSection):
|
|||||||
else:
|
else:
|
||||||
worker_name = '{}:cpu'.format(worker_name)
|
worker_name = '{}:cpu'.format(worker_name)
|
||||||
|
|
||||||
self.worker_id, worker_slot = Singleton.register_instance(unique_worker_id=worker_id, worker_name=worker_name,
|
self.worker_id, worker_slot = Singleton.register_instance(
|
||||||
api_client=self._session.api_client)
|
unique_worker_id=worker_id, worker_name=worker_name, api_client=self._session.api_client)
|
||||||
|
|
||||||
if self.worker_id is None:
|
if self.worker_id is None:
|
||||||
error('Instance with the same WORKER_ID [{}] is already running'.format(worker_id))
|
error('Instance with the same WORKER_ID [{}] is already running'.format(worker_id))
|
||||||
exit(1)
|
exit(1)
|
||||||
|
@ -18,6 +18,24 @@ class Singleton(object):
|
|||||||
_pid_file = None
|
_pid_file = None
|
||||||
_lock_file_name = sep+prefix+sep+'global.lock'
|
_lock_file_name = sep+prefix+sep+'global.lock'
|
||||||
_lock_timeout = 10
|
_lock_timeout = 10
|
||||||
|
_pid = None
|
||||||
|
|
||||||
|
@classmethod
def update_pid_file(cls):
    """Re-key the singleton lock file to the current process id.

    Compares ``os.getpid()`` with the pid recorded in ``cls._pid``. When they
    differ, the ``<sep><pid><sep>`` token in the lock file's base name is
    replaced with the current pid, ``cls._pid`` and ``cls._pid_file.name``
    are updated, and the file on disk is renamed to the new path.

    Does nothing when no lock file is registered (``cls._pid_file`` is
    falsy) or when the recorded pid already equals the current one.

    The rename is best effort: an ``OSError`` from ``os.rename`` is ignored
    and the in-memory pid / file name are updated regardless.
    """
    new_pid = str(os.getpid())
    if not cls._pid_file or cls._pid == new_pid:
        return

    old_name = cls._pid_file.name
    # only the file's base name carries the pid; leave the folder part untouched
    folder, base_name = os.path.split(old_name)
    base_name = base_name.replace(cls.sep + cls._pid + cls.sep, cls.sep + new_pid + cls.sep)
    new_pid_file = os.path.join(folder, base_name)

    # record the new identity before touching the disk (kept even if the rename fails)
    cls._pid = new_pid
    # NOTE(review): the lock file object may keep its own copy of the path for
    # delete-on-close; confirm cleanup still removes the renamed file.
    cls._pid_file.name = new_pid_file

    # we need to rename to match new pid
    try:
        os.rename(old_name, new_pid_file)
    except OSError:
        # best effort: a missing or locked file must not take the worker down,
        # but unrelated exceptions (e.g. KeyboardInterrupt) are no longer hidden
        pass
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def register_instance(cls, unique_worker_id=None, worker_name=None, api_client=None):
|
def register_instance(cls, unique_worker_id=None, worker_name=None, api_client=None):
|
||||||
@ -124,8 +142,9 @@ class Singleton(object):
|
|||||||
unique_worker_id = worker_name + cls.worker_name_sep + str(cls.instance_slot)
|
unique_worker_id = worker_name + cls.worker_name_sep + str(cls.instance_slot)
|
||||||
|
|
||||||
# create lock
|
# create lock
|
||||||
cls._pid_file = NamedTemporaryFile(dir=cls._get_temp_folder(),
|
cls._pid = str(os.getpid())
|
||||||
prefix=cls.prefix + cls.sep + str(os.getpid()) + cls.sep, suffix=cls.ext)
|
cls._pid_file = NamedTemporaryFile(
|
||||||
|
dir=cls._get_temp_folder(), prefix=cls.prefix + cls.sep + cls._pid + cls.sep, suffix=cls.ext)
|
||||||
cls._pid_file.write(('{}\n{}'.format(unique_worker_id, cls.instance_slot)).encode())
|
cls._pid_file.write(('{}\n{}'.format(unique_worker_id, cls.instance_slot)).encode())
|
||||||
cls._pid_file.flush()
|
cls._pid_file.flush()
|
||||||
cls.worker_id = unique_worker_id
|
cls.worker_id = unique_worker_id
|
||||||
|
Loading…
Reference in New Issue
Block a user