mirror of
https://github.com/clearml/clearml-agent
synced 2025-02-07 13:26:08 +00:00
Fix detached mode to correctly use cache folder slots
This commit is contained in:
parent
860ff8911c
commit
22c5f043aa
@ -737,6 +737,8 @@ class Worker(ServiceCommandSection):
|
||||
# in detached mode
|
||||
# fully detach stdin/stdout/stderr and leave the main process, running in the background
|
||||
daemonize_process(out_file.fileno())
|
||||
# make sure we update the singleton lock file to the new pid
|
||||
Singleton.update_pid_file()
|
||||
# reprint headers to std file (we are now inside the daemon process)
|
||||
print("Worker \"{}\" :".format(self.worker_id))
|
||||
self._session.print_configuration()
|
||||
@ -2277,8 +2279,9 @@ class Worker(ServiceCommandSection):
|
||||
else:
|
||||
worker_name = '{}:cpu'.format(worker_name)
|
||||
|
||||
self.worker_id, worker_slot = Singleton.register_instance(unique_worker_id=worker_id, worker_name=worker_name,
|
||||
api_client=self._session.api_client)
|
||||
self.worker_id, worker_slot = Singleton.register_instance(
|
||||
unique_worker_id=worker_id, worker_name=worker_name, api_client=self._session.api_client)
|
||||
|
||||
if self.worker_id is None:
|
||||
error('Instance with the same WORKER_ID [{}] is already running'.format(worker_id))
|
||||
exit(1)
|
||||
|
@ -18,6 +18,24 @@ class Singleton(object):
|
||||
_pid_file = None
|
||||
_lock_file_name = sep+prefix+sep+'global.lock'
|
||||
_lock_timeout = 10
|
||||
_pid = None
|
||||
|
||||
@classmethod
def update_pid_file(cls):
    """Rename the singleton lock file so its name carries the current pid.

    The lock file name embeds the pid of the process that created it
    (``<prefix><sep><pid><sep>...``). When the process id changes (for
    example after the process daemonizes itself), the file is renamed so
    the embedded pid matches ``os.getpid()``.

    Does nothing when no lock file was created yet or when the recorded
    pid already equals the current one. The rename is best effort: if it
    fails, ``cls._pid`` and ``cls._pid_file.name`` are left untouched so
    they keep describing the file that actually exists on disk.
    """
    new_pid = str(os.getpid())
    if not cls._pid_file or cls._pid == new_pid:
        return

    old_name = cls._pid_file.name
    parts = old_name.split(os.path.sep)
    # Only the pid embedded in the file-name prefix must change; limit the
    # substitution to the first match so a random suffix that happens to
    # contain the same "<sep><pid><sep>" sequence is left alone.
    parts[-1] = parts[-1].replace(
        cls.sep + cls._pid + cls.sep, cls.sep + new_pid + cls.sep, 1)
    new_pid_file = os.path.sep.join(parts)

    # we need to rename to match new pid
    try:
        os.rename(old_name, new_pid_file)
    except OSError:
        # Best effort: keep the previous pid/name so the recorded state
        # still points at the existing file.
        return

    cls._pid = new_pid
    # NOTE(review): some Python 3 versions of tempfile keep a separate copy
    # of the path for delete-on-close; confirm that updating `.name` alone
    # is enough for the renamed file to be removed on close.
    cls._pid_file.name = new_pid_file
|
||||
|
||||
@classmethod
|
||||
def register_instance(cls, unique_worker_id=None, worker_name=None, api_client=None):
|
||||
@ -124,8 +142,9 @@ class Singleton(object):
|
||||
unique_worker_id = worker_name + cls.worker_name_sep + str(cls.instance_slot)
|
||||
|
||||
# create lock
|
||||
cls._pid_file = NamedTemporaryFile(dir=cls._get_temp_folder(),
|
||||
prefix=cls.prefix + cls.sep + str(os.getpid()) + cls.sep, suffix=cls.ext)
|
||||
cls._pid = str(os.getpid())
|
||||
cls._pid_file = NamedTemporaryFile(
|
||||
dir=cls._get_temp_folder(), prefix=cls.prefix + cls.sep + cls._pid + cls.sep, suffix=cls.ext)
|
||||
cls._pid_file.write(('{}\n{}'.format(unique_worker_id, cls.instance_slot)).encode())
|
||||
cls._pid_file.flush()
|
||||
cls.worker_id = unique_worker_id
|
||||
|
Loading…
Reference in New Issue
Block a user