diff --git a/clearml_agent/commands/worker.py b/clearml_agent/commands/worker.py index 94b6521..0ede07f 100644 --- a/clearml_agent/commands/worker.py +++ b/clearml_agent/commands/worker.py @@ -2103,8 +2103,9 @@ class Worker(ServiceCommandSection): end_of_build_marker = "build.done=true" docker_cmd_suffix = ' build --id {task_id} --install-globally; ' \ - 'echo "" >> {conf_file} ; ' \ - 'echo {end_of_build_marker} >> {conf_file} ; ' \ + 'ORG=$(stat -c "%u:%g" {conf_file}) ; chown $(whoami):$(whoami) {conf_file} ; ' \ + 'echo "" >> {conf_file} ; echo {end_of_build_marker} >> {conf_file} ; ' \ + 'chown $ORG {conf_file} ; ' \ 'bash'.format( task_id=task_id, end_of_build_marker=end_of_build_marker, @@ -2123,10 +2124,16 @@ class Worker(ServiceCommandSection): # now we need to wait until the line shows on our configuration file. while True: - while temp_config.stat().st_mtime == base_time_stamp: - sleep(5.0) - with open(temp_config.as_posix()) as f: - lines = [l.strip() for l in f.readlines()] + # noinspection PyBroadException + try: + while temp_config.stat().st_mtime == base_time_stamp: + sleep(5.0) + with open(temp_config.as_posix()) as f: + lines = [l.strip() for l in f.readlines()] + except Exception as ex: + # print("Failed reading status file [{}], retrying in 2 seconds".format(ex)) + sleep(2.0) + if 'build.done=true' in lines: break base_time_stamp = temp_config.stat().st_mtime @@ -2834,8 +2841,8 @@ class Worker(ServiceCommandSection): # Todo: add support for poetry caching if not self.poetry.enabled: # add to cache - print('Adding venv into cache: {}'.format(add_venv_folder_cache)) if add_venv_folder_cache: + print('Adding venv into cache: {}'.format(add_venv_folder_cache)) self.package_api.add_cached_venv( requirements=[freeze, previous_reqs], docker_cmd=execution_info.docker_cmd if execution_info else None, diff --git a/clearml_agent/helper/process.py b/clearml_agent/helper/process.py index 689910b..452f73c 100644 --- a/clearml_agent/helper/process.py +++ b/clearml_agent/helper/process.py @@ -117,10 +117,11 @@ def terminate_all_child_processes(pid=None, timeout=10., include_parent=True): def get_docker_id(docker_cmd_contains): + # noinspection PyBroadException try: containers_running = get_bash_output(cmd='docker ps --no-trunc --format \"{{.ID}}: {{.Command}}\"') for docker_line in containers_running.split('\n'): - parts = docker_line.split(':') + parts = docker_line.split(':', 1) if docker_cmd_contains in parts[-1]: # we found our docker, return it return parts[0]