diff --git a/trains_agent/commands/worker.py b/trains_agent/commands/worker.py index 8ae5555..a107779 100644 --- a/trains_agent/commands/worker.py +++ b/trains_agent/commands/worker.py @@ -39,7 +39,7 @@ from trains_agent.definitions import ( PROGRAM_NAME, DEFAULT_VENV_UPDATE_URL, ENV_TASK_EXECUTE_AS_USER, - ENV_K8S_HOST_MOUNT, + ENV_DOCKER_HOST_MOUNT, ENV_TASK_EXTRA_PYTHON_PATH, ENV_AGENT_GIT_USER, ENV_AGENT_GIT_PASS) @@ -1972,8 +1972,8 @@ class Worker(ServiceCommandSection): temp_config.put("agent.cuda_version", "") temp_config.put("agent.cudnn_version", "") temp_config.put("agent.venvs_dir", mounted_venv_dir) - temp_config.put("agent.git_user", (ENV_AGENT_GIT_USER.get() or self.session.config.get("agent.git_user", None))) - temp_config.put("agent.git_pass", (ENV_AGENT_GIT_PASS.get() or self.session.config.get("agent.git_pass", None))) + temp_config.put("agent.git_user", (ENV_AGENT_GIT_USER.get() or self._session.config.get("agent.git_user", None))) + temp_config.put("agent.git_pass", (ENV_AGENT_GIT_PASS.get() or self._session.config.get("agent.git_pass", None))) host_apt_cache = Path(os.path.expandvars(self._session.config.get( "agent.docker_apt_cache", '~/.trains/apt-cache'))).expanduser().as_posix() @@ -2084,7 +2084,7 @@ class Worker(ServiceCommandSection): base_cmd += [str(a) for a in extra_docker_arguments if a] # check if running inside a kubernetes - if os.environ.get('KUBERNETES_SERVICE_HOST') and os.environ.get('KUBERNETES_PORT'): + if ENV_DOCKER_HOST_MOUNT.get() or (os.environ.get('KUBERNETES_SERVICE_HOST') and os.environ.get('KUBERNETES_PORT')): # map network to sibling docker, unless we have other network argument if not any(a.strip().startswith('--network') for a in base_cmd): try: @@ -2096,9 +2096,9 @@ class Worker(ServiceCommandSection): base_cmd += ['-e', 'NVIDIA_VISIBLE_DEVICES={}'.format(dockers_nvidia_visible_devices)] # check if we need to map host folders - if os.environ.get(ENV_K8S_HOST_MOUNT): + if ENV_DOCKER_HOST_MOUNT.get(): # expect TRAINS_AGENT_K8S_HOST_MOUNT = '/mnt/host/data:/root/.trains' - k8s_node_mnt, _, k8s_pod_mnt = os.environ.get(ENV_K8S_HOST_MOUNT).partition(':') + k8s_node_mnt, _, k8s_pod_mnt = ENV_DOCKER_HOST_MOUNT.get().partition(':') # search and replace all the host folders with the k8s host_mounts = [host_apt_cache, host_pip_cache, host_pip_dl, host_cache, host_vcs_cache] for i, m in enumerate(host_mounts): @@ -2112,6 +2112,7 @@ class Worker(ServiceCommandSection): # copy the configuration file into the mounted folder new_conf_file = os.path.join(k8s_pod_mnt, '.trains_agent.{}.cfg'.format(quote(worker_id, safe=""))) try: + rm_tree(new_conf_file) rm_file(new_conf_file) shutil.copy(conf_file, new_conf_file) conf_file = new_conf_file.replace(k8s_pod_mnt, k8s_node_mnt) diff --git a/trains_agent/definitions.py b/trains_agent/definitions.py index bd5b642..75e2a27 100644 --- a/trains_agent/definitions.py +++ b/trains_agent/definitions.py @@ -124,7 +124,7 @@ ENV_AGENT_GIT_USER = EnvironmentConfig('TRAINS_AGENT_GIT_USER') ENV_AGENT_GIT_PASS = EnvironmentConfig('TRAINS_AGENT_GIT_PASS') ENV_TASK_EXECUTE_AS_USER = 'TRAINS_AGENT_EXEC_USER' ENV_TASK_EXTRA_PYTHON_PATH = 'TRAINS_AGENT_EXTRA_PYTHON_PATH' -ENV_K8S_HOST_MOUNT = 'TRAINS_AGENT_K8S_HOST_MOUNT' +ENV_DOCKER_HOST_MOUNT = EnvironmentConfig('TRAINS_AGENT_K8S_HOST_MOUNT', 'TRAINS_AGENT_DOCKER_HOST_MOUNT') class FileBuffering(IntEnum): diff --git a/trains_agent/helper/singleton.py b/trains_agent/helper/singleton.py index 832ad0d..ec3a7a9 100644 --- a/trains_agent/helper/singleton.py +++ b/trains_agent/helper/singleton.py @@ -4,7 +4,7 @@ from time import sleep from glob import glob from tempfile import gettempdir, NamedTemporaryFile -from trains_agent.definitions import ENV_K8S_HOST_MOUNT +from trains_agent.definitions import ENV_DOCKER_HOST_MOUNT from trains_agent.helper.base import warning @@ -85,7 +85,7 @@ class Singleton(object): pass worker = None - if api_client and os.environ.get(ENV_K8S_HOST_MOUNT) and uid: + if api_client and ENV_DOCKER_HOST_MOUNT.get() and uid: try: worker = [w for w in api_client.workers.get_all() if w.id == uid] except Exception: @@ -134,8 +134,8 @@ class Singleton(object): @classmethod def _get_temp_folder(cls): - if os.environ.get(ENV_K8S_HOST_MOUNT): - return os.environ.get(ENV_K8S_HOST_MOUNT).split(':')[-1] + if ENV_DOCKER_HOST_MOUNT.get(): + return ENV_DOCKER_HOST_MOUNT.get().split(':')[-1] return gettempdir() @classmethod