Improve docker host-mount support, use TRAINS_AGENT_DOCKER_HOST_MOUNT env var

This commit is contained in:
allegroai 2020-05-09 20:02:46 +03:00
parent 7c87797a40
commit 53f511f536
3 changed files with 12 additions and 11 deletions

View File

@ -39,7 +39,7 @@ from trains_agent.definitions import (
PROGRAM_NAME,
DEFAULT_VENV_UPDATE_URL,
ENV_TASK_EXECUTE_AS_USER,
ENV_K8S_HOST_MOUNT,
ENV_DOCKER_HOST_MOUNT,
ENV_TASK_EXTRA_PYTHON_PATH,
ENV_AGENT_GIT_USER,
ENV_AGENT_GIT_PASS)
@ -1972,8 +1972,8 @@ class Worker(ServiceCommandSection):
temp_config.put("agent.cuda_version", "")
temp_config.put("agent.cudnn_version", "")
temp_config.put("agent.venvs_dir", mounted_venv_dir)
temp_config.put("agent.git_user", (ENV_AGENT_GIT_USER.get() or self.session.config.get("agent.git_user", None)))
temp_config.put("agent.git_pass", (ENV_AGENT_GIT_PASS.get() or self.session.config.get("agent.git_pass", None)))
temp_config.put("agent.git_user", (ENV_AGENT_GIT_USER.get() or self._session.config.get("agent.git_user", None)))
temp_config.put("agent.git_pass", (ENV_AGENT_GIT_PASS.get() or self._session.config.get("agent.git_pass", None)))
host_apt_cache = Path(os.path.expandvars(self._session.config.get(
"agent.docker_apt_cache", '~/.trains/apt-cache'))).expanduser().as_posix()
@ -2084,7 +2084,7 @@ class Worker(ServiceCommandSection):
base_cmd += [str(a) for a in extra_docker_arguments if a]
# check if running inside a kubernetes
if os.environ.get('KUBERNETES_SERVICE_HOST') and os.environ.get('KUBERNETES_PORT'):
if ENV_DOCKER_HOST_MOUNT.get() or (os.environ.get('KUBERNETES_SERVICE_HOST') and os.environ.get('KUBERNETES_PORT')):
# map network to sibling docker, unless we have other network argument
if not any(a.strip().startswith('--network') for a in base_cmd):
try:
@ -2096,9 +2096,9 @@ class Worker(ServiceCommandSection):
base_cmd += ['-e', 'NVIDIA_VISIBLE_DEVICES={}'.format(dockers_nvidia_visible_devices)]
# check if we need to map host folders
if os.environ.get(ENV_K8S_HOST_MOUNT):
if ENV_DOCKER_HOST_MOUNT.get():
# expect TRAINS_AGENT_K8S_HOST_MOUNT = '/mnt/host/data:/root/.trains'
k8s_node_mnt, _, k8s_pod_mnt = os.environ.get(ENV_K8S_HOST_MOUNT).partition(':')
k8s_node_mnt, _, k8s_pod_mnt = ENV_DOCKER_HOST_MOUNT.get().partition(':')
# search and replace all the host folders with the k8s
host_mounts = [host_apt_cache, host_pip_cache, host_pip_dl, host_cache, host_vcs_cache]
for i, m in enumerate(host_mounts):
@ -2112,6 +2112,7 @@ class Worker(ServiceCommandSection):
# copy the configuration file into the mounted folder
new_conf_file = os.path.join(k8s_pod_mnt, '.trains_agent.{}.cfg'.format(quote(worker_id, safe="")))
try:
rm_tree(new_conf_file)
rm_file(new_conf_file)
shutil.copy(conf_file, new_conf_file)
conf_file = new_conf_file.replace(k8s_pod_mnt, k8s_node_mnt)

View File

@ -124,7 +124,7 @@ ENV_AGENT_GIT_USER = EnvironmentConfig('TRAINS_AGENT_GIT_USER')
ENV_AGENT_GIT_PASS = EnvironmentConfig('TRAINS_AGENT_GIT_PASS')
ENV_TASK_EXECUTE_AS_USER = 'TRAINS_AGENT_EXEC_USER'
ENV_TASK_EXTRA_PYTHON_PATH = 'TRAINS_AGENT_EXTRA_PYTHON_PATH'
ENV_K8S_HOST_MOUNT = 'TRAINS_AGENT_K8S_HOST_MOUNT'
ENV_DOCKER_HOST_MOUNT = EnvironmentConfig('TRAINS_AGENT_K8S_HOST_MOUNT', 'TRAINS_AGENT_DOCKER_HOST_MOUNT')
class FileBuffering(IntEnum):

View File

@ -4,7 +4,7 @@ from time import sleep
from glob import glob
from tempfile import gettempdir, NamedTemporaryFile
from trains_agent.definitions import ENV_K8S_HOST_MOUNT
from trains_agent.definitions import ENV_DOCKER_HOST_MOUNT
from trains_agent.helper.base import warning
@ -85,7 +85,7 @@ class Singleton(object):
pass
worker = None
if api_client and os.environ.get(ENV_K8S_HOST_MOUNT) and uid:
if api_client and ENV_DOCKER_HOST_MOUNT.get() and uid:
try:
worker = [w for w in api_client.workers.get_all() if w.id == uid]
except Exception:
@ -134,8 +134,8 @@ class Singleton(object):
@classmethod
def _get_temp_folder(cls):
if os.environ.get(ENV_K8S_HOST_MOUNT):
return os.environ.get(ENV_K8S_HOST_MOUNT).split(':')[-1]
if ENV_DOCKER_HOST_MOUNT.get():
return ENV_DOCKER_HOST_MOUNT.get().split(':')[-1]
return gettempdir()
@classmethod