mirror of
https://github.com/clearml/clearml-agent
synced 2025-06-23 02:05:43 +00:00
Improve docker host-mount support, use TRAINS_AGENT_DOCKER_HOST_MOUNT env var
This commit is contained in:
parent
7c87797a40
commit
53f511f536
@ -39,7 +39,7 @@ from trains_agent.definitions import (
|
|||||||
PROGRAM_NAME,
|
PROGRAM_NAME,
|
||||||
DEFAULT_VENV_UPDATE_URL,
|
DEFAULT_VENV_UPDATE_URL,
|
||||||
ENV_TASK_EXECUTE_AS_USER,
|
ENV_TASK_EXECUTE_AS_USER,
|
||||||
ENV_K8S_HOST_MOUNT,
|
ENV_DOCKER_HOST_MOUNT,
|
||||||
ENV_TASK_EXTRA_PYTHON_PATH,
|
ENV_TASK_EXTRA_PYTHON_PATH,
|
||||||
ENV_AGENT_GIT_USER,
|
ENV_AGENT_GIT_USER,
|
||||||
ENV_AGENT_GIT_PASS)
|
ENV_AGENT_GIT_PASS)
|
||||||
@ -1972,8 +1972,8 @@ class Worker(ServiceCommandSection):
|
|||||||
temp_config.put("agent.cuda_version", "")
|
temp_config.put("agent.cuda_version", "")
|
||||||
temp_config.put("agent.cudnn_version", "")
|
temp_config.put("agent.cudnn_version", "")
|
||||||
temp_config.put("agent.venvs_dir", mounted_venv_dir)
|
temp_config.put("agent.venvs_dir", mounted_venv_dir)
|
||||||
temp_config.put("agent.git_user", (ENV_AGENT_GIT_USER.get() or self.session.config.get("agent.git_user", None)))
|
temp_config.put("agent.git_user", (ENV_AGENT_GIT_USER.get() or self._session.config.get("agent.git_user", None)))
|
||||||
temp_config.put("agent.git_pass", (ENV_AGENT_GIT_PASS.get() or self.session.config.get("agent.git_pass", None)))
|
temp_config.put("agent.git_pass", (ENV_AGENT_GIT_PASS.get() or self._session.config.get("agent.git_pass", None)))
|
||||||
|
|
||||||
host_apt_cache = Path(os.path.expandvars(self._session.config.get(
|
host_apt_cache = Path(os.path.expandvars(self._session.config.get(
|
||||||
"agent.docker_apt_cache", '~/.trains/apt-cache'))).expanduser().as_posix()
|
"agent.docker_apt_cache", '~/.trains/apt-cache'))).expanduser().as_posix()
|
||||||
@ -2084,7 +2084,7 @@ class Worker(ServiceCommandSection):
|
|||||||
base_cmd += [str(a) for a in extra_docker_arguments if a]
|
base_cmd += [str(a) for a in extra_docker_arguments if a]
|
||||||
|
|
||||||
# check if running inside a kubernetes
|
# check if a docker host mount is configured, or if we are running inside Kubernetes
|
||||||
if os.environ.get('KUBERNETES_SERVICE_HOST') and os.environ.get('KUBERNETES_PORT'):
|
if ENV_DOCKER_HOST_MOUNT.get() or (os.environ.get('KUBERNETES_SERVICE_HOST') and os.environ.get('KUBERNETES_PORT')):
|
||||||
# map network to sibling docker, unless we have other network argument
|
# map network to sibling docker, unless we have other network argument
|
||||||
if not any(a.strip().startswith('--network') for a in base_cmd):
|
if not any(a.strip().startswith('--network') for a in base_cmd):
|
||||||
try:
|
try:
|
||||||
@ -2096,9 +2096,9 @@ class Worker(ServiceCommandSection):
|
|||||||
base_cmd += ['-e', 'NVIDIA_VISIBLE_DEVICES={}'.format(dockers_nvidia_visible_devices)]
|
base_cmd += ['-e', 'NVIDIA_VISIBLE_DEVICES={}'.format(dockers_nvidia_visible_devices)]
|
||||||
|
|
||||||
# check if we need to map host folders
|
# check if we need to map host folders
|
||||||
if os.environ.get(ENV_K8S_HOST_MOUNT):
|
if ENV_DOCKER_HOST_MOUNT.get():
|
||||||
# expect TRAINS_AGENT_K8S_HOST_MOUNT = '/mnt/host/data:/root/.trains'
|
# expect TRAINS_AGENT_DOCKER_HOST_MOUNT (or TRAINS_AGENT_K8S_HOST_MOUNT) = '<node mount>:<pod mount>', e.g. '/mnt/host/data:/root/.trains'
|
||||||
k8s_node_mnt, _, k8s_pod_mnt = os.environ.get(ENV_K8S_HOST_MOUNT).partition(':')
|
k8s_node_mnt, _, k8s_pod_mnt = ENV_DOCKER_HOST_MOUNT.get().partition(':')
|
||||||
# search and replace all the host folders with the k8s
|
# search and replace all the host folders with the k8s
|
||||||
host_mounts = [host_apt_cache, host_pip_cache, host_pip_dl, host_cache, host_vcs_cache]
|
host_mounts = [host_apt_cache, host_pip_cache, host_pip_dl, host_cache, host_vcs_cache]
|
||||||
for i, m in enumerate(host_mounts):
|
for i, m in enumerate(host_mounts):
|
||||||
@ -2112,6 +2112,7 @@ class Worker(ServiceCommandSection):
|
|||||||
# copy the configuration file into the mounted folder
|
# copy the configuration file into the mounted folder
|
||||||
new_conf_file = os.path.join(k8s_pod_mnt, '.trains_agent.{}.cfg'.format(quote(worker_id, safe="")))
|
new_conf_file = os.path.join(k8s_pod_mnt, '.trains_agent.{}.cfg'.format(quote(worker_id, safe="")))
|
||||||
try:
|
try:
|
||||||
|
rm_tree(new_conf_file)
|
||||||
rm_file(new_conf_file)
|
rm_file(new_conf_file)
|
||||||
shutil.copy(conf_file, new_conf_file)
|
shutil.copy(conf_file, new_conf_file)
|
||||||
conf_file = new_conf_file.replace(k8s_pod_mnt, k8s_node_mnt)
|
conf_file = new_conf_file.replace(k8s_pod_mnt, k8s_node_mnt)
|
||||||
|
@ -124,7 +124,7 @@ ENV_AGENT_GIT_USER = EnvironmentConfig('TRAINS_AGENT_GIT_USER')
|
|||||||
ENV_AGENT_GIT_PASS = EnvironmentConfig('TRAINS_AGENT_GIT_PASS')
|
ENV_AGENT_GIT_PASS = EnvironmentConfig('TRAINS_AGENT_GIT_PASS')
|
||||||
ENV_TASK_EXECUTE_AS_USER = 'TRAINS_AGENT_EXEC_USER'
|
ENV_TASK_EXECUTE_AS_USER = 'TRAINS_AGENT_EXEC_USER'
|
||||||
ENV_TASK_EXTRA_PYTHON_PATH = 'TRAINS_AGENT_EXTRA_PYTHON_PATH'
|
ENV_TASK_EXTRA_PYTHON_PATH = 'TRAINS_AGENT_EXTRA_PYTHON_PATH'
|
||||||
ENV_K8S_HOST_MOUNT = 'TRAINS_AGENT_K8S_HOST_MOUNT'
|
ENV_DOCKER_HOST_MOUNT = EnvironmentConfig('TRAINS_AGENT_K8S_HOST_MOUNT', 'TRAINS_AGENT_DOCKER_HOST_MOUNT')
|
||||||
|
|
||||||
|
|
||||||
class FileBuffering(IntEnum):
|
class FileBuffering(IntEnum):
|
||||||
|
@ -4,7 +4,7 @@ from time import sleep
|
|||||||
from glob import glob
|
from glob import glob
|
||||||
from tempfile import gettempdir, NamedTemporaryFile
|
from tempfile import gettempdir, NamedTemporaryFile
|
||||||
|
|
||||||
from trains_agent.definitions import ENV_K8S_HOST_MOUNT
|
from trains_agent.definitions import ENV_DOCKER_HOST_MOUNT
|
||||||
from trains_agent.helper.base import warning
|
from trains_agent.helper.base import warning
|
||||||
|
|
||||||
|
|
||||||
@ -85,7 +85,7 @@ class Singleton(object):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
worker = None
|
worker = None
|
||||||
if api_client and os.environ.get(ENV_K8S_HOST_MOUNT) and uid:
|
if api_client and ENV_DOCKER_HOST_MOUNT.get() and uid:
|
||||||
try:
|
try:
|
||||||
worker = [w for w in api_client.workers.get_all() if w.id == uid]
|
worker = [w for w in api_client.workers.get_all() if w.id == uid]
|
||||||
except Exception:
|
except Exception:
|
||||||
@ -134,8 +134,8 @@ class Singleton(object):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _get_temp_folder(cls):
|
def _get_temp_folder(cls):
|
||||||
if os.environ.get(ENV_K8S_HOST_MOUNT):
|
if ENV_DOCKER_HOST_MOUNT.get():
|
||||||
return os.environ.get(ENV_K8S_HOST_MOUNT).split(':')[-1]
|
return ENV_DOCKER_HOST_MOUNT.get().split(':')[-1]
|
||||||
return gettempdir()
|
return gettempdir()
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
Loading…
Reference in New Issue
Block a user