mirror of
https://github.com/clearml/clearml-agent
synced 2025-06-26 18:16:15 +00:00
Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
42450dcbc4 | ||
|
|
ef47225d41 | ||
|
|
e61accefb9 | ||
|
|
5c1543d112 | ||
|
|
7ff6aee20c | ||
|
|
37ea381d98 | ||
|
|
67fc884895 | ||
|
|
1e3646b57c |
@@ -137,6 +137,12 @@
|
||||
},
|
||||
|
||||
translate_ssh: true,
|
||||
|
||||
# set "disable_ssh_mount: true" to disable the automatic mount of ~/.ssh folder into the docker containers
|
||||
# default is false, automatically mounts ~/.ssh
|
||||
# Must be set to true if using "clearml-session" with this agent!
|
||||
# disable_ssh_mount: false
|
||||
|
||||
# reload configuration file every daemon execution
|
||||
reload_config: false,
|
||||
|
||||
|
||||
@@ -72,6 +72,7 @@ from clearml_agent.definitions import (
|
||||
WORKING_STANDALONE_DIR,
|
||||
ENV_DEBUG_INFO,
|
||||
ENV_CHILD_AGENTS_COUNT_CMD,
|
||||
ENV_DOCKER_ARGS_FILTERS,
|
||||
)
|
||||
from clearml_agent.definitions import WORKING_REPOSITORY_DIR, PIP_EXTRA_INDICES
|
||||
from clearml_agent.errors import (
|
||||
@@ -686,6 +687,16 @@ class Worker(ServiceCommandSection):
|
||||
# str - not supported, version string indicates last server version
|
||||
self._runtime_props_support = None
|
||||
|
||||
# allow docker sanitization, needs backend support
|
||||
if ENV_DOCKER_ARGS_FILTERS.get():
|
||||
self._docker_args_filters = \
|
||||
[re.compile(f) for f in shlex.split(ENV_DOCKER_ARGS_FILTERS.get())]
|
||||
elif self._session.config.get('agent.docker_args_filters', None):
|
||||
self._docker_args_filters = \
|
||||
[re.compile(f) for f in self._session.config.get('agent.docker_args_filters', [])]
|
||||
else:
|
||||
self._docker_args_filters = []
|
||||
|
||||
@classmethod
|
||||
def _verify_command_states(cls, kwargs):
|
||||
"""
|
||||
@@ -3269,6 +3280,11 @@ class Worker(ServiceCommandSection):
|
||||
first_time=first_time,
|
||||
)
|
||||
|
||||
# print message so users know they can enable cache
|
||||
if not self.package_api.is_cached_enabled():
|
||||
print('::: Python virtual environment cache is disabled. '
|
||||
'To accelerate spin-up time set `agent.venvs_cache.path=~/.clearml/venvs-cache` :::\n')
|
||||
|
||||
# check if we have a cached folder
|
||||
if cached_requirements and not skip_pip_venv_install and self.package_api.get_cached_venv(
|
||||
requirements=cached_requirements,
|
||||
@@ -3439,7 +3455,7 @@ class Worker(ServiceCommandSection):
|
||||
'-v', '{}:{}'.format(ENV_SSH_AUTH_SOCK.get(), ENV_SSH_AUTH_SOCK.get()),
|
||||
'-e', ssh_auth_sock_env,
|
||||
]
|
||||
elif ENV_AGENT_DISABLE_SSH_MOUNT.get():
|
||||
elif ENV_AGENT_DISABLE_SSH_MOUNT.get() or self._session.config.get("agent.disable_ssh_mount", None):
|
||||
self._host_ssh_cache = None
|
||||
else:
|
||||
self._host_ssh_cache = mkdtemp(prefix='clearml_agent.ssh.')
|
||||
@@ -3591,9 +3607,7 @@ class Worker(ServiceCommandSection):
|
||||
"""Get the amount of running child agents. In case of any error return 0"""
|
||||
parent_worker_label = self._parent_worker_label.format(self.worker_id)
|
||||
|
||||
default_cmd = 'docker ps --filter label={parent_worker_label} --format ' \
|
||||
'{{"ID":"{{{{ .ID }}}}", "Image": "{{{{ .Image }}}}", ' \
|
||||
'"Names":"{{{{ .Names }}}}", "Labels":"{{{{ .Labels }}}}"}}'
|
||||
default_cmd = 'docker ps --filter label={parent_worker_label} --format {{{{.ID}}}}'
|
||||
child_agents_cmd = ENV_CHILD_AGENTS_COUNT_CMD.get() or default_cmd
|
||||
|
||||
cmd = shlex.split(child_agents_cmd.format(parent_worker_label=parent_worker_label))
|
||||
@@ -3607,6 +3621,31 @@ class Worker(ServiceCommandSection):
|
||||
|
||||
return len(output.splitlines()) if output else 0
|
||||
|
||||
def _filter_docker_args(self, docker_args):
    # type: (List[str]) -> List[str]
    """
    Keep only the docker arguments whose flag matches one of the configured filters.

    ``self._docker_args_filters`` is a list of compiled regular expressions, e.g. the
    patterns "^--env$" and "^-e$". Every argument is stripped and tested with ``match()``:
    a matching flag is kept, anything else is dropped. When a kept flag carries no inline
    "=" value and the following argument does not start with "-", that following argument
    is treated as the flag's value and is kept as well.

    :argument docker_args: List of docker argument strings (flags and values)
    :return: ``docker_args`` itself when it is empty or no filters are configured,
        otherwise a new list holding the kept (stripped) arguments in their original order
    """
    filters = self._docker_args_filters
    # if no filtering, do nothing
    if not docker_args or not filters:
        return docker_args

    # Strip once up front so the filter match and the value lookahead inspect the same text;
    # otherwise a whitespace-prefixed flag could be swallowed as the "value" of a kept flag.
    args = [arg.strip() for arg in docker_args]
    results = []
    index = 0
    while index < len(args):
        flag = args[index]
        index += 1
        if not any(f.match(flag) for f in filters):
            continue
        results.append(flag)
        # a flag without an inline "=value" may take the next argument as its value
        if "=" not in flag and index < len(args) and not args[index].startswith("-"):
            results.append(args[index])
            index += 1
    return results
|
||||
|
||||
def _get_docker_cmd(
|
||||
self,
|
||||
worker_id, parent_worker_id,
|
||||
@@ -3660,6 +3699,7 @@ class Worker(ServiceCommandSection):
|
||||
if docker_arguments:
|
||||
docker_arguments = list(docker_arguments) \
|
||||
if isinstance(docker_arguments, (list, tuple)) else [docker_arguments]
|
||||
docker_arguments = self._filter_docker_args(docker_arguments)
|
||||
base_cmd += [a for a in docker_arguments if a]
|
||||
|
||||
if extra_docker_arguments:
|
||||
|
||||
@@ -150,6 +150,7 @@ ENV_VENV_CACHE_PATH = EnvironmentConfig('CLEARML_AGENT_VENV_CACHE_PATH')
|
||||
ENV_EXTRA_DOCKER_ARGS = EnvironmentConfig('CLEARML_AGENT_EXTRA_DOCKER_ARGS', type=list)
|
||||
ENV_DEBUG_INFO = EnvironmentConfig('CLEARML_AGENT_DEBUG_INFO')
|
||||
ENV_CHILD_AGENTS_COUNT_CMD = EnvironmentConfig('CLEARML_AGENT_CHILD_AGENTS_COUNT_CMD')
|
||||
ENV_DOCKER_ARGS_FILTERS = EnvironmentConfig('CLEARML_AGENT_DOCKER_ARGS_FILTERS')
|
||||
|
||||
ENV_CUSTOM_BUILD_SCRIPT = EnvironmentConfig('CLEARML_AGENT_CUSTOM_BUILD_SCRIPT')
|
||||
"""
|
||||
|
||||
@@ -213,6 +213,13 @@ class PackageManager(object):
|
||||
return
|
||||
return self._get_cache_manager().get_last_copied_entry()
|
||||
|
||||
def is_cached_enabled(self):
    """
    Report whether the virtual environment cache can be used.

    :return: True when a cache manager already exists on this instance, or when a cache
        folder is provided by ENV_VENV_CACHE_PATH or by the session configuration entry
        named by ``self._config_cache_folder``; False otherwise
    """
    # an existing cache manager means there is nothing further to check
    if self._cache_manager:
        return True
    # no manager yet: enabled only if a cache folder is configured (environment value is tried first)
    configured_folder = ENV_VENV_CACHE_PATH.get() or self.session.config.get(self._config_cache_folder, None)
    return bool(configured_folder)
|
||||
|
||||
@classmethod
|
||||
def _generate_reqs_hash_keys(cls, requirements_list, docker_cmd, python_version, cuda_version):
|
||||
# type: (Union[Dict, List[Dict]], Optional[Union[dict, str]], Optional[str], Optional[str]) -> List[str]
|
||||
|
||||
@@ -92,9 +92,10 @@ class ResourceMonitor(object):
|
||||
# None means no filtering, report all gpus
|
||||
self._active_gpus = None
|
||||
try:
|
||||
active_gpus = Session.get_nvidia_visible_env() or ""
|
||||
if active_gpus:
|
||||
self._active_gpus = [g.strip() for g in active_gpus.split(',')]
|
||||
active_gpus = Session.get_nvidia_visible_env()
|
||||
# None means no filtering, report all gpus
|
||||
if active_gpus and active_gpus != "all":
|
||||
self._active_gpus = [g.strip() for g in str(active_gpus).split(',')]
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = '1.4.0'
|
||||
__version__ = '1.4.1'
|
||||
|
||||
@@ -136,6 +136,12 @@ agent {
|
||||
},
|
||||
|
||||
translate_ssh: true,
|
||||
|
||||
# set "disable_ssh_mount: true" to disable the automatic mount of ~/.ssh folder into the docker containers
|
||||
# default is false, automatically mounts ~/.ssh
|
||||
# Must be set to true if using "clearml-session" with this agent!
|
||||
# disable_ssh_mount: false
|
||||
|
||||
# reload configuration file every daemon execution
|
||||
reload_config: false,
|
||||
|
||||
|
||||
Reference in New Issue
Block a user