mirror of
https://github.com/clearml/clearml-agent
synced 2025-06-26 18:16:15 +00:00
Compare commits
7 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
249b51a31b | ||
|
|
da19ef26c4 | ||
|
|
f69e16ea9d | ||
|
|
efa1f71dac | ||
|
|
692cb8cf13 | ||
|
|
ebdc215632 | ||
|
|
b2da639582 |
@@ -57,10 +57,7 @@ from clearml_agent.definitions import (
|
||||
ENV_WORKER_ID,
|
||||
ENV_WORKER_TAGS,
|
||||
ENV_DOCKER_SKIP_GPUS_FLAG,
|
||||
ENV_AGENT_SECRET_KEY,
|
||||
ENV_AGENT_AUTH_TOKEN,
|
||||
ENV_AWS_SECRET_KEY,
|
||||
ENV_AZURE_ACCOUNT_KEY,
|
||||
ENV_AGENT_DISABLE_SSH_MOUNT,
|
||||
ENV_SSH_AUTH_SOCK,
|
||||
ENV_AGENT_SKIP_PIP_VENV_INSTALL,
|
||||
@@ -71,6 +68,7 @@ from clearml_agent.definitions import (
|
||||
ENV_DEBUG_INFO,
|
||||
ENV_CHILD_AGENTS_COUNT_CMD,
|
||||
ENV_DOCKER_ARGS_FILTERS,
|
||||
ENV_FORCE_SYSTEM_SITE_PACKAGES,
|
||||
)
|
||||
from clearml_agent.definitions import WORKING_REPOSITORY_DIR, PIP_EXTRA_INDICES
|
||||
from clearml_agent.errors import (
|
||||
@@ -688,7 +686,7 @@ class Worker(ServiceCommandSection):
|
||||
self._docker_args_filters = []
|
||||
|
||||
self._task_ping_interval_sec = max(
|
||||
0, text_to_int(self._session.config.get("agent.task_ping_interval_sec", 120.0))
|
||||
0, text_to_int(self._session.config.get("agent.task_ping_interval_sec", 60.0))
|
||||
)
|
||||
|
||||
@classmethod
|
||||
@@ -1786,7 +1784,8 @@ class Worker(ServiceCommandSection):
|
||||
if stderr:
|
||||
stderr.flush()
|
||||
|
||||
if self._task_ping_interval_sec and time() - last_task_ping > self._task_ping_interval_sec:
|
||||
if not stopping and self._task_ping_interval_sec and \
|
||||
time() - last_task_ping > self._task_ping_interval_sec:
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
res = (session or self._session).send(tasks_api.PingRequest(task=task_id))
|
||||
@@ -1795,7 +1794,7 @@ class Worker(ServiceCommandSection):
|
||||
except Exception as ex:
|
||||
self.log.error("Failed sending ping: %s", str(ex))
|
||||
finally:
|
||||
self._task_ping_interval_sec = time()
|
||||
last_task_ping = time()
|
||||
|
||||
# get diff from previous poll
|
||||
printed_lines, stdout_pos_count = _print_file(stdout_path, stdout_pos_count)
|
||||
@@ -2105,8 +2104,9 @@ class Worker(ServiceCommandSection):
|
||||
|
||||
end_of_build_marker = "build.done=true"
|
||||
docker_cmd_suffix = ' build --id {task_id} --install-globally; ' \
|
||||
'echo "" >> {conf_file} ; ' \
|
||||
'echo {end_of_build_marker} >> {conf_file} ; ' \
|
||||
'ORG=$(stat -c "%u:%g" {conf_file}) ; chown $(whoami):$(whoami) {conf_file} ; ' \
|
||||
'echo "" >> {conf_file} ; echo {end_of_build_marker} >> {conf_file} ; ' \
|
||||
'chown $ORG {conf_file} ; ' \
|
||||
'bash'.format(
|
||||
task_id=task_id,
|
||||
end_of_build_marker=end_of_build_marker,
|
||||
@@ -2125,10 +2125,16 @@ class Worker(ServiceCommandSection):
|
||||
|
||||
# now we need to wait until the line shows on our configuration file.
|
||||
while True:
|
||||
while temp_config.stat().st_mtime == base_time_stamp:
|
||||
sleep(5.0)
|
||||
with open(temp_config.as_posix()) as f:
|
||||
lines = [l.strip() for l in f.readlines()]
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
while temp_config.stat().st_mtime == base_time_stamp:
|
||||
sleep(5.0)
|
||||
with open(temp_config.as_posix()) as f:
|
||||
lines = [l.strip() for l in f.readlines()]
|
||||
except Exception as ex:
|
||||
# print("Failed reading status file [{}], retrying in 2 seconds".format(ex))
|
||||
sleep(2.0)
|
||||
|
||||
if 'build.done=true' in lines:
|
||||
break
|
||||
base_time_stamp = temp_config.stat().st_mtime
|
||||
@@ -2157,6 +2163,8 @@ class Worker(ServiceCommandSection):
|
||||
print(commit_docker(container_name=target, docker_id=docker_id, apply_change=change))
|
||||
shutdown_docker_process(docker_id=docker_id)
|
||||
|
||||
safe_remove_file(temp_config.as_posix())
|
||||
|
||||
return
|
||||
|
||||
def _get_task_python_version(self, task):
|
||||
@@ -2836,8 +2844,8 @@ class Worker(ServiceCommandSection):
|
||||
# Todo: add support for poetry caching
|
||||
if not self.poetry.enabled:
|
||||
# add to cache
|
||||
print('Adding venv into cache: {}'.format(add_venv_folder_cache))
|
||||
if add_venv_folder_cache:
|
||||
print('Adding venv into cache: {}'.format(add_venv_folder_cache))
|
||||
self.package_api.add_cached_venv(
|
||||
requirements=[freeze, previous_reqs],
|
||||
docker_cmd=execution_info.docker_cmd if execution_info else None,
|
||||
@@ -3458,7 +3466,6 @@ class Worker(ServiceCommandSection):
|
||||
temp_config.put("sdk.storage.cache.default_base_dir", mounted_cache_dir)
|
||||
temp_config.put("agent.pip_download_cache.path", mounted_pip_dl_dir)
|
||||
temp_config.put("agent.vcs_cache.path", mounted_vcs_cache)
|
||||
temp_config.put("agent.package_manager.system_site_packages", True)
|
||||
temp_config.put("agent.package_manager.conda_env_as_base_docker", False)
|
||||
temp_config.put("agent.default_python", "")
|
||||
temp_config.put("agent.python_binary", "")
|
||||
@@ -3470,6 +3477,11 @@ class Worker(ServiceCommandSection):
|
||||
temp_config.put("agent.git_pass", (ENV_AGENT_GIT_PASS.get() or
|
||||
self._session.config.get("agent.git_pass", None)))
|
||||
|
||||
force_system_site_packages = ENV_FORCE_SYSTEM_SITE_PACKAGES.get()
|
||||
force_system_site_packages = force_system_site_packages if force_system_site_packages is not None else True
|
||||
if force_system_site_packages:
|
||||
temp_config.put("agent.package_manager.system_site_packages", True)
|
||||
|
||||
if temp_config.get("agent.venvs_cache.path", None):
|
||||
temp_config.put("agent.venvs_cache.path", '/root/.clearml/venvs-cache')
|
||||
|
||||
|
||||
@@ -154,6 +154,11 @@ ENV_CHILD_AGENTS_COUNT_CMD = EnvironmentConfig('CLEARML_AGENT_CHILD_AGENTS_COUNT
|
||||
ENV_DOCKER_ARGS_FILTERS = EnvironmentConfig('CLEARML_AGENT_DOCKER_ARGS_FILTERS')
|
||||
ENV_DOCKER_ARGS_HIDE_ENV = EnvironmentConfig('CLEARML_AGENT_DOCKER_ARGS_HIDE_ENV')
|
||||
|
||||
ENV_FORCE_SYSTEM_SITE_PACKAGES = EnvironmentConfig('CLEARML_AGENT_FORCE_SYSTEM_SITE_PACKAGES', type=bool)
|
||||
""" Force system_site_packages: true when running tasks in containers (i.e. docker mode or k8s glue) """
|
||||
|
||||
|
||||
|
||||
ENV_CUSTOM_BUILD_SCRIPT = EnvironmentConfig('CLEARML_AGENT_CUSTOM_BUILD_SCRIPT')
|
||||
"""
|
||||
Specifies a custom environment setup script to be executed instead of installing a virtual environment.
|
||||
|
||||
@@ -22,7 +22,12 @@ import yaml
|
||||
from clearml_agent.backend_api.session import Request
|
||||
from clearml_agent.commands.events import Events
|
||||
from clearml_agent.commands.worker import Worker, get_task_container, set_task_container, get_next_task
|
||||
from clearml_agent.definitions import ENV_DOCKER_IMAGE, ENV_AGENT_GIT_USER, ENV_AGENT_GIT_PASS
|
||||
from clearml_agent.definitions import (
|
||||
ENV_DOCKER_IMAGE,
|
||||
ENV_AGENT_GIT_USER,
|
||||
ENV_AGENT_GIT_PASS,
|
||||
ENV_FORCE_SYSTEM_SITE_PACKAGES,
|
||||
)
|
||||
from clearml_agent.errors import APIError
|
||||
from clearml_agent.glue.definitions import ENV_START_AGENT_SCRIPT_PATH
|
||||
from clearml_agent.helper.base import safe_remove_file
|
||||
@@ -136,8 +141,11 @@ class K8sIntegration(Worker):
|
||||
self.k8s_pending_queue_name = k8s_pending_queue_name or self.K8S_PENDING_QUEUE
|
||||
self.k8s_pending_queue_id = None
|
||||
self.container_bash_script = container_bash_script or self.CONTAINER_BASH_SCRIPT
|
||||
# Always do system packages, because we will be running inside a docker
|
||||
self._session.config.put("agent.package_manager.system_site_packages", True)
|
||||
force_system_packages = ENV_FORCE_SYSTEM_SITE_PACKAGES.get()
|
||||
self._force_system_site_packages = force_system_packages if force_system_packages is not None else True
|
||||
if self._force_system_site_packages:
|
||||
# Use system packages, because we will be running inside a docker
|
||||
self._session.config.put("agent.package_manager.system_site_packages", True)
|
||||
# Add debug logging
|
||||
if debug:
|
||||
self.log.logger.disabled = False
|
||||
@@ -455,7 +463,7 @@ class K8sIntegration(Worker):
|
||||
git_user = ENV_AGENT_GIT_USER.get() or self._session.config.get("agent.git_user", None)
|
||||
git_pass = ENV_AGENT_GIT_PASS.get() or self._session.config.get("agent.git_pass", None)
|
||||
extra_config_values = [
|
||||
'agent.package_manager.system_site_packages: true',
|
||||
'agent.package_manager.system_site_packages: true' if self._force_system_site_packages else '',
|
||||
'agent.git_user: "{}"'.format(git_user) if git_user else '',
|
||||
'agent.git_pass: "{}"'.format(git_pass) if git_pass else '',
|
||||
]
|
||||
|
||||
@@ -177,7 +177,7 @@ class PackageManager(object):
|
||||
cls._pip_version.append("==" + version)
|
||||
|
||||
@classmethod
|
||||
def get_pip_versions(cls, pip="pip", wrap='"'):
|
||||
def get_pip_versions(cls, pip="pip", wrap=''):
|
||||
return [
|
||||
(wrap + pip + version + wrap)
|
||||
for version in cls._pip_version or [pip]
|
||||
|
||||
@@ -117,10 +117,11 @@ def terminate_all_child_processes(pid=None, timeout=10., include_parent=True):
|
||||
|
||||
|
||||
def get_docker_id(docker_cmd_contains):
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
containers_running = get_bash_output(cmd='docker ps --no-trunc --format \"{{.ID}}: {{.Command}}\"')
|
||||
for docker_line in containers_running.split('\n'):
|
||||
parts = docker_line.split(':')
|
||||
parts = docker_line.split(':', 1)
|
||||
if docker_cmd_contains in parts[-1]:
|
||||
# we found our docker, return it
|
||||
return parts[0]
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = '1.5.1rc0'
|
||||
__version__ = '1.5.2rc0'
|
||||
|
||||
@@ -9,7 +9,7 @@ python-dateutil>=2.4.2,<2.9.0
|
||||
pyjwt>=2.4.0,<2.7.0
|
||||
PyYAML>=3.12,<6.1
|
||||
requests>=2.20.0,<2.29.0
|
||||
six>=1.13.0,<1.16.0
|
||||
six>=1.13.0,<1.17.0
|
||||
typing>=3.6.4,<3.8.0 ; python_version < '3.5'
|
||||
urllib3>=1.21.1,<1.27.0
|
||||
virtualenv>=16,<21
|
||||
|
||||
Reference in New Issue
Block a user