Compare commits

...

6 Commits

Author     SHA1        Date                        Message
allegroai  d9b9b4984b  2021-03-04 20:12:50 +02:00  Version bump to v0.17.2
allegroai  8a46dc6b03  2021-03-04 20:07:34 +02:00  Update default_docker in docs
allegroai  205f9dd816  2021-03-03 15:07:06 +02:00  Fix k8s glue does not pass docker environment variables; Remove deprecated flags
allegroai  9dfa1294e2  2021-02-28 19:47:44 +02:00  Add agent.enable_task_env to set the OS environment based on the Environment section of the Task
allegroai  f019905720  2021-02-28 19:47:09 +02:00  Fix venv cache support for local folders
allegroai  9c257858dd  2021-02-23 18:54:38 +02:00  Fix venv cache support for local folders
6 changed files with 40 additions and 11 deletions

View File

@@ -149,6 +149,9 @@
# arguments: ["--ipc=host", ]
}
+ # set the OS environments based on the Task's Environment section before launching the Task process.
+ enable_task_env: false
# set the initial bash script to execute at the startup of any docker.
# all lines will be executed regardless of their exit code.
# {python_single_digit} is translated to 'python3' or 'python2' according to requested python version
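The enable_task_env option introduced above is off by default and only has an effect when the Task actually carries values in an Environment parameter section. A hedged sketch of how such values might be attached from the experiment side with the clearml SDK follows; the Task.init arguments and the "Environment/..." key prefix are assumptions based on the SDK's usual Section/name parameter convention, not something defined in this diff.

    from clearml import Task  # clearml SDK, assumed to be available alongside clearml-agent

    # Illustrative only: store a couple of values in the Task's "Environment" section
    # so that an agent configured with agent.enable_task_env: true could export them
    # into the process environment before launching the Task.
    task = Task.init(project_name="examples", task_name="env-injection-demo")
    task.set_parameters({
        "Environment/MY_API_TOKEN": "dummy-token",   # hypothetical variable names and values
        "Environment/DATA_ROOT": "/mnt/datasets",
    })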

View File

@@ -1742,11 +1742,14 @@ class Worker(ServiceCommandSection):
base_interpreter=package_api.requirements_manager.get_interpreter(),
requirement_substitutions=[OnlyExternalRequirements]
)
package_api.cwd = vcs.location if vcs and vcs.location else directory
# make sure we run the handlers
cached_requirements = \
{k: package_api.requirements_manager.replace(requirements[k] or '')
for k in requirements}
- package_api.load_requirements(cached_requirements)
+ if str(cached_requirements.get('pip', '')).strip() \
+         or str(cached_requirements.get('conda', '')).strip():
+     package_api.load_requirements(cached_requirements)
elif not is_cached and not standalone_mode:
self.install_requirements(
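The guard added in this hunk only calls load_requirements when the substituted listing still contains something to install. A small standalone illustration of that check, using plain Python and an invented requirements dict rather than the Worker class:

    def has_installable_requirements(cached_requirements):
        """True when the pip or conda listing is non-empty after substitution."""
        return bool(str(cached_requirements.get('pip', '')).strip()
                    or str(cached_requirements.get('conda', '')).strip())

    # After OnlyExternalRequirements filtering, only a local/git package may remain.
    print(has_installable_requirements({'pip': '-e ./my_local_package\n'}))  # True
    print(has_installable_requirements({'pip': '   \n', 'conda': ''}))       # False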
@@ -1820,6 +1823,12 @@ class Worker(ServiceCommandSection):
if repo_info:
self._update_commit_id(current_task.id, execution, repo_info)
+ # get Task Environments and update the process
+ if self._session.config.get('agent.enable_task_env', None):
+     hyper_params = self._get_task_os_env(current_task)
+     if hyper_params:
+         os.environ.update(hyper_params)
# Add the script CWD to the python path
python_path = get_python_path(script_dir, execution.entry_point, self.package_api, is_conda_env=self.is_conda)
if ENV_TASK_EXTRA_PYTHON_PATH.get():
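The block above is gated on agent.enable_task_env and touches the environment only when the Task actually returned Environment parameters. A minimal self-contained sketch of that behaviour, with plain values standing in for the session and task objects:

    import os

    def inject_task_env(enable_task_env, task_env_params):
        """Mimic the gate above: update os.environ only when enabled and non-empty."""
        if not enable_task_env:
            return
        if task_env_params:
            os.environ.update(task_env_params)

    inject_task_env(True, {"MY_API_TOKEN": "dummy-token"})  # illustrative values
    print(os.environ.get("MY_API_TOKEN"))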
@@ -1897,6 +1906,20 @@ class Worker(ServiceCommandSection):
return 1 if exit_code is None else exit_code
+ def _get_task_os_env(self, current_task):
+     if not self._session.check_min_api_version('2.9'):
+         return None
+     # noinspection PyBroadException
+     try:
+         hyper_params = self._session.get(
+             service="tasks", action="get_hyper_params", tasks=[current_task.id])
+         hyper_params = {
+             str(p['name']): str(p['value'])
+             for p in hyper_params['params'][0]['hyperparams'] if p['section'] == 'Environment'}
+         return hyper_params
+     except Exception:
+         return None
def set_docker_variables(self, docker):
temp_config, docker_image_func = self.get_docker_config_cmd(docker)
self.dump_config(self.temp_config_path, config=temp_config)
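The new _get_task_os_env helper filters the tasks.get_hyper_params response down to the Environment section. Assuming a response shaped the way this code expects (a 'params' list whose entries carry 'hyperparams' dicts with section/name/value), the filtering step can be exercised on its own; the payload below is fabricated for illustration:

    # Standalone illustration of the Environment filtering performed by _get_task_os_env.
    response = {
        'params': [{
            'task': 'aabbccdd',  # made-up task id
            'hyperparams': [
                {'section': 'Args', 'name': 'batch_size', 'value': '32'},
                {'section': 'Environment', 'name': 'MY_API_TOKEN', 'value': 'dummy-token'},
                {'section': 'Environment', 'name': 'DATA_ROOT', 'value': '/mnt/datasets'},
            ],
        }],
    }

    env_vars = {
        str(p['name']): str(p['value'])
        for p in response['params'][0]['hyperparams'] if p['section'] == 'Environment'}
    print(env_vars)  # {'MY_API_TOKEN': 'dummy-token', 'DATA_ROOT': '/mnt/datasets'}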

View File

@@ -36,8 +36,7 @@ class K8sIntegration(Worker):
KUBECTL_RUN_CMD = "kubectl run clearml-{queue_name}-id-{task_id} " \
"--image {docker_image} " \
"--restart=Never --replicas=1 " \
"--generator=run-pod/v1 " \
"--restart=Never " \
"--namespace={namespace}"
KUBECTL_DELETE_CMD = "kubectl delete pods " \
@@ -273,13 +272,13 @@ class K8sIntegration(Worker):
return
if task_data.execution.docker_cmd:
- docker_parts = task_data.execution.docker_cmd
+ docker_cmd = task_data.execution.docker_cmd
else:
- docker_parts = str(ENV_DOCKER_IMAGE.get() or
-                    self._session.config.get("agent.default_docker.image", "nvidia/cuda"))
+ docker_cmd = str(ENV_DOCKER_IMAGE.get() or
+                  self._session.config.get("agent.default_docker.image", "nvidia/cuda"))
# take the first part, this is the docker image name (not arguments)
- docker_parts = docker_parts.split()
+ docker_parts = docker_cmd.split()
docker_image = docker_parts[0]
docker_args = docker_parts[1:] if len(docker_parts) > 1 else []
@@ -355,7 +354,7 @@ class K8sIntegration(Worker):
else:
output, error = self._kubectl_run(
create_clearml_conf=create_clearml_conf,
- labels=labels, docker_image=docker_image,
+ labels=labels, docker_image=docker_cmd,
task_data=task_data,
task_id=task_id, queue=queue, queue_name=safe_queue_name)
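With these changes the full docker command string (image plus arguments, including any -e VAR=value environment flags) is carried through instead of the bare image name, which is what lets docker environment variables reach the pod. The split into image and arguments mirrors the diff; the mapping of -e arguments to Kubernetes-style env entries below is only one plausible way they could be consumed and is not shown in this hunk:

    # Sample docker command string; values are invented for illustration.
    docker_cmd = "nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04 -e MY_TOKEN=abc -e DATA_ROOT=/mnt/data"

    docker_parts = docker_cmd.split()
    docker_image = docker_parts[0]
    docker_args = docker_parts[1:] if len(docker_parts) > 1 else []

    env_entries = []
    args_iter = iter(docker_args)
    for arg in args_iter:
        if arg in ("-e", "--env"):
            name, _, value = next(args_iter, "").partition("=")
            if name:
                env_entries.append({"name": name, "value": value})

    print(docker_image)   # nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04
    print(env_entries)    # [{'name': 'MY_TOKEN', ...}, {'name': 'DATA_ROOT', ...}]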

View File

@@ -21,7 +21,8 @@ class ExternalRequirements(SimpleSubstitution):
# noinspection PyBroadException
try:
if not req.name and req.req and not req.req.editable and not req.req.vcs and \
- req.req.line and not req.req.line.strip().split('#')[0].lower().endswith('.whl'):
+ req.req.line and req.req.line.strip().split('#')[0] and \
+         not req.req.line.strip().split('#')[0].lower().endswith('.whl'):
return True
except Exception:
pass
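The extra truthiness check on split('#')[0] means a requirement line that is empty before a '#' (for example a bare fragment comment) no longer counts as an external requirement, while wheel files stay excluded. A small standalone check of the string logic, with invented sample lines:

    def qualifies_as_external(line):
        """Mirror the string checks above: non-empty before '#', and not a wheel file."""
        head = line.strip().split('#')[0]
        return bool(head) and not head.lower().endswith('.whl')

    print(qualifies_as_external("git+https://github.com/user/repo.git#egg=repo"))  # True
    print(qualifies_as_external("package-1.0.0-py3-none-any.whl"))                 # False
    print(qualifies_as_external("#egg=only-a-fragment"))                           # False (newly excluded)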

View File

@@ -1 +1 @@
- __version__ = '0.17.1'
+ __version__ = '0.17.2'

View File

@@ -141,12 +141,15 @@ agent {
default_docker: {
# default docker image to use when running in docker mode
image: "nvidia/cuda:10.1-runtime-ubuntu18.04"
image: "nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04"
# optional arguments to pass to docker image
# arguments: ["--ipc=host"]
}
+ # set the OS environments based on the Task's Environment section before launching the Task process.
+ enable_task_env: false
# CUDA versions used for Conda setup & solving PyTorch wheel packages
# it Should be detected automatically. Override with os environment CUDA_VERSION / CUDNN_VERSION
# cuda_version: 10.1
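The default image bump above only applies when nothing more specific is configured. A hedged sketch of the resolution order that the k8s hunk earlier makes visible (environment override first, then agent.default_docker.image, then a hard-coded fallback); treating CLEARML_DOCKER_IMAGE as the variable behind ENV_DOCKER_IMAGE is an assumption:

    import os

    def resolve_default_docker_image(config):
        """Rough sketch: env override, then agent.default_docker.image, then a fallback."""
        return str(
            os.environ.get("CLEARML_DOCKER_IMAGE")          # assumed name of ENV_DOCKER_IMAGE
            or config.get("agent.default_docker.image")
            or "nvidia/cuda"
        )

    # Example using the value introduced in this diff.
    print(resolve_default_docker_image(
        {"agent.default_docker.image": "nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04"}))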