mirror of
https://github.com/clearml/clearml-agent
synced 2025-06-26 18:16:15 +00:00
Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d9b9b4984b | ||
|
|
8a46dc6b03 | ||
|
|
205f9dd816 | ||
|
|
9dfa1294e2 | ||
|
|
f019905720 | ||
|
|
9c257858dd |
@@ -149,6 +149,9 @@
|
||||
# arguments: ["--ipc=host", ]
|
||||
}
|
||||
|
||||
# set OS environment variables from the Task's Environment section before launching the Task process.
|
||||
enable_task_env: false
|
||||
|
||||
# set the initial bash script to execute at the startup of any docker.
|
||||
# all lines will be executed regardless of their exit code.
|
||||
# {python_single_digit} is translated to 'python3' or 'python2' according to requested python version
|
||||
|
||||
@@ -1742,11 +1742,14 @@ class Worker(ServiceCommandSection):
|
||||
base_interpreter=package_api.requirements_manager.get_interpreter(),
|
||||
requirement_substitutions=[OnlyExternalRequirements]
|
||||
)
|
||||
package_api.cwd = vcs.location if vcs and vcs.location else directory
|
||||
# make sure we run the handlers
|
||||
cached_requirements = \
|
||||
{k: package_api.requirements_manager.replace(requirements[k] or '')
|
||||
for k in requirements}
|
||||
package_api.load_requirements(cached_requirements)
|
||||
if str(cached_requirements.get('pip', '')).strip() \
|
||||
or str(cached_requirements.get('conda', '')).strip():
|
||||
package_api.load_requirements(cached_requirements)
|
||||
|
||||
elif not is_cached and not standalone_mode:
|
||||
self.install_requirements(
|
||||
@@ -1820,6 +1823,12 @@ class Worker(ServiceCommandSection):
|
||||
if repo_info:
|
||||
self._update_commit_id(current_task.id, execution, repo_info)
|
||||
|
||||
# get the Task's Environment section and apply it to this process's OS environment
|
||||
if self._session.config.get('agent.enable_task_env', None):
|
||||
hyper_params = self._get_task_os_env(current_task)
|
||||
if hyper_params:
|
||||
os.environ.update(hyper_params)
|
||||
|
||||
# Add the script CWD to the python path
|
||||
python_path = get_python_path(script_dir, execution.entry_point, self.package_api, is_conda_env=self.is_conda)
|
||||
if ENV_TASK_EXTRA_PYTHON_PATH.get():
|
||||
@@ -1897,6 +1906,20 @@ class Worker(ServiceCommandSection):
|
||||
|
||||
return 1 if exit_code is None else exit_code
|
||||
|
||||
def _get_task_os_env(self, current_task):
    """
    Collect the Task's 'Environment' hyper-parameter section as OS environment variables.

    Queries ``tasks.get_hyper_params`` for the given Task and keeps only the entries
    whose ``section`` equals ``'Environment'``.

    :param current_task: Task object; only its ``id`` attribute is read.
    :return: dict mapping variable name (str) to value (str), or None when the
        session reports an API version below 2.9 or the request/parsing fails.
    """
    if not self._session.check_min_api_version('2.9'):
        return None

    # Best effort: any failure while fetching or parsing means "no environment overrides".
    # noinspection PyBroadException
    try:
        response = self._session.get(
            service="tasks", action="get_hyper_params", tasks=[current_task.id])

        task_env = {}
        for param in response['params'][0]['hyperparams']:
            # use .get() so one entry missing a key does not discard all the others
            if param.get('section') != 'Environment':
                continue
            name = param.get('name')
            if not name:
                # an unnamed entry cannot become an environment variable
                continue
            value = param.get('value')
            # a missing value becomes an empty string rather than the literal 'None'
            task_env[str(name)] = '' if value is None else str(value)
        return task_env
    except Exception:
        return None
|
||||
|
||||
def set_docker_variables(self, docker):
|
||||
temp_config, docker_image_func = self.get_docker_config_cmd(docker)
|
||||
self.dump_config(self.temp_config_path, config=temp_config)
|
||||
|
||||
@@ -36,8 +36,7 @@ class K8sIntegration(Worker):
|
||||
|
||||
KUBECTL_RUN_CMD = "kubectl run clearml-{queue_name}-id-{task_id} " \
|
||||
"--image {docker_image} " \
|
||||
"--restart=Never --replicas=1 " \
|
||||
"--generator=run-pod/v1 " \
|
||||
"--restart=Never " \
|
||||
"--namespace={namespace}"
|
||||
|
||||
KUBECTL_DELETE_CMD = "kubectl delete pods " \
|
||||
@@ -273,13 +272,13 @@ class K8sIntegration(Worker):
|
||||
return
|
||||
|
||||
if task_data.execution.docker_cmd:
|
||||
docker_parts = task_data.execution.docker_cmd
|
||||
docker_cmd = task_data.execution.docker_cmd
|
||||
else:
|
||||
docker_parts = str(ENV_DOCKER_IMAGE.get() or
|
||||
self._session.config.get("agent.default_docker.image", "nvidia/cuda"))
|
||||
docker_cmd = str(ENV_DOCKER_IMAGE.get() or
|
||||
self._session.config.get("agent.default_docker.image", "nvidia/cuda"))
|
||||
|
||||
# take the first part, this is the docker image name (not arguments)
|
||||
docker_parts = docker_parts.split()
|
||||
docker_parts = docker_cmd.split()
|
||||
docker_image = docker_parts[0]
|
||||
docker_args = docker_parts[1:] if len(docker_parts) > 1 else []
|
||||
|
||||
@@ -355,7 +354,7 @@ class K8sIntegration(Worker):
|
||||
else:
|
||||
output, error = self._kubectl_run(
|
||||
create_clearml_conf=create_clearml_conf,
|
||||
labels=labels, docker_image=docker_image,
|
||||
labels=labels, docker_image=docker_cmd,
|
||||
task_data=task_data,
|
||||
task_id=task_id, queue=queue, queue_name=safe_queue_name)
|
||||
|
||||
|
||||
@@ -21,7 +21,8 @@ class ExternalRequirements(SimpleSubstitution):
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
if not req.name and req.req and not req.req.editable and not req.req.vcs and \
|
||||
req.req.line and not req.req.line.strip().split('#')[0].lower().endswith('.whl'):
|
||||
req.req.line and req.req.line.strip().split('#')[0] and \
|
||||
not req.req.line.strip().split('#')[0].lower().endswith('.whl'):
|
||||
return True
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = '0.17.1'
|
||||
__version__ = '0.17.2'
|
||||
|
||||
@@ -141,12 +141,15 @@ agent {
|
||||
|
||||
default_docker: {
|
||||
# default docker image to use when running in docker mode
|
||||
image: "nvidia/cuda:10.1-runtime-ubuntu18.04"
|
||||
image: "nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04"
|
||||
|
||||
# optional arguments to pass to docker image
|
||||
# arguments: ["--ipc=host"]
|
||||
}
|
||||
|
||||
# set OS environment variables from the Task's Environment section before launching the Task process.
|
||||
enable_task_env: false
|
||||
|
||||
# CUDA versions used for Conda setup & solving PyTorch wheel packages
|
||||
# These should be detected automatically. Override with the OS environment variables CUDA_VERSION / CUDNN_VERSION
|
||||
# cuda_version: 10.1
|
||||
|
||||
Reference in New Issue
Block a user