mirror of
https://github.com/clearml/clearml-agent
synced 2025-06-26 18:16:15 +00:00
Merge branch 'master' of https://github.com/allegroai/clearml-agent
This commit is contained in:
commit
4d5680198e
@ -11,6 +11,7 @@
|
||||
|
||||
# Set GIT user/pass credentials (if user/pass are set, GIT protocol will be set to https)
|
||||
# leave blank for GIT SSH credentials (set force_git_ssh_protocol=true to force SSH protocol)
|
||||
# Notice: GitHub personal token is equivalent to password, you can put it directly into `git_pass`
|
||||
# git_user: ""
|
||||
# git_pass: ""
|
||||
# git_host: ""
|
||||
@ -30,6 +31,10 @@
|
||||
# specific python version and the system supports multiple python the agent will use the requested python version)
|
||||
# ignore_requested_python_version: true
|
||||
|
||||
# Force the root folder of the git repository (instead of the working directory) into the PYTHONPATH
|
||||
# default false, only the working directory will be added to the PYTHONPATH
|
||||
# force_git_root_python_path: false
|
||||
|
||||
# select python package manager:
|
||||
# currently supported: pip, conda and poetry
|
||||
# if "pip" or "conda" are used, the agent installs the required packages
|
||||
@ -44,6 +49,8 @@
|
||||
|
||||
# specify pip version to use (examples "<20", "==19.3.1", "", empty string will install the latest version)
|
||||
pip_version: "<20.2",
|
||||
# specify poetry version to use (examples "<2", "==1.1.1", "", empty string will install the latest version)
|
||||
# poetry_version: "<2",
|
||||
|
||||
# virtual environment inherits packages from system
|
||||
system_site_packages: false,
|
||||
@ -156,7 +163,7 @@
|
||||
|
||||
default_docker: {
|
||||
# default docker image to use when running in docker mode
|
||||
image: "nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04"
|
||||
image: "nvidia/cuda:10.2-cudnn7-runtime-ubuntu18.04"
|
||||
|
||||
# optional arguments to pass to docker image
|
||||
# arguments: ["--ipc=host", ]
|
||||
|
@ -18,3 +18,12 @@ ENV_ENABLE_FILES_CONFIG_SECTION = EnvEntry('CLEARML_AGENT_ENABLE_FILES_CONFIG_SE
|
||||
ENV_INITIAL_CONNECT_RETRY_OVERRIDE = EnvEntry(
|
||||
'CLEARML_AGENT_INITIAL_CONNECT_RETRY_OVERRIDE', default=True, converter=safe_text_to_bool
|
||||
)
|
||||
|
||||
"""
|
||||
Experimental option to set the request method for all API requests and auth login.
|
||||
This could be useful when GET requests with payloads are blocked by a server as
|
||||
POST requests can be used instead.
|
||||
|
||||
However, this has not been rigorously tested and may have unintended consequences.
|
||||
"""
|
||||
ENV_API_DEFAULT_REQ_METHOD = EnvEntry("CLEARML_API_DEFAULT_REQ_METHOD")
|
@ -5,10 +5,16 @@ import six
|
||||
|
||||
from .apimodel import ApiModel
|
||||
from .datamodel import DataModel
|
||||
from .defs import ENV_API_DEFAULT_REQ_METHOD
|
||||
|
||||
|
||||
# Validate the configured default request method once, at import time.
# The lookup uses the same default="get" as the other users of this entry, so an
# unset environment variable is validated as "get" instead of calling .upper()
# on a missing value (presumably None when the variable is not defined).
if ENV_API_DEFAULT_REQ_METHOD.get(default="get").upper() not in ("GET", "POST"):
    raise ValueError(
        "CLEARML_API_DEFAULT_REQ_METHOD environment variable must be 'get' or 'post' (any case is allowed)."
    )
|
||||
|
||||
class Request(ApiModel):
|
||||
_method = 'get'
|
||||
_method = ENV_API_DEFAULT_REQ_METHOD.get(default="get")
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
if kwargs:
|
||||
|
@ -15,7 +15,7 @@ from six.moves.urllib.parse import urlparse, urlunparse
|
||||
|
||||
from .callresult import CallResult
|
||||
from .defs import ENV_VERBOSE, ENV_HOST, ENV_ACCESS_KEY, ENV_SECRET_KEY, ENV_WEB_HOST, ENV_FILES_HOST, ENV_AUTH_TOKEN, \
|
||||
ENV_NO_DEFAULT_SERVER, ENV_DISABLE_VAULT_SUPPORT, ENV_INITIAL_CONNECT_RETRY_OVERRIDE
|
||||
ENV_NO_DEFAULT_SERVER, ENV_DISABLE_VAULT_SUPPORT, ENV_INITIAL_CONNECT_RETRY_OVERRIDE, ENV_API_DEFAULT_REQ_METHOD
|
||||
from .request import Request, BatchRequest
|
||||
from .token_manager import TokenManager
|
||||
from ..config import load
|
||||
@ -240,6 +240,12 @@ class Session(TokenManager):
|
||||
except Exception as ex:
|
||||
print("Failed getting vaults: {}".format(ex))
|
||||
|
||||
def verify_feature_set(self, feature_set):
    """
    Make sure the server's feature set is one of the requested ones.

    :param feature_set: A single feature-set name, or a collection of acceptable names
    :raises ValueError: If ``self.feature_set`` is not among the requested feature sets
    """
    # a bare string is a single acceptable name, not a sequence of characters
    accepted = [feature_set] if isinstance(feature_set, str) else feature_set
    if self.feature_set in accepted:
        return
    raise ValueError('ClearML-server does not support requested feature set {}'.format(accepted))
|
||||
|
||||
def _send_request(
|
||||
self,
|
||||
service,
|
||||
@ -609,6 +615,7 @@ class Session(TokenManager):
|
||||
try:
|
||||
data = {"expiration_sec": exp} if exp else {}
|
||||
res = self._send_request(
|
||||
method=ENV_API_DEFAULT_REQ_METHOD.get(default="get"),
|
||||
service="auth",
|
||||
action="login",
|
||||
auth=auth,
|
||||
|
166
clearml_agent/commands/resolver.py
Normal file
166
clearml_agent/commands/resolver.py
Normal file
@ -0,0 +1,166 @@
|
||||
import json
|
||||
import re
|
||||
import shlex
|
||||
from clearml_agent.helper.package.requirements import (
|
||||
RequirementsManager, MarkerRequirement,
|
||||
compare_version_rules, )
|
||||
|
||||
|
||||
def _rule_pattern_matches(entry, pattern, value):
    """Return True if regular expression `pattern` is found in `value`; report and reject invalid patterns."""
    # noinspection PyBroadException
    try:
        return bool(re.search(pattern, value))
    except Exception:
        print('Failed parsing regular expression \"{}\" in rule: {}'.format(pattern, entry))
        return False


def resolve_default_container(session, task_id, container_config):
    """
    Select a default container for a Task using the `agent.default_docker.match_rules` configuration.

    Every rule may carry a `match` section with regular expressions for `project`,
    `script.repository`, `script.branch`, `script.binary` and `container`, and package
    version constraints under `script.requirements.pip` / `script.requirements.conda`.
    The first rule whose constraints all hold is applied: its `image` / `arguments`
    fill in the missing `container` / `arguments` entries of `container_config`.

    :param session: Session used to read the configuration and query the backend
    :param task_id: ID of the Task to resolve the container for
    :param container_config: Current container configuration (dict), updated in place on a match
    :return: `container_config`, untouched when rules are not configured, the backend does
        not support the required API / feature set, the Task cannot be fetched, or no rule matches
    """
    container_lookup = session.config.get('agent.default_docker.match_rules', None)
    if not session.check_min_api_version("2.13") or not container_lookup:
        return container_config

    # check backend support before sending any more requests (because they will fail and crash the Task)
    try:
        session.verify_feature_set('advanced')
    except ValueError:
        return container_config

    result = session.send_request(
        service='tasks',
        action='get_all',
        version='2.14',
        json={'id': [task_id],
              'only_fields': ['script.requirements', 'script.binary',
                              'script.repository', 'script.branch',
                              'project', 'container'],
              'search_hidden': True},
        method='get',
        async_enable=False,
    )
    try:
        task_info = (result.json()['data']['tasks'][0] if result.ok else {}) or {}
    except (ValueError, TypeError, KeyError, IndexError):
        # malformed response or the Task was not returned: keep the configuration as is
        return container_config

    from clearml_agent.external.requirements_parser.requirement import Requirement

    # store tasks repository (any of the sections may be missing or explicitly null)
    script_info = task_info.get('script') or {}
    repository = script_info.get('repository') or ''
    branch = script_info.get('branch') or ''
    binary = script_info.get('binary') or ''
    requested_container = task_info.get('container') or {}

    # get project full path
    project_full_name = ''
    if task_info.get('project', None):
        result = session.send_request(
            service='projects',
            action='get_all',
            version='2.13',
            json={
                'id': [task_info.get('project')],
                'only_fields': ['name'],
            },
            method='get',
            async_enable=False,
        )
        try:
            if result.ok:
                project_full_name = result.json()['data']['projects'][0]['name'] or ''
        except (ValueError, TypeError, KeyError, IndexError):
            pass

    # rule key -> task value the rule's regular expression is searched in (checked in this order)
    regex_targets = (
        ('project', project_full_name),
        ('script.repository', repository),
        ('script.branch', branch),
        ('script.binary', binary),
        ('container', requested_container.get('image') or ''),
    )

    task_packages_lookup = {}
    for entry in container_lookup:
        match = entry.get('match', None)
        if not match:
            continue

        # all regular expressions present in the rule must match (stops at the first failure)
        patterns = ((match.get(key, None), value) for key, value in regex_targets)
        if not all(_rule_pattern_matches(entry, pattern, value) for pattern, value in patterns if pattern):
            continue

        matched = True
        for req_section in ['script.requirements.pip', 'script.requirements.conda']:
            if not match.get(req_section, None):
                continue

            match_pip_reqs = [MarkerRequirement(Requirement.parse('{} {}'.format(k, v)))
                              for k, v in match.get(req_section, None).items()]

            # parse the Task's packages for this section (retried while the stored result is empty)
            if not task_packages_lookup.get(req_section):
                package_manager = req_section.split('.')[-1]
                task_packages_lookup[req_section] = \
                    RequirementsManager.parse_requirements_section_to_marker_requirements(
                        requirements=(script_info.get('requirements') or {}).get(package_manager)
                    )
            task_packages = task_packages_lookup[req_section]

            # every package listed in the rule must be present in the Task with a compatible version
            matched_all_reqs = all(
                any(mr.req.name == pr.req.name and compare_version_rules(mr.specs, pr.specs)
                    for pr in task_packages)
                for mr in match_pip_reqs
            )

            # if we have a match, check second section
            if matched_all_reqs:
                continue
            # no match stop
            matched = False
            break

        if matched:
            if container_config is None:
                container_config = {}
            if not container_config.get('container'):
                container_config['container'] = entry.get('image', None)
            if not container_config.get('arguments'):
                # rule arguments are given as a single string, split them into an argument list
                container_config['arguments'] = entry.get('arguments', None)
                container_config['arguments'] = shlex.split(str(container_config.get('arguments') or '').strip())
            print('Matching default container with rule:\n{}'.format(json.dumps(entry)))
            return container_config

    return container_config
|
||||
|
@ -41,6 +41,7 @@ from clearml_agent.backend_api.session.defs import ENV_ENABLE_ENV_CONFIG_SECTION
|
||||
from clearml_agent.backend_config.defs import UptimeConf
|
||||
from clearml_agent.backend_config.utils import apply_environment, apply_files
|
||||
from clearml_agent.commands.base import resolve_names, ServiceCommandSection
|
||||
from clearml_agent.commands.resolver import resolve_default_container
|
||||
from clearml_agent.definitions import (
|
||||
ENVIRONMENT_SDK_PARAMS,
|
||||
PROGRAM_NAME,
|
||||
@ -102,7 +103,8 @@ from clearml_agent.helper.package.poetry_api import PoetryConfig, PoetryAPI
|
||||
from clearml_agent.helper.package.post_req import PostRequirement
|
||||
from clearml_agent.helper.package.priority_req import PriorityPackageRequirement, PackageCollectorRequirement
|
||||
from clearml_agent.helper.package.pytorch import PytorchRequirement
|
||||
from clearml_agent.helper.package.requirements import RequirementsManager
|
||||
from clearml_agent.helper.package.requirements import (
|
||||
RequirementsManager, )
|
||||
from clearml_agent.helper.package.venv_update_api import VenvUpdateAPI
|
||||
from clearml_agent.helper.process import (
|
||||
kill_all_child_processes,
|
||||
@ -330,6 +332,9 @@ def get_task_container(session, task_id):
|
||||
except (ValueError, TypeError):
|
||||
container = {}
|
||||
|
||||
if (not container or not container.get('container')) and session.check_min_api_version("2.13"):
|
||||
container = resolve_default_container(session=session, task_id=task_id, container_config=container)
|
||||
|
||||
return container
|
||||
|
||||
|
||||
@ -629,7 +634,7 @@ class Worker(ServiceCommandSection):
|
||||
:param queue: ID of queue that task was pulled from
|
||||
:param task_id: ID of task to run
|
||||
:param worker_args: Worker command line arguments
|
||||
:params task_session: The session for running operations on the passed task
|
||||
:param task_session: The session for running operations on the passed task
|
||||
:param docker: Docker image in which the execution task will run
|
||||
"""
|
||||
# start new process and execute task id
|
||||
@ -1118,6 +1123,7 @@ class Worker(ServiceCommandSection):
|
||||
return queue_tags, runtime_props
|
||||
|
||||
def get_runtime_properties(self):
|
||||
# TODO: refactor to use the Session env State
|
||||
if self._runtime_props_support is not True:
|
||||
# either not supported or never tested
|
||||
if self._runtime_props_support == self._session.api_version:
|
||||
@ -1795,6 +1801,7 @@ class Worker(ServiceCommandSection):
|
||||
docker=None,
|
||||
entry_point=None,
|
||||
install_globally=False,
|
||||
force_docker=False,
|
||||
**_
|
||||
):
|
||||
if not task_id:
|
||||
@ -1803,7 +1810,7 @@ class Worker(ServiceCommandSection):
|
||||
self._session.print_configuration()
|
||||
|
||||
if docker is not False and docker is not None:
|
||||
return self._build_docker(docker, target, task_id, entry_point)
|
||||
return self._build_docker(docker, target, task_id, entry_point, force_docker=force_docker)
|
||||
|
||||
current_task = self._session.api_client.tasks.get_by_id(task_id)
|
||||
|
||||
@ -1885,7 +1892,7 @@ class Worker(ServiceCommandSection):
|
||||
|
||||
return 0
|
||||
|
||||
def _build_docker(self, docker, target, task_id, entry_point=None):
|
||||
def _build_docker(self, docker, target, task_id, entry_point=None, force_docker=False):
|
||||
|
||||
self.temp_config_path = safe_mkstemp(
|
||||
suffix=".cfg", prefix=".clearml_agent.", text=True, name_only=True
|
||||
@ -1896,20 +1903,24 @@ class Worker(ServiceCommandSection):
|
||||
temp_config, docker_image_func = self.get_docker_config_cmd(docker)
|
||||
self.dump_config(self.temp_config_path, config=temp_config)
|
||||
self.docker_image_func = docker_image_func
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
task_container = get_task_container(self._session, task_id)
|
||||
except Exception:
|
||||
task_container = {}
|
||||
|
||||
if task_container.get('image'):
|
||||
docker_image = task_container.get('image')
|
||||
docker_arguments = task_container.get('arguments')
|
||||
docker_setup_script = task_container.get('setup_shell_script')
|
||||
docker_image = self._docker_image
|
||||
docker_arguments = self._docker_arguments
|
||||
docker_setup_script = None
|
||||
|
||||
if force_docker:
|
||||
print('Ignoring any task container info, using docker image {}'.format(docker_image))
|
||||
else:
|
||||
docker_image = self._docker_image
|
||||
docker_arguments = self._docker_arguments
|
||||
docker_setup_script = None
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
task_container = get_task_container(self._session, task_id)
|
||||
if task_container.get('image'):
|
||||
docker_image = task_container.get('image')
|
||||
print('Ignoring default docker image, using task docker image {}'.format(docker_image))
|
||||
docker_arguments = task_container.get('arguments')
|
||||
docker_setup_script = task_container.get('setup_shell_script')
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
print('Building Task {} inside docker image: {} {} setup_script={}\n'.format(
|
||||
task_id, docker_image, docker_arguments or '', docker_setup_script or ''))
|
||||
@ -2219,7 +2230,10 @@ class Worker(ServiceCommandSection):
|
||||
os.environ.update(hyper_params)
|
||||
|
||||
# Add the script CWD to the python path
|
||||
python_path = get_python_path(script_dir, execution.entry_point, self.package_api, is_conda_env=self.is_conda)
|
||||
if repo_info and repo_info.root and self._session.config.get('agent.force_git_root_python_path', None):
|
||||
python_path = get_python_path(repo_info.root, None, self.package_api, is_conda_env=self.is_conda)
|
||||
else:
|
||||
python_path = get_python_path(script_dir, execution.entry_point, self.package_api, is_conda_env=self.is_conda)
|
||||
if ENV_TASK_EXTRA_PYTHON_PATH.get():
|
||||
python_path = add_python_path(python_path, ENV_TASK_EXTRA_PYTHON_PATH.get())
|
||||
if python_path:
|
||||
@ -2588,8 +2602,8 @@ class Worker(ServiceCommandSection):
|
||||
print('Poetry Enabled: Ignoring requested python packages, using repository poetry lock file!')
|
||||
api.install()
|
||||
return api
|
||||
except Exception:
|
||||
self.log.error("failed installing poetry requirements:")
|
||||
except Exception as ex:
|
||||
self.log.error("failed installing poetry requirements: {}".format(ex))
|
||||
return None
|
||||
|
||||
def install_requirements(
|
||||
@ -2789,7 +2803,7 @@ class Worker(ServiceCommandSection):
|
||||
".".join, reversed(list(suffixes(self._get_python_version_suffix(config_version).split("."))))
|
||||
)
|
||||
]
|
||||
|
||||
default_python = None
|
||||
for version, executable in python_executables:
|
||||
self.log.debug("Searching for {}".format(executable))
|
||||
if find_executable(executable):
|
||||
@ -2800,15 +2814,37 @@ class Worker(ServiceCommandSection):
|
||||
except subprocess.CalledProcessError as ex:
|
||||
self.log.warning("error getting %s version: %s", executable, ex)
|
||||
continue
|
||||
|
||||
if not default_python:
|
||||
match = re.search(r"Python ({}(?:\.\d+)*)".format(r"\d+"), output)
|
||||
default_python = (
|
||||
match.group(1),
|
||||
version if version and '.' in version else '.'.join(match.group(1).split('.')[:2]),
|
||||
executable)
|
||||
|
||||
match = re.search(
|
||||
r"Python ({}(?:\.\d+)*)".format(
|
||||
r"\d+" if not config_version or os.path.sep in config_version else config_version), output
|
||||
)
|
||||
if match:
|
||||
self.log.debug("Found: {}".format(executable))
|
||||
return match.group(1), version or '.'.join(match.group(1).split('.')[:2]), executable
|
||||
return (
|
||||
match.group(1),
|
||||
version if version and '.' in version else '.'.join(match.group(1).split('.')[:2]),
|
||||
executable
|
||||
)
|
||||
|
||||
if default_python:
|
||||
self.log.warning(
|
||||
"Python executable with version {!r} requested by the Task, "
|
||||
"not found in path, using \'{}\' (v{}) instead".format(
|
||||
config_version, find_executable(default_python[-1]), default_python[0]
|
||||
)
|
||||
)
|
||||
return default_python
|
||||
|
||||
raise CommandFailedError(
|
||||
"Python executable with version {!r} defined in configuration file, "
|
||||
"Python executable with version {!r} requested by the Task, "
|
||||
"key 'agent.default_python', not found in path, tried: {}".format(
|
||||
config_version, list(zip(*python_executables))[1]
|
||||
)
|
||||
@ -2843,7 +2879,8 @@ class Worker(ServiceCommandSection):
|
||||
requested_python_version = \
|
||||
requested_python_version or \
|
||||
Text(self._session.config.get("agent.python_binary", None)) or \
|
||||
Text(self._session.config.get("agent.default_python", None))
|
||||
Text(self._session.config.get("agent.default_python", None)) or \
|
||||
'{}.{}'.format(sys.version_info.major, sys.version_info.minor)
|
||||
|
||||
if self.is_conda:
|
||||
executable_version_suffix = \
|
||||
@ -2870,17 +2907,19 @@ class Worker(ServiceCommandSection):
|
||||
self.find_python_executable_for_version(requested_python_version)
|
||||
except Exception:
|
||||
def_python_version = Text(self._session.config.get("agent.python_binary", None)) or \
|
||||
Text(self._session.config.get("agent.default_python", None))
|
||||
Text(self._session.config.get("agent.default_python", None)) or \
|
||||
'{}.{}'.format(sys.version_info.major, sys.version_info.minor)
|
||||
print('Warning: could not locate requested Python version {}, reverting to version {}'.format(
|
||||
requested_python_version, def_python_version))
|
||||
executable_version, executable_version_suffix, executable_name = \
|
||||
self.find_python_executable_for_version(def_python_version)
|
||||
|
||||
self._session.config.put("agent.default_python", executable_version)
|
||||
self._session.config.put("agent.default_python", executable_version_suffix)
|
||||
self._session.config.put("agent.python_binary", executable_name)
|
||||
|
||||
venv_dir = Path(venv_dir) if venv_dir else \
|
||||
Path(self._session.config["agent.venvs_dir"], executable_version_suffix)
|
||||
venv_dir = Path(os.path.expanduser(os.path.expandvars(venv_dir.as_posix())))
|
||||
|
||||
first_time = not standalone_mode and (
|
||||
is_windows_platform()
|
||||
@ -3010,8 +3049,8 @@ class Worker(ServiceCommandSection):
|
||||
self._docker_image = docker_image
|
||||
self._docker_arguments = docker_arguments
|
||||
|
||||
print("Running in Docker {} mode (v19.03 and above) - using default docker image: {} {}\n".format(
|
||||
'*standalone*' if self._standalone_mode else '', self._docker_image,
|
||||
print("Running in Docker{} mode (v19.03 and above) - using default docker image: {} {}\n".format(
|
||||
' *standalone*' if self._standalone_mode else '', self._docker_image,
|
||||
self._sanitize_docker_command(self._docker_arguments) or ''))
|
||||
|
||||
temp_config = deepcopy(self._session.config)
|
||||
|
@ -204,10 +204,13 @@ def get_python_path(script_dir, entry_point, package_api, is_conda_env=False):
|
||||
["-c", "import sys; print('{}'.join(sys.path))".format(python_path_sep)])
|
||||
org_python_path = python_path_cmd.get_output(cwd=script_dir)
|
||||
# Add path of the script directory and executable directory
|
||||
python_path = '{}{python_path_sep}{}{python_path_sep}'.format(
|
||||
Path(script_dir).absolute().as_posix(),
|
||||
(Path(script_dir) / Path(entry_point)).parent.absolute().as_posix(),
|
||||
python_path_sep=python_path_sep)
|
||||
python_path = '{}{python_path_sep}'.format(
|
||||
Path(script_dir).absolute().as_posix(), python_path_sep=python_path_sep)
|
||||
if entry_point:
|
||||
python_path += '{}{python_path_sep}'.format(
|
||||
(Path(script_dir) / Path(entry_point)).parent.absolute().as_posix(),
|
||||
python_path_sep=python_path_sep)
|
||||
|
||||
if is_windows_platform():
|
||||
python_path = python_path.replace('/', '\\')
|
||||
|
||||
|
@ -2,7 +2,7 @@ from __future__ import unicode_literals, print_function
|
||||
|
||||
import csv
|
||||
import sys
|
||||
from collections import Iterable
|
||||
from collections.abc import Iterable
|
||||
from typing import List, Dict, Text, Any
|
||||
|
||||
from attr import attrs, attrib
|
||||
|
@ -189,14 +189,6 @@ class CondaAPI(PackageManager):
|
||||
if conda_env.is_file() and not is_windows_platform():
|
||||
self.source = self.pip.source = CommandSequence(('source', conda_env.as_posix()), self.source)
|
||||
|
||||
# install cuda toolkit
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
cuda_version = float(int(self.session.config['agent.cuda_version'])) / 10.0
|
||||
if cuda_version > 0:
|
||||
self._install('cudatoolkit={:.1f}'.format(cuda_version))
|
||||
except Exception:
|
||||
pass
|
||||
return self
|
||||
|
||||
def _init_existing_environment(self, conda_pre_build_env_path):
|
||||
@ -456,7 +448,9 @@ class CondaAPI(PackageManager):
|
||||
requirements['conda'] = requirements['conda'].split('\n')
|
||||
has_torch = False
|
||||
has_matplotlib = False
|
||||
has_cudatoolkit = False
|
||||
try:
|
||||
# notice this is an integer version: 112 (means 11.2)
|
||||
cuda_version = int(self.session.config.get('agent.cuda_version', 0))
|
||||
except:
|
||||
cuda_version = 0
|
||||
@ -488,6 +482,19 @@ class CondaAPI(PackageManager):
|
||||
if '.' not in m.specs[0][1]:
|
||||
continue
|
||||
|
||||
if m.name.lower() == 'cudatoolkit':
|
||||
# skip cuda if we are running on CPU
|
||||
if not cuda_version:
|
||||
continue
|
||||
|
||||
has_cudatoolkit = True
|
||||
# cuda version, only major.minor
|
||||
requested_cuda_version = '.'.join(m.specs[0][1].split('.')[:2])
|
||||
# make sure that the cuda_version we support can install the requested cuda (major version)
|
||||
if int(float(requested_cuda_version)) > int(float(cuda_version)/10.0):
|
||||
continue
|
||||
m.specs = [(m.specs[0][0], str(requested_cuda_version)), ]
|
||||
|
||||
conda_supported_req_names.append(m.name.lower())
|
||||
if m.req.name.lower() == 'matplotlib':
|
||||
has_matplotlib = True
|
||||
@ -504,6 +511,10 @@ class CondaAPI(PackageManager):
|
||||
|
||||
reqs.append(m)
|
||||
|
||||
if not has_cudatoolkit and cuda_version:
|
||||
m = MarkerRequirement(Requirement("cudatoolkit == {}".format(float(cuda_version) / 10.0)))
|
||||
reqs.append(m)
|
||||
|
||||
# if we have a conda list, the rest should be installed with pip,
|
||||
# this means any experiment that was executed with pip environment,
|
||||
# will be installed using pip
|
||||
@ -559,8 +570,12 @@ class CondaAPI(PackageManager):
|
||||
# change _ to - in name but not the prefix _ (as this is conda prefix)
|
||||
if r.name and not r.name.startswith('_') and not requirements.get('conda', None):
|
||||
r.name = r.name.replace('_', '-')
|
||||
# remove .post from version numbers, it fails ~= version, and change == to ~=
|
||||
if r.specs and r.specs[0]:
|
||||
|
||||
if has_cudatoolkit and r.specs and len(r.specs[0]) > 1 and r.name == 'cudatoolkit':
|
||||
# select specific cuda version if it came from the requirements
|
||||
r.specs = [(r.specs[0][0].replace('==', '='), r.specs[0][1].split('.post')[0])]
|
||||
elif r.specs and r.specs[0] and len(r.specs[0]) > 1:
|
||||
# remove .post from version numbers it fails with ~= version, and change == to ~=
|
||||
r.specs = [(r.specs[0][0].replace('==', '~='), r.specs[0][1].split('.post')[0])]
|
||||
|
||||
while reqs:
|
||||
|
@ -5,6 +5,7 @@ import attr
|
||||
import sys
|
||||
import os
|
||||
from pathlib2 import Path
|
||||
|
||||
from clearml_agent.helper.process import Argv, DEVNULL, check_if_command_exists
|
||||
from clearml_agent.session import Session, POETRY
|
||||
|
||||
@ -81,6 +82,32 @@ class PoetryConfig:
|
||||
@_guard_enabled
|
||||
def initialize(self, cwd=None):
|
||||
if not self._initialized:
|
||||
if self.session.config.get("agent.package_manager.poetry_version", None) is not None:
|
||||
version = str(self.session.config.get("agent.package_manager.poetry_version"))
|
||||
print('Upgrading Poetry package {}'.format(version))
|
||||
# first upgrade pip if we need to
|
||||
try:
|
||||
from clearml_agent.helper.package.pip_api.venv import VirtualenvPip
|
||||
pip = VirtualenvPip(
|
||||
session=self.session, python=self._python,
|
||||
requirements_manager=None, path=None, interpreter=self._python)
|
||||
pip.upgrade_pip()
|
||||
except Exception as ex:
|
||||
self.log.warning("failed upgrading pip: {}".format(ex))
|
||||
|
||||
# now install poetry
|
||||
try:
|
||||
version = version.replace(' ', '')
|
||||
if ('=' in version) or ('~' in version) or ('<' in version) or ('>' in version):
|
||||
version = version
|
||||
elif version:
|
||||
version = "==" + version
|
||||
argv = Argv(self._python, "-m", "pip", "install", "poetry{}".format(version),
|
||||
"--upgrade", "--disable-pip-version-check")
|
||||
print(argv.get_output())
|
||||
except Exception as ex:
|
||||
self.log.warning("failed upgrading poetry: {}".format(ex))
|
||||
|
||||
self._initialized = True
|
||||
try:
|
||||
self._config("--local", "virtualenvs.in-project", "true", cwd=cwd)
|
||||
|
@ -208,7 +208,11 @@ class SimpleVersion:
|
||||
if not version_b:
|
||||
return True
|
||||
|
||||
if not num_parts:
|
||||
num_parts = max(len(version_a.split('.')), len(version_b.split('.')), )
|
||||
|
||||
if op == '~=':
|
||||
num_parts = len(version_b.split('.')) - 1
|
||||
num_parts = max(num_parts, 2)
|
||||
op = '=='
|
||||
ignore_sub_versions = True
|
||||
@ -245,6 +249,16 @@ class SimpleVersion:
|
||||
return version_a_key < version_b_key
|
||||
raise ValueError('Unrecognized comparison operator [{}]'.format(op))
|
||||
|
||||
@classmethod
def max_version(cls, version_a, version_b):
    """
    Return the higher of two version strings.

    A missing (None / empty) operand means "no version": the other operand is returned,
    so the comparison is never performed on a missing value.

    :param version_a: First version string (or None)
    :param version_b: Second version string (or None)
    :return: `version_a` if it is greater than or equal to `version_b`, otherwise `version_b`
    """
    if not version_a:
        return version_b
    if not version_b:
        return version_a
    return version_a if cls.compare_versions(
        version_a=version_a, op='>=', version_b=version_b, num_parts=None) else version_b
|
||||
|
||||
@classmethod
def min_version(cls, version_a, version_b):
    """
    Return the lower of two version strings.

    A missing (None / empty) operand means "no version": the other operand is returned,
    so the comparison is never performed on a missing value.

    :param version_a: First version string (or None)
    :param version_b: Second version string (or None)
    :return: `version_a` if it is lower than or equal to `version_b`, otherwise `version_b`
    """
    if not version_a:
        return version_b
    if not version_b:
        return version_a
    return version_a if cls.compare_versions(
        version_a=version_a, op='<=', version_b=version_b, num_parts=None) else version_b
|
||||
|
||||
@staticmethod
|
||||
def _parse_letter_version(
|
||||
letter, # type: str
|
||||
@ -313,6 +327,77 @@ class SimpleVersion:
|
||||
return ()
|
||||
|
||||
|
||||
def compare_version_rules(specs_a, specs_b):
|
||||
# specs_a/b are a list of tuples: [('==', '1.2.3'), ] or [('>=', '1.2'), ('<', '1.3')]
|
||||
# section definition:
|
||||
class Section(object):
|
||||
def __init__(self, left=None, left_eq=False, right=None, right_eq=False):
|
||||
self.left, self.left_eq, self.right, self.right_eq = left, left_eq, right, right_eq
|
||||
# first create a list of in/out sections for each spec
|
||||
# >, >= are left rule
|
||||
# <, <= are right rule
|
||||
# ~= x.y.z is converted to: >= x.y and < x.y+1
|
||||
# ==/=== are converted to: >= and <=
|
||||
# != x.y.z will split a section into: left < x.y.z and right > x.y.z
|
||||
def create_section(specs):
|
||||
section = Section()
|
||||
for op, v in specs:
|
||||
a = section
|
||||
if op == '>':
|
||||
a.left = v
|
||||
a.left_eq = False
|
||||
elif op == '>=':
|
||||
a.left = v
|
||||
a.left_eq = True
|
||||
elif op == '<':
|
||||
a.right = v
|
||||
a.right_eq = False
|
||||
elif op == '<=':
|
||||
a.right = v
|
||||
a.right_eq = True
|
||||
elif op == '==':
|
||||
a.left = v
|
||||
a.left_eq = True
|
||||
a.right = v
|
||||
a.right_eq = True
|
||||
elif op == '~=':
|
||||
new_v = v.split('.')
|
||||
a_left = '.'.join(new_v[:-1])
|
||||
a.left = a_left if not a.left else SimpleVersion.max_version(a_left, a.left)
|
||||
a.left_eq = True
|
||||
a_right = '.'.join(new_v[:-2] + [str(int(new_v[-2])+1)])
|
||||
a.right = a_right if not a.right else SimpleVersion.min_version(a_right, a.right)
|
||||
a.right_eq = False if a.right == a_right else a.right_eq
|
||||
|
||||
return section
|
||||
|
||||
section_a = create_section(specs_a)
|
||||
section_b = create_section(specs_b)
|
||||
i = Section()
|
||||
# then we have a list of sections for spec A/B
|
||||
if section_a.left == section_b.left:
|
||||
i.left = section_a.left
|
||||
i.left_eq = section_a.left_eq and section_b.left_eq
|
||||
else:
|
||||
i.left = SimpleVersion.max_version(section_a.left, section_b.left)
|
||||
i.left_eq = section_a.left_eq if i.left == section_a.left else section_b.left_eq
|
||||
if section_a.right == section_b.right:
|
||||
i.right = section_a.right
|
||||
i.right_eq = section_a.right_eq and section_b.right_eq
|
||||
else:
|
||||
i.right = SimpleVersion.min_version(section_a.right, section_b.right)
|
||||
i.right_eq = section_a.right_eq if i.right == section_a.right else section_b.right_eq
|
||||
|
||||
# return true if any section from A intersects a section from B
|
||||
valid = True
|
||||
valid &= SimpleVersion.compare_versions(
|
||||
version_a=i.left, op='<=' if i.left_eq else '<', version_b=i.right, num_parts=None)
|
||||
valid &= SimpleVersion.compare_versions(
|
||||
version_a=i.right, op='>=' if i.left_eq else '>', version_b=i.left, num_parts=None)
|
||||
|
||||
return valid
|
||||
|
||||
|
||||
@six.add_metaclass(ABCMeta)
|
||||
class RequirementSubstitution(object):
|
||||
|
||||
@ -468,20 +553,9 @@ class RequirementsManager(object):
|
||||
return None
|
||||
|
||||
def replace(self, requirements): # type: (Text) -> Text
|
||||
def safe_parse(req_str):
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
return list(parse(req_str, cwd=self._cwd))
|
||||
except Exception as ex:
|
||||
return [Requirement(req_str)]
|
||||
parsed_requirements = self.parse_requirements_section_to_marker_requirements(
|
||||
requirements=requirements, cwd=self._cwd)
|
||||
|
||||
parsed_requirements = tuple(
|
||||
map(
|
||||
MarkerRequirement,
|
||||
[r for line in (requirements.splitlines() if isinstance(requirements, six.text_type) else requirements)
|
||||
for r in safe_parse(line)]
|
||||
)
|
||||
)
|
||||
if not parsed_requirements:
|
||||
# return the original requirements just in case
|
||||
return requirements
|
||||
@ -614,3 +688,24 @@ class RequirementsManager(object):
|
||||
|
||||
return (normalize_cuda_version(cuda_version or 0),
|
||||
normalize_cuda_version(cudnn_version or 0))
|
||||
|
||||
@staticmethod
def parse_requirements_section_to_marker_requirements(requirements, cwd=None):
    """
    Parse a requirements section into a tuple of MarkerRequirement objects.

    :param requirements: Either the requirements text (split into lines before parsing)
        or an iterable of requirement lines. A falsy value (None, "", empty list) yields
        an empty tuple.
    :param cwd: Forwarded as the ``cwd`` argument of ``parse`` for every line.
    :return: tuple of MarkerRequirement, one per requirement produced from each line.
    """
    def safe_parse(req_str):
        # noinspection PyBroadException
        try:
            return list(parse(req_str, cwd=cwd))
        except Exception:
            # a line that cannot be parsed is kept as a raw Requirement instead of failing
            return [Requirement(req_str)]

    if not requirements:
        return tuple()

    # six.string_types (not plain `str`): on Python 2 the text may be `unicode`, and a bare
    # `str` check would make us iterate the text character by character instead of by line
    lines = requirements.splitlines() if isinstance(requirements, six.string_types) else requirements

    return tuple(MarkerRequirement(r) for line in lines for r in safe_parse(line))
|
||||
|
@ -663,7 +663,9 @@ def clone_repository_cached(session, execution, destination):
|
||||
|
||||
vcs.pull()
|
||||
rm_tree(destination)
|
||||
shutil.copytree(Text(cached_repo_path), Text(clone_folder))
|
||||
shutil.copytree(Text(cached_repo_path), Text(clone_folder),
|
||||
symlinks=select_for_platform(linux=True, windows=False),
|
||||
ignore_dangling_symlinks=True)
|
||||
if not clone_folder.is_dir():
|
||||
raise CommandFailedError(
|
||||
"copying of repository failed: from {} to {}".format(
|
||||
|
@ -204,6 +204,13 @@ COMMANDS = {
|
||||
'nargs': '*',
|
||||
'default': False,
|
||||
},
|
||||
'--force-docker': {
|
||||
'help': 'Force using the agent-specified docker image (either explicitly in the --docker argument or'
|
||||
'using the agent\'s default docker image). If provided, the agent will not use any docker'
|
||||
'container information stored on the task itself (default False)',
|
||||
'default': False,
|
||||
'action': 'store_true',
|
||||
},
|
||||
'--python-version': {
|
||||
'help': 'Virtual environment python version to use',
|
||||
},
|
||||
|
@ -229,26 +229,35 @@ class Session(_Session):
|
||||
except:
|
||||
pass
|
||||
|
||||
def print_configuration(self, remove_secret_keys=("secret", "pass", "token", "account_key")):
|
||||
def print_configuration(
|
||||
self,
|
||||
remove_secret_keys=("secret", "pass", "token", "account_key", "contents"),
|
||||
skip_value_keys=("environment", )
|
||||
):
|
||||
# remove all the secrets from the print
|
||||
def recursive_remove_secrets(dictionary, secret_keys=()):
|
||||
def recursive_remove_secrets(dictionary, secret_keys=(), empty_keys=()):
|
||||
for k in list(dictionary):
|
||||
for s in secret_keys:
|
||||
if s in k:
|
||||
dictionary.pop(k)
|
||||
break
|
||||
for s in empty_keys:
|
||||
if s == k:
|
||||
dictionary[k] = {key: '****' for key in dictionary[k]} \
|
||||
if isinstance(dictionary[k], dict) else '****'
|
||||
break
|
||||
if isinstance(dictionary.get(k, None), dict):
|
||||
recursive_remove_secrets(dictionary[k], secret_keys=secret_keys)
|
||||
recursive_remove_secrets(dictionary[k], secret_keys=secret_keys, empty_keys=empty_keys)
|
||||
elif isinstance(dictionary.get(k, None), (list, tuple)):
|
||||
for item in dictionary[k]:
|
||||
if isinstance(item, dict):
|
||||
recursive_remove_secrets(item, secret_keys=secret_keys)
|
||||
recursive_remove_secrets(item, secret_keys=secret_keys, empty_keys=empty_keys)
|
||||
|
||||
config = deepcopy(self.config.to_dict())
|
||||
# remove the env variable, it's not important
|
||||
config.pop('env', None)
|
||||
if remove_secret_keys:
|
||||
recursive_remove_secrets(config, secret_keys=remove_secret_keys)
|
||||
if remove_secret_keys or skip_value_keys:
|
||||
recursive_remove_secrets(config, secret_keys=remove_secret_keys, empty_keys=skip_value_keys)
|
||||
# remove logging.loggers.urllib3.level from the print
|
||||
try:
|
||||
config['logging']['loggers']['urllib3'].pop('level', None)
|
||||
|
@ -1 +1 @@
|
||||
__version__ = '1.1.1'
|
||||
__version__ = '1.2.0rc1'
|
||||
|
@ -171,7 +171,7 @@ agent {
|
||||
|
||||
default_docker: {
|
||||
# default docker image to use when running in docker mode
|
||||
image: "nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04"
|
||||
image: "nvidia/cuda:10.2-cudnn7-runtime-ubuntu18.04"
|
||||
|
||||
# optional arguments to pass to docker image
|
||||
# arguments: ["--ipc=host", ]
|
||||
|
@ -15,6 +15,7 @@ api {
|
||||
agent {
|
||||
# Set GIT user/pass credentials (if user/pass are set, GIT protocol will be set to https)
|
||||
# leave blank for GIT SSH credentials (set force_git_ssh_protocol=true to force SSH protocol)
|
||||
# Notice: GitHub personal token is equivalent to password, you can put it directly into `git_pass`
|
||||
git_user=""
|
||||
git_pass=""
|
||||
# Limit credentials to a single domain, for example: github.com,
|
||||
@ -60,6 +61,8 @@ agent {
|
||||
|
||||
# specify pip version to use (examples "<20", "==19.3.1", "", empty string will install the latest version)
|
||||
# pip_version: "<20"
|
||||
# specify poetry version to use (examples "<2", "==1.1.1", "", empty string will install the latest version)
|
||||
# poetry_version: "<2",
|
||||
|
||||
# virtual environment inherits packages from system
|
||||
system_site_packages: false,
|
||||
@ -155,10 +158,57 @@ agent {
|
||||
|
||||
default_docker: {
|
||||
# default docker image to use when running in docker mode
|
||||
image: "nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04"
|
||||
image: "nvidia/cuda:10.2-cudnn7-runtime-ubuntu18.04"
|
||||
|
||||
# optional arguments to pass to docker image
|
||||
# arguments: ["--ipc=host"]
|
||||
|
||||
# lookup table rules for default container
|
||||
# first matched rule will be picked, according to rule order
|
||||
# enterprise version only
|
||||
# match_rules: [
|
||||
# {
|
||||
# image: "nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04"
|
||||
# arguments: "-e define=value"
|
||||
# match: {
|
||||
# script{
|
||||
# # Optional: must match all requirements (not partial)
|
||||
# requirements: {
|
||||
# # version selection matching PEP-440
|
||||
# pip: {
|
||||
# tensorflow: "~=2.6"
|
||||
# },
|
||||
# }
|
||||
# # Optional: matching based on regular expression, example: "^exact_match$"
|
||||
# repository: "/my_repository/"
|
||||
# branch: "main"
|
||||
# binary: "python3.6"
|
||||
# }
|
||||
# # Optional: matching based on regular expression, example: "^exact_match$"
|
||||
# project: "project/sub_project"
|
||||
# }
|
||||
# },
|
||||
# {
|
||||
# image: "nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04"
|
||||
# arguments: "-e define=value"
|
||||
# match: {
|
||||
# # must match all requirements (not partial)
|
||||
# script{
|
||||
# requirements: {
|
||||
# conda: {
|
||||
# torch: ">=2.6,<2.8"
|
||||
# }
|
||||
# }
|
||||
# # no repository matching required
|
||||
# repository: ""
|
||||
# }
|
||||
# # no container image matching required (allow to replace one requested container with another)
|
||||
# container: ""
|
||||
# # no project matching required
|
||||
# project: ""
|
||||
# }
|
||||
# },
|
||||
# ]
|
||||
}
|
||||
|
||||
# set the OS environments based on the Task's Environment section before launching the Task process.
|
||||
|
Loading…
Reference in New Issue
Block a user