Compare commits

...

34 Commits

Author SHA1 Message Date
allegroai
a5a797ec5e Version bump to v1.3.0 2022-06-16 23:24:28 +03:00
allegroai
ff6cee4a44 Fix requirements --extra-index-url line with trailing comment
Fix --extra-index-url is added for different command line switches
2022-06-16 23:22:29 +03:00
allegroai
9acbad28f7 Fix repository URL contains credentials even when agent.force_git_ssh_protocolagent.force_git_ssh_protocol is true 2022-06-16 23:20:53 +03:00
allegroai
560e689ccd Fix always make pygobject an optional package (i.e. if installation fails continue the Task package environment setup) 2022-06-16 23:18:55 +03:00
allegroai
f66e42ddb1 Fix optional priority packaged always compare lower case package name 2022-06-16 23:18:31 +03:00
allegroai
d9856d5de5 Add Python 3.10 support 2022-06-16 23:16:06 +03:00
Niels ten Boom
24177cc5a9 Support private repos from requirements.txt file (#107)
* support private repos
* fix double indices
2022-06-15 10:26:24 +03:00
allegroai
178af0dee8 Bump PyJWT version due to "Key confusion through non-blocklisted public key formats" vulnerability 2022-05-25 16:41:26 +03:00
allegroai
51eb0a713c Version bump 2022-05-12 23:31:54 +03:00
allegroai
249aa006cb Make sure that if we have "setuptools" in the original required packages, we preserve the line in the pip freeze list 2022-05-12 23:31:32 +03:00
allegroai
c08e2ac0bb Fix clearml.conf access in non-root containers 2022-05-05 12:23:11 +03:00
allegroai
335ef91d8e Fix git unsafe directory issue (disable check on cached vcs folder) 2022-05-05 12:22:40 +03:00
allegroai
6c7a639673 Fix broken pytorch setuptools incompatibility (force setuptools < 59 if torch is below 1.11) 2022-05-05 12:22:13 +03:00
allegroai
5f77cad5ac Fix error message 2022-04-27 15:36:39 +03:00
allegroai
0228ae0494 Set environment variables before expanding path 2022-04-27 15:14:16 +03:00
allegroai
165677e800 Version bump 2022-04-27 14:59:51 +03:00
allegroai
2e5298b737 Add support for use-owner-token in k8s glue 2022-04-27 14:59:27 +03:00
allegroai
c9ffb8a053 Version bump 2022-04-20 08:57:16 +03:00
allegroai
2466eed23f Fix dynamic GPUs with "all" GPUs on he same worker 2022-04-20 08:56:22 +03:00
allegroai
6e31171d31 Version bump to v1.2.3 2022-04-14 22:39:38 +03:00
allegroai
592254709e Fix typo 2022-04-14 22:38:19 +03:00
allegroai
e43f31eb80 Version bump 2022-04-13 10:02:25 +03:00
allegroai
f50ba005b5 Protect dynamic GPUs from failing to parse worker GPU index 2022-04-13 10:01:50 +03:00
allegroai
1011544533 Fix copy breaks agent and nulls the worker name 2022-04-13 10:01:12 +03:00
allegroai
6572023173 Fix avoid reinstall pytorch package if the same version is already installed 2022-04-09 14:18:38 +03:00
allegroai
9c7e2aacd0 Fix PYTHONPATH is overwritten when executing a task (append to it instead) 2022-04-09 14:17:49 +03:00
Allegro AI
715f102f6d Update README.md 2022-04-01 17:48:27 +03:00
allegroai
5446aed9cf Version bump to v1.2.2 2022-03-30 20:48:28 +03:00
allegroai
b94ec85461 Fix update should run with -y 2022-03-30 20:48:11 +03:00
allegroai
f55f4f7535 Version bump 2022-03-30 20:11:13 +03:00
allegroai
c87da3a079 Fix apt-get update fail causes apt-get install to not be executed 2022-03-30 20:10:57 +03:00
allegroai
c3590a53a8 Fix CLEARML_AGENT_SKIP_PIP_VENV_INSTALL fails to find python executable 2022-03-30 20:10:08 +03:00
allegroai
a4315722ab Version bump to vv1.2.1 2022-03-28 18:13:20 +03:00
allegroai
c901bd331c Fix git packages are installed even if commit is given and is preinstalled when using cached virtual environment 2022-03-28 18:11:46 +03:00
15 changed files with 229 additions and 63 deletions

View File

@@ -8,8 +8,8 @@ ML-Ops scheduler & orchestration solution supporting Linux, macOS and Windows**
[![GitHub license](https://img.shields.io/github/license/allegroai/clearml-agent.svg)](https://img.shields.io/github/license/allegroai/clearml-agent.svg)
[![PyPI pyversions](https://img.shields.io/pypi/pyversions/clearml-agent.svg)](https://img.shields.io/pypi/pyversions/clearml-agent.svg)
[![PyPI version shields.io](https://img.shields.io/pypi/v/clearml-agent.svg)](https://img.shields.io/pypi/v/clearml-agent.svg)
[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/allegroai)](https://artifacthub.io/packages/search?repo=allegroai)
[![PyPI Downloads](https://pepy.tech/badge/clearml-agent/month)](https://pypi.org/project/clearml-agent/)
[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/allegroai)](https://artifacthub.io/packages/search?repo=allegroai)
</div>
---

View File

@@ -83,7 +83,7 @@
# set the optional priority packages to be installed before the rest of the required packages,
# In case a package installation fails, the package will be ignored,
# and the virtual environment process will continue
# priority_optional_packages: ["pygobject", ]
priority_optional_packages: ["pygobject", ]
# set the post packages to be installed after all the rest of the required packages
# post_packages: ["horovod", ]

View File

@@ -139,8 +139,8 @@ from clearml_agent.helper.singleton import Singleton
from clearml_agent.session import Session
from .events import Events
DOCKER_ROOT_CONF_FILE = "/root/clearml.conf"
DOCKER_DEFAULT_CONF_FILE = "/root/default_clearml.conf"
DOCKER_ROOT_CONF_FILE = "/tmp/clearml.conf" # assuming we can always access/mount this file
DOCKER_DEFAULT_CONF_FILE = "~/default_clearml.conf"
sys_random = random.SystemRandom()
@@ -1109,7 +1109,11 @@ class Worker(ServiceCommandSection):
if w.id.startswith(worker_name) and w.id != self.worker_id]
gpus = []
for w in our_workers:
gpus += [int(g) for g in w.split(':')[-1].lower().replace('gpu', '').split(',')]
for g in w.split(':')[-1].lower().replace('gpu', '').split(','):
try:
gpus += [int(g.strip())]
except (ValueError, TypeError):
print("INFO: failed parsing GPU int('{}') - skipping".format(g))
available_gpus = list(set(gpu_indexes) - set(gpus))
return available_gpus
@@ -1120,7 +1124,13 @@ class Worker(ServiceCommandSection):
raise ValueError("Dynamic GPU allocation is not supported by the ClearML-server")
available_gpus = [prop["value"] for prop in available_gpus if prop["key"] == 'available_gpus']
if available_gpus:
available_gpus = [int(g) for g in available_gpus[-1].split(',')]
gpus = []
for g in available_gpus[-1].split(','):
try:
gpus += [int(g.strip())]
except (ValueError, TypeError):
print("INFO: failed parsing GPU int('{}') - skipping".format(g))
available_gpus = gpus
if not isinstance(gpu_queues, dict):
gpu_queues = dict(gpu_queues)
@@ -1281,12 +1291,9 @@ class Worker(ServiceCommandSection):
raise ValueError("Running in Docker mode, 'docker' command was not found")
self._worker_tags = kwargs.get('child_report_tags', None)
self._impersonate_as_task_owner = kwargs.get('use_owner_token', False)
if self._impersonate_as_task_owner:
if not self._session.check_min_api_version("2.14"):
raise ValueError("Server does not support --use-owner-token option (incompatible API version)")
if self._session.feature_set == "basic":
raise ValueError("Server does not support --use-owner-token option")
self._use_owner_token(kwargs.get('use_owner_token', False))
self._standalone_mode = kwargs.get('standalone_mode', False)
self._services_mode = kwargs.get('services_mode', False)
# must have docker in services_mode
@@ -1795,7 +1802,7 @@ class Worker(ServiceCommandSection):
def _apply_extra_configuration(self):
# store a few things we updated in runtime (TODO: we should list theme somewhere)
agent_config = self._session.config["agent"].copy()
agent_config_keys = ["cuda_version", "cudnn_version", "default_python", "worker_id", "debug"]
agent_config_keys = ["cuda_version", "cudnn_version", "default_python", "worker_id", "worker_name", "debug"]
try:
self._session.load_vaults()
except Exception as ex:
@@ -1882,6 +1889,9 @@ class Worker(ServiceCommandSection):
base_interpreter=package_api.requirements_manager.get_interpreter(),
requirement_substitutions=[OnlyExternalRequirements],
)
# manually update the current state,
# for the external git reference chance (in the replace callback)
package_api.requirements_manager.update_installed_packages_state(package_api.freeze())
# make sure we run the handlers
cached_requirements = \
{k: package_api.requirements_manager.replace(requirements[k] or '')
@@ -1992,7 +2002,7 @@ class Worker(ServiceCommandSection):
if entry_point == "clone_task" or entry_point == "reuse_task":
change = 'ENTRYPOINT if [ ! -s "{trains_conf}" ] ; then ' \
'cp {default_trains_conf} {trains_conf} ; ' \
'cp {default_trains_conf} {trains_conf} && export CLEARML_CONFIG_FILE={trains_conf}; ' \
' fi ; clearml-agent execute --id {task_id} --standalone-mode {clone}'.format(
default_trains_conf=DOCKER_DEFAULT_CONF_FILE,
trains_conf=DOCKER_ROOT_CONF_FILE,
@@ -2244,6 +2254,9 @@ class Worker(ServiceCommandSection):
base_interpreter=package_api.requirements_manager.get_interpreter(),
requirement_substitutions=[OnlyExternalRequirements]
)
# manually update the current state,
# for the external git reference chance (in the replace callback)
package_api.requirements_manager.update_installed_packages_state(package_api.freeze())
# make sure we run the handlers
cached_requirements = \
{k: package_api.requirements_manager.replace(requirements[k] or '')
@@ -2343,7 +2356,7 @@ class Worker(ServiceCommandSection):
if ENV_TASK_EXTRA_PYTHON_PATH.get():
python_path = add_python_path(python_path, ENV_TASK_EXTRA_PYTHON_PATH.get())
if python_path:
os.environ['PYTHONPATH'] = python_path
os.environ['PYTHONPATH'] = os.pathsep.join(filter(None, (os.environ.get('PYTHONPATH', None), python_path)))
# check if we want to run as another user, only supported on linux
if ENV_TASK_EXECUTE_AS_USER.get() and is_linux_platform():
@@ -2367,6 +2380,10 @@ class Worker(ServiceCommandSection):
if sys.getfilesystemencoding() == 'ascii' and not os.environ.get("PYTHONIOENCODING"):
os.environ["PYTHONIOENCODING"] = "utf-8"
# check if we need to update backwards compatible OS environment
if not os.environ.get("TRAINS_CONFIG_FILE") and os.environ.get("CLEARML_CONFIG_FILE"):
os.environ["TRAINS_CONFIG_FILE"] = os.environ.get("CLEARML_CONFIG_FILE")
print("Starting Task Execution:\n".format(current_task.id))
exit_code = -1
try:
@@ -2967,6 +2984,11 @@ class Worker(ServiceCommandSection):
- a new working directory (replacing the working_dir in the task's script section)
- a requirements manager instance
"""
os.environ["CLEARML_TASK_SCRIPT_ENTRY"] = execution.entry_point
os.environ["CLEARML_TASK_WORKING_DIR"] = execution.working_dir
os.environ["CLEARML_VENV_PATH"] = str(venv_folder)
os.environ["CLEARML_GIT_ROOT"] = git_root
script = os.path.expanduser(os.path.expandvars(script))
try:
@@ -2984,10 +3006,6 @@ class Worker(ServiceCommandSection):
task.to_dict(), separators=(',', ':'), default=str
)
os.environ["CLEARML_CUSTOM_BUILD_OUTPUT"] = script_output_file.name
os.environ["CLEARML_TASK_SCRIPT_ENTRY"] = execution.entry_point
os.environ["CLEARML_TASK_WORKING_DIR"] = execution.working_dir
os.environ["CLEARML_VENV_PATH"] = str(venv_folder)
os.environ["CLEARML_GIT_ROOT"] = git_root
try:
subprocess.check_call([script])
@@ -2999,7 +3017,7 @@ class Worker(ServiceCommandSection):
output = Path(script_output_file.name).read_text()
if not output:
raise SkippedCustomBuildScript("Build script {} is not found".format(script))
raise SkippedCustomBuildScript("Build script {} did not return any output".format(script))
try:
output = json.loads(output)
@@ -3193,6 +3211,10 @@ class Worker(ServiceCommandSection):
if standalone_mode:
self.package_api = VirtualenvPip(**package_manager_params)
else:
if not Path(executable_name).is_file():
executable_name_path = find_executable(executable_name)
print("Interpreter '{}' found at '{}'".format(executable_name, executable_name_path))
executable_name = executable_name_path
# we can change it, no one is going to use it anyhow
package_manager_params['path'] = None
package_manager_params['interpreter'] = executable_name
@@ -3631,7 +3653,7 @@ class Worker(ServiceCommandSection):
"{python_single_digit}.$i -m pip --version && " +
"export LOCAL_PYTHON=$(which {python_single_digit}.$i) && break ; done",
"[ ! -z $LOCAL_PYTHON ] || export CLEARML_APT_INSTALL=\"$CLEARML_APT_INSTALL {python_single_digit}-pip\"", # noqa
"[ -z \"$CLEARML_APT_INSTALL\" ] || (apt-get update && apt-get install -y $CLEARML_APT_INSTALL)",
"[ -z \"$CLEARML_APT_INSTALL\" ] || (apt-get update -y ; apt-get install -y $CLEARML_APT_INSTALL)",
]
if preprocess_bash_script:
@@ -3672,6 +3694,7 @@ class Worker(ServiceCommandSection):
base_cmd += (
(['--name', name] if name else []) +
['-v', conf_file+':'+DOCKER_ROOT_CONF_FILE] +
['-e', "CLEARML_CONFIG_FILE={}".format(DOCKER_ROOT_CONF_FILE)] +
(['-v', host_ssh_cache+':'+mount_ssh] if host_ssh_cache else []) +
(['-v', host_apt_cache+':'+mount_apt_cache] if host_apt_cache else []) +
(['-v', host_pip_cache+':'+mount_pip_cache] if host_pip_cache else []) +
@@ -3762,12 +3785,17 @@ class Worker(ServiceCommandSection):
# patch venv folder to new location
script_dir = script_dir.replace(venv_folder, new_venv_folder)
# New command line execution
command = RunasArgv('bash', '-c', 'HOME=\"{}\" PATH=\"{}\" PYTHONPATH=\"{}\" TRAINS_CONFIG_FILE={} {}'.format(
home_folder,
os.environ.get('PATH', '').replace(venv_folder, new_venv_folder),
os.environ.get('PYTHONPATH', '').replace(venv_folder, new_venv_folder),
user_trains_conf,
command.serialize().replace(venv_folder, new_venv_folder)))
command = RunasArgv(
'bash', '-c',
'HOME=\"{}\" PATH=\"{}\" PYTHONPATH=\"{}\" '
'TRAINS_CONFIG_FILE={} CLEARML_CONFIG_FILE={} {}'.format(
home_folder,
os.environ.get('PATH', '').replace(venv_folder, new_venv_folder),
os.environ.get('PYTHONPATH', '').replace(venv_folder, new_venv_folder),
user_trains_conf, user_trains_conf,
command.serialize().replace(venv_folder, new_venv_folder)
)
)
command.set_uid(user_uid=user_uid, user_gid=user_uid)
return command, script_dir
@@ -3933,6 +3961,14 @@ class Worker(ServiceCommandSection):
# type: (str) -> bool
return re.fullmatch(r"^[a-zA-Z0-9][a-zA-Z0-9_.-]+$", name) is not None
def _use_owner_token(self, use_owner_token=False):
self._impersonate_as_task_owner = use_owner_token
if self._impersonate_as_task_owner:
if not self._session.check_min_api_version("2.14"):
raise ValueError("Server does not support --use-owner-token option (incompatible API version)")
if self._session.feature_set == "basic":
raise ValueError("Server does not support --use-owner-token option")
if __name__ == "__main__":
pass

View File

@@ -1,6 +1,9 @@
import os
import re
import warnings
from clearml_agent.definitions import PIP_EXTRA_INDICES
from .requirement import Requirement
@@ -42,9 +45,14 @@ def parse(reqstr, cwd=None):
yield requirement
elif line.startswith('-f') or line.startswith('--find-links') or \
line.startswith('-i') or line.startswith('--index-url') or \
line.startswith('--extra-index-url') or \
line.startswith('--no-index'):
warnings.warn('Private repos not supported. Skipping.')
elif line.startswith('--extra-index-url'):
extra_index = line[len('--extra-index-url'):].strip()
extra_index = re.sub(r"\s+#.*$", "", extra_index) # strip comments
if extra_index and extra_index not in PIP_EXTRA_INDICES:
PIP_EXTRA_INDICES.append(extra_index)
print(f"appended {extra_index} to list of extra pip indices")
continue
elif line.startswith('-Z') or line.startswith('--always-unzip'):
warnings.warn('Unused option --always-unzip. Skipping.')

View File

@@ -18,7 +18,7 @@ from typing import Text, List, Callable, Any, Collection, Optional, Union
import yaml
from clearml_agent.commands.events import Events
from clearml_agent.commands.worker import Worker, get_task_container, set_task_container
from clearml_agent.commands.worker import Worker, get_task_container, set_task_container, get_next_task
from clearml_agent.definitions import ENV_DOCKER_IMAGE
from clearml_agent.errors import APIError
from clearml_agent.helper.base import safe_remove_file
@@ -362,7 +362,7 @@ class K8sIntegration(Worker):
print('Failed getting number of used pods: {}'.format(ex))
return -2
def run_one_task(self, queue: Text, task_id: Text, worker_args=None, **_):
def run_one_task(self, queue: Text, task_id: Text, worker_args=None, task_session=None, **_):
print('Pulling task {} launching on kubernetes cluster'.format(task_id))
task_data = self._session.api_client.tasks.get_all(id=[task_id])[0]
@@ -398,11 +398,19 @@ class K8sIntegration(Worker):
self.conf_file_content
or Path(self._session._config_file).read_text()
).encode("ascii")
create_clearml_conf = "echo '{}' | base64 --decode >> ~/clearml.conf".format(
create_clearml_conf = ["echo '{}' | base64 --decode >> ~/clearml.conf".format(
base64.b64encode(
hocon_config_encoded
).decode('ascii')
)
)]
if task_session:
create_clearml_conf.append(
"export CLEARML_AUTH_TOKEN=$(echo '{}' | base64 --decode)".format(
base64.b64encode(task_session.token.encode("ascii")).decode('ascii')
)
)
if self.ports_mode:
print("Kubernetes looking for available pod to use")
@@ -594,19 +602,22 @@ class K8sIntegration(Worker):
extra_docker_bash_script=extra_docker_bash_script)
for line in container_bash_script])
create_init_script = \
"echo '{}' | base64 --decode >> ~/__start_agent__.sh ; " \
extra_bash_commands = list(create_clearml_conf or [])
extra_bash_commands.append(
"echo '{}' | base64 --decode >> ~/__start_agent__.sh ; "
"/bin/bash ~/__start_agent__.sh".format(
base64.b64encode(
script_encoded.encode('ascii')
).decode('ascii'))
)
# Notice: we always leave with exit code 0, so pods are never restarted
container = self._merge_containers(
container,
dict(name=name, image=docker_image,
command=['/bin/bash'],
args=['-c', '{} ; {} ; exit 0'.format(create_clearml_conf, create_init_script)])
args=['-c', '{} ; exit 0'.format(' ; '.join(extra_bash_commands))])
)
if template['spec']['containers']:
@@ -685,7 +696,7 @@ class K8sIntegration(Worker):
"--",
"/bin/sh",
"-c",
"{} ; {}".format(create_clearml_conf, container_bash_script.format(
"{} ; {}".format(" ; ".join(create_clearml_conf or []), container_bash_script.format(
extra_bash_init_cmd=self.extra_bash_init_script or "",
extra_docker_bash_script=docker_bash or "",
task_id=task_id
@@ -742,14 +753,16 @@ class K8sIntegration(Worker):
# get next task in queue
try:
response = self._session.api_client.queues.get_next_task(queue=queue)
response = get_next_task(
self._session, queue=queue, get_task_info=self._impersonate_as_task_owner
)
except Exception as e:
print("Warning: Could not access task queue [{}], error: {}".format(queue, e))
continue
else:
try:
task_id = response.entry.task
except AttributeError:
task_id = response["entry"]["task"]
except (KeyError, TypeError, AttributeError):
print("No tasks in queue {}".format(queue))
continue
events_service.send_log_events(
@@ -761,8 +774,26 @@ class K8sIntegration(Worker):
level="INFO",
)
task_session = None
if self._impersonate_as_task_owner:
try:
task_user = response["task_info"]["user"]
task_company = response["task_info"]["company"]
except (KeyError, TypeError, AttributeError):
print("Error: cannot retrieve owner user for the task '{}', skipping".format(task_id))
continue
task_session = self.get_task_session(task_user, task_company)
if not task_session:
print(
"Error: Could not login as the user '{}' for the task '{}', skipping".format(
task_user, task_id
)
)
continue
self.report_monitor(ResourceMonitor.StatusReport(queues=queues, queue=queue, task=task_id))
self.run_one_task(queue, task_id, worker_params)
self.run_one_task(queue, task_id, worker_params, task_session)
self.report_monitor(ResourceMonitor.StatusReport(queues=self.queues))
break
else:
@@ -773,7 +804,7 @@ class K8sIntegration(Worker):
if self._session.config["agent.reload_config"]:
self.reload_config()
def k8s_daemon(self, queue):
def k8s_daemon(self, queue, **kwargs):
"""
Start the k8s Glue service.
This service will be pulling tasks from *queue* and scheduling them for execution using kubectl.
@@ -784,8 +815,10 @@ class K8sIntegration(Worker):
:param list(str) queue: queue name to pull from
"""
return self.daemon(queues=[ObjectID(name=queue)] if queue else None,
log_level=logging.INFO, foreground=True, docker=False)
return self.daemon(
queues=[ObjectID(name=queue)] if queue else None,
log_level=logging.INFO, foreground=True, docker=False, **kwargs,
)
@classmethod
def get_ssh_server_bash(cls, ssh_port_number):

View File

@@ -95,7 +95,8 @@ class ExternalRequirements(SimpleSubstitution):
vcs._set_ssh_url()
new_req_line = 'git+{}{}{}'.format(
'' if scheme and '://' in vcs.url else scheme,
vcs.url_with_auth, fragment
vcs_url if session.config.get('agent.force_git_ssh_protocol', None) else vcs.url_with_auth,
fragment
)
if new_req_line != req_line:
furl_line = furl(new_req_line)
@@ -174,5 +175,11 @@ class OnlyExternalRequirements(ExternalRequirements):
# Do not store the skipped requirements
# mark skip package
if super(OnlyExternalRequirements, self).match(req):
if self.is_already_installed(req):
print("No need to reinstall \'{}\' from VCS, "
"the exact same version is already installed".format(req.name))
return Text('')
return self._add_vcs_credentials(req, self._session)
return Text('')

View File

@@ -1,3 +1,4 @@
import re
from typing import Text
from .base import PackageManager
@@ -11,13 +12,14 @@ class PriorityPackageRequirement(SimpleSubstitution):
def __init__(self, *args, **kwargs):
super(PriorityPackageRequirement, self).__init__(*args, **kwargs)
self._replaced_packages = {}
# check if we need to replace the packages:
priority_packages = self.config.get('agent.package_manager.priority_packages', None)
if priority_packages:
self.__class__.name = priority_packages
self.__class__.name = [p.lower() for p in priority_packages]
priority_optional_packages = self.config.get('agent.package_manager.priority_optional_packages', None)
if priority_optional_packages:
self.__class__.optional_package_names = priority_optional_packages
self.__class__.optional_package_names = [p.lower() for p in priority_optional_packages]
def match(self, req):
# match both Cython & cython
@@ -28,7 +30,9 @@ class PriorityPackageRequirement(SimpleSubstitution):
Replace a requirement
:raises: ValueError if version is pre-release
"""
if req.name in self.optional_package_names:
self._replaced_packages[req.name] = req.line
if req.name.lower() in self.optional_package_names:
# noinspection PyBroadException
try:
if PackageManager.out_of_scope_install_package(str(req)):
@@ -39,6 +43,41 @@ class PriorityPackageRequirement(SimpleSubstitution):
PackageManager.out_of_scope_install_package(str(req))
return Text(req)
def replace_back(self, list_of_requirements):
"""
:param list_of_requirements: {'pip': ['a==1.0', ]}
:return: {'pip': ['a==1.0', ]}
"""
# if we replaced setuptools, it means someone requested it, and since freeze will not contain it,
# we need to add it manually
if not self._replaced_packages or "setuptools" not in self._replaced_packages:
return list_of_requirements
try:
for k, lines in list_of_requirements.items():
# k is either pip/conda
if k not in ('pip', 'conda'):
continue
for i, line in enumerate(lines):
if not line or line.lstrip().startswith('#'):
continue
parts = [p for p in re.split(r'\s|=|\.|<|>|~|!|@|#', line) if p]
if not parts:
continue
# if we found setuptools, do nothing
if parts[0] == "setuptools":
return list_of_requirements
# if we are here it means we have not found setuptools
# we should add it:
if "pip" in list_of_requirements:
list_of_requirements["pip"] = [self._replaced_packages["setuptools"]] + list_of_requirements["pip"]
except Exception as ex: # noqa
return list_of_requirements
return list_of_requirements
class PackageCollectorRequirement(SimpleSubstitution):
"""

View File

@@ -7,13 +7,14 @@ from furl import furl
import urllib.parse
from operator import itemgetter
from html.parser import HTMLParser
from typing import Text
from typing import Text, Optional, Dict
import attr
import requests
import six
from .requirements import SimpleSubstitution, FatalSpecsResolutionError, SimpleVersion
from .requirements import SimpleSubstitution, FatalSpecsResolutionError, SimpleVersion, MarkerRequirement
from ...external.requirements_parser.requirement import Requirement
OS_TO_WHEEL_NAME = {"linux": "linux_x86_64", "windows": "win_amd64"}
@@ -179,6 +180,7 @@ class PytorchRequirement(SimpleSubstitution):
self.python_version_string = None
self.python_major_minor_str = None
self.python = None
self._fix_setuptools = None
self.exceptions = []
self._original_req = []
@@ -318,12 +320,14 @@ class PytorchRequirement(SimpleSubstitution):
from pip._internal.commands.show import search_packages_info
installed_torch = list(search_packages_info([req.name]))
# notice the comparison order, the first part will make sure we have a valid installed package
if installed_torch and installed_torch[0]['version'] and \
req.compare_version(installed_torch[0]['version']):
installed_torch_version = (getattr(installed_torch[0], 'version', None) or installed_torch[0]['version']) \
if installed_torch else None
if installed_torch and installed_torch_version and \
req.compare_version(installed_torch_version):
print('PyTorch: requested "{}" version {}, using pre-installed version {}'.format(
req.name, req.specs[0] if req.specs else 'unspecified', installed_torch[0]['version']))
req.name, req.specs[0] if req.specs else 'unspecified', installed_torch_version))
# package already installed, do nothing
req.specs = [('==', str(installed_torch[0]['version']))]
req.specs = [('==', str(installed_torch_version))]
return '{} {} {}'.format(req.name, req.specs[0][0], req.specs[0][1]), True
except Exception:
pass
@@ -364,6 +368,10 @@ class PytorchRequirement(SimpleSubstitution):
else:
print('Trying PyTorch CUDA version {} support'.format(torch_url_key))
# fix broken pytorch setuptools incompatibility
if closest_matched_version and SimpleVersion.compare_versions(closest_matched_version, "<", "1.11.0"):
self._fix_setuptools = "setuptools < 59"
if not url:
url = PytorchWheel(
torch_version=fix_version(version),
@@ -504,7 +512,7 @@ class PytorchRequirement(SimpleSubstitution):
for i, line in enumerate(lines):
if not line or line.lstrip().startswith('#'):
continue
parts = [p for p in re.split('\s|=|\.|<|>|~|!|@|#', line) if p]
parts = [p for p in re.split(r'\s|=|\.|<|>|~|!|@|#', line) if p]
if not parts:
continue
for req, new_req in self._original_req:
@@ -526,6 +534,16 @@ class PytorchRequirement(SimpleSubstitution):
return list_of_requirements
def post_scan_add_req(self): # type: () -> Optional[MarkerRequirement]
"""
Allows the RequirementSubstitution to add an extra line/requirements after
the initial requirements scan is completed.
Called only once per requirements.txt object
"""
if self._fix_setuptools:
return MarkerRequirement(Requirement.parse(self._fix_setuptools))
return None
MAP = {
"windows": {
"cuda100": {

View File

@@ -628,10 +628,23 @@ class RequirementsManager(object):
result = list(result)
# add post scan add requirements call back
double_req_set = None
for h in self.handlers:
req = h.post_scan_add_req()
if req:
result.append(req.tostr())
reqs = h.post_scan_add_req()
if reqs:
if double_req_set is None:
def safe_parse_name(line):
try:
return Requirement.parse(line).name
except: # noqa
return None
double_req_set = set([safe_parse_name(r) for r in result if r])
for r in (reqs if isinstance(reqs, (tuple, list)) else [reqs]):
if r and (not r.name or r.name not in double_req_set):
result.append(r.tostr())
elif r:
print("SKIPPING additional auto installed package: \"{}\"".format(r))
return join_lines(result)

View File

@@ -529,6 +529,13 @@ class Git(VCS):
"GIT_SSH_COMMAND": "ssh -oBatchMode=yes",
}
def __init__(self, *args, **kwargs):
super(Git, self).__init__(*args, **kwargs)
try:
self.call("config", "--global", "--replace-all", "safe.directory", "*", cwd=self.location)
except: # noqa
pass
@staticmethod
def remote_branch_name(branch):
return [

View File

@@ -1 +1 @@
__version__ = '1.2.0rc6'
__version__ = '1.3.0'

View File

@@ -34,12 +34,12 @@ agent {
# force_git_ssh_user: git
# unique name of this worker, if None, created based on hostname:process_id
# Overridden with os environment: CLEARML_WORKER_NAME
# Overridden with os environment: CLEARML_WORKER_ID
# worker_id: "clearml-agent-machine1:gpu0"
worker_id: ""
# worker name, replaces the hostname when creating a unique name for this worker
# Overridden with os environment: CLEARML_WORKER_ID
# Overridden with os environment: CLEARML_WORKER_NAME
# worker_name: "clearml-agent-machine1"
worker_name: ""

View File

@@ -65,6 +65,10 @@ def parse_args():
help="Limit the maximum number of pods that this service can run at the same time."
"Should not be used with ports-mode"
)
parser.add_argument(
"--use-owner-token", action="store_true", default=False,
help="Generate and use task owner token for the execution of each task"
)
return parser.parse_args()
@@ -87,7 +91,7 @@ def main():
ssh_port_number=args.ssh_server_port) if args.ssh_server_port else None,
namespace=args.namespace, max_pods_limit=args.max_pods or None,
)
k8s.k8s_daemon(args.queue)
k8s.k8s_daemon(args.queue, use_owner_token=args.use_owner_token)
if __name__ == "__main__":

View File

@@ -8,7 +8,7 @@ psutil>=3.4.2,<5.9.0
pyhocon>=0.3.38,<0.4.0
pyparsing>=2.0.3,<2.5.0
python-dateutil>=2.4.2,<2.9.0
pyjwt>=1.6.4,<2.1.0
pyjwt>=2.4.0,<2.5.0
PyYAML>=3.12,<5.5.0
requests>=2.20.0,<2.26.0
six>=1.13.0,<1.16.0

View File

@@ -61,6 +61,7 @@ setup(
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'License :: OSI Approved :: Apache Software License',
],