Mirror of https://github.com/clearml/clearml-agent
synced 2025-06-26 18:16:15 +00:00
Compare commits
19 Commits
- 51eb0a713c
- 249aa006cb
- c08e2ac0bb
- 335ef91d8e
- 6c7a639673
- 5f77cad5ac
- 0228ae0494
- 165677e800
- 2e5298b737
- c9ffb8a053
- 2466eed23f
- 6e31171d31
- 592254709e
- e43f31eb80
- f50ba005b5
- 1011544533
- 6572023173
- 9c7e2aacd0
- 715f102f6d
```diff
@@ -8,8 +8,8 @@ ML-Ops scheduler & orchestration solution supporting Linux, macOS and Windows**
-[](https://img.shields.io/github/license/allegroai/clearml-agent.svg)
-[](https://img.shields.io/pypi/pyversions/clearml-agent.svg)
-[](https://img.shields.io/pypi/v/clearml-agent.svg)
-[](https://artifacthub.io/packages/search?repo=allegroai)
+[](https://pypi.org/project/clearml-agent/)
+[](https://artifacthub.io/packages/search?repo=allegroai)
 
 </div>
 
 ---
```
```diff
@@ -139,8 +139,8 @@ from clearml_agent.helper.singleton import Singleton
 from clearml_agent.session import Session
 from .events import Events
 
-DOCKER_ROOT_CONF_FILE = "/root/clearml.conf"
-DOCKER_DEFAULT_CONF_FILE = "/root/default_clearml.conf"
+DOCKER_ROOT_CONF_FILE = "/tmp/clearml.conf"  # assuming we can always access/mount this file
+DOCKER_DEFAULT_CONF_FILE = "~/default_clearml.conf"
 
 
 sys_random = random.SystemRandom()
```
```diff
@@ -1109,7 +1109,11 @@ class Worker(ServiceCommandSection):
             if w.id.startswith(worker_name) and w.id != self.worker_id]
         gpus = []
         for w in our_workers:
-            gpus += [int(g) for g in w.split(':')[-1].lower().replace('gpu', '').split(',')]
+            for g in w.split(':')[-1].lower().replace('gpu', '').split(','):
+                try:
+                    gpus += [int(g.strip())]
+                except (ValueError, TypeError):
+                    print("INFO: failed parsing GPU int('{}') - skipping".format(g))
         available_gpus = list(set(gpu_indexes) - set(gpus))
 
         return available_gpus
```
```diff
@@ -1120,7 +1124,13 @@ class Worker(ServiceCommandSection):
             raise ValueError("Dynamic GPU allocation is not supported by the ClearML-server")
         available_gpus = [prop["value"] for prop in available_gpus if prop["key"] == 'available_gpus']
         if available_gpus:
-            available_gpus = [int(g) for g in available_gpus[-1].split(',')]
+            gpus = []
+            for g in available_gpus[-1].split(','):
+                try:
+                    gpus += [int(g.strip())]
+                except (ValueError, TypeError):
+                    print("INFO: failed parsing GPU int('{}') - skipping".format(g))
+            available_gpus = gpus
         if not isinstance(gpu_queues, dict):
             gpu_queues = dict(gpu_queues)
```
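The two hunks above replace a bare `int()` list comprehension with a loop that tolerates non-numeric GPU entries. A minimal sketch of that behavior, using a hypothetical worker-id string (the `"<host>:gpu<idx>,<idx>"` suffix format follows the diff, the values are made up):

```python
# Sketch of the tolerant GPU-index parsing introduced above.
def parse_gpu_indexes(worker_id):
    gpus = []
    for g in worker_id.split(':')[-1].lower().replace('gpu', '').split(','):
        try:
            gpus.append(int(g.strip()))
        except (ValueError, TypeError):
            print("INFO: failed parsing GPU int('{}') - skipping".format(g))
    return gpus

print(parse_gpu_indexes("agent-host:gpu0,1"))    # [0, 1]
print(parse_gpu_indexes("agent-host:gpu0,all"))  # [0] - "all" is skipped instead of raising
```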
```diff
@@ -1281,12 +1291,9 @@ class Worker(ServiceCommandSection):
             raise ValueError("Running in Docker mode, 'docker' command was not found")
 
         self._worker_tags = kwargs.get('child_report_tags', None)
-        self._impersonate_as_task_owner = kwargs.get('use_owner_token', False)
-        if self._impersonate_as_task_owner:
-            if not self._session.check_min_api_version("2.14"):
-                raise ValueError("Server does not support --use-owner-token option (incompatible API version)")
-            if self._session.feature_set == "basic":
-                raise ValueError("Server does not support --use-owner-token option")
+
+        self._use_owner_token(kwargs.get('use_owner_token', False))
 
         self._standalone_mode = kwargs.get('standalone_mode', False)
         self._services_mode = kwargs.get('services_mode', False)
         # must have docker in services_mode
```
```diff
@@ -1795,7 +1802,7 @@ class Worker(ServiceCommandSection):
     def _apply_extra_configuration(self):
         # store a few things we updated in runtime (TODO: we should list theme somewhere)
         agent_config = self._session.config["agent"].copy()
-        agent_config_keys = ["cuda_version", "cudnn_version", "default_python", "worker_id", "debug"]
+        agent_config_keys = ["cuda_version", "cudnn_version", "default_python", "worker_id", "worker_name", "debug"]
         try:
             self._session.load_vaults()
         except Exception as ex:
```
```diff
@@ -1995,7 +2002,7 @@ class Worker(ServiceCommandSection):
 
         if entry_point == "clone_task" or entry_point == "reuse_task":
             change = 'ENTRYPOINT if [ ! -s "{trains_conf}" ] ; then ' \
-                     'cp {default_trains_conf} {trains_conf} ; ' \
+                     'cp {default_trains_conf} {trains_conf} && export CLEARML_CONFIG_FILE={trains_conf}; ' \
                      ' fi ; clearml-agent execute --id {task_id} --standalone-mode {clone}'.format(
                 default_trains_conf=DOCKER_DEFAULT_CONF_FILE,
                 trains_conf=DOCKER_ROOT_CONF_FILE,
```
```diff
@@ -2349,7 +2356,7 @@ class Worker(ServiceCommandSection):
         if ENV_TASK_EXTRA_PYTHON_PATH.get():
             python_path = add_python_path(python_path, ENV_TASK_EXTRA_PYTHON_PATH.get())
         if python_path:
-            os.environ['PYTHONPATH'] = python_path
+            os.environ['PYTHONPATH'] = os.pathsep.join(filter(None, (os.environ.get('PYTHONPATH', None), python_path)))
 
         # check if we want to run as another user, only supported on linux
         if ENV_TASK_EXECUTE_AS_USER.get() and is_linux_platform():
```
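The PYTHONPATH change above stops overwriting any pre-existing value; the task path is appended with the platform path separator and empty values are dropped by `filter()`. A small sketch of the merge (the paths are hypothetical):

```python
import os

# Merge behavior of the new assignment: the existing PYTHONPATH (if any) is kept.
os.environ['PYTHONPATH'] = '/usr/local/lib/site-packages'
python_path = '/opt/task/src'
merged = os.pathsep.join(filter(None, (os.environ.get('PYTHONPATH', None), python_path)))
print(merged)  # /usr/local/lib/site-packages:/opt/task/src (on Linux)
```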
```diff
@@ -2373,6 +2380,10 @@ class Worker(ServiceCommandSection):
         if sys.getfilesystemencoding() == 'ascii' and not os.environ.get("PYTHONIOENCODING"):
             os.environ["PYTHONIOENCODING"] = "utf-8"
 
+        # check if we need to update backwards compatible OS environment
+        if not os.environ.get("TRAINS_CONFIG_FILE") and os.environ.get("CLEARML_CONFIG_FILE"):
+            os.environ["TRAINS_CONFIG_FILE"] = os.environ.get("CLEARML_CONFIG_FILE")
+
         print("Starting Task Execution:\n".format(current_task.id))
         exit_code = -1
         try:
```
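The added block mirrors the new `CLEARML_CONFIG_FILE` variable into the legacy `TRAINS_CONFIG_FILE` name only when the legacy one is unset. A quick sketch of that shim (the path is an example value from the diff):

```python
import os

# Backwards-compatibility shim from the hunk above.
os.environ.pop("TRAINS_CONFIG_FILE", None)
os.environ["CLEARML_CONFIG_FILE"] = "/tmp/clearml.conf"
if not os.environ.get("TRAINS_CONFIG_FILE") and os.environ.get("CLEARML_CONFIG_FILE"):
    os.environ["TRAINS_CONFIG_FILE"] = os.environ.get("CLEARML_CONFIG_FILE")
print(os.environ["TRAINS_CONFIG_FILE"])  # /tmp/clearml.conf
```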
```diff
@@ -2973,6 +2984,11 @@ class Worker(ServiceCommandSection):
         - a new working directory (replacing the working_dir in the task's script section)
         - a requirements manager instance
         """
+        os.environ["CLEARML_TASK_SCRIPT_ENTRY"] = execution.entry_point
+        os.environ["CLEARML_TASK_WORKING_DIR"] = execution.working_dir
+        os.environ["CLEARML_VENV_PATH"] = str(venv_folder)
+        os.environ["CLEARML_GIT_ROOT"] = git_root
+
         script = os.path.expanduser(os.path.expandvars(script))
 
         try:
```
```diff
@@ -2990,10 +3006,6 @@ class Worker(ServiceCommandSection):
             task.to_dict(), separators=(',', ':'), default=str
         )
         os.environ["CLEARML_CUSTOM_BUILD_OUTPUT"] = script_output_file.name
-        os.environ["CLEARML_TASK_SCRIPT_ENTRY"] = execution.entry_point
-        os.environ["CLEARML_TASK_WORKING_DIR"] = execution.working_dir
-        os.environ["CLEARML_VENV_PATH"] = str(venv_folder)
-        os.environ["CLEARML_GIT_ROOT"] = git_root
 
         try:
             subprocess.check_call([script])
```
```diff
@@ -3005,7 +3017,7 @@ class Worker(ServiceCommandSection):
 
         output = Path(script_output_file.name).read_text()
         if not output:
-            raise SkippedCustomBuildScript("Build script {} is not found".format(script))
+            raise SkippedCustomBuildScript("Build script {} did not return any output".format(script))
 
         try:
             output = json.loads(output)
```
```diff
@@ -3682,6 +3694,7 @@ class Worker(ServiceCommandSection):
         base_cmd += (
             (['--name', name] if name else []) +
             ['-v', conf_file+':'+DOCKER_ROOT_CONF_FILE] +
+            ['-e', "CLEARML_CONFIG_FILE={}".format(DOCKER_ROOT_CONF_FILE)] +
             (['-v', host_ssh_cache+':'+mount_ssh] if host_ssh_cache else []) +
             (['-v', host_apt_cache+':'+mount_apt_cache] if host_apt_cache else []) +
             (['-v', host_pip_cache+':'+mount_pip_cache] if host_pip_cache else []) +
```
```diff
@@ -3772,12 +3785,17 @@ class Worker(ServiceCommandSection):
         # patch venv folder to new location
         script_dir = script_dir.replace(venv_folder, new_venv_folder)
         # New command line execution
-        command = RunasArgv('bash', '-c', 'HOME=\"{}\" PATH=\"{}\" PYTHONPATH=\"{}\" TRAINS_CONFIG_FILE={} {}'.format(
-            home_folder,
-            os.environ.get('PATH', '').replace(venv_folder, new_venv_folder),
-            os.environ.get('PYTHONPATH', '').replace(venv_folder, new_venv_folder),
-            user_trains_conf,
-            command.serialize().replace(venv_folder, new_venv_folder)))
+        command = RunasArgv(
+            'bash', '-c',
+            'HOME=\"{}\" PATH=\"{}\" PYTHONPATH=\"{}\" '
+            'TRAINS_CONFIG_FILE={} CLEARML_CONFIG_FILE={} {}'.format(
+                home_folder,
+                os.environ.get('PATH', '').replace(venv_folder, new_venv_folder),
+                os.environ.get('PYTHONPATH', '').replace(venv_folder, new_venv_folder),
+                user_trains_conf, user_trains_conf,
+                command.serialize().replace(venv_folder, new_venv_folder)
+            )
+        )
         command.set_uid(user_uid=user_uid, user_gid=user_uid)
 
         return command, script_dir
```
```diff
@@ -3943,6 +3961,14 @@ class Worker(ServiceCommandSection):
         # type: (str) -> bool
         return re.fullmatch(r"^[a-zA-Z0-9][a-zA-Z0-9_.-]+$", name) is not None
 
+    def _use_owner_token(self, use_owner_token=False):
+        self._impersonate_as_task_owner = use_owner_token
+        if self._impersonate_as_task_owner:
+            if not self._session.check_min_api_version("2.14"):
+                raise ValueError("Server does not support --use-owner-token option (incompatible API version)")
+            if self._session.feature_set == "basic":
+                raise ValueError("Server does not support --use-owner-token option")
+
 
 if __name__ == "__main__":
     pass
```
```diff
@@ -18,7 +18,7 @@ from typing import Text, List, Callable, Any, Collection, Optional, Union
 import yaml
 
 from clearml_agent.commands.events import Events
-from clearml_agent.commands.worker import Worker, get_task_container, set_task_container
+from clearml_agent.commands.worker import Worker, get_task_container, set_task_container, get_next_task
 from clearml_agent.definitions import ENV_DOCKER_IMAGE
 from clearml_agent.errors import APIError
 from clearml_agent.helper.base import safe_remove_file
```
```diff
@@ -362,7 +362,7 @@ class K8sIntegration(Worker):
             print('Failed getting number of used pods: {}'.format(ex))
             return -2
 
-    def run_one_task(self, queue: Text, task_id: Text, worker_args=None, **_):
+    def run_one_task(self, queue: Text, task_id: Text, worker_args=None, task_session=None, **_):
         print('Pulling task {} launching on kubernetes cluster'.format(task_id))
         task_data = self._session.api_client.tasks.get_all(id=[task_id])[0]
 
```
```diff
@@ -398,11 +398,19 @@
             self.conf_file_content
             or Path(self._session._config_file).read_text()
         ).encode("ascii")
-        create_clearml_conf = "echo '{}' | base64 --decode >> ~/clearml.conf".format(
+
+        create_clearml_conf = ["echo '{}' | base64 --decode >> ~/clearml.conf".format(
             base64.b64encode(
                 hocon_config_encoded
             ).decode('ascii')
-        )
+        )]
+
+        if task_session:
+            create_clearml_conf.append(
+                "export CLEARML_AUTH_TOKEN=$(echo '{}' | base64 --decode)".format(
+                    base64.b64encode(task_session.token.encode("ascii")).decode('ascii')
+                )
+            )
 
         if self.ports_mode:
             print("Kubernetes looking for available pod to use")
```
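The hunk above turns `create_clearml_conf` from a single shell command into a list of commands, so an optional `CLEARML_AUTH_TOKEN` export can be appended when a task-owner session is used. A self-contained sketch of how those commands are built (the configuration text and token below are hypothetical placeholders):

```python
import base64

# Hypothetical configuration content and task token, base64-wrapped as in the diff.
hocon_config_encoded = 'api { web_server: "http://localhost:8080" }'.encode("ascii")
create_clearml_conf = ["echo '{}' | base64 --decode >> ~/clearml.conf".format(
    base64.b64encode(hocon_config_encoded).decode('ascii')
)]

task_token = "example-task-owner-token"
create_clearml_conf.append(
    "export CLEARML_AUTH_TOKEN=$(echo '{}' | base64 --decode)".format(
        base64.b64encode(task_token.encode("ascii")).decode('ascii')
    )
)

# Later joined into the pod/container command line, as the following hunks show.
print(' ; '.join(create_clearml_conf))
```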
```diff
@@ -594,19 +602,22 @@
                 extra_docker_bash_script=extra_docker_bash_script)
             for line in container_bash_script])
 
-        create_init_script = \
-            "echo '{}' | base64 --decode >> ~/__start_agent__.sh ; " \
+        extra_bash_commands = list(create_clearml_conf or [])
+
+        extra_bash_commands.append(
+            "echo '{}' | base64 --decode >> ~/__start_agent__.sh ; "
             "/bin/bash ~/__start_agent__.sh".format(
                 base64.b64encode(
                     script_encoded.encode('ascii')
                 ).decode('ascii'))
+        )
 
         # Notice: we always leave with exit code 0, so pods are never restarted
         container = self._merge_containers(
             container,
             dict(name=name, image=docker_image,
                  command=['/bin/bash'],
-                 args=['-c', '{} ; {} ; exit 0'.format(create_clearml_conf, create_init_script)])
+                 args=['-c', '{} ; exit 0'.format(' ; '.join(extra_bash_commands))])
         )
 
         if template['spec']['containers']:
```
```diff
@@ -685,7 +696,7 @@
             "--",
             "/bin/sh",
             "-c",
-            "{} ; {}".format(create_clearml_conf, container_bash_script.format(
+            "{} ; {}".format(" ; ".join(create_clearml_conf or []), container_bash_script.format(
                 extra_bash_init_cmd=self.extra_bash_init_script or "",
                 extra_docker_bash_script=docker_bash or "",
                 task_id=task_id
```
```diff
@@ -742,14 +753,16 @@
 
                 # get next task in queue
                 try:
-                    response = self._session.api_client.queues.get_next_task(queue=queue)
+                    response = get_next_task(
+                        self._session, queue=queue, get_task_info=self._impersonate_as_task_owner
+                    )
                 except Exception as e:
                     print("Warning: Could not access task queue [{}], error: {}".format(queue, e))
                     continue
                 else:
                     try:
-                        task_id = response.entry.task
-                    except AttributeError:
+                        task_id = response["entry"]["task"]
+                    except (KeyError, TypeError, AttributeError):
                         print("No tasks in queue {}".format(queue))
                         continue
                     events_service.send_log_events(
```
```diff
@@ -761,8 +774,26 @@
                         level="INFO",
                     )
 
+                    task_session = None
+                    if self._impersonate_as_task_owner:
+                        try:
+                            task_user = response["task_info"]["user"]
+                            task_company = response["task_info"]["company"]
+                        except (KeyError, TypeError, AttributeError):
+                            print("Error: cannot retrieve owner user for the task '{}', skipping".format(task_id))
+                            continue
+
+                        task_session = self.get_task_session(task_user, task_company)
+                        if not task_session:
+                            print(
+                                "Error: Could not login as the user '{}' for the task '{}', skipping".format(
+                                    task_user, task_id
+                                )
+                            )
+                            continue
+
                     self.report_monitor(ResourceMonitor.StatusReport(queues=queues, queue=queue, task=task_id))
-                    self.run_one_task(queue, task_id, worker_params)
+                    self.run_one_task(queue, task_id, worker_params, task_session)
                     self.report_monitor(ResourceMonitor.StatusReport(queues=self.queues))
                     break
             else:
```
```diff
@@ -773,7 +804,7 @@
             if self._session.config["agent.reload_config"]:
                 self.reload_config()
 
-    def k8s_daemon(self, queue):
+    def k8s_daemon(self, queue, **kwargs):
         """
         Start the k8s Glue service.
         This service will be pulling tasks from *queue* and scheduling them for execution using kubectl.
```
```diff
@@ -784,8 +815,10 @@
 
         :param list(str) queue: queue name to pull from
         """
-        return self.daemon(queues=[ObjectID(name=queue)] if queue else None,
-                           log_level=logging.INFO, foreground=True, docker=False)
+        return self.daemon(
+            queues=[ObjectID(name=queue)] if queue else None,
+            log_level=logging.INFO, foreground=True, docker=False, **kwargs,
+        )
 
     @classmethod
     def get_ssh_server_bash(cls, ssh_port_number):
```
```diff
@@ -1,3 +1,4 @@
+import re
 from typing import Text
 
 from .base import PackageManager
```
```diff
@@ -11,6 +12,7 @@ class PriorityPackageRequirement(SimpleSubstitution):
 
     def __init__(self, *args, **kwargs):
         super(PriorityPackageRequirement, self).__init__(*args, **kwargs)
+        self._replaced_packages = {}
         # check if we need to replace the packages:
         priority_packages = self.config.get('agent.package_manager.priority_packages', None)
         if priority_packages:
```
```diff
@@ -28,6 +30,8 @@ class PriorityPackageRequirement(SimpleSubstitution):
         Replace a requirement
         :raises: ValueError if version is pre-release
         """
+        self._replaced_packages[req.name] = req.line
+
         if req.name in self.optional_package_names:
             # noinspection PyBroadException
             try:
```
```diff
@@ -39,6 +43,41 @@ class PriorityPackageRequirement(SimpleSubstitution):
             PackageManager.out_of_scope_install_package(str(req))
             return Text(req)
 
+    def replace_back(self, list_of_requirements):
+        """
+        :param list_of_requirements: {'pip': ['a==1.0', ]}
+        :return: {'pip': ['a==1.0', ]}
+        """
+        # if we replaced setuptools, it means someone requested it, and since freeze will not contain it,
+        # we need to add it manually
+        if not self._replaced_packages or "setuptools" not in self._replaced_packages:
+            return list_of_requirements
+
+        try:
+            for k, lines in list_of_requirements.items():
+                # k is either pip/conda
+                if k not in ('pip', 'conda'):
+                    continue
+                for i, line in enumerate(lines):
+                    if not line or line.lstrip().startswith('#'):
+                        continue
+                    parts = [p for p in re.split(r'\s|=|\.|<|>|~|!|@|#', line) if p]
+                    if not parts:
+                        continue
+                    # if we found setuptools, do nothing
+                    if parts[0] == "setuptools":
+                        return list_of_requirements
+
+            # if we are here it means we have not found setuptools
+            # we should add it:
+            if "pip" in list_of_requirements:
+                list_of_requirements["pip"] = [self._replaced_packages["setuptools"]] + list_of_requirements["pip"]
+
+        except Exception as ex: # noqa
+            return list_of_requirements
+
+        return list_of_requirements
+
 
 class PackageCollectorRequirement(SimpleSubstitution):
     """
```
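`replace_back()` decides whether a previously requested `setuptools` pin has to be re-added by tokenizing each frozen requirement line and checking its first token. A small sketch of that tokenization (the sample lines are made up):

```python
import re

def first_token(line):
    # same split pattern as in replace_back(): whitespace plus version/marker separators
    parts = [p for p in re.split(r'\s|=|\.|<|>|~|!|@|#', line) if p]
    return parts[0] if parts else None

for line in ["setuptools==59.5.0", "numpy>=1.21", "# comment", ""]:
    if not line or line.lstrip().startswith('#'):
        continue
    print(repr(line), "->", first_token(line))
# 'setuptools==59.5.0' -> setuptools
# 'numpy>=1.21' -> numpy
```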
```diff
@@ -7,13 +7,14 @@ from furl import furl
 import urllib.parse
 from operator import itemgetter
 from html.parser import HTMLParser
-from typing import Text
+from typing import Text, Optional, Dict
 
 import attr
 import requests
 
 import six
-from .requirements import SimpleSubstitution, FatalSpecsResolutionError, SimpleVersion
+from .requirements import SimpleSubstitution, FatalSpecsResolutionError, SimpleVersion, MarkerRequirement
+from ...external.requirements_parser.requirement import Requirement
 
 OS_TO_WHEEL_NAME = {"linux": "linux_x86_64", "windows": "win_amd64"}
```
```diff
@@ -179,6 +180,7 @@ class PytorchRequirement(SimpleSubstitution):
         self.python_version_string = None
         self.python_major_minor_str = None
         self.python = None
+        self._fix_setuptools = None
         self.exceptions = []
         self._original_req = []
 
```
```diff
@@ -318,12 +320,14 @@ class PytorchRequirement(SimpleSubstitution):
                 from pip._internal.commands.show import search_packages_info
                 installed_torch = list(search_packages_info([req.name]))
                 # notice the comparison order, the first part will make sure we have a valid installed package
-                if installed_torch and installed_torch[0]['version'] and \
-                        req.compare_version(installed_torch[0]['version']):
+                installed_torch_version = (getattr(installed_torch[0], 'version', None) or installed_torch[0]['version']) \
+                    if installed_torch else None
+                if installed_torch and installed_torch_version and \
+                        req.compare_version(installed_torch_version):
                     print('PyTorch: requested "{}" version {}, using pre-installed version {}'.format(
-                        req.name, req.specs[0] if req.specs else 'unspecified', installed_torch[0]['version']))
+                        req.name, req.specs[0] if req.specs else 'unspecified', installed_torch_version))
                     # package already installed, do nothing
-                    req.specs = [('==', str(installed_torch[0]['version']))]
+                    req.specs = [('==', str(installed_torch_version))]
                     return '{} {} {}'.format(req.name, req.specs[0][0], req.specs[0][1]), True
             except Exception:
                 pass
```
```diff
@@ -364,6 +368,10 @@ class PytorchRequirement(SimpleSubstitution):
         else:
             print('Trying PyTorch CUDA version {} support'.format(torch_url_key))
 
+        # fix broken pytorch setuptools incompatibility
+        if closest_matched_version and SimpleVersion.compare_versions(closest_matched_version, "<", "1.11.0"):
+            self._fix_setuptools = "setuptools < 59"
+
         if not url:
             url = PytorchWheel(
                 torch_version=fix_version(version),
```
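The new `_fix_setuptools` flag is set only for PyTorch versions older than 1.11.0. A rough illustration of that version gate, using the standard `packaging` library in place of the agent's internal `SimpleVersion` helper (the resolved torch version below is hypothetical):

```python
from packaging.version import Version

closest_matched_version = "1.10.2"  # hypothetical resolved torch version
fix_setuptools = None
if closest_matched_version and Version(closest_matched_version) < Version("1.11.0"):
    fix_setuptools = "setuptools < 59"
print(fix_setuptools)  # setuptools < 59
```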
```diff
@@ -504,7 +512,7 @@ class PytorchRequirement(SimpleSubstitution):
             for i, line in enumerate(lines):
                 if not line or line.lstrip().startswith('#'):
                     continue
-                parts = [p for p in re.split('\s|=|\.|<|>|~|!|@|#', line) if p]
+                parts = [p for p in re.split(r'\s|=|\.|<|>|~|!|@|#', line) if p]
                 if not parts:
                     continue
                 for req, new_req in self._original_req:
```
```diff
@@ -526,6 +534,16 @@ class PytorchRequirement(SimpleSubstitution):
 
         return list_of_requirements
 
+    def post_scan_add_req(self):  # type: () -> Optional[MarkerRequirement]
+        """
+        Allows the RequirementSubstitution to add an extra line/requirements after
+        the initial requirements scan is completed.
+        Called only once per requirements.txt object
+        """
+        if self._fix_setuptools:
+            return MarkerRequirement(Requirement.parse(self._fix_setuptools))
+        return None
+
     MAP = {
         "windows": {
             "cuda100": {
```
```diff
@@ -628,10 +628,23 @@
 
         result = list(result)
         # add post scan add requirements call back
+        double_req_set = None
         for h in self.handlers:
-            req = h.post_scan_add_req()
-            if req:
-                result.append(req.tostr())
+            reqs = h.post_scan_add_req()
+            if reqs:
+                if double_req_set is None:
+                    def safe_parse_name(line):
+                        try:
+                            return Requirement.parse(line).name
+                        except: # noqa
+                            return None
+                    double_req_set = set([safe_parse_name(r) for r in result if r])
+
+                for r in (reqs if isinstance(reqs, (tuple, list)) else [reqs]):
+                    if r and (not r.name or r.name not in double_req_set):
+                        result.append(r.tostr())
+                    elif r:
+                        print("SKIPPING additional auto installed package: \"{}\"".format(r))
 
         return join_lines(result)
```
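The handler loop above now deduplicates the extra requirements returned by `post_scan_add_req()` against the packages already in the result. A simplified sketch of that check, with plain string parsing standing in for the real `Requirement.parse` and example package names:

```python
result = ["torch==1.10.2", "numpy==1.21.6"]
extra = ["setuptools < 59", "numpy"]

def name_of(line):
    # crude stand-in for Requirement.parse(line).name
    return line.replace("<", " ").replace("=", " ").split()[0]

existing = {name_of(r) for r in result}
for line in extra:
    if name_of(line) not in existing:
        result.append(line)
    else:
        print('SKIPPING additional auto installed package: "{}"'.format(line))
print(result)  # the setuptools pin is appended, the duplicate numpy is skipped
```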
```diff
@@ -529,6 +529,13 @@ class Git(VCS):
         "GIT_SSH_COMMAND": "ssh -oBatchMode=yes",
     }
 
+    def __init__(self, *args, **kwargs):
+        super(Git, self).__init__(*args, **kwargs)
+        try:
+            self.call("config", "--global", "--replace-all", "safe.directory", "*", cwd=self.location)
+        except: # noqa
+            pass
+
     @staticmethod
     def remote_branch_name(branch):
         return [
```
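The new `Git.__init__` marks every path as a git `safe.directory`, so repositories cached or cloned under a different user can still be read; failures are silently ignored. Roughly the same call expressed as a standalone snippet (the diff uses the class's own `self.call` wrapper instead of `subprocess` directly):

```python
import subprocess

try:
    # newer git versions refuse to operate on repositories owned by another user
    # unless the path is listed as safe.directory; "*" whitelists every path
    subprocess.check_call(
        ["git", "config", "--global", "--replace-all", "safe.directory", "*"]
    )
except Exception:
    pass
```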
```diff
@@ -1 +1 @@
-__version__ = '1.2.2'
+__version__ = '1.2.4rc3'
```
```diff
@@ -34,12 +34,12 @@ agent {
     # force_git_ssh_user: git
 
     # unique name of this worker, if None, created based on hostname:process_id
-    # Overridden with os environment: CLEARML_WORKER_NAME
+    # Overridden with os environment: CLEARML_WORKER_ID
     # worker_id: "clearml-agent-machine1:gpu0"
     worker_id: ""
 
     # worker name, replaces the hostname when creating a unique name for this worker
-    # Overridden with os environment: CLEARML_WORKER_ID
+    # Overridden with os environment: CLEARML_WORKER_NAME
     # worker_name: "clearml-agent-machine1"
     worker_name: ""
 
```
```diff
@@ -65,6 +65,10 @@ def parse_args():
         help="Limit the maximum number of pods that this service can run at the same time."
              "Should not be used with ports-mode"
     )
+    parser.add_argument(
+        "--use-owner-token", action="store_true", default=False,
+        help="Generate and use task owner token for the execution of each task"
+    )
     return parser.parse_args()
```
```diff
@@ -87,7 +91,7 @@ def main():
         ssh_port_number=args.ssh_server_port) if args.ssh_server_port else None,
         namespace=args.namespace, max_pods_limit=args.max_pods or None,
     )
-    k8s.k8s_daemon(args.queue)
+    k8s.k8s_daemon(args.queue, use_owner_token=args.use_owner_token)
 
 
 if __name__ == "__main__":
```