Compare commits

...

10 Commits

Author SHA1 Message Date
allegroai
c9ffb8a053 Version bump 2022-04-20 08:57:16 +03:00
allegroai
2466eed23f Fix dynamic GPUs with "all" GPUs on he same worker 2022-04-20 08:56:22 +03:00
allegroai
6e31171d31 Version bump to v1.2.3 2022-04-14 22:39:38 +03:00
allegroai
592254709e Fix typo 2022-04-14 22:38:19 +03:00
allegroai
e43f31eb80 Version bump 2022-04-13 10:02:25 +03:00
allegroai
f50ba005b5 Protect dynamic GPUs from failing to parse worker GPU index 2022-04-13 10:01:50 +03:00
allegroai
1011544533 Fix copy breaks agent and nulls the worker name 2022-04-13 10:01:12 +03:00
allegroai
6572023173 Fix avoid reinstall pytorch package if the same version is already installed 2022-04-09 14:18:38 +03:00
allegroai
9c7e2aacd0 Fix PYTHONPATH is overwritten when executing a task (append to it instead) 2022-04-09 14:17:49 +03:00
Allegro AI
715f102f6d Update README.md 2022-04-01 17:48:27 +03:00
5 changed files with 25 additions and 13 deletions

View File

@@ -8,8 +8,8 @@ ML-Ops scheduler & orchestration solution supporting Linux, macOS and Windows**
[![GitHub license](https://img.shields.io/github/license/allegroai/clearml-agent.svg)](https://img.shields.io/github/license/allegroai/clearml-agent.svg)
[![PyPI pyversions](https://img.shields.io/pypi/pyversions/clearml-agent.svg)](https://img.shields.io/pypi/pyversions/clearml-agent.svg)
[![PyPI version shields.io](https://img.shields.io/pypi/v/clearml-agent.svg)](https://img.shields.io/pypi/v/clearml-agent.svg)
[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/allegroai)](https://artifacthub.io/packages/search?repo=allegroai)
[![PyPI Downloads](https://pepy.tech/badge/clearml-agent/month)](https://pypi.org/project/clearml-agent/)
[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/allegroai)](https://artifacthub.io/packages/search?repo=allegroai)
</div>
---

View File

@@ -1109,7 +1109,11 @@ class Worker(ServiceCommandSection):
if w.id.startswith(worker_name) and w.id != self.worker_id]
gpus = []
for w in our_workers:
gpus += [int(g) for g in w.split(':')[-1].lower().replace('gpu', '').split(',')]
for g in w.split(':')[-1].lower().replace('gpu', '').split(','):
try:
gpus += [int(g.strip())]
except (ValueError, TypeError):
print("INFO: failed parsing GPU int('{}') - skipping".format(g))
available_gpus = list(set(gpu_indexes) - set(gpus))
return available_gpus
@@ -1120,7 +1124,13 @@ class Worker(ServiceCommandSection):
raise ValueError("Dynamic GPU allocation is not supported by the ClearML-server")
available_gpus = [prop["value"] for prop in available_gpus if prop["key"] == 'available_gpus']
if available_gpus:
available_gpus = [int(g) for g in available_gpus[-1].split(',')]
gpus = []
for g in available_gpus[-1].split(','):
try:
gpus += [int(g.strip())]
except (ValueError, TypeError):
print("INFO: failed parsing GPU int('{}') - skipping".format(g))
available_gpus = gpus
if not isinstance(gpu_queues, dict):
gpu_queues = dict(gpu_queues)
@@ -1795,7 +1805,7 @@ class Worker(ServiceCommandSection):
def _apply_extra_configuration(self):
# store a few things we updated in runtime (TODO: we should list theme somewhere)
agent_config = self._session.config["agent"].copy()
agent_config_keys = ["cuda_version", "cudnn_version", "default_python", "worker_id", "debug"]
agent_config_keys = ["cuda_version", "cudnn_version", "default_python", "worker_id", "worker_name", "debug"]
try:
self._session.load_vaults()
except Exception as ex:
@@ -2349,7 +2359,7 @@ class Worker(ServiceCommandSection):
if ENV_TASK_EXTRA_PYTHON_PATH.get():
python_path = add_python_path(python_path, ENV_TASK_EXTRA_PYTHON_PATH.get())
if python_path:
os.environ['PYTHONPATH'] = python_path
os.environ['PYTHONPATH'] = os.pathsep.join(filter(None, (os.environ.get('PYTHONPATH', None), python_path)))
# check if we want to run as another user, only supported on linux
if ENV_TASK_EXECUTE_AS_USER.get() and is_linux_platform():

View File

@@ -318,12 +318,14 @@ class PytorchRequirement(SimpleSubstitution):
from pip._internal.commands.show import search_packages_info
installed_torch = list(search_packages_info([req.name]))
# notice the comparison order, the first part will make sure we have a valid installed package
if installed_torch and installed_torch[0]['version'] and \
req.compare_version(installed_torch[0]['version']):
installed_torch_version = (getattr(installed_torch[0], 'version', None) or installed_torch[0]['version']) \
if installed_torch else None
if installed_torch and installed_torch_version and \
req.compare_version(installed_torch_version):
print('PyTorch: requested "{}" version {}, using pre-installed version {}'.format(
req.name, req.specs[0] if req.specs else 'unspecified', installed_torch[0]['version']))
req.name, req.specs[0] if req.specs else 'unspecified', installed_torch_version))
# package already installed, do nothing
req.specs = [('==', str(installed_torch[0]['version']))]
req.specs = [('==', str(installed_torch_version))]
return '{} {} {}'.format(req.name, req.specs[0][0], req.specs[0][1]), True
except Exception:
pass

View File

@@ -1 +1 @@
__version__ = '1.2.2'
__version__ = '1.2.4rc0'

View File

@@ -34,12 +34,12 @@ agent {
# force_git_ssh_user: git
# unique name of this worker, if None, created based on hostname:process_id
# Overridden with os environment: CLEARML_WORKER_NAME
# Overridden with os environment: CLEARML_WORKER_ID
# worker_id: "clearml-agent-machine1:gpu0"
worker_id: ""
# worker name, replaces the hostname when creating a unique name for this worker
# Overridden with os environment: CLEARML_WORKER_ID
# Overridden with os environment: CLEARML_WORKER_NAME
# worker_name: "clearml-agent-machine1"
worker_name: ""