From 6756dec06a34ef3bb1c1e0a9c349ffda03123799 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Thu, 31 Aug 2023 00:09:36 +0300 Subject: [PATCH 01/27] Json serialization error should raise exception --- clearml/backend_interface/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clearml/backend_interface/base.py b/clearml/backend_interface/base.py index 4d273393..266b9c25 100644 --- a/clearml/backend_interface/base.py +++ b/clearml/backend_interface/base.py @@ -83,7 +83,7 @@ class InterfaceBase(SessionInterface): if raise_on_errors: raise res = None - except jsonschema.ValidationError as e: + except (jsonschema.ValidationError, requests.exceptions.InvalidJSONError) as e: if log: log.error( 'Field %s contains illegal schema: %s', '.'.join(e.path), str(e.message) From 713501c611a9e07ec57e5b02f12a09a4bcfdd465 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Thu, 31 Aug 2023 00:10:21 +0300 Subject: [PATCH 02/27] Fix hydra overrides integration (issue #552) --- clearml/binding/hydra_bind.py | 1 + 1 file changed, 1 insertion(+) diff --git a/clearml/binding/hydra_bind.py b/clearml/binding/hydra_bind.py index d26b4e85..2df22ce9 100644 --- a/clearml/binding/hydra_bind.py +++ b/clearml/binding/hydra_bind.py @@ -89,6 +89,7 @@ class PatchHydra(object): if overrides and not isinstance(overrides, (list, tuple)): overrides = [overrides] overrides += ['{}={}'.format(k, v) for k, v in stored_config.items()] + overrides = ["+" + (o if o.startswith("+") and not o.startswith("++") else o) for o in overrides] else: # We take care of it inside the _patched_run_job pass From 71c74f977b8dfbdc3a53d6c8ba49495416861663 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Thu, 31 Aug 2023 00:10:38 +0300 Subject: [PATCH 03/27] Add Model archive/unarchive methods --- clearml/backend_interface/model.py | 23 +++++++++++++++++++++++ clearml/model.py | 20 ++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/clearml/backend_interface/model.py b/clearml/backend_interface/model.py index 22d776b9..9e16dcbc 100644 --- a/clearml/backend_interface/model.py +++ b/clearml/backend_interface/model.py @@ -12,6 +12,7 @@ from ..storage import StorageManager from ..storage.helper import StorageHelper from ..utilities.async_manager import AsyncManagerMixin + ModelPackage = namedtuple("ModelPackage", "weights design") @@ -77,6 +78,28 @@ class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin): self.send(models.SetReadyRequest(model=self.id, publish_task=False)) self.reload() + def archive(self): + if Session.check_min_api_server_version("2.13"): + self.send(models.ArchiveManyRequest(ids=[self.id])) + self.reload() + else: + from ..model import BaseModel + # edit will reload + self._edit( + system_tags=list(set((self.data.system_tags or []) if hasattr(self.data, "system_tags") else []) | {BaseModel._archived_tag}) + ) + + def unarchive(self): + if Session.check_min_api_server_version("2.13"): + self.send(models.UnarchiveManyRequest(ids=[self.id])) + self.reload() + else: + from ..model import BaseModel + # edit will reload + self._edit( + system_tags=list(set((self.data.system_tags or []) if hasattr(self.data, "system_tags") else []) - {BaseModel._archived_tag}) + ) + def _reload(self): """Reload the model object""" if self._offline_mode: diff --git a/clearml/model.py b/clearml/model.py index c380ee34..79a7f53b 100644 --- a/clearml/model.py +++ b/clearml/model.py @@ -1055,6 +1055,26 @@ class BaseModel(object): if not self.published: self._get_base_model().publish() + def archive(self): + # type: () 
-> () + """ + Archive the model. If the model is already archived, this is a no-op + """ + try: + self._get_base_model().archive() + except Exception: + pass + + def unarchive(self): + # type: () -> () + """ + Unarchive the model. If the model is not archived, this is a no-op + """ + try: + self._get_base_model().unarchive() + except Exception: + pass + def _init_reporter(self): if self._reporter: return From 04fe7dffe3b12fe5851178c35866d810e6cd2fcc Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Sat, 2 Sep 2023 17:49:02 +0300 Subject: [PATCH 04/27] Fix OutputModel should prefer connected task name over filename stem --- .gitignore | 5 +++-- clearml/model.py | 14 +++++++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 95025315..26218bce 100644 --- a/.gitignore +++ b/.gitignore @@ -7,8 +7,9 @@ # Python *.pyc __pycache__ -build/ -dist/ +/build/ +/dist/ +*/conda_build/build/ *.egg-info .env .venv/ diff --git a/clearml/model.py b/clearml/model.py index 79a7f53b..f143da82 100644 --- a/clearml/model.py +++ b/clearml/model.py @@ -2400,11 +2400,15 @@ class OutputModel(BaseModel): # make sure the created model is updated: out_model_file_name = target_filename or weights_filename or register_uri - name = ( - Path(out_model_file_name).stem - if out_model_file_name - else (self._task_connect_name or "Output Model") - ) + + # prefer self._task_connect_name if exists + if self._task_connect_name: + name = self._task_connect_name + elif out_model_file_name: + name = Path(out_model_file_name).stem + else: + name = "Output Model" + if not self._base_model: model = self._get_force_base_model(task_model_entry=name) else: From cd61efe6df24317a025bedb8f569b1b3a51997ee Mon Sep 17 00:00:00 2001 From: pollfly <75068813+pollfly@users.noreply.github.com> Date: Wed, 6 Sep 2023 17:07:59 +0300 Subject: [PATCH 05/27] Edit dataset_path docstring (#1105) --- clearml/datasets/dataset.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/clearml/datasets/dataset.py b/clearml/datasets/dataset.py index 8db853ce..c0658f73 100644 --- a/clearml/datasets/dataset.py +++ b/clearml/datasets/dataset.py @@ -462,9 +462,13 @@ class Dataset(object): the dataset (e.g. [s3://bucket/folder/file.csv, http://web.com/file.txt]) :param wildcard: add only specific set of files. Wildcard matching, can be a single string or a list of wildcards. - :param dataset_path: The location in the dataset where the file will be downloaded into. + :param dataset_path: The location in the dataset where the file will be downloaded into, or list/touple of + locations (if list/touple, it must be the same length as ``source_url``). e.g: for source_url='s3://bucket/remote_folder/image.jpg' and dataset_path='s3_files', - 'image.jpg' will be downloaded to 's3_files/image.jpg' (relative path to the dataset) + 'image.jpg' will be downloaded to 's3_files/image.jpg' (relative path to the dataset). + For source_url=['s3://bucket/remote_folder/image.jpg', 's3://bucket/remote_folder/image2.jpg'] and + dataset_path=['s3_files', 's3_files_2'], 'image.jpg' will be downloaded to 's3_files/image.jpg' and + 'image2.jpg' will be downloaded to 's3_files_2/image2.jpg' (relative path to the dataset). :param recursive: If True, match all wildcard files recursively :param verbose: If True, print to console files added/modified :param max_workers: The number of threads to add the external files with. 
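        A short usage sketch of the list form of ``dataset_path`` described above
        (dataset, bucket, and folder names here are hypothetical):

            from clearml import Dataset

            # hypothetical names; one dataset_path entry per source_url entry
            ds = Dataset.create(dataset_name="my_dataset", dataset_project="examples")
            ds.add_external_files(
                source_url=["s3://bucket/remote_folder/image.jpg",
                            "s3://bucket/remote_folder/image2.jpg"],
                dataset_path=["s3_files", "s3_files_2"],
            )

        Note on ``max_workers``: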
Useful when `source_url` is From c922c40d1325aa682f40966de4dd2ec72ebe5652 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Fri, 8 Sep 2023 22:14:53 +0300 Subject: [PATCH 06/27] Fix model reporting in tensorflow 2.13 does not work properly (#1112) --- clearml/binding/frameworks/tensorflow_bind.py | 36 ++++++++++++++++--- examples/frameworks/keras/keras_v3.py | 28 +++++++++++++++ 2 files changed, 60 insertions(+), 4 deletions(-) create mode 100644 examples/frameworks/keras/keras_v3.py diff --git a/clearml/binding/frameworks/tensorflow_bind.py b/clearml/binding/frameworks/tensorflow_bind.py index eb0366dc..e5bb6f4d 100644 --- a/clearml/binding/frameworks/tensorflow_bind.py +++ b/clearml/binding/frameworks/tensorflow_bind.py @@ -1589,6 +1589,11 @@ class PatchKerasModelIO(object): from keras import models as keras_saving # noqa except ImportError: keras_saving = None + try: + from keras.src.saving import saving_api as keras_saving_v3 + except ImportError: + keras_saving_v3 = None + # check that we are not patching anything twice if PatchKerasModelIO.__patched_tensorflow: PatchKerasModelIO.__patched_keras = [ @@ -1598,9 +1603,10 @@ class PatchKerasModelIO(object): Functional if PatchKerasModelIO.__patched_tensorflow[3] != Functional else None, None, None, + keras_saving_v3 ] else: - PatchKerasModelIO.__patched_keras = [Network, Sequential, keras_saving, Functional, None, None] + PatchKerasModelIO.__patched_keras = [Network, Sequential, keras_saving, Functional, None, None, keras_saving_v3] PatchKerasModelIO._patch_io_calls(*PatchKerasModelIO.__patched_keras) if 'tensorflow' in sys.modules and not PatchKerasModelIO.__patched_tensorflow: @@ -1643,6 +1649,8 @@ class PatchKerasModelIO(object): except ImportError: keras_hdf5 = None + keras_saving_v3 = None + if PatchKerasModelIO.__patched_keras: PatchKerasModelIO.__patched_tensorflow = [ Network if PatchKerasModelIO.__patched_keras[0] != Network else None, @@ -1651,14 +1659,23 @@ class PatchKerasModelIO(object): Functional if PatchKerasModelIO.__patched_keras[3] != Functional else None, keras_saving_legacy if PatchKerasModelIO.__patched_keras[4] != keras_saving_legacy else None, keras_hdf5 if PatchKerasModelIO.__patched_keras[5] != keras_hdf5 else None, + keras_saving_v3 if PatchKerasModelIO.__patched_keras[6] != keras_saving_v3 else None, ] else: PatchKerasModelIO.__patched_tensorflow = [ - Network, Sequential, keras_saving, Functional, keras_saving_legacy, keras_hdf5] + Network, Sequential, keras_saving, Functional, keras_saving_legacy, keras_hdf5, keras_saving_v3] PatchKerasModelIO._patch_io_calls(*PatchKerasModelIO.__patched_tensorflow) @staticmethod - def _patch_io_calls(Network, Sequential, keras_saving, Functional, keras_saving_legacy=None, keras_hdf5=None): + def _patch_io_calls( + Network, + Sequential, + keras_saving, + Functional, + keras_saving_legacy=None, + keras_hdf5=None, + keras_saving_v3=None + ): try: if Sequential is not None: Sequential._updated_config = _patched_call(Sequential._updated_config, @@ -1718,6 +1735,9 @@ class PatchKerasModelIO(object): keras_hdf5.save_model_to_hdf5 = _patched_call( keras_hdf5.save_model_to_hdf5, PatchKerasModelIO._save_model) + if keras_saving_v3 is not None: + keras_saving_v3.save_model = _patched_call(keras_saving_v3.save_model, PatchKerasModelIO._save_model) + except Exception as ex: LoggerRoot.get_base_logger(TensorflowBinding).warning(str(ex)) @@ -2058,6 +2078,11 @@ class PatchTensorflowModelIO(object): Checkpoint.write = _patched_call(Checkpoint.write, PatchTensorflowModelIO._ckpt_write) 
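+                # note: newer TF releases route Checkpoint.save() through the private
+                # Checkpoint._write(), so the added block below patches it as well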
except Exception: pass + # noinspection PyBroadException + try: + Checkpoint._write = _patched_call(Checkpoint._write, PatchTensorflowModelIO._ckpt_write) + except Exception: + pass except ImportError: pass except Exception: @@ -2231,7 +2256,10 @@ class PatchTensorflow2ModelIO(object): try: # hack: make sure tensorflow.__init__ is called import tensorflow # noqa - from tensorflow.python.training.tracking import util # noqa + try: + from tensorflow.python.checkpoint.checkpoint import TrackableSaver + except ImportError: + from tensorflow.python.training.tracking.util import TrackableSaver # noqa # noinspection PyBroadException try: util.TrackableSaver.save = _patched_call(util.TrackableSaver.save, diff --git a/examples/frameworks/keras/keras_v3.py b/examples/frameworks/keras/keras_v3.py new file mode 100644 index 00000000..79a5bf93 --- /dev/null +++ b/examples/frameworks/keras/keras_v3.py @@ -0,0 +1,28 @@ +import numpy as np +import keras +from clearml import Task + + +def get_model(): + # Create a simple model. + inputs = keras.Input(shape=(32,)) + outputs = keras.layers.Dense(1)(inputs) + model = keras.Model(inputs, outputs) + model.compile(optimizer=keras.optimizers.Adam(), loss="mean_squared_error") + return model + +Task.init(project_name="examples", task_name="keras_v3") + +model = get_model() + +test_input = np.random.random((128, 32)) +test_target = np.random.random((128, 1)) +model.fit(test_input, test_target) + +model.save("my_model.keras") + +reconstructed_model = keras.models.load_model("my_model.keras") + +np.testing.assert_allclose( + model.predict(test_input), reconstructed_model.predict(test_input) +) From d458924160ef93fe8e54b4570a03a09db04e44c7 Mon Sep 17 00:00:00 2001 From: natephysics Date: Sat, 9 Sep 2023 21:17:49 +0200 Subject: [PATCH 07/27] Add support for recursive list, dict, and tuple ref parsing for pipeline controller.add step() parameter override (#1099) * feat: :sparkles: Added optional support for list, dicts, tuples in Pipeline parameter_overrides * style: :art: Updated to pass the flake8 formatting guidelines * docs: :memo: There was a small typo I noticed in the documentation. 
Two extra ' --- clearml/automation/controller.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/clearml/automation/controller.py b/clearml/automation/controller.py index 019b0d92..60381cad 100644 --- a/clearml/automation/controller.py +++ b/clearml/automation/controller.py @@ -81,6 +81,7 @@ class PipelineController(object): parents = attrib(type=list, default=None) # list of parent DAG steps timeout = attrib(type=float, default=None) # execution timeout limit parameters = attrib(type=dict, default=None) # Task hyper-parameters to change + recursively_parse_parameters = attrib(type=bool, default=False) # if True, recursively parse parameters in lists, dicts, or tuples configurations = attrib(type=dict, default=None) # Task configuration objects to change task_overrides = attrib(type=dict, default=None) # Task overrides to change executed = attrib(type=str, default=None) # The actual executed Task ID (None if not executed yet) @@ -368,6 +369,7 @@ class PipelineController(object): base_task_id=None, # type: Optional[str] parents=None, # type: Optional[Sequence[str]] parameter_override=None, # type: Optional[Mapping[str, Any]] + recursively_parse_parameters=False, # type: bool configuration_overrides=None, # type: Optional[Mapping[str, Union[str, Mapping]]] task_overrides=None, # type: Optional[Mapping[str, Any]] execution_queue=None, # type: Optional[str] @@ -405,7 +407,10 @@ class PipelineController(object): - Parameter access ``parameter_override={'Args/input_file': '${.parameters.Args/input_file}' }`` - Pipeline Task argument (see `Pipeline.add_parameter`) ``parameter_override={'Args/input_file': '${pipeline.}' }`` - Task ID ``parameter_override={'Args/input_file': '${stage3.id}' }`` - + :param recursively_parse_parameters: If True, recursively parse parameters from parameter_override in lists, dicts, or tuples. + Example: + - ``parameter_override={'Args/input_file': ['${.artifacts..url}', 'file2.txt']}`` will be correctly parsed. + - ``parameter_override={'Args/input_file': ('${.parameters.Args/input_file}', '${.parameters.Args/input_file}')}`` will be correctly parsed. :param configuration_overrides: Optional, override Task configuration objects. Expected dictionary of configuration object name and configuration object content. Examples: @@ -572,6 +577,7 @@ class PipelineController(object): name=name, base_task_id=base_task_id, parents=parents or [], queue=execution_queue, timeout=time_limit, parameters=parameter_override or {}, + recursively_parse_parameters=recursively_parse_parameters, configurations=configuration_overrides, clone_task=clone_base_task, task_overrides=task_overrides, @@ -2237,7 +2243,7 @@ class PipelineController(object): updated_hyper_parameters = {} for k, v in node.parameters.items(): - updated_hyper_parameters[k] = self._parse_step_ref(v) + updated_hyper_parameters[k] = self._parse_step_ref(v, recursive=node.recursively_parse_parameters) task_overrides = self._parse_task_overrides(node.task_overrides) if node.task_overrides else None @@ -2776,11 +2782,12 @@ class PipelineController(object): except Exception: pass - def _parse_step_ref(self, value): + def _parse_step_ref(self, value, recursive=False): # type: (Any) -> Optional[str] """ Return the step reference. 
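        With ``recursive=True`` container values are walked element by element; a minimal
        sketch of the behaviour (step and artifact names are hypothetical):

            # hypothetical: the reference is resolved, the literal is left untouched
            self._parse_step_ref(["${stage1.artifacts.data.url}", "file2.txt"], recursive=True)
            # -> ["<resolved artifact url>", "file2.txt"]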
For example "${step1.parameters.Args/param}" :param value: string + :param recursive: if True, recursively parse all values in the dict, list or tuple :return: """ # look for all the step references @@ -2793,6 +2800,18 @@ class PipelineController(object): if not isinstance(new_val, six.string_types): return new_val updated_value = updated_value.replace(g, new_val, 1) + + # if we have a dict, list or tuple, we need to recursively update the values + if recursive: + if isinstance(value, dict): + updated_value = {} + for k, v in value.items(): + updated_value[k] = self._parse_step_ref(v, recursive=True) + elif isinstance(value, list): + updated_value = [self._parse_step_ref(v, recursive=True) for v in value] + elif isinstance(value, tuple): + updated_value = tuple(self._parse_step_ref(v, recursive=True) for v in value) + return updated_value def _parse_task_overrides(self, task_overrides): From 96b89d76b8fbad5017b63f2bff03db3e05fd7e39 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Tue, 12 Sep 2023 00:50:23 +0300 Subject: [PATCH 08/27] Fix dataset zip extraction might fail when creating folders concurrently --- clearml/storage/manager.py | 6 ++++-- clearml/storage/util.py | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/clearml/storage/manager.py b/clearml/storage/manager.py index 7e6425a1..55710017 100644 --- a/clearml/storage/manager.py +++ b/clearml/storage/manager.py @@ -12,7 +12,7 @@ from pathlib2 import Path from .cache import CacheManager from .callbacks import ProgressReport from .helper import StorageHelper -from .util import encode_string_to_filename, safe_extract +from .util import encode_string_to_filename, safe_extract, create_zip_directories from ..debugging.log import LoggerRoot from ..config import deferred_config @@ -163,7 +163,9 @@ class StorageManager(object): temp_target_folder.mkdir(parents=True, exist_ok=True) if suffix == ".zip": - ZipFile(cached_file.as_posix()).extractall(path=temp_target_folder.as_posix()) + zip_file = ZipFile(cached_file.as_posix()) + create_zip_directories(zip_file, path=temp_target_folder.as_posix()) + zip_file.extractall(path=temp_target_folder.as_posix()) elif suffix == ".tar.gz": with tarfile.open(cached_file.as_posix()) as file: safe_extract(file, temp_target_folder.as_posix()) diff --git a/clearml/storage/util.py b/clearml/storage/util.py index b3c13262..9367f7b9 100644 --- a/clearml/storage/util.py +++ b/clearml/storage/util.py @@ -1,7 +1,7 @@ import fnmatch import hashlib import json -import os.path +import os import re import sys from typing import Optional, Union, Sequence, Dict @@ -338,6 +338,37 @@ def is_within_directory(directory, target): return prefix == abs_directory +def create_zip_directories(zipfile, path=None): + try: + path = os.getcwd() if path is None else os.fspath(path) + for member in zipfile.namelist(): + arcname = member.replace("/", os.path.sep) + if os.path.altsep: + arcname = arcname.replace(os.path.altsep, os.path.sep) + # interpret absolute pathname as relative, remove drive letter or + # UNC path, redundant separators, "." and ".." components. 
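+            # (same path sanitization CPython's ZipFile applies on extract; doing it here
+            # lets every target directory be pre-created with exist_ok=True, avoiding the
+            # mkdir race when multiple workers extract into the same folder)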
+ arcname = os.path.splitdrive(arcname)[1] + invalid_path_parts = ("", os.path.curdir, os.path.pardir) + arcname = os.path.sep.join(x for x in arcname.split(os.path.sep) if x not in invalid_path_parts) + if os.path.sep == "\\": + # noinspection PyBroadException + try: + # filter illegal characters on Windows + # noinspection PyProtectedMember + arcname = zipfile._sanitize_windows_name(arcname, os.path.sep) + except Exception: + pass + + targetpath = os.path.normpath(os.path.join(path, arcname)) + + # Create all upper directories if necessary. + upperdirs = os.path.dirname(targetpath) + if upperdirs: + os.makedirs(upperdirs, exist_ok=True) + except Exception as e: + LoggerRoot.get_base_logger().warning("Failed creating zip directories: " + str(e)) + + def safe_extract(tar, path=".", members=None, numeric_owner=False): """Tarfile member sanitization (addresses CVE-2007-4559)""" for member in tar.getmembers(): From d131ff2a45bb76f6859f2a6ba61354401df16200 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Tue, 12 Sep 2023 00:51:25 +0300 Subject: [PATCH 09/27] Fix import sys should not appear in pipeline component auto detected packages --- clearml/utilities/pigar/reqs.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/clearml/utilities/pigar/reqs.py b/clearml/utilities/pigar/reqs.py index 8a7e5024..c7c81291 100644 --- a/clearml/utilities/pigar/reqs.py +++ b/clearml/utilities/pigar/reqs.py @@ -284,6 +284,11 @@ def is_std_or_local_lib(name): False if installed package str if local library """ + + # check if one of the builtin modules first + if name in sys.builtin_module_names: + return True + exist = True if six.PY2: import imp # noqa From 76076b09ddcf69409f7e1c52923b417dc720a969 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Tue, 12 Sep 2023 00:51:47 +0300 Subject: [PATCH 10/27] Fix compatibility issue with requests < 2.28 --- clearml/backend_interface/base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/clearml/backend_interface/base.py b/clearml/backend_interface/base.py index 266b9c25..22c37bd9 100644 --- a/clearml/backend_interface/base.py +++ b/clearml/backend_interface/base.py @@ -25,6 +25,8 @@ class InterfaceBase(SessionInterface): _default_session = None _num_retry_warning_display = 1 _offline_mode = ENV_OFFLINE_MODE.get() + _JSON_EXCEPTION = (jsonschema.ValidationError, requests.exceptions.InvalidJSONError) \ + if hasattr(requests.exceptions, "InvalidJSONError") else (jsonschema.ValidationError,) @property def session(self): @@ -83,7 +85,7 @@ class InterfaceBase(SessionInterface): if raise_on_errors: raise res = None - except (jsonschema.ValidationError, requests.exceptions.InvalidJSONError) as e: + except cls._JSON_EXCEPTION as e: if log: log.error( 'Field %s contains illegal schema: %s', '.'.join(e.path), str(e.message) From 1b27ed07f7313ff490ef153bd07e2353022e2957 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Sat, 16 Sep 2023 20:52:42 +0300 Subject: [PATCH 11/27] Add force_single_script_file to Task.create() to avoid git repo auto detection --- clearml/backend_interface/task/populate.py | 21 +++++++++++++++++++++ clearml/task.py | 3 +++ 2 files changed, 24 insertions(+) diff --git a/clearml/backend_interface/task/populate.py b/clearml/backend_interface/task/populate.py index 7351f290..a4361365 100644 --- a/clearml/backend_interface/task/populate.py +++ b/clearml/backend_interface/task/populate.py @@ -46,6 +46,7 @@ class CreateAndPopulate(object): output_uri=None, # type: Optional[str] base_task_id=None, # type: Optional[str] 
add_task_init_call=True, # type: bool + force_single_script_file=False, # type: bool raise_on_missing_entries=False, # type: bool verbose=False, # type: bool ): @@ -84,6 +85,7 @@ class CreateAndPopulate(object): :param base_task_id: Use a pre-existing task in the system, instead of a local repo/script. Essentially clones an existing task and overrides arguments/requirements. :param add_task_init_call: If True, a 'Task.init()' call is added to the script entry point in remote execution. + :param force_single_script_file: If True, do not auto-detect local repository :param raise_on_missing_entries: If True, raise ValueError on missing entries when populating :param verbose: If True, print verbose logging """ @@ -125,6 +127,7 @@ class CreateAndPopulate(object): self.task_type = task_type self.output_uri = output_uri self.task = None + self.force_single_script_file = bool(force_single_script_file) self.raise_on_missing_entries = raise_on_missing_entries self.verbose = verbose @@ -159,6 +162,7 @@ class CreateAndPopulate(object): detect_jupyter_notebook=False, add_missing_installed_packages=True, detailed_req_report=False, + force_single_script=self.force_single_script_file, ) # check if we have no repository and no requirements raise error @@ -237,6 +241,23 @@ class CreateAndPopulate(object): task_state['script']['diff'] = '' task_state['script']['working_dir'] = cwd or '.' task_state['script']['entry_point'] = entry_point or "" + + if self.force_single_script_file and Path(self.script).is_file(): + create_requirements = self.packages is True + repo_info, requirements = ScriptInfo.get( + filepaths=[Path(self.script).as_posix()], + log=getLogger(), + create_requirements=create_requirements, + uncommitted_from_remote=True, + detect_jupyter_notebook=False, + add_missing_installed_packages=True, + detailed_req_report=False, + force_single_script=self.force_single_script_file, + ) + task_state['script']['diff'] = repo_info.script['diff'] or '' + task_state['script']['entry_point'] = repo_info.script['entry_point'] + if create_requirements: + task_state['script']['requirements'] = repo_info.script.get('requirements') or {} else: # standalone task task_state['script']['entry_point'] = self.script or "" diff --git a/clearml/task.py b/clearml/task.py index 0aeb4a24..0c606a67 100644 --- a/clearml/task.py +++ b/clearml/task.py @@ -791,6 +791,7 @@ class Task(_Task): argparse_args=None, # type: Optional[Sequence[Tuple[str, str]]] base_task_id=None, # type: Optional[str] add_task_init_call=True, # type: bool + force_single_script_file=False, # type: bool ): # type: (...) -> TaskInstance """ @@ -832,6 +833,7 @@ class Task(_Task): :param base_task_id: Use a pre-existing task in the system, instead of a local repo/script. Essentially clones an existing task and overrides arguments/requirements. :param add_task_init_call: If True, a 'Task.init()' call is added to the script entry point in remote execution. 
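        A minimal sketch of the ``force_single_script_file`` flag documented just below
        (project and script names are placeholders):

            from clearml import Task

            # hypothetical: package just this one script, skipping git repo auto-detection
            task = Task.create(
                project_name="examples",
                task_name="single script task",
                script="train.py",
                force_single_script_file=True,
            )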
+ :param force_single_script_file: If True, do not auto-detect local repository :return: The newly created Task (experiment) :rtype: Task @@ -852,6 +854,7 @@ class Task(_Task): docker=docker, docker_args=docker_args, docker_bash_setup_script=docker_bash_setup_script, base_task_id=base_task_id, add_task_init_call=add_task_init_call, + force_single_script_file=force_single_script_file, raise_on_missing_entries=False, ) task = manual_populate.create_task() From 1820423b02d6190f37b2cc8a018900ff30962247 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Sat, 16 Sep 2023 20:53:30 +0300 Subject: [PATCH 12/27] Fix keep backwards compatibility for pipeline.add_step() positional arguments --- clearml/automation/controller.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/clearml/automation/controller.py b/clearml/automation/controller.py index 60381cad..dfa87339 100644 --- a/clearml/automation/controller.py +++ b/clearml/automation/controller.py @@ -81,7 +81,6 @@ class PipelineController(object): parents = attrib(type=list, default=None) # list of parent DAG steps timeout = attrib(type=float, default=None) # execution timeout limit parameters = attrib(type=dict, default=None) # Task hyper-parameters to change - recursively_parse_parameters = attrib(type=bool, default=False) # if True, recursively parse parameters in lists, dicts, or tuples configurations = attrib(type=dict, default=None) # Task configuration objects to change task_overrides = attrib(type=dict, default=None) # Task overrides to change executed = attrib(type=str, default=None) # The actual executed Task ID (None if not executed yet) @@ -100,6 +99,8 @@ class PipelineController(object): monitor_artifacts = attrib(type=list, default=None) # List of artifact names to monitor monitor_models = attrib(type=list, default=None) # List of models to monitor explicit_docker_image = attrib(type=str, default=None) # The Docker image the node uses, specified at creation + recursively_parse_parameters = attrib(type=bool, default=False) # if True, recursively parse parameters in + # lists, dicts, or tuples def __attrs_post_init__(self): if self.parents is None: @@ -369,7 +370,6 @@ class PipelineController(object): base_task_id=None, # type: Optional[str] parents=None, # type: Optional[Sequence[str]] parameter_override=None, # type: Optional[Mapping[str, Any]] - recursively_parse_parameters=False, # type: bool configuration_overrides=None, # type: Optional[Mapping[str, Union[str, Mapping]]] task_overrides=None, # type: Optional[Mapping[str, Any]] execution_queue=None, # type: Optional[str] @@ -386,7 +386,8 @@ class PipelineController(object): cache_executed_step=False, # type: bool base_task_factory=None, # type: Optional[Callable[[PipelineController.Node], Task]] retry_on_failure=None, # type: Optional[Union[int, Callable[[PipelineController, PipelineController.Node, int], bool]]] # noqa - status_change_callback=None # type: Optional[Callable[[PipelineController, PipelineController.Node, str], None]] # noqa + status_change_callback=None, # type: Optional[Callable[[PipelineController, PipelineController.Node, str], None]] # noqa + recursively_parse_parameters=False # type: bool ): # type: (...) 
-> bool """ From 701582d5ff5c1d87fe735ffb023968fd05b30781 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Sat, 16 Sep 2023 20:53:47 +0300 Subject: [PATCH 13/27] Fix incorrect module used --- clearml/binding/frameworks/tensorflow_bind.py | 42 +++++++++++++++---- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/clearml/binding/frameworks/tensorflow_bind.py b/clearml/binding/frameworks/tensorflow_bind.py index e5bb6f4d..385d1d71 100644 --- a/clearml/binding/frameworks/tensorflow_bind.py +++ b/clearml/binding/frameworks/tensorflow_bind.py @@ -2252,24 +2252,24 @@ class PatchTensorflow2ModelIO(object): return PatchTensorflow2ModelIO.__patched = True + # noinspection PyBroadException try: # hack: make sure tensorflow.__init__ is called import tensorflow # noqa - try: - from tensorflow.python.checkpoint.checkpoint import TrackableSaver - except ImportError: - from tensorflow.python.training.tracking.util import TrackableSaver # noqa + from tensorflow.python.training.tracking import util # noqa + # noinspection PyBroadException try: - util.TrackableSaver.save = _patched_call(util.TrackableSaver.save, - PatchTensorflow2ModelIO._save) + util.TrackableSaver.save = _patched_call(util.TrackableSaver.save, PatchTensorflow2ModelIO._save) except Exception: pass + # noinspection PyBroadException try: - util.TrackableSaver.restore = _patched_call(util.TrackableSaver.restore, - PatchTensorflow2ModelIO._restore) + util.TrackableSaver.restore = _patched_call( + util.TrackableSaver.restore, PatchTensorflow2ModelIO._restore + ) except Exception: pass except ImportError: @@ -2277,6 +2277,32 @@ class PatchTensorflow2ModelIO(object): except Exception: LoggerRoot.get_base_logger(TensorflowBinding).debug('Failed patching tensorflow v2') + # noinspection PyBroadException + try: + # hack: make sure tensorflow.__init__ is called + import tensorflow # noqa + from tensorflow.python.checkpoint import checkpoint + + # noinspection PyBroadException + try: + checkpoint.TrackableSaver.save = _patched_call( + checkpoint.TrackableSaver.save, PatchTensorflow2ModelIO._save + ) + except Exception: + pass + + # noinspection PyBroadException + try: + checkpoint.TrackableSaver.restore = _patched_call( + checkpoint.TrackableSaver.restore, PatchTensorflow2ModelIO._restore + ) + except Exception: + pass + except ImportError: + pass + except Exception: + LoggerRoot.get_base_logger(TensorflowBinding).debug('Failed patching tensorflow v2.11') + @staticmethod def _save(original_fn, self, file_prefix, *args, **kwargs): model = original_fn(self, file_prefix, *args, **kwargs) From 9e69b09c9f97b60eab9424c34751bc0db5f93087 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Wed, 20 Sep 2023 15:26:50 +0300 Subject: [PATCH 14/27] Fix PEP8 issues --- clearml/automation/controller.py | 6 ++++-- clearml/binding/environ_bind.py | 1 - clearml/utilities/proxy_object.py | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/clearml/automation/controller.py b/clearml/automation/controller.py index dfa87339..acb5b94e 100644 --- a/clearml/automation/controller.py +++ b/clearml/automation/controller.py @@ -3221,8 +3221,10 @@ class PipelineController(object): name=artifact_name, artifact_object=artifact_object, wait_on_upload=True, - extension_name=".pkl" if isinstance(artifact_object, dict) and - not self._artifact_serialization_function else None, + extension_name=( + ".pkl" if isinstance(artifact_object, dict) and not self._artifact_serialization_function + else None + ), 
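+                    # (the .pkl extension only applies when dict artifacts go through
+                    # the default pickle serializer)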
serialization_function=self._artifact_serialization_function ) diff --git a/clearml/binding/environ_bind.py b/clearml/binding/environ_bind.py index e5167368..69b6ffb3 100644 --- a/clearml/binding/environ_bind.py +++ b/clearml/binding/environ_bind.py @@ -246,7 +246,6 @@ class PatchOsFork(object): os._exit = _at_exit_callback - @staticmethod def _patched_fork(*args, **kwargs): if not PatchOsFork._current_task: diff --git a/clearml/utilities/proxy_object.py b/clearml/utilities/proxy_object.py index 9aadc2f4..57ad1c58 100644 --- a/clearml/utilities/proxy_object.py +++ b/clearml/utilities/proxy_object.py @@ -287,7 +287,7 @@ class WrapperBase(type): # (http://code.activestate.com/recipes/496741/). It adds special methods # to the wrapper class so it can proxy the wrapped class. In addition, it # adds a field __overrides__ in the wrapper class dictionary, containing - # all attributes decorated to be overriden. + # all attributes decorated to be overridden. _special_names = [ '__abs__', '__add__', '__and__', '__call__', '__cmp__', '__coerce__', @@ -303,7 +303,7 @@ class WrapperBase(type): '__repr__', '__reversed__', '__rfloorfiv__', '__rlshift__', '__rmod__', '__rmul__', '__ror__', '__rpow__', '__rrshift__', '__rshift__', '__rsub__', '__rtruediv__', '__rxor__', '__setitem__', '__setslice__', '__sub__', - '__truediv__', '__xor__', 'next', '__str__', '__repr__', + '__truediv__', '__xor__', 'next', '__str__', '__repr__', '__round__', '__fspath__', '__bytes__', '__index__' ] From 0442579e23e46ff3902f37d1938fb904bace5121 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Wed, 20 Sep 2023 15:27:27 +0300 Subject: [PATCH 15/27] Fix `scroll_id` argument missing in `events.get_scalar_metric_data` endpoint (mainly for use in the APIClient) --- clearml/backend_api/services/v2_23/events.py | 22 +++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/clearml/backend_api/services/v2_23/events.py b/clearml/backend_api/services/v2_23/events.py index f8a3ad97..e29d541f 100644 --- a/clearml/backend_api/services/v2_23/events.py +++ b/clearml/backend_api/services/v2_23/events.py @@ -3075,6 +3075,8 @@ class GetScalarMetricDataRequest(Request): :param model_events: If set then the retrieving model events. 
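        A hypothetical paging sketch using the APIClient with the new ``scroll_id``
        argument documented below (task ID and metric name are placeholders):

            from clearml.backend_api.session.client import APIClient

            client = APIClient()
            scroll_id = None
            while True:
                res = client.events.get_scalar_metric_data(
                    task="<task-id>", metric="loss", scroll_id=scroll_id)
                if not res.events:
                    break
                scroll_id = res.scroll_id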
Otherwise task events :type model_events: bool + :param scroll_id: Pass this value on next call to get next page + :type scroll_id: str """ _service = "events" @@ -3095,16 +3097,21 @@ class GetScalarMetricDataRequest(Request): "type": ["boolean", "null"], }, "task": {"description": "task ID", "type": ["string", "null"]}, + "scroll_id": { + "description": "Pass this value on next call to get next page", + "type": "string", + }, }, "type": "object", } - def __init__(self, task=None, metric=None, no_scroll=False, model_events=False, **kwargs): + def __init__(self, task=None, metric=None, no_scroll=False, model_events=False, scroll_id=None, **kwargs): super(GetScalarMetricDataRequest, self).__init__(**kwargs) self.task = task self.metric = metric self.no_scroll = no_scroll self.model_events = model_events + self.scroll_id = scroll_id @schema_property("task") def task(self): @@ -3158,6 +3165,19 @@ class GetScalarMetricDataRequest(Request): self.assert_isinstance(value, "model_events", (bool,)) self._property_model_events = value + @schema_property("scroll_id") + def scroll_id(self): + return self._property_scroll_id + + @scroll_id.setter + def scroll_id(self, value): + if value is None: + self._property_scroll_id = None + return + + self.assert_isinstance(value, "scroll_id", six.string_types) + self._property_scroll_id = value + class GetScalarMetricDataResponse(Response): """ From c5675733a0efa8b2b1d2bf280b2b24b333cef8f4 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Fri, 22 Sep 2023 16:25:50 +0300 Subject: [PATCH 16/27] Fix GPU memory used reports 0 when memory can not be queried per process --- clearml/utilities/gpu/gpustat.py | 6 ++--- clearml/utilities/resource_monitor.py | 32 +++++++++++++++++++-------- 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/clearml/utilities/gpu/gpustat.py b/clearml/utilities/gpu/gpustat.py index d6246e71..f84d73bb 100644 --- a/clearml/utilities/gpu/gpustat.py +++ b/clearml/utilities/gpu/gpustat.py @@ -285,11 +285,11 @@ class GPUStatCollection(object): for nv_process in nv_comp_processes + nv_graphics_processes: try: process = get_process_info(nv_process) - processes.append(process) except psutil.NoSuchProcess: # TODO: add some reminder for NVML broken context # e.g. 
nvidia-smi reset or reboot the system - pass + process = None + processes.append(process) # we do not actually use these, so no point in collecting them # # TODO: Do not block if full process info is not requested @@ -313,7 +313,7 @@ class GPUStatCollection(object): # Convert bytes into MBytes 'memory.used': memory.used // MB if memory else None, 'memory.total': memory.total // MB if memory else None, - 'processes': processes, + 'processes': None if (processes and all(p is None for p in processes)) else processes } if per_process_stats: GPUStatCollection.clean_processes() diff --git a/clearml/utilities/resource_monitor.py b/clearml/utilities/resource_monitor.py index 24c0e3f5..c0cf6cb5 100644 --- a/clearml/utilities/resource_monitor.py +++ b/clearml/utilities/resource_monitor.py @@ -43,6 +43,8 @@ class ResourceMonitor(BackgroundMonitor): self._process_info = psutil.Process() if report_mem_used_per_process else None self._last_process_pool = {} self._last_process_id_list = [] + self._gpu_memory_per_process = True + if not self._gpustat: self._task.get_logger().report_text('ClearML Monitor: GPU monitoring is not available') else: # if running_remotely(): @@ -311,16 +313,28 @@ class ResourceMonitor(BackgroundMonitor): # update mem used by our process and sub processes if self._process_info and (not self._last_process_pool.get('gpu') or (time() - self._last_process_pool['gpu'][0]) >= self._report_frequency): - gpu_stat = self._gpustat.new_query(per_process_stats=True) gpu_mem = {} - for i, g in enumerate(gpu_stat.gpus): - # only monitor the active gpu's, if none were selected, monitor everything - if self._active_gpus and i not in self._active_gpus: - continue - gpu_mem[i] = 0 - for p in g.processes: - if p['pid'] in self._last_process_id_list: - gpu_mem[i] += p.get('gpu_memory_usage', 0) + if self._gpu_memory_per_process: + # noinspection PyBroadException + try: + gpu_stat = self._gpustat.new_query(per_process_stats=True) + except Exception: + gpu_stat = self._gpustat.new_query(per_process_stats=False) + + for i, g in enumerate(gpu_stat.gpus): + # if processes is None, that means we can't query GPU memory usage per proces, so we can stop + if g.processes is None: + self._gpu_memory_per_process = False + break + # only monitor the active gpu's, if none were selected, monitor everything + if self._active_gpus and i not in self._active_gpus: + continue + + gpu_mem[i] = 0 + for p in g.processes: + if p is not None and p['pid'] in self._last_process_id_list: + gpu_mem[i] += p.get('gpu_memory_usage', 0) + self._last_process_pool['gpu'] = time(), gpu_mem else: # if we do no need to update the memory usage, run global query From e71c2572904527dfb5d21a36a503a370442151c9 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Fri, 22 Sep 2023 16:26:06 +0300 Subject: [PATCH 17/27] Version bump to v1.13.0 --- clearml/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clearml/version.py b/clearml/version.py index 96ddfeb7..84c54b74 100644 --- a/clearml/version.py +++ b/clearml/version.py @@ -1 +1 @@ -__version__ = '1.12.2' +__version__ = '1.13.0' From 96646dc46a7c5437d30c6029c4c493db63de2dd2 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Mon, 25 Sep 2023 22:16:02 +0300 Subject: [PATCH 18/27] Fix GPU memory used reports 0 when memory can not be queried per process (edge case) --- clearml/utilities/resource_monitor.py | 45 ++++++++++++++------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/clearml/utilities/resource_monitor.py 
b/clearml/utilities/resource_monitor.py index c0cf6cb5..ac3165c3 100644 --- a/clearml/utilities/resource_monitor.py +++ b/clearml/utilities/resource_monitor.py @@ -311,39 +311,40 @@ class ResourceMonitor(BackgroundMonitor): # On the rest of the samples we return the previous memory measurement # update mem used by our process and sub processes - if self._process_info and (not self._last_process_pool.get('gpu') or - (time() - self._last_process_pool['gpu'][0]) >= self._report_frequency): + if self._gpu_memory_per_process and self._process_info and \ + (not self._last_process_pool.get('gpu') or + (time() - self._last_process_pool['gpu'][0]) >= self._report_frequency): gpu_mem = {} - if self._gpu_memory_per_process: - # noinspection PyBroadException - try: - gpu_stat = self._gpustat.new_query(per_process_stats=True) - except Exception: - gpu_stat = self._gpustat.new_query(per_process_stats=False) + # noinspection PyBroadException + try: + gpu_stat = self._gpustat.new_query(per_process_stats=True) + except Exception: + gpu_stat = self._gpustat.new_query(per_process_stats=False) - for i, g in enumerate(gpu_stat.gpus): - # if processes is None, that means we can't query GPU memory usage per proces, so we can stop - if g.processes is None: - self._gpu_memory_per_process = False - break - # only monitor the active gpu's, if none were selected, monitor everything - if self._active_gpus and i not in self._active_gpus: - continue + for i, g in enumerate(gpu_stat.gpus): + # if processes is None, that means we can't query GPU memory usage per proces, so we can stop + if g.processes is None: + self._gpu_memory_per_process = False + break + # only monitor the active gpu's, if none were selected, monitor everything + if self._active_gpus and i not in self._active_gpus: + continue - gpu_mem[i] = 0 - for p in g.processes: - if p is not None and p['pid'] in self._last_process_id_list: - gpu_mem[i] += p.get('gpu_memory_usage', 0) + gpu_mem[i] = 0 + for p in g.processes: + if p is not None and p['pid'] in self._last_process_id_list: + gpu_mem[i] += p.get('gpu_memory_usage', 0) self._last_process_pool['gpu'] = time(), gpu_mem else: # if we do no need to update the memory usage, run global query # if we have no parent process (backward compatibility), return global stats - gpu_stat = self._gpustat.new_query() + gpu_stat = self._gpustat.new_query(per_process_stats=False) gpu_mem = self._last_process_pool['gpu'][1] if self._last_process_pool.get('gpu') else None # generate the statistics dict for actual report stats = {} + for i, g in enumerate(gpu_stat.gpus): # only monitor the active gpu's, if none were selected, monitor everything if self._active_gpus and i not in self._active_gpus: @@ -381,7 +382,7 @@ class ResourceMonitor(BackgroundMonitor): specs.update( gpu_count=int(len(gpus)), gpu_type=', '.join(g.name for g in gpus), - gpu_memory=', '.join('{}GB'.format(round(g.memory_total/1024.0)) for g in gpus), + gpu_memory=', '.join('{}GB'.format(round(g.memory_total / 1024.0)) for g in gpus), gpu_driver_version=gpu_stat.driver_version or '', gpu_driver_cuda_version=gpu_stat.driver_cuda_version or '', ) From b83baa6f2e0acdeef7a63b6420eb94e9206e386b Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Mon, 25 Sep 2023 22:16:54 +0300 Subject: [PATCH 19/27] Fix Hydra overrides integration (#1120) --- clearml/binding/hydra_bind.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clearml/binding/hydra_bind.py b/clearml/binding/hydra_bind.py index 2df22ce9..6c8af3f9 100644 --- a/clearml/binding/hydra_bind.py 
+++ b/clearml/binding/hydra_bind.py @@ -89,7 +89,7 @@ class PatchHydra(object): if overrides and not isinstance(overrides, (list, tuple)): overrides = [overrides] overrides += ['{}={}'.format(k, v) for k, v in stored_config.items()] - overrides = ["+" + (o if o.startswith("+") and not o.startswith("++") else o) for o in overrides] + overrides = [("+" + o) if (o.startswith("+") and not o.startswith("++")) else o for o in overrides] else: # We take care of it inside the _patched_run_job pass From 2a5fd56fdb13cf6ec80fde5bb99655f0de9d69be Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Tue, 26 Sep 2023 00:33:44 +0300 Subject: [PATCH 20/27] Version bump to v1.13.1 --- clearml/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clearml/version.py b/clearml/version.py index 84c54b74..82b6a36f 100644 --- a/clearml/version.py +++ b/clearml/version.py @@ -1 +1 @@ -__version__ = '1.13.0' +__version__ = '1.13.1' From ea9b613d4bc1b272bcf358a77e3e27406f2c15de Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Sat, 30 Sep 2023 00:02:40 +0300 Subject: [PATCH 21/27] Update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9145295d..8a891232 100644 --- a/README.md +++ b/README.md @@ -182,7 +182,7 @@ If ClearML is part of your development process / project / publication, please c ``` @misc{clearml, title = {ClearML - Your entire MLOps stack in one open-source tool}, -year = {2019}, +year = {2023}, note = {Software available from http://github.com/allegroai/clearml}, url={https://clear.ml/}, author = {ClearML}, From f02b1fc19079b56f563c8daf47667f33cc13c47c Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Mon, 2 Oct 2023 20:43:44 +0300 Subject: [PATCH 22/27] Add configuration files logging example --- examples/reporting/config_files.py | 36 ++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 examples/reporting/config_files.py diff --git a/examples/reporting/config_files.py b/examples/reporting/config_files.py new file mode 100644 index 00000000..1a18c610 --- /dev/null +++ b/examples/reporting/config_files.py @@ -0,0 +1,36 @@ +# ClearML - example code for logging configuration files to Task": +# +import json +from pathlib import Path +import yaml + +from clearml import Task + + +# Connecting ClearML with the current process, +# from here on everything is logged automatically +task = Task.init(project_name='FirstTrial', task_name='config_files_example') + + +# ----------------------------------------------- +# Log config file +# Notice any file format i supported +# In the Web UI you could edit the configuration file directly as text +# and launch on a remote worker with the new configuration automatically applied +# ----------------------------------------------- + +config_file = task.connect_configuration(Path("data_samples") / "sample.json", name='json config file') + +with open(config_file, "rt") as f: + config_json = json.load(f) + +print(config_json) + +config_file = task.connect_configuration(Path("data_samples") / "config_yaml.yaml", name='yaml config file') + +with open(config_file, "rt") as f: + config_yaml = yaml.load(f, Loader=yaml.SafeLoader) + +print(config_yaml) + +print("done") From 9687ca3db199c780cde6354e6efb2a26a2d785cc Mon Sep 17 00:00:00 2001 From: charlienewey-odin <145554036+charlienewey-odin@users.noreply.github.com> Date: Sat, 7 Oct 2023 14:39:08 +0100 Subject: [PATCH 23/27] Verify dataset in parallel (#1131) --- clearml/datasets/dataset.py | 51 
++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/clearml/datasets/dataset.py b/clearml/datasets/dataset.py index c0658f73..41783345 100644 --- a/clearml/datasets/dataset.py +++ b/clearml/datasets/dataset.py @@ -2497,7 +2497,7 @@ class Dataset(object): # check if target folder is not empty, see if it contains everything we need if target_base_folder and next(target_base_folder.iterdir(), None): - if self._verify_dataset_folder(target_base_folder, part, chunk_selection): + if self._verify_dataset_folder(target_base_folder, part, chunk_selection, max_workers): target_base_folder.touch() self._release_lock_ds_target_folder(target_base_folder) return target_base_folder.as_posix() @@ -2538,7 +2538,7 @@ class Dataset(object): raise_on_error=False, force=False) # verify entire dataset (if failed, force downloading parent datasets) - if not self._verify_dataset_folder(target_base_folder, part, chunk_selection): + if not self._verify_dataset_folder(target_base_folder, part, chunk_selection, max_workers): LoggerRoot.get_base_logger().info('Dataset parents need refreshing, re-fetching all parent datasets') # we should delete the entire cache folder self._extract_parent_datasets( @@ -3214,31 +3214,42 @@ class Dataset(object): raise ValueError("Dataset merging failed: {}".format([e for e in errors if e is not None])) pool.close() - def _verify_dataset_folder(self, target_base_folder, part, chunk_selection): - # type: (Path, Optional[int], Optional[dict]) -> bool + def _verify_dataset_folder(self, target_base_folder, part, chunk_selection, max_workers): + # type: (Path, Optional[int], Optional[dict], Optional[int]) -> bool + + def __verify_file_or_link(target_base_folder, part, chunk_selection, file_entry): + # type: (Path, Optional[int], Optional[dict], DatasetFileEntry) -> Optional[bool] + + # check if we need the file for the requested dataset part + if part is not None: + f_parts = chunk_selection.get(file_entry.parent_dataset_id, []) + # file is not in requested dataset part, no need to check it. + if self._get_chunk_idx_from_artifact_name(file_entry.artifact_name) not in f_parts: + return None + + # check if the local size and the stored size match (faster than comparing hash) + if (target_base_folder / file_entry.relative_path).stat().st_size != file_entry.size: + return False + + return True + target_base_folder = Path(target_base_folder) # check dataset file size, if we have a full match no need for parent dataset download / merge verified = True # noinspection PyBroadException try: - for f in self._dataset_file_entries.values(): - # check if we need it for the current part - if part is not None: - f_parts = chunk_selection.get(f.parent_dataset_id, []) - # this is not in our current part, no need to check it. 
- if self._get_chunk_idx_from_artifact_name(f.artifact_name) not in f_parts: - continue + futures_ = [] + with ThreadPoolExecutor(max_workers=max_workers) as tp: + for f in self._dataset_file_entries.values(): + future = tp.submit(__verify_file_or_link, target_base_folder, part, chunk_selection, f) + futures_.append(future) - # check if the local size and the stored size match (faster than comparing hash) - if (target_base_folder / f.relative_path).stat().st_size != f.size: - verified = False - break - - for f in self._dataset_link_entries.values(): - if (target_base_folder / f.relative_path).stat().st_size != f.size: - verified = False - break + for f in self._dataset_link_entries.values(): + # don't check whether link is in dataset part, hence None for part and chunk_selection + future = tp.submit(__verify_file_or_link, target_base_folder, None, None, f) + futures_.append(future) + verified = all(f.result() != False for f in futures_) except Exception: verified = False From 7783e651a5d047731ed1ab3e73c74723d223c388 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Tue, 10 Oct 2023 15:17:03 +0300 Subject: [PATCH 24/27] Update requirements for pyjwt < 2.9 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 005f3391..82654c24 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,7 +11,7 @@ Pillow>=4.1.1 psutil>=3.4.2 pyparsing>=2.0.3 python-dateutil>=2.6.1 -pyjwt>=2.4.0,<2.5.0 ; python_version > '3.5' +pyjwt>=2.4.0,<2.9.0 ; python_version > '3.5' pyjwt>=1.6.4,<2.0.0 ; python_version <= '3.5' PyYAML>=3.12 requests>=2.20.0 From 040fd671a4a6694ab599cf55d456f57cdbedfc20 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Fri, 13 Oct 2023 22:17:29 +0300 Subject: [PATCH 25/27] Fix PEP8 issues --- clearml/datasets/dataset.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/clearml/datasets/dataset.py b/clearml/datasets/dataset.py index 41783345..bcae9440 100644 --- a/clearml/datasets/dataset.py +++ b/clearml/datasets/dataset.py @@ -3217,22 +3217,22 @@ class Dataset(object): def _verify_dataset_folder(self, target_base_folder, part, chunk_selection, max_workers): # type: (Path, Optional[int], Optional[dict], Optional[int]) -> bool - def __verify_file_or_link(target_base_folder, part, chunk_selection, file_entry): - # type: (Path, Optional[int], Optional[dict], DatasetFileEntry) -> Optional[bool] + def verify_file_or_link(base_folder, ds_part, ds_chunk_selection, file_entry): + # type: (Path, Optional[int], Optional[dict], FileEntry) -> Optional[bool] # check if we need the file for the requested dataset part - if part is not None: - f_parts = chunk_selection.get(file_entry.parent_dataset_id, []) + if ds_part is not None: + f_parts = ds_chunk_selection.get(file_entry.parent_dataset_id, []) # file is not in requested dataset part, no need to check it. 
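                # (returning None rather than False lets skipped files pass the final
                #  `all(result is not False)` aggregation below)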
if self._get_chunk_idx_from_artifact_name(file_entry.artifact_name) not in f_parts: return None # check if the local size and the stored size match (faster than comparing hash) - if (target_base_folder / file_entry.relative_path).stat().st_size != file_entry.size: + if (base_folder / file_entry.relative_path).stat().st_size != file_entry.size: return False - + return True - + target_base_folder = Path(target_base_folder) # check dataset file size, if we have a full match no need for parent dataset download / merge verified = True @@ -3241,15 +3241,15 @@ class Dataset(object): futures_ = [] with ThreadPoolExecutor(max_workers=max_workers) as tp: for f in self._dataset_file_entries.values(): - future = tp.submit(__verify_file_or_link, target_base_folder, part, chunk_selection, f) + future = tp.submit(verify_file_or_link, target_base_folder, part, chunk_selection, f) futures_.append(future) for f in self._dataset_link_entries.values(): # don't check whether link is in dataset part, hence None for part and chunk_selection - future = tp.submit(__verify_file_or_link, target_base_folder, None, None, f) + future = tp.submit(verify_file_or_link, target_base_folder, None, None, f) futures_.append(future) - verified = all(f.result() != False for f in futures_) + verified = all(f.result() is not False for f in futures_) except Exception: verified = False From 8a834af777d7c4d1541573158d627c9d39f5c7c5 Mon Sep 17 00:00:00 2001 From: bedapisl Date: Sun, 15 Oct 2023 11:28:54 +0200 Subject: [PATCH 26/27] Support lightgbm model loading from a string instead of file (#1136) Co-authored-by: Bedrich Pisl --- clearml/binding/frameworks/lightgbm_bind.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clearml/binding/frameworks/lightgbm_bind.py b/clearml/binding/frameworks/lightgbm_bind.py index 3493027a..0b842153 100644 --- a/clearml/binding/frameworks/lightgbm_bind.py +++ b/clearml/binding/frameworks/lightgbm_bind.py @@ -71,7 +71,7 @@ class PatchLIGHTgbmModelIO(PatchBaseModelIO): return ret @staticmethod - def _load(original_fn, model_file, *args, **kwargs): + def _load(original_fn, model_file=None, *args, **kwargs): if not PatchLIGHTgbmModelIO._current_task: return original_fn(model_file, *args, **kwargs) From a8ec1076da35e7e22997a3427dfca99dccd09d52 Mon Sep 17 00:00:00 2001 From: Matteo Destro Date: Tue, 17 Oct 2023 19:13:20 +0200 Subject: [PATCH 27/27] Remove subdir path from bucket url (#1117) --- clearml/storage/helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clearml/storage/helper.py b/clearml/storage/helper.py index f4bcae4b..1d62cc68 100644 --- a/clearml/storage/helper.py +++ b/clearml/storage/helper.py @@ -894,7 +894,7 @@ class _GoogleCloudStorageDriver(_Driver): obj.download_to_filename(str(p)) def test_upload(self, test_path, config, **_): - bucket_url = str(furl(scheme=self.scheme, netloc=config.bucket, path=config.subdir)) + bucket_url = str(furl(scheme=self.scheme, netloc=config.bucket)) bucket = self.get_container(container_name=bucket_url, config=config).bucket test_obj = bucket