From d826e9806e5d4a14ad44e855da358098913ccad1 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Fri, 26 Jul 2024 19:13:34 +0300 Subject: [PATCH 01/14] Fix injected task import in Task.populate() --- clearml/backend_interface/task/populate.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/clearml/backend_interface/task/populate.py b/clearml/backend_interface/task/populate.py index 35af6d77..76aaaab5 100644 --- a/clearml/backend_interface/task/populate.py +++ b/clearml/backend_interface/task/populate.py @@ -422,8 +422,9 @@ class CreateAndPopulate(object): "diff --git a{script_entry} b{script_entry}\n" \ "--- a{script_entry}\n" \ "+++ b{script_entry}\n" \ - "@@ -{idx_a},0 +{idx_b},3 @@\n" \ - "+from clearml import Task\n" \ + "@@ -{idx_a},0 +{idx_b},4 @@\n" \ + "+try: from allegroai import Task\n" \ + "+except ImportError: from clearml import Task\n" \ "+(__name__ != \"__main__\") or Task.init()\n" \ "+\n".format( script_entry=script_entry, idx_a=idx_a, idx_b=idx_a + 1) @@ -432,7 +433,11 @@ class CreateAndPopulate(object): pass elif local_entry_file and lines: # if we are here it means we do not have a git diff, but a single script file - init_lines = ["from clearml import Task\n", "(__name__ != \"__main__\") or Task.init()\n\n"] + init_lines = [ + "try: from allegroai import Task\n", + "except ImportError: from clearml import Task\n", + '(__name__ != "__main__") or Task.init()\n\n', + ] task_state['script']['diff'] = ''.join(lines[:idx_a] + init_lines + lines[idx_a:]) # no need to add anything, we patched it. task_init_patch = "" @@ -440,7 +445,8 @@ class CreateAndPopulate(object): # Add Task.init call # if we are here it means we do not have a git diff, but a single script file task_init_patch += \ - "from clearml import Task\n" \ + "try: from allegroai import Task\n" \ + "except ImportError: from clearml import Task\n" \ "(__name__ != \"__main__\") or Task.init()\n\n" task_state['script']['diff'] = task_init_patch + task_state['script'].get('diff', '') task_init_patch = "" From 2b66ee663ed5dd5fd81adf9e024e874159fb0a86 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Mon, 29 Jul 2024 17:36:02 +0300 Subject: [PATCH 02/14] Fix dataset with external links will not reuse downloaded data from parents --- clearml/datasets/dataset.py | 105 +++++++++++++++++++++--------------- 1 file changed, 61 insertions(+), 44 deletions(-) diff --git a/clearml/datasets/dataset.py b/clearml/datasets/dataset.py index 4f7cf0db..ddf2e94e 100644 --- a/clearml/datasets/dataset.py +++ b/clearml/datasets/dataset.py @@ -123,6 +123,7 @@ class Dataset(object): __hyperparams_section = "Datasets" __datasets_runtime_prop = "datasets" __orig_datasets_runtime_prop_prefix = "orig_datasets" + __dataset_struct = "Dataset Struct" __preview_media_max_file_size = deferred_config("dataset.preview.media.max_file_size", 5 * 1024 * 1024, transform=int) __preview_tabular_table_count = deferred_config("dataset.preview.tabular.table_count", 10, transform=int) __preview_tabular_row_count = deferred_config("dataset.preview.tabular.row_count", 10, transform=int) @@ -2081,13 +2082,35 @@ class Dataset(object): self.update_changed_files(num_files_added=count - modified_count, num_files_modified=modified_count) return count - modified_count, modified_count + def _repair_dependency_graph(self): + """ + Repair dependency graph via the Dataset Struct configuration object. 
+ Might happen for datasets with external files in old clearml versions + """ + try: + dataset_struct = self._task.get_configuration_object_as_dict(Dataset.__dataset_struct) + new_dependency_graph = {} + for dataset in dataset_struct.values(): + new_dependency_graph[dataset["job_id"]] = [dataset_struct[p]["job_id"] for p in dataset["parents"]] + self._dependency_graph = new_dependency_graph + except Exception as e: + LoggerRoot.get_base_logger().warning("Could not repair dependency graph. Error is: {}".format(e)) + def _update_dependency_graph(self): """ - Update the dependency graph based on the current self._dataset_file_entries state + Update the dependency graph based on the current self._dataset_file_entries + and self._dataset_link_entries states :return: """ # collect all dataset versions - used_dataset_versions = set(f.parent_dataset_id for f in self._dataset_file_entries.values()) + used_dataset_versions = set(f.parent_dataset_id for f in self._dataset_file_entries.values()) | set( + f.parent_dataset_id for f in self._dataset_link_entries.values() + ) + for dataset_id in used_dataset_versions: + if dataset_id not in self._dependency_graph and dataset_id != self._id: + self._repair_dependency_graph() + break + used_dataset_versions.add(self._id) current_parents = self._dependency_graph.get(self._id) or [] # remove parent versions we no longer need from the main version list @@ -2296,29 +2319,8 @@ class Dataset(object): Notice you should unlock it manually, or wait for the process to finish for auto unlocking. :param max_workers: Number of threads to be spawned when getting dataset files. Defaults to no multi-threading. """ - target_folder = ( - Path(target_folder) - if target_folder - else self._create_ds_target_folder( - lock_target_folder=lock_target_folder - )[0] - ).as_posix() - dependencies = self._get_dependencies_by_order( - include_unused=False, include_current=True - ) - links = {} - for dependency in dependencies: - ds = Dataset.get(dependency) - links.update(ds._dataset_link_entries) - links.update(self._dataset_link_entries) - def _download_link(link, target_path): if os.path.exists(target_path): - LoggerRoot.get_base_logger().info( - "{} already exists. 
Skipping downloading {}".format( - target_path, link - ) - ) return ok = False error = None @@ -2341,27 +2343,40 @@ class Dataset(object): LoggerRoot.get_base_logger().info(log_string) else: link.size = Path(target_path).stat().st_size - if not max_workers: - for relative_path, link in links.items(): - if not is_path_traversal(target_folder, relative_path): - target_path = os.path.join(target_folder, relative_path) - else: - LoggerRoot.get_base_logger().warning( - "Ignoring relative path `{}`: it must not traverse directories".format(relative_path) - ) - target_path = os.path.join(target_folder, os.path.basename(relative_path)) + + def _get_target_path(relative_path, target_folder): + if not is_path_traversal(target_folder, relative_path): + return os.path.join(target_folder, relative_path) + else: + LoggerRoot.get_base_logger().warning( + "Ignoring relative path `{}`: it must not traverse directories".format(relative_path) + ) + return os.path.join(target_folder, os.path.basename(relative_path)) + + def _submit_download_link(relative_path, link, target_folder, pool=None): + if link.parent_dataset_id != self.id: + return + target_path = _get_target_path(relative_path, target_folder) + if pool is None: _download_link(link, target_path) + else: + pool.submit(_download_link, link, target_path) + + target_folder = ( + Path(target_folder) + if target_folder + else self._create_ds_target_folder( + lock_target_folder=lock_target_folder + )[0] + ).as_posix() + + if not max_workers: + for relative_path, link in self._dataset_link_entries.items(): + _submit_download_link(relative_path, link, target_folder) else: with ThreadPoolExecutor(max_workers=max_workers) as pool: - for relative_path, link in links.items(): - if not is_path_traversal(target_folder, relative_path): - target_path = os.path.join(target_folder, relative_path) - else: - LoggerRoot.get_base_logger().warning( - "Ignoring relative path `{}`: it must not traverse directories".format(relative_path) - ) - target_path = os.path.join(target_folder, os.path.basename(relative_path)) - pool.submit(_download_link, link, target_path) + for relative_path, link in self._dataset_link_entries.items(): + _submit_download_link(relative_path, link, target_folder, pool=pool) def _extract_dataset_archive( self, @@ -2586,6 +2601,7 @@ class Dataset(object): :param include_current: If True include the current dataset ID as the last ID in the list :return: list of str representing the datasets id """ + self._update_dependency_graph() roots = [self._id] dependencies = [] # noinspection DuplicatedCode @@ -3027,7 +3043,7 @@ class Dataset(object): # fetch the parents of this version (task) based on what we have on the Task itself. 
# noinspection PyBroadException try: - dataset_version_node = task.get_configuration_object_as_dict("Dataset Struct") + dataset_version_node = task.get_configuration_object_as_dict(Dataset.__dataset_struct) # fine the one that is us for node in dataset_version_node.values(): if node["job_id"] != id_: @@ -3056,7 +3072,7 @@ class Dataset(object): dataset_struct[indices[id_]]["parents"] = [indices[p] for p in parents] # noinspection PyProtectedMember self._task._set_configuration( - name="Dataset Struct", + name=Dataset.__dataset_struct, description="Structure of the dataset", config_type="json", config_text=json.dumps(dataset_struct, indent=2), @@ -3234,7 +3250,8 @@ class Dataset(object): return None - errors = pool.map(copy_file, self._dataset_file_entries.values()) + errors = list(pool.map(copy_file, self._dataset_file_entries.values())) + errors.extend(list(pool.map(copy_file, self._dataset_link_entries.values()))) CacheManager.get_cache_manager(cache_context=self.__cache_context).unlock_cache_folder( ds_base_folder.as_posix()) From 1b474dc0b057b69c76bc2daa9eb8be927cb25efa Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Mon, 29 Jul 2024 17:37:05 +0300 Subject: [PATCH 03/14] Fix hierarchy for pipeline nodes without args --- clearml/automation/controller.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/clearml/automation/controller.py b/clearml/automation/controller.py index 8d14c17c..62d26781 100644 --- a/clearml/automation/controller.py +++ b/clearml/automation/controller.py @@ -4805,11 +4805,11 @@ class PipelineDecorator(PipelineController): if n not in _node.parents: _node.parents.append(n) break - if kwargs: - leaves = cls._singleton._find_executed_node_leaves() - _node.parents = (_node.parents or []) + [ - x for x in cls._evaluated_return_values.get(tid, []) if x in leaves - ] + + leaves = cls._singleton._find_executed_node_leaves() + _node.parents = (_node.parents or []) + [ + x for x in cls._evaluated_return_values.get(tid, []) if x in leaves + ] if not cls._singleton._abort_running_steps_on_failure: for parent in _node.parents: From 462b01ee0e0ac9e496ac9bbba8c2943636a82247 Mon Sep 17 00:00:00 2001 From: pollfly <75068813+pollfly@users.noreply.github.com> Date: Tue, 30 Jul 2024 23:32:08 +0300 Subject: [PATCH 04/14] Edit Logger docs (#1302) --- docs/logger.md | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/docs/logger.md b/docs/logger.md index 59f8ea9a..fbe0360a 100644 --- a/docs/logger.md +++ b/docs/logger.md @@ -22,13 +22,13 @@ Using the **ClearML** [Logger](https://github.com/allegroai/clearml/blob/master/ Additionally, the **ClearML** Logger module provides methods that allow you to do the following: * Get the [current logger]() - * Overrride the ClearML configuration file with a [default upload destination]() for images and files + * Override the ClearML configuration file with a [default upload destination]() for images and files ## Graphs and Images ### Scalar Metrics -Use to report scalar metrics by iteration as a line plot. +Report scalar metrics by iteration as a line plot. First [get the current logger](#get-the-current-logger) and then use it (see an [example script](https://github.com/allegroai/clearml/blob/master/examples/reporting/scalar_reporting.py)) with the following method. @@ -99,7 +99,7 @@ def report_scalar(self, title, series, value, iteration) ### Histograms -Use to report any data by iteration as a histogram. +Report any data by iteration as a histogram. 
First [get the current logger](#get-the-current-logger) and then use it (see an [example script](https://github.com/allegroai/clearml/blob/master/examples/reporting/scatter_hist_confusion_mat_reporting.py)) with the following method. @@ -197,7 +197,7 @@ def report_histogram(self, title, series, values, iteration, labels=None, xlabel ### Line Plots -Use to report any data by iteration as a single or multiple line plot. +Report any data by iteration as a single or multiple line plot. First [get the current logger](#get-the-current-logger) and then use it (see an [example script](https://github.com/allegroai/clearml/blob/master/examples/reporting/scatter_hist_confusion_mat_reporting.py)) with the following method. @@ -323,7 +323,7 @@ def report_line_plot(self, title, series, iteration, xaxis, yaxis, mode='lines', ### 2D Scatter Diagrams -Use to report any vector data as a 2D scatter diagram. +Report any vector data as a 2D scatter diagram. First [get the current logger](#get-the-current-logger) and then use it (see an [example script](https://github.com/allegroai/clearml/blob/master/examples/reporting/scatter_hist_confusion_mat_reporting.py)) with the following method. @@ -459,7 +459,7 @@ def report_scatter2d(self, title, series, scatter, iteration, xaxis=None, yaxis= ### 3D Scatter Diagrams -Use to report any array data as a 3D scatter diagram. +Report any array data as a 3D scatter diagram. First [get the current logger](#get-the-current-logger) and then use it (see an [example script](https://github.com/allegroai/clearml/blob/master/examples/reporting/3d_plots_reporting.py)) with the following method. @@ -584,7 +584,7 @@ def report_scatter3d(self, title, series, scatter, iteration, labels=None, mode=
  • lines+markers
  • - The default values is lines. + The default value is lines. No @@ -595,7 +595,7 @@ def report_scatter3d(self, title, series, scatter, iteration, labels=None, mode= ### Confusion Matrices -Use to report a heat-map matrix as a confusion matrix. You can also plot a heat-map as a [surface diagram](#surface-diagrams). +Report a heat-map matrix as a confusion matrix. You can also plot a heat-map as a [surface diagram](#surface-diagrams). First [get the current logger](#get-the-current-logger) and then use it (see an [example script](https://github.com/allegroai/clearml/blob/master/examples/reporting/scatter_hist_confusion_mat_reporting.py)) with the following method. @@ -687,7 +687,7 @@ def report_confusion_matrix(self, title, series, matrix, iteration, xlabels=None ### Surface Diagrams -Use to plot a heat-map matrix as a surface diagram. You can also plot a heat-map as a [confusion matrix](#confusion-matrices). +Plot a heat-map matrix as a surface diagram. You can also plot a heat-map as a [confusion matrix](#confusion-matrices). First [get the current logger](#get-the-current-logger) and then use it (see an [example script](https://github.com/allegroai/clearml/blob/master/examples/reporting/3d_plots_reporting.py)) with the following method. @@ -818,7 +818,7 @@ def report_surface(self, title, series, matrix, iteration, xlabels=None, ylabels ### Images -Use to report an image and upload its contents to the bucket specified in the **ClearML** configuration file, +Report an image and upload its contents to the bucket specified in the **ClearML** configuration file, or a [default upload destination](#set-default-upload-destination), if you set a default. First [get the current logger](#get-the-current-logger) and then use it (see an [example script](https://github.com/allegroai/clearml/blob/master/examples/manual_reporting.py)) with the following method. @@ -896,7 +896,7 @@ def report_image(self, title, series, iteration, local_path=None, matrix=None, m ndarray - A 3D numpy.ndarray object containing image data (RGB). If path is not specified, then matrix is required. The default values is None. + A 3D numpy.ndarray object containing image data (RGB). If path is not specified, then matrix is required. The default value is None. No @@ -917,7 +917,7 @@ def report_image(self, title, series, iteration, local_path=None, matrix=None, m string - The path of the image file. If matrix is not specified, then path is required. The default values is None. + The path of the image file. If matrix is not specified, then path is required. The default value is None. No @@ -948,13 +948,13 @@ By setting the `CLEARML_LOG_ENVIRONMENT` environment variable, make **ClearML** * All environment variables - export CLEARML_LOG_ENVIRONMENT="*" + export CLEARML_LOG_ENVIRONMENT=* * Specific environment variables For example, log `PWD` and `PYTHONPATH` - export CLEARML_LOG_ENVIRONMENT="PWD,PYTHONPATH" + export CLEARML_LOG_ENVIRONMENT=PWD,PYTHONPATH * No environment variables @@ -1368,7 +1368,7 @@ None. ### Set Default Upload Destination -Use to specify the default destination storage location used for uploading images. +Specify the default destination storage location used for uploading images. Images are uploaded and a link to the image is reported. Credentials for the storage location are in the global configuration file (for example, on Linux, ~/clearml.conf). 
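A minimal end-to-end sketch of the reporting flow described in the logger docs above (the project and task names are placeholders; `Task.init()` assumes a configured ClearML environment):

    from clearml import Task

    # Placeholder project/task names; assumes clearml is installed and configured.
    task = Task.init(project_name="examples", task_name="logger reporting sketch")
    logger = task.get_logger()  # "get the current logger"

    # Report a scalar series by iteration, per the Scalar Metrics section.
    for iteration in range(10):
        logger.report_scalar(title="loss", series="train", value=1.0 / (iteration + 1), iteration=iteration)
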
From a7b58900283c5aada3a0d7906f1a19cbe07f643b Mon Sep 17 00:00:00 2001 From: Rafael Tvelov Date: Wed, 31 Jul 2024 14:19:31 +0300 Subject: [PATCH 05/14] Fix scalar logging bug with Fire (#1301) Fixes `_patched_call` using under Fire scope, see #1300 --- clearml/binding/fire_bind.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clearml/binding/fire_bind.py b/clearml/binding/fire_bind.py index 0953d229..173c5420 100644 --- a/clearml/binding/fire_bind.py +++ b/clearml/binding/fire_bind.py @@ -6,7 +6,7 @@ except ImportError: fire = None import inspect -from .frameworks import _patched_call # noqa +from .frameworks import _patched_call_no_recursion_guard # noqa from ..config import get_remote_task_id, running_remotely from ..utilities.dicts import cast_str_to_bool @@ -57,9 +57,9 @@ class PatchFire: if not cls.__patched: cls.__patched = True if running_remotely(): - fire.core._Fire = _patched_call(fire.core._Fire, PatchFire.__Fire) + fire.core._Fire = _patched_call_no_recursion_guard(fire.core._Fire, PatchFire.__Fire) else: - fire.core._CallAndUpdateTrace = _patched_call( + fire.core._CallAndUpdateTrace = _patched_call_no_recursion_guard( fire.core._CallAndUpdateTrace, PatchFire.__CallAndUpdateTrace ) From d8ca5c73c4749e91d6a5dd39b7d1fb4e9ccee5af Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Wed, 31 Jul 2024 17:24:05 +0300 Subject: [PATCH 06/14] Flake8 --- clearml/backend_interface/task/task.py | 4 ++-- clearml/external/kerastuner.py | 6 ++---- clearml/task.py | 4 ++-- examples/reporting/requirements.txt | 1 + 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/clearml/backend_interface/task/task.py b/clearml/backend_interface/task/task.py index 8298c77c..380e5171 100644 --- a/clearml/backend_interface/task/task.py +++ b/clearml/backend_interface/task/task.py @@ -94,7 +94,7 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin): return str(self) == str(other) def __repr__(self): - return f"TaskTypes.{self.value}" + return "TaskTypes.{}".format(self.value) training = 'training' testing = 'testing' @@ -116,7 +116,7 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin): return str(self) == str(other) def __repr__(self): - return f"TaskTypes.{self.value}" + return "TaskTypes.{}".format(self.value) created = "created" queued = "queued" diff --git a/clearml/external/kerastuner.py b/clearml/external/kerastuner.py index eb411ab9..e03b1568 100644 --- a/clearml/external/kerastuner.py +++ b/clearml/external/kerastuner.py @@ -1,12 +1,10 @@ from typing import Optional from logging import getLogger +from ..task import Task _logger = getLogger("clearml.external.kerastuner") -from ..task import Task - - try: import pandas as pd except ImportError: @@ -119,4 +117,4 @@ else: summary = pd.concat([summary, pd.DataFrame(trial_dict, index=[trial.trial_id])], ignore_index=True) summary.index.name = "trial id" summary = summary[["trial id", *sorted(summary.columns[1:])]] - self.task.get_logger().report_table("summary", "trial", 0, table_plot=summary) \ No newline at end of file + self.task.get_logger().report_table("summary", "trial", 0, table_plot=summary) diff --git a/clearml/task.py b/clearml/task.py index 4a92ce57..76b98630 100644 --- a/clearml/task.py +++ b/clearml/task.py @@ -180,8 +180,8 @@ class Task(_Task): __detect_repo_async = deferred_config('development.vcs_repo_detect_async', False) __default_output_uri = DEV_DEFAULT_OUTPUT_URI.get() or deferred_config('development.default_output_uri', None) - __hidden_tag = "hidden" - + __hidden_tag = "hidden" + 
_launch_multi_node_section = "launch_multi_node" _launch_multi_node_instance_tag = "multi_node_instance" diff --git a/examples/reporting/requirements.txt b/examples/reporting/requirements.txt index bb9393a3..ace399d2 100644 --- a/examples/reporting/requirements.txt +++ b/examples/reporting/requirements.txt @@ -3,6 +3,7 @@ clearml>=1.14.4 matplotlib >= 3.1.1 ; python_version >= '3.6' matplotlib >= 2.2.4 ; python_version < '3.6' numpy != 1.24.0 # https://github.com/numpy/numpy/issues/22826 +bokeh_sampledata==2024.2 ; python_version >= '3.10' pandas pillow>=4.0 plotly From b298e212dde631f3b2326e0a3a519d0235510cbb Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Wed, 31 Jul 2024 17:24:43 +0300 Subject: [PATCH 07/14] Add docstring allowing users to pass packages=False to revert to requirements.txt inside their git repository --- clearml/automation/controller.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/clearml/automation/controller.py b/clearml/automation/controller.py index 62d26781..0b932796 100644 --- a/clearml/automation/controller.py +++ b/clearml/automation/controller.py @@ -212,7 +212,7 @@ class PipelineController(object): docker=None, # type: Optional[str] docker_args=None, # type: Optional[str] docker_bash_setup_script=None, # type: Optional[str] - packages=None, # type: Optional[Union[str, Sequence[str]]] + packages=None, # type: Optional[Union[bool, str, Sequence[str]]] repo=None, # type: Optional[str] repo_branch=None, # type: Optional[str] repo_commit=None, # type: Optional[str] @@ -273,6 +273,7 @@ class PipelineController(object): :param packages: Manually specify a list of required packages or a local requirements.txt file. Example: ["tqdm>=2.1", "scikit-learn"] or "./requirements.txt" If not provided, packages are automatically added. + Use `False` to install requirements from "requirements.txt" inside your git repository :param repo: Optional, specify a repository to attach to the pipeline controller, when remotely executing. Allow users to execute the controller inside the specified repository, enabling them to load modules/script from the repository. Notice the execution work directory will be the repository root folder. @@ -711,7 +712,7 @@ class PipelineController(object): task_type=None, # type: Optional[str] auto_connect_frameworks=None, # type: Optional[dict] auto_connect_arg_parser=None, # type: Optional[dict] - packages=None, # type: Optional[Union[str, Sequence[str]]] + packages=None, # type: Optional[Union[bool, str, Sequence[str]]] repo=None, # type: Optional[str] repo_branch=None, # type: Optional[str] repo_commit=None, # type: Optional[str] @@ -786,6 +787,7 @@ class PipelineController(object): :param packages: Manually specify a list of required packages or a local requirements.txt file. Example: ["tqdm>=2.1", "scikit-learn"] or "./requirements.txt" If not provided, packages are automatically added based on the imports used in the function. + Use `False` to install requirements from "requirements.txt" inside your git repository :param repo: Optional, specify a repository to attach to the function, when remotely executing. Allow users to execute the function inside the specified repository, enabling to load modules/script from a repository Notice the execution work directory will be the repository root folder. 
@@ -2064,7 +2066,7 @@ class PipelineController(object): task_type=None, # type: Optional[str] auto_connect_frameworks=None, # type: Optional[dict] auto_connect_arg_parser=None, # type: Optional[dict] - packages=None, # type: Optional[Union[str, Sequence[str]]] + packages=None, # type: Optional[Union[bool, str, Sequence[str]]] repo=None, # type: Optional[str] repo_branch=None, # type: Optional[str] repo_commit=None, # type: Optional[str] @@ -2139,6 +2141,7 @@ class PipelineController(object): :param packages: Manually specify a list of required packages or a local requirements.txt file. Example: ["tqdm>=2.1", "scikit-learn"] or "./requirements.txt" If not provided, packages are automatically added based on the imports used in the function. + Use `False` to install requirements from "requirements.txt" inside your git repository :param repo: Optional, specify a repository to attach to the function, when remotely executing. Allow users to execute the function inside the specified repository, enabling to load modules/script from a repository Notice the execution work directory will be the repository root folder. @@ -3485,7 +3488,7 @@ class PipelineDecorator(PipelineController): docker=None, # type: Optional[str] docker_args=None, # type: Optional[str] docker_bash_setup_script=None, # type: Optional[str] - packages=None, # type: Optional[Union[str, Sequence[str]]] + packages=None, # type: Optional[Union[bool, str, Sequence[str]]] repo=None, # type: Optional[str] repo_branch=None, # type: Optional[str] repo_commit=None, # type: Optional[str] @@ -3541,6 +3544,7 @@ class PipelineDecorator(PipelineController): :param packages: Manually specify a list of required packages or a local requirements.txt file. Example: ["tqdm>=2.1", "scikit-learn"] or "./requirements.txt" If not provided, packages are automatically added. + Use `False` to install requirements from "requirements.txt" inside your git repository :param repo: Optional, specify a repository to attach to the pipeline controller, when remotely executing. Allow users to execute the controller inside the specified repository, enabling them to load modules/script from the repository. Notice the execution work directory will be the repository root folder. @@ -3950,7 +3954,7 @@ class PipelineDecorator(PipelineController): return_values=('return_object', ), # type: Union[str, Sequence[str]] name=None, # type: Optional[str] cache=False, # type: bool - packages=None, # type: Optional[Union[str, Sequence[str]]] + packages=None, # type: Optional[Union[bool, str, Sequence[str]]] parents=None, # type: Optional[List[str]] execution_queue=None, # type: Optional[str] continue_on_fail=False, # type: bool @@ -3992,6 +3996,7 @@ class PipelineDecorator(PipelineController): :param packages: Manually specify a list of required packages or a local requirements.txt file. Example: ["tqdm>=2.1", "scikit-learn"] or "./requirements.txt" If not provided, packages are automatically added based on the imports used inside the wrapped function. + Use `False` to install requirements from "requirements.txt" inside your git repository :param parents: Optional list of parent nodes in the DAG. The current step in the pipeline will be sent for execution only after all the parent nodes have been executed successfully. 
@@ -4415,7 +4420,7 @@ class PipelineDecorator(PipelineController): docker=None, # type: Optional[str] docker_args=None, # type: Optional[str] docker_bash_setup_script=None, # type: Optional[str] - packages=None, # type: Optional[Union[str, Sequence[str]]] + packages=None, # type: Optional[Union[bool, str, Sequence[str]]] repo=None, # type: Optional[str] repo_branch=None, # type: Optional[str] repo_commit=None, # type: Optional[str] @@ -4502,6 +4507,7 @@ class PipelineDecorator(PipelineController): :param packages: Manually specify a list of required packages or a local requirements.txt file. Example: ["tqdm>=2.1", "scikit-learn"] or "./requirements.txt" If not provided, packages are automatically added based on the imports used in the function. + Use `False` to install requirements from "requirements.txt" inside your git repository :param repo: Optional, specify a repository to attach to the function, when remotely executing. Allow users to execute the function inside the specified repository, enabling them to load modules/script from the repository. Notice the execution work directory will be the repository root folder. From 62dd92a22d627407c8ebeb62a53c69e845d12cd8 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Wed, 31 Jul 2024 17:25:12 +0300 Subject: [PATCH 08/14] Fix when abort callback is set, set task status to stopped only if running locally, otherwise leave it for the Agent to set it. --- clearml/task.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/clearml/task.py b/clearml/task.py index 76b98630..d63a2c78 100644 --- a/clearml/task.py +++ b/clearml/task.py @@ -4148,7 +4148,10 @@ class Task(_Task): ) ) self.flush(wait_for_uploads=True) - self.stopped(status_reason='USER ABORTED') + + # if running remotely, we want the daemon to kill us + if self.running_locally(): + self.stopped(status_reason='USER ABORTED') if self._dev_worker: self._dev_worker.unregister() From 87233dfeac43595482d92257efb6da8802ae9137 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Wed, 31 Jul 2024 17:26:04 +0300 Subject: [PATCH 09/14] Protect against jsonschema / referencing import to include TypeError --- clearml/backend_api/session/request.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clearml/backend_api/session/request.py b/clearml/backend_api/session/request.py index 9004127a..99c9ec25 100644 --- a/clearml/backend_api/session/request.py +++ b/clearml/backend_api/session/request.py @@ -7,7 +7,7 @@ try: # Since `referencing`` only supports Python >= 3.8, this try-except blocks maintain support # for earlier python versions. 
from referencing.exceptions import Unresolvable -except ImportError: +except (ImportError, TypeError): from jsonschema.exceptions import RefResolutionError as Unresolvable from .apimodel import ApiModel From 165e8a07f920d1baf272c8ed3728ae7a8916c382 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Wed, 31 Jul 2024 17:27:22 +0300 Subject: [PATCH 10/14] Fix offline behavior --- clearml/datasets/dataset.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/clearml/datasets/dataset.py b/clearml/datasets/dataset.py index ddf2e94e..4ac0e001 100644 --- a/clearml/datasets/dataset.py +++ b/clearml/datasets/dataset.py @@ -2354,7 +2354,7 @@ class Dataset(object): return os.path.join(target_folder, os.path.basename(relative_path)) def _submit_download_link(relative_path, link, target_folder, pool=None): - if link.parent_dataset_id != self.id: + if link.parent_dataset_id != self.id and not link.parent_dataset_id.startswith("offline-"): return target_path = _get_target_path(relative_path, target_folder) if pool is None: @@ -2758,6 +2758,13 @@ class Dataset(object): for k, v in dataset._dependency_graph.items() # noqa } # noinspection PyProtectedMember + for entry in dataset._dataset_file_entries.values(): + if entry.parent_dataset_id.startswith("offline-"): + entry.parent_dataset_id = id + for entry in dataset._dataset_link_entries.values(): + if entry.parent_dataset_id.startswith("offline-"): + entry.parent_dataset_id = id + # noinspection PyProtectedMember dataset._update_dependency_graph() # noinspection PyProtectedMember dataset._log_dataset_page() From 386e1874bef8783c2c50a30b9fdde8ae08dea76d Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Wed, 31 Jul 2024 17:28:25 +0300 Subject: [PATCH 11/14] Add retries parameter to StorageManager.upload_folder (#1305) --- clearml/storage/manager.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/clearml/storage/manager.py b/clearml/storage/manager.py index a4fb8d33..9c35dea6 100644 --- a/clearml/storage/manager.py +++ b/clearml/storage/manager.py @@ -211,8 +211,8 @@ class StorageManager(object): return Session.get_files_server_host() @classmethod - def upload_folder(cls, local_folder, remote_url, match_wildcard=None): - # type: (str, str, Optional[str]) -> Optional[str] + def upload_folder(cls, local_folder, remote_url, match_wildcard=None, retries=None): + # type: (str, str, Optional[str], Optional[int]) -> Optional[str] """ Upload local folder recursively to a remote storage, maintaining the sub folder structure in the remote storage. @@ -231,6 +231,7 @@ class StorageManager(object): Example: `*.json` Notice: target file size/date are not checked. Default True, always upload. Notice if uploading to http, we will always overwrite the target. + :param int retries: Number of retries before failing to upload a file in the folder. :return: Newly uploaded remote URL or None on error. 
""" @@ -250,6 +251,7 @@ class StorageManager(object): pool.apply_async( helper.upload, args=(str(path), str(path).replace(local_folder, remote_url)), + kwds={"retries": retries if retries else cls._file_upload_retries} ) ) From 028adb45fb636228c898bd8b27aa3c01e9bd170b Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Fri, 2 Aug 2024 15:21:31 +0300 Subject: [PATCH 12/14] Black formatting --- clearml/cli/config/__main__.py | 248 ++++++++++++++++++--------------- 1 file changed, 135 insertions(+), 113 deletions(-) diff --git a/clearml/cli/config/__main__.py b/clearml/cli/config/__main__.py index 46a4ccb9..9b088093 100644 --- a/clearml/cli/config/__main__.py +++ b/clearml/cli/config/__main__.py @@ -15,13 +15,15 @@ from clearml.backend_config.defs import LOCAL_CONFIG_FILES, LOCAL_CONFIG_FILE_OV from clearml.config import config_obj from clearml.utilities.pyhocon import ConfigFactory, ConfigMissingException -description = "\n" \ - "Please create new clearml credentials through the settings page in " \ - "your `clearml-server` web app (e.g. http://localhost:8080//settings/workspace-configuration) \n"\ - "Or create a free account at https://app.clear.ml/settings/workspace-configuration\n\n" \ - "In settings page, press \"Create new credentials\", then press \"Copy to clipboard\".\n" \ - "\n" \ +description = ( + "\n" + "Please create new clearml credentials through the settings page in " + "your `clearml-server` web app (e.g. http://localhost:8080//settings/workspace-configuration) \n" + "Or create a free account at https://app.clear.ml/settings/workspace-configuration\n\n" + 'In settings page, press "Create new credentials", then press "Copy to clipboard".\n' + "\n" "Paste copied configuration here:\n" +) host_description = """ Editing configuration file: {CONFIG_FILE} @@ -30,9 +32,9 @@ Enter the url of the clearml-server's Web service, for example: {HOST} # noinspection PyBroadException try: - def_host = ENV_HOST.get(default=config_obj.get("api.web_server")) or 'http://localhost:8080' + def_host = ENV_HOST.get(default=config_obj.get("api.web_server")) or "http://localhost:8080" except Exception: - def_host = 'http://localhost:8080' + def_host = "http://localhost:8080" def validate_file(string): @@ -51,35 +53,38 @@ def main(): p = argparse.ArgumentParser(description=__doc__) p.add_argument( - "--file", "-F", help="Target configuration file path (default is %(default)s)", + "--file", + "-F", + help="Target configuration file path (default is %(default)s)", default=default_config_file, - type=validate_file + type=validate_file, ) args = p.parse_args() - print('ClearML SDK setup process') + print("ClearML SDK setup process") conf_file = Path(os.path.expanduser(args.file)).absolute() if conf_file.exists() and conf_file.is_file() and conf_file.stat().st_size > 0: - print('Configuration file already exists: {}'.format(str(conf_file))) - print('Leaving setup, feel free to edit the configuration file.') + print("Configuration file already exists: {}".format(str(conf_file))) + print("Leaving setup, feel free to edit the configuration file.") return - print(description, end='') - sentinel = '' - parse_input = '' + print(description, end="") + sentinel = "" + parse_input = "" if os.environ.get("JPY_PARENT_PID"): # When running from a colab instance and calling clearml-init # colab will squish the api credentials into a single line # The regex splits this single line based on 2 spaces or more import re + api_input = input() - parse_input = '\n'.join(re.split(r" {2,}", api_input)) + parse_input = 
"\n".join(re.split(r" {2,}", api_input)) else: for line in iter(input, sentinel): - parse_input += line+'\n' - if line.rstrip() == '}': + parse_input += line + "\n" + if line.rstrip() == "}": break credentials = None @@ -102,11 +107,14 @@ def main(): files_server = files_server or None while not credentials or set(credentials) != {"access_key", "secret_key"}: - print('Could not parse credentials, please try entering them manually.') + print("Could not parse credentials, please try entering them manually.") credentials = read_manual_credentials() - print('Detected credentials key=\"{}\" secret=\"{}\"'.format(credentials['access_key'], - credentials['secret_key'][0:4] + "***")) + print( + 'Detected credentials key="{}" secret="{}"'.format( + credentials["access_key"], credentials["secret_key"][0:4] + "***" + ) + ) web_input = True if web_server: host = web_server @@ -114,47 +122,43 @@ def main(): web_input = False host = api_server else: - print(host_description.format(CONFIG_FILE=args.file, HOST=def_host,)) - host = input_url('WEB Host', '') + print( + host_description.format( + CONFIG_FILE=args.file, + HOST=def_host, + ) + ) + host = input_url("WEB Host", "") parsed_host = verify_url(host) api_host, files_host, web_host = parse_known_host(parsed_host) - hosts_dict = { - "API": api_server, - "Files": files_server, - "Web": web_server - } + hosts_dict = {"API": api_server, "Files": files_server, "Web": web_server} - infered_hosts_dict = { - "API": api_host, - "Files": files_host, - "Web": web_host - } + infered_hosts_dict = {"API": api_host, "Files": files_host, "Web": web_host} for host_type, url in six.iteritems(hosts_dict): - if url is None or not ( - url.startswith('http://') or url.startswith('https://') - ): + if url is None or not (url.startswith("http://") or url.startswith("https://")): infered_host_url = infered_hosts_dict[host_type] if infered_host_url != "": hosts_dict[host_type] = infered_host_url else: hosts_dict[host_type] = input_url(host_type) - api_host, files_host, web_host = hosts_dict['API'], hosts_dict['Files'], hosts_dict['Web'] + api_host, files_host, web_host = hosts_dict["API"], hosts_dict["Files"], hosts_dict["Web"] # one of these two we configured if not web_input: - web_host = input_url('Web Application Host', web_host) + web_host = input_url("Web Application Host", web_host) else: if web_input is True and not web_host: web_host = host - print('\nClearML Hosts configuration:\nWeb App: {}\nAPI: {}\nFile Store: {}\n'.format( - web_host, api_host, files_host)) + print( + "\nClearML Hosts configuration:\nWeb App: {}\nAPI: {}\nFile Store: {}\n".format(web_host, api_host, files_host) + ) - if len(set([web_host, api_host, files_host])) != 3: + if len({web_host, api_host, files_host}) != 3: raise ValueError("All three server URLs should be distinct") retry = 1 @@ -166,88 +170,94 @@ def main(): if retry < max_retries + 1: credentials = read_manual_credentials() else: - print('Exiting setup without creating configuration file') + print("Exiting setup without creating configuration file") return # noinspection PyBroadException try: - default_sdk_conf = Path(__file__).absolute().parents[2] / 'config/default/sdk.conf' - with open(str(default_sdk_conf), 'rt') as f: + default_sdk_conf = Path(__file__).absolute().parents[2] / "config/default/sdk.conf" + with open(str(default_sdk_conf), "rt") as f: default_sdk = f.read() except Exception: - print('Error! Could not read default configuration file') + print("Error! 
Could not read default configuration file") return # noinspection PyBroadException try: - with open(str(conf_file), 'wt') as f: - header = '# ClearML SDK configuration file\n' \ - 'api {\n' \ - ' # Notice: \'host\' is the api server (default port 8008), not the web server.\n' \ - ' api_server: %s\n' \ - ' web_server: %s\n' \ - ' files_server: %s\n' \ - ' # Credentials are generated using the webapp, %s/settings\n' \ - ' # Override with os environment: CLEARML_API_ACCESS_KEY / CLEARML_API_SECRET_KEY\n' \ - ' credentials {"access_key": "%s", "secret_key": "%s"}\n' \ - '}\n' \ - 'sdk ' % (api_host, web_host, files_host, - web_host, credentials['access_key'], credentials['secret_key']) + with open(str(conf_file), "wt") as f: + header = ( + "# ClearML SDK configuration file\n" + "api {\n" + " # Notice: 'host' is the api server (default port 8008), not the web server.\n" + " api_server: %s\n" + " web_server: %s\n" + " files_server: %s\n" + " # Credentials are generated using the webapp, %s/settings\n" + " # Override with os environment: CLEARML_API_ACCESS_KEY / CLEARML_API_SECRET_KEY\n" + ' credentials {"access_key": "%s", "secret_key": "%s"}\n' + "}\n" + "sdk " + % (api_host, web_host, files_host, web_host, credentials["access_key"], credentials["secret_key"]) + ) f.write(header) f.write(default_sdk) except Exception: - print('Error! Could not write configuration file at: {}'.format(str(conf_file))) + print("Error! Could not write configuration file at: {}".format(str(conf_file))) return - print('\nNew configuration stored in {}'.format(str(conf_file))) - print('ClearML setup completed successfully.') + print("\nNew configuration stored in {}".format(str(conf_file))) + print("ClearML setup completed successfully.") def parse_known_host(parsed_host): - if parsed_host.netloc.startswith('demoapp.'): + if parsed_host.netloc.startswith("demoapp."): # this is our demo server - api_host = parsed_host.scheme + "://" + parsed_host.netloc.replace('demoapp.', 'demoapi.', 1) + parsed_host.path + api_host = parsed_host.scheme + "://" + parsed_host.netloc.replace("demoapp.", "demoapi.", 1) + parsed_host.path web_host = parsed_host.scheme + "://" + parsed_host.netloc + parsed_host.path - files_host = parsed_host.scheme + "://" + parsed_host.netloc.replace('demoapp.', 'demofiles.', - 1) + parsed_host.path - elif parsed_host.netloc.startswith('app.'): + files_host = ( + parsed_host.scheme + "://" + parsed_host.netloc.replace("demoapp.", "demofiles.", 1) + parsed_host.path + ) + elif parsed_host.netloc.startswith("app."): # this is our application server - api_host = parsed_host.scheme + "://" + parsed_host.netloc.replace('app.', 'api.', 1) + parsed_host.path + api_host = parsed_host.scheme + "://" + parsed_host.netloc.replace("app.", "api.", 1) + parsed_host.path web_host = parsed_host.scheme + "://" + parsed_host.netloc + parsed_host.path - files_host = parsed_host.scheme + "://" + parsed_host.netloc.replace('app.', 'files.', 1) + parsed_host.path - elif parsed_host.netloc.startswith('demoapi.'): - print('{} is the api server, we need the web server. Replacing \'demoapi.\' with \'demoapp.\''.format( - parsed_host.netloc)) + files_host = parsed_host.scheme + "://" + parsed_host.netloc.replace("app.", "files.", 1) + parsed_host.path + elif parsed_host.netloc.startswith("demoapi."): + print( + "{} is the api server, we need the web server. Replacing 'demoapi.' 
with 'demoapp.'".format( + parsed_host.netloc + ) + ) api_host = parsed_host.scheme + "://" + parsed_host.netloc + parsed_host.path - web_host = parsed_host.scheme + "://" + parsed_host.netloc.replace('demoapi.', 'demoapp.', 1) + parsed_host.path - files_host = parsed_host.scheme + "://" + parsed_host.netloc.replace('demoapi.', 'demofiles.', - 1) + parsed_host.path - elif parsed_host.netloc.startswith('api.'): - print('{} is the api server, we need the web server. Replacing \'api.\' with \'app.\''.format( - parsed_host.netloc)) + web_host = parsed_host.scheme + "://" + parsed_host.netloc.replace("demoapi.", "demoapp.", 1) + parsed_host.path + files_host = ( + parsed_host.scheme + "://" + parsed_host.netloc.replace("demoapi.", "demofiles.", 1) + parsed_host.path + ) + elif parsed_host.netloc.startswith("api."): + print("{} is the api server, we need the web server. Replacing 'api.' with 'app.'".format(parsed_host.netloc)) api_host = parsed_host.scheme + "://" + parsed_host.netloc + parsed_host.path - web_host = parsed_host.scheme + "://" + parsed_host.netloc.replace('api.', 'app.', 1) + parsed_host.path - files_host = parsed_host.scheme + "://" + parsed_host.netloc.replace('api.', 'files.', 1) + parsed_host.path + web_host = parsed_host.scheme + "://" + parsed_host.netloc.replace("api.", "app.", 1) + parsed_host.path + files_host = parsed_host.scheme + "://" + parsed_host.netloc.replace("api.", "files.", 1) + parsed_host.path elif parsed_host.port == 8008: - print('Port 8008 is the api port. Replacing 8008 with 8080 for Web application') + print("Port 8008 is the api port. Replacing 8008 with 8080 for Web application") api_host = parsed_host.scheme + "://" + parsed_host.netloc + parsed_host.path - web_host = parsed_host.scheme + "://" + parsed_host.netloc.replace(':8008', ':8080', 1) + parsed_host.path - files_host = parsed_host.scheme + "://" + parsed_host.netloc.replace(':8008', ':8081', 1) + parsed_host.path + web_host = parsed_host.scheme + "://" + parsed_host.netloc.replace(":8008", ":8080", 1) + parsed_host.path + files_host = parsed_host.scheme + "://" + parsed_host.netloc.replace(":8008", ":8081", 1) + parsed_host.path elif parsed_host.port == 8080: - print('Port 8080 is the web port. Replacing 8080 with 8008 for API server') - api_host = parsed_host.scheme + "://" + parsed_host.netloc.replace(':8080', ':8008', 1) + parsed_host.path + print("Port 8080 is the web port. 
Replacing 8080 with 8008 for API server") + api_host = parsed_host.scheme + "://" + parsed_host.netloc.replace(":8080", ":8008", 1) + parsed_host.path web_host = parsed_host.scheme + "://" + parsed_host.netloc + parsed_host.path - files_host = parsed_host.scheme + "://" + parsed_host.netloc.replace(':8080', ':8081', 1) + parsed_host.path + files_host = parsed_host.scheme + "://" + parsed_host.netloc.replace(":8080", ":8081", 1) + parsed_host.path elif parsed_host.port is None: - print('Web app hosted on standard port using ' + parsed_host.scheme + ' protocol.') - print('Assuming files and api ports are unchanged and use the same (' + parsed_host.scheme + ') protocol') - api_host = parsed_host.scheme + "://" + parsed_host.netloc + ':8008' + parsed_host.path + print("Web app hosted on standard port using " + parsed_host.scheme + " protocol.") + print("Assuming files and api ports are unchanged and use the same (" + parsed_host.scheme + ") protocol") + api_host = parsed_host.scheme + "://" + parsed_host.netloc + ":8008" + parsed_host.path web_host = parsed_host.scheme + "://" + parsed_host.netloc + parsed_host.path - files_host = parsed_host.scheme + "://" + parsed_host.netloc + ':8081' + parsed_host.path + files_host = parsed_host.scheme + "://" + parsed_host.netloc + ":8081" + parsed_host.path else: print("Warning! Could not parse host name") - api_host = '' - web_host = '' - files_host = '' + api_host = "" + web_host = "" + files_host = "" return api_host, files_host, web_host @@ -256,18 +266,25 @@ def verify_credentials(api_host, credentials): """check if the credentials are valid""" # noinspection PyBroadException try: - print('Verifying credentials ...') + print("Verifying credentials ...") if api_host: - Session(api_key=credentials['access_key'], secret_key=credentials['secret_key'], host=api_host, - http_retries_config={"total": 2}) - print('Credentials verified!') + Session( + api_key=credentials["access_key"], + secret_key=credentials["secret_key"], + host=api_host, + http_retries_config={"total": 2}, + ) + print("Credentials verified!") return True else: print("Can't verify credentials") return False except Exception: - print('Error: could not verify credentials: key={} secret={}'.format( - credentials.get('access_key'), credentials.get('secret_key'))) + print( + "Error: could not verify credentials: key={} secret={}".format( + credentials.get("access_key"), credentials.get("secret_key") + ) + ) return False @@ -292,18 +309,18 @@ def get_parsed_field(parsed_config, fields): def read_manual_credentials(): - print('Enter user access key: ', end='') + print("Enter user access key: ", end="") access_key = input() - print('Enter user secret: ', end='') + print("Enter user secret: ", end="") secret_key = input() return {"access_key": access_key, "secret_key": secret_key} def input_url(host_type, host=None): while True: - print('{} configured to: {}'.format(host_type, '[{}] '.format(host) if host else ''), end='') + print("{} configured to: {}".format(host_type, "[{}] ".format(host) if host else ""), end="") parse_input = input() - if host and (not parse_input or parse_input.lower() == 'yes' or parse_input.lower() == 'y'): + if host and (not parse_input or parse_input.lower() == "yes" or parse_input.lower() == "y"): break parsed_host = verify_url(parse_input) if parse_input else None if parse_input and parsed_host: @@ -313,29 +330,34 @@ def input_url(host_type, host=None): def input_host_port(host_type, parsed_host): - print('Enter port for {} host '.format(host_type), end='') + 
print("Enter port for {} host ".format(host_type), end="") replace_port = input().lower() - return parsed_host.scheme + "://" + parsed_host.netloc + ( - ':{}'.format(replace_port) if replace_port else '') + parsed_host.path + return ( + parsed_host.scheme + + "://" + + parsed_host.netloc + + (":{}".format(replace_port) if replace_port else "") + + parsed_host.path + ) def verify_url(parse_input): # noinspection PyBroadException try: - if not parse_input.startswith('http://') and not parse_input.startswith('https://'): + if not parse_input.startswith("http://") and not parse_input.startswith("https://"): # if we have a specific port, use http prefix, otherwise assume https - if ':' in parse_input: - parse_input = 'http://' + parse_input + if ":" in parse_input: + parse_input = "http://" + parse_input else: - parse_input = 'https://' + parse_input + parse_input = "https://" + parse_input parsed_host = urlparse(parse_input) - if parsed_host.scheme not in ('http', 'https'): + if parsed_host.scheme not in ("http", "https"): parsed_host = None except Exception: parsed_host = None - print('Could not parse url {}\nEnter your clearml-server host: '.format(parse_input), end='') + print("Could not parse url {}\nEnter your clearml-server host: ".format(parse_input), end="") return parsed_host -if __name__ == '__main__': +if __name__ == "__main__": main() From 01d337f1aa1dfe73d0b0271b7c156d83856b316a Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Mon, 5 Aug 2024 15:46:11 +0300 Subject: [PATCH 13/14] Black formatting --- examples/datasets/csv_dataset_creation.py | 5 +-- examples/datasets/data_ingestion.py | 20 +++-------- examples/datasets/dataset_creation.py | 8 ++--- examples/datasets/dataset_folder_syncing.py | 10 +++--- .../datasets/multi_parent_child_dataset.py | 17 ++++----- .../datasets/single_parent_child_dataset.py | 13 +++---- .../urbansounds_dataset_preprocessing.py | 36 +++++-------------- examples/datasets/urbansounds_get_data.py | 8 ++--- 8 files changed, 42 insertions(+), 75 deletions(-) diff --git a/examples/datasets/csv_dataset_creation.py b/examples/datasets/csv_dataset_creation.py index 8dbb4999..5aec21a2 100644 --- a/examples/datasets/csv_dataset_creation.py +++ b/examples/datasets/csv_dataset_creation.py @@ -6,7 +6,8 @@ def main(): print("STEP1 : Downloading CSV dataset") csv_file_path = manager.get_local_copy( - remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/Iris_Species.csv") + remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/Iris_Species.csv" + ) print("STEP2 : Creating a dataset") # By default, clearml data uploads to the clearml fileserver. 
Adding output_uri argument to the create() method @@ -23,5 +24,5 @@ def main(): print("We are done, have a great day :)") -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/examples/datasets/data_ingestion.py b/examples/datasets/data_ingestion.py index 638d01b2..6157da87 100644 --- a/examples/datasets/data_ingestion.py +++ b/examples/datasets/data_ingestion.py @@ -42,19 +42,13 @@ dataset_path = Dataset.get( # Dataset and Dataloader initializations transform = transforms.Compose([transforms.ToTensor()]) -trainset = datasets.CIFAR10( - root=dataset_path, train=True, download=False, transform=transform -) +trainset = datasets.CIFAR10(root=dataset_path, train=True, download=False, transform=transform) trainloader = torch.utils.data.DataLoader( trainset, batch_size=params.get("batch_size", 4), shuffle=True, num_workers=10 ) -testset = datasets.CIFAR10( - root=dataset_path, train=False, download=False, transform=transform -) -testloader = torch.utils.data.DataLoader( - testset, batch_size=params.get("batch_size", 4), shuffle=False, num_workers=10 -) +testset = datasets.CIFAR10(root=dataset_path, train=False, download=False, transform=transform) +testloader = torch.utils.data.DataLoader(testset, batch_size=params.get("batch_size", 4), shuffle=False, num_workers=10) classes = ( "plane", @@ -87,14 +81,10 @@ def predictions_gt_images_handler(engine, logger, *args, **kwargs): ax = fig.add_subplot(num_x, num_y, idx + 1, xticks=[], yticks=[]) ax.imshow(trans(x[idx])) ax.set_title( - "{0} {1:.1f}% (label: {2})".format( - classes[preds], probs * 100, classes[y[idx]] - ), + "{0} {1:.1f}% (label: {2})".format(classes[preds], probs * 100, classes[y[idx]]), color=("green" if preds == y[idx] else "red"), ) - logger.writer.add_figure( - "predictions vs actuals", figure=fig, global_step=engine.state.epoch - ) + logger.writer.add_figure("predictions vs actuals", figure=fig, global_step=engine.state.epoch) class Net(nn.Module): diff --git a/examples/datasets/dataset_creation.py b/examples/datasets/dataset_creation.py index 9fa9cc92..6a4395ce 100644 --- a/examples/datasets/dataset_creation.py +++ b/examples/datasets/dataset_creation.py @@ -3,13 +3,9 @@ from clearml import StorageManager, Dataset manager = StorageManager() -dataset_path = manager.get_local_copy( - remote_url="https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" -) +dataset_path = manager.get_local_copy(remote_url="https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz") -dataset = Dataset.create( - dataset_name="cifar_dataset", dataset_project="dataset_examples" -) +dataset = Dataset.create(dataset_name="cifar_dataset", dataset_project="dataset_examples") # Prepare and clean data here before it is added to the dataset diff --git a/examples/datasets/dataset_folder_syncing.py b/examples/datasets/dataset_folder_syncing.py index a039642a..cefb9de8 100644 --- a/examples/datasets/dataset_folder_syncing.py +++ b/examples/datasets/dataset_folder_syncing.py @@ -8,8 +8,9 @@ from clearml import Dataset, StorageManager def download_mnist_dataset(): manager = StorageManager() - mnist_dataset = Path(manager.get_local_copy( - remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/MNIST.zip", name="MNIST")) + mnist_dataset = Path( + manager.get_local_copy(remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/MNIST.zip", name="MNIST") + ) mnist_dataset_train = mnist_dataset / "TRAIN" mnist_dataset_test = mnist_dataset / "TEST" @@ -28,7 +29,8 @@ def main(): print("STEP3 : Creating the dataset") mnist_dataset = 
Dataset.create( - dataset_project="dataset_examples", dataset_name="MNIST Complete Dataset (Syncing Example)") + dataset_project="dataset_examples", dataset_name="MNIST Complete Dataset (Syncing Example)" + ) print("STEP4 : Syncing train dataset") shutil.copytree(mnist_dataset_train, mnist_train_path) # Populating dataset folder with TRAIN images @@ -46,5 +48,5 @@ def main(): print("We are done, have a great day :)") -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/examples/datasets/multi_parent_child_dataset.py b/examples/datasets/multi_parent_child_dataset.py index 51f83207..a9d2dcf5 100644 --- a/examples/datasets/multi_parent_child_dataset.py +++ b/examples/datasets/multi_parent_child_dataset.py @@ -7,29 +7,30 @@ def main(): manager = StorageManager() print("STEP1 : Downloading mnist dataset") - mnist_dataset = Path(manager.get_local_copy( - remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/MNIST.zip", name="MNIST")) + mnist_dataset = Path( + manager.get_local_copy(remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/MNIST.zip", name="MNIST") + ) mnist_dataset_train = mnist_dataset / "TRAIN" mnist_dataset_test = mnist_dataset / "TEST" print("STEP2 : Creating the training dataset") - train_dataset = Dataset.create( - dataset_project="dataset_examples/MNIST", dataset_name="MNIST Training Dataset") + train_dataset = Dataset.create(dataset_project="dataset_examples/MNIST", dataset_name="MNIST Training Dataset") train_dataset.add_files(path=mnist_dataset_train, dataset_path="TRAIN") train_dataset.upload() train_dataset.finalize() print("STEP3 : Creating the testing dataset") - test_dataset = Dataset.create( - dataset_project="dataset_examples/MNIST", dataset_name="MNIST Testing Dataset") + test_dataset = Dataset.create(dataset_project="dataset_examples/MNIST", dataset_name="MNIST Testing Dataset") test_dataset.add_files(path=mnist_dataset_test, dataset_path="TEST") test_dataset.upload() test_dataset.finalize() print("STEP4 : Create a child dataset with both mnist train and test data") child_dataset = Dataset.create( - dataset_project="dataset_examples/MNIST", dataset_name="MNIST Complete Dataset", - parent_datasets=[train_dataset.id, test_dataset.id]) + dataset_project="dataset_examples/MNIST", + dataset_name="MNIST Complete Dataset", + parent_datasets=[train_dataset.id, test_dataset.id], + ) child_dataset.upload() child_dataset.finalize() diff --git a/examples/datasets/single_parent_child_dataset.py b/examples/datasets/single_parent_child_dataset.py index 664cd91c..88700f66 100644 --- a/examples/datasets/single_parent_child_dataset.py +++ b/examples/datasets/single_parent_child_dataset.py @@ -7,21 +7,22 @@ def main(): manager = StorageManager() print("STEP1 : Downloading mnist dataset") - mnist_dataset = Path(manager.get_local_copy( - remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/MNIST.zip", name="MNIST")) + mnist_dataset = Path( + manager.get_local_copy(remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/MNIST.zip", name="MNIST") + ) mnist_dataset_train = mnist_dataset / "TRAIN" mnist_dataset_test = mnist_dataset / "TEST" print("STEP2 : Creating the training dataset") - mnist_dataset = Dataset.create( - dataset_project="dataset_examples", dataset_name="MNIST Training Dataset") + mnist_dataset = Dataset.create(dataset_project="dataset_examples", dataset_name="MNIST Training Dataset") mnist_dataset.add_files(path=mnist_dataset_train, dataset_path="TRAIN") mnist_dataset.upload() mnist_dataset.finalize() print("STEP3 
: Create a child dataset of mnist dataset using TEST Dataset") child_dataset = Dataset.create( - dataset_project="dataset_examples", dataset_name="MNIST Complete Dataset", parent_datasets=[mnist_dataset.id]) + dataset_project="dataset_examples", dataset_name="MNIST Complete Dataset", parent_datasets=[mnist_dataset.id] + ) child_dataset.add_files(path=mnist_dataset_test, dataset_path="TEST") child_dataset.upload() child_dataset.finalize() @@ -29,5 +30,5 @@ def main(): print("We are done, have a great day :)") -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/examples/datasets/urbansounds_dataset_preprocessing.py b/examples/datasets/urbansounds_dataset_preprocessing.py index 7230afb9..19b418d5 100644 --- a/examples/datasets/urbansounds_dataset_preprocessing.py +++ b/examples/datasets/urbansounds_dataset_preprocessing.py @@ -43,9 +43,7 @@ class PreProcessor: # Make sure all spectrograms are the same size fixed_length = 3 * (self.configuration["resample_freq"] // 200) if melspectogram_db.shape[2] < fixed_length: - melspectogram_db = torch.nn.functional.pad( - melspectogram_db, (0, fixed_length - melspectogram_db.shape[2]) - ) + melspectogram_db = torch.nn.functional.pad(melspectogram_db, (0, fixed_length - melspectogram_db.shape[2])) else: melspectogram_db = melspectogram_db[:, :, :fixed_length] @@ -64,16 +62,10 @@ class DataSetBuilder: alias="Raw Dataset", ) # This will return the pandas dataframe we added in the previous task - self.metadata = ( - Task.get_task(task_id=self.original_dataset._task.id) - .artifacts["metadata"] - .get() - ) + self.metadata = Task.get_task(task_id=self.original_dataset._task.id).artifacts["metadata"].get() # This will download the data and return a local path to the data self.original_dataset_path = Path( - self.original_dataset.get_mutable_local_copy( - self.configuration["dataset_path"], overwrite=True - ) + self.original_dataset.get_mutable_local_copy(self.configuration["dataset_path"], overwrite=True) ) # Prepare a preprocessor that will handle each sample one by one @@ -114,33 +106,23 @@ class DataSetBuilder: # audio side by side in the debug sample UI) for i, (_, data) in tqdm(enumerate(self.metadata.iterrows())): _, audio_file_path, label = data.tolist() - sample, sample_freq = torchaudio.load( - self.original_dataset_path / audio_file_path, normalize=True - ) + sample, sample_freq = torchaudio.load(self.original_dataset_path / audio_file_path, normalize=True) spectrogram = self.preprocessor.preprocess_sample(sample, sample_freq) # Get only the filename and replace the extension, we're saving an image here new_file_name = os.path.basename(audio_file_path).replace(".wav", ".npy") # Get the correct folder, basically the original dataset folder + the new filename - spectrogram_path = ( - self.original_dataset_path - / os.path.dirname(audio_file_path) - / new_file_name - ) + spectrogram_path = self.original_dataset_path / os.path.dirname(audio_file_path) / new_file_name # Save the numpy array to disk np.save(spectrogram_path, spectrogram) # Log every 10th sample as a debug sample to the UI, so we can manually check it if i % 10 == 0: # Convert the numpy array to a viewable JPEG - rgb_image = mpl.colormaps["viridis"]( - spectrogram[0, :, :].detach().numpy() * 255 - )[:, :, :3] + rgb_image = mpl.colormaps["viridis"](spectrogram[0, :, :].detach().numpy() * 255)[:, :, :3] title = os.path.splitext(os.path.basename(audio_file_path))[0] # Report the image and the original sound, so they can be viewed side by side - 
self.preprocessed_dataset.get_logger().report_image( - title=title, series="spectrogram", image=rgb_image - ) + self.preprocessed_dataset.get_logger().report_image(title=title, series="spectrogram", image=rgb_image) self.preprocessed_dataset.get_logger().report_media( title=title, series="original_audio", @@ -152,9 +134,7 @@ class DataSetBuilder: # Again add some visualizations to the task self.log_dataset_statistics() # We still want the metadata - self.preprocessed_dataset._task.upload_artifact( - name="metadata", artifact_object=self.metadata - ) + self.preprocessed_dataset._task.upload_artifact(name="metadata", artifact_object=self.metadata) self.preprocessed_dataset.finalize(auto_upload=True) diff --git a/examples/datasets/urbansounds_get_data.py b/examples/datasets/urbansounds_get_data.py index 9f442077..55e6f352 100644 --- a/examples/datasets/urbansounds_get_data.py +++ b/examples/datasets/urbansounds_get_data.py @@ -28,9 +28,7 @@ def get_urbansound8k(): "https://allegro-datasets.s3.amazonaws.com/clearml/UrbanSound8K.zip", extract_archive=True, ) - path_to_urbansound8k_csv = ( - Path(path_to_urbansound8k) / "UrbanSound8K" / "metadata" / "UrbanSound8K.csv" - ) + path_to_urbansound8k_csv = Path(path_to_urbansound8k) / "UrbanSound8K" / "metadata" / "UrbanSound8K.csv" path_to_urbansound8k_audio = Path(path_to_urbansound8k) / "UrbanSound8K" / "audio" return path_to_urbansound8k_csv, path_to_urbansound8k_audio @@ -38,9 +36,7 @@ def get_urbansound8k(): def log_dataset_statistics(dataset, metadata): histogram_data = metadata["class"].value_counts() - dataset.get_logger().report_table( - title="Raw Dataset Metadata", series="Raw Dataset Metadata", table_plot=metadata - ) + dataset.get_logger().report_table(title="Raw Dataset Metadata", series="Raw Dataset Metadata", table_plot=metadata) dataset.get_logger().report_histogram( title="Class distribution", series="Class distribution", From 97221b6c152e72c34940ed68fc97b9258677ea1f Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Tue, 6 Aug 2024 16:26:06 +0300 Subject: [PATCH 14/14] Version bump to v1.16.3 --- clearml/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clearml/version.py b/clearml/version.py index 9e54f289..6cc7415d 100644 --- a/clearml/version.py +++ b/clearml/version.py @@ -1 +1 @@ -__version__ = "1.16.2" +__version__ = "1.16.3"