diff --git a/README.md b/README.md
index 8a891232..2f6f441c 100644
--- a/README.md
+++ b/README.md
@@ -4,8 +4,8 @@

-**[ClearML](https://clear.ml) - Auto-Magical Suite of tools to streamline your ML workflow
-Experiment Manager, MLOps and Data-Management**
+**[ClearML](https://clear.ml) - Auto-Magical Suite of tools to streamline your AI workflow
+Experiment Manager, MLOps/LLMOps and Data-Management**
[](https://img.shields.io/github/license/allegroai/clearml.svg) [](https://img.shields.io/pypi/pyversions/clearml.svg) [](https://pypi.org/project/clearml/) [](https://anaconda.org/clearml/clearml) [](https://optuna.org)
[](https://pypi.org/project/clearml/) [](https://artifacthub.io/packages/search?repo=allegroai) [](https://www.youtube.com/c/clearml) [](https://joinslack.clear.ml) [](https://app.clear.ml)
@@ -19,14 +19,15 @@
-ClearML is a ML/DL development and production suite. It contains FIVE main modules:
+ClearML is an ML/DL development and production suite. It contains SIX main modules:
- [Experiment Manager](#clearml-experiment-manager) - Automagical experiment tracking, environments and results
-- [MLOps](https://github.com/allegroai/clearml-agent) - Orchestration, Automation & Pipelines solution for ML/DL jobs (K8s / Cloud / bare-metal)
+- [MLOps / LLMOps](https://github.com/allegroai/clearml-agent) - Orchestration, Automation & Pipelines solution for ML/DL/GenAI jobs (Kubernetes / Cloud / bare-metal)
- [Data-Management](https://github.com/allegroai/clearml/blob/master/docs/datasets.md) - Fully differentiable data management & version control solution on top of object-storage
(S3 / GS / Azure / NAS)
- [Model-Serving](https://github.com/allegroai/clearml-serving) - *cloud-ready* Scalable model serving solution!
- **Deploy new model endpoints in under 5 minutes**
- Includes optimized GPU serving support backed by Nvidia-Triton
- **with out-of-the-box Model Monitoring**
-- **NEW** :fire: [Reports](https://clear.ml/docs/latest/docs/webapp/webapp_reports) - Create and share rich MarkDown documents supporting embeddable online content
+- [Reports](https://clear.ml/docs/latest/docs/webapp/webapp_reports) - Create and share rich MarkDown documents supporting embeddable online content
+- **NEW** :fire: [Orchestration Dashboard](https://clear.ml/docs/latest/docs/webapp/webapp_orchestration_dash/) - Live rich dashboard for your entire compute cluster (Cloud / Kubernetes / On-Prem)
Instrumenting these components is the **ClearML-server**, see [Self-Hosting](https://clear.ml/docs/latest/docs/deploying_clearml/clearml_server) & [Free tier Hosting](https://app.clear.ml)
@@ -141,7 +142,7 @@ The ClearML run-time components:
- [clearml-session](https://github.com/allegroai/clearml-session) - **Launch remote JupyterLab / VSCode-server inside any docker, on Cloud/On-Prem machines**
- [clearml-task](https://github.com/allegroai/clearml/blob/master/docs/clearml-task.md) - Run any codebase on remote machines with full remote logging of Tensorboard, Matplotlib & Console outputs
- [clearml-data](https://github.com/allegroai/clearml/blob/master/docs/datasets.md) - **CLI for managing and versioning your datasets, including creating / uploading / downloading of data from S3/GS/Azure/NAS**
-- [AWS Auto-Scaler](https://clear.ml/docs/latest/docs/guides/services/aws_autoscaler) - Automatically spin EC2 instances based on your workloads with preconfigured budget! No need for K8s!
+- [AWS Auto-Scaler](https://clear.ml/docs/latest/docs/guides/services/aws_autoscaler) - Automatically spin up EC2 instances based on your workloads with a preconfigured budget! No need for Kubernetes!
- [Hyper-Parameter Optimization](https://clear.ml/docs/latest/docs/guides/optimization/hyper-parameter-optimization/examples_hyperparam_opt) - Optimize any code with black-box approach and state-of-the-art Bayesian optimization algorithms
- [Automation Pipeline](https://clear.ml/docs/latest/docs/guides/pipeline/pipeline_controller) - Build pipelines based on existing experiments / jobs, supports building pipelines of pipelines!
- [Slack Integration](https://clear.ml/docs/latest/docs/guides/services/slack_alerts) - Report experiments progress / failure directly to Slack (fully customizable!)
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 00000000..1ac0d525
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,5 @@
+## Reporting Security Issues
+
+Thanks for taking the time to make ClearML more secure!
+
+To keep the discussion secure, please send your report to [security@clear.ml](mailto:security@clear.ml).
\ No newline at end of file
diff --git a/clearml/automation/controller.py b/clearml/automation/controller.py
index 26cb9516..8326b399 100644
--- a/clearml/automation/controller.py
+++ b/clearml/automation/controller.py
@@ -142,7 +142,7 @@ class PipelineController(object):
try:
self.job.task.reload()
self.job_ended = self.job_started + self.job.task.data.active_duration
- except Exception as e:
+ except Exception:
pass
def set_job_started(self):
@@ -154,7 +154,6 @@ class PipelineController(object):
except Exception:
pass
-
def __init__(
self,
name, # type: str
@@ -558,7 +557,7 @@ class PipelineController(object):
previous_status # type: str
):
pass
-
+
:param output_uri: The storage / output url for this step. This is the default location for output
models and other artifacts. Check Task.init reference docs for more info (output_uri is a parameter).
@@ -1631,7 +1630,7 @@ class PipelineController(object):
'target_project': self._target_project,
}
pipeline_dag = self._serialize()
-
+
# serialize pipeline state
if self._task and self._auto_connect_task:
# check if we are either running locally or that we are running remotely,
@@ -2733,7 +2732,7 @@ class PipelineController(object):
if node_failed and self._abort_running_steps_on_failure and not node.continue_on_fail:
nodes_failed_stop_pipeline.append(node.name)
elif node.timeout:
- node.set_job_started()
+ started = node.job.task.data.started
if (datetime.now().astimezone(started.tzinfo) - started).total_seconds() > node.timeout:
node.job.abort()
completed_jobs.append(j)
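
The timeout check above subtracts an aware `started` timestamp from the current time converted into that timestamp's own timezone. A minimal runnable sketch of that computation (not part of the patch), using a stand-in timestamp instead of a real `node.job.task.data.started`:

```python
from datetime import datetime, timezone

# Stand-in for node.job.task.data.started; real tasks report an aware datetime.
started = datetime(2024, 1, 1, 12, 0, tzinfo=timezone.utc)

# Converting "now" into the start time's timezone, as the patched check does,
# keeps the subtraction valid even when the local zone differs from UTC.
elapsed = (datetime.now().astimezone(started.tzinfo) - started).total_seconds()
print("elapsed seconds:", elapsed)
```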
diff --git a/clearml/backend_api/api_proxy.py b/clearml/backend_api/api_proxy.py
index 22281cfd..888a27b0 100644
--- a/clearml/backend_api/api_proxy.py
+++ b/clearml/backend_api/api_proxy.py
@@ -32,7 +32,10 @@ class ApiServiceProxy(object):
)
# get the most advanced service version that supports our api
- version = [str(v) for v in ApiServiceProxy._available_versions if Session.check_min_api_version(v)][-1]
+ version = [
+ str(v) for v in ApiServiceProxy._available_versions
+ if Session.check_min_api_version(v)
+ ][-1]
Session.api_version = version
self.__dict__["__wrapped_version__"] = Session.api_version
name = ".v{}.{}".format(
diff --git a/clearml/backend_api/services/v2_23/queues.py b/clearml/backend_api/services/v2_23/queues.py
index c22ccd4f..1df26606 100644
--- a/clearml/backend_api/services/v2_23/queues.py
+++ b/clearml/backend_api/services/v2_23/queues.py
@@ -490,7 +490,7 @@ class Queue(NonStrictDataModel):
self._property_metadata = None
return
- self.assert_isinstance(value, "metadata", (dict,), is_array=True)
+ self.assert_isinstance(value, "metadata", (dict,))
self._property_metadata = value
diff --git a/clearml/backend_api/session/defs.py b/clearml/backend_api/session/defs.py
index cdbbcf3c..a477ebd9 100644
--- a/clearml/backend_api/session/defs.py
+++ b/clearml/backend_api/session/defs.py
@@ -40,7 +40,6 @@ for a very long time for a non-responding or mis-configured server
"""
ENV_API_EXTRA_RETRY_CODES = EnvEntry("CLEARML_API_EXTRA_RETRY_CODES")
-
ENV_FORCE_MAX_API_VERSION = EnvEntry("CLEARML_FORCE_MAX_API_VERSION", type=str)
diff --git a/clearml/backend_api/session/request.py b/clearml/backend_api/session/request.py
index 2a85cb0c..9004127a 100644
--- a/clearml/backend_api/session/request.py
+++ b/clearml/backend_api/session/request.py
@@ -1,7 +1,14 @@
import abc
-import jsonschema
import six
+from jsonschema.exceptions import FormatError, SchemaError, ValidationError
+
+try:
+    # Since `referencing` only supports Python >= 3.8, this try-except block
+    # maintains support for earlier Python versions.
+ from referencing.exceptions import Unresolvable
+except ImportError:
+ from jsonschema.exceptions import RefResolutionError as Unresolvable
from .apimodel import ApiModel
from .datamodel import DataModel
@@ -39,8 +46,7 @@ class BatchRequest(Request):
_batched_request_cls = abc.abstractproperty()
- _schema_errors = (jsonschema.SchemaError, jsonschema.ValidationError, jsonschema.FormatError,
- jsonschema.RefResolutionError)
+ _schema_errors = (SchemaError, ValidationError, FormatError, Unresolvable)
def __init__(self, requests, validate_requests=False, allow_raw_requests=True, **kwargs):
super(BatchRequest, self).__init__(**kwargs)
@@ -70,8 +76,7 @@ class BatchRequest(Request):
for i, req in enumerate(self.requests):
try:
req.validate()
- except (jsonschema.SchemaError, jsonschema.ValidationError,
- jsonschema.FormatError, jsonschema.RefResolutionError) as e:
+ except (SchemaError, ValidationError, FormatError, Unresolvable) as e:
raise Exception('Validation error in batch item #%d: %s' % (i, str(e)))
def get_json(self):
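
The consolidated `_schema_errors` tuple groups every schema-related failure mode. A self-contained example (using only public `jsonschema` API) of the kind of error the batch validation loop catches and re-raises:

```python
from jsonschema import validate
from jsonschema.exceptions import FormatError, SchemaError, ValidationError

schema = {"type": "object", "properties": {"name": {"type": "string"}}}

try:
    # "name" should be a string, so this instance fails validation
    validate(instance={"name": 123}, schema=schema)
except (SchemaError, ValidationError, FormatError) as e:
    print("Validation error:", e.message)
```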
diff --git a/clearml/backend_api/session/session.py b/clearml/backend_api/session/session.py
index 73403619..efdd697f 100644
--- a/clearml/backend_api/session/session.py
+++ b/clearml/backend_api/session/session.py
@@ -12,6 +12,7 @@ import jwt
import requests
import six
from requests.auth import HTTPBasicAuth
+from requests.exceptions import ChunkedEncodingError, ContentDecodingError, StreamConsumedError
from six.moves.urllib.parse import urlparse, urlunparse
from typing import List, Optional
@@ -31,6 +32,7 @@ from .defs import (
ENV_API_EXTRA_RETRY_CODES,
ENV_API_DEFAULT_REQ_METHOD,
ENV_FORCE_MAX_API_VERSION,
+ ENV_IGNORE_MISSING_CONFIG,
MissingConfigError
)
from .request import Request, BatchRequest # noqa: F401
@@ -417,6 +419,13 @@ class Session(TokenManager):
(self._logger or get_logger()).warning("SSLError Retrying {}".format(ex))
sleep(0.1)
continue
+ except (ChunkedEncodingError, ContentDecodingError, StreamConsumedError) as ex:
+ retry_counter += 1
+ # we should retry
+ if retry_counter >= self._ssl_error_count_verbosity:
+                    (self._logger or get_logger()).warning("Network decoding error, retrying: {}".format(ex))
+ sleep(0.1)
+ continue
if (
refresh_token_if_unauthorized
@@ -736,7 +745,7 @@ class Session(TokenManager):
return urlunparse(parsed)
@classmethod
- def check_min_api_version(cls, min_api_version):
+ def check_min_api_version(cls, min_api_version, raise_error=False):
"""
Return True if Session.api_version is greater or equal >= to min_api_version
"""
@@ -764,18 +773,24 @@ class Session(TokenManager):
# noinspection PyBroadException
try:
cls()
+ except MissingConfigError:
+ if raise_error and not ENV_IGNORE_MISSING_CONFIG.get():
+ raise
+ except LoginError:
+ if raise_error:
+ raise
except Exception:
pass
return cls._version_tuple(cls.api_version) >= cls._version_tuple(str(min_api_version))
@classmethod
- def check_min_api_server_version(cls, min_api_version):
+ def check_min_api_server_version(cls, min_api_version, raise_error=False):
"""
Return True if Session.max_api_version is greater or equal >= to min_api_version
Notice this is the api version server reported, not the current SDK max supported api version
"""
- if cls.check_min_api_version(min_api_version):
+ if cls.check_min_api_version(min_api_version, raise_error=raise_error):
return True
return cls._version_tuple(cls.max_api_version) >= cls._version_tuple(str(min_api_version))
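
A usage sketch of the new `raise_error` flag introduced above; it assumes a configured ClearML environment (valid `clearml.conf` or environment credentials):

```python
from clearml.backend_api import Session

# With raise_error=True (added in this patch), a missing/invalid configuration
# or a failed login raises instead of silently returning False.
try:
    if Session.check_min_api_server_version("2.13", raise_error=True):
        print("server supports the 2.13 API")
except Exception as ex:
    print("could not verify server version:", ex)
```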
diff --git a/clearml/backend_interface/base.py b/clearml/backend_interface/base.py
index afbe67a7..8fe54b4b 100644
--- a/clearml/backend_interface/base.py
+++ b/clearml/backend_interface/base.py
@@ -178,8 +178,9 @@ class IdObjectBase(InterfaceBase):
# noinspection PyBroadException
try:
self._data = self._reload()
- except Exception:
- self.log.error("Failed reloading task {}".format(self.id))
+ except Exception as ex:
+ self.log.error("Failed reloading {} {}".format(type(self).__name__.lower(), self.id))
+ self.log.debug("Failed reloading {} {}: {}".format(type(self).__name__.lower(), self.id, ex))
@classmethod
def normalize_id(cls, id):
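
The patched handler keeps the ERROR line terse and pushes the exception detail down to DEBUG. An illustration of that two-tier logging pattern with the standard library; the task id is a hypothetical placeholder:

```python
import logging

logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger("clearml.demo")

try:
    raise RuntimeError("connection dropped")
except Exception as ex:
    # Terse ERROR for everyday logs; full exception detail only at DEBUG,
    # mirroring the patched reload handler above.
    log.error("Failed reloading task abc123")
    log.debug("Failed reloading task abc123: %s", ex)
```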
diff --git a/clearml/backend_interface/model.py b/clearml/backend_interface/model.py
index bc14258f..3d870e80 100644
--- a/clearml/backend_interface/model.py
+++ b/clearml/backend_interface/model.py
@@ -79,7 +79,7 @@ class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin):
self.reload()
def archive(self):
- if Session.check_min_api_server_version("2.13"):
+ if Session.check_min_api_server_version("2.13", raise_error=True):
self.send(models.ArchiveManyRequest(ids=[self.id]))
self.reload()
else:
@@ -90,7 +90,7 @@ class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin):
)
def unarchive(self):
- if Session.check_min_api_server_version("2.13"):
+ if Session.check_min_api_server_version("2.13", raise_error=True):
self.send(models.UnarchiveManyRequest(ids=[self.id]))
self.reload()
else:
diff --git a/clearml/backend_interface/task/log.py b/clearml/backend_interface/task/log.py
index 9f348cdc..9e67cd38 100644
--- a/clearml/backend_interface/task/log.py
+++ b/clearml/backend_interface/task/log.py
@@ -263,8 +263,8 @@ class TaskHandler(BufferingHandler):
# flush pending logs
if not self._task_id:
return
- # avoid deadlocks just skip the lock, we are shutting down anyway
- self.lock = None
+ # Never null the lock, it might be used by internal Python at some point
+ # self.lock = None
self._task_id = None
# shut down the TaskHandler, from this point onwards. No events will be logged
diff --git a/clearml/backend_interface/task/task.py b/clearml/backend_interface/task/task.py
index fc00233c..30c098a3 100644
--- a/clearml/backend_interface/task/task.py
+++ b/clearml/backend_interface/task/task.py
@@ -1088,12 +1088,13 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
if not model.ready:
# raise ValueError('Model %s is not published (not ready)' % model_id)
self.log.debug('Model %s [%s] is not published yet (not ready)' % (model_id, model.uri))
- name = name or Path(model.uri).stem
else:
# clear the input model
model = None
model_id = ''
- name = name or 'Input Model'
+ from ...model import InputModel
+ # noinspection PyProtectedMember
+ name = name or InputModel._get_connect_name(model)
with self._edit_lock:
self.reload()
@@ -1345,7 +1346,7 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
return params.get(name, default)
def delete_parameter(self, name, force=False):
- # type: (str) -> bool
+ # type: (str, bool) -> bool
"""
Delete a parameter by its full name Section/name.
diff --git a/clearml/binding/hydra_bind.py b/clearml/binding/hydra_bind.py
index b89f1dbc..6e58139a 100644
--- a/clearml/binding/hydra_bind.py
+++ b/clearml/binding/hydra_bind.py
@@ -1,7 +1,6 @@
import io
import sys
from functools import partial
-import yaml
from ..config import running_remotely, get_remote_task_id, DEV_TASK_NO_REUSE
from ..debugging.log import LoggerRoot
@@ -17,6 +16,9 @@ class PatchHydra(object):
_parameter_allow_full_edit = '_allow_omegaconf_edit_'
_should_delete_overrides = False
_overrides_section = "Args/overrides"
+ _default_hydra_context = None
+ _overrides_parser = None
+ _config_group_warning_sent = False
@classmethod
def patch_hydra(cls):
@@ -83,6 +85,7 @@ class PatchHydra(object):
@staticmethod
def _patched_hydra_run(self, config_name, task_function, overrides, *args, **kwargs):
+ PatchHydra._default_hydra_context = self
PatchHydra._allow_omegaconf_edit = False
if not running_remotely():
return PatchHydra._original_hydra_run(self, config_name, task_function, overrides, *args, **kwargs)
@@ -104,12 +107,11 @@ class PatchHydra(object):
if k.startswith(PatchHydra._parameter_section+'/')}
stored_config.pop(PatchHydra._parameter_allow_full_edit, None)
for override_k, override_v in stored_config.items():
- if override_k.startswith("~"):
- new_override = override_k
- else:
- new_override = "++" + override_k.lstrip("+")
+ new_override = override_k
if override_v is not None and override_v != "":
new_override += "=" + override_v
+ if not new_override.startswith("~") and not PatchHydra._is_group(self, new_override):
+ new_override = "++" + new_override.lstrip("+")
overrides.append(new_override)
PatchHydra._should_delete_overrides = True
except Exception:
@@ -117,6 +119,28 @@ class PatchHydra(object):
return PatchHydra._original_hydra_run(self, config_name, task_function, overrides, *args, **kwargs)
+ @staticmethod
+ def _parse_override(override):
+ if PatchHydra._overrides_parser is None:
+ from hydra.core.override_parser.overrides_parser import OverridesParser
+ PatchHydra._overrides_parser = OverridesParser.create()
+ return PatchHydra._overrides_parser.parse_overrides(overrides=[override])[0]
+
+ @staticmethod
+ def _is_group(hydra_context, override):
+ # noinspection PyBroadException
+ try:
+ override = PatchHydra._parse_override(override)
+ group_exists = hydra_context.config_loader.repository.group_exists(override.key_or_group)
+ return group_exists
+ except Exception:
+ if not PatchHydra._config_group_warning_sent:
+ LoggerRoot.get_base_logger().warning(
+ "Could not determine if Hydra is overriding a Config Group"
+ )
+ PatchHydra._config_group_warning_sent = True
+ return False
+
@staticmethod
def _patched_run_job(config, task_function, *args, **kwargs):
# noinspection PyBroadException
@@ -125,11 +149,12 @@ class PatchHydra(object):
failed_status = JobStatus.FAILED
except Exception:
- LoggerRoot.get_base_logger(PatchHydra).warning(
+ LoggerRoot.get_base_logger().warning(
"Could not import JobStatus from Hydra. Failed tasks will be marked as completed"
)
failed_status = None
+ hydra_context = kwargs.get("hydra_context", PatchHydra._default_hydra_context)
# store the config
# noinspection PyBroadException
try:
@@ -139,7 +164,8 @@ class PatchHydra(object):
overrides = config.hydra.overrides.task
stored_config = {}
for arg in overrides:
- arg = arg.lstrip("+")
+ if not PatchHydra._is_group(hydra_context, arg):
+ arg = arg.lstrip("+")
if "=" in arg:
k, v = arg.split("=", 1)
stored_config[k] = v
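
`_is_group` leans on Hydra's own override parser to decide whether an override targets a config group (whose prefix must be preserved) rather than a plain value. A small sketch of that parser, assuming `hydra-core` is installed:

```python
from hydra.core.override_parser.overrides_parser import OverridesParser

parser = OverridesParser.create()

# "++" means add-or-override and "~key" deletes; a bare "db=mysql" may select
# a config *group*, which is why the patch checks group membership before
# prepending "++" to a stored override.
override = parser.parse_overrides(overrides=["++trainer.lr=0.1"])[0]
print(override.key_or_group)  # -> "trainer.lr"
print(override.value())       # -> 0.1
```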
diff --git a/clearml/datasets/dataset.py b/clearml/datasets/dataset.py
index a56cdaa0..d57b158e 100644
--- a/clearml/datasets/dataset.py
+++ b/clearml/datasets/dataset.py
@@ -1241,7 +1241,7 @@ class Dataset(object):
:return: Newly created Dataset object
"""
- if not Dataset.is_offline() and not Session.check_min_api_server_version("2.13"):
+ if not Dataset.is_offline() and not Session.check_min_api_server_version("2.13", raise_error=True):
raise NotImplementedError("Datasets are not supported with your current ClearML server version. Please update your server.")
parent_datasets = [cls.get(dataset_id=p) if not isinstance(p, Dataset) else p for p in (parent_datasets or [])]
@@ -1531,7 +1531,7 @@ class Dataset(object):
"""
if Dataset.is_offline():
raise ValueError("Cannot rename dataset in offline mode")
- if not bool(Session.check_min_api_server_version(cls.__min_api_version)):
+ if not bool(Session.check_min_api_server_version(cls.__min_api_version, raise_error=True)):
LoggerRoot.get_base_logger().warning(
"Could not rename dataset because API version < {}".format(cls.__min_api_version)
)
@@ -1578,7 +1578,7 @@ class Dataset(object):
"""
if cls.is_offline():
raise ValueError("Cannot move dataset project in offlime mode")
- if not bool(Session.check_min_api_server_version(cls.__min_api_version)):
+ if not bool(Session.check_min_api_server_version(cls.__min_api_version, raise_error=True)):
LoggerRoot.get_base_logger().warning(
"Could not move dataset to another project because API version < {}".format(cls.__min_api_version)
)
@@ -3221,14 +3221,14 @@ class Dataset(object):
# type: (Path, Union[FileEntry, LinkEntry], Optional[int], Optional[dict]) -> bool
# check if we need the file for the requested dataset part
- if ds_part is not None:
- f_parts = ds_chunk_selection.get(file_entry.parent_dataset_id, [])
+ if part is not None:
+ f_parts = chunk_selection.get(file_entry.parent_dataset_id, [])
# file is not in requested dataset part, no need to check it.
if self._get_chunk_idx_from_artifact_name(file_entry.artifact_name) not in f_parts:
return True
# check if the local size and the stored size match (faster than comparing hash)
- if (base_folder / file_entry.relative_path).stat().st_size != file_entry.size:
+ if (target_base_folder / file_entry.relative_path).stat().st_size != file_entry.size:
return False
return True
@@ -3240,23 +3240,19 @@ class Dataset(object):
tp = None
try:
futures_ = []
- tp = ThreadPoolExecutor(max_workers=max_workers)
- for f in self._dataset_file_entries.values():
- future = tp.submit(__verify_file_or_link, target_base_folder, f, part, chunk_selection)
- futures_.append(future)
+ with ThreadPoolExecutor(max_workers=max_workers) as tp:
+ for f in self._dataset_file_entries.values():
+ future = tp.submit(__verify_file_or_link, target_base_folder, f, part, chunk_selection)
+ futures_.append(future)
- for f in self._dataset_link_entries.values():
- # don't check whether link is in dataset part, hence None for part and chunk_selection
- future = tp.submit(__verify_file_or_link, target_base_folder, f, None, None)
- futures_.append(future)
+ for f in self._dataset_link_entries.values():
+ # don't check whether link is in dataset part, hence None for part and chunk_selection
+ future = tp.submit(__verify_file_or_link, target_base_folder, f, None, None)
+ futures_.append(future)
- verified = all(f.result() for f in futures_)
+ verified = all(f.result() for f in futures_)
except Exception:
verified = False
- finally:
- if tp is not None:
- # we already have our result, close all pending checks (improves performance when verified==False)
- tp.shutdown(cancel_futures=True)
return verified
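
The refactor above swaps a manual `try/finally` + `shutdown()` for a `with` block. A minimal self-contained sketch of the same pattern:

```python
from concurrent.futures import ThreadPoolExecutor

def verify(n):
    return n % 2 == 0

# The with-block guarantees shutdown (waiting on submitted work) on success
# and on exceptions alike, replacing the explicit tp.shutdown() call.
with ThreadPoolExecutor(max_workers=4) as tp:
    futures = [tp.submit(verify, n) for n in range(10)]
    verified = all(f.result() for f in futures)
print(verified)  # -> False, since odd numbers fail the check
```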
diff --git a/clearml/model.py b/clearml/model.py
index 7f529bda..146f1404 100644
--- a/clearml/model.py
+++ b/clearml/model.py
@@ -1,9 +1,8 @@
import abc
import os
-import tarfile
import zipfile
import shutil
-from tempfile import mkdtemp, mkstemp
+from tempfile import mkstemp
import six
import math
@@ -1427,12 +1426,31 @@ class Model(BaseModel):
:param project_name: Optional, filter based project name string, if not given query models from all projects
:param model_name: Optional Model name as shown in the model artifactory
- :param tags: Filter based on the requested list of tags (strings)
- To exclude a tag add "-" prefix to the tag. Example: ['production', 'verified', '-qa']
- To include All tags (instead of the default Any behaviour) use "__$all" as the first string, example:
- ["__$all", "best", "model", "ever"]
- To combine All tags and exclude a list of tags use "__$not" before the excluded tags, example:
- ["__$all", "best", "model", "ever", "__$not", "internal", "__$not", "test"]
+ :param tags: Filter based on the requested list of tags (strings).
+ To exclude a tag add "-" prefix to the tag. Example: ``["production", "verified", "-qa"]``.
+ The default behaviour is to join all tags with a logical "OR" operator.
+ To join all tags with a logical "AND" operator instead, use "__$all" as the first string, for example:
+
+ .. code-block:: py
+
+ ["__$all", "best", "model", "ever"]
+
+ To join all tags with AND, but exclude a tag use "__$not" before the excluded tag, for example:
+
+ .. code-block:: py
+
+ ["__$all", "best", "model", "ever", "__$not", "internal", "__$not", "test"]
+
+ The "OR" and "AND" operators apply to all tags that follow them until another operator is specified.
+ The NOT operator applies only to the immediately following tag.
+ For example:
+
+ .. code-block:: py
+
+                ["__$all", "a", "b", "c", "__$or", "d", "__$not", "e", "__$and", "__$or", "f", "g"]
+
+ This example means ("a" AND "b" AND "c" AND ("d" OR NOT "e") AND ("f" OR "g")).
+ See https://clear.ml/docs/latest/docs/clearml_sdk/model_sdk#tag-filters for details.
:param only_published: If True, only return published models.
:param include_archived: If True, return archived models.
:param max_results: Optional return the last X models,
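
A usage sketch of the tag-filter grammar documented in this hunk, via the public `Model.query_models` API; the project name is a placeholder, and a configured ClearML environment is assumed:

```python
from clearml import Model

# "production" OR "verified" (default OR behavior), excluding anything tagged "qa"
models = Model.query_models(
    project_name="examples",  # hypothetical project
    tags=["production", "verified", "-qa"],
)

# "best" AND "model" AND "ever" (logical AND via the "__$all" operator)
models = Model.query_models(tags=["__$all", "best", "model", "ever"])
```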
@@ -1596,6 +1614,7 @@ class InputModel(Model):
# noinspection PyProtectedMember
_EMPTY_MODEL_ID = _Model._EMPTY_MODEL_ID
+ _WARNING_CONNECTED_NAMES = {}
@classmethod
def import_model(
@@ -1933,9 +1952,11 @@ class InputModel(Model):
:param object task: A Task object.
:param str name: The model name to be stored on the Task
- (default the filename, of the model weights, without the file extension)
+            (defaults to the filename of the model weights, without the file extension, or to `Input Model` if no filename can be derived)
"""
self._set_task(task)
+ name = name or InputModel._get_connect_name(self)
+ InputModel._warn_on_same_name_connect(name)
model_id = None
# noinspection PyProtectedMember
@@ -1967,6 +1988,28 @@ class InputModel(Model):
if not self._task.get_labels_enumeration() and model.data.labels:
task.set_model_label_enumeration(model.data.labels)
+ @classmethod
+ def _warn_on_same_name_connect(cls, name):
+ if name not in cls._WARNING_CONNECTED_NAMES:
+ cls._WARNING_CONNECTED_NAMES[name] = False
+ return
+ if cls._WARNING_CONNECTED_NAMES[name]:
+ return
+ get_logger().warning("Connecting multiple input models with the same name: `{}`. This might result in the wrong model being used when executing remotely".format(name))
+ cls._WARNING_CONNECTED_NAMES[name] = True
+
+ @staticmethod
+ def _get_connect_name(model):
+ default_name = "Input Model"
+ if model is None:
+ return default_name
+ # noinspection PyBroadException
+ try:
+ model_uri = getattr(model, "url", getattr(model, "uri", None))
+ return Path(model_uri).stem
+ except Exception:
+ return default_name
+
class OutputModel(BaseModel):
"""
diff --git a/clearml/storage/helper.py b/clearml/storage/helper.py
index 1d62cc68..8fca9c85 100644
--- a/clearml/storage/helper.py
+++ b/clearml/storage/helper.py
@@ -791,10 +791,21 @@ class _GoogleCloudStorageDriver(_Driver):
self.name = name[len(_GoogleCloudStorageDriver.scheme_prefix):]
if cfg.credentials_json:
+ # noinspection PyBroadException
try:
credentials = service_account.Credentials.from_service_account_file(cfg.credentials_json)
- except ValueError:
+ except Exception:
credentials = None
+
+ if not credentials:
+ # noinspection PyBroadException
+ try:
+ # Try parsing this as json to support actual json content and not a file path
+ credentials = service_account.Credentials.from_service_account_info(
+ json.loads(cfg.credentials_json)
+ )
+ except Exception:
+ pass
else:
credentials = None
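
The fallback above lets `credentials_json` hold either a file path or the JSON content itself. A condensed sketch of that resolution order as a standalone helper; `load_gcs_credentials` is an illustrative name, not part of the SDK:

```python
import json

from google.oauth2 import service_account

def load_gcs_credentials(credentials_json):
    """Hypothetical helper: accept a service-account file path or inline JSON."""
    # First treat the setting as a path to a service-account file...
    try:
        return service_account.Credentials.from_service_account_file(credentials_json)
    except Exception:
        pass
    # ...then fall back to parsing it as inline JSON content, as the patch does.
    try:
        return service_account.Credentials.from_service_account_info(json.loads(credentials_json))
    except Exception:
        return None
```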
diff --git a/clearml/task.py b/clearml/task.py
index ac896798..aa7b2853 100644
--- a/clearml/task.py
+++ b/clearml/task.py
@@ -331,7 +331,9 @@ class Task(_Task):
:param str output_uri: The default location for output models and other artifacts. If True, the default
files_server will be used for model storage. In the default location, ClearML creates a subfolder for the
- output. The subfolder structure is the following: `