revital 2023-11-23 15:28:57 +02:00
commit 36f66d466a
22 changed files with 275 additions and 94 deletions

View File

@ -4,8 +4,8 @@
<img align="center" src="docs/clearml-logo.svg#gh-light-mode-only" alt="Clear|ML"><img align="center" src="docs/clearml-logo-dark.svg#gh-dark-mode-only" alt="Clear|ML">
</p>
**[ClearML](https://clear.ml) - Auto-Magical Suite of tools to streamline your ML workflow
</br>Experiment Manager, MLOps and Data-Management**
**[ClearML](https://clear.ml) - Auto-Magical Suite of tools to streamline your AI workflow
</br>Experiment Manager, MLOps/LLMOps and Data-Management**
[![GitHub license](https://img.shields.io/github/license/allegroai/clearml.svg)](https://img.shields.io/github/license/allegroai/clearml.svg) [![PyPI pyversions](https://img.shields.io/pypi/pyversions/clearml.svg)](https://img.shields.io/pypi/pyversions/clearml.svg) [![PyPI version shields.io](https://img.shields.io/pypi/v/clearml.svg)](https://pypi.org/project/clearml/) [![Conda version shields.io](https://img.shields.io/conda/v/clearml/clearml)](https://anaconda.org/clearml/clearml) [![Optuna](https://img.shields.io/badge/Optuna-integrated-blue)](https://optuna.org)<br>
[![PyPI Downloads](https://static.pepy.tech/badge/clearml/month)](https://pypi.org/project/clearml/) [![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/allegroai)](https://artifacthub.io/packages/search?repo=allegroai) [![Youtube](https://img.shields.io/badge/ClearML-DD0000?logo=youtube&logoColor=white)](https://www.youtube.com/c/clearml) [![Slack Channel](https://img.shields.io/badge/slack-%23clearml--community-blueviolet?logo=slack)](https://joinslack.clear.ml) [![Signup](https://img.shields.io/badge/Clear%7CML-Signup-brightgreen)](https://app.clear.ml)
@ -19,14 +19,15 @@
ClearML is an ML/DL development and production suite. It contains FIVE main modules:
- [Experiment Manager](#clearml-experiment-manager) - Automagical experiment tracking, environments and results
- [MLOps](https://github.com/allegroai/clearml-agent) - Orchestration, Automation & Pipelines solution for ML/DL jobs (K8s / Cloud / bare-metal)
- [MLOps / LLMOps](https://github.com/allegroai/clearml-agent) - Orchestration, Automation & Pipelines solution for ML/DL/GenAI jobs (Kubernetes / Cloud / bare-metal)
- [Data-Management](https://github.com/allegroai/clearml/blob/master/docs/datasets.md) - Fully differentiable data management & version control solution on top of object-storage
(S3 / GS / Azure / NAS)
- [Model-Serving](https://github.com/allegroai/clearml-serving) - *cloud-ready* Scalable model serving solution!
- **Deploy new model endpoints in under 5 minutes**
- Includes optimized GPU serving support backed by Nvidia-Triton
- **with out-of-the-box Model Monitoring**
- **NEW** :fire: [Reports](https://clear.ml/docs/latest/docs/webapp/webapp_reports) - Create and share rich MarkDown documents supporting embeddable online content
- [Reports](https://clear.ml/docs/latest/docs/webapp/webapp_reports) - Create and share rich MarkDown documents supporting embeddable online content
- **NEW** :fire: [Orchestration Dashboard](https://clear.ml/docs/latest/docs/webapp/webapp_orchestration_dash/) - Live rich dashboard for your entire compute cluster (Cloud / Kubernetes / On-Prem)
Instrumenting these components is the **ClearML-server**, see [Self-Hosting](https://clear.ml/docs/latest/docs/deploying_clearml/clearml_server) & [Free tier Hosting](https://app.clear.ml)
@ -141,7 +142,7 @@ The ClearML run-time components:
- [clearml-session](https://github.com/allegroai/clearml-session) - **Launch remote JupyterLab / VSCode-server inside any docker, on Cloud/On-Prem machines**
- [clearml-task](https://github.com/allegroai/clearml/blob/master/docs/clearml-task.md) - Run any codebase on remote machines with full remote logging of Tensorboard, Matplotlib & Console outputs
- [clearml-data](https://github.com/allegroai/clearml/blob/master/docs/datasets.md) - **CLI for managing and versioning your datasets, including creating / uploading / downloading of data from S3/GS/Azure/NAS**
- [AWS Auto-Scaler](https://clear.ml/docs/latest/docs/guides/services/aws_autoscaler) - Automatically spin EC2 instances based on your workloads with preconfigured budget! No need for K8s!
- [AWS Auto-Scaler](https://clear.ml/docs/latest/docs/guides/services/aws_autoscaler) - Automatically spin EC2 instances based on your workloads with preconfigured budget! No need for Kubernetes!
- [Hyper-Parameter Optimization](https://clear.ml/docs/latest/docs/guides/optimization/hyper-parameter-optimization/examples_hyperparam_opt) - Optimize any code with black-box approach and state-of-the-art Bayesian optimization algorithms
- [Automation Pipeline](https://clear.ml/docs/latest/docs/guides/pipeline/pipeline_controller) - Build pipelines based on existing experiments / jobs, supports building pipelines of pipelines!
- [Slack Integration](https://clear.ml/docs/latest/docs/guides/services/slack_alerts) - Report experiments progress / failure directly to Slack (fully customizable!)

SECURITY.md Normal file
View File

@ -0,0 +1,5 @@
## Reporting Security Issues
Thanks for taking the time to make ClearML more secure!
To carry on the discussion more securely, please send your report to [security@clear.ml](mailto:security@clear.ml).

View File

@ -142,7 +142,7 @@ class PipelineController(object):
try:
self.job.task.reload()
self.job_ended = self.job_started + self.job.task.data.active_duration
except Exception as e:
except Exception:
pass
def set_job_started(self):
@ -154,7 +154,6 @@ class PipelineController(object):
except Exception:
pass
def __init__(
self,
name, # type: str
@ -2733,7 +2732,7 @@ class PipelineController(object):
if node_failed and self._abort_running_steps_on_failure and not node.continue_on_fail:
nodes_failed_stop_pipeline.append(node.name)
elif node.timeout:
node.set_job_started()
started = node.job.task.data.started
if (datetime.now().astimezone(started.tzinfo) - started).total_seconds() > node.timeout:
node.job.abort()
completed_jobs.append(j)
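The timeout check above relies on the server-side start timestamp being timezone-aware. A minimal sketch of that elapsed-time arithmetic, with hypothetical `started` and `timeout` values:

```python
from datetime import datetime, timedelta, timezone

# Hypothetical tz-aware start timestamp and per-node timeout (seconds)
started = datetime.now(timezone.utc) - timedelta(seconds=90)
timeout = 60

# Same arithmetic as the controller loop: compare in the timestamp's own timezone
elapsed = (datetime.now().astimezone(started.tzinfo) - started).total_seconds()
if elapsed > timeout:
    print("node exceeded its timeout; the controller would abort its job here")
```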

View File

@ -32,7 +32,10 @@ class ApiServiceProxy(object):
)
# get the most advanced service version that supports our api
version = [str(v) for v in ApiServiceProxy._available_versions if Session.check_min_api_version(v)][-1]
version = [
str(v) for v in ApiServiceProxy._available_versions
if Session.check_min_api_version(v)
][-1]
Session.api_version = version
self.__dict__["__wrapped_version__"] = Session.api_version
name = ".v{}.{}".format(

View File

@ -490,7 +490,7 @@ class Queue(NonStrictDataModel):
self._property_metadata = None
return
self.assert_isinstance(value, "metadata", (dict,), is_array=True)
self.assert_isinstance(value, "metadata", (dict,))
self._property_metadata = value

View File

@ -40,7 +40,6 @@ for a very long time for a non-responding or mis-configured server
"""
ENV_API_EXTRA_RETRY_CODES = EnvEntry("CLEARML_API_EXTRA_RETRY_CODES")
ENV_FORCE_MAX_API_VERSION = EnvEntry("CLEARML_FORCE_MAX_API_VERSION", type=str)

View File

@ -1,7 +1,14 @@
import abc
import jsonschema
import six
from jsonschema.exceptions import FormatError, SchemaError, ValidationError
try:
# Since ``referencing`` only supports Python >= 3.8, this try-except block maintains support
# for earlier Python versions.
from referencing.exceptions import Unresolvable
except ImportError:
from jsonschema.exceptions import RefResolutionError as Unresolvable
from .apimodel import ApiModel
from .datamodel import DataModel
@ -39,8 +46,7 @@ class BatchRequest(Request):
_batched_request_cls = abc.abstractproperty()
_schema_errors = (jsonschema.SchemaError, jsonschema.ValidationError, jsonschema.FormatError,
jsonschema.RefResolutionError)
_schema_errors = (SchemaError, ValidationError, FormatError, Unresolvable)
def __init__(self, requests, validate_requests=False, allow_raw_requests=True, **kwargs):
super(BatchRequest, self).__init__(**kwargs)
@ -70,8 +76,7 @@ class BatchRequest(Request):
for i, req in enumerate(self.requests):
try:
req.validate()
except (jsonschema.SchemaError, jsonschema.ValidationError,
jsonschema.FormatError, jsonschema.RefResolutionError) as e:
except (SchemaError, ValidationError, FormatError, Unresolvable) as e:
raise Exception('Validation error in batch item #%d: %s' % (i, str(e)))
def get_json(self):

View File

@ -12,6 +12,7 @@ import jwt
import requests
import six
from requests.auth import HTTPBasicAuth
from requests.exceptions import ChunkedEncodingError, ContentDecodingError, StreamConsumedError
from six.moves.urllib.parse import urlparse, urlunparse
from typing import List, Optional
@ -31,6 +32,7 @@ from .defs import (
ENV_API_EXTRA_RETRY_CODES,
ENV_API_DEFAULT_REQ_METHOD,
ENV_FORCE_MAX_API_VERSION,
ENV_IGNORE_MISSING_CONFIG,
MissingConfigError
)
from .request import Request, BatchRequest # noqa: F401
@ -417,6 +419,13 @@ class Session(TokenManager):
(self._logger or get_logger()).warning("SSLError Retrying {}".format(ex))
sleep(0.1)
continue
except (ChunkedEncodingError, ContentDecodingError, StreamConsumedError) as ex:
retry_counter += 1
# we should retry
if retry_counter >= self._ssl_error_count_verbosity:
(self._logger or get_logger()).warning("Network decoding error Retrying {}".format(ex))
sleep(0.1)
continue
if (
refresh_token_if_unauthorized
@ -736,7 +745,7 @@ class Session(TokenManager):
return urlunparse(parsed)
@classmethod
def check_min_api_version(cls, min_api_version):
def check_min_api_version(cls, min_api_version, raise_error=False):
"""
Return True if Session.api_version is greater or equal >= to min_api_version
"""
@ -764,18 +773,24 @@ class Session(TokenManager):
# noinspection PyBroadException
try:
cls()
except MissingConfigError:
if raise_error and not ENV_IGNORE_MISSING_CONFIG.get():
raise
except LoginError:
if raise_error:
raise
except Exception:
pass
return cls._version_tuple(cls.api_version) >= cls._version_tuple(str(min_api_version))
@classmethod
def check_min_api_server_version(cls, min_api_version):
def check_min_api_server_version(cls, min_api_version, raise_error=False):
"""
Return True if Session.max_api_version is greater or equal >= to min_api_version
Notice this is the api version server reported, not the current SDK max supported api version
"""
if cls.check_min_api_version(min_api_version):
if cls.check_min_api_version(min_api_version, raise_error=raise_error):
return True
return cls._version_tuple(cls.max_api_version) >= cls._version_tuple(str(min_api_version))
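With the new `raise_error` flag, configuration and login problems can surface to the caller instead of being swallowed. A hedged usage sketch:

```python
from clearml.backend_api import Session

try:
    # raise_error=True propagates MissingConfigError / LoginError instead of returning False
    supported = Session.check_min_api_server_version("2.20", raise_error=True)
except Exception as ex:
    supported = False
    print("could not verify server version: {}".format(ex))
```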

View File

@ -178,8 +178,9 @@ class IdObjectBase(InterfaceBase):
# noinspection PyBroadException
try:
self._data = self._reload()
except Exception:
self.log.error("Failed reloading task {}".format(self.id))
except Exception as ex:
self.log.error("Failed reloading {} {}".format(type(self).__name__.lower(), self.id))
self.log.debug("Failed reloading {} {}: {}".format(type(self).__name__.lower(), self.id, ex))
@classmethod
def normalize_id(cls, id):

View File

@ -79,7 +79,7 @@ class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin):
self.reload()
def archive(self):
if Session.check_min_api_server_version("2.13"):
if Session.check_min_api_server_version("2.13", raise_error=True):
self.send(models.ArchiveManyRequest(ids=[self.id]))
self.reload()
else:
@ -90,7 +90,7 @@ class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin):
)
def unarchive(self):
if Session.check_min_api_server_version("2.13"):
if Session.check_min_api_server_version("2.13", raise_error=True):
self.send(models.UnarchiveManyRequest(ids=[self.id]))
self.reload()
else:

View File

@ -263,8 +263,8 @@ class TaskHandler(BufferingHandler):
# flush pending logs
if not self._task_id:
return
# avoid deadlocks just skip the lock, we are shutting down anyway
self.lock = None
# Never null the lock, it might be used by internal Python at some point
# self.lock = None
self._task_id = None
# shut down the TaskHandler, from this point onwards. No events will be logged

View File

@ -1088,12 +1088,13 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
if not model.ready:
# raise ValueError('Model %s is not published (not ready)' % model_id)
self.log.debug('Model %s [%s] is not published yet (not ready)' % (model_id, model.uri))
name = name or Path(model.uri).stem
else:
# clear the input model
model = None
model_id = ''
name = name or 'Input Model'
from ...model import InputModel
# noinspection PyProtectedMember
name = name or InputModel._get_connect_name(model)
with self._edit_lock:
self.reload()
@ -1345,7 +1346,7 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
return params.get(name, default)
def delete_parameter(self, name, force=False):
# type: (str) -> bool
# type: (str, bool) -> bool
"""
Delete a parameter by its full name Section/name.

View File

@ -1,7 +1,6 @@
import io
import sys
from functools import partial
import yaml
from ..config import running_remotely, get_remote_task_id, DEV_TASK_NO_REUSE
from ..debugging.log import LoggerRoot
@ -17,6 +16,9 @@ class PatchHydra(object):
_parameter_allow_full_edit = '_allow_omegaconf_edit_'
_should_delete_overrides = False
_overrides_section = "Args/overrides"
_default_hydra_context = None
_overrides_parser = None
_config_group_warning_sent = False
@classmethod
def patch_hydra(cls):
@ -83,6 +85,7 @@ class PatchHydra(object):
@staticmethod
def _patched_hydra_run(self, config_name, task_function, overrides, *args, **kwargs):
PatchHydra._default_hydra_context = self
PatchHydra._allow_omegaconf_edit = False
if not running_remotely():
return PatchHydra._original_hydra_run(self, config_name, task_function, overrides, *args, **kwargs)
@ -104,12 +107,11 @@ class PatchHydra(object):
if k.startswith(PatchHydra._parameter_section+'/')}
stored_config.pop(PatchHydra._parameter_allow_full_edit, None)
for override_k, override_v in stored_config.items():
if override_k.startswith("~"):
new_override = override_k
else:
new_override = "++" + override_k.lstrip("+")
if override_v is not None and override_v != "":
new_override += "=" + override_v
if not new_override.startswith("~") and not PatchHydra._is_group(self, new_override):
new_override = "++" + new_override.lstrip("+")
overrides.append(new_override)
PatchHydra._should_delete_overrides = True
except Exception:
@ -117,6 +119,28 @@ class PatchHydra(object):
return PatchHydra._original_hydra_run(self, config_name, task_function, overrides, *args, **kwargs)
@staticmethod
def _parse_override(override):
if PatchHydra._overrides_parser is None:
from hydra.core.override_parser.overrides_parser import OverridesParser
PatchHydra._overrides_parser = OverridesParser.create()
return PatchHydra._overrides_parser.parse_overrides(overrides=[override])[0]
@staticmethod
def _is_group(hydra_context, override):
# noinspection PyBroadException
try:
override = PatchHydra._parse_override(override)
group_exists = hydra_context.config_loader.repository.group_exists(override.key_or_group)
return group_exists
except Exception:
if not PatchHydra._config_group_warning_sent:
LoggerRoot.get_base_logger().warning(
"Could not determine if Hydra is overriding a Config Group"
)
PatchHydra._config_group_warning_sent = True
return False
@staticmethod
def _patched_run_job(config, task_function, *args, **kwargs):
# noinspection PyBroadException
@ -125,11 +149,12 @@ class PatchHydra(object):
failed_status = JobStatus.FAILED
except Exception:
LoggerRoot.get_base_logger(PatchHydra).warning(
LoggerRoot.get_base_logger().warning(
"Could not import JobStatus from Hydra. Failed tasks will be marked as completed"
)
failed_status = None
hydra_context = kwargs.get("hydra_context", PatchHydra._default_hydra_context)
# store the config
# noinspection PyBroadException
try:
@ -139,6 +164,7 @@ class PatchHydra(object):
overrides = config.hydra.overrides.task
stored_config = {}
for arg in overrides:
if not PatchHydra._is_group(hydra_context, arg):
arg = arg.lstrip("+")
if "=" in arg:
k, v = arg.split("=", 1)
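The `_is_group` helper above leans on Hydra's own override grammar to tell value overrides from config-group overrides. A small sketch of what that parser returns (the override string is illustrative):

```python
from hydra.core.override_parser.overrides_parser import OverridesParser

parser = OverridesParser.create()
# "++" force-adds a value override; a bare "db=mysql" could instead name a config group
override = parser.parse_overrides(overrides=["++trainer.lr=0.1"])[0]
print(override.key_or_group)  # -> "trainer.lr"
```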

View File

@ -1241,7 +1241,7 @@ class Dataset(object):
:return: Newly created Dataset object
"""
if not Dataset.is_offline() and not Session.check_min_api_server_version("2.13"):
if not Dataset.is_offline() and not Session.check_min_api_server_version("2.13", raise_error=True):
raise NotImplementedError("Datasets are not supported with your current ClearML server version. Please update your server.")
parent_datasets = [cls.get(dataset_id=p) if not isinstance(p, Dataset) else p for p in (parent_datasets or [])]
@ -1531,7 +1531,7 @@ class Dataset(object):
"""
if Dataset.is_offline():
raise ValueError("Cannot rename dataset in offline mode")
if not bool(Session.check_min_api_server_version(cls.__min_api_version)):
if not bool(Session.check_min_api_server_version(cls.__min_api_version, raise_error=True)):
LoggerRoot.get_base_logger().warning(
"Could not rename dataset because API version < {}".format(cls.__min_api_version)
)
@ -1578,7 +1578,7 @@ class Dataset(object):
"""
if cls.is_offline():
raise ValueError("Cannot move dataset project in offlime mode")
if not bool(Session.check_min_api_server_version(cls.__min_api_version)):
if not bool(Session.check_min_api_server_version(cls.__min_api_version, raise_error=True)):
LoggerRoot.get_base_logger().warning(
"Could not move dataset to another project because API version < {}".format(cls.__min_api_version)
)
@ -3221,14 +3221,14 @@ class Dataset(object):
# type: (Path, Union[FileEntry, LinkEntry], Optional[int], Optional[dict]) -> bool
# check if we need the file for the requested dataset part
if ds_part is not None:
f_parts = ds_chunk_selection.get(file_entry.parent_dataset_id, [])
if part is not None:
f_parts = chunk_selection.get(file_entry.parent_dataset_id, [])
# file is not in requested dataset part, no need to check it.
if self._get_chunk_idx_from_artifact_name(file_entry.artifact_name) not in f_parts:
return True
# check if the local size and the stored size match (faster than comparing hash)
if (base_folder / file_entry.relative_path).stat().st_size != file_entry.size:
if (target_base_folder / file_entry.relative_path).stat().st_size != file_entry.size:
return False
return True
@ -3240,7 +3240,7 @@ class Dataset(object):
tp = None
try:
futures_ = []
tp = ThreadPoolExecutor(max_workers=max_workers)
with ThreadPoolExecutor(max_workers=max_workers) as tp:
for f in self._dataset_file_entries.values():
future = tp.submit(__verify_file_or_link, target_base_folder, f, part, chunk_selection)
futures_.append(future)
@ -3253,10 +3253,6 @@ class Dataset(object):
verified = all(f.result() for f in futures_)
except Exception:
verified = False
finally:
if tp is not None:
# we already have our result, close all pending checks (improves performance when verified==False)
tp.shutdown(cancel_futures=True)
return verified
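Replacing the manual executor shutdown with a `with` block guarantees the workers are cleaned up even when a check raises. A minimal sketch of the pattern, with hypothetical `entries` and `check`:

```python
from concurrent.futures import ThreadPoolExecutor

def verify_all(entries, check, max_workers=4):
    # The context manager shuts the pool down on success and on error alike
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        futures = [pool.submit(check, entry) for entry in entries]
        return all(f.result() for f in futures)
```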

View File

@ -1,9 +1,8 @@
import abc
import os
import tarfile
import zipfile
import shutil
from tempfile import mkdtemp, mkstemp
from tempfile import mkstemp
import six
import math
@ -1427,12 +1426,31 @@ class Model(BaseModel):
:param project_name: Optional, filter based project name string, if not given query models from all projects
:param model_name: Optional Model name as shown in the model artifactory
:param tags: Filter based on the requested list of tags (strings)
To exclude a tag add "-" prefix to the tag. Example: ['production', 'verified', '-qa']
To include All tags (instead of the default Any behaviour) use "__$all" as the first string, example:
:param tags: Filter based on the requested list of tags (strings).
To exclude a tag add "-" prefix to the tag. Example: ``["production", "verified", "-qa"]``.
The default behaviour is to join all tags with a logical "OR" operator.
To join all tags with a logical "AND" operator instead, use "__$all" as the first string, for example:
.. code-block:: py
["__$all", "best", "model", "ever"]
To combine All tags and exclude a list of tags use "__$not" before the excluded tags, example:
To join all tags with AND, but exclude a tag use "__$not" before the excluded tag, for example:
.. code-block:: py
["__$all", "best", "model", "ever", "__$not", "internal", "__$not", "test"]
The "OR" and "AND" operators apply to all tags that follow them until another operator is specified.
The NOT operator applies only to the immediately following tag.
For example:
.. code-block:: py
["__$all", "a", "b", "c", "__$or", "d", "__$not", "e", "__$and", "__$or" "f", "g"]
This example means ("a" AND "b" AND "c" AND ("d" OR NOT "e") AND ("f" OR "g")).
See https://clear.ml/docs/latest/docs/clearml_sdk/model_sdk#tag-filters for details.
:param only_published: If True, only return published models.
:param include_archived: If True, return archived models.
:param max_results: Optional return the last X models,
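A hedged sketch of these tag filters in practice (project and tag names are illustrative): query models tagged both "production" and "verified" while excluding anything tagged "qa".

```python
from clearml import Model

models = Model.query_models(
    project_name="MyProject",
    tags=["__$all", "production", "verified", "__$not", "qa"],
)
print([m.name for m in models])
```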
@ -1596,6 +1614,7 @@ class InputModel(Model):
# noinspection PyProtectedMember
_EMPTY_MODEL_ID = _Model._EMPTY_MODEL_ID
_WARNING_CONNECTED_NAMES = {}
@classmethod
def import_model(
@ -1933,9 +1952,11 @@ class InputModel(Model):
:param object task: A Task object.
:param str name: The model name to be stored on the Task
(default the filename, of the model weights, without the file extension)
(defaults to the filename of the model weights, without the file extension, or to `Input Model` if no filename is found)
"""
self._set_task(task)
name = name or InputModel._get_connect_name(self)
InputModel._warn_on_same_name_connect(name)
model_id = None
# noinspection PyProtectedMember
@ -1967,6 +1988,28 @@ class InputModel(Model):
if not self._task.get_labels_enumeration() and model.data.labels:
task.set_model_label_enumeration(model.data.labels)
@classmethod
def _warn_on_same_name_connect(cls, name):
if name not in cls._WARNING_CONNECTED_NAMES:
cls._WARNING_CONNECTED_NAMES[name] = False
return
if cls._WARNING_CONNECTED_NAMES[name]:
return
get_logger().warning("Connecting multiple input models with the same name: `{}`. This might result in the wrong model being used when executing remotely".format(name))
cls._WARNING_CONNECTED_NAMES[name] = True
@staticmethod
def _get_connect_name(model):
default_name = "Input Model"
if model is None:
return default_name
# noinspection PyBroadException
try:
model_uri = getattr(model, "url", getattr(model, "uri", None))
return Path(model_uri).stem
except Exception:
return default_name
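A hedged sketch of avoiding the duplicate-name warning added above: give each connected input model an explicit, distinct name (IDs are placeholders).

```python
from clearml import Task, InputModel

task = Task.init(project_name="examples", task_name="connect models")  # hypothetical names
encoder = InputModel(model_id="<encoder-model-id>")  # placeholder IDs
decoder = InputModel(model_id="<decoder-model-id>")
encoder.connect(task, name="encoder weights")
decoder.connect(task, name="decoder weights")
```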
class OutputModel(BaseModel):
"""

View File

@ -791,10 +791,21 @@ class _GoogleCloudStorageDriver(_Driver):
self.name = name[len(_GoogleCloudStorageDriver.scheme_prefix):]
if cfg.credentials_json:
# noinspection PyBroadException
try:
credentials = service_account.Credentials.from_service_account_file(cfg.credentials_json)
except ValueError:
except Exception:
credentials = None
if not credentials:
# noinspection PyBroadException
try:
# Try parsing this as json to support actual json content and not a file path
credentials = service_account.Credentials.from_service_account_info(
json.loads(cfg.credentials_json)
)
except Exception:
pass
else:
credentials = None
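The fallback above means `credentials_json` may now hold either a path to a service-account file or the JSON content itself. A hedged standalone sketch of the same two-step resolution:

```python
import json
from google.oauth2 import service_account

def load_gcs_credentials(credentials_json):
    # First treat the value as a file path...
    try:
        return service_account.Credentials.from_service_account_file(credentials_json)
    except Exception:
        pass
    # ...then fall back to parsing it as inline JSON content
    try:
        return service_account.Credentials.from_service_account_info(json.loads(credentials_json))
    except Exception:
        return None
```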

View File

@ -331,7 +331,9 @@ class Task(_Task):
:param str output_uri: The default location for output models and other artifacts. If True, the default
files_server will be used for model storage. In the default location, ClearML creates a subfolder for the
output. The subfolder structure is the following: `<output destination name> / <project name> / <task name>.<Task ID>`.
output. If set to False, local runs will not upload output models and artifacts,
and remote runs will not use any default values provided using ``default_output_uri``.
The subfolder structure is the following: `<output destination name> / <project name> / <task name>.<Task ID>`.
Note that for cloud storage, you must install the **ClearML** package for your cloud storage type,
and then configure your storage credentials. For detailed information, see "Storage" in the ClearML
Documentation.
@ -606,6 +608,10 @@ class Task(_Task):
task_id=get_remote_task_id(),
log_to_backend=False,
)
if output_uri is False and not task.output_uri:
# Setting output_uri=False disables using any default output URI when running remotely
pass
else:
if task.get_project_object().default_output_destination and not task.output_uri:
task.output_uri = task.get_project_object().default_output_destination
if cls.__default_output_uri and not task.output_uri:
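A hedged usage sketch of the new `output_uri=False` behavior (project and task names are illustrative):

```python
from clearml import Task

task = Task.init(
    project_name="examples",
    task_name="no default upload",
    # False: keep models/artifacts local; remote runs ignore any default output destination
    output_uri=False,
)
```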
@ -931,8 +937,31 @@ class Task(_Task):
If specified, ``project_name`` and ``task_name`` are ignored.
:param str project_name: The project name of the Task to get.
:param str task_name: The name of the Task within ``project_name`` to get.
:param list tags: Filter based on the requested list of tags (strings) (Task must have at least one of the
listed tags). To exclude a tag add "-" prefix to the tag. Example: ["best", "-debug"]
:param list tags: Filter based on the requested list of tags (strings). To exclude a tag add "-" prefix to the
tag. Example: ``["best", "-debug"]``.
The default behaviour is to join all tags with a logical "OR" operator.
To join all tags with a logical "AND" operator instead, use "__$all" as the first string, for example:
.. code-block:: py
["__$all", "best", "experiment", "ever"]
To join all tags with AND, but exclude a tag use "__$not" before the excluded tag, for example:
.. code-block:: py
["__$all", "best", "experiment", "ever", "__$not", "internal", "__$not", "test"]
The "OR" and "AND" operators apply to all tags that follow them until another operator is specified.
The NOT operator applies only to the immediately following tag.
For example:
.. code-block:: py
["__$all", "a", "b", "c", "__$or", "d", "__$not", "e", "__$and", "__$or" "f", "g"]
This example means ("a" AND "b" AND "c" AND ("d" OR NOT "e") AND ("f" OR "g")).
See https://clear.ml/docs/latest/docs/clearml_sdk/task_sdk/#tag-filters for more information.
:param bool allow_archived: Only applicable if *not* using specific ``task_id``,
If True (default), allow to return archived Tasks, if False filter out archived Tasks
:param bool task_filter: Only applicable if *not* using specific ``task_id``,
@ -980,8 +1009,31 @@ class Task(_Task):
avoid any regex behaviour, use re.escape()). (Optional)
To match an exact task name (i.e. not partial matching),
add ^/$ at the beginning/end of the string, for example: "^exact_task_name_here$"
:param list tags: Filter based on the requested list of tags (strings) (Task must have all the listed tags)
To exclude a tag add "-" prefix to the tag. Example: ["best", "-debug"]
:param list tags: Filter based on the requested list of tags (strings). To exclude a tag add "-" prefix to the
tag. Example: ``["best", "-debug"]``.
The default behaviour is to join all tags with a logical "OR" operator.
To join all tags with a logical "AND" operator instead, use "__$all" as the first string, for example:
.. code-block:: py
["__$all", "best", "experiment", "ever"]
To join all tags with AND, but exclude a tag use "__$not" before the excluded tag, for example:
.. code-block:: py
["__$all", "best", "experiment", "ever", "__$not", "internal", "__$not", "test"]
The "OR" and "AND" operators apply to all tags that follow them until another operator is specified.
The NOT operator applies only to the immediately following tag.
For example:
.. code-block:: py
["__$all", "a", "b", "c", "__$or", "d", "__$not", "e", "__$and", "__$or" "f", "g"]
This example means ("a" AND "b" AND "c" AND ("d" OR NOT "e") AND ("f" OR "g")).
See https://clear.ml/docs/latest/docs/clearml_sdk/task_sdk/#tag-filters for more information.
:param bool allow_archived: If True (default), allow to return archived Tasks, if False filter out archived Tasks
:param dict task_filter: filter and order Tasks.
See :class:`.backend_api.service.v?.tasks.GetAllRequest` for details; the ? needs to be replaced by the appropriate version.
@ -1032,17 +1084,30 @@ class Task(_Task):
:param str task_name: task name (str) within the selected project
Return any partial match of task_name, regular expressions matching is also supported.
If None is passed, returns all tasks within the project
:param list tags: Filter based on the requested list of tags (strings)
To exclude a tag add "-" prefix to the tag. Example: ["best", "-debug"]
:param list tags: Filter based on the requested list of tags (strings).
To exclude a tag add "-" prefix to the tag. Example: ``["best", "-debug"]``.
The default behaviour is to join all tags with a logical "OR" operator.
To join all tags with a logical "AND" operator instead, use "__$all" as the first string, for example:
.. code-block:: py
["__$all", "best", "experiment", "ever"]
To join all tags with AND, but exclude a tag use "__$not" before the excluded tag, for example:
.. code-block:: py
["__$all", "best", "experiment", "ever", "__$not", "internal", "__$not", "test"]
The "OR" and "AND" operators apply to all tags that follow them until another operator is specified.
The NOT operator applies only to the immediately following tag.
For example, ["__$all", "a", "b", "c", "__$or", "d", "__$not", "e", "__$and", "__$or" "f", "g"]
means ("a" AND "b" AND "c" AND ("d" OR NOT "e") AND ("f" OR "g")).
For example:
.. code-block:: py
["__$all", "a", "b", "c", "__$or", "d", "__$not", "e", "__$and", "__$or" "f", "g"]
This example means ("a" AND "b" AND "c" AND ("d" OR NOT "e") AND ("f" OR "g")).
See https://clear.ml/docs/latest/docs/clearml_sdk/task_sdk/#tag-filters for more information.
:param list additional_return_fields: Optional, if not provided return a list of Task IDs.
If provided return dict per Task with the additional requested fields.
@ -1330,7 +1395,7 @@ class Task(_Task):
:return: The number of tasks enqueued in the given queue
"""
if not Session.check_min_api_server_version("2.20"):
if not Session.check_min_api_server_version("2.20", raise_error=True):
raise ValueError("You version of clearml-server does not support the 'queues.get_num_entries' endpoint")
mutually_exclusive(queue_name=queue_name, queue_id=queue_id)
session = cls._get_default_session()
@ -1524,6 +1589,7 @@ class Task(_Task):
specified, then a path to a local configuration file is returned. Configuration object.
"""
pathlib_Path = None # noqa
cast_Path = Path
if not isinstance(configuration, (dict, list, Path, six.string_types)):
try:
from pathlib import Path as pathlib_Path # noqa
@ -1532,6 +1598,8 @@ class Task(_Task):
if not pathlib_Path or not isinstance(configuration, pathlib_Path):
raise ValueError("connect_configuration supports `dict`, `str` and 'Path' types, "
"{} is not supported".format(type(configuration)))
if pathlib_Path and isinstance(configuration, pathlib_Path):
cast_Path = pathlib_Path
multi_config_support = Session.check_min_api_version('2.9')
if multi_config_support and not name:
@ -1599,7 +1667,7 @@ class Task(_Task):
# it is a path to a local file
if not running_remotely() or not (self.is_main_task() or self._is_remote_main_task()):
# check if not absolute path
configuration_path = Path(configuration)
configuration_path = cast_Path(configuration)
if not configuration_path.is_file():
ValueError("Configuration file does not exist")
try:
@ -1626,7 +1694,7 @@ class Task(_Task):
"Using default configuration: {}".format(name, str(configuration)))
# update back configuration section
if multi_config_support:
configuration_path = Path(configuration)
configuration_path = cast_Path(configuration)
if configuration_path.is_file():
with open(configuration_path.as_posix(), 'rt') as f:
configuration_text = f.read()
@ -1638,15 +1706,13 @@ class Task(_Task):
config_text=configuration_text)
return configuration
configuration_path = Path(configuration)
configuration_path = cast_Path(configuration)
fd, local_filename = mkstemp(prefix='clearml_task_config_',
suffix=configuration_path.suffixes[-1] if
configuration_path.suffixes else '.txt')
with open(fd, "w") as f:
f.write(configuration_text)
if pathlib_Path:
return pathlib_Path(local_filename)
return Path(local_filename) if isinstance(configuration, Path) else local_filename
return cast_Path(local_filename) if isinstance(configuration, cast_Path) else local_filename
def connect_label_enumeration(self, enumeration):
# type: (Dict[str, int]) -> Dict[str, int]

View File

@ -2,12 +2,19 @@ import attr
from .version import Version
try:
# noinspection PyUnresolvedReferences
import importlib.metadata
attr_version = importlib.metadata.version("attrs")
except ImportError:
attr_version = attr.__version__
class attrs(object):
def __init__(self, *args, **kwargs):
if any(x in kwargs for x in ("eq", "order")):
raise RuntimeError("Only `cmp` is supported for attr.attrs, not `eq` or `order`")
if Version(attr.__version__) >= Version("19.2"):
if Version(attr_version) >= Version("19.2"):
cmp = kwargs.pop("cmp", None)
if cmp is not None:
kwargs["eq"] = kwargs["order"] = cmp

View File

@ -1 +1 @@
__version__ = '1.13.1'
__version__ = '1.13.2'

View File

@ -2,9 +2,11 @@ clearml
jsonschema==3.2.0 ; python_version <= '3.5'
matplotlib
pytorch-ignite
tensorboard>=1.14.0
tensorboard<=2.11.2 ; python_version <= '3.7'
tensorboard>2.11.2 ; python_version > '3.7'
tensorboardX
torch>=1.1.0
torchvision>=0.3.0
tqdm
protobuf>=4.21.1
protobuf==3.20.* ; python_version <= '3.7'
protobuf>=4.21.1 ; python_version > '3.7'

View File

@ -21,14 +21,14 @@ task = Task.init(project_name='FirstTrial', task_name='config_files_example')
config_file = task.connect_configuration(Path("data_samples") / "sample.json", name='json config file')
with open(config_file, "rt") as f:
with open(config_file.as_posix(), "rt") as f:
config_json = json.load(f)
print(config_json)
config_file = task.connect_configuration(Path("data_samples") / "config_yaml.yaml", name='yaml config file')
with open(config_file, "rt") as f:
with open(config_file.as_posix(), "rt") as f:
config_yaml = yaml.load(f, Loader=yaml.SafeLoader)
print(config_yaml)

View File

@ -14,6 +14,7 @@ python-dateutil>=2.6.1
pyjwt>=2.4.0,<2.9.0 ; python_version > '3.5'
pyjwt>=1.6.4,<2.0.0 ; python_version <= '3.5'
PyYAML>=3.12
referencing<0.40 ; python_version >= '3.8'
requests>=2.20.0
six>=1.13.0
typing>=3.6.4 ; python_version < '3.5'