mirror of https://github.com/clearml/clearml (synced 2025-06-26 18:16:07 +00:00)

Commit 36f66d466a: Merge branch 'master' of https://github.com/allegroai/clearml

README.md (11 lines changed)
@@ -4,8 +4,8 @@
 <img align="center" src="docs/clearml-logo.svg#gh-light-mode-only" alt="ClearML"><img align="center" src="docs/clearml-logo-dark.svg#gh-dark-mode-only" alt="ClearML">
 </p>

-**[ClearML](https://clear.ml) - Auto-Magical Suite of tools to streamline your ML workflow
-</br>Experiment Manager, MLOps and Data-Management**
+**[ClearML](https://clear.ml) - Auto-Magical Suite of tools to streamline your AI workflow
+</br>Experiment Manager, MLOps/LLMOps and Data-Management**

 [](https://img.shields.io/github/license/allegroai/clearml.svg) [](https://img.shields.io/pypi/pyversions/clearml.svg) [](https://pypi.org/project/clearml/) [](https://anaconda.org/clearml/clearml) [](https://optuna.org)<br>
 [](https://pypi.org/project/clearml/) [](https://artifacthub.io/packages/search?repo=allegroai) [](https://www.youtube.com/c/clearml) [](https://joinslack.clear.ml) [](https://app.clear.ml)

@@ -19,14 +19,15 @@
 ClearML is a ML/DL development and production suite. It contains FIVE main modules:

 - [Experiment Manager](#clearml-experiment-manager) - Automagical experiment tracking, environments and results
-- [MLOps](https://github.com/allegroai/clearml-agent) - Orchestration, Automation & Pipelines solution for ML/DL jobs (K8s / Cloud / bare-metal)
+- [MLOps / LLMOps](https://github.com/allegroai/clearml-agent) - Orchestration, Automation & Pipelines solution for ML/DL/GenAI jobs (Kubernetes / Cloud / bare-metal)
 - [Data-Management](https://github.com/allegroai/clearml/blob/master/docs/datasets.md) - Fully differentiable data management & version control solution on top of object-storage
   (S3 / GS / Azure / NAS)
 - [Model-Serving](https://github.com/allegroai/clearml-serving) - *cloud-ready* Scalable model serving solution!
   - **Deploy new model endpoints in under 5 minutes**
   - Includes optimized GPU serving support backed by Nvidia-Triton
   - **with out-of-the-box Model Monitoring**
-- **NEW** :fire: [Reports](https://clear.ml/docs/latest/docs/webapp/webapp_reports) - Create and share rich MarkDown documents supporting embeddable online content
+- [Reports](https://clear.ml/docs/latest/docs/webapp/webapp_reports) - Create and share rich MarkDown documents supporting embeddable online content
+- **NEW** :fire: [Orchestration Dashboard](https://clear.ml/docs/latest/docs/webapp/webapp_orchestration_dash/) - Live rich dashboard for your entire compute cluster (Cloud / Kubernetes / On-Prem)

 Instrumenting these components is the **ClearML-server**, see [Self-Hosting](https://clear.ml/docs/latest/docs/deploying_clearml/clearml_server) & [Free tier Hosting](https://app.clear.ml)

@@ -141,7 +142,7 @@ The ClearML run-time components:
 - [clearml-session](https://github.com/allegroai/clearml-session) - **Launch remote JupyterLab / VSCode-server inside any docker, on Cloud/On-Prem machines**
 - [clearml-task](https://github.com/allegroai/clearml/blob/master/docs/clearml-task.md) - Run any codebase on remote machines with full remote logging of Tensorboard, Matplotlib & Console outputs
 - [clearml-data](https://github.com/allegroai/clearml/blob/master/docs/datasets.md) - **CLI for managing and versioning your datasets, including creating / uploading / downloading of data from S3/GS/Azure/NAS**
-- [AWS Auto-Scaler](https://clear.ml/docs/latest/docs/guides/services/aws_autoscaler) - Automatically spin EC2 instances based on your workloads with preconfigured budget! No need for K8s!
+- [AWS Auto-Scaler](https://clear.ml/docs/latest/docs/guides/services/aws_autoscaler) - Automatically spin EC2 instances based on your workloads with preconfigured budget! No need for AKE!
 - [Hyper-Parameter Optimization](https://clear.ml/docs/latest/docs/guides/optimization/hyper-parameter-optimization/examples_hyperparam_opt) - Optimize any code with black-box approach and state-of-the-art Bayesian optimization algorithms
 - [Automation Pipeline](https://clear.ml/docs/latest/docs/guides/pipeline/pipeline_controller) - Build pipelines based on existing experiments / jobs, supports building pipelines of pipelines!
 - [Slack Integration](https://clear.ml/docs/latest/docs/guides/services/slack_alerts) - Report experiments progress / failure directly to Slack (fully customizable!)
SECURITY.md (new file, 5 lines)
@@ -0,0 +1,5 @@
+## Reporting Security Issues
+
+Thanks for taking the time to make ClearML more secure!
+
+To carry on the discussion more securely - Please send your report to [security@clear.ml](mailto:security@clear.ml).
@@ -142,7 +142,7 @@ class PipelineController(object):
             try:
                 self.job.task.reload()
                 self.job_ended = self.job_started + self.job.task.data.active_duration
-            except Exception as e:
+            except Exception:
                 pass

         def set_job_started(self):
@@ -154,7 +154,6 @@ class PipelineController(object):
             except Exception:
                 pass

-
    def __init__(
            self,
            name,  # type: str
@@ -2733,7 +2732,7 @@ class PipelineController(object):
                 if node_failed and self._abort_running_steps_on_failure and not node.continue_on_fail:
                     nodes_failed_stop_pipeline.append(node.name)
                 elif node.timeout:
+                    node.set_job_started()
                     started = node.job.task.data.started
                     if (datetime.now().astimezone(started.tzinfo) - started).total_seconds() > node.timeout:
                         node.job.abort()
                         completed_jobs.append(j)
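Note: the timeout check in the last hunk relies on timezone-aware datetime arithmetic; the server-reported `started` timestamp carries a tzinfo, so the local clock is first converted into that zone before subtracting. A minimal sketch of the same pattern, with illustrative values that are not taken from the commit:

```python
from datetime import datetime, timedelta, timezone

# Pretend the server reported this tz-aware start time (illustrative value)
task_started = datetime.now(timezone.utc) - timedelta(minutes=5)
timeout_seconds = 180  # illustrative node.timeout

# Convert the local clock into the same timezone before subtracting,
# exactly like the controller does with started.tzinfo
elapsed = (datetime.now().astimezone(task_started.tzinfo) - task_started).total_seconds()
if elapsed > timeout_seconds:
    print("node timed out after {:.0f}s".format(elapsed))
```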
@@ -32,7 +32,10 @@ class ApiServiceProxy(object):
         )

         # get the most advanced service version that supports our api
-        version = [str(v) for v in ApiServiceProxy._available_versions if Session.check_min_api_version(v)][-1]
+        version = [
+            str(v) for v in ApiServiceProxy._available_versions
+            if Session.check_min_api_version(v)
+        ][-1]
         Session.api_version = version
         self.__dict__["__wrapped_version__"] = Session.api_version
         name = ".v{}.{}".format(
@@ -490,7 +490,7 @@ class Queue(NonStrictDataModel):
             self._property_metadata = None
             return

-        self.assert_isinstance(value, "metadata", (dict,), is_array=True)
+        self.assert_isinstance(value, "metadata", (dict,))
         self._property_metadata = value
@@ -40,7 +40,6 @@ for a very long time for a non-responding or mis-configured server
 """
 ENV_API_EXTRA_RETRY_CODES = EnvEntry("CLEARML_API_EXTRA_RETRY_CODES")

-
 ENV_FORCE_MAX_API_VERSION = EnvEntry("CLEARML_FORCE_MAX_API_VERSION", type=str)
@@ -1,7 +1,14 @@
 import abc

 import jsonschema
 import six
+from jsonschema.exceptions import FormatError, SchemaError, ValidationError
+
+try:
+    # Since `referencing` only supports Python >= 3.8, this try-except block maintains support
+    # for earlier python versions.
+    from referencing.exceptions import Unresolvable
+except ImportError:
+    from jsonschema.exceptions import RefResolutionError as Unresolvable

 from .apimodel import ApiModel
 from .datamodel import DataModel
@@ -39,8 +46,7 @@ class BatchRequest(Request):

     _batched_request_cls = abc.abstractproperty()

-    _schema_errors = (jsonschema.SchemaError, jsonschema.ValidationError, jsonschema.FormatError,
-                      jsonschema.RefResolutionError)
+    _schema_errors = (SchemaError, ValidationError, FormatError, Unresolvable)

     def __init__(self, requests, validate_requests=False, allow_raw_requests=True, **kwargs):
         super(BatchRequest, self).__init__(**kwargs)
@@ -70,8 +76,7 @@ class BatchRequest(Request):
         for i, req in enumerate(self.requests):
             try:
                 req.validate()
-            except (jsonschema.SchemaError, jsonschema.ValidationError,
-                    jsonschema.FormatError, jsonschema.RefResolutionError) as e:
+            except (SchemaError, ValidationError, FormatError, Unresolvable) as e:
                 raise Exception('Validation error in batch item #%d: %s' % (i, str(e)))

     def get_json(self):
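Note: newer `jsonschema` releases (4.18+) moved ref-resolution errors into the separate `referencing` package and deprecated `RefResolutionError`, which is why the exception classes are now imported once under a common `Unresolvable` name. A standalone sketch of the same compatibility pattern:

```python
import jsonschema
from jsonschema.exceptions import FormatError, SchemaError, ValidationError

try:
    # jsonschema >= 4.18 raises referencing-based errors for unresolvable $refs
    from referencing.exceptions import Unresolvable
except ImportError:
    # older jsonschema versions still expose RefResolutionError
    from jsonschema.exceptions import RefResolutionError as Unresolvable

SCHEMA_ERRORS = (SchemaError, ValidationError, FormatError, Unresolvable)

try:
    jsonschema.validate({"name": 1}, {"type": "object", "properties": {"name": {"type": "string"}}})
except SCHEMA_ERRORS as e:
    print("validation failed:", e)
```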
@@ -12,6 +12,7 @@ import jwt
 import requests
 import six
 from requests.auth import HTTPBasicAuth
+from requests.exceptions import ChunkedEncodingError, ContentDecodingError, StreamConsumedError
 from six.moves.urllib.parse import urlparse, urlunparse
 from typing import List, Optional

@@ -31,6 +32,7 @@ from .defs import (
     ENV_API_EXTRA_RETRY_CODES,
     ENV_API_DEFAULT_REQ_METHOD,
     ENV_FORCE_MAX_API_VERSION,
+    ENV_IGNORE_MISSING_CONFIG,
     MissingConfigError
 )
 from .request import Request, BatchRequest  # noqa: F401
@@ -417,6 +419,13 @@ class Session(TokenManager):
                     (self._logger or get_logger()).warning("SSLError Retrying {}".format(ex))
                 sleep(0.1)
                 continue
+            except (ChunkedEncodingError, ContentDecodingError, StreamConsumedError) as ex:
+                retry_counter += 1
+                # we should retry
+                if retry_counter >= self._ssl_error_count_verbosity:
+                    (self._logger or get_logger()).warning("Network decoding error Retrying {}".format(ex))
+                sleep(0.1)
+                continue

             if (
                 refresh_token_if_unauthorized
@@ -736,7 +745,7 @@ class Session(TokenManager):
         return urlunparse(parsed)

     @classmethod
-    def check_min_api_version(cls, min_api_version):
+    def check_min_api_version(cls, min_api_version, raise_error=False):
         """
         Return True if Session.api_version is greater or equal >= to min_api_version
         """
@@ -764,18 +773,24 @@ class Session(TokenManager):
         # noinspection PyBroadException
         try:
             cls()
+        except MissingConfigError:
+            if raise_error and not ENV_IGNORE_MISSING_CONFIG.get():
+                raise
+        except LoginError:
+            if raise_error:
+                raise
         except Exception:
             pass

         return cls._version_tuple(cls.api_version) >= cls._version_tuple(str(min_api_version))

     @classmethod
-    def check_min_api_server_version(cls, min_api_version):
+    def check_min_api_server_version(cls, min_api_version, raise_error=False):
         """
         Return True if Session.max_api_version is greater or equal >= to min_api_version
         Notice this is the api version server reported, not the current SDK max supported api version
         """
-        if cls.check_min_api_version(min_api_version):
+        if cls.check_min_api_version(min_api_version, raise_error=raise_error):
             return True

         return cls._version_tuple(cls.max_api_version) >= cls._version_tuple(str(min_api_version))
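Note: the new `raise_error` flag changes the failure mode of the version checks. Previously a missing or invalid configuration was silently swallowed and the comparison fell back to default values; now callers such as `Model.archive()` and `Dataset.create()` can surface `MissingConfigError` or `LoginError` to the user. A hedged sketch of how a caller might use it ("2.13" mirrors the version used elsewhere in this commit):

```python
from clearml.backend_api.session import Session

def server_supports_batch_archive():
    # raise_error=True propagates configuration/login problems instead of
    # silently returning a result computed from default values
    return Session.check_min_api_server_version("2.13", raise_error=True)
```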
@@ -178,8 +178,9 @@ class IdObjectBase(InterfaceBase):
         # noinspection PyBroadException
         try:
             self._data = self._reload()
-        except Exception:
-            self.log.error("Failed reloading task {}".format(self.id))
+        except Exception as ex:
+            self.log.error("Failed reloading {} {}".format(type(self).__name__.lower(), self.id))
+            self.log.debug("Failed reloading {} {}: {}".format(type(self).__name__.lower(), self.id, ex))

     @classmethod
     def normalize_id(cls, id):
@@ -79,7 +79,7 @@ class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin):
         self.reload()

     def archive(self):
-        if Session.check_min_api_server_version("2.13"):
+        if Session.check_min_api_server_version("2.13", raise_error=True):
             self.send(models.ArchiveManyRequest(ids=[self.id]))
             self.reload()
         else:
@@ -90,7 +90,7 @@ class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin):
         )

     def unarchive(self):
-        if Session.check_min_api_server_version("2.13"):
+        if Session.check_min_api_server_version("2.13", raise_error=True):
             self.send(models.UnarchiveManyRequest(ids=[self.id]))
             self.reload()
         else:
@@ -263,8 +263,8 @@ class TaskHandler(BufferingHandler):
         # flush pending logs
         if not self._task_id:
             return
-        # avoid deadlocks just skip the lock, we are shutting down anyway
-        self.lock = None
+        # Never null the lock, it might be used by internal Python at some point
+        # self.lock = None
         self._task_id = None

         # shut down the TaskHandler, from this point onwards. No events will be logged
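Note: this reverses an earlier workaround. `logging.Handler` creates its lock in `createLock()`, and the standard library can still call `acquire()`/`release()` on it during `flush()` and interpreter shutdown, so replacing the lock with `None` risks breaking internal locking. A small sketch of a shutdown-safe buffering handler that flushes without touching the lock (this class is illustrative, not the commit's `TaskHandler`):

```python
import logging
from logging.handlers import BufferingHandler

class SafeBufferingHandler(BufferingHandler):
    def close(self):
        # Do NOT set self.lock = None here: logging internals may still
        # acquire()/release() the handler lock during interpreter shutdown.
        try:
            self.flush()  # push out anything still buffered
        finally:
            super(SafeBufferingHandler, self).close()

handler = SafeBufferingHandler(capacity=100)
logging.getLogger("demo").addHandler(handler)
```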
@@ -1088,12 +1088,13 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
             if not model.ready:
                 # raise ValueError('Model %s is not published (not ready)' % model_id)
                 self.log.debug('Model %s [%s] is not published yet (not ready)' % (model_id, model.uri))
-            name = name or Path(model.uri).stem
         else:
             # clear the input model
             model = None
             model_id = ''
-            name = name or 'Input Model'
+        from ...model import InputModel
+        # noinspection PyProtectedMember
+        name = name or InputModel._get_connect_name(model)

         with self._edit_lock:
             self.reload()
@@ -1345,7 +1346,7 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
         return params.get(name, default)

     def delete_parameter(self, name, force=False):
-        # type: (str) -> bool
+        # type: (str, bool) -> bool
         """
         Delete a parameter by its full name Section/name.
@@ -1,7 +1,6 @@
 import io
 import sys
 from functools import partial
-
 import yaml
 from ..config import running_remotely, get_remote_task_id, DEV_TASK_NO_REUSE
 from ..debugging.log import LoggerRoot
@@ -17,6 +16,9 @@ class PatchHydra(object):
     _parameter_allow_full_edit = '_allow_omegaconf_edit_'
     _should_delete_overrides = False
     _overrides_section = "Args/overrides"
+    _default_hydra_context = None
+    _overrides_parser = None
+    _config_group_warning_sent = False

     @classmethod
     def patch_hydra(cls):
@@ -83,6 +85,7 @@ class PatchHydra(object):

     @staticmethod
     def _patched_hydra_run(self, config_name, task_function, overrides, *args, **kwargs):
+        PatchHydra._default_hydra_context = self
         PatchHydra._allow_omegaconf_edit = False
         if not running_remotely():
             return PatchHydra._original_hydra_run(self, config_name, task_function, overrides, *args, **kwargs)
@@ -104,12 +107,11 @@ class PatchHydra(object):
                               if k.startswith(PatchHydra._parameter_section+'/')}
                 stored_config.pop(PatchHydra._parameter_allow_full_edit, None)
                 for override_k, override_v in stored_config.items():
-                    if override_k.startswith("~"):
-                        new_override = override_k
-                    else:
-                        new_override = "++" + override_k.lstrip("+")
+                    new_override = override_k
                     if override_v is not None and override_v != "":
                         new_override += "=" + override_v
+                    if not new_override.startswith("~") and not PatchHydra._is_group(self, new_override):
+                        new_override = "++" + new_override.lstrip("+")
                     overrides.append(new_override)
                 PatchHydra._should_delete_overrides = True
             except Exception:
@@ -117,6 +119,28 @@ class PatchHydra(object):

         return PatchHydra._original_hydra_run(self, config_name, task_function, overrides, *args, **kwargs)

+    @staticmethod
+    def _parse_override(override):
+        if PatchHydra._overrides_parser is None:
+            from hydra.core.override_parser.overrides_parser import OverridesParser
+            PatchHydra._overrides_parser = OverridesParser.create()
+        return PatchHydra._overrides_parser.parse_overrides(overrides=[override])[0]
+
+    @staticmethod
+    def _is_group(hydra_context, override):
+        # noinspection PyBroadException
+        try:
+            override = PatchHydra._parse_override(override)
+            group_exists = hydra_context.config_loader.repository.group_exists(override.key_or_group)
+            return group_exists
+        except Exception:
+            if not PatchHydra._config_group_warning_sent:
+                LoggerRoot.get_base_logger().warning(
+                    "Could not determine if Hydra is overriding a Config Group"
+                )
+                PatchHydra._config_group_warning_sent = True
+            return False
+
     @staticmethod
     def _patched_run_job(config, task_function, *args, **kwargs):
         # noinspection PyBroadException
@@ -125,11 +149,12 @@ class PatchHydra(object):
             failed_status = JobStatus.FAILED
         except Exception:
-            LoggerRoot.get_base_logger(PatchHydra).warning(
+            LoggerRoot.get_base_logger().warning(
                 "Could not import JobStatus from Hydra. Failed tasks will be marked as completed"
             )
             failed_status = None

+        hydra_context = kwargs.get("hydra_context", PatchHydra._default_hydra_context)
         # store the config
         # noinspection PyBroadException
         try:
@@ -139,6 +164,7 @@ class PatchHydra(object):
             overrides = config.hydra.overrides.task
             stored_config = {}
             for arg in overrides:
-                arg = arg.lstrip("+")
+                if not PatchHydra._is_group(hydra_context, arg):
+                    arg = arg.lstrip("+")
                 if "=" in arg:
                     k, v = arg.split("=", 1)
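Note: the new `_is_group` logic exists because Hydra's override grammar distinguishes config-group overrides (e.g. `db=mysql`) from plain config-value overrides, and deletions keep their `~` prefix; blindly forcing `++` onto a group override is rejected by Hydra. A sketch of how stored overrides end up re-prefixed when a task is re-executed remotely (the helper and its values are illustrative):

```python
def reprefix_override(key, value, is_group):
    # Deletions (~key) and config groups pass through unchanged;
    # plain values are forced with "++" so they apply even if absent
    new_override = key
    if value is not None and value != "":
        new_override += "=" + str(value)
    if not new_override.startswith("~") and not is_group:
        new_override = "++" + new_override.lstrip("+")
    return new_override

print(reprefix_override("db", "mysql", is_group=True))       # db=mysql
print(reprefix_override("trainer.lr", 0.1, is_group=False))  # ++trainer.lr=0.1
print(reprefix_override("~seed", None, is_group=False))      # ~seed
```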
@@ -1241,7 +1241,7 @@ class Dataset(object):

         :return: Newly created Dataset object
         """
-        if not Dataset.is_offline() and not Session.check_min_api_server_version("2.13"):
+        if not Dataset.is_offline() and not Session.check_min_api_server_version("2.13", raise_error=True):
             raise NotImplementedError("Datasets are not supported with your current ClearML server version. Please update your server.")

         parent_datasets = [cls.get(dataset_id=p) if not isinstance(p, Dataset) else p for p in (parent_datasets or [])]
@@ -1531,7 +1531,7 @@ class Dataset(object):
         """
         if Dataset.is_offline():
             raise ValueError("Cannot rename dataset in offline mode")
-        if not bool(Session.check_min_api_server_version(cls.__min_api_version)):
+        if not bool(Session.check_min_api_server_version(cls.__min_api_version, raise_error=True)):
             LoggerRoot.get_base_logger().warning(
                 "Could not rename dataset because API version < {}".format(cls.__min_api_version)
             )
@@ -1578,7 +1578,7 @@ class Dataset(object):
         """
         if cls.is_offline():
             raise ValueError("Cannot move dataset project in offline mode")
-        if not bool(Session.check_min_api_server_version(cls.__min_api_version)):
+        if not bool(Session.check_min_api_server_version(cls.__min_api_version, raise_error=True)):
             LoggerRoot.get_base_logger().warning(
                 "Could not move dataset to another project because API version < {}".format(cls.__min_api_version)
             )
@@ -3221,14 +3221,14 @@ class Dataset(object):
         # type: (Path, Union[FileEntry, LinkEntry], Optional[int], Optional[dict]) -> bool

         # check if we need the file for the requested dataset part
-        if ds_part is not None:
-            f_parts = ds_chunk_selection.get(file_entry.parent_dataset_id, [])
+        if part is not None:
+            f_parts = chunk_selection.get(file_entry.parent_dataset_id, [])
             # file is not in requested dataset part, no need to check it.
             if self._get_chunk_idx_from_artifact_name(file_entry.artifact_name) not in f_parts:
                 return True

         # check if the local size and the stored size match (faster than comparing hash)
-        if (base_folder / file_entry.relative_path).stat().st_size != file_entry.size:
+        if (target_base_folder / file_entry.relative_path).stat().st_size != file_entry.size:
             return False

         return True
@@ -3240,7 +3240,7 @@ class Dataset(object):
         tp = None
         try:
             futures_ = []
-            tp = ThreadPoolExecutor(max_workers=max_workers)
+            with ThreadPoolExecutor(max_workers=max_workers) as tp:
                 for f in self._dataset_file_entries.values():
                     future = tp.submit(__verify_file_or_link, target_base_folder, f, part, chunk_selection)
                     futures_.append(future)
@@ -3253,10 +3253,6 @@ class Dataset(object):
             verified = all(f.result() for f in futures_)
         except Exception:
             verified = False
-        finally:
-            if tp is not None:
-                # we already have our result, close all pending checks (improves performance when verified==False)
-                tp.shutdown(cancel_futures=True)

         return verified
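Note: the verification loop now uses `ThreadPoolExecutor` as a context manager and drops the manual `finally: tp.shutdown(cancel_futures=True)` cleanup (`cancel_futures` requires Python 3.9+ and cancels queued work that has not started yet). A minimal sketch of the new pattern, with a toy check standing in for the per-file verification:

```python
from concurrent.futures import ThreadPoolExecutor

def verify(n):
    # toy stand-in for a per-file size/hash check
    return n % 2 == 0

with ThreadPoolExecutor(max_workers=4) as pool:
    futures = [pool.submit(verify, n) for n in range(100)]
    verified = all(f.result() for f in futures)
# leaving the with-block calls pool.shutdown(wait=True) automatically
print(verified)
```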
@@ -1,9 +1,8 @@
 import abc
 import os
 import tarfile
 import zipfile
 import shutil
-from tempfile import mkdtemp, mkstemp
+from tempfile import mkstemp

 import six
 import math
@@ -1427,12 +1426,31 @@ class Model(BaseModel):

         :param project_name: Optional, filter based project name string, if not given query models from all projects
         :param model_name: Optional Model name as shown in the model artifactory
-        :param tags: Filter based on the requested list of tags (strings)
-            To exclude a tag add "-" prefix to the tag. Example: ['production', 'verified', '-qa']
-            To include All tags (instead of the default Any behaviour) use "__$all" as the first string, example:
-            ["__$all", "best", "model", "ever"]
-            To combine All tags and exclude a list of tags use "__$not" before the excluded tags, example:
-            ["__$all", "best", "model", "ever", "__$not", "internal", "__$not", "test"]
+        :param tags: Filter based on the requested list of tags (strings).
+            To exclude a tag add "-" prefix to the tag. Example: ``["production", "verified", "-qa"]``.
+            The default behaviour is to join all tags with a logical "OR" operator.
+            To join all tags with a logical "AND" operator instead, use "__$all" as the first string, for example:
+
+            .. code-block:: py
+
+                ["__$all", "best", "model", "ever"]
+
+            To join all tags with AND, but exclude a tag use "__$not" before the excluded tag, for example:
+
+            .. code-block:: py
+
+                ["__$all", "best", "model", "ever", "__$not", "internal", "__$not", "test"]
+
+            The "OR" and "AND" operators apply to all tags that follow them until another operator is specified.
+            The NOT operator applies only to the immediately following tag.
+            For example:
+
+            .. code-block:: py
+
+                ["__$all", "a", "b", "c", "__$or", "d", "__$not", "e", "__$and", "__$or" "f", "g"]
+
+            This example means ("a" AND "b" AND "c" AND ("d" OR NOT "e") AND ("f" OR "g")).
+            See https://clear.ml/docs/latest/docs/clearml_sdk/model_sdk#tag-filters for details.
         :param only_published: If True, only return published models.
         :param include_archived: If True, return archived models.
         :param max_results: Optional return the last X models,
@@ -1596,6 +1614,7 @@ class InputModel(Model):

     # noinspection PyProtectedMember
     _EMPTY_MODEL_ID = _Model._EMPTY_MODEL_ID
+    _WARNING_CONNECTED_NAMES = {}

     @classmethod
     def import_model(
@@ -1933,9 +1952,11 @@ class InputModel(Model):

         :param object task: A Task object.
         :param str name: The model name to be stored on the Task
-            (default the filename, of the model weights, without the file extension)
+            (default to filename of the model weights, without the file extension, or to `Input Model` if that is not found)
         """
         self._set_task(task)
+        name = name or InputModel._get_connect_name(self)
+        InputModel._warn_on_same_name_connect(name)

         model_id = None
         # noinspection PyProtectedMember
@@ -1967,6 +1988,28 @@ class InputModel(Model):
         if not self._task.get_labels_enumeration() and model.data.labels:
             task.set_model_label_enumeration(model.data.labels)

+    @classmethod
+    def _warn_on_same_name_connect(cls, name):
+        if name not in cls._WARNING_CONNECTED_NAMES:
+            cls._WARNING_CONNECTED_NAMES[name] = False
+            return
+        if cls._WARNING_CONNECTED_NAMES[name]:
+            return
+        get_logger().warning("Connecting multiple input models with the same name: `{}`. This might result in the wrong model being used when executing remotely".format(name))
+        cls._WARNING_CONNECTED_NAMES[name] = True
+
+    @staticmethod
+    def _get_connect_name(model):
+        default_name = "Input Model"
+        if model is None:
+            return default_name
+        # noinspection PyBroadException
+        try:
+            model_uri = getattr(model, "url", getattr(model, "uri", None))
+            return Path(model_uri).stem
+        except Exception:
+            return default_name
+

 class OutputModel(BaseModel):
     """
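Note: the rewritten docstring documents the tag-filter operators (`__$all`, `__$not`, `__$or`, `__$and`) accepted by the query methods. A short, hedged usage sketch (the project name and tags are made up):

```python
from clearml import Model

# Models tagged "production" AND "verified", excluding anything tagged "qa"
models = Model.query_models(
    project_name="examples",  # illustrative project
    tags=["__$all", "production", "verified", "__$not", "qa"],
)
for m in models:
    print(m.id, m.name)
```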
@@ -791,10 +791,21 @@ class _GoogleCloudStorageDriver(_Driver):
         self.name = name[len(_GoogleCloudStorageDriver.scheme_prefix):]

         if cfg.credentials_json:
+            # noinspection PyBroadException
             try:
                 credentials = service_account.Credentials.from_service_account_file(cfg.credentials_json)
-            except ValueError:
+            except Exception:
                 credentials = None
+
+            if not credentials:
+                # noinspection PyBroadException
+                try:
+                    # Try parsing this as json to support actual json content and not a file path
+                    credentials = service_account.Credentials.from_service_account_info(
+                        json.loads(cfg.credentials_json)
+                    )
+                except Exception:
+                    pass
         else:
             credentials = None
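Note: with this change, the `credentials_json` setting may hold either a path to a service-account file or the raw JSON content itself; the driver first tries the file path and then falls back to parsing the string as JSON. A standalone sketch of the same fallback (requires `google-auth`; the helper and its argument are illustrative):

```python
import json
from google.oauth2 import service_account

def load_gcs_credentials(creds_setting):
    # First interpret the setting as a path to a service-account file...
    try:
        return service_account.Credentials.from_service_account_file(creds_setting)
    except Exception:
        pass
    # ...then fall back to treating it as inline JSON content
    try:
        return service_account.Credentials.from_service_account_info(json.loads(creds_setting))
    except Exception:
        return None
```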
@@ -331,7 +331,9 @@ class Task(_Task):

         :param str output_uri: The default location for output models and other artifacts. If True, the default
             files_server will be used for model storage. In the default location, ClearML creates a subfolder for the
-            output. The subfolder structure is the following: `<output destination name> / <project name> / <task name>.<Task ID>`.
+            output. If set to False, local runs will not upload output models and artifacts,
+            and remote runs will not use any default values provided using ``default_output_uri``.
+            The subfolder structure is the following: `<output destination name> / <project name> / <task name>.<Task ID>`.
             Note that for cloud storage, you must install the **ClearML** package for your cloud storage type,
             and then configure your storage credentials. For detailed information, see "Storage" in the ClearML
             Documentation.
@@ -606,6 +608,10 @@ class Task(_Task):
                 task_id=get_remote_task_id(),
                 log_to_backend=False,
             )
+            if output_uri is False and not task.output_uri:
+                # Setting output_uri=False argument will disable using any default when running remotely
+                pass
+            else:
                 if task.get_project_object().default_output_destination and not task.output_uri:
                     task.output_uri = task.get_project_object().default_output_destination
                 if cls.__default_output_uri and not task.output_uri:
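Note: in short, `output_uri=False` now opts a task out of every default upload destination, including a project's `default_output_destination` when the task is re-run remotely. A hedged usage sketch (project and task names are illustrative):

```python
from clearml import Task

# Track the experiment, but never upload models/artifacts to any
# default destination (applies to both local and remote execution)
task = Task.init(
    project_name="examples",    # illustrative
    task_name="no-upload-run",  # illustrative
    output_uri=False,
)
```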
@@ -931,8 +937,31 @@ class Task(_Task):
             If specified, ``project_name`` and ``task_name`` are ignored.
         :param str project_name: The project name of the Task to get.
         :param str task_name: The name of the Task within ``project_name`` to get.
-        :param list tags: Filter based on the requested list of tags (strings) (Task must have at least one of the
-            listed tags). To exclude a tag add "-" prefix to the tag. Example: ["best", "-debug"]
+        :param list tags: Filter based on the requested list of tags (strings). To exclude a tag add "-" prefix to the
+            tag. Example: ``["best", "-debug"]``.
+            The default behaviour is to join all tags with a logical "OR" operator.
+            To join all tags with a logical "AND" operator instead, use "__$all" as the first string, for example:
+
+            .. code-block:: py
+
+                ["__$all", "best", "experiment", "ever"]
+
+            To join all tags with AND, but exclude a tag use "__$not" before the excluded tag, for example:
+
+            .. code-block:: py
+
+                ["__$all", "best", "experiment", "ever", "__$not", "internal", "__$not", "test"]
+
+            The "OR" and "AND" operators apply to all tags that follow them until another operator is specified.
+            The NOT operator applies only to the immediately following tag.
+            For example:
+
+            .. code-block:: py
+
+                ["__$all", "a", "b", "c", "__$or", "d", "__$not", "e", "__$and", "__$or" "f", "g"]
+
+            This example means ("a" AND "b" AND "c" AND ("d" OR NOT "e") AND ("f" OR "g")).
+            See https://clear.ml/docs/latest/docs/clearml_sdk/task_sdk/#tag-filters for more information.
         :param bool allow_archived: Only applicable if *not* using specific ``task_id``,
             If True (default), allow to return archived Tasks, if False filter out archived Tasks
         :param bool task_filter: Only applicable if *not* using specific ``task_id``,
@@ -980,8 +1009,31 @@ class Task(_Task):
             avoid any regex behaviour, use re.escape()). (Optional)
             To match an exact task name (i.e. not partial matching),
             add ^/$ at the beginning/end of the string, for example: "^exact_task_name_here$"
-        :param list tags: Filter based on the requested list of tags (strings) (Task must have all the listed tags)
-            To exclude a tag add "-" prefix to the tag. Example: ["best", "-debug"]
+        :param list tags: Filter based on the requested list of tags (strings). To exclude a tag add "-" prefix to the
+            tag. Example: ``["best", "-debug"]``.
+            The default behaviour is to join all tags with a logical "OR" operator.
+            To join all tags with a logical "AND" operator instead, use "__$all" as the first string, for example:
+
+            .. code-block:: py
+
+                ["__$all", "best", "experiment", "ever"]
+
+            To join all tags with AND, but exclude a tag use "__$not" before the excluded tag, for example:
+
+            .. code-block:: py
+
+                ["__$all", "best", "experiment", "ever", "__$not", "internal", "__$not", "test"]
+
+            The "OR" and "AND" operators apply to all tags that follow them until another operator is specified.
+            The NOT operator applies only to the immediately following tag.
+            For example:
+
+            .. code-block:: py
+
+                ["__$all", "a", "b", "c", "__$or", "d", "__$not", "e", "__$and", "__$or" "f", "g"]
+
+            This example means ("a" AND "b" AND "c" AND ("d" OR NOT "e") AND ("f" OR "g")).
+            See https://clear.ml/docs/latest/docs/clearml_sdk/task_sdk/#tag-filters for more information.
         :param bool allow_archived: If True (default), allow to return archived Tasks, if False filter out archived Tasks
         :param dict task_filter: filter and order Tasks.
             See :class:`.backend_api.service.v?.tasks.GetAllRequest` for details; the ? needs to be replaced by the appropriate version.
@@ -1032,17 +1084,30 @@ class Task(_Task):
         :param str task_name: task name (str) within the selected project
             Return any partial match of task_name, regular expressions matching is also supported.
             If None is passed, returns all tasks within the project
-        :param list tags: Filter based on the requested list of tags (strings)
-            To exclude a tag add "-" prefix to the tag. Example: ["best", "-debug"]
+        :param list tags: Filter based on the requested list of tags (strings).
+            To exclude a tag add "-" prefix to the tag. Example: ``["best", "-debug"]``.
             The default behaviour is to join all tags with a logical "OR" operator.
             To join all tags with a logical "AND" operator instead, use "__$all" as the first string, for example:

             .. code-block:: py

                 ["__$all", "best", "experiment", "ever"]

             To join all tags with AND, but exclude a tag use "__$not" before the excluded tag, for example:

             .. code-block:: py

                 ["__$all", "best", "experiment", "ever", "__$not", "internal", "__$not", "test"]

             The "OR" and "AND" operators apply to all tags that follow them until another operator is specified.
             The NOT operator applies only to the immediately following tag.
-            For example, ["__$all", "a", "b", "c", "__$or", "d", "__$not", "e", "__$and", "__$or" "f", "g"]
-            means ("a" AND "b" AND "c" AND ("d" OR NOT "e") AND ("f" OR "g")).
+            For example:
+
+            .. code-block:: py
+
+                ["__$all", "a", "b", "c", "__$or", "d", "__$not", "e", "__$and", "__$or" "f", "g"]
+
+            This example means ("a" AND "b" AND "c" AND ("d" OR NOT "e") AND ("f" OR "g")).
+            See https://clear.ml/docs/latest/docs/clearml_sdk/task_sdk/#tag-filters for more information.
         :param list additional_return_fields: Optional, if not provided return a list of Task IDs.
             If provided return dict per Task with the additional requested fields.
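Note: a hedged usage sketch of the documented operators with `Task.get_tasks` (project name and tags are illustrative):

```python
from clearml import Task

# Tasks tagged "best" AND "experiment", excluding any tagged "internal"
tasks = Task.get_tasks(
    project_name="examples",  # illustrative
    tags=["__$all", "best", "experiment", "__$not", "internal"],
)
print([t.name for t in tasks])
```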
@@ -1330,7 +1395,7 @@ class Task(_Task):

         :return: The number of tasks enqueued in the given queue
         """
-        if not Session.check_min_api_server_version("2.20"):
+        if not Session.check_min_api_server_version("2.20", raise_error=True):
             raise ValueError("Your version of clearml-server does not support the 'queues.get_num_entries' endpoint")
         mutually_exclusive(queue_name=queue_name, queue_id=queue_id)
         session = cls._get_default_session()
@@ -1524,6 +1589,7 @@ class Task(_Task):
             specified, then a path to a local configuration file is returned. Configuration object.
         """
         pathlib_Path = None  # noqa
+        cast_Path = Path
         if not isinstance(configuration, (dict, list, Path, six.string_types)):
             try:
                 from pathlib import Path as pathlib_Path  # noqa
@@ -1532,6 +1598,8 @@ class Task(_Task):
             if not pathlib_Path or not isinstance(configuration, pathlib_Path):
                 raise ValueError("connect_configuration supports `dict`, `str` and 'Path' types, "
                                  "{} is not supported".format(type(configuration)))
+            if pathlib_Path and isinstance(configuration, pathlib_Path):
+                cast_Path = pathlib_Path

         multi_config_support = Session.check_min_api_version('2.9')
         if multi_config_support and not name:
@@ -1599,7 +1667,7 @@ class Task(_Task):
             # it is a path to a local file
             if not running_remotely() or not (self.is_main_task() or self._is_remote_main_task()):
                 # check if not absolute path
-                configuration_path = Path(configuration)
+                configuration_path = cast_Path(configuration)
                 if not configuration_path.is_file():
                     ValueError("Configuration file does not exist")
                 try:
@@ -1626,7 +1694,7 @@ class Task(_Task):
                                "Using default configuration: {}".format(name, str(configuration)))
                 # update back configuration section
                 if multi_config_support:
-                    configuration_path = Path(configuration)
+                    configuration_path = cast_Path(configuration)
                     if configuration_path.is_file():
                         with open(configuration_path.as_posix(), 'rt') as f:
                             configuration_text = f.read()
@@ -1638,15 +1706,13 @@ class Task(_Task):
                         config_text=configuration_text)
             return configuration

-        configuration_path = Path(configuration)
+        configuration_path = cast_Path(configuration)
         fd, local_filename = mkstemp(prefix='clearml_task_config_',
                                      suffix=configuration_path.suffixes[-1] if
                                      configuration_path.suffixes else '.txt')
         with open(fd, "w") as f:
             f.write(configuration_text)
-        if pathlib_Path:
-            return pathlib_Path(local_filename)
-        return Path(local_filename) if isinstance(configuration, Path) else local_filename
+        return cast_Path(local_filename) if isinstance(configuration, cast_Path) else local_filename

     def connect_label_enumeration(self, enumeration):
         # type: (Dict[str, int]) -> Dict[str, int]
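Note: the `cast_Path` indirection keeps the return type aligned with the input type: pass a `pathlib.Path` and you get a `Path` back (pointing at the local copy of the configuration), pass a string and you get a string. A hedged usage sketch matching the updated example script below (names and file are illustrative):

```python
from pathlib import Path
from clearml import Task

task = Task.init(project_name="examples", task_name="config-demo")  # illustrative names

config_file = task.connect_configuration(Path("config.yaml"), name="yaml config file")
# config_file is a pathlib.Path here, so convert explicitly when a string path is needed
with open(config_file.as_posix(), "rt") as f:
    text = f.read()
```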
@@ -2,12 +2,19 @@ import attr

 from .version import Version

+try:
+    # noinspection PyUnresolvedReferences
+    import importlib.metadata
+    attr_version = importlib.metadata.version("attrs")
+except ImportError:
+    attr_version = attr.__version__
+

 class attrs(object):
     def __init__(self, *args, **kwargs):
         if any(x in kwargs for x in ("eq", "order")):
             raise RuntimeError("Only `cmp` is supported for attr.attrs, not `eq` or `order`")
-        if Version(attr.__version__) >= Version("19.2"):
+        if Version(attr_version) >= Version("19.2"):
             cmp = kwargs.pop("cmp", None)
             if cmp is not None:
                 kwargs["eq"] = kwargs["order"] = cmp
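Note: newer attrs releases discourage relying on the `attr.__version__` attribute, so the installed version is now read from package metadata when `importlib.metadata` is available (Python 3.8+), falling back to the attribute otherwise. A standalone sketch of the pattern:

```python
import attr

try:
    import importlib.metadata  # Python 3.8+
    attrs_version = importlib.metadata.version("attrs")
except ImportError:
    # older interpreters: fall back to the package attribute
    attrs_version = attr.__version__

print("attrs version:", attrs_version)
```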
@@ -1 +1 @@
-__version__ = '1.13.1'
+__version__ = '1.13.2'
@@ -2,9 +2,11 @@ clearml
 jsonschema==3.2.0 ; python_version <= '3.5'
 matplotlib
 pytorch-ignite
-tensorboard>=1.14.0
+tensorboard<=2.11.2 ; python_version <= '3.7'
+tensorboard>2.11.2 ; python_version > '3.7'
 tensorboardX
 torch>=1.1.0
 torchvision>=0.3.0
 tqdm
-protobuf>=4.21.1
+protobuf==3.20.* ; python_version <= '3.7'
+protobuf>=4.21.1 ; python_version > '3.7'
@@ -21,14 +21,14 @@ task = Task.init(project_name='FirstTrial', task_name='config_files_example')

 config_file = task.connect_configuration(Path("data_samples") / "sample.json", name='json config file')

-with open(config_file, "rt") as f:
+with open(config_file.as_posix(), "rt") as f:
     config_json = json.load(f)

 print(config_json)

 config_file = task.connect_configuration(Path("data_samples") / "config_yaml.yaml", name='yaml config file')

-with open(config_file, "rt") as f:
+with open(config_file.as_posix(), "rt") as f:
     config_yaml = yaml.load(f, Loader=yaml.SafeLoader)

 print(config_yaml)
@@ -14,6 +14,7 @@ python-dateutil>=2.6.1
 pyjwt>=2.4.0,<2.9.0 ; python_version > '3.5'
 pyjwt>=1.6.4,<2.0.0 ; python_version <= '3.5'
 PyYAML>=3.12
+referencing<0.40 ; python_version >= '3.8'
 requests>=2.20.0
 six>=1.13.0
 typing>=3.6.4 ; python_version < '3.5'