From 4cd8857c0da8007d0255c00d72dad3f56690d5d0 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Mon, 27 Mar 2023 13:38:11 +0300 Subject: [PATCH] Refactor code --- .gitignore | 2 +- clearml/automation/controller.py | 6 +- clearml/automation/trigger.py | 2 +- clearml/backend_api/services/v2_20/auth.py | 1 - clearml/backend_api/session/defs.py | 2 + clearml/backend_api/session/request.py | 1 - clearml/backend_api/session/session.py | 15 +- .../backend_interface/metrics/interface.py | 1 + clearml/backend_interface/metrics/reporter.py | 24 +- clearml/backend_interface/model.py | 287 ++++-- clearml/backend_interface/task/populate.py | 6 +- .../backend_interface/task/repo/scriptinfo.py | 12 +- clearml/backend_interface/task/task.py | 8 +- clearml/cli/config/__main__.py | 4 +- clearml/datasets/dataset.py | 1 - clearml/logger.py | 1 + clearml/model.py | 575 ++++++++---- clearml/storage/helper.py | 26 +- clearml/storage/manager.py | 4 +- clearml/task.py | 33 +- .../Getting_Started_2_Setting_Up_Agent.ipynb | 11 +- .../Getting_Started_3_Remote_Execution.ipynb | 873 +++++++++--------- ...hyperparameters.py => hyper_parameters.py} | 2 +- 23 files changed, 1116 insertions(+), 781 deletions(-) rename examples/reporting/{configuration_including_hyperparameters.py => hyper_parameters.py} (100%) diff --git a/.gitignore b/.gitignore index 24a4ec15..95025315 100644 --- a/.gitignore +++ b/.gitignore @@ -11,8 +11,8 @@ build/ dist/ *.egg-info .env -venv/ .venv/ +venv/ # example data examples/runs/ diff --git a/clearml/automation/controller.py b/clearml/automation/controller.py index 7ec254a4..cce4133f 100644 --- a/clearml/automation/controller.py +++ b/clearml/automation/controller.py @@ -218,7 +218,6 @@ class PipelineController(object): def serialize(obj): import dill return dill.dumps(obj) - :param artifact_deserialization_function: A deserialization function that takes one parameter of type `bytes`, which represents the serialized object. 
This function should return the deserialized object. All parameter/return artifacts fetched by the pipeline will be deserialized using this function. @@ -1157,6 +1156,7 @@ class PipelineController(object): # type: (bool, str) -> bool """ Evaluate whether or not the pipeline is successful + :param fail_on_step_fail: If True (default), evaluate the pipeline steps' status to assess if the pipeline is successful. If False, only evaluate the controller :param fail_condition: Must be one of the following: 'all' (default), 'failed' or 'aborted'. If 'failed', this @@ -1175,18 +1175,14 @@ class PipelineController(object): success_status = [Task.TaskStatusEnum.completed, Task.TaskStatusEnum.failed] else: raise UsageError("fail_condition needs to be one of the following: 'all', 'failed', 'aborted'") - if self._task.status not in success_status: return False - if not fail_on_step_fail: return True - self._update_nodes_status() for node in self._nodes.values(): if node.status not in success_status: return False - return True def elapsed(self): diff --git a/clearml/automation/trigger.py b/clearml/automation/trigger.py index 63e35baf..75279244 100644 --- a/clearml/automation/trigger.py +++ b/clearml/automation/trigger.py @@ -139,7 +139,7 @@ class TaskTrigger(BaseTrigger): raise ValueError("You must provide metric/variant/threshold") valid_status = [str(s) for s in Task.TaskStatusEnum] if self.on_status and not all(s in valid_status for s in self.on_status): - raise ValueError("You on_status contains invalid status value: {}".format(self.on_status)) + raise ValueError("Your on_status contains invalid status value: {}".format(self.on_status)) valid_signs = ['min', 'minimum', 'max', 'maximum'] if self.value_sign and self.value_sign not in valid_signs: raise ValueError("Invalid value_sign `{}`, valid options are: {}".format(self.value_sign, valid_signs)) diff --git a/clearml/backend_api/services/v2_20/auth.py b/clearml/backend_api/services/v2_20/auth.py index 7329e715..0cca7450 
100644 --- a/clearml/backend_api/services/v2_20/auth.py +++ b/clearml/backend_api/services/v2_20/auth.py @@ -2,7 +2,6 @@ auth service This service provides authentication management and authorization - validation for the entire system. """ import six diff --git a/clearml/backend_api/session/defs.py b/clearml/backend_api/session/defs.py index f0d305ac..cdbbcf3c 100644 --- a/clearml/backend_api/session/defs.py +++ b/clearml/backend_api/session/defs.py @@ -40,8 +40,10 @@ for a very long time for a non-responding or mis-configured server """ ENV_API_EXTRA_RETRY_CODES = EnvEntry("CLEARML_API_EXTRA_RETRY_CODES") + ENV_FORCE_MAX_API_VERSION = EnvEntry("CLEARML_FORCE_MAX_API_VERSION", type=str) + class MissingConfigError(ValueError): def __init__(self, message=None): if message is None: diff --git a/clearml/backend_api/session/request.py b/clearml/backend_api/session/request.py index 321f073d..2a85cb0c 100644 --- a/clearml/backend_api/session/request.py +++ b/clearml/backend_api/session/request.py @@ -94,7 +94,6 @@ class CompoundRequest(Request): if self._item_prop_name in dict_properties: del dict_properties[self._item_prop_name] dict_.update(dict_properties) - return dict_ def validate(self): diff --git a/clearml/backend_api/session/session.py b/clearml/backend_api/session/session.py index 91bfb671..2b559de4 100644 --- a/clearml/backend_api/session/session.py +++ b/clearml/backend_api/session/session.py @@ -134,6 +134,7 @@ class Session(TokenManager): **kwargs ): self.__class__._sessions_weakrefs.append(weakref.ref(self)) + self._verbose = verbose if verbose is not None else ENV_VERBOSE.get() self._logger = logger if self._verbose and not self._logger: @@ -147,7 +148,6 @@ class Session(TokenManager): self.__init_host = host self.__init_http_retries_config = http_retries_config self.__token_manager_kwargs = kwargs - if config is not None: self.config = config else: @@ -162,21 +162,21 @@ class Session(TokenManager): self._ssl_error_count_verbosity = self.config.get( 
"api.ssl_error_count_verbosity", self._ssl_error_count_verbosity) - self.__host = self.__init_host or self.get_api_server_host(config=self.config) + self.__host = self.__init_host or self.get_api_server_host(config=self.config) if not self.__host: raise ValueError("ClearML host was not set, check your configuration file or environment variable") - self.__host = self.__host.strip("/") self.__http_retries_config = self.__init_http_retries_config or self.config.get( "api.http.retries", ConfigTree()).as_plain_ordered_dict() + self.__http_retries_config["status_forcelist"] = self._get_retry_codes() self.__http_retries_config["config"] = self.config self.__http_session = get_http_session_with_retry(**self.__http_retries_config) self.__http_session.write_timeout = self._write_session_timeout self.__http_session.request_size_threshold = self._write_session_data_size - self.__max_req_size = self.config.get("api.http.max_req_size", None) + self.__max_req_size = self.config.get("api.http.max_req_size", None) if not self.__max_req_size: raise ValueError("missing max request size") @@ -186,7 +186,6 @@ class Session(TokenManager): req_token_expiration_sec = self.config.get("api.auth.req_token_expiration_sec", None) self.__auth_token = None self._update_default_api_method() - if ENV_AUTH_TOKEN.get(): self.__access_key = self.__secret_key = None self.__auth_token = ENV_AUTH_TOKEN.get() @@ -203,9 +202,11 @@ class Session(TokenManager): if not self.secret_key and not self.access_key and not self.__auth_token: raise MissingConfigError() + super(Session, self).__init__( **self.__token_manager_kwargs, - req_token_expiration_sec=req_token_expiration_sec, + token_expiration_threshold_sec=token_expiration_threshold_sec, + req_token_expiration_sec=req_token_expiration_sec ) self.refresh_token() @@ -633,6 +634,7 @@ class Session(TokenManager): return call_result + @classmethod def _make_all_sessions_go_online(cls): for active_session in cls._get_all_active_sessions(): # noinspection 
PyProtectedMember @@ -647,7 +649,6 @@ class Session(TokenManager): if session: active_sessions.append(session) new_sessions_weakrefs.append(session_weakref) - cls._sessions_weakrefs = session_weakref return active_sessions diff --git a/clearml/backend_interface/metrics/interface.py b/clearml/backend_interface/metrics/interface.py index 499449cc..a029831a 100644 --- a/clearml/backend_interface/metrics/interface.py +++ b/clearml/backend_interface/metrics/interface.py @@ -7,6 +7,7 @@ from time import time from pathlib2 import Path +from ...backend_api import Session from ...backend_api.services import events as api_events from ..base import InterfaceBase from ...config import config, deferred_config diff --git a/clearml/backend_interface/metrics/reporter.py b/clearml/backend_interface/metrics/reporter.py index df3ed0db..2d02765d 100644 --- a/clearml/backend_interface/metrics/reporter.py +++ b/clearml/backend_interface/metrics/reporter.py @@ -271,9 +271,13 @@ class Reporter(InterfaceBase, AbstractContextManager, SetupUploadMixin, AsyncMan self._for_model = for_model flush_threshold = config.get("development.worker.report_event_flush_threshold", 100) self._report_service = BackgroundReportService( - task=task, async_enable=async_enable, metrics=metrics, + task=task, + async_enable=async_enable, + metrics=metrics, flush_frequency=self._flush_frequency, - flush_threshold=flush_threshold, for_model=for_model) + flush_threshold=flush_threshold, + for_model=for_model, + ) self._report_service.start() def _set_storage_uri(self, value): @@ -355,8 +359,12 @@ class Reporter(InterfaceBase, AbstractContextManager, SetupUploadMixin, AsyncMan :param iter: Iteration number :type iter: int """ - ev = ScalarEvent(metric=self._normalize_name(title), variant=self._normalize_name(series), value=value, - iter=iter) + ev = ScalarEvent( + metric=self._normalize_name(title), + variant=self._normalize_name(series), + value=value, + iter=iter + ) self._report(ev) def report_vector(self, title, 
series, values, iter): @@ -457,8 +465,12 @@ class Reporter(InterfaceBase, AbstractContextManager, SetupUploadMixin, AsyncMan elif not isinstance(plot, six.string_types): raise ValueError('Plot should be a string or a dict') - ev = PlotEvent(metric=self._normalize_name(title), variant=self._normalize_name(series), - plot_str=plot, iter=iter) + ev = PlotEvent( + metric=self._normalize_name(title), + variant=self._normalize_name(series), + plot_str=plot, + iter=iter + ) self._report(ev) def report_image(self, title, series, src, iter): diff --git a/clearml/backend_interface/model.py b/clearml/backend_interface/model.py index f1bc3b2b..22d776b9 100644 --- a/clearml/backend_interface/model.py +++ b/clearml/backend_interface/model.py @@ -12,7 +12,7 @@ from ..storage import StorageManager from ..storage.helper import StorageHelper from ..utilities.async_manager import AsyncManagerMixin -ModelPackage = namedtuple('ModelPackage', 'weights design') +ModelPackage = namedtuple("ModelPackage", "weights design") class ModelDoesNotExistError(Exception): @@ -22,12 +22,12 @@ class ModelDoesNotExistError(Exception): class _StorageUriMixin(object): @property def upload_storage_uri(self): - """ A URI into which models are uploaded """ + """A URI into which models are uploaded""" return self._upload_storage_uri @upload_storage_uri.setter def upload_storage_uri(self, value): - self._upload_storage_uri = value.rstrip('/') if value else None + self._upload_storage_uri = value.rstrip("/") if value else None def create_dummy_model(upload_storage_uri=None, *args, **kwargs): @@ -44,9 +44,9 @@ def create_dummy_model(upload_storage_uri=None, *args, **kwargs): class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin): - """ Manager for backend model objects """ + """Manager for backend model objects""" - _EMPTY_MODEL_ID = 'empty' + _EMPTY_MODEL_ID = "empty" _local_model_to_id_uri = {} @@ -54,8 +54,15 @@ class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin): def model_id(self): 
return self.id - def __init__(self, upload_storage_uri, cache_dir, model_id=None, - upload_storage_suffix='models', session=None, log=None): + def __init__( + self, + upload_storage_uri, + cache_dir, + model_id=None, + upload_storage_suffix="models", + session=None, + log=None + ): super(Model, self).__init__(id=model_id, session=session, log=log) self._upload_storage_suffix = upload_storage_suffix if model_id == self._EMPTY_MODEL_ID: @@ -71,7 +78,7 @@ class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin): self.reload() def _reload(self): - """ Reload the model object """ + """Reload the model object""" if self._offline_mode: return models.Model() @@ -80,11 +87,19 @@ class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin): res = self.send(models.GetByIdRequest(model=self.id)) return res.response.model - def _upload_model(self, model_file, async_enable=False, target_filename=None, cb=None): + def _upload_model( + self, model_file, async_enable=False, target_filename=None, cb=None + ): if not self.upload_storage_uri: - raise ValueError('Model has no storage URI defined (nowhere to upload to)') + raise ValueError("Model has no storage URI defined (nowhere to upload to)") target_filename = target_filename or Path(model_file).name - dest_path = '/'.join((self.upload_storage_uri, self._upload_storage_suffix or '.', target_filename)) + dest_path = "/".join( + ( + self.upload_storage_uri, + self._upload_storage_suffix or ".", + target_filename, + ) + ) result = StorageHelper.get(dest_path).upload( src_path=model_file, dest_path=dest_path, @@ -93,19 +108,23 @@ class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin): return_canonized=False ) if async_enable: + def msg(num_results): - self.log.info("Waiting for previous model to upload (%d pending, %s)" % (num_results, dest_path)) + self.log.info( + "Waiting for previous model to upload (%d pending, %s)" + % (num_results, dest_path) + ) self._add_async_result(result, wait_on_max_results=2, wait_cb=msg) 
return dest_path def _upload_callback(self, res, cb=None): if res is None: - self.log.debug('Starting model upload') + self.log.debug("Starting model upload") elif res is False: - self.log.info('Failed model upload') + self.log.info("Failed model upload") else: - self.log.info('Completed model upload to {}'.format(res)) + self.log.info("Completed model upload to {}".format(res)) if cb: cb(res) @@ -126,12 +145,12 @@ class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin): :return: A proper design dictionary according to design parameter. """ if isinstance(design, dict): - if 'design' not in design: - raise ValueError('design dictionary must have \'design\' key in it') + if "design" not in design: + raise ValueError("design dictionary must have 'design' key in it") return design - return {'design': design if design else ''} + return {"design": design if design else ""} @staticmethod def _unwrap_design(design): @@ -153,23 +172,40 @@ class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin): :return: The design string according to design parameter. 
""" if not design: - return '' + return "" if isinstance(design, six.string_types): return design if isinstance(design, dict): - if 'design' in design: - return design['design'] + if "design" in design: + return design["design"] return list(design.values())[0] - raise ValueError('design must be a string or a dictionary with at least one value') + raise ValueError( + "design must be a string or a dictionary with at least one value" + ) - def update(self, model_file=None, design=None, labels=None, name=None, comment=None, tags=None, - task_id=None, project_id=None, parent_id=None, uri=None, framework=None, - upload_storage_uri=None, target_filename=None, iteration=None, system_tags=None): - """ Update model weights file and various model properties """ + def update( + self, + model_file=None, + design=None, + labels=None, + name=None, + comment=None, + tags=None, + task_id=None, + project_id=None, + parent_id=None, + uri=None, + framework=None, + upload_storage_uri=None, + target_filename=None, + iteration=None, + system_tags=None + ): + """Update model weights file and various model properties""" if self.id is None: if upload_storage_uri: @@ -182,7 +218,11 @@ class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin): Model._local_model_to_id_uri[str(model_file)] = (self.model_id, uri) # upload model file if needed and get uri - uri = uri or (self._upload_model(model_file, target_filename=target_filename) if model_file else self.data.uri) + uri = uri or ( + self._upload_model(model_file, target_filename=target_filename) + if model_file + else self.data.uri + ) # update fields design = self._wrap_design(design) if design else self.data.design name = name or self.data.name @@ -192,7 +232,7 @@ class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin): project = project_id or self.data.project parent = parent_id or self.data.parent tags = tags or self.data.tags - if Session.check_min_api_version('2.3'): + if Session.check_min_api_version("2.3"): system_tags = 
system_tags or self.data.system_tags self._edit( @@ -210,59 +250,121 @@ class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin): system_tags=system_tags, ) - def edit(self, design=None, labels=None, name=None, comment=None, tags=None, - uri=None, framework=None, iteration=None, system_tags=None): - return self._edit(design=design, labels=labels, name=name, comment=comment, tags=tags, - uri=uri, framework=framework, iteration=iteration, system_tags=system_tags) + def edit( + self, + design=None, + labels=None, + name=None, + comment=None, + tags=None, + uri=None, + framework=None, + iteration=None, + system_tags=None + ): + return self._edit( + design=design, + labels=labels, + name=name, + comment=comment, + tags=tags, + uri=uri, + framework=framework, + iteration=iteration, + system_tags=system_tags, + ) - def _edit(self, design=None, labels=None, name=None, comment=None, tags=None, - uri=None, framework=None, iteration=None, system_tags=None, **extra): + def _edit( + self, + design=None, + labels=None, + name=None, + comment=None, + tags=None, + uri=None, + framework=None, + iteration=None, + system_tags=None, + **extra + ): def offline_store(**kwargs): for k, v in kwargs.items(): setattr(self.data, k, v or getattr(self.data, k, None)) return - if self._offline_mode: - return offline_store(design=design, labels=labels, name=name, comment=comment, tags=tags, - uri=uri, framework=framework, iteration=iteration, **extra) - if Session.check_min_api_version('2.3'): + if self._offline_mode: + return offline_store( + design=design, + labels=labels, + name=name, + comment=comment, + tags=tags, + uri=uri, + framework=framework, + iteration=iteration, + **extra + ) + + if Session.check_min_api_version("2.3"): if tags is not None: - extra.update({'tags': tags}) + extra.update({"tags": tags}) if system_tags is not None: - extra.update({'system_tags': system_tags}) + extra.update({"system_tags": system_tags}) elif tags is not None or system_tags is not None: if tags 
and system_tags: system_tags = system_tags[:] system_tags += [t for t in tags if t not in system_tags] - extra.update({'system_tags': system_tags or tags or self.data.system_tags}) + extra.update({"system_tags": system_tags or tags or self.data.system_tags}) - self.send(models.EditRequest( - model=self.id, - uri=uri, - name=name, - comment=comment, - labels=labels, - design=self._wrap_design(design) if design else None, - framework=framework, - iteration=iteration, - **extra - )) + self.send( + models.EditRequest( + model=self.id, + uri=uri, + name=name, + comment=comment, + labels=labels, + design=self._wrap_design(design) if design else None, + framework=framework, + iteration=iteration, + **extra + ) + ) self.reload() - def update_and_upload(self, model_file, design=None, labels=None, name=None, comment=None, - tags=None, task_id=None, project_id=None, parent_id=None, framework=None, async_enable=False, - target_filename=None, cb=None, iteration=None): - """ Update the given model for a given task ID """ + def update_and_upload( + self, + model_file, + design=None, + labels=None, + name=None, + comment=None, + tags=None, + task_id=None, + project_id=None, + parent_id=None, + framework=None, + async_enable=False, + target_filename=None, + cb=None, + iteration=None + ): + """Update the given model for a given task ID""" if async_enable: + def callback(uploaded_uri): if uploaded_uri is None: return # If not successful, mark model as failed_uploading if uploaded_uri is False: - uploaded_uri = '{}/failed_uploading'.format(self._upload_storage_uri) + uploaded_uri = "{}/failed_uploading".format( + self._upload_storage_uri + ) - Model._local_model_to_id_uri[str(model_file)] = (self.model_id, uploaded_uri) + Model._local_model_to_id_uri[str(model_file)] = ( + self.model_id, + uploaded_uri, + ) self.update( uri=uploaded_uri, @@ -281,11 +383,17 @@ class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin): if cb: cb(model_file) - uri = self._upload_model(model_file, 
async_enable=async_enable, target_filename=target_filename, - cb=callback) + uri = self._upload_model( + model_file, + async_enable=async_enable, + target_filename=target_filename, + cb=callback, + ) return uri else: - uri = self._upload_model(model_file, async_enable=async_enable, target_filename=target_filename) + uri = self._upload_model( + model_file, async_enable=async_enable, target_filename=target_filename + ) Model._local_model_to_id_uri[str(model_file)] = (self.model_id, uri) self.update( uri=uri, @@ -302,7 +410,9 @@ class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin): return uri - def update_for_task(self, task_id, name=None, model_id=None, type_="output", iteration=None): + def update_for_task( + self, task_id, name=None, model_id=None, type_="output", iteration=None + ): if Session.check_min_api_version("2.13"): req = tasks.AddOrUpdateModelRequest( task=task_id, name=name, type=type_, model=model_id, iteration=iteration @@ -314,7 +424,9 @@ class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin): # backwards compatibility, None req = None else: - raise ValueError("Type '{}' unsupported (use either 'input' or 'output')".format(type_)) + raise ValueError( + "Type '{}' unsupported (use either 'input' or 'output')".format(type_) + ) if req: self.send(req) @@ -323,7 +435,7 @@ class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin): @property def model_design(self): - """ Get the model design. For now, this is stored as a single key in the design dict. """ + """Get the model design. 
For now, this is stored as a single key in the design dict.""" try: return self._unwrap_design(self.data.design) except ValueError: @@ -364,7 +476,11 @@ class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin): @property def tags(self): - return self.data.system_tags if hasattr(self.data, 'system_tags') else self.data.tags + return ( + self.data.system_tags + if hasattr(self.data, "system_tags") + else self.data.tags + ) @property def task(self): @@ -394,8 +510,10 @@ class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin): :param bool raise_on_error: If True and the artifact could not be downloaded, raise ValueError, otherwise return None on failure and output log warning. + :param bool force_download: If True, the base artifact will be downloaded, even if the artifact is already cached. + :return: a local path to a downloaded copy of the model """ uri = self.data.uri @@ -403,21 +521,29 @@ class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin): return None # check if we already downloaded the file - downloaded_models = [k for k, (i, u) in Model._local_model_to_id_uri.items() if i == self.id and u == uri] + downloaded_models = [ + k + for k, (i, u) in Model._local_model_to_id_uri.items() + if i == self.id and u == uri + ] for dl_file in downloaded_models: if Path(dl_file).exists() and not force_download: return dl_file # remove non existing model file Model._local_model_to_id_uri.pop(dl_file, None) - local_download = StorageManager.get_local_copy(uri, extract_archive=False, force_download=force_download) + local_download = StorageManager.get_local_copy( + uri, extract_archive=False, force_download=force_download + ) # save local model, so we can later query what was the original one if local_download is not None: Model._local_model_to_id_uri[str(local_download)] = (self.model_id, uri) elif raise_on_error: - raise ValueError("Could not retrieve a local copy of model weights {}, " - "failed downloading {}".format(self.model_id, uri)) + raise 
ValueError( + "Could not retrieve a local copy of model weights {}, " + "failed downloading {}".format(self.model_id, uri) + ) return local_download @@ -426,9 +552,9 @@ class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin): return self._cache_dir def save_model_design_file(self): - """ Download model description file into a local file in our cache_dir """ + """Download model description file into a local file in our cache_dir""" design = self.model_design - filename = self.data.name + '.txt' + filename = self.data.name + ".txt" p = Path(self.cache_dir) / filename # we always write the original model design to file, to prevent any mishaps # if p.is_file(): @@ -438,11 +564,13 @@ class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin): return str(p) def get_model_package(self): - """ Get a named tuple containing the model's weights and design """ - return ModelPackage(weights=self.download_model_weights(), design=self.save_model_design_file()) + """Get a named tuple containing the model's weights and design""" + return ModelPackage( + weights=self.download_model_weights(), design=self.save_model_design_file() + ) def get_model_design(self): - """ Get model description (text) """ + """Get model description (text)""" return self.model_design @classmethod @@ -465,8 +593,11 @@ class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin): data = self.data assert isinstance(data, models.Model) parent = self.id if child else None - extra = {'system_tags': tags or data.system_tags} \ - if Session.check_min_api_version('2.3') else {'tags': tags or data.tags} + extra = ( + {"system_tags": tags or data.system_tags} + if Session.check_min_api_version("2.3") + else {"tags": tags or data.tags} + ) req = models.CreateRequest( uri=data.uri, name=name, @@ -485,8 +616,8 @@ class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin): def _create_empty_model(self, upload_storage_uri=None, project_id=None): upload_storage_uri = upload_storage_uri or 
self.upload_storage_uri - name = make_message('Anonymous model %(time)s') - uri = '{}/uploading_file'.format(upload_storage_uri or 'file://') + name = make_message("Anonymous model %(time)s") + uri = "{}/uploading_file".format(upload_storage_uri or "file://") req = models.CreateRequest(uri=uri, name=name, labels={}, project=project_id) res = self.send(req) if not res: diff --git a/clearml/backend_interface/task/populate.py b/clearml/backend_interface/task/populate.py index d45c591b..7351f290 100644 --- a/clearml/backend_interface/task/populate.py +++ b/clearml/backend_interface/task/populate.py @@ -652,7 +652,6 @@ if __name__ == '__main__': function_source, function_name = CreateFromFunction.__extract_function_information( a_function, sanitize_function=_sanitize_function ) - # add helper functions on top. for f in (helper_functions or []): helper_function_source, _ = CreateFromFunction.__extract_function_information( @@ -665,7 +664,6 @@ if __name__ == '__main__': if artifact_serialization_function else ("", "None") ) - artifact_deserialization_function_source, artifact_deserialization_function_name = ( CreateFromFunction.__extract_function_information(artifact_deserialization_function) if artifact_deserialization_function @@ -833,7 +831,5 @@ if __name__ == '__main__': function_source = inspect.getsource(function) if sanitize_function: function_source = sanitize_function(function_source) - function_source = CreateFromFunction.__sanitize_remove_type_hints(function_source) - - return function_source, function_name \ No newline at end of file + return function_source, function_name diff --git a/clearml/backend_interface/task/repo/scriptinfo.py b/clearml/backend_interface/task/repo/scriptinfo.py index 7a674d66..4856165c 100644 --- a/clearml/backend_interface/task/repo/scriptinfo.py +++ b/clearml/backend_interface/task/repo/scriptinfo.py @@ -255,10 +255,14 @@ class ScriptRequirements(object): @staticmethod def _remove_package_versions(installed_pkgs, 
package_names_to_remove_version): - installed_pkgs = {k: (v[0], None if str(k) in package_names_to_remove_version else v[1]) - for k, v in installed_pkgs.items()} + def _internal(_installed_pkgs): + return { + k: (v[0], None if str(k) in package_names_to_remove_version else v[1]) + if not isinstance(v, dict) else _internal(v) + for k, v in _installed_pkgs.items() + } - return installed_pkgs + return _internal(installed_pkgs) class _JupyterObserver(object): @@ -781,6 +785,7 @@ class ScriptInfo(object): try: # we expect to find boto3 in the sagemaker env import boto3 + with open(cls._sagemaker_metadata_path) as f: notebook_data = json.load(f) client = boto3.client("sagemaker") @@ -799,7 +804,6 @@ class ScriptInfo(object): return jupyter_session.get("path", ""), jupyter_session.get("name", "") except Exception as e: cls._get_logger().warning("Failed finding Notebook in SageMaker environment. Error is: '{}'".format(e)) - return None, None @classmethod diff --git a/clearml/backend_interface/task/task.py b/clearml/backend_interface/task/task.py index 47128aac..f32ee1fa 100644 --- a/clearml/backend_interface/task/task.py +++ b/clearml/backend_interface/task/task.py @@ -699,8 +699,8 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin): print('This text will not be printed!') the text will not be printed, because the Python process is immediately terminated. - - :param bool ignore_errors: If True default), ignore any errors raised + + :param bool ignore_errors: If True (default), ignore any errors raised :param bool force: If True, the task status will be changed to `stopped` regardless of the current Task state. :param str status_message: Optional, add status change message to the stop request. 
This message will be stored as status_message on the Task's info panel @@ -718,11 +718,13 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin): tasks.CompletedRequest( self.id, status_reason='completed', status_message=status_message, force=force), ignore_errors=ignore_errors) + if self._get_runtime_properties().get("_publish_on_complete"): self.send( tasks.PublishRequest( self.id, status_reason='completed', status_message=status_message, force=force), ignore_errors=ignore_errors) + return resp return self.send( tasks.StoppedRequest(self.id, status_reason='completed', status_message=status_message, force=force), @@ -2387,7 +2389,7 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin): return True def _get_runtime_properties(self): - # type: () -> Mapping[str, str] + # type: () -> Dict[str, str] if not Session.check_min_api_version('2.13'): return dict() return dict(**self.data.runtime) if self.data.runtime else dict() diff --git a/clearml/cli/config/__main__.py b/clearml/cli/config/__main__.py index 86f34678..16e55cc4 100644 --- a/clearml/cli/config/__main__.py +++ b/clearml/cli/config/__main__.py @@ -93,7 +93,7 @@ def main(): # Take the credentials in raw form or from api section credentials = get_parsed_field(parsed, ["credentials"]) api_server = get_parsed_field(parsed, ["api_server", "host"]) - web_server = get_parsed_field(parsed, ["web_server"]) + web_server = get_parsed_field(parsed, ["web_server"]) # TODO: if previous fails, this will fail too files_server = get_parsed_field(parsed, ["files_server"]) except Exception: credentials = credentials or None @@ -153,7 +153,7 @@ def main(): print('\nClearML Hosts configuration:\nWeb App: {}\nAPI: {}\nFile Store: {}\n'.format( web_host, api_host, files_host)) - + if len(set([web_host, api_host, files_host])) != 3: raise ValueError("All three server URLs should be distinct") diff --git a/clearml/datasets/dataset.py b/clearml/datasets/dataset.py index e84ed439..1ce20fc0 100644 --- 
a/clearml/datasets/dataset.py +++ b/clearml/datasets/dataset.py @@ -17,7 +17,6 @@ from attr import attrs, attrib from pathlib2 import Path from .. import Task, StorageManager, Logger -from ..backend_api.session.client import APIClient from ..backend_api import Session from ..backend_interface.task.development.worker import DevWorker from ..backend_interface.util import mutually_exclusive, exact_match_regex, get_or_create_project, rename_project diff --git a/clearml/logger.py b/clearml/logger.py index 68799ebc..89c9fb33 100644 --- a/clearml/logger.py +++ b/clearml/logger.py @@ -359,6 +359,7 @@ class Logger(object): iteration=0, table_plot=df, extra_data={'columnwidth': [2., 1., 1., 1.]}) + """ mutually_exclusive( UsageError, _check_none=True, diff --git a/clearml/model.py b/clearml/model.py index 74911383..7ee58edf 100644 --- a/clearml/model.py +++ b/clearml/model.py @@ -24,8 +24,12 @@ from .utilities.proxy_object import cast_basic_type from .utilities.plotly_reporter import SeriesInfo from .backend_interface.util import ( - validate_dict, get_single_result, mutually_exclusive, exact_match_regex, - get_or_create_project, ) + validate_dict, + get_single_result, + mutually_exclusive, + exact_match_regex, + get_or_create_project, +) from .debugging.log import get_logger from .errors import UsageError from .storage.cache import CacheManager @@ -35,8 +39,8 @@ from .utilities.enum import Options from .backend_interface import Task as _Task from .backend_interface.model import create_dummy_model, Model as _Model from .backend_interface.session import SendError -from .backend_interface.metrics import Reporter, Metrics from .config import running_remotely, get_cache_dir +from .backend_interface.metrics import Reporter, Metrics if TYPE_CHECKING: @@ -47,58 +51,66 @@ class Framework(Options): """ Optional frameworks for output model """ - tensorflow = 'TensorFlow' - tensorflowjs = 'TensorFlow_js' - tensorflowlite = 'TensorFlow_Lite' - pytorch = 'PyTorch' - torchscript = 
'TorchScript' - caffe = 'Caffe' - caffe2 = 'Caffe2' - onnx = 'ONNX' - keras = 'Keras' - mknet = 'MXNet' - cntk = 'CNTK' - torch = 'Torch' - darknet = 'Darknet' - paddlepaddle = 'PaddlePaddle' - scikitlearn = 'ScikitLearn' - xgboost = 'XGBoost' - lightgbm = 'LightGBM' - parquet = 'Parquet' - megengine = 'MegEngine' - catboost = 'CatBoost' - tensorrt = 'TensorRT' - openvino = 'OpenVINO' + + tensorflow = "TensorFlow" + tensorflowjs = "TensorFlow_js" + tensorflowlite = "TensorFlow_Lite" + pytorch = "PyTorch" + torchscript = "TorchScript" + caffe = "Caffe" + caffe2 = "Caffe2" + onnx = "ONNX" + keras = "Keras" + mknet = "MXNet" + cntk = "CNTK" + torch = "Torch" + darknet = "Darknet" + paddlepaddle = "PaddlePaddle" + scikitlearn = "ScikitLearn" + xgboost = "XGBoost" + lightgbm = "LightGBM" + parquet = "Parquet" + megengine = "MegEngine" + catboost = "CatBoost" + tensorrt = "TensorRT" + openvino = "OpenVINO" __file_extensions_mapping = { - '.pb': (tensorflow, tensorflowjs, onnx, ), - '.meta': (tensorflow, ), - '.pbtxt': (tensorflow, onnx, ), - '.zip': (tensorflow, ), - '.tgz': (tensorflow, ), - '.tar.gz': (tensorflow, ), - 'model.json': (tensorflowjs, ), - '.tflite': (tensorflowlite, ), - '.pth': (pytorch, ), - '.pt': (pytorch, ), - '.caffemodel': (caffe, ), - '.prototxt': (caffe, ), - 'predict_net.pb': (caffe2, ), - 'predict_net.pbtxt': (caffe2, ), - '.onnx': (onnx, ), - '.h5': (keras, ), - '.hdf5': (keras, ), - '.keras': (keras, ), - '.model': (mknet, cntk, xgboost), - '-symbol.json': (mknet, ), - '.cntk': (cntk, ), - '.t7': (torch, ), - '.cfg': (darknet, ), - '__model__': (paddlepaddle, ), - '.pkl': (scikitlearn, keras, xgboost, megengine), - '.parquet': (parquet, ), - '.cbm': (catboost, ), - '.plan': (tensorrt,), + ".pb": ( + tensorflow, + tensorflowjs, + onnx, + ), + ".meta": (tensorflow,), + ".pbtxt": ( + tensorflow, + onnx, + ), + ".zip": (tensorflow,), + ".tgz": (tensorflow,), + ".tar.gz": (tensorflow,), + "model.json": (tensorflowjs,), + ".tflite": 
(tensorflowlite,), + ".pth": (pytorch,), + ".pt": (pytorch,), + ".caffemodel": (caffe,), + ".prototxt": (caffe,), + "predict_net.pb": (caffe2,), + "predict_net.pbtxt": (caffe2,), + ".onnx": (onnx,), + ".h5": (keras,), + ".hdf5": (keras,), + ".keras": (keras,), + ".model": (mknet, cntk, xgboost), + "-symbol.json": (mknet,), + ".cntk": (cntk,), + ".t7": (torch,), + ".cfg": (darknet,), + "__model__": (paddlepaddle,), + ".pkl": (scikitlearn, keras, xgboost, megengine), + ".parquet": (parquet,), + ".cbm": (catboost,), + ".plan": (tensorrt,), } __parent_mapping = { @@ -112,7 +124,7 @@ class Framework(Options): "xgboost": (xgboost,), "lightgbm": (lightgbm,), "catboost": (catboost,), - "joblib": (scikitlearn, xgboost) + "joblib": (scikitlearn, xgboost), } @classmethod @@ -140,9 +152,14 @@ class Framework(Options): # If no framework, try finding first framework matching the extension, otherwise (or if no match) try matching # the given extension to the given framework. If no match return an empty extension return ( - (not framework and find_framework_by_ext(lambda frameworks_: frameworks_[0])) - or find_framework_by_ext(lambda frameworks_: framework if framework in frameworks_ else None) - or (framework, filename.split('.')[-1] if '.' in filename else '') + ( + not framework + and find_framework_by_ext(lambda frameworks_: frameworks_[0]) + ) + or find_framework_by_ext( + lambda frameworks_: framework if framework in frameworks_ else None + ) + or (framework, filename.split(".")[-1] if "." in filename else "") ) @@ -256,7 +273,7 @@ class BaseModel(object): :return: The list of tags. """ data = self._get_model_data() - return data.system_tags if Session.check_min_api_version('2.3') else data.tags + return data.system_tags if Session.check_min_api_version("2.3") else data.tags @system_tags.setter def system_tags(self, value): @@ -356,7 +373,9 @@ class BaseModel(object): :return: The locally stored file. 
""" # download model (synchronously) and return local file - return self._get_base_model().download_model_weights(raise_on_error=raise_on_error, force_download=force_download) + return self._get_base_model().download_model_weights( + raise_on_error=raise_on_error, force_download=force_download + ) def get_weights_package( self, return_path=False, raise_on_error=False, force_download=False @@ -373,27 +392,35 @@ class BaseModel(object): :param bool raise_on_error: If True, and the artifact could not be downloaded, raise ValueError, otherwise return None on failure and output log warning. + :param bool force_download: If True, the base artifact will be downloaded, even if the artifact is already cached. + :return: The model weights, or a list of the locally stored filenames. if raise_on_error=False, returns None on error. """ # check if model was packaged if not self._is_package(): - raise ValueError('Model is not packaged') + raise ValueError("Model is not packaged") # download packaged model - packed_file = self.get_weights(raise_on_error=raise_on_error, force_download=force_download) + packed_file = self.get_weights( + raise_on_error=raise_on_error, force_download=force_download + ) if not packed_file: if raise_on_error: - raise ValueError('Model package \'{}\' could not be downloaded'.format(self.url)) + raise ValueError( + "Model package '{}' could not be downloaded".format(self.url) + ) return None # unpack - target_folder = mkdtemp(prefix='model_package_') + target_folder = mkdtemp(prefix="model_package_") if not target_folder: - raise ValueError('cannot create temporary directory for packed weight files') + raise ValueError( + "cannot create temporary directory for packed weight files" + ) for func in (zipfile.ZipFile, tarfile.open): try: @@ -403,12 +430,14 @@ class BaseModel(object): except (zipfile.BadZipfile, tarfile.ReadError): pass else: - raise ValueError('cannot extract files from packaged model at %s', packed_file) + raise ValueError( + "cannot extract 
files from packaged model at %s", packed_file + ) if return_path: return target_folder - target_files = list(Path(target_folder).glob('*')) + target_files = list(Path(target_folder).glob("*")) return target_files def report_scalar(self, title, series, value, iteration): @@ -1034,14 +1063,12 @@ class BaseModel(object): def _init_reporter(self): if self._reporter: return - metrics_manager = Metrics( session=_Model._get_default_session(), storage_uri=None, task=self, # this is fine, the ID of the model will be fetched here for_model=True ) - self._reporter = Reporter(metrics=metrics_manager, task=self, for_model=True) def _running_remotely(self): @@ -1051,7 +1078,7 @@ class BaseModel(object): def _set_task(self, value): # type: (_Task) -> () if value is not None and not isinstance(value, _Task): - raise ValueError('task argument must be of Task type') + raise ValueError("task argument must be of Task type") self._task = value @abc.abstractmethod @@ -1073,7 +1100,9 @@ class BaseModel(object): @staticmethod def _config_dict_to_text(config): if not isinstance(config, six.string_types) and not isinstance(config, dict): - raise ValueError("Model configuration only supports dictionary or string objects") + raise ValueError( + "Model configuration only supports dictionary or string objects" + ) return config_dict_to_text(config) @staticmethod @@ -1084,7 +1113,11 @@ class BaseModel(object): @staticmethod def _resolve_config(config_text=None, config_dict=None): - mutually_exclusive(config_text=config_text, config_dict=config_dict, _require_at_least_one=False) + mutually_exclusive( + config_text=config_text, + config_dict=config_dict, + _require_at_least_one=False, + ) if config_dict: return InputModel._config_dict_to_text(config_dict) @@ -1105,13 +1138,25 @@ class BaseModel(object): _Model._get_default_session() .send( models.AddOrUpdateMetadataRequest( - metadata=[{ - "key": str(key), - "value": str(value), - "type": str(v_type) - if str(v_type) in ('float', 'int', 'bool', 
'str', 'basestring', 'list', 'tuple', 'dict') else - str(None) - }], + metadata=[ + { + "key": str(key), + "value": str(value), + "type": str(v_type) + if str(v_type) + in ( + "float", + "int", + "bool", + "str", + "basestring", + "list", + "tuple", + "dict", + ) + else str(None), + } + ], model=self.id, replace_metadata=False, ) @@ -1171,7 +1216,9 @@ class BaseModel(object): result = {} metadata = self.get_all_metadata() for key, metadata_entry in metadata.items(): - result[key] = cast_basic_type(metadata_entry.get("value"), metadata_entry.get("type")) + result[key] = cast_basic_type( + metadata_entry.get("value"), metadata_entry.get("type") + ) return result def set_all_metadata(self, metadata, replace=True): @@ -1186,11 +1233,20 @@ class BaseModel(object): :return: True if the metadata was set and False otherwise """ metadata_array = [ - {"key": str(k), "value": str(v_t.get("value")), "type": str(v_t.get("type"))} for k, v_t in metadata.items() + { + "key": str(k), + "value": str(v_t.get("value")), + "type": str(v_t.get("type")), + } + for k, v_t in metadata.items() ] self._reload_required = ( _Model._get_default_session() - .send(models.AddOrUpdateMetadataRequest(metadata=metadata_array, model=self.id, replace_metadata=replace)) + .send( + models.AddOrUpdateMetadataRequest( + metadata=metadata_array, model=self.id, replace_metadata=replace + ) + ) .ok() ) return self._reload_required @@ -1256,7 +1312,7 @@ class Model(BaseModel): if not self._base_model_id: # this shouldn't actually happen - raise Exception('Missing model ID, cannot create an empty model') + raise Exception("Missing model ID, cannot create an empty model") self._base_model = _Model( upload_storage_uri=None, cache_dir=get_cache_dir(), @@ -1269,14 +1325,14 @@ class Model(BaseModel): @classmethod def query_models( - cls, - project_name=None, # type: Optional[str] - model_name=None, # type: Optional[str] - tags=None, # type: Optional[Sequence[str]] - only_published=False, # type: bool - 
include_archived=False, # type: bool - max_results=None, # type: Optional[int] - metadata=None # type: Optional[Dict[str, str]] + cls, + project_name=None, # type: Optional[str] + model_name=None, # type: Optional[str] + tags=None, # type: Optional[Sequence[str]] + only_published=False, # type: bool + include_archived=False, # type: bool + max_results=None, # type: Optional[int] + metadata=None, # type: Optional[Dict[str, str]] ): # type: (...) -> List[Model] """ @@ -1302,16 +1358,20 @@ class Model(BaseModel): res = _Model._get_default_session().send( projects.GetAllRequest( name=exact_match_regex(project_name), - only_fields=['id', 'name', 'last_update'] + only_fields=["id", "name", "last_update"], ) ) - project = get_single_result(entity='project', query=project_name, results=res.response.projects) + project = get_single_result( + entity="project", query=project_name, results=res.response.projects + ) else: project = None - only_fields = ['id', 'created', 'system_tags'] + only_fields = ["id", "created", "system_tags"] - extra_fields = {"metadata.{}.value".format(k): v for k, v in (metadata or {}).items()} + extra_fields = { + "metadata.{}.value".format(k): v for k, v in (metadata or {}).items() + } models_fetched = [] @@ -1323,12 +1383,16 @@ class Model(BaseModel): res = _Model._get_default_session().send( models.GetAllRequest( project=[project.id] if project else None, - name=exact_match_regex(model_name) if model_name is not None else None, + name=exact_match_regex(model_name) + if model_name is not None + else None, only_fields=only_fields, tags=tags or None, - system_tags=["-" + cls._archived_tag] if not include_archived else None, + system_tags=["-" + cls._archived_tag] + if not include_archived + else None, ready=True if only_published else None, - order_by=['-created'], + order_by=["-created"], page=page, page_size=page_size if results_left > page_size else results_left, _allow_extra_fields_=True, @@ -1352,7 +1416,9 @@ class Model(BaseModel): return 
self._base_model_id if self._base_model_id else super(Model, self).id @classmethod - def remove(cls, model, delete_weights_file=True, force=False, raise_on_errors=False): + def remove( + cls, model, delete_weights_file=True, force=False, raise_on_errors=False + ): # type: (Union[str, Model], bool, bool, bool) -> bool """ Remove a model from the model repository. @@ -1384,11 +1450,17 @@ class Model(BaseModel): response = res.wait() if not response.ok(): if raise_on_errors: - raise ValueError("Could not remove model id={}: {}".format(model.id, response.meta)) + raise ValueError( + "Could not remove model id={}: {}".format( + model.id, response.meta + ) + ) return False except SendError as ex: if raise_on_errors: - raise ValueError("Could not remove model id={}: {}".format(model.id, ex)) + raise ValueError( + "Could not remove model id={}: {}".format(model.id, ex) + ) return False except ValueError: if raise_on_errors: @@ -1396,7 +1468,9 @@ class Model(BaseModel): return False except Exception as ex: if raise_on_errors: - raise ValueError("Could not remove model id={}: {}".format(model.id, ex)) + raise ValueError( + "Could not remove model id={}: {}".format(model.id, ex) + ) return False if not delete_weights_file: @@ -1406,12 +1480,19 @@ class Model(BaseModel): try: if not helper.delete(weights_url): if raise_on_errors: - raise ValueError("Could not remove model id={} weights file: {}".format(model.id, weights_url)) + raise ValueError( + "Could not remove model id={} weights file: {}".format( + model.id, weights_url + ) + ) return False except Exception as ex: if raise_on_errors: - raise ValueError("Could not remove model id={} weights file \'{}\': {}".format( - model.id, weights_url, ex)) + raise ValueError( + "Could not remove model id={} weights file '{}': {}".format( + model.id, weights_url, ex + ) + ) return False return True @@ -1510,29 +1591,36 @@ class InputModel(Model): :return: The imported model or existing model (see above). 
""" - config_text = cls._resolve_config(config_text=config_text, config_dict=config_dict) + config_text = cls._resolve_config( + config_text=config_text, config_dict=config_dict + ) weights_url = StorageHelper.conform_url(weights_url) if not weights_url: raise ValueError("Please provide a valid weights_url parameter") # convert local to file to remote one weights_url = CacheManager.get_remote_url(weights_url) - extra = {'system_tags': ["-" + cls._archived_tag]} \ - if Session.check_min_api_version('2.3') else {'tags': ["-" + cls._archived_tag]} + extra = ( + {"system_tags": ["-" + cls._archived_tag]} + if Session.check_min_api_version("2.3") + else {"tags": ["-" + cls._archived_tag]} + ) # noinspection PyProtectedMember - result = _Model._get_default_session().send(models.GetAllRequest( - uri=[weights_url], - only_fields=["id", "name", "created"], - **extra - )) + result = _Model._get_default_session().send( + models.GetAllRequest( + uri=[weights_url], only_fields=["id", "name", "created"], **extra + ) + ) if result.response.models: logger = get_logger() - logger.debug('A model with uri "{}" already exists. Selecting it'.format(weights_url)) + logger.debug( + 'A model with uri "{}" already exists. 
Selecting it'.format(weights_url) + ) model = get_single_result( - entity='model', + entity="model", query=weights_url, results=result.response.models, log=logger, @@ -1549,11 +1637,14 @@ class InputModel(Model): ) from .task import Task + task = Task.current_task() if task: - comment = 'Imported by task id: {}'.format(task.id) + ('\n' + comment if comment else '') + comment = "Imported by task id: {}".format(task.id) + ( + "\n" + comment if comment else "" + ) project_id = task.project - name = name or 'Imported by {}'.format(task.name or '') + name = name or "Imported by {}".format(task.name or "") # do not register the Task, because we do not want it listed after as "output model", # the Task never actually created the Model task_id = None @@ -1564,14 +1655,13 @@ class InputModel(Model): if project: project_id = get_or_create_project( session=task.session if task else Task._get_default_session(), - project_name=project + project_name=project, ) if not framework: # noinspection PyProtectedMember framework, file_ext = Framework._get_file_ext( - framework=framework, - filename=weights_url + framework=framework, filename=weights_url ) base_model.update( @@ -1632,24 +1722,27 @@ class InputModel(Model): if not load_archived: # noinspection PyTypeChecker - extra = {'system_tags': ["-" + _Task.archived_tag]} \ - if Session.check_min_api_version('2.3') else {'tags': ["-" + cls._archived_tag]} + extra = ( + {"system_tags": ["-" + _Task.archived_tag]} + if Session.check_min_api_version("2.3") + else {"tags": ["-" + cls._archived_tag]} + ) else: extra = {} # noinspection PyProtectedMember - result = _Model._get_default_session().send(models.GetAllRequest( - uri=[weights_url], - only_fields=["id", "name", "created"], - **extra - )) + result = _Model._get_default_session().send( + models.GetAllRequest( + uri=[weights_url], only_fields=["id", "name", "created"], **extra + ) + ) if not result or not result.response or not result.response.models: return None logger = 
get_logger() model = get_single_result( - entity='model', + entity="model", query=weights_url, results=result.response.models, log=logger, @@ -1697,7 +1790,9 @@ class InputModel(Model): m._data.labels = label_enumeration return this_model - def __init__(self, model_id=None, name=None, project=None, tags=None, only_published=False): + def __init__( + self, model_id=None, name=None, project=None, tags=None, only_published=False + ): # type: (Optional[str], Optional[str], Optional[str], Optional[Sequence[str]], bool) -> None """ Load a model from the Model artifactory, @@ -1712,10 +1807,17 @@ class InputModel(Model): """ if not model_id: found_models = self.query_models( - project_name=project, model_name=name, tags=tags, only_published=only_published) + project_name=project, + model_name=name, + tags=tags, + only_published=only_published, + ) if not found_models: - raise ValueError("Could not locate model with project={} name={} tags={} published={}".format( - project, name, tags, only_published)) + raise ValueError( + "Could not locate model with project={} name={} tags={} published={}".format( + project, name, tags, only_published + ) + ) model_id = found_models[0].id super(InputModel, self).__init__(model_id) @@ -1799,7 +1901,6 @@ class OutputModel(BaseModel): When executing a Task (experiment) remotely in a worker, you can modify the model configuration and / or model's label enumeration using the **ClearML Web-App**. 
""" - _default_output_uri = None _offline_folder = "models" @@ -1947,13 +2048,18 @@ class OutputModel(BaseModel): """ if not task: from .task import Task + task = Task.current_task() if not task: - raise ValueError("task object was not provided, and no current task was found") + raise ValueError( + "task object was not provided, and no current task was found" + ) super(OutputModel, self).__init__(task=task) - config_text = self._resolve_config(config_text=config_text, config_dict=config_dict) + config_text = self._resolve_config( + config_text=config_text, config_dict=config_dict + ) self._model_local_filename = None self._last_uploaded_url = None @@ -1967,8 +2073,10 @@ class OutputModel(BaseModel): labels=label_enumeration or task.get_labels_enumeration(), name=name or self._task.name, tags=tags, - comment='{} by task id: {}'.format('Created' if not base_model_id else 'Overwritten', task.id) + - ('\n' + comment if comment else ''), + comment="{} by task id: {}".format( + "Created" if not base_model_id else "Overwritten", task.id + ) + + ("\n" + comment if comment else ""), framework=framework, upload_storage_uri=task.output_uri, ) @@ -1986,13 +2094,18 @@ class OutputModel(BaseModel): task_id=self._task.id, project_id=self._task.project, name=self._floating_data.name or self._task.name, - comment=('{}\n{}'.format(_base_model.comment, self._floating_data.comment) - if (_base_model.comment and self._floating_data.comment and - self._floating_data.comment not in _base_model.comment) - else (_base_model.comment or self._floating_data.comment)), + comment=( + "{}\n{}".format(_base_model.comment, self._floating_data.comment) + if ( + _base_model.comment + and self._floating_data.comment + and self._floating_data.comment not in _base_model.comment + ) + else (_base_model.comment or self._floating_data.comment) + ), tags=self._floating_data.tags, framework=self._floating_data.framework, - upload_storage_uri=self._floating_data.upload_storage_uri + 
upload_storage_uri=self._floating_data.upload_storage_uri, ) self._base_model = _base_model self._floating_data = None @@ -2017,7 +2130,9 @@ class OutputModel(BaseModel): Use examples would be GANs or model ensemble """ if self._task != task: - raise ValueError('Can only connect preexisting model to task, but this is a fresh model') + raise ValueError( + "Can only connect preexisting model to task, but this is a fresh model" + ) if name: self._task_connect_name = name @@ -2030,14 +2145,22 @@ class OutputModel(BaseModel): if not self._task._get_model_config_text(): # noinspection PyProtectedMember task._set_model_config( - config_text=model.model_design if hasattr(model, "model_design") else model.design.get("design", "") + config_text=model.model_design + if hasattr(model, "model_design") + else model.design.get("design", "") ) if not self._task.get_labels_enumeration(): - task.set_model_label_enumeration(model.data.labels if hasattr(model, "data") else model.labels) + task.set_model_label_enumeration( + model.data.labels if hasattr(model, "data") else model.labels + ) if self._base_model: self._base_model.update_for_task( - task_id=self._task.id, model_id=self.id, type_="output", name=self._task_connect_name) + task_id=self._task.id, + model_id=self.id, + type_="output", + name=self._task_connect_name, + ) def set_upload_destination(self, uri): # type: (str) -> None @@ -2076,7 +2199,9 @@ class OutputModel(BaseModel): try: uri = storage.verify_upload(folder_uri=uri) except Exception: - raise ValueError("Could not set destination uri to: %s [Check write permissions]" % uri) + raise ValueError( + "Could not set destination uri to: %s [Check write permissions]" % uri + ) # store default uri self._get_base_model().upload_storage_uri = uri @@ -2127,19 +2252,23 @@ class OutputModel(BaseModel): if filename: os.remove(filename) except OSError: - self._log.debug('Failed removing temporary file %s' % filename) + self._log.debug("Failed removing temporary file %s" % filename) 
# test if we can update the model if self.id and self.published: - raise ValueError('Model is published and cannot be changed') + raise ValueError("Model is published and cannot be changed") - if (not weights_filename and not register_uri) or (weights_filename and register_uri): - raise ValueError('Model update must have either local weights file to upload, ' - 'or pre-uploaded register_uri, never both') + if (not weights_filename and not register_uri) or ( + weights_filename and register_uri + ): + raise ValueError( + "Model update must have either local weights file to upload, " + "or pre-uploaded register_uri, never both" + ) # only upload if we are connected to a task if not self._task: - raise Exception('Missing a task for this model') + raise Exception("Missing a task for this model") if self._task.is_offline() and (weights_filename is None or not Path(weights_filename).is_dir()): return self._update_weights_offline( @@ -2160,7 +2289,8 @@ class OutputModel(BaseModel): upload_uri=upload_uri, target_filename=target_filename or Path(weights_filename).name, auto_delete_file=auto_delete_file, - iteration=iteration) + iteration=iteration, + ) # make sure we delete the previous file, if it exists if self._model_local_filename != weights_filename: @@ -2171,17 +2301,21 @@ class OutputModel(BaseModel): # make sure the created model is updated: out_model_file_name = target_filename or weights_filename or register_uri - name = Path(out_model_file_name).stem if out_model_file_name else (self._task_connect_name or "Output Model") + name = ( + Path(out_model_file_name).stem + if out_model_file_name + else (self._task_connect_name or "Output Model") + ) model = self._get_force_base_model(task_model_entry=name) if not model: - raise ValueError('Failed creating internal output model') + raise ValueError("Failed creating internal output model") # select the correct file extension based on the framework, # or update the framework based on the file extension # noinspection 
PyProtectedMember framework, file_ext = Framework._get_file_ext( framework=self._get_model_data().framework, - filename=target_filename or weights_filename or register_uri + filename=target_filename or weights_filename or register_uri, ) if weights_filename: @@ -2195,25 +2329,36 @@ class OutputModel(BaseModel): # let us know the iteration number, we put it in the comment section for now. if update_comment: - comment = self.comment or '' - iteration_msg = 'snapshot {} stored'.format(weights_filename or register_uri) - if not comment.startswith('\n'): - comment = '\n' + comment + comment = self.comment or "" + iteration_msg = "snapshot {} stored".format( + weights_filename or register_uri + ) + if not comment.startswith("\n"): + comment = "\n" + comment comment = iteration_msg + comment else: comment = None # if we have no output destination, just register the local model file - if weights_filename and not self.upload_storage_uri and not self._task.storage_uri: + if ( + weights_filename + and not self.upload_storage_uri + and not self._task.storage_uri + ): register_uri = weights_filename weights_filename = None auto_delete_file = False - self._log.info('No output storage destination defined, registering local model %s' % register_uri) + self._log.info( + "No output storage destination defined, registering local model %s" + % register_uri + ) # start the upload if weights_filename: if not model.upload_storage_uri: - self.set_upload_destination(self.upload_storage_uri or self._task.storage_uri) + self.set_upload_destination( + self.upload_storage_uri or self._task.storage_uri + ) output_uri = model.update_and_upload( model_file=weights_filename, @@ -2227,7 +2372,12 @@ class OutputModel(BaseModel): ) elif register_uri: register_uri = StorageHelper.conform_url(register_uri) - output_uri = model.update(uri=register_uri, task_id=self._task.id, framework=framework, comment=comment) + output_uri = model.update( + uri=register_uri, + task_id=self._task.id, + 
framework=framework, + comment=comment, + ) else: output_uri = None @@ -2245,7 +2395,7 @@ class OutputModel(BaseModel): upload_uri=None, # type: Optional[str] target_filename=None, # type: Optional[str] auto_delete_file=True, # type: bool - iteration=None # type: Optional[int] + iteration=None, # type: Optional[int] ): # type: (...) -> str """ @@ -2274,52 +2424,70 @@ class OutputModel(BaseModel): :return: The uploaded URI for the weights package. """ # create list of files - if (not weights_filenames and not weights_path) or (weights_filenames and weights_path): - raise ValueError('Model update weights package should get either ' - 'directory path to pack or a list of files') + if (not weights_filenames and not weights_path) or ( + weights_filenames and weights_path + ): + raise ValueError( + "Model update weights package should get either " + "directory path to pack or a list of files" + ) if not weights_filenames: - weights_filenames = list(map(six.text_type, Path(weights_path).rglob('*'))) + weights_filenames = list(map(six.text_type, Path(weights_path).rglob("*"))) elif weights_filenames and len(weights_filenames) > 1: weights_path = get_common_path(weights_filenames) # create packed model from all the files - fd, zip_file = mkstemp(prefix='model_package.', suffix='.zip') + fd, zip_file = mkstemp(prefix="model_package.", suffix=".zip") try: - with zipfile.ZipFile(zip_file, 'w', allowZip64=True, compression=zipfile.ZIP_STORED) as zf: + with zipfile.ZipFile( + zip_file, "w", allowZip64=True, compression=zipfile.ZIP_STORED + ) as zf: for filename in weights_filenames: - relative_file_name = Path(filename).name if not weights_path else \ - Path(filename).absolute().relative_to(Path(weights_path).absolute()).as_posix() + relative_file_name = ( + Path(filename).name + if not weights_path + else Path(filename) + .absolute() + .relative_to(Path(weights_path).absolute()) + .as_posix() + ) zf.write(filename, arcname=relative_file_name) finally: os.close(fd) # now we can 
delete the files (or path if provided) if auto_delete_file: + def safe_remove(path, is_dir=False): try: (os.rmdir if is_dir else os.remove)(path) except OSError: - self._log.info('Failed removing temporary {}'.format(path)) + self._log.info("Failed removing temporary {}".format(path)) for filename in weights_filenames: safe_remove(filename) if weights_path: safe_remove(weights_path, is_dir=True) - if target_filename and not target_filename.lower().endswith('.zip'): - target_filename += '.zip' + if target_filename and not target_filename.lower().endswith(".zip"): + target_filename += ".zip" # and now we should upload the file, always delete the temporary zip file - comment = self.comment or '' - iteration_msg = 'snapshot {} stored'.format(str(weights_filenames)) - if not comment.startswith('\n'): - comment = '\n' + comment + comment = self.comment or "" + iteration_msg = "snapshot {} stored".format(str(weights_filenames)) + if not comment.startswith("\n"): + comment = "\n" + comment comment = iteration_msg + comment self.comment = comment - uploaded_uri = self.update_weights(weights_filename=zip_file, auto_delete_file=True, upload_uri=upload_uri, - target_filename=target_filename or 'model_package.zip', - iteration=iteration, update_comment=False) + uploaded_uri = self.update_weights( + weights_filename=zip_file, + auto_delete_file=True, + upload_uri=upload_uri, + target_filename=target_filename or "model_package.zip", + iteration=iteration, + update_comment=False, + ) # set the model tag (by now we should have a model object) so we know we have packaged file self._set_package_tag() return uploaded_uri @@ -2344,7 +2512,9 @@ class OutputModel(BaseModel): if not self._validate_update(): return False - config_text = self._resolve_config(config_text=config_text, config_dict=config_dict) + config_text = self._resolve_config( + config_text=config_text, config_dict=config_dict + ) if self._task and not self._task.get_model_config_text(): 
self._task.set_model_config(config_text=config_text) @@ -2378,7 +2548,12 @@ class OutputModel(BaseModel): :return: """ - validate_dict(labels, key_types=six.string_types, value_types=six.integer_types, desc='label enumeration') + validate_dict( + labels, + key_types=six.string_types, + value_types=six.integer_types, + desc="label enumeration", + ) if not self._validate_update(): return @@ -2437,7 +2612,6 @@ class OutputModel(BaseModel): ) if not self._task: raise Exception("Missing a task for this model") - weights_filename_offline = None if weights_filename: weights_filename_offline = ( @@ -2445,7 +2619,6 @@ class OutputModel(BaseModel): ).as_posix() os.makedirs(os.path.dirname(weights_filename_offline), exist_ok=True) shutil.copyfile(weights_filename, weights_filename_offline) - # noinspection PyProtectedMember self._task._offline_output_models.append( dict( @@ -2484,7 +2657,11 @@ class OutputModel(BaseModel): # noinspection PyProtectedMember config_text = self._task._get_model_config_text() model_name = model_name or self._floating_data.name or self._task.name - task_model_entry = task_model_entry or self._task_connect_name or Path(self._get_model_data().uri).stem + task_model_entry = ( + task_model_entry + or self._task_connect_name + or Path(self._get_model_data().uri).stem + ) parent = self._task.input_models_id.get(task_model_entry) self._base_model.update( labels=self._floating_data.labels or labels, @@ -2496,7 +2673,7 @@ class OutputModel(BaseModel): comment=self._floating_data.comment, tags=self._floating_data.tags, framework=self._floating_data.framework, - upload_storage_uri=self._floating_data.upload_storage_uri + upload_storage_uri=self._floating_data.upload_storage_uri, ) # remove model floating change set, by now they should have matched the task. 
@@ -2504,12 +2681,22 @@ class OutputModel(BaseModel): # now we have to update the creator task so it points to us if str(self._task.status) not in ( - str(self._task.TaskStatusEnum.created), str(self._task.TaskStatusEnum.in_progress)): - self._log.warning('Could not update last created model in Task {}, ' - 'Task status \'{}\' cannot be updated'.format(self._task.id, self._task.status)) + str(self._task.TaskStatusEnum.created), + str(self._task.TaskStatusEnum.in_progress), + ): + self._log.warning( + "Could not update last created model in Task {}, " + "Task status '{}' cannot be updated".format( + self._task.id, self._task.status + ) + ) else: self._base_model.update_for_task( - task_id=self._task.id, model_id=self.id, type_="output", name=task_model_entry) + task_id=self._task.id, + model_id=self.id, + type_="output", + name=task_model_entry, + ) return self._base_model @@ -2526,7 +2713,7 @@ class OutputModel(BaseModel): def _validate_update(self): # test if we can update the model if self.id and self.published: - raise ValueError('Model is published and cannot be changed') + raise ValueError("Model is published and cannot be changed") return True diff --git a/clearml/storage/helper.py b/clearml/storage/helper.py index 83fb68c6..994823c1 100644 --- a/clearml/storage/helper.py +++ b/clearml/storage/helper.py @@ -179,7 +179,7 @@ class StorageHelper(object): @property def dest_path(self): return self._dest_path - + @property def canonized_dest_path(self): return self._canonized_dest_path @@ -195,11 +195,11 @@ class StorageHelper(object): @property def retries(self): return self._retries - + @property def return_canonized(self): return self._return_canonized - + def __init__(self, src_path, dest_path, canonized_dest_path, extra, callback, retries, return_canonized): self._src_path = src_path self._dest_path = dest_path @@ -792,14 +792,14 @@ class StorageHelper(object): return StorageHelper._upload_pool.apply_async(self._do_async_upload, args=(data,)) else: res = 
self._do_upload( - src_path=src_path, - dest_path=dest_path, - canonized_dest_path=canonized_dest_path, - extra=extra, - cb=cb, - verbose=False, - retries=retries, - return_canonized=return_canonized) + src_path=src_path, + dest_path=dest_path, + canonized_dest_path=canonized_dest_path, + extra=extra, + cb=cb, + verbose=False, + retries=retries, + return_canonized=return_canonized) if res: result_path = quote_url(result_path) return result_path @@ -1177,8 +1177,7 @@ class StorageHelper(object): def _do_async_upload(self, data): assert isinstance(data, self._UploadData) - return self._do_upload(data.src_path, data.dest_path, data.canonized_dest_path, extra=data.extra, cb=data.callback, - verbose=True, retries=data.retries, return_canonized=data.return_canonized) + return self._do_upload(data.src_path, data.dest_path, data.canonized_dest_path, extra=data.extra, cb=data.callback, verbose=True, retries=data.retries, return_canonized=data.return_canonized) def _upload_from_file(self, local_path, dest_path, extra=None): if not hasattr(self._driver, 'upload_object'): @@ -1473,7 +1472,6 @@ class _HttpDriver(_Driver): try: container = self.get_container(container_name) url = container_name + object_name - return container.session.head(url, allow_redirects=True, headers=container.get_headers(url)).ok except Exception: return False diff --git a/clearml/storage/manager.py b/clearml/storage/manager.py index 14c01217..7e6425a1 100644 --- a/clearml/storage/manager.py +++ b/clearml/storage/manager.py @@ -1,5 +1,4 @@ import fnmatch -import os import shutil import tarfile from multiprocessing.pool import ThreadPool @@ -7,7 +6,6 @@ from random import random from time import time from typing import List, Optional, Union from zipfile import ZipFile -from six.moves.urllib.parse import urlparse from pathlib2 import Path @@ -304,8 +302,8 @@ class StorageManager(object): if not local_folder: local_folder = CacheManager.get_cache_manager().get_cache_folder() local_path = 
str(Path(local_folder).expanduser().absolute() / bucket_path) - helper = StorageHelper.get(remote_url) + return helper.download_to_file( remote_url, local_path, diff --git a/clearml/task.py b/clearml/task.py index 356af1a1..282612ba 100644 --- a/clearml/task.py +++ b/clearml/task.py @@ -1742,7 +1742,7 @@ class Task(_Task): After having :meth:`Task.close` -d a task, the respective object cannot be used anymore and methods like :meth:`Task.connect` or :meth:`Task.connect_configuration` will throw a `ValueError`. In order to obtain an object representing the task again, use methods like :meth:`Task.get_task`. - + .. warning:: Only call :meth:`Task.close` if you are certain the Task is not needed. """ @@ -1988,7 +1988,9 @@ class Task(_Task): corresponding to debug sample's file name in the UI, also known as variant :param int n_last_iterations: How many debug samples iterations to fetch in reverse chronological order. Leave empty to get all debug samples. + :raise: TypeError if `n_last_iterations` is explicitly set to anything other than a positive integer value + :return: A list of `dict`s, each dictionary containing the debug sample's URL and other metadata. The URLs can be passed to :meth:`StorageManager.get_local_copy` to fetch local copies of debug samples. 
""" @@ -2021,11 +2023,14 @@ class Task(_Task): def _get_debug_samples(self, title, series, n_last_iterations=None): response = self._send_debug_image_request(title, series, n_last_iterations) + debug_samples = [] + while True: - scroll_id = response.response.scroll_id - for metric_resp in response.response.metrics: - iterations_events = [iteration["events"] for iteration in metric_resp.iterations] # type: List[List[dict]] + scroll_id = response.response_data.get("scroll_id", None) + + for metric_resp in response.response_data.get("metrics", []): + iterations_events = [iteration["events"] for iteration in metric_resp.get("iterations", [])] # type: List[List[dict]] flattened_events = (event for single_iter_events in iterations_events for event in single_iter_events) @@ -2037,8 +2042,8 @@ class Task(_Task): if (len(debug_samples) == n_last_iterations or all( - len(metric_resp.iterations) == 0 - for metric_resp in response.response.metrics)): + len(metric_resp.get("iterations", [])) == 0 + for metric_resp in response.response_data.get("metrics", []))): break return debug_samples @@ -2877,13 +2882,11 @@ class Task(_Task): Set offline mode, where all data and logs are stored into local folder, for later transmission .. note:: - `Task.set_offline` can't move the same task from offline to online, nor can it be applied before `Task.create`. See below an example of **incorect** usage of `Task.set_offline`: - .. 
code-block:: py - from clearml import Task + Task.set_offline(True) task = Task.create(project_name='DEBUG', task_name="offline") # ^^^ an error or warning is emitted, telling us that `Task.set_offline(True)` @@ -2891,23 +2894,25 @@ class Task(_Task): Task.set_offline(False) # ^^^ an error or warning is emitted, telling us that running `Task.set_offline(False)` # while the current task is not closed is not something we support + data = task.export_task() + imported_task = Task.import_task(task_data=data) The correct way to use `Task.set_offline` can be seen in the following example: .. code-block:: py - from clearml import Task + Task.set_offline(True) task = Task.init(project_name='DEBUG', task_name="offline") task.upload_artifact("large_artifact", "test_strign") task.close() Task.set_offline(False) + imported_task = Task.import_offline_session(task.get_offline_mode_folder()) :param offline_mode: If True, offline-mode is turned on, and no communication to the backend is enabled. - :return: """ if running_remotely() or bool(offline_mode) == InterfaceBase._offline_mode: @@ -2932,6 +2937,7 @@ class Task(_Task): # type: () -> bool """ Return offline-mode state, If in offline-mode, no communication to the backend is enabled. + :return: boolean offline-mode state """ return cls._offline_mode @@ -3542,11 +3548,9 @@ class Task(_Task): def _check_keys(dict_, warning_sent=False): if warning_sent: return - for k, v in dict_.items(): if warning_sent: return - if not isinstance(k, str): getLogger().warning( "Unsupported key of type '{}' found when connecting dictionary. 
It will be converted to str".format( @@ -3554,12 +3558,10 @@ class Task(_Task): ) ) warning_sent = True - if isinstance(v, dict): _check_keys(v, warning_sent) if not running_remotely() or not (self.is_main_task() or self._is_remote_main_task()): - self._arguments.copy_from_dict(flatten_dictionary(dictionary), prefix=name) _check_keys(dictionary) flat_dict = {str(k): v for k, v in flatten_dictionary(dictionary).items()} self._arguments.copy_from_dict(flat_dict, prefix=name) @@ -3909,7 +3911,6 @@ class Task(_Task): try: # make sure the state of the offline data is saved self._edit() - # create zip file offline_folder = self.get_offline_mode_folder() zip_file = offline_folder.as_posix() + '.zip' diff --git a/docs/tutorials/Getting_Started_2_Setting_Up_Agent.ipynb b/docs/tutorials/Getting_Started_2_Setting_Up_Agent.ipynb index 3e8ca5c1..35f15f06 100644 --- a/docs/tutorials/Getting_Started_2_Setting_Up_Agent.ipynb +++ b/docs/tutorials/Getting_Started_2_Setting_Up_Agent.ipynb @@ -223,11 +223,18 @@ "toc_visible": true }, "kernelspec": { - "display_name": "Python 3", + "display_name": ".venv", + "language": "python", "name": "python3" }, "language_info": { - "name": "python" + "name": "python", + "version": "3.10.6 (main, Nov 14 2022, 16:10:14) [GCC 11.3.0]" + }, + "vscode": { + "interpreter": { + "hash": "8b483fbf9fa60c6c6195634afd5159f586a30c5c6a9d31fa17f93a17f02fdc40" + } } }, "nbformat": 4, diff --git a/docs/tutorials/Getting_Started_3_Remote_Execution.ipynb b/docs/tutorials/Getting_Started_3_Remote_Execution.ipynb index 1c9641a1..b5b1771e 100644 --- a/docs/tutorials/Getting_Started_3_Remote_Execution.ipynb +++ b/docs/tutorials/Getting_Started_3_Remote_Execution.ipynb @@ -1,454 +1,455 @@ { - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "B7Fs0CZeFPVM" - }, - "source": [ - "
\n", - "\n", - " \n", - " \n", - "\n", - "\n", - "
\n", - "\n", - "

Notebook 3: Remote Task Execution

\n", - "\n", - "
\n", - "\n", - "Hi there! This is the third notebook in the ClearML getting started notebook series, meant to teach you the ropes. In the last notebook we set up an agent within the Google Colab environment. Now it's time to make that agent run our task! We're going to clone our first, exisiting, experiment, change some parameters and then run the resulting task remotely using our colab agent.\n", - "\n", - "You can find out more details about the other ClearML modules and the technical specifics of each in our documentation.\n", - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - "
Step 1: Experiment Management\n", - " \"Open\n", - "
Step 2: Remote Agent\n", - " \"Open\n", - "
Step 3: Remote Task Execution\n", - " \"Open\n", - "
\n", - "\n", - "
" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "U7jM_FiPLnvi" - }, - "source": [ - "# 📦 Setup\n", - "\n", - "Just like in the other notebooks, we will need to install and setup ClearML.\n", - "\n", - "**NOTE: Make sure the agent is still running in the other notebook, when running this one!**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "H1Otow-YE9ks" - }, - "outputs": [], - "source": [ - "%pip install --upgrade clearml\n", - "import clearml\n", - "clearml.browser_login()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vneMjf39Z3Uh" - }, - "source": [ - "# 🔎 Querying our Task\n", - "\n", - "If we want to remotely run a task, we first need to know which one!\n", - "Getting the Task ID from the webUI is quite easy, just navigate to your project, select the task you're interested in and click this button to copy the Task ID to your clipboard:\n", - "\n", - "![](https://i.imgur.com/W7ZnEnX.png)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "B7Fs0CZeFPVM" }, - "id": "0TazW5dSZ2w1", - "outputId": "0b238e0d-32b8-4a9d-e4bf-141b798e6806" + "source": [ + "
\n", + "\n", + " \n", + " \n", + "\n", + "\n", + "
\n", + "\n", + "

Notebook 3: Remote Task Execution

\n", + "\n", + "
\n", + "\n", + "Hi there! This is the third notebook in the ClearML getting started notebook series, meant to teach you the ropes. In the last notebook we set up an agent within the Google Colab environment. Now it's time to make that agent run our task! We're going to clone our first, existing, experiment, change some parameters and then run the resulting task remotely using our colab agent.\n", + "\n", + "You can find out more details about the other ClearML modules and the technical specifics of each in our documentation.\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
Step 1: Experiment Management\n", + " \"Open\n", + "
Step 2: Remote Agent\n", + " \"Open\n", + "
Step 3: Remote Task Execution\n", + " \"Open\n", + "
\n", + "\n", + "
" + ] }, - "outputs": [ - { - "data": { - "text/plain": [ - "('TB Logging', '0bfefb8b86ba44798d9abe34ba0a6ab4')" - ] + { + "cell_type": "markdown", + "metadata": { + "id": "U7jM_FiPLnvi" + }, + "source": [ + "# 📦 Setup\n", + "\n", + "Just like in the other notebooks, we will need to install and setup ClearML.\n", + "\n", + "**NOTE: Make sure the agent is still running in the other notebook, when running this one!**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "H1Otow-YE9ks" + }, + "outputs": [], + "source": [ + "%pip install --upgrade clearml\n", + "import clearml\n", + "clearml.browser_login()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vneMjf39Z3Uh" + }, + "source": [ + "# 🔎 Querying our Task\n", + "\n", + "If we want to remotely run a task, we first need to know which one!\n", + "Getting the Task ID from the webUI is quite easy, just navigate to your project, select the task you're interested in and click this button to copy the Task ID to your clipboard:\n", + "\n", + "![](https://i.imgur.com/W7ZnEnX.png)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# from clearml import Task\n", - "\n", - "# task = Task.get_task(task_id=\"YOUR_TASK_ID\")\n", - "# task.name, task.id" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hgRieZmvgIPS" - }, - "source": [ - "However, we can also query ClearML using the Python SDK. Let's search your ClearML history for any task in the project `Getting Started` with the name `XGBoost Training` (both of which we used in tutorial notebook 1). ClearML should then give us a list of Task IDs that could fit the bill." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + "id": "0TazW5dSZ2w1", + "outputId": "0b238e0d-32b8-4a9d-e4bf-141b798e6806" }, - "id": "uQSAOb_AgF78", - "outputId": "81a2d3bc-b4aa-4af3-b3fe-d9f3fcec6a7a" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "XGBoost Training 1c7f708a976b413da147b341c54f862d\n" - ] - } - ], - "source": [ - "from clearml import Task\n", - "\n", - "# This will get the single most recent task fitting the description\n", - "# ⚠️ NOTE: Make sure you ran the XGBoost Training cell from notebook 1 and that the project and task name exist!\n", - "task = Task.get_task(project_name=\"Getting Started\", task_name=\"XGBoost Training\")\n", - "\n", - "if not task:\n", - " print(\"⚠️ WARNING: In order to make this work, you will need the XGBoost Training task from Notebook 1. Make sure to run the cell linked below in the same ClearML account!\")\n", - " print(\"https://colab.research.google.com/drive/1oHiW1qwLVvazk3qFZWBULfpciPEQp8kc#scrollTo=CSaL3XTqhYAy&line=5&uniqifier=1\")\n", - "else:\n", - " print(task.name, task.id)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "gRz3FQFdjKcC", - "outputId": "63148b61-136c-4cd5-fc61-5829c713c831" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "[('XGBoost Training', '959c353c183245d5b85bfb2958e49f5d'),\n", - " ('XGBoost Training', '6810dd7c2c944099a4bc544a4c5079ff'),\n", - " ('XGBoost Training', '818894e032a44e7c9bafd758ee9fae7b'),\n", - " ('XGBoost Training', '762d13fc58ad4c5e8ae3695195b00d97'),\n", - " ('XGBoost Training', '3bf2b49800c24fd7b16f64f8c70cef83'),\n", - " ('XGBoost Training', '95ead34d9ce94955b2998b1de37f4892'),\n", - " ('XGBoost Training', '1c7f708a976b413da147b341c54f862d')]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - 
"source": [ - "# This will get all tasks fitting the description\n", - "tasks = Task.get_tasks(project_name=\"Getting Started\", task_name=\"XGBoost Training\")\n", - "[(task.name, task.id) for task in tasks]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - }, - "id": "nyTBIPxUjutj", - "outputId": "36f3808b-4386-4702-898c-8bb3fee0246a" - }, - "outputs": [ - { - "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "string" + "outputs": [ + { + "data": { + "text/plain": [ + "('TB Logging', '0bfefb8b86ba44798d9abe34ba0a6ab4')" + ] }, - "text/plain": [ - "'1c7f708a976b413da147b341c54f862d'" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Let's set our task ID based on the results here so we can use it below\n", - "task_id = task.id\n", - "task_id" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "FLmjZG8gj2WO" + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# from clearml import Task\n", + "\n", + "# task = Task.get_task(task_id=\"YOUR_TASK_ID\")\n", + "# task.name, task.id" + ] }, - "source": [ - "# 👥 Clone and Enqueue Task\n", - "\n", - "Now that we know which task we want to run, we can start by cloning it. Cloning a task makes it editable. Now we can override any of the parameters or runtime configurations. 
For example, here we can set a different `max_depth` by specifying the parameter name as well as the section.\n", - "\n", - "![](https://i.imgur.com/IOL20Sg.png)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + { + "cell_type": "markdown", + "metadata": { + "id": "hgRieZmvgIPS" }, - "id": "OtYJyyY7jzpl", - "outputId": "3dbd436b-648c-4c73-bbfd-1eeba10e47e2" + "source": [ + "However, we can also query ClearML using the Python SDK. Let's search your ClearML history for any task in the project `Getting Started` with the name `XGBoost Training` (both of which we used in tutorial notebook 1). ClearML should then give us a list of Task IDs that could fit the bill." + ] }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Clone the original task and keep track of the new one\n", - "new_task = Task.clone(source_task=task)\n", - "# Override any of the parameters!\n", - "new_task.update_parameters({\"General/max_depth\": 3})\n", - "# We can even rename it if we wanted\n", - "new_task.rename(f\"Cloned Task\")\n", - "# Now enuque it for the colab worker to start working on it!\n", - "Task.enqueue(task=new_task, queue_name=\"default\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 + "id": "uQSAOb_AgF78", + "outputId": "81a2d3bc-b4aa-4af3-b3fe-d9f3fcec6a7a" }, - "id": "3vxyEaJNk_KV", - "outputId": "ddf31b3e-1cf9-498c-9440-5fbf18802be8" + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "XGBoost Training 1c7f708a976b413da147b341c54f862d\n" + ] + } + ], + "source": [ + "from clearml import Task\n", + "\n", + "# This will get the 
single most recent task fitting the description\n", + "# ⚠️ NOTE: Make sure you ran the XGBoost Training cell from notebook 1 and that the project and task name exist!\n", + "task = Task.get_task(project_name=\"Getting Started\", task_name=\"XGBoost Training\")\n", + "\n", + "if not task:\n", + " print(\"⚠️ WARNING: In order to make this work, you will need the XGBoost Training task from Notebook 1. Make sure to run the cell linked below in the same ClearML account!\")\n", + " print(\"https://colab.research.google.com/drive/1oHiW1qwLVvazk3qFZWBULfpciPEQp8kc#scrollTo=CSaL3XTqhYAy&line=5&uniqifier=1\")\n", + "else:\n", + " print(task.name, task.id)" + ] }, - "outputs": [ - { - "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "string" + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "gRz3FQFdjKcC", + "outputId": "63148b61-136c-4cd5-fc61-5829c713c831" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[('XGBoost Training', '959c353c183245d5b85bfb2958e49f5d'),\n", + " ('XGBoost Training', '6810dd7c2c944099a4bc544a4c5079ff'),\n", + " ('XGBoost Training', '818894e032a44e7c9bafd758ee9fae7b'),\n", + " ('XGBoost Training', '762d13fc58ad4c5e8ae3695195b00d97'),\n", + " ('XGBoost Training', '3bf2b49800c24fd7b16f64f8c70cef83'),\n", + " ('XGBoost Training', '95ead34d9ce94955b2998b1de37f4892'),\n", + " ('XGBoost Training', '1c7f708a976b413da147b341c54f862d')]" + ] }, - "text/plain": [ - "'in_progress'" - ] + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# This will get all tasks fitting the description\n", + "tasks = Task.get_tasks(project_name=\"Getting Started\", task_name=\"XGBoost Training\")\n", + "[(task.name, task.id) for task in tasks]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 }, - 
"execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Let's take a look at the status\n", - "new_task.status" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "hx4WqzfDIPTl" - }, - "outputs": [], - "source": [ - "from time import sleep\n", - "# Now we can set up a loop that waits until our task is done!\n", - "# If you have enabled notifications on Colab, it will even let you know\n", - "# when the ClearML task is done!\n", - "while new_task.status not in [\"success\", \"failed\"]:\n", - " if new_task.status == \"draft\":\n", - " print(\"Task is still in draft mode! You have to enqueue it before the agent can run it.\")\n", - "\n", - " print(f\"Task is not done yet! Current status: {new_task.status}\")\n", - " sleep(5)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mefZhEBVlOx-" - }, - "source": [ - "When a task is enqueued, it has 4 possible states:\n", - "\n", - "- Pending (the task is waiting to be pulled by a worker)\n", - "- In Progress (the task is pulled and being executed)\n", - "- Completed\n", - "- Failed\n", - "\n", - "You can keep track of this status by using the `.status` property of a clearml Task instance. If you only have the Task ID, you can always get the task object by running `task = Task.get_task(task_id=\"your_task_id\")`\n", - "\n", - "In the webUI the status of a task is pretty visible:\n", - "![](https://i.imgur.com/Ep7v6AN.png)\n", - "\n", - "**NOTE: The first time running a task can take quite a long time because the agent has to install all of the original packages and get the original code. 
Everything is cached however, so it should be faster next time!**\n", - "\n", - "If everything went well, you should now see the colab worker start working on the task:\n", - "\n", - "![](https://i.imgur.com/eI0ui5J.png)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ejr6wqZxmqil" - }, - "source": [ - "# 🧐 Inspecting the results\n", - "\n", - "Now that the task has completed successfully, you can now view its results just like any other experiment in the experiment manager!\n", - "\n", - "![](https://i.imgur.com/zfkuKlO.png)\n", - "\n", - "You can of course also select both the original and the cloned experiment and compare them directly in ClearML!\n", - "\n", - "![](https://i.imgur.com/Gd56Swd.png)\n", - "\n", - "Remember that the SDK is quite powerful too, so almost any piece of information that you can see in the webUI, you can also get from the SDK to build automations onto.\n", - "\n", - "For example, we could get the reported scalars right into our python environment with just a single function." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + "id": "nyTBIPxUjutj", + "outputId": "36f3808b-4386-4702-898c-8bb3fee0246a" }, - "id": "C86NZAopnsJE", - "outputId": "3e7e539a-7069-45a5-d534-d97debb4dff8" + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "'1c7f708a976b413da147b341c54f862d'" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's set our task ID based on the results here so we can use it below\n", + "task_id = task.id\n", + "task_id" + ] }, - "outputs": [], - "source": [ - "new_task.get_reported_scalars()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "yu1X2tJNoIQ-" + { + "cell_type": "markdown", + "metadata": { + "id": "FLmjZG8gj2WO" + }, + "source": [ + "# 👥 Clone and Enqueue Task\n", + "\n", + "Now that we know which task we want to run, we can start by cloning it. Cloning a task makes it editable. Now we can override any of the parameters or runtime configurations. 
For example, here we can set a different `max_depth` by specifying the parameter name as well as the section.\n", + "\n", + "![](https://i.imgur.com/IOL20Sg.png)" + ] }, - "source": [ - "# 🥾 Next steps\n", - "\n", - "Now that you know the basics of ClearML, you can get started with running your own experiments!\n", - "\n", - "## 📺 Check out our [Youtube Channel](https://www.youtube.com/@ClearML)!\n", - "It holds [detailed tutorial videos](https://www.youtube.com/watch?v=ZxgfHhPi8Gk&list=PLMdIlCuMqSTnoC45ME5_JnsJX0zWqDdlO), as well as [cool use cases and walkthroughs of real-life projects](https://www.youtube.com/watch?v=quSGXvuK1IM&list=PLMdIlCuMqSTmUfsAWXrK8zibwvxfduFPy)!\n", - "\n", - "[![Getting Started](http://img.youtube.com/vi/s3k9ntmQmD4/0.jpg)](http://www.youtube.com/watch?v=s3k9ntmQmD4 \"Getting Started\")\n", - "\n", - "[![Day in The Life Of](http://img.youtube.com/vi/quSGXvuK1IM/0.jpg)](http://www.youtube.com/watch?v=quSGXvuK1IM \"Day in The Life Of\")\n", - "\n", - "[![Training a Sarcasm Detector](http://img.youtube.com/vi/0wmwnNnN8ow/0.jpg)](http://www.youtube.com/watch?v=0wmwnNnN8ow \"Training a Sarcasm Detector\")\n", - "\n", - "\n", - "\n", - "## 💪 Check out our examples\n", - "\n", - "We have very [specific feature examples](https://clear.ml/docs/latest/docs/clearml_sdk/task_sdk) as well as a repo full of [example scripts](https://github.com/allegroai/clearml/tree/master/examples)!\n", - "\n", - "## 📚 Check out our [documentation](https://clear.ml/docs/latest/docs)\n", - "\n", - "## 🔥 Check out our [blog](https://clear.ml/blog/)\n", - "\n" - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [ - "vneMjf39Z3Uh", - "FLmjZG8gj2WO", - "Ejr6wqZxmqil" - ], - "provenance": [], - "toc_visible": true + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OtYJyyY7jzpl", + "outputId": "3dbd436b-648c-4c73-bbfd-1eeba10e47e2" + }, + "outputs": [ + { + "data": { + 
"text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Clone the original task and keep track of the new one\n", + "new_task = Task.clone(source_task=task)\n", + "# Override any of the parameters!\n", + "new_task.update_parameters({\"General/max_depth\": 3})\n", + "# We can even rename it if we wanted\n", + "new_task.rename(f\"Cloned Task\")\n", + "# Now enqueue it for the colab worker to start working on it!\n", + "Task.enqueue(task=new_task, queue_name=\"default\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "3vxyEaJNk_KV", + "outputId": "ddf31b3e-1cf9-498c-9440-5fbf18802be8" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "'in_progress'" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's take a look at the status\n", + "new_task.status" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hx4WqzfDIPTl" + }, + "outputs": [], + "source": [ + "from time import sleep\n", + "# Now we can set up a loop that waits until our task is done!\n", + "# If you have enabled notifications on Colab, it will even let you know\n", + "# when the ClearML task is done!\n", + "while new_task.status not in [\"success\", \"failed\"]:\n", + " if new_task.status == \"draft\":\n", + " print(\"Task is still in draft mode! You have to enqueue it before the agent can run it.\")\n", + "\n", + " print(f\"Task is not done yet! 
Current status: {new_task.status}\")\n", + " sleep(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mefZhEBVlOx-" + }, + "source": [ + "When a task is enqueued, it has 4 possible states:\n", + "\n", + "- Pending (the task is waiting to be pulled by a worker)\n", + "- In Progress (the task is pulled and being executed)\n", + "- Completed\n", + "- Failed\n", + "\n", + "You can keep track of this status by using the `.status` property of a clearml Task instance. If you only have the Task ID, you can always get the task object by running `task = Task.get_task(task_id=\"your_task_id\")`\n", + "\n", + "In the webUI the status of a task is pretty visible:\n", + "![](https://i.imgur.com/Ep7v6AN.png)\n", + "\n", + "**NOTE: The first time running a task can take quite a long time because the agent has to install all of the original packages and get the original code. Everything is cached however, so it should be faster next time!**\n", + "\n", + "If everything went well, you should now see the colab worker start working on the task:\n", + "\n", + "![](https://i.imgur.com/eI0ui5J.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ejr6wqZxmqil" + }, + "source": [ + "# 🧐 Inspecting the results\n", + "\n", + "Now that the task has completed successfully, you can now view its results just like any other experiment in the experiment manager!\n", + "\n", + "![](https://i.imgur.com/zfkuKlO.png)\n", + "\n", + "You can of course also select both the original and the cloned experiment and compare them directly in ClearML!\n", + "\n", + "![](https://i.imgur.com/Gd56Swd.png)\n", + "\n", + "Remember that the SDK is quite powerful too, so almost any piece of information that you can see in the webUI, you can also get from the SDK to build automations onto.\n", + "\n", + "For example, we could get the reported scalars right into our python environment with just a single function." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "C86NZAopnsJE", + "outputId": "3e7e539a-7069-45a5-d534-d97debb4dff8" + }, + "outputs": [], + "source": [ + "new_task.get_reported_scalars()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "yu1X2tJNoIQ-" + }, + "source": [ + "# 🥾 Next steps\n", + "\n", + "Now that you know the basics of ClearML, you can get started with running your own experiments!\n", + "\n", + "## 📺 Check out our [Youtube Channel](https://www.youtube.com/@ClearML)!\n", + "It holds [detailed tutorial videos](https://www.youtube.com/watch?v=ZxgfHhPi8Gk&list=PLMdIlCuMqSTnoC45ME5_JnsJX0zWqDdlO), as well as [cool use cases and walkthroughs of real-life projects](https://www.youtube.com/watch?v=quSGXvuK1IM&list=PLMdIlCuMqSTmUfsAWXrK8zibwvxfduFPy)!\n", + "\n", + "[![Getting Started](http://img.youtube.com/vi/s3k9ntmQmD4/0.jpg)](http://www.youtube.com/watch?v=s3k9ntmQmD4 \"Getting Started\")\n", + "\n", + "[![Day in The Life Of](http://img.youtube.com/vi/quSGXvuK1IM/0.jpg)](http://www.youtube.com/watch?v=quSGXvuK1IM \"Day in The Life Of\")\n", + "\n", + "[![Training a Sarcasm Detector](http://img.youtube.com/vi/0wmwnNnN8ow/0.jpg)](http://www.youtube.com/watch?v=0wmwnNnN8ow \"Training a Sarcasm Detector\")\n", + "\n", + "\n", + "\n", + "## 💪 Check out our examples\n", + "\n", + "We have very [specific feature examples](https://clear.ml/docs/latest/docs/clearml_sdk/task_sdk) as well as a repo full of [example scripts](https://github.com/allegroai/clearml/tree/master/examples)!\n", + "\n", + "## 📚 Check out our [documentation](https://clear.ml/docs/latest/docs)\n", + "\n", + "## 🔥 Check out our [blog](https://clear.ml/blog/)\n", + "\n" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "vneMjf39Z3Uh", + "FLmjZG8gj2WO", + "Ejr6wqZxmqil" + ], + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + 
"display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} + "nbformat": 4, + "nbformat_minor": 0 + } + \ No newline at end of file diff --git a/examples/reporting/configuration_including_hyperparameters.py b/examples/reporting/hyper_parameters.py similarity index 100% rename from examples/reporting/configuration_including_hyperparameters.py rename to examples/reporting/hyper_parameters.py index b725090b..bb8348bf 100644 --- a/examples/reporting/configuration_including_hyperparameters.py +++ b/examples/reporting/hyper_parameters.py @@ -17,6 +17,7 @@ from enum import Enum from clearml import Task from clearml.task_parameters import TaskParameters, param, percent_param + # Connecting ClearML with the current process, # from here on everything is logged automatically task = Task.init(project_name='FirstTrial', task_name='first_trial') @@ -43,7 +44,6 @@ class IntEnumClass(Enum): C = 1 D = 2 - parameters = { 'list': [1, 2, 3], 'dict': {'a': 1, 'b': 2},