From a2b3f1cf3b3e58f94b003658ce1ae69da7dbc3e7 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Thu, 22 Dec 2022 22:06:38 +0200 Subject: [PATCH] Add `Dataset.set_metadata()` and `Dataset.get_metadata()` --- clearml/binding/artifacts.py | 4 ++-- clearml/datasets/dataset.py | 40 +++++++++++++++++++++++++++++++++--- 2 files changed, 39 insertions(+), 5 deletions(-) diff --git a/clearml/binding/artifacts.py b/clearml/binding/artifacts.py index 64b67708..ed22fed3 100644 --- a/clearml/binding/artifacts.py +++ b/clearml/binding/artifacts.py @@ -141,7 +141,7 @@ class Artifact(object): self._object = self._not_set def get(self, force_download=False, deserialization_function=None): - # type: (bool, Optional[Callable[bytes, Any]]) -> Any + # type: (bool, Optional[Callable[[bytes], Any]]) -> Any """ Return an object constructed from the artifact file @@ -357,7 +357,7 @@ class Artifacts(object): auto_pickle=True, # type: bool wait_on_upload=False, # type: bool extension_name=None, # type: Optional[str] - serialization_function=None, # type: Optional[Callable[Any, Union[bytes, bytearray]]] + serialization_function=None, # type: Optional[Callable[[Any], Union[bytes, bytearray]]] ): # type: (...) -> bool if not Session.check_min_api_version("2.3"): diff --git a/clearml/datasets/dataset.py b/clearml/datasets/dataset.py index a72c3aeb..d293f7bb 100644 --- a/clearml/datasets/dataset.py +++ b/clearml/datasets/dataset.py @@ -800,6 +800,40 @@ class Dataset(object): return True + def set_metadata(self, metadata, metadata_name='metadata', ui_visible=True): + # type: (Union[numpy.ndarray, pd.DataFrame, Dict[str, Any]], str, bool) -> () + """ + Attach a user-defined metadata to the dataset. Check `Task.upload_artifact` for supported types. + If the metadata is a pandas DataFrame, optionally make it visible as a table in the UI. 
+ """ + self._task.upload_artifact(name=metadata_name, artifact_object=metadata) + if ui_visible: + if pd and isinstance(metadata, pd.DataFrame): + self.get_logger().report_table( + title='Dataset Metadata', + series='Dataset Metadata', + table_plot=metadata + ) + else: + self._task.get_logger().report_text( + "Displaying metadata in the UI is only supported for pandas Dataframes for now. Skipping!", + print_console=True, + ) + + def get_metadata(self, metadata_name='metadata'): + # type: (str) -> Optional[Union[numpy.ndarray, pd.DataFrame, dict, str, bool]] + """ + Get attached metadata back in its original format. Will return None if none was found. + """ + metadata = self._task.artifacts.get(metadata_name) + if metadata is None: + self._task.get_logger().report_text( + "Cannot find metadata on this task, are you sure it has the correct name?", + print_console=True, + ) + return None + return metadata.get() + def set_description(self, description): # type: (str) -> () """ @@ -2291,7 +2325,7 @@ class Dataset(object): return local_folder, cache def _release_lock_ds_target_folder(self, target_folder): - # type: () -> None + # type: (Union[str, Path]) -> None cache = CacheManager.get_cache_manager(cache_context=self.__cache_context) cache.unlock_cache_folder(target_folder) @@ -3167,7 +3201,7 @@ class Dataset(object): raise_on_multiple=False, shallow_search=True, ): - # type: (str, str, Optional[str], Optional[str], bool, bool) -> Tuple[str, str] + # type: (str, str, Optional[str], Optional[str], bool, bool) -> Tuple[Optional[str], Optional[str]] """ Gets the dataset ID that matches a project, name and a version. @@ -3257,7 +3291,7 @@ class Dataset(object): @classmethod def _build_hidden_project_name(cls, dataset_project, dataset_name): - # type: (str, str) -> Tuple[str, str] + # type: (str, str) -> Tuple[Optional[str], Optional[str]] """ Build the corresponding hidden name of a dataset, given its `dataset_project` and `dataset_name`