Add support for datasets version with non semantic version string, Issue #776

This commit is contained in:
allegroai 2022-09-15 16:03:14 +03:00
parent 63e7cbab30
commit 789ca3a76f
2 changed files with 66 additions and 10 deletions

View File

@ -142,6 +142,10 @@ class Dataset(object):
self._dataset_version = None self._dataset_version = None
if dataset_version: if dataset_version:
self._dataset_version = str(dataset_version).strip() self._dataset_version = str(dataset_version).strip()
if not Version.is_valid_version_string(self._dataset_version):
LoggerRoot.get_base_logger().warning(
"Setting non-semantic dataset version '{}'".format(self._dataset_version)
)
if task: if task:
self._task_pinger = None self._task_pinger = None
self._created_task = False self._created_task = False
@ -309,6 +313,22 @@ class Dataset(object):
return self._task.get_project_name().partition("/.datasets/")[-1] return self._task.get_project_name().partition("/.datasets/")[-1]
return self._task.name return self._task.name
@property
def version(self):
    # type: () -> Optional[str]
    """
    Return the version string currently stored on this dataset object.

    :return: The value of ``self._dataset_version`` (``None`` if no version was set)
    """
    return self._dataset_version

@version.setter
def version(self, version):
    # type: (str) -> ()
    """
    Store a new version string on this dataset object and on its task.

    The value is converted with ``str()`` and stripped of surrounding whitespace. A warning is
    logged if ``Version.is_valid_version_string`` rejects it, but the value is stored regardless.

    :param version: The new dataset version
    """
    normalized_version = str(version).strip()
    self._dataset_version = normalized_version

    is_semantic = Version.is_valid_version_string(normalized_version)
    if not is_semantic:
        warning_message = "Setting non-semantic dataset version '{}'".format(normalized_version)
        LoggerRoot.get_base_logger().warning(warning_message)

    # the same value is written both as a runtime property and as a user property of the task
    # noinspection PyProtectedMember
    self._task._set_runtime_properties({"version": normalized_version})
    self._task.set_user_properties(version=normalized_version)
@property @property
def tags(self): def tags(self):
# type: () -> List[str] # type: () -> List[str]
@ -1510,7 +1530,10 @@ class Dataset(object):
): ):
# type: (...) -> "Dataset" # type: (...) -> "Dataset"
""" """
Get a specific Dataset. If multiple datasets are found, the dataset with the highest version is returned Get a specific Dataset. If multiple datasets are found, the dataset with the
highest semantic version is returned. If no semantic version is found, the most recently
updated dataset is returned. This function raises an Exception in case no dataset
can be found and the ``auto_create=True`` flag is not set
:param dataset_id: Requested dataset ID :param dataset_id: Requested dataset ID
:param dataset_project: Requested dataset project name :param dataset_project: Requested dataset project name
@ -1518,7 +1541,7 @@ class Dataset(object):
:param dataset_tags: Requested dataset tags (list of tag strings) :param dataset_tags: Requested dataset tags (list of tag strings)
:param only_completed: Return only if the requested dataset is completed or published :param only_completed: Return only if the requested dataset is completed or published
:param only_published: Return only if the requested dataset is published :param only_published: Return only if the requested dataset is published
:param auto_create: Create new dataset if it does not exist yet :param auto_create: Create a new dataset if it does not exist yet
:param writable_copy: Get a newly created mutable dataset with the current one as its parent, :param writable_copy: Get a newly created mutable dataset with the current one as its parent,
so new files can be added to the instance. so new files can be added to the instance.
:param dataset_version: Requested version of the Dataset :param dataset_version: Requested version of the Dataset
@ -1533,10 +1556,17 @@ class Dataset(object):
""" """
if not any([dataset_id, dataset_project, dataset_name, dataset_tags]): if not any([dataset_id, dataset_project, dataset_name, dataset_tags]):
raise ValueError("Dataset selection criteria not met. Didn't provide id/name/project/tags correctly.") raise ValueError("Dataset selection criteria not met. Didn't provide id/name/project/tags correctly.")
if not alias:
LoggerRoot.get_base_logger().info(
"Dataset.get() did not specify alias. Dataset information wont be automatically logged in ClearML Server.")
mutually_exclusive(dataset_id=dataset_id, dataset_project=dataset_project, _require_at_least_one=False) mutually_exclusive(dataset_id=dataset_id, dataset_project=dataset_project, _require_at_least_one=False)
mutually_exclusive(dataset_id=dataset_id, dataset_name=dataset_name, _require_at_least_one=False) mutually_exclusive(dataset_id=dataset_id, dataset_name=dataset_name, _require_at_least_one=False)
invalid_kwargs = [kwarg for kwarg in kwargs.keys() if not kwarg.startswith("_")]
if invalid_kwargs:
raise ValueError("Invalid 'Dataset.get' arguments: {}".format(invalid_kwargs))
current_task = Task.current_task() current_task = Task.current_task()
def get_instance(dataset_id_): def get_instance(dataset_id_):
@ -3025,14 +3055,33 @@ class Dataset(object):
) )
result_dataset = None result_dataset = None
for dataset in datasets: for dataset in datasets:
current_version = dataset.runtime.get("version") candidate_dataset_version = dataset.runtime.get("version")
if not current_version: if not dataset_version:
continue if not result_dataset:
if dataset_version is None and ( result_dataset = dataset
not result_dataset or Version(result_dataset.runtime["version"]) < Version(current_version) else:
): # noinspection PyBroadException
result_dataset = dataset try:
elif dataset_version == current_version:
if (
candidate_dataset_version
and Version.is_valid_version_string(candidate_dataset_version)
and (
(
not result_dataset.runtime.get("version")
or not Version.is_valid_version_string(result_dataset.runtime.get("version"))
)
or (
result_dataset.runtime.get("version")
and Version(result_dataset.runtime.get("version"))
< Version(candidate_dataset_version)
)
)
):
result_dataset = dataset
except Exception:
pass
elif dataset_version == candidate_dataset_version:
if result_dataset and raise_on_multiple: if result_dataset and raise_on_multiple:
raise ValueError( raise ValueError(
"Multiple datasets found with dataset_project={}, dataset_name={}, dataset_version={}".format( "Multiple datasets found with dataset_project={}, dataset_name={}, dataset_version={}".format(

View File

@ -267,6 +267,13 @@ class Version(_BaseVersion):
return letter, int(number) return letter, int(number)
@classmethod
def is_valid_version_string(cls, version_string):
    """
    Check whether a string matches the version pattern of this class.

    :param version_string: Candidate version string. ``None`` and empty strings are rejected
        without consulting the pattern
    :return: True if ``cls._regex`` finds a match in ``version_string``, False otherwise
    """
    if version_string:
        # a successful search returns a match object, a failed one returns None
        return cls._regex.search(version_string) is not None
    return False
@classmethod @classmethod
def _parse_local_version(cls, local): def _parse_local_version(cls, local):
""" """