mirror of
https://github.com/clearml/clearml
synced 2025-04-04 21:03:00 +00:00
Add support for datasets version with non semantic version string, Issue #776
This commit is contained in:
parent
63e7cbab30
commit
789ca3a76f
@ -142,6 +142,10 @@ class Dataset(object):
|
|||||||
self._dataset_version = None
|
self._dataset_version = None
|
||||||
if dataset_version:
|
if dataset_version:
|
||||||
self._dataset_version = str(dataset_version).strip()
|
self._dataset_version = str(dataset_version).strip()
|
||||||
|
if not Version.is_valid_version_string(self._dataset_version):
|
||||||
|
LoggerRoot.get_base_logger().warning(
|
||||||
|
"Setting non-semantic dataset version '{}'".format(self._dataset_version)
|
||||||
|
)
|
||||||
if task:
|
if task:
|
||||||
self._task_pinger = None
|
self._task_pinger = None
|
||||||
self._created_task = False
|
self._created_task = False
|
||||||
@ -309,6 +313,22 @@ class Dataset(object):
|
|||||||
return self._task.get_project_name().partition("/.datasets/")[-1]
|
return self._task.get_project_name().partition("/.datasets/")[-1]
|
||||||
return self._task.name
|
return self._task.name
|
||||||
|
|
||||||
|
@property
|
||||||
|
def version(self):
|
||||||
|
# type: () -> Optional[str]
|
||||||
|
return self._dataset_version
|
||||||
|
|
||||||
|
@version.setter
|
||||||
|
def version(self, version):
|
||||||
|
# type: (str) -> ()
|
||||||
|
version = str(version).strip()
|
||||||
|
self._dataset_version = version
|
||||||
|
if not Version.is_valid_version_string(version):
|
||||||
|
LoggerRoot.get_base_logger().warning("Setting non-semantic dataset version '{}'".format(version))
|
||||||
|
# noinspection PyProtectedMember
|
||||||
|
self._task._set_runtime_properties({"version": version})
|
||||||
|
self._task.set_user_properties(version=version)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def tags(self):
|
def tags(self):
|
||||||
# type: () -> List[str]
|
# type: () -> List[str]
|
||||||
@ -1510,7 +1530,10 @@ class Dataset(object):
|
|||||||
):
|
):
|
||||||
# type: (...) -> "Dataset"
|
# type: (...) -> "Dataset"
|
||||||
"""
|
"""
|
||||||
Get a specific Dataset. If multiple datasets are found, the dataset with the highest version is returned
|
Get a specific Dataset. If multiple datasets are found, the dataset with the
|
||||||
|
highest semantic version is returned. If no semantic version if found, the most recently
|
||||||
|
updated dataset is returned. This functions raises an Exception in case no dataset
|
||||||
|
can be found and the ``auto_create=True`` flag is not set
|
||||||
|
|
||||||
:param dataset_id: Requested dataset ID
|
:param dataset_id: Requested dataset ID
|
||||||
:param dataset_project: Requested dataset project name
|
:param dataset_project: Requested dataset project name
|
||||||
@ -1518,7 +1541,7 @@ class Dataset(object):
|
|||||||
:param dataset_tags: Requested dataset tags (list of tag strings)
|
:param dataset_tags: Requested dataset tags (list of tag strings)
|
||||||
:param only_completed: Return only if the requested dataset is completed or published
|
:param only_completed: Return only if the requested dataset is completed or published
|
||||||
:param only_published: Return only if the requested dataset is published
|
:param only_published: Return only if the requested dataset is published
|
||||||
:param auto_create: Create new dataset if it does not exist yet
|
:param auto_create: Create a new dataset if it does not exist yet
|
||||||
:param writable_copy: Get a newly created mutable dataset with the current one as its parent,
|
:param writable_copy: Get a newly created mutable dataset with the current one as its parent,
|
||||||
so new files can added to the instance.
|
so new files can added to the instance.
|
||||||
:param dataset_version: Requested version of the Dataset
|
:param dataset_version: Requested version of the Dataset
|
||||||
@ -1533,10 +1556,17 @@ class Dataset(object):
|
|||||||
"""
|
"""
|
||||||
if not any([dataset_id, dataset_project, dataset_name, dataset_tags]):
|
if not any([dataset_id, dataset_project, dataset_name, dataset_tags]):
|
||||||
raise ValueError("Dataset selection criteria not met. Didn't provide id/name/project/tags correctly.")
|
raise ValueError("Dataset selection criteria not met. Didn't provide id/name/project/tags correctly.")
|
||||||
|
if not alias:
|
||||||
|
LoggerRoot.get_base_logger().info(
|
||||||
|
"Dataset.get() did not specify alias. Dataset information won’t be automatically logged in ClearML Server.")
|
||||||
|
|
||||||
mutually_exclusive(dataset_id=dataset_id, dataset_project=dataset_project, _require_at_least_one=False)
|
mutually_exclusive(dataset_id=dataset_id, dataset_project=dataset_project, _require_at_least_one=False)
|
||||||
mutually_exclusive(dataset_id=dataset_id, dataset_name=dataset_name, _require_at_least_one=False)
|
mutually_exclusive(dataset_id=dataset_id, dataset_name=dataset_name, _require_at_least_one=False)
|
||||||
|
|
||||||
|
invalid_kwargs = [kwarg for kwarg in kwargs.keys() if not kwarg.startswith("_")]
|
||||||
|
if invalid_kwargs:
|
||||||
|
raise ValueError("Invalid 'Dataset.get' arguments: {}".format(invalid_kwargs))
|
||||||
|
|
||||||
current_task = Task.current_task()
|
current_task = Task.current_task()
|
||||||
|
|
||||||
def get_instance(dataset_id_):
|
def get_instance(dataset_id_):
|
||||||
@ -3025,14 +3055,33 @@ class Dataset(object):
|
|||||||
)
|
)
|
||||||
result_dataset = None
|
result_dataset = None
|
||||||
for dataset in datasets:
|
for dataset in datasets:
|
||||||
current_version = dataset.runtime.get("version")
|
candidate_dataset_version = dataset.runtime.get("version")
|
||||||
if not current_version:
|
if not dataset_version:
|
||||||
continue
|
if not result_dataset:
|
||||||
if dataset_version is None and (
|
result_dataset = dataset
|
||||||
not result_dataset or Version(result_dataset.runtime["version"]) < Version(current_version)
|
else:
|
||||||
|
# noinspection PyBroadException
|
||||||
|
try:
|
||||||
|
|
||||||
|
if (
|
||||||
|
candidate_dataset_version
|
||||||
|
and Version.is_valid_version_string(candidate_dataset_version)
|
||||||
|
and (
|
||||||
|
(
|
||||||
|
not result_dataset.runtime.get("version")
|
||||||
|
or not Version.is_valid_version_string(result_dataset.runtime.get("version"))
|
||||||
|
)
|
||||||
|
or (
|
||||||
|
result_dataset.runtime.get("version")
|
||||||
|
and Version(result_dataset.runtime.get("version"))
|
||||||
|
< Version(candidate_dataset_version)
|
||||||
|
)
|
||||||
|
)
|
||||||
):
|
):
|
||||||
result_dataset = dataset
|
result_dataset = dataset
|
||||||
elif dataset_version == current_version:
|
except Exception:
|
||||||
|
pass
|
||||||
|
elif dataset_version == candidate_dataset_version:
|
||||||
if result_dataset and raise_on_multiple:
|
if result_dataset and raise_on_multiple:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Multiple datasets found with dataset_project={}, dataset_name={}, dataset_version={}".format(
|
"Multiple datasets found with dataset_project={}, dataset_name={}, dataset_version={}".format(
|
||||||
|
@ -267,6 +267,13 @@ class Version(_BaseVersion):
|
|||||||
|
|
||||||
return letter, int(number)
|
return letter, int(number)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def is_valid_version_string(cls, version_string):
|
||||||
|
if not version_string:
|
||||||
|
return False
|
||||||
|
match = cls._regex.search(version_string)
|
||||||
|
return bool(match)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _parse_local_version(cls, local):
|
def _parse_local_version(cls, local):
|
||||||
"""
|
"""
|
||||||
|
Loading…
Reference in New Issue
Block a user