Fix: disallow getting a local copy of a dataset that is not finalized (raise ValueError instead)

This commit is contained in:
allegroai 2022-09-26 23:26:29 +03:00
parent 3b2a296724
commit 77f25b82bd

View File

@ -839,8 +839,8 @@ class Dataset(object):
def get_local_copy(self, use_soft_links=None, part=None, num_parts=None, raise_on_error=True, max_workers=None):
# type: (bool, Optional[int], Optional[int], bool, Optional[int]) -> str
"""
return a base folder with a read-only (immutable) local copy of the entire dataset
download and copy / soft-link, files from all the parent dataset versions
Return a base folder with a read-only (immutable) local copy of the entire dataset
download and copy / soft-link, files from all the parent dataset versions. The dataset needs to be finalized
:param use_soft_links: If True use soft links, default False on windows True on Posix systems
:param part: Optional, if provided only download the selected part (index) of the Dataset.
@ -863,6 +863,8 @@ class Dataset(object):
assert self._id
if not self._task:
self._task = Task.get_task(task_id=self._id)
if not self.is_final():
raise ValueError("Cannot get a local copy of a dataset that was not finalized/closed")
if not max_workers:
max_workers = psutil.cpu_count()
@ -1566,7 +1568,8 @@ class Dataset(object):
"""
if not any([dataset_id, dataset_project, dataset_name, dataset_tags]):
raise ValueError("Dataset selection criteria not met. Didn't provide id/name/project/tags correctly.")
if not alias:
current_task = Task.current_task()
if not alias and current_task:
LoggerRoot.get_base_logger().info(
"Dataset.get() did not specify alias. Dataset information "
"will not be automatically logged in ClearML Server.")
@ -1578,8 +1581,6 @@ class Dataset(object):
if invalid_kwargs:
raise ValueError("Invalid 'Dataset.get' arguments: {}".format(invalid_kwargs))
current_task = Task.current_task()
def get_instance(dataset_id_):
task = Task.get_task(task_id=dataset_id_)
if task.status == "created":