Allow deleting files when deleting datasets stored with clearml-data

This commit is contained in:
Alex Burlacu 2023-03-23 18:03:40 +02:00
parent fce03ef0a2
commit 067c817f30
2 changed files with 27 additions and 21 deletions

View File

@ -1410,7 +1410,9 @@ class Dataset(object):
force=False, # bool force=False, # bool
dataset_version=None, # Optional[str] dataset_version=None, # Optional[str]
entire_dataset=False, # bool entire_dataset=False, # bool
shallow_search=False # bool shallow_search=False, # bool
delete_files=True, # bool
delete_external_files=False # bool
): ):
# type: (...) -> () # type: (...) -> ()
""" """
@ -1426,6 +1428,9 @@ class Dataset(object):
:param entire_dataset: If True, delete all datasets that match the given `dataset_project`, :param entire_dataset: If True, delete all datasets that match the given `dataset_project`,
`dataset_name`, `dataset_version`. Note that `force` has to be True if this parameter is True `dataset_name`, `dataset_version`. Note that `force` has to be True if this parameter is True
:param shallow_search: If True, search only the first 500 results (first page) :param shallow_search: If True, search only the first 500 results (first page)
:param delete_files: Delete all local files in the dataset (from the ClearML file server), as well as
all artifacts related to the dataset.
:param delete_external_files: Delete all external files in the dataset (from their external storage)
""" """
if not any([dataset_id, dataset_project, dataset_name]): if not any([dataset_id, dataset_project, dataset_name]):
raise ValueError("Dataset deletion criteria not met. Didn't provide id/name/project correctly.") raise ValueError("Dataset deletion criteria not met. Didn't provide id/name/project correctly.")
@ -1446,28 +1451,26 @@ class Dataset(object):
action="delete", action="delete",
) )
except Exception as e: except Exception as e:
LoggerRoot.get_base_logger().warning("Error: {}".format(str(e))) LoggerRoot.get_base_logger().warning("Failed deleting dataset: {}".format(str(e)))
return return
client = APIClient()
for dataset_id in dataset_ids: for dataset_id in dataset_ids:
task = Task.get_task(task_id=dataset_id) try:
if str(task.task_type) != str(Task.TaskTypes.data_processing) or cls.__tag not in ( dataset = Dataset.get(dataset_id=dataset_id)
task.get_system_tags() or [] except Exception as e:
): LoggerRoot.get_base_logger().warning("Could not get dataset with ID {}: {}".format(dataset_id, str(e)))
LoggerRoot.get_base_logger().warning("Task id={} is not of type Dataset".format(dataset_id))
continue continue
for artifact in task.artifacts.values(): # noinspection PyProtectedMember
h = StorageHelper.get(artifact.url) dataset._task.delete(delete_artifacts_and_models=delete_files)
# noinspection PyBroadException if delete_external_files:
try: for external_file in dataset.link_entries:
h.delete(artifact.url) if external_file.parent_dataset_id == dataset_id:
except Exception as ex: try:
LoggerRoot.get_base_logger().warning( helper = StorageHelper.get(external_file.link)
"Failed deleting remote file '{}': {}".format(artifact.url, ex) helper.delete(external_file.link)
) except Exception as ex:
# this force is different than the force passed in Dataset.delete LoggerRoot.get_base_logger().warning(
# it indicated that we want delete a non-draft task "Failed deleting remote file '{}': {}".format(external_file.link, ex)
client.tasks.delete(task=dataset_id, force=True) )
@classmethod @classmethod
def rename( def rename(

View File

@ -1471,7 +1471,10 @@ class _HttpDriver(_Driver):
def exists_file(self, container_name, object_name):
    """Return True if *object_name* exists under *container_name*, else False.

    Probes the remote object with an HTTP HEAD request issued through the
    container's own session, so the container's authentication headers are
    attached. Any error (network failure, bad container, non-2xx status
    handling raising) is treated as "does not exist".
    """
    # noinspection PyBroadException
    try:
        container = self.get_container(container_name)
        full_url = container_name + object_name
        response = container.session.head(
            full_url,
            allow_redirects=True,
            headers=container.get_headers(full_url),
        )
        return response.ok
    except Exception:
        return False