mirror of
https://github.com/clearml/clearml
synced 2025-06-26 18:16:07 +00:00
Fix Dataset.get_local_copy() deleting the source archive
when it is stored locally
This commit is contained in:
parent
d45ec5d3e2
commit
74614bad6d
@ -2208,21 +2208,25 @@ class Dataset(object):
|
|||||||
raise ValueError("Could not download dataset id={} entry={}".format(self._id, data_artifact_name))
|
raise ValueError("Could not download dataset id={} entry={}".format(self._id, data_artifact_name))
|
||||||
return local_zip
|
return local_zip
|
||||||
|
|
||||||
def _extract_part(local_zip):
|
def _extract_part(local_zip, data_artifact_name):
|
||||||
# noinspection PyProtectedMember
|
# noinspection PyProtectedMember
|
||||||
StorageManager._extract_to_cache(
|
StorageManager._extract_to_cache(
|
||||||
cached_file=local_zip, name=self._id,
|
cached_file=local_zip, name=self._id,
|
||||||
cache_context=self.__cache_context, target_folder=local_folder, force=True)
|
cache_context=self.__cache_context, target_folder=local_folder, force=True)
|
||||||
# noinspection PyBroadException
|
# noinspection PyBroadException
|
||||||
try:
|
try:
|
||||||
Path(local_zip).unlink()
|
# do not delete files we accessed directly
|
||||||
|
url = self._task.artifacts[data_artifact_name].url
|
||||||
|
helper = StorageHelper.get(url)
|
||||||
|
if helper.get_driver_direct_access(url) is None:
|
||||||
|
Path(local_zip).unlink()
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
with ThreadPoolExecutor(max_workers=max_workers) as pool:
|
with ThreadPoolExecutor(max_workers=max_workers) as pool:
|
||||||
for d in data_artifact_entries:
|
for d in data_artifact_entries:
|
||||||
local_zip = _download_part(d)
|
local_zip = _download_part(d)
|
||||||
pool.submit(_extract_part, local_zip)
|
pool.submit(_extract_part, local_zip, d)
|
||||||
|
|
||||||
return local_folder
|
return local_folder
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user