Mirror of https://github.com/clearml/clearml
Add clearml-data (Datasets) multi-chunk support
This commit is contained in:
parent
0dd9ba8adc
commit
844c01e15b
@@ -179,8 +179,8 @@ class PrintPatchLogger(object):
     cr_flush_period = None

     def __init__(self, stream, logger=None, level=logging.INFO):
-        if self.__class__.cr_flush_period is None:
-            self.__class__.cr_flush_period = config.get("development.worker.console_cr_flush_period", 0)
+        if PrintPatchLogger.cr_flush_period is None:
+            PrintPatchLogger.cr_flush_period = config.get("development.worker.console_cr_flush_period", 0)
         PrintPatchLogger.patched = True
         self._terminal = stream
         self._log = logger
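The change reads and writes `cr_flush_period` through the class name instead of `self.__class__`, so the value always lands on `PrintPatchLogger` itself rather than on whatever subclass the instance happens to be. A minimal sketch (not ClearML code) of the difference:

```python
class Base:
    period = None

    def set_via_instance_class(self):
        # writes onto type(self); for a subclass instance this shadows Base.period
        if self.__class__.period is None:
            self.__class__.period = 5

    def set_via_base(self):
        # always reads/writes the attribute on Base, shared by every subclass
        if Base.period is None:
            Base.period = 5


class Child(Base):
    pass


Child().set_via_instance_class()
print(Base.period, Child.period)  # None 5 -> the value was not shared with Base
```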
@@ -1266,6 +1266,34 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
             self._edit(execution=execution)
         return self.data.execution.artifacts or []

+    def _delete_artifacts(self, artifact_names):
+        # type: (Sequence[str]) -> bool
+        """
+        Delete a list of artifacts, by artifact name, from the Task.
+
+        :param list artifact_names: list of artifact names
+        :return: True if successful
+        """
+        if not Session.check_min_api_version('2.3'):
+            return False
+        if not isinstance(artifact_names, (list, tuple)):
+            raise ValueError('Expected artifact names as List[str]')
+
+        with self._edit_lock:
+            if Session.check_min_api_version("2.13") and not self._offline_mode:
+                req = tasks.DeleteArtifactsRequest(
+                    task=self.task_id, artifacts=[{"key": n, "mode": "output"} for n in artifact_names], force=True)
+                res = self.send(req, raise_on_errors=False)
+                if not res or not res.response or not res.response.deleted:
+                    return False
+                self.reload()
+            else:
+                self.reload()
+                execution = self.data.execution
+                execution.artifacts = [a for a in execution.artifacts or [] if a.key not in artifact_names]
+                self._edit(execution=execution)
+            return self.data.execution.artifacts or []
+
     def _set_model_design(self, design=None):
         # type: (str) -> ()
         with self._edit_lock:
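`_delete_artifacts()` is an internal (underscore-prefixed) helper: on API 2.13+ servers it issues a `DeleteArtifactsRequest`, otherwise it falls back to editing the task's execution section locally. A hedged usage sketch; the task ID and artifact name are placeholders:

```python
from clearml import Task

task = Task.get_task(task_id='<task-id>')                     # placeholder ID
task.upload_artifact('stats', artifact_object={'rows': 100})

# Internal helper added in this commit; not part of the documented public API
task._delete_artifacts(['stats'])
```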
@@ -85,7 +85,7 @@ def get_epoch_beginning_of_time(timezone_info=None):
     return datetime(1970, 1, 1).replace(tzinfo=timezone_info if timezone_info else utc_timezone)


-def get_single_result(entity, query, results, log=None, show_results=10, raise_on_error=True, sort_by_date=True):
+def get_single_result(entity, query, results, log=None, show_results=1, raise_on_error=True, sort_by_date=True):
     if not results:
         if not raise_on_error:
             return None
@@ -96,8 +96,12 @@ def get_single_result(entity, query, results, log=None, show_results=10, raise_o
     if show_results:
         if not log:
             log = get_logger()
-        log.warning('More than one {entity} found when searching for `{query}`'
-                    ' (showing first {show_results} {entity}s follow)'.format(**locals()))
+        if show_results > 1:
+            log.warning('{num} {entity} found when searching for `{query}`'
+                        ' (showing first {show_results} {entity}s follow)'.format(num=len(results), **locals()))
+        else:
+            log.warning('{num} {entity} found when searching for `{query}`'.format(num=len(results), **locals()))

     if sort_by_date:
         relative_time = get_epoch_beginning_of_time()
         # sort results based on timestamp and return the newest one
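`get_single_result()` now defaults to showing a single match and reports the actual number of results found, appending the "showing first N" suffix only when more than one will be listed. An illustration of the message formatting alone (not the ClearML helper itself):

```python
entity, query, results, show_results = 'task', 'my experiment', ['t1', 't2', 't3'], 1
if show_results > 1:
    msg = ('{num} {entity} found when searching for `{query}`'
           ' (showing first {show_results} {entity}s follow)'.format(num=len(results), **locals()))
else:
    msg = '{num} {entity} found when searching for `{query}`'.format(num=len(results), **locals())
print(msg)  # 3 task found when searching for `my experiment`
```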
@@ -70,9 +70,7 @@ def cli():
     subparsers = parser.add_subparsers(help='Dataset actions', dest='command')

     create = subparsers.add_parser('create', help='Create a new dataset')
-    create.add_argument('--parents', type=str, nargs='*',
-                        help='[Optional] Specify dataset parents IDs (i.e. merge all parents). '
-                             'Example: a17b4fID1 f0ee5ID2 a17b4f09eID3')
+    create.add_argument('--parents', type=str, nargs='*', help='Specify dataset parents IDs (i.e. merge all parents)')
     create.add_argument('--project', type=str, required=False, default=None, help='Dataset project name')
     create.add_argument('--name', type=str, required=True, default=None, help='Dataset name')
     create.add_argument('--tags', type=str, nargs='*', help='Dataset user Tags')
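The `--parents` flag corresponds to the `parent_datasets` argument of the SDK. A hedged sketch of the equivalent call (IDs are placeholders):

```python
from clearml import Dataset

# Merge two existing datasets into a new child version
child = Dataset.create(
    dataset_name='merged-dataset',
    dataset_project='data-project',
    parent_datasets=['<parent_dataset_id_1>', '<parent_dataset_id_2>'],
)
```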
@@ -100,20 +98,22 @@ def cli():
                       help='Local folder to sync (support for wildcard selection). '
                            'Example: ~/data/*.jpg')
     sync.add_argument('--parents', type=str, nargs='*',
-                      help='[Optional] Specify dataset parents IDs (i.e. merge all parents). '
-                           'Example: a17b4fID1 f0ee5ID2 a17b4f09eID3')
+                      help='[Optional - Create new dataset] Specify dataset parents IDs (i.e. merge all parents)')
     sync.add_argument('--project', type=str, required=False, default=None,
-                      help='[Optional] Dataset project name')
+                      help='[Optional - Create new dataset] Dataset project name')
     sync.add_argument('--name', type=str, required=False, default=None,
-                      help='[Optional] Dataset project name')
+                      help='[Optional - Create new dataset] Dataset project name')
     sync.add_argument('--tags', type=str, nargs='*',
-                      help='[Optional] Dataset user Tags')
+                      help='[Optional - Create new dataset] Dataset user Tags')
     sync.add_argument('--storage', type=str, default=None,
                       help='Remote storage to use for the dataset files (default: files_server). '
                            'Examples: \'s3://bucket/data\', \'gs://bucket/data\', \'azure://bucket/data\', '
                            '\'/mnt/shared/folder/data\'')
     sync.add_argument('--skip-close', action='store_true', default=False,
                       help='Do not auto close dataset after syncing folders')
+    sync.add_argument('--chunk-size', default=-1, type=int,
+                      help='Set dataset artifact chunk size in MB. Default -1, unlimited size. '
+                           'Example: 512, dataset will be split and uploaded in 512mb chunks.')
     sync.add_argument('--verbose', action='store_true', default=False, help='Verbose reporting')
     sync.set_defaults(func=ds_sync)

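`--chunk-size` caps the size of each dataset artifact so that large datasets are uploaded as several archives instead of a single one; this is the multi-chunk support the commit title refers to. A rough SDK sketch of `clearml-data sync --folder ./images --chunk-size 512` follows; the `chunk_size` parameter of `Dataset.upload()` comes from the Dataset class diff that is suppressed below as too large, so treat its exact signature as an assumption:

```python
from clearml import Dataset

ds = Dataset.create(dataset_name='images', dataset_project='data-project')
ds.sync_folder(local_path='./images')
ds.upload(show_progress=True, chunk_size=512)  # chunk size in MB; -1 keeps a single archive
ds.finalize()
```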
@@ -136,6 +136,9 @@ def cli():
                         help='Remote storage to use for the dataset files (default: files_server). '
                              'Examples: \'s3://bucket/data\', \'gs://bucket/data\', \'azure://bucket/data\', '
                              '\'/mnt/shared/folder/data\'')
+    upload.add_argument('--chunk-size', default=-1, type=int,
+                        help='Set dataset artifact chunk size in MB. Default -1, unlimited size. '
+                             'Example: 512, dataset will be split and uploaded in 512mb chunks.')
     upload.add_argument('--verbose', default=False, action='store_true', help='Verbose reporting')
     upload.set_defaults(func=ds_upload)

@@ -148,6 +151,9 @@ def cli():
                               '\'/mnt/shared/folder/data\'')
     finalize.add_argument('--disable-upload', action='store_true', default=False,
                           help='Disable automatic upload when closing the dataset')
+    finalize.add_argument('--chunk-size', default=-1, type=int,
+                          help='Set dataset artifact chunk size in MB. Default -1, unlimited size. '
+                               'Example: 512, dataset will be split and uploaded in 512mb chunks.')
     finalize.add_argument('--verbose', action='store_true', default=False, help='Verbose reporting')
     finalize.set_defaults(func=ds_close)

@@ -216,6 +222,14 @@ def cli():
     get.add_argument('--link', type=str, default=None,
                      help='Create a soft link (not supported on Windows) to a '
                           'read-only cached folder containing the dataset')
+    get.add_argument('--part', type=int, default=None,
+                     help='Retrieve a partial copy of the dataset. '
+                          'Part number (0 to `num-parts`-1) of total parts --num-parts.')
+    get.add_argument('--num-parts', type=int, default=None,
+                     help='Total number of parts to divide the dataset to. '
+                          'Notice minimum retrieved part is a single chunk in a dataset (or its parents).'
+                          'Example: Dataset gen4, with 3 parents, each with a single chunk, '
+                          'can be divided into 4 parts')
     get.add_argument('--overwrite', action='store_true', default=False, help='If True, overwrite the target folder')
     get.add_argument('--verbose', action='store_true', default=False, help='Verbose reporting')
     get.set_defaults(func=ds_get)
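`--part` and `--num-parts` are forwarded by `ds_get()` to the local-copy calls (see the two hunks below), letting each worker fetch only its share of the chunks. A hedged sketch of the SDK side; the dataset ID is a placeholder and the `part`/`num_parts` arguments are defined in the suppressed Dataset class diff:

```python
from clearml import Dataset

ds = Dataset.get(dataset_id='<dataset-id>')

# Read-only cached copy of roughly a quarter of the dataset chunks
folder = ds.get_local_copy(part=0, num_parts=4)

# Writable copy of the same part, extracted to a target folder
mutable = ds.get_mutable_local_copy(target_folder='./data_part0', part=0, num_parts=4)
```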
@@ -274,7 +288,7 @@ def ds_get(args):
             pass
     if args.copy:
         ds_folder = args.copy
-        ds.get_mutable_local_copy(target_folder=ds_folder)
+        ds.get_mutable_local_copy(target_folder=ds_folder, part=args.part, num_parts=args.num_parts)
     else:
         if args.link:
             Path(args.link).mkdir(parents=True, exist_ok=True)
@@ -286,7 +300,7 @@ def ds_get(args):
                     Path(args.link).unlink()
                 except Exception:
                     raise ValueError("Target directory {} is not empty. Use --overwrite.".format(args.link))
-        ds_folder = ds.get_local_copy()
+        ds_folder = ds.get_local_copy(part=args.part, num_parts=args.num_parts)
         if args.link:
             os.symlink(ds_folder, args.link)
             ds_folder = args.link
@@ -372,7 +386,10 @@ def ds_close(args):
             raise ValueError("Pending uploads, cannot finalize dataset. run `clearml-data upload`")
         # upload the files
         print("Pending uploads, starting dataset upload to {}".format(args.storage or ds.get_default_storage()))
-        ds.upload(show_progress=True, verbose=args.verbose, output_url=args.storage or None)
+        ds.upload(show_progress=True,
+                  verbose=args.verbose,
+                  output_url=args.storage or None,
+                  chunk_size=args.chunk_size or -1,)

     ds.finalize()
     print('Dataset closed and finalized')
@@ -399,7 +416,7 @@ def ds_upload(args):
     check_null_id(args)
     print_args(args)
     ds = Dataset.get(dataset_id=args.id)
-    ds.upload(verbose=args.verbose, output_url=args.storage or None)
+    ds.upload(verbose=args.verbose, output_url=args.storage or None, chunk_size=args.chunk_size or -1)
     print('Dataset upload completed')
     return 0

@@ -443,7 +460,10 @@ def ds_sync(args):
         if ds.is_dirty():
             # upload the files
             print("Pending uploads, starting dataset upload to {}".format(args.storage or ds.get_default_storage()))
-            ds.upload(show_progress=True, verbose=args.verbose, output_url=args.storage or None)
+            ds.upload(show_progress=True,
+                      verbose=args.verbose,
+                      output_url=args.storage or None,
+                      chunk_size=args.chunk_size or -1, )

         ds.finalize()
         print('Dataset closed and finalized')
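All three CLI paths (`ds_upload`, `ds_close`, `ds_sync`) now forward `chunk_size` to `Dataset.upload()`, guarded by `args.chunk_size or -1`. Because `0` is falsy in Python, `--chunk-size 0` therefore behaves like the unlimited default, as this small check shows:

```python
for chunk_size in (512, 0, None, -1):
    print(chunk_size, '->', chunk_size or -1)
# 512 -> 512
# 0 -> -1
# None -> -1
# -1 -> -1
```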
File diff suppressed because it is too large
@@ -1,13 +1,20 @@
+import atexit
 import hashlib
 import os
 import shutil

 from collections import OrderedDict
+from threading import RLock
 from typing import Union, Optional, Tuple, Dict

 from pathlib2 import Path

 from .helper import StorageHelper
 from .util import quote_url
 from ..config import get_cache_dir, deferred_config
 from ..debugging.log import LoggerRoot
+from ..utilities.locks.utils import Lock as FileLock
+from ..utilities.locks.exceptions import LockException


 class CacheManager(object):
@@ -19,17 +26,26 @@ class CacheManager(object):
     __local_to_remote_url_lookup_max_size = 1024
     _context_to_folder_lookup = dict()
     _default_context_folder_template = "{0}_artifacts_archive_{1}"
+    _lockfile_prefix = '.lock.'
+    _lockfile_suffix = '.clearml'

     class CacheContext(object):
+        _folder_locks = dict()  # type: Dict[str, FileLock]
+        _lockfile_at_exit_cb = None
+
         def __init__(self, cache_context, default_cache_file_limit=10):
             # type: (str, int) -> None
             self._context = str(cache_context)
             self._file_limit = int(default_cache_file_limit)
+            self._rlock = RLock()

         def set_cache_limit(self, cache_file_limit):
             # type: (int) -> int
             self._file_limit = max(self._file_limit, int(cache_file_limit))
             return self._file_limit

         def get_local_copy(self, remote_url, force_download):
             # type: (str, bool) -> Optional[str]
             helper = StorageHelper.get(remote_url)
             if not helper:
                 raise ValueError("Storage access failed: {}".format(remote_url))
@@ -59,6 +75,7 @@ class CacheManager(object):

         @staticmethod
         def upload_file(local_file, remote_url, wait_for_upload=True, retries=1):
             # type: (str, str, bool, int) -> Optional[str]
             helper = StorageHelper.get(remote_url)
             result = helper.upload(
                 local_file, remote_url, async_enable=not wait_for_upload, retries=retries,
@@ -68,11 +85,13 @@ class CacheManager(object):

         @classmethod
         def get_hashed_url_file(cls, url):
             # type: (str) -> str
             str_hash = hashlib.md5(url.encode()).hexdigest()
             filename = url.split("/")[-1]
             return "{}.{}".format(str_hash, quote_url(filename))

         def get_cache_folder(self):
             # type: () -> str
             """
             :return: full path to current contexts cache folder
             """
@@ -82,6 +101,7 @@ class CacheManager(object):
             return folder.as_posix()

         def get_cache_file(self, remote_url=None, local_filename=None):
             # type: (Optional[str], Optional[str]) -> Tuple[str, Optional[int]]
             """
             :param remote_url: check if we have the remote url in our cache
             :param local_filename: if local_file is given, search for the local file/directory in the cache folder
@@ -123,10 +143,52 @@ class CacheManager(object):
                 except Exception:
                     pass

+            # first exclude lock files
+            lock_files = dict()
+            files = []
+            for f in sorted(folder.iterdir(), reverse=True, key=sort_max_access_time):
+                if f.name.startswith(CacheManager._lockfile_prefix) and f.name.endswith(CacheManager._lockfile_suffix):
+                    # parse the lock filename
+                    name = f.name[len(CacheManager._lockfile_prefix):-len(CacheManager._lockfile_suffix)]
+                    num, _, name = name.partition('.')
+                    lock_files[name] = lock_files.get(name, []) + [f.as_posix()]
+                else:
+                    files.append(f)
+
+            # remove new lock files from the list (we will delete them when time comes)
+            for f in files[:self._file_limit]:
+                lock_files.pop(f.name, None)
+
             # delete old files
-            files = sorted(folder.iterdir(), reverse=True, key=sort_max_access_time)
-            files = files[self._file_limit:]
-            for f in files:
+            for i, f in enumerate(files):
+                if i < self._file_limit:
+                    continue
+
+                # check if the file is in the lock folder list:
+                folder_lock = self._folder_locks.get(f.absolute().as_posix())
+                if folder_lock:
+                    # pop from lock files
+                    lock_files.pop(f.name, None)
+                    continue
+
+                # check if someone else holds the lock file
+                locks = lock_files.get(f.name, [])
+                for l in locks:
+                    try:
+                        a_lock = FileLock(filename=l)
+                        a_lock.acquire(timeout=0)
+                        a_lock.release()
+                        a_lock.delete_lock_file()
+                        del a_lock
+                    except LockException:
+                        # someone have the lock skip the file
+                        continue
+
+                # if we got here we need to pop from the lock_files, later we will delete the leftover entries
+                lock_files.pop(f.name, None)
+
+                # if we are here we can delete the file
                 if not f.is_dir():
                     # noinspection PyBroadException
                     try:
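Lock files are named `<prefix><NNN>.<folder-name><suffix>` (the counter allows several concurrent locks on the same folder), and the cleanup pass strips the prefix, suffix and counter to recover the protected folder name. A small sketch of that parsing, using the constants from this diff and a hypothetical file name:

```python
_lockfile_prefix = '.lock.'
_lockfile_suffix = '.clearml'

fname = '.lock.000.my_dataset_archive.clearml'  # hypothetical lock-file name
name = fname[len(_lockfile_prefix):-len(_lockfile_suffix)]  # '000.my_dataset_archive'
num, _, name = name.partition('.')
print(num, name)  # 000 my_dataset_archive
```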
@@ -135,23 +197,93 @@ class CacheManager(object):
                         pass
                 else:
                     try:
-                        shutil.rmtree(f)
+                        shutil.rmtree(f.as_posix())
                     except Exception as e:
                         # failed deleting folder
                         LoggerRoot.get_base_logger().debug(
                             "Exception {}\nFailed deleting folder {}".format(e, f)
                         )

+            # cleanup old lock files
+            for lock_files in lock_files.values():
+                for f in lock_files:
+                    # noinspection PyBroadException
+                    try:
+                        os.unlink(f)
+                    except BaseException:
+                        pass
+
             # if file doesn't exist, return file size None
             # noinspection PyBroadException
             try:
                 size = new_file.stat().st_size if new_file_exists else None
             except Exception:
                 size = None

             return new_file.as_posix(), size

+        def lock_cache_folder(self, local_path):
+            # type: (Union[str, Path]) -> ()
+            """
+            Lock a specific cache folder, making sure it will not be deleted in the next
+            cache cleanup round
+            :param local_path: Path (str/Path) to a sub-folder inside the instance cache folder
+            """
+            local_path = Path(local_path).absolute()
+            self._rlock.acquire()
+            if self._lockfile_at_exit_cb is None:
+                self._lockfile_at_exit_cb = True
+                atexit.register(self._lock_file_cleanup_callback)
+
+            lock = self._folder_locks.get(local_path.as_posix())
+            i = 0
+            # try to create a lock if we do not already have one (if we do, we assume it is locked)
+            while not lock:
+                lock_path = local_path.parent / '{}{:03d}.{}{}'.format(
+                    CacheManager._lockfile_prefix, i, local_path.name, CacheManager._lockfile_suffix)
+                lock = FileLock(filename=lock_path)
+
+                # try to lock folder (if we failed to create lock, try nex number)
+                try:
+                    lock.acquire(timeout=0)
+                    break
+                except LockException:
+                    # failed locking, maybe someone else already locked it.
+                    del lock
+                    lock = None
+                    i += 1
+
+            # store lock
+            self._folder_locks[local_path.as_posix()] = lock
+            self._rlock.release()
+
+        def unlock_cache_folder(self, local_path):
+            # type: (Union[str, Path]) -> ()
+            """
+            Lock a specific cache folder, making sure it will not be deleted in the next
+            cache cleanup round
+            :param local_path: Path (str/Path) to a sub-folder inside the instance cache folder
+            """
+            local_path = Path(local_path).absolute()
+            self._rlock.acquire()
+            # pop lock
+            lock = self._folder_locks.pop(local_path.as_posix(), None)
+            if lock:
+                lock.release()
+                lock.delete_lock_file()
+                del lock
+
+            self._rlock.release()
+
+        @classmethod
+        def _lock_file_cleanup_callback(cls):
+            for lock in cls._folder_locks.values():
+                lock.release()
+                lock.delete_lock_file()
+
     @classmethod
     def get_cache_manager(cls, cache_context=None, cache_file_limit=None):
         # type: (Optional[str], Optional[int]) -> CacheManager.CacheContext
         cache_context = cache_context or cls._default_context
         if cache_context not in cls.__cache_managers:
             cls.__cache_managers[cache_context] = cls.CacheContext(
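`lock_cache_folder()` drops a numbered lock file next to a cached folder so the cleanup pass above will skip it, and `unlock_cache_folder()` releases and deletes that lock; an `atexit` callback cleans up anything still held at interpreter shutdown. A hedged usage sketch of this internal API (the context name and sub-folder are placeholders):

```python
from pathlib2 import Path
from clearml.storage.cache import CacheManager

cache = CacheManager.get_cache_manager(cache_context='datasets')   # illustrative context name
subfolder = Path(cache.get_cache_folder()) / 'my_dataset_archive'  # hypothetical cached folder
subfolder.mkdir(parents=True, exist_ok=True)

cache.lock_cache_folder(subfolder)   # cleanup will now skip this folder
try:
    pass  # ... read or extract dataset chunks here ...
finally:
    cache.unlock_cache_folder(subfolder)
```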
@@ -165,6 +297,7 @@ class CacheManager(object):

     @staticmethod
     def get_remote_url(local_copy_path):
         # type: (str) -> str
         if not CacheManager._local_to_remote_url_lookup:
             return local_copy_path

@@ -178,6 +311,7 @@ class CacheManager(object):

     @staticmethod
     def _add_remote_url(remote_url, local_copy_path):
         # type: (str, str) -> ()
         # so that we can disable the cache lookup altogether
         if CacheManager._local_to_remote_url_lookup is None:
             return
@@ -206,11 +340,13 @@ class CacheManager(object):

     @classmethod
     def set_context_folder_lookup(cls, context, name_template):
         # type: (str, str) -> str
         cls._context_to_folder_lookup[str(context)] = str(name_template)
         return str(name_template)

     @classmethod
     def get_context_folder_lookup(cls, context):
         # type: (Optional[str]) -> str
         if not context:
             return cls._default_context_folder_template
         return cls._context_to_folder_lookup.get(str(context), cls._default_context_folder_template)
@@ -97,8 +97,16 @@ class StorageManager(object):
         ).set_cache_limit(cache_file_limit)

     @classmethod
-    def _extract_to_cache(cls, cached_file, name, cache_context=None, target_folder=None, cache_path_encoding=None):
-        # type: (str, str, Optional[str], Optional[str], Optional[str]) -> str
+    def _extract_to_cache(
+            cls,
+            cached_file,  # type: str
+            name,  # type: str
+            cache_context=None,  # type: Optional[str]
+            target_folder=None,  # type: Optional[str]
+            cache_path_encoding=None,  # type: Optional[str]
+            force=False,  # type: bool
+    ):
+        # type: (...) -> str
         """
         Extract cached file to cache folder
         :param str cached_file: local copy of archive file
@@ -108,6 +116,7 @@ class StorageManager(object):
         :param str cache_path_encoding: specify representation of the local path of the cached files,
             this will always point to local cache folder, even if we have direct access file.
             Used for extracting the cached archived based on cache_path_encoding
+        :param bool force: Force archive extraction even if target folder exists
         :return: cached folder containing the extracted archive content
         """
         if not cached_file:
@@ -133,7 +142,7 @@ class StorageManager(object):
         target_folder = cache_folder / CacheManager.get_context_folder_lookup(
             cache_context).format(archive_suffix, name)

-        if target_folder.is_dir():
+        if target_folder.is_dir() and not force:
             # noinspection PyBroadException
             try:
                 target_folder.touch(exist_ok=True)
@@ -143,9 +152,14 @@ class StorageManager(object):

         base_logger = LoggerRoot.get_base_logger()
         try:
-            temp_target_folder = cache_folder / "{0}_{1}_{2}".format(
-                target_folder.name, time() * 1000, str(random()).replace('.', ''))
-            temp_target_folder.mkdir(parents=True, exist_ok=True)
+            # if target folder exists, meaning this is forced ao we extract directly into target folder
+            if target_folder.is_dir():
+                temp_target_folder = target_folder
+            else:
+                temp_target_folder = cache_folder / "{0}_{1}_{2}".format(
+                    target_folder.name, time() * 1000, str(random()).replace('.', ''))
+                temp_target_folder.mkdir(parents=True, exist_ok=True)
+
             if suffix == ".zip":
                 ZipFile(cached_file.as_posix()).extractall(path=temp_target_folder.as_posix())
             elif suffix == ".tar.gz":
@@ -155,23 +169,24 @@ class StorageManager(object):
             with tarfile.open(cached_file.as_posix(), mode='r:gz') as file:
                 file.extractall(temp_target_folder.as_posix())

-            # we assume we will have such folder if we already extract the file
-            # noinspection PyBroadException
-            try:
-                # if rename fails, it means that someone else already manged to extract the file, delete the current
-                # folder and return the already existing cached zip folder
-                shutil.move(temp_target_folder.as_posix(), target_folder.as_posix())
-            except Exception:
-                if target_folder.exists():
-                    target_folder.touch(exist_ok=True)
-                else:
-                    base_logger.warning(
-                        "Failed renaming {0} to {1}".format(temp_target_folder.as_posix(), target_folder.as_posix()))
-                try:
-                    shutil.rmtree(temp_target_folder.as_posix())
-                except Exception as ex:
-                    base_logger.warning(
-                        "Exception {}\nFailed deleting folder {}".format(ex, temp_target_folder.as_posix()))
+            if temp_target_folder != target_folder:
+                # we assume we will have such folder if we already extract the file
+                # noinspection PyBroadException
+                try:
+                    # if rename fails, it means that someone else already manged to extract the file, delete the current
+                    # folder and return the already existing cached zip folder
+                    shutil.move(temp_target_folder.as_posix(), target_folder.as_posix())
+                except Exception:
+                    if target_folder.exists():
+                        target_folder.touch(exist_ok=True)
+                    else:
+                        base_logger.warning(
+                            "Failed renaming {0} to {1}".format(temp_target_folder.as_posix(), target_folder.as_posix()))
+                    try:
+                        shutil.rmtree(temp_target_folder.as_posix())
+                    except Exception as ex:
+                        base_logger.warning(
+                            "Exception {}\nFailed deleting folder {}".format(ex, temp_target_folder.as_posix()))
         except Exception as ex:
             # failed extracting the file:
             base_logger.warning(
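The new `force` flag makes `_extract_to_cache()` re-extract an archive even when the cached target folder already exists, and in that case it extracts straight into the existing target instead of a temporary folder (hence the `temp_target_folder != target_folder` guard above). A hedged sketch of driving the private helper; the URL and names are placeholders:

```python
from clearml.storage import StorageManager

local_zip = StorageManager.get_local_copy(
    'https://files.example.com/datasets/chunk_000.zip', extract_archive=False)

# _extract_to_cache() is a private classmethod; force=True is the parameter added here
extracted_folder = StorageManager._extract_to_cache(
    cached_file=local_zip, name='dataset_chunk_000', force=True)
```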
@@ -179,6 +179,22 @@ class Lock(object):
                 pass
             self.fh = None

+    def delete_lock_file(self):
+        # type: () -> bool
+        """
+        Remove the local file used for locking (fail if file is locked)
+
+        :return: True is successful
+        """
+        if self.fh:
+            return False
+        # noinspection PyBroadException
+        try:
+            os.unlink(path=self.filename)
+        except BaseException:
+            return False
+        return True
+
     def _get_fh(self):
         '''Get a new filehandle'''
         return open(self.filename, self.mode, **self.file_open_kwargs)
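`delete_lock_file()` refuses to remove the file while its own handle is still open, so the expected order is acquire, release, then delete. A short sketch with the `Lock` utility (imported as `FileLock` elsewhere in this commit); the lock path is a placeholder:

```python
from clearml.utilities.locks.utils import Lock as FileLock
from clearml.utilities.locks.exceptions import LockException

lock = FileLock(filename='/tmp/.lock.000.example.clearml')
try:
    lock.acquire(timeout=0)      # non-blocking; raises LockException if already held
except LockException:
    print('already locked by another process')
else:
    lock.release()
    lock.delete_lock_file()      # returns False if the handle is still open or unlink fails
```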