mirror of
https://github.com/clearml/clearml
synced 2025-04-19 05:44:42 +00:00
Support controlling the naming of the sub-folder created by StorageManager/CacheManager
This commit is contained in:
parent
8f65f28d58
commit
d78ee6c669
@ -17,6 +17,8 @@ class CacheManager(object):
|
||||
_default_context = "global"
|
||||
_local_to_remote_url_lookup = OrderedDict()
|
||||
__local_to_remote_url_lookup_max_size = 1024
|
||||
_context_to_folder_lookup = dict()
|
||||
_default_context_folder_template = "{0}_artifacts_archive_{1}"
|
||||
|
||||
class CacheContext(object):
|
||||
def __init__(self, cache_context, default_cache_file_limit=10):
|
||||
@ -43,7 +45,7 @@ class CacheManager(object):
|
||||
return direct_access
|
||||
|
||||
# check if we already have the file in our cache
|
||||
cached_file, cached_size = self._get_cache_file(remote_url)
|
||||
cached_file, cached_size = self.get_cache_file(remote_url)
|
||||
if cached_size is not None and not force_download:
|
||||
CacheManager._add_remote_url(remote_url, cached_file)
|
||||
return cached_file
|
||||
@ -70,9 +72,19 @@ class CacheManager(object):
|
||||
filename = url.split("/")[-1]
|
||||
return "{}.{}".format(str_hash, quote_url(filename))
|
||||
|
||||
def _get_cache_file(self, remote_url):
|
||||
def get_cache_folder(self):
    """
    Return the cache folder used by the current context.

    :return: full POSIX path (str) of the context's cache folder
    """
    # folder layout: <cache_dir>/<storage-manager folder>/<context name>
    context_folder = get_cache_dir() / CacheManager._storage_manager_folder / self._context
    return Path(context_folder).as_posix()
|
||||
|
||||
def get_cache_file(self, remote_url=None, local_filename=None):
|
||||
"""
|
||||
:param remote_url: check if we have the remote url in our cache
|
||||
:param local_filename: if local_file is given, search for the local file/directory in the cache folder
|
||||
:return: full path to file name, current file size or None
|
||||
"""
|
||||
def safe_time(x):
|
||||
@ -101,7 +113,7 @@ class CacheManager(object):
|
||||
get_cache_dir() / CacheManager._storage_manager_folder / self._context
|
||||
)
|
||||
folder.mkdir(parents=True, exist_ok=True)
|
||||
local_filename = self._get_hashed_url_file(remote_url)
|
||||
local_filename = local_filename or self._get_hashed_url_file(remote_url)
|
||||
new_file = folder / local_filename
|
||||
new_file_exists = new_file.exists()
|
||||
if new_file_exists:
|
||||
@ -190,3 +202,14 @@ class CacheManager(object):
|
||||
if len(CacheManager._local_to_remote_url_lookup) > CacheManager.__local_to_remote_url_lookup_max_size:
|
||||
# pop the first item (FIFO)
|
||||
CacheManager._local_to_remote_url_lookup.popitem(last=False)
|
||||
|
||||
@classmethod
def set_context_folder_lookup(cls, context, name_template):
    """
    Register a folder-name template for a given cache context.

    :param context: cache context identifier (coerced to str)
    :param name_template: folder name format template (coerced to str)
    :return: the stored template string
    """
    template = str(name_template)
    cls._context_to_folder_lookup[str(context)] = template
    return template
|
||||
|
||||
@classmethod
def get_context_folder_lookup(cls, context):
    """
    Return the folder-name template registered for *context*.

    Falls back to the class default template when *context* is falsy or
    has no registered template.

    :param context: cache context identifier (coerced to str for lookup)
    :return: folder name format template (str)
    """
    default_template = cls._default_context_folder_template
    if not context:
        return default_template
    return cls._context_to_folder_lookup.get(str(context), default_template)
|
||||
|
@ -691,7 +691,7 @@ class StorageHelper(object):
|
||||
except (ValueError, AttributeError, KeyError):
|
||||
pass
|
||||
|
||||
# if driver supports download with call back, use it (it might be faster)
|
||||
# if driver supports download with callback, use it (it might be faster)
|
||||
if hasattr(self._driver, 'download_object'):
|
||||
# callback
|
||||
cb = _DownloadProgressReport(total_size_mb, verbose,
|
||||
|
@ -1,3 +1,4 @@
|
||||
import os
|
||||
import shutil
|
||||
import tarfile
|
||||
from random import random
|
||||
@ -7,9 +8,9 @@ from zipfile import ZipFile
|
||||
|
||||
from pathlib2 import Path
|
||||
|
||||
from .cache import CacheManager
|
||||
from .util import encode_string_to_filename
|
||||
from ..debugging.log import LoggerRoot
|
||||
from .cache import CacheManager
|
||||
|
||||
|
||||
class StorageManager(object):
|
||||
@ -42,7 +43,7 @@ class StorageManager(object):
|
||||
cache_context=cache_context
|
||||
).get_local_copy(remote_url=remote_url, force_download=force_download)
|
||||
if extract_archive and cached_file:
|
||||
return cls._extract_to_cache(cached_file, name)
|
||||
return cls._extract_to_cache(cached_file, name, cache_context)
|
||||
|
||||
return cached_file
|
||||
|
||||
@ -89,11 +90,14 @@ class StorageManager(object):
|
||||
).set_cache_limit(cache_file_limit)
|
||||
|
||||
@classmethod
|
||||
def _extract_to_cache(cls, cached_file, name):
|
||||
def _extract_to_cache(cls, cached_file, name, cache_context=None, target_folder=None):
|
||||
# type: (str, str, Optional[str], Optional[str]) -> str
|
||||
"""
|
||||
Extract cached file to cache folder
|
||||
:param str cached_file: local copy of archive file
|
||||
:param str name: cache context
|
||||
:param str name: name of the target file
|
||||
:param str cache_context: cache context id
|
||||
:param str target_folder: specify target path to use for archive extraction
|
||||
:return: cached folder containing the extracted archive content
|
||||
"""
|
||||
if not cached_file:
|
||||
@ -102,21 +106,24 @@ class StorageManager(object):
|
||||
cached_file = Path(cached_file)
|
||||
|
||||
# we support zip and tar.gz files auto-extraction
|
||||
if (
|
||||
not cached_file.suffix == ".zip"
|
||||
and not cached_file.suffixes[-2:] == [".tar", ".gz"]
|
||||
):
|
||||
suffix = cached_file.suffix.lower()
|
||||
if suffix == '.gz':
|
||||
suffix = ''.join(a.lower() for a in cached_file.suffixes[-2:])
|
||||
|
||||
if suffix not in (".zip", ".tgz", ".tar.gz"):
|
||||
return str(cached_file)
|
||||
|
||||
cached_folder = cached_file.parent
|
||||
cached_folder = Path(cached_file).parent
|
||||
archive_suffix = cached_file.name[:-len(suffix)]
|
||||
name = encode_string_to_filename(name)
|
||||
target_folder = Path(
|
||||
target_folder or CacheManager.get_context_folder_lookup(cache_context).format(archive_suffix, name))
|
||||
|
||||
name = encode_string_to_filename(name) if name else name
|
||||
target_folder = Path("{0}/{1}_artifacts_archive_{2}".format(cached_folder, cached_file.stem, name))
|
||||
if target_folder.exists():
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
target_folder.touch(exist_ok=True)
|
||||
return target_folder
|
||||
return target_folder.as_posix()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@ -125,11 +132,14 @@ class StorageManager(object):
|
||||
temp_target_folder = cached_folder / "{0}_{1}_{2}".format(
|
||||
target_folder.name, time() * 1000, str(random()).replace('.', ''))
|
||||
temp_target_folder.mkdir(parents=True, exist_ok=True)
|
||||
if cached_file.suffix == ".zip":
|
||||
if suffix == ".zip":
|
||||
ZipFile(cached_file).extractall(path=temp_target_folder.as_posix())
|
||||
elif cached_file.suffixes[-2:] == [".tar", ".gz"]:
|
||||
elif suffix == ".tar.gz":
|
||||
with tarfile.open(cached_file) as file:
|
||||
file.extractall(temp_target_folder)
|
||||
file.extractall(temp_target_folder.as_posix())
|
||||
elif suffix == ".tgz":
|
||||
with tarfile.open(cached_file, mode='r:gz') as file:
|
||||
file.extractall(temp_target_folder.as_posix())
|
||||
|
||||
# we assume we will have such folder if we already extract the file
|
||||
# noinspection PyBroadException
|
||||
@ -165,7 +175,7 @@ class StorageManager(object):
|
||||
except Exception:
|
||||
pass
|
||||
return cached_file
|
||||
return target_folder
|
||||
return target_folder.as_posix()
|
||||
|
||||
@classmethod
|
||||
def get_files_server(cls):
|
||||
|
@ -1,7 +1,13 @@
|
||||
import hashlib
|
||||
import sys
|
||||
from typing import Optional
|
||||
|
||||
from six.moves.urllib.parse import quote, urlparse, urlunparse
|
||||
import six
|
||||
import fnmatch
|
||||
|
||||
from ..debugging.log import LoggerRoot
|
||||
|
||||
|
||||
def get_config_object_matcher(**patterns):
|
||||
unsupported = {k: v for k, v in patterns.items() if not isinstance(v, six.string_types)}
|
||||
@ -39,3 +45,35 @@ def quote_url(url):
|
||||
|
||||
def encode_string_to_filename(text):
    """
    Make *text* safe to use as a filename by percent-quoting special
    characters (spaces are kept as-is).

    :param text: arbitrary string
    :return: quoted string usable as a file name
    """
    quoted = quote(text, safe=" ")
    return quoted
|
||||
|
||||
|
||||
def sha256sum(filename, skip_header=0, block_size=65536):
    # type: (str, int, int) -> (Optional[str], Optional[str])
    """
    Compute SHA-256 digests of a file, optionally ignoring a leading header.

    Sometimes the header is the only part of a file that changes, so the
    primary digest excludes the first ``skip_header`` bytes.

    :param filename: path of the file to hash
    :param skip_header: number of leading bytes excluded from the primary digest
    :param block_size: read chunk size in bytes
    :return: tuple of (hex digest of the file minus the header,
        hex digest of the entire file when ``skip_header`` else None);
        (None, None) if the file could not be read
    """
    body_hash = hashlib.sha256()   # digest of everything after the header
    whole_hash = hashlib.sha256()  # digest of the complete file (used only when skipping a header)
    chunk_buffer = bytearray(block_size)
    view = memoryview(chunk_buffer)
    try:
        with open(filename, 'rb', buffering=0) as fp:
            if skip_header:
                # header bytes contribute only to the whole-file digest
                whole_hash.update(fp.read(skip_header))
            while True:
                count = fp.readinto(view)
                if not count:
                    break
                chunk = view[:count]
                body_hash.update(chunk)
                if skip_header:
                    whole_hash.update(chunk)
    except Exception as e:
        # best-effort: log and signal failure instead of raising
        LoggerRoot.get_base_logger().warning(str(e))
        return None, None
    return body_hash.hexdigest(), whole_hash.hexdigest() if skip_header else None
|
||||
|
||||
|
||||
def is_windows():
    """
    Check the operating system the interpreter is running on.

    :return: True if currently running on windows OS
    """
    platform_name = sys.platform
    return platform_name == 'win32'
|
Loading…
Reference in New Issue
Block a user