diff --git a/trains/storage/cache.py b/trains/storage/cache.py index 40e9b484..82485ec6 100644 --- a/trains/storage/cache.py +++ b/trains/storage/cache.py @@ -6,13 +6,13 @@ from pathlib2 import Path from .helper import StorageHelper from .util import quote_url -from ..config import get_cache_dir +from ..config import get_cache_dir, config from ..debugging.log import LoggerRoot class CacheManager(object): __cache_managers = {} - _default_cache_file_limit = 100 + _default_cache_file_limit = config.get("storage.cache.default_cache_manager_size", 100) _storage_manager_folder = "storage_manager" _default_context = "global" _local_to_remote_url_lookup = OrderedDict() @@ -75,24 +75,21 @@ class CacheManager(object): :param remote_url: check if we have the remote url in our cache :return: full path to file name, current file size or None """ - folder = Path( - get_cache_dir() / CacheManager._storage_manager_folder / self._context - ) - folder.mkdir(parents=True, exist_ok=True) - local_filename = self._get_hashed_url_file(remote_url) - new_file = folder / local_filename - if new_file.exists(): - new_file.touch(exist_ok=True) + def safe_time(x): + # noinspection PyBroadException + try: + return x.stat().st_mtime + except Exception: + return 0 - # delete old files def sort_max_access_time(x): - atime = x.stat().st_atime + atime = safe_time(x) # noinspection PyBroadException try: if x.is_dir(): dir_files = list(x.iterdir()) atime = ( - max(atime, max(s.stat().st_atime for s in dir_files)) + max(atime, max(safe_time(s) for s in dir_files)) if dir_files else atime ) @@ -100,11 +97,30 @@ class CacheManager(object): pass return atime + folder = Path( + get_cache_dir() / CacheManager._storage_manager_folder / self._context + ) + folder.mkdir(parents=True, exist_ok=True) + local_filename = self._get_hashed_url_file(remote_url) + new_file = folder / local_filename + new_file_exists = new_file.exists() + if new_file_exists: + # noinspection PyBroadException + try: + new_file.touch(exist_ok=True) + except Exception: + pass + + # delete old files files = sorted(folder.iterdir(), reverse=True, key=sort_max_access_time) files = files[self._file_limit:] for f in files: if not f.is_dir(): - f.unlink() + # noinspection PyBroadException + try: + f.unlink() + except Exception: + pass else: try: shutil.rmtree(f) @@ -115,10 +131,12 @@ class CacheManager(object): ) # if file doesn't exist, return file size None - return ( - new_file.as_posix(), - new_file.stat().st_size if new_file.exists() else None, - ) + # noinspection PyBroadException + try: + size = new_file.stat().st_size if new_file_exists else None + except Exception: + size = 0 + return new_file.as_posix(), size @classmethod def get_cache_manager(cls, cache_context=None, cache_file_limit=None): diff --git a/trains/storage/manager.py b/trains/storage/manager.py index 69fbbf48..e80b4e39 100644 --- a/trains/storage/manager.py +++ b/trains/storage/manager.py @@ -1,11 +1,13 @@ import os import shutil +from random import random from time import time from typing import Optional from zipfile import ZipFile from pathlib2 import Path +from .util import encode_string_to_filename from ..debugging.log import LoggerRoot from .cache import CacheManager @@ -96,19 +98,30 @@ class StorageManager(object): if not cached_file or not str(cached_file).lower().endswith('.zip'): return cached_file + cached_folder = Path(cached_file).parent archive_suffix = cached_file.rpartition(".")[0] - target_folder = Path("{0}_artifact_archive_{1}".format(archive_suffix, name)) + name = encode_string_to_filename(name) + target_folder = Path("{0}_artifacts_archive_{1}".format(archive_suffix, name)) + if target_folder.exists(): + # noinspection PyBroadException + try: + target_folder.touch(exist_ok=True) + except Exception: + pass + return target_folder + base_logger = LoggerRoot.get_base_logger() try: - temp_target_folder = "{0}_{1}".format(target_folder.name, time() * 1000) - os.mkdir(path=temp_target_folder) - ZipFile(cached_file).extractall(path=temp_target_folder) + temp_target_folder = cached_folder / "{0}_{1}_{2}".format( + target_folder.name, time() * 1000, str(random()).replace('.', '')) + temp_target_folder.mkdir(parents=True, exist_ok=True) + ZipFile(cached_file).extractall(path=temp_target_folder.as_posix()) # we assume we will have such folder if we already extract the zip file # noinspection PyBroadException try: # if rename fails, it means that someone else already manged to extract the zip, delete the current # folder and return the already existing cached zip folder - shutil.move(temp_target_folder, str(target_folder)) + shutil.move(temp_target_folder.as_posix(), target_folder.as_posix()) except Exception: if target_folder.exists(): target_folder.touch(exist_ok=True) diff --git a/trains/storage/util.py b/trains/storage/util.py index 74a6ef3f..0bde4e96 100644 --- a/trains/storage/util.py +++ b/trains/storage/util.py @@ -35,3 +35,7 @@ def quote_url(url): return url parsed = parsed._replace(path=quote(parsed.path)) return urlunparse(parsed) + + +def encode_string_to_filename(text): + return quote(text, safe=" ")