Fix StorageManager cache extraction of zipped artifacts. Use modified time instead of access time for cached files.

This commit is contained in:
allegroai 2020-10-15 23:16:44 +03:00
parent c9fac89bcd
commit 2d95f7885d
3 changed files with 58 additions and 23 deletions

View File

@ -6,13 +6,13 @@ from pathlib2 import Path
from .helper import StorageHelper from .helper import StorageHelper
from .util import quote_url from .util import quote_url
from ..config import get_cache_dir from ..config import get_cache_dir, config
from ..debugging.log import LoggerRoot from ..debugging.log import LoggerRoot
class CacheManager(object): class CacheManager(object):
__cache_managers = {} __cache_managers = {}
_default_cache_file_limit = 100 _default_cache_file_limit = config.get("storage.cache.default_cache_manager_size", 100)
_storage_manager_folder = "storage_manager" _storage_manager_folder = "storage_manager"
_default_context = "global" _default_context = "global"
_local_to_remote_url_lookup = OrderedDict() _local_to_remote_url_lookup = OrderedDict()
@ -75,24 +75,21 @@ class CacheManager(object):
:param remote_url: check if we have the remote url in our cache :param remote_url: check if we have the remote url in our cache
:return: full path to file name, current file size or None :return: full path to file name, current file size or None
""" """
folder = Path( def safe_time(x):
get_cache_dir() / CacheManager._storage_manager_folder / self._context # noinspection PyBroadException
) try:
folder.mkdir(parents=True, exist_ok=True) return x.stat().st_mtime
local_filename = self._get_hashed_url_file(remote_url) except Exception:
new_file = folder / local_filename return 0
if new_file.exists():
new_file.touch(exist_ok=True)
# delete old files
def sort_max_access_time(x): def sort_max_access_time(x):
atime = x.stat().st_atime atime = safe_time(x)
# noinspection PyBroadException # noinspection PyBroadException
try: try:
if x.is_dir(): if x.is_dir():
dir_files = list(x.iterdir()) dir_files = list(x.iterdir())
atime = ( atime = (
max(atime, max(s.stat().st_atime for s in dir_files)) max(atime, max(safe_time(s) for s in dir_files))
if dir_files if dir_files
else atime else atime
) )
@ -100,11 +97,30 @@ class CacheManager(object):
pass pass
return atime return atime
folder = Path(
get_cache_dir() / CacheManager._storage_manager_folder / self._context
)
folder.mkdir(parents=True, exist_ok=True)
local_filename = self._get_hashed_url_file(remote_url)
new_file = folder / local_filename
new_file_exists = new_file.exists()
if new_file_exists:
# noinspection PyBroadException
try:
new_file.touch(exist_ok=True)
except Exception:
pass
# delete old files
files = sorted(folder.iterdir(), reverse=True, key=sort_max_access_time) files = sorted(folder.iterdir(), reverse=True, key=sort_max_access_time)
files = files[self._file_limit:] files = files[self._file_limit:]
for f in files: for f in files:
if not f.is_dir(): if not f.is_dir():
f.unlink() # noinspection PyBroadException
try:
f.unlink()
except Exception:
pass
else: else:
try: try:
shutil.rmtree(f) shutil.rmtree(f)
@ -115,10 +131,12 @@ class CacheManager(object):
) )
# if file doesn't exist, return file size None # if file doesn't exist, return file size None
return ( # noinspection PyBroadException
new_file.as_posix(), try:
new_file.stat().st_size if new_file.exists() else None, size = new_file.stat().st_size if new_file_exists else None
) except Exception:
size = 0
return new_file.as_posix(), size
@classmethod @classmethod
def get_cache_manager(cls, cache_context=None, cache_file_limit=None): def get_cache_manager(cls, cache_context=None, cache_file_limit=None):

View File

@ -1,11 +1,13 @@
import os import os
import shutil import shutil
from random import random
from time import time from time import time
from typing import Optional from typing import Optional
from zipfile import ZipFile from zipfile import ZipFile
from pathlib2 import Path from pathlib2 import Path
from .util import encode_string_to_filename
from ..debugging.log import LoggerRoot from ..debugging.log import LoggerRoot
from .cache import CacheManager from .cache import CacheManager
@ -96,19 +98,30 @@ class StorageManager(object):
if not cached_file or not str(cached_file).lower().endswith('.zip'): if not cached_file or not str(cached_file).lower().endswith('.zip'):
return cached_file return cached_file
cached_folder = Path(cached_file).parent
archive_suffix = cached_file.rpartition(".")[0] archive_suffix = cached_file.rpartition(".")[0]
target_folder = Path("{0}_artifact_archive_{1}".format(archive_suffix, name)) name = encode_string_to_filename(name)
target_folder = Path("{0}_artifacts_archive_{1}".format(archive_suffix, name))
if target_folder.exists():
# noinspection PyBroadException
try:
target_folder.touch(exist_ok=True)
except Exception:
pass
return target_folder
base_logger = LoggerRoot.get_base_logger() base_logger = LoggerRoot.get_base_logger()
try: try:
temp_target_folder = "{0}_{1}".format(target_folder.name, time() * 1000) temp_target_folder = cached_folder / "{0}_{1}_{2}".format(
os.mkdir(path=temp_target_folder) target_folder.name, time() * 1000, str(random()).replace('.', ''))
ZipFile(cached_file).extractall(path=temp_target_folder) temp_target_folder.mkdir(parents=True, exist_ok=True)
ZipFile(cached_file).extractall(path=temp_target_folder.as_posix())
# we assume we will have such folder if we already extract the zip file # we assume we will have such folder if we already extract the zip file
# noinspection PyBroadException # noinspection PyBroadException
try: try:
# if rename fails, it means that someone else already managed to extract the zip, delete the current # if rename fails, it means that someone else already managed to extract the zip, delete the current
# folder and return the already existing cached zip folder # folder and return the already existing cached zip folder
shutil.move(temp_target_folder, str(target_folder)) shutil.move(temp_target_folder.as_posix(), target_folder.as_posix())
except Exception: except Exception:
if target_folder.exists(): if target_folder.exists():
target_folder.touch(exist_ok=True) target_folder.touch(exist_ok=True)

View File

@ -35,3 +35,7 @@ def quote_url(url):
return url return url
parsed = parsed._replace(path=quote(parsed.path)) parsed = parsed._replace(path=quote(parsed.path))
return urlunparse(parsed) return urlunparse(parsed)
def encode_string_to_filename(text):
    """
    Percent-encode a string so it can be embedded in a file or folder name.

    The text is passed through ``quote`` with a single space as the only
    explicitly safe character. Because ``safe`` replaces the default of "/",
    path separators are encoded as well (e.g. "/" becomes "%2F"), while
    spaces are kept as-is.

    :param text: the string to encode
    :return: the percent-encoded string
    """
    return quote(text, safe=" ")