Fix StorageManager cache extraction of zipped artifacts. Use modification time instead of access time for cached files.

This commit is contained in:
allegroai 2020-10-15 23:16:44 +03:00
parent c9fac89bcd
commit 2d95f7885d
3 changed files with 58 additions and 23 deletions

View File

@ -6,13 +6,13 @@ from pathlib2 import Path
from .helper import StorageHelper
from .util import quote_url
from ..config import get_cache_dir
from ..config import get_cache_dir, config
from ..debugging.log import LoggerRoot
class CacheManager(object):
__cache_managers = {}
_default_cache_file_limit = 100
_default_cache_file_limit = config.get("storage.cache.default_cache_manager_size", 100)
_storage_manager_folder = "storage_manager"
_default_context = "global"
_local_to_remote_url_lookup = OrderedDict()
@ -75,24 +75,21 @@ class CacheManager(object):
:param remote_url: check if we have the remote url in our cache
:return: full path to file name, current file size or None
"""
folder = Path(
get_cache_dir() / CacheManager._storage_manager_folder / self._context
)
folder.mkdir(parents=True, exist_ok=True)
local_filename = self._get_hashed_url_file(remote_url)
new_file = folder / local_filename
if new_file.exists():
new_file.touch(exist_ok=True)
def safe_time(x):
    """Return the last-modification time of ``x``, or 0 if it cannot be read.

    :param x: object exposing ``stat()`` (the callers here pass path objects)
    :return: ``st_mtime`` from ``x.stat()``, or 0 when reading it raises
    """
    # noinspection PyBroadException
    try:
        return x.stat().st_mtime
    except Exception:
        # Best effort: any failure (for instance, the entry can no longer be
        # stat'ed) is mapped to 0 so the caller's sort key stays comparable.
        return 0
# delete old files
def sort_max_access_time(x):
atime = x.stat().st_atime
atime = safe_time(x)
# noinspection PyBroadException
try:
if x.is_dir():
dir_files = list(x.iterdir())
atime = (
max(atime, max(s.stat().st_atime for s in dir_files))
max(atime, max(safe_time(s) for s in dir_files))
if dir_files
else atime
)
@ -100,11 +97,30 @@ class CacheManager(object):
pass
return atime
folder = Path(
get_cache_dir() / CacheManager._storage_manager_folder / self._context
)
folder.mkdir(parents=True, exist_ok=True)
local_filename = self._get_hashed_url_file(remote_url)
new_file = folder / local_filename
new_file_exists = new_file.exists()
if new_file_exists:
# noinspection PyBroadException
try:
new_file.touch(exist_ok=True)
except Exception:
pass
# delete old files
files = sorted(folder.iterdir(), reverse=True, key=sort_max_access_time)
files = files[self._file_limit:]
for f in files:
if not f.is_dir():
f.unlink()
# noinspection PyBroadException
try:
f.unlink()
except Exception:
pass
else:
try:
shutil.rmtree(f)
@ -115,10 +131,12 @@ class CacheManager(object):
)
# if file doesn't exist, return file size None
return (
new_file.as_posix(),
new_file.stat().st_size if new_file.exists() else None,
)
# noinspection PyBroadException
try:
size = new_file.stat().st_size if new_file_exists else None
except Exception:
size = 0
return new_file.as_posix(), size
@classmethod
def get_cache_manager(cls, cache_context=None, cache_file_limit=None):

View File

@ -1,11 +1,13 @@
import os
import shutil
from random import random
from time import time
from typing import Optional
from zipfile import ZipFile
from pathlib2 import Path
from .util import encode_string_to_filename
from ..debugging.log import LoggerRoot
from .cache import CacheManager
@ -96,19 +98,30 @@ class StorageManager(object):
if not cached_file or not str(cached_file).lower().endswith('.zip'):
return cached_file
cached_folder = Path(cached_file).parent
archive_suffix = cached_file.rpartition(".")[0]
target_folder = Path("{0}_artifact_archive_{1}".format(archive_suffix, name))
name = encode_string_to_filename(name)
target_folder = Path("{0}_artifacts_archive_{1}".format(archive_suffix, name))
if target_folder.exists():
# noinspection PyBroadException
try:
target_folder.touch(exist_ok=True)
except Exception:
pass
return target_folder
base_logger = LoggerRoot.get_base_logger()
try:
temp_target_folder = "{0}_{1}".format(target_folder.name, time() * 1000)
os.mkdir(path=temp_target_folder)
ZipFile(cached_file).extractall(path=temp_target_folder)
temp_target_folder = cached_folder / "{0}_{1}_{2}".format(
target_folder.name, time() * 1000, str(random()).replace('.', ''))
temp_target_folder.mkdir(parents=True, exist_ok=True)
ZipFile(cached_file).extractall(path=temp_target_folder.as_posix())
# we assume this folder already exists if the zip file was previously extracted
# noinspection PyBroadException
try:
# if the rename fails, it means that someone else already managed to extract the zip; delete the current
# folder and return the already existing cached zip folder
shutil.move(temp_target_folder, str(target_folder))
shutil.move(temp_target_folder.as_posix(), target_folder.as_posix())
except Exception:
if target_folder.exists():
target_folder.touch(exist_ok=True)

View File

@ -35,3 +35,7 @@ def quote_url(url):
return url
parsed = parsed._replace(path=quote(parsed.path))
return urlunparse(parsed)
def encode_string_to_filename(text):
    """Percent-encode ``text`` so it can be embedded in a file or folder name.

    Spaces are kept as-is (``safe=" "``); every other character that
    ``quote`` does not treat as always-safe, including ``/``, is replaced
    by its ``%XX`` escape.

    :param text: string to encode
    :return: the percent-encoded string
    """
    return quote(text, safe=" ")