mirror of
https://github.com/clearml/clearml
synced 2025-01-31 17:17:00 +00:00
Fix StorageManager cache extraction of zipped artifacts. Use modified time instead of access time for cached files.
This commit is contained in:
parent
c9fac89bcd
commit
2d95f7885d
@ -6,13 +6,13 @@ from pathlib2 import Path
|
||||
|
||||
from .helper import StorageHelper
|
||||
from .util import quote_url
|
||||
from ..config import get_cache_dir
|
||||
from ..config import get_cache_dir, config
|
||||
from ..debugging.log import LoggerRoot
|
||||
|
||||
|
||||
class CacheManager(object):
|
||||
__cache_managers = {}
|
||||
_default_cache_file_limit = 100
|
||||
_default_cache_file_limit = config.get("storage.cache.default_cache_manager_size", 100)
|
||||
_storage_manager_folder = "storage_manager"
|
||||
_default_context = "global"
|
||||
_local_to_remote_url_lookup = OrderedDict()
|
||||
@ -75,24 +75,21 @@ class CacheManager(object):
|
||||
:param remote_url: check if we have the remote url in our cache
|
||||
:return: full path to file name, current file size or None
|
||||
"""
|
||||
folder = Path(
|
||||
get_cache_dir() / CacheManager._storage_manager_folder / self._context
|
||||
)
|
||||
folder.mkdir(parents=True, exist_ok=True)
|
||||
local_filename = self._get_hashed_url_file(remote_url)
|
||||
new_file = folder / local_filename
|
||||
if new_file.exists():
|
||||
new_file.touch(exist_ok=True)
|
||||
def safe_time(x):
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
return x.stat().st_mtime
|
||||
except Exception:
|
||||
return 0
|
||||
|
||||
# delete old files
|
||||
def sort_max_access_time(x):
|
||||
atime = x.stat().st_atime
|
||||
atime = safe_time(x)
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
if x.is_dir():
|
||||
dir_files = list(x.iterdir())
|
||||
atime = (
|
||||
max(atime, max(s.stat().st_atime for s in dir_files))
|
||||
max(atime, max(safe_time(s) for s in dir_files))
|
||||
if dir_files
|
||||
else atime
|
||||
)
|
||||
@ -100,11 +97,30 @@ class CacheManager(object):
|
||||
pass
|
||||
return atime
|
||||
|
||||
folder = Path(
|
||||
get_cache_dir() / CacheManager._storage_manager_folder / self._context
|
||||
)
|
||||
folder.mkdir(parents=True, exist_ok=True)
|
||||
local_filename = self._get_hashed_url_file(remote_url)
|
||||
new_file = folder / local_filename
|
||||
new_file_exists = new_file.exists()
|
||||
if new_file_exists:
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
new_file.touch(exist_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# delete old files
|
||||
files = sorted(folder.iterdir(), reverse=True, key=sort_max_access_time)
|
||||
files = files[self._file_limit:]
|
||||
for f in files:
|
||||
if not f.is_dir():
|
||||
f.unlink()
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
f.unlink()
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
try:
|
||||
shutil.rmtree(f)
|
||||
@ -115,10 +131,12 @@ class CacheManager(object):
|
||||
)
|
||||
|
||||
# if file doesn't exist, return file size None
|
||||
return (
|
||||
new_file.as_posix(),
|
||||
new_file.stat().st_size if new_file.exists() else None,
|
||||
)
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
size = new_file.stat().st_size if new_file_exists else None
|
||||
except Exception:
|
||||
size = 0
|
||||
return new_file.as_posix(), size
|
||||
|
||||
@classmethod
|
||||
def get_cache_manager(cls, cache_context=None, cache_file_limit=None):
|
||||
|
@ -1,11 +1,13 @@
|
||||
import os
|
||||
import shutil
|
||||
from random import random
|
||||
from time import time
|
||||
from typing import Optional
|
||||
from zipfile import ZipFile
|
||||
|
||||
from pathlib2 import Path
|
||||
|
||||
from .util import encode_string_to_filename
|
||||
from ..debugging.log import LoggerRoot
|
||||
from .cache import CacheManager
|
||||
|
||||
@ -96,19 +98,30 @@ class StorageManager(object):
|
||||
if not cached_file or not str(cached_file).lower().endswith('.zip'):
|
||||
return cached_file
|
||||
|
||||
cached_folder = Path(cached_file).parent
|
||||
archive_suffix = cached_file.rpartition(".")[0]
|
||||
target_folder = Path("{0}_artifact_archive_{1}".format(archive_suffix, name))
|
||||
name = encode_string_to_filename(name)
|
||||
target_folder = Path("{0}_artifacts_archive_{1}".format(archive_suffix, name))
|
||||
if target_folder.exists():
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
target_folder.touch(exist_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
return target_folder
|
||||
|
||||
base_logger = LoggerRoot.get_base_logger()
|
||||
try:
|
||||
temp_target_folder = "{0}_{1}".format(target_folder.name, time() * 1000)
|
||||
os.mkdir(path=temp_target_folder)
|
||||
ZipFile(cached_file).extractall(path=temp_target_folder)
|
||||
temp_target_folder = cached_folder / "{0}_{1}_{2}".format(
|
||||
target_folder.name, time() * 1000, str(random()).replace('.', ''))
|
||||
temp_target_folder.mkdir(parents=True, exist_ok=True)
|
||||
ZipFile(cached_file).extractall(path=temp_target_folder.as_posix())
|
||||
# we assume we will have such folder if we already extract the zip file
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
# if rename fails, it means that someone else already managed to extract the zip, delete the current
|
||||
# folder and return the already existing cached zip folder
|
||||
shutil.move(temp_target_folder, str(target_folder))
|
||||
shutil.move(temp_target_folder.as_posix(), target_folder.as_posix())
|
||||
except Exception:
|
||||
if target_folder.exists():
|
||||
target_folder.touch(exist_ok=True)
|
||||
|
@ -35,3 +35,7 @@ def quote_url(url):
|
||||
return url
|
||||
parsed = parsed._replace(path=quote(parsed.path))
|
||||
return urlunparse(parsed)
|
||||
|
||||
|
||||
def encode_string_to_filename(text):
|
||||
return quote(text, safe=" ")
|
||||
|
Loading…
Reference in New Issue
Block a user