mirror of
https://github.com/clearml/clearml
synced 2025-02-01 01:26:49 +00:00
Fix StorageManager cache extraction of zipped artifacts. Use modified time instead of access time for cached files.
This commit is contained in:
parent
c9fac89bcd
commit
2d95f7885d
@ -6,13 +6,13 @@ from pathlib2 import Path
|
|||||||
|
|
||||||
from .helper import StorageHelper
|
from .helper import StorageHelper
|
||||||
from .util import quote_url
|
from .util import quote_url
|
||||||
from ..config import get_cache_dir
|
from ..config import get_cache_dir, config
|
||||||
from ..debugging.log import LoggerRoot
|
from ..debugging.log import LoggerRoot
|
||||||
|
|
||||||
|
|
||||||
class CacheManager(object):
|
class CacheManager(object):
|
||||||
__cache_managers = {}
|
__cache_managers = {}
|
||||||
_default_cache_file_limit = 100
|
_default_cache_file_limit = config.get("storage.cache.default_cache_manager_size", 100)
|
||||||
_storage_manager_folder = "storage_manager"
|
_storage_manager_folder = "storage_manager"
|
||||||
_default_context = "global"
|
_default_context = "global"
|
||||||
_local_to_remote_url_lookup = OrderedDict()
|
_local_to_remote_url_lookup = OrderedDict()
|
||||||
@ -75,24 +75,21 @@ class CacheManager(object):
|
|||||||
:param remote_url: check if we have the remote url in our cache
|
:param remote_url: check if we have the remote url in our cache
|
||||||
:return: full path to file name, current file size or None
|
:return: full path to file name, current file size or None
|
||||||
"""
|
"""
|
||||||
folder = Path(
|
def safe_time(x):
|
||||||
get_cache_dir() / CacheManager._storage_manager_folder / self._context
|
# noinspection PyBroadException
|
||||||
)
|
try:
|
||||||
folder.mkdir(parents=True, exist_ok=True)
|
return x.stat().st_mtime
|
||||||
local_filename = self._get_hashed_url_file(remote_url)
|
except Exception:
|
||||||
new_file = folder / local_filename
|
return 0
|
||||||
if new_file.exists():
|
|
||||||
new_file.touch(exist_ok=True)
|
|
||||||
|
|
||||||
# delete old files
|
|
||||||
def sort_max_access_time(x):
|
def sort_max_access_time(x):
|
||||||
atime = x.stat().st_atime
|
atime = safe_time(x)
|
||||||
# noinspection PyBroadException
|
# noinspection PyBroadException
|
||||||
try:
|
try:
|
||||||
if x.is_dir():
|
if x.is_dir():
|
||||||
dir_files = list(x.iterdir())
|
dir_files = list(x.iterdir())
|
||||||
atime = (
|
atime = (
|
||||||
max(atime, max(s.stat().st_atime for s in dir_files))
|
max(atime, max(safe_time(s) for s in dir_files))
|
||||||
if dir_files
|
if dir_files
|
||||||
else atime
|
else atime
|
||||||
)
|
)
|
||||||
@ -100,11 +97,30 @@ class CacheManager(object):
|
|||||||
pass
|
pass
|
||||||
return atime
|
return atime
|
||||||
|
|
||||||
|
folder = Path(
|
||||||
|
get_cache_dir() / CacheManager._storage_manager_folder / self._context
|
||||||
|
)
|
||||||
|
folder.mkdir(parents=True, exist_ok=True)
|
||||||
|
local_filename = self._get_hashed_url_file(remote_url)
|
||||||
|
new_file = folder / local_filename
|
||||||
|
new_file_exists = new_file.exists()
|
||||||
|
if new_file_exists:
|
||||||
|
# noinspection PyBroadException
|
||||||
|
try:
|
||||||
|
new_file.touch(exist_ok=True)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# delete old files
|
||||||
files = sorted(folder.iterdir(), reverse=True, key=sort_max_access_time)
|
files = sorted(folder.iterdir(), reverse=True, key=sort_max_access_time)
|
||||||
files = files[self._file_limit:]
|
files = files[self._file_limit:]
|
||||||
for f in files:
|
for f in files:
|
||||||
if not f.is_dir():
|
if not f.is_dir():
|
||||||
f.unlink()
|
# noinspection PyBroadException
|
||||||
|
try:
|
||||||
|
f.unlink()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
shutil.rmtree(f)
|
shutil.rmtree(f)
|
||||||
@ -115,10 +131,12 @@ class CacheManager(object):
|
|||||||
)
|
)
|
||||||
|
|
||||||
# if file doesn't exist, return file size None
|
# if file doesn't exist, return file size None
|
||||||
return (
|
# noinspection PyBroadException
|
||||||
new_file.as_posix(),
|
try:
|
||||||
new_file.stat().st_size if new_file.exists() else None,
|
size = new_file.stat().st_size if new_file_exists else None
|
||||||
)
|
except Exception:
|
||||||
|
size = 0
|
||||||
|
return new_file.as_posix(), size
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_cache_manager(cls, cache_context=None, cache_file_limit=None):
|
def get_cache_manager(cls, cache_context=None, cache_file_limit=None):
|
||||||
|
@ -1,11 +1,13 @@
|
|||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
from random import random
|
||||||
from time import time
|
from time import time
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from zipfile import ZipFile
|
from zipfile import ZipFile
|
||||||
|
|
||||||
from pathlib2 import Path
|
from pathlib2 import Path
|
||||||
|
|
||||||
|
from .util import encode_string_to_filename
|
||||||
from ..debugging.log import LoggerRoot
|
from ..debugging.log import LoggerRoot
|
||||||
from .cache import CacheManager
|
from .cache import CacheManager
|
||||||
|
|
||||||
@ -96,19 +98,30 @@ class StorageManager(object):
|
|||||||
if not cached_file or not str(cached_file).lower().endswith('.zip'):
|
if not cached_file or not str(cached_file).lower().endswith('.zip'):
|
||||||
return cached_file
|
return cached_file
|
||||||
|
|
||||||
|
cached_folder = Path(cached_file).parent
|
||||||
archive_suffix = cached_file.rpartition(".")[0]
|
archive_suffix = cached_file.rpartition(".")[0]
|
||||||
target_folder = Path("{0}_artifact_archive_{1}".format(archive_suffix, name))
|
name = encode_string_to_filename(name)
|
||||||
|
target_folder = Path("{0}_artifacts_archive_{1}".format(archive_suffix, name))
|
||||||
|
if target_folder.exists():
|
||||||
|
# noinspection PyBroadException
|
||||||
|
try:
|
||||||
|
target_folder.touch(exist_ok=True)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return target_folder
|
||||||
|
|
||||||
base_logger = LoggerRoot.get_base_logger()
|
base_logger = LoggerRoot.get_base_logger()
|
||||||
try:
|
try:
|
||||||
temp_target_folder = "{0}_{1}".format(target_folder.name, time() * 1000)
|
temp_target_folder = cached_folder / "{0}_{1}_{2}".format(
|
||||||
os.mkdir(path=temp_target_folder)
|
target_folder.name, time() * 1000, str(random()).replace('.', ''))
|
||||||
ZipFile(cached_file).extractall(path=temp_target_folder)
|
temp_target_folder.mkdir(parents=True, exist_ok=True)
|
||||||
|
ZipFile(cached_file).extractall(path=temp_target_folder.as_posix())
|
||||||
# we assume such a folder will exist if the zip file was already extracted
|
# we assume such a folder will exist if the zip file was already extracted
|
||||||
# noinspection PyBroadException
|
# noinspection PyBroadException
|
||||||
try:
|
try:
|
||||||
# if rename fails, it means that someone else already managed to extract the zip, delete the current
|
# if rename fails, it means that someone else already managed to extract the zip, delete the current
|
||||||
# folder and return the already existing cached zip folder
|
# folder and return the already existing cached zip folder
|
||||||
shutil.move(temp_target_folder, str(target_folder))
|
shutil.move(temp_target_folder.as_posix(), target_folder.as_posix())
|
||||||
except Exception:
|
except Exception:
|
||||||
if target_folder.exists():
|
if target_folder.exists():
|
||||||
target_folder.touch(exist_ok=True)
|
target_folder.touch(exist_ok=True)
|
||||||
|
@ -35,3 +35,7 @@ def quote_url(url):
|
|||||||
return url
|
return url
|
||||||
parsed = parsed._replace(path=quote(parsed.path))
|
parsed = parsed._replace(path=quote(parsed.path))
|
||||||
return urlunparse(parsed)
|
return urlunparse(parsed)
|
||||||
|
|
||||||
|
|
||||||
|
def encode_string_to_filename(text):
|
||||||
|
return quote(text, safe=" ")
|
||||||
|
Loading…
Reference in New Issue
Block a user