mirror of https://github.com/clearml/clearml (synced 2025-02-01 01:26:49 +00:00)
import hashlib

from pathlib2 import Path

from .helper import StorageHelper
from .util import quote_url
from ..config import get_cache_dir


class CacheManager(object):
    __cache_managers = {}
    _default_cache_file_limit = 100
    _storage_manager_folder = 'storage_manager'
    _default_context = 'global'

    class CacheContext(object):
        def __init__(self, cache_context, default_cache_file_limit=10):
            self._context = str(cache_context)
            self._file_limit = int(default_cache_file_limit)

        def set_cache_limit(self, cache_file_limit):
            self._file_limit = max(self._file_limit, int(cache_file_limit))
            return self._file_limit

        def get_local_copy(self, remote_url):
            helper = StorageHelper.get(remote_url)
            if not helper:
                raise ValueError("Remote storage not supported: {}".format(remote_url))
            # check if we need to cache the file
            direct_access = helper._driver.get_direct_access(remote_url)
            if direct_access:
                return direct_access
            # check if we already have the file in our cache
            cached_file, cached_size = self._get_cache_file(remote_url)
            if cached_size is not None:
                return cached_file
            # we need to download the file:
            downloaded_file = helper.download_to_file(remote_url, cached_file)
            if downloaded_file != cached_file:
                # something happened
                return None
            return cached_file

        @staticmethod
        def upload_file(local_file, remote_url, wait_for_upload=True):
            helper = StorageHelper.get(remote_url)
            return helper.upload(local_file, remote_url, async_enable=not wait_for_upload)

        @classmethod
        def _get_hashed_url_file(cls, url):
            str_hash = hashlib.md5(url.encode()).hexdigest()
            filename = url.split('/')[-1]
            return '{}.{}'.format(str_hash, quote_url(filename))

        def _get_cache_file(self, remote_url):
            """
            :param remote_url: check if we have the remote url in our cache
            :return: full path to file name, current file size or None
            """
            folder = Path(get_cache_dir() / CacheManager._storage_manager_folder / self._context)
            folder.mkdir(parents=True, exist_ok=True)
            local_filename = self._get_hashed_url_file(remote_url)
            new_file = folder / local_filename
            if new_file.exists():
                new_file.touch(exist_ok=True)

            # delete old files
            files = sorted(folder.iterdir(), reverse=True, key=lambda x: x.stat().st_atime)
            files = files[self._file_limit:]
            for f in files:
                f.unlink()

            # if file doesn't exist, return file size None
            return new_file.as_posix(), new_file.stat().st_size if new_file.exists() else None

    @classmethod
    def get_cache_manager(cls, cache_context=None, cache_file_limit=None):
        cache_context = cache_context or cls._default_context
        if cache_context not in cls.__cache_managers:
            cls.__cache_managers[cache_context] = cls.CacheContext(
                cache_context, cache_file_limit or cls._default_cache_file_limit)
            if cache_file_limit:
                cls.__cache_managers[cache_context].set_cache_limit(cache_file_limit)

        return cls.__cache_managers[cache_context]
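

# A minimal usage sketch, assuming a StorageHelper backend is configured for the
# remote scheme; the s3:// URL and local path below are hypothetical placeholders.
if __name__ == "__main__":
    # Obtain (or create) the default "global" cache context, raising its file limit to 50
    manager = CacheManager.get_cache_manager(cache_file_limit=50)

    # Fetch a remote object: returns a direct-access path, a cached copy, or downloads it
    local_copy = manager.get_local_copy("s3://example-bucket/datasets/data.csv")
    print(local_copy)

    # Push a local file back to remote storage, blocking until the upload completes
    manager.upload_file(
        "/tmp/data.csv", "s3://example-bucket/datasets/data.csv", wait_for_upload=True)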