import hashlib

from pathlib2 import Path

from .helper import StorageHelper
from .util import quote_url
from ..config import get_cache_dir


class CacheManager(object):
    """
    Registry of per-context local file caches for remote storage objects.

    Use :meth:`get_cache_manager` to obtain the ``CacheContext`` for a given
    context name; contexts are created on first request and then reused.
    """

    # context name -> CacheContext instance, shared by the whole process
    __cache_managers = {}
    # file limit used when a context is created without an explicit limit
    _default_cache_file_limit = 100
    # sub-folder (under the cache dir) that holds all the context folders
    _storage_manager_folder = 'storage_manager'
    # context name used when the caller does not specify one
    _default_context = 'global'

    class CacheContext(object):
        """
        A single named cache folder holding local copies of remote files.
        """

        def __init__(self, cache_context, default_cache_file_limit=10):
            """
            :param cache_context: context name, also used as the cache sub-folder name
            :param default_cache_file_limit: initial number of entries kept when
                old files are deleted
            """
            self._context = str(cache_context)
            self._file_limit = int(default_cache_file_limit)

        def set_cache_limit(self, cache_file_limit):
            """
            Raise the cache file limit. The limit is never lowered: the larger of
            the current limit and the requested one is kept.

            :param cache_file_limit: requested number of entries to keep
            :return: the file limit in effect after the call
            """
            self._file_limit = max(self._file_limit, int(cache_file_limit))
            return self._file_limit

        def get_local_copy(self, remote_url):
            """
            Return a local path for the remote url, downloading it into the
            cache folder if it is not already there.

            :param remote_url: url of the remote object
            :return: local path to the file, or None if the download did not
                end up at the expected cache location
            :raise ValueError: if no storage helper is available for the url
            """
            helper = StorageHelper.get(remote_url)
            if not helper:
                raise ValueError("Remote storage not supported: {}".format(remote_url))
            # check if we need to cache the file
            # NOTE(review): presumably the driver returns a directly usable local path
            # (or a false value when a download is required) - confirm against the drivers
            direct_access = helper._driver.get_direct_access(remote_url)
            if direct_access:
                return direct_access
            # check if we already have the file in our cache
            cached_file, cached_size = self._get_cache_file(remote_url)
            if cached_size is not None:
                return cached_file
            # we need to download the file:
            downloaded_file = helper.download_to_file(remote_url, cached_file)
            if downloaded_file != cached_file:
                # something happened
                return None
            return cached_file

        @staticmethod
        def upload_file(local_file, remote_url, wait_for_upload=True):
            """
            Upload a local file to the remote url.

            :param local_file: path of the local file to upload
            :param remote_url: destination url
            :param wait_for_upload: if False the upload is requested asynchronously
                (passed to the helper as ``async_enable=True``)
            :return: whatever the storage helper's ``upload`` call returns
            :raise ValueError: if no storage helper is available for the url
            """
            helper = StorageHelper.get(remote_url)
            if not helper:
                # same error as get_local_copy, instead of an AttributeError on None
                raise ValueError("Remote storage not supported: {}".format(remote_url))
            return helper.upload(local_file, remote_url, async_enable=not wait_for_upload)

        @classmethod
        def _get_hashed_url_file(cls, url):
            """
            Build the cache file name for a url.

            :param url: remote url
            :return: '<md5 hex digest of the full url>.<quoted last path segment>'
            """
            str_hash = hashlib.md5(url.encode()).hexdigest()
            # everything after the last '/' is kept as a readable suffix
            filename = url.split('/')[-1]
            return '{}.{}'.format(str_hash, quote_url(filename))

        def _get_cache_file(self, remote_url):
            """
            Locate the cache entry for the remote url and delete old cache files.

            The context folder is created if missing. If the entry already exists
            it is touched first, then the folder entries are sorted by access time
            (most recent first) and everything past the file limit is deleted.

            :param remote_url: check if we have the remote url in our cache
            :return: full path to file name, current file size or None
            """
            # NOTE(review): assumes get_cache_dir() returns a Path-like object supporting '/'
            folder = Path(get_cache_dir() / CacheManager._storage_manager_folder / self._context)
            folder.mkdir(parents=True, exist_ok=True)
            local_filename = self._get_hashed_url_file(remote_url)
            new_file = folder / local_filename
            if new_file.exists():
                # refresh the timestamps so the requested entry sorts as most recent
                new_file.touch(exist_ok=True)

            # delete old files
            # NOTE(review): errors raised by stat() / unlink() here propagate to the caller
            files = sorted(folder.iterdir(), reverse=True, key=lambda x: x.stat().st_atime)
            files = files[self._file_limit:]
            for f in files:
                f.unlink()

            # if file doesn't exist, return file size None
            return new_file.as_posix(), new_file.stat().st_size if new_file.exists() else None

    @classmethod
    def get_cache_manager(cls, cache_context=None, cache_file_limit=None):
        """
        Return the cache context registered under the given name, creating it
        on first use.

        :param cache_context: context name; the default context is used when
            empty or None
        :param cache_file_limit: optional file limit. Used as the initial limit of
            a newly created context (the class default is used otherwise) and
            passed to ``set_cache_limit`` of the context, which only ever raises it
        :return: the ``CacheContext`` instance for the context
        """
        cache_context = cache_context or cls._default_context
        if cache_context not in cls.__cache_managers:
            cls.__cache_managers[cache_context] = cls.CacheContext(
                cache_context, cache_file_limit or cls._default_cache_file_limit)
        if cache_file_limit:
            cls.__cache_managers[cache_context].set_cache_limit(cache_file_limit)

        return cls.__cache_managers[cache_context]