Fix CacheManager attempting to clear the cache when the file limit has not been reached, resulting in slow, unnecessary checks

This commit is contained in:
allegroai 2024-02-15 19:03:09 +02:00
parent 87f0e63b28
commit 8746f34409
2 changed files with 15 additions and 13 deletions

View File

@ -208,11 +208,21 @@ class CacheManager(object):
new_file.touch(exist_ok=True) new_file.touch(exist_ok=True)
except Exception: except Exception:
pass pass
# if file doesn't exist, return file size None
# noinspection PyBroadException
try:
new_file_size = new_file.stat().st_size if new_file_exists else None
except Exception:
new_file_size = None
folder_files = list(folder.iterdir())
if len(folder_files) <= self._file_limit:
return new_file.as_posix(), new_file_size
# first exclude lock files # first exclude lock files
lock_files = dict() lock_files = dict()
files = [] files = []
for f in sorted(folder.iterdir(), reverse=True, key=sort_max_access_time): for f in sorted(folder_files, reverse=True, key=sort_max_access_time):
if f.name.startswith(CacheManager._lockfile_prefix) and f.name.endswith( if f.name.startswith(CacheManager._lockfile_prefix) and f.name.endswith(
CacheManager._lockfile_suffix CacheManager._lockfile_suffix
): ):
@ -233,10 +243,7 @@ class CacheManager(object):
# delete old files # delete old files
files = files[self._file_limit:] files = files[self._file_limit:]
for i, f in enumerate(files): for f in files:
if i < self._file_limit:
continue
# check if the file is in the lock folder list: # check if the file is in the lock folder list:
folder_lock = self._folder_locks.get(f.absolute().as_posix()) folder_lock = self._folder_locks.get(f.absolute().as_posix())
if folder_lock: if folder_lock:
@ -285,14 +292,7 @@ class CacheManager(object):
except BaseException: except BaseException:
pass pass
# if file doesn't exist, return file size None return new_file.as_posix(), new_file_size
# noinspection PyBroadException
try:
size = new_file.stat().st_size if new_file_exists else None
except Exception:
size = None
return new_file.as_posix(), size
def lock_cache_folder(self, local_path): def lock_cache_folder(self, local_path):
# type: (Union[str, Path]) -> () # type: (Union[str, Path]) -> ()

View File

@ -36,6 +36,8 @@ class StorageManager(object):
the returned link is the same, otherwise a link to a local copy of the url file is returned. the returned link is the same, otherwise a link to a local copy of the url file is returned.
Caching is enabled by default, cache limited by number of stored files per cache context. Caching is enabled by default, cache limited by number of stored files per cache context.
Oldest accessed files are deleted when cache is full. Oldest accessed files are deleted when cache is full.
One can also use this function to prevent the deletion of a file that has been cached,
as the respective file will have its timestamp refreshed.
:param str remote_url: remote url link (string) :param str remote_url: remote url link (string)
:param str cache_context: Optional caching context identifier (string), default context 'global' :param str cache_context: Optional caching context identifier (string), default context 'global'