From 8746f3440988fbb30ce8f8740ae4e242393f48f0 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Thu, 15 Feb 2024 19:03:09 +0200 Subject: [PATCH] Fix CacheManager attempts to clear cache when file limit has not been reached resulting in slow unnecessary checks --- clearml/storage/cache.py | 26 +++++++++++++------------- clearml/storage/manager.py | 2 ++ 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/clearml/storage/cache.py b/clearml/storage/cache.py index e8c0bbb3..ec5f319d 100644 --- a/clearml/storage/cache.py +++ b/clearml/storage/cache.py @@ -208,11 +208,21 @@ class CacheManager(object): new_file.touch(exist_ok=True) except Exception: pass + # if file doesn't exist, return file size None + # noinspection PyBroadException + try: + new_file_size = new_file.stat().st_size if new_file_exists else None + except Exception: + new_file_size = None + + folder_files = list(folder.iterdir()) + if len(folder_files) <= self._file_limit: + return new_file.as_posix(), new_file_size # first exclude lock files lock_files = dict() files = [] - for f in sorted(folder.iterdir(), reverse=True, key=sort_max_access_time): + for f in sorted(folder_files, reverse=True, key=sort_max_access_time): if f.name.startswith(CacheManager._lockfile_prefix) and f.name.endswith( CacheManager._lockfile_suffix ): @@ -233,10 +243,7 @@ class CacheManager(object): # delete old files files = files[self._file_limit:] - for i, f in enumerate(files): - if i < self._file_limit: - continue - + for f in files: # check if the file is in the lock folder list: folder_lock = self._folder_locks.get(f.absolute().as_posix()) if folder_lock: @@ -285,14 +292,7 @@ class CacheManager(object): except BaseException: pass - # if file doesn't exist, return file size None - # noinspection PyBroadException - try: - size = new_file.stat().st_size if new_file_exists else None - except Exception: - size = None - - return new_file.as_posix(), size + return new_file.as_posix(), new_file_size def lock_cache_folder(self, local_path): # type: (Union[str, Path]) -> () diff --git a/clearml/storage/manager.py b/clearml/storage/manager.py index 55710017..ff0a7f20 100644 --- a/clearml/storage/manager.py +++ b/clearml/storage/manager.py @@ -36,6 +36,8 @@ class StorageManager(object): the returned link is the same, otherwise a link to a local copy of the url file is returned. Caching is enabled by default, cache limited by number of stored files per cache context. Oldest accessed files are deleted when cache is full. + One can also use this function to prevent the deletion of a file that has been cached, + as the respective file will have its timestamp refreshed :param str remote_url: remote url link (string) :param str cache_context: Optional caching context identifier (string), default context 'global'