mirror of
https://github.com/clearml/clearml
synced 2025-01-30 16:46:58 +00:00
Fix slow handling of cached files with large cache_file_limit (#1352)
This commit is contained in:
parent
ba492dd65d
commit
fd01be6be3
@ -71,6 +71,8 @@ class CacheManager(object):
|
||||
if cached_size is not None and not force_download:
|
||||
CacheManager._add_remote_url(remote_url, cached_file)
|
||||
return cached_file
|
||||
|
||||
self.clean_cache()
|
||||
# we need to download the file:
|
||||
downloaded_file = helper.download_to_file(
|
||||
remote_url,
|
||||
@ -163,25 +165,6 @@ class CacheManager(object):
|
||||
:param local_filename: if local_file is given, search for the local file/directory in the cache folder
|
||||
:return: full path to file name, current file size or None
|
||||
"""
|
||||
|
||||
def safe_time(x):
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
return x.stat().st_mtime
|
||||
except Exception:
|
||||
return 0
|
||||
|
||||
def sort_max_access_time(x):
|
||||
atime = safe_time(x)
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
if x.is_dir():
|
||||
dir_files = list(x.iterdir())
|
||||
atime = max(atime, max(safe_time(s) for s in dir_files)) if dir_files else atime
|
||||
except Exception:
|
||||
pass
|
||||
return atime
|
||||
|
||||
folder = Path(get_cache_dir() / CacheManager._storage_manager_folder / self._context)
|
||||
folder.mkdir(parents=True, exist_ok=True)
|
||||
local_filename = local_filename or self.get_hashed_url_file(remote_url)
|
||||
@ -201,9 +184,37 @@ class CacheManager(object):
|
||||
except Exception:
|
||||
new_file_size = None
|
||||
|
||||
return new_file.as_posix(), new_file_size
|
||||
|
||||
def clean_cache(self):
|
||||
# type: () -> bool
|
||||
"""
|
||||
If cache is full, clean it by deleting old/lock files
|
||||
|
||||
:return: True if the cache has been cleaned and False otherwise
|
||||
"""
|
||||
def safe_time(x):
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
return x.stat().st_mtime
|
||||
except Exception:
|
||||
return 0
|
||||
|
||||
def sort_max_access_time(x):
|
||||
atime = safe_time(x)
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
if x.is_dir():
|
||||
dir_files = list(x.iterdir())
|
||||
atime = max(atime, max(safe_time(s) for s in dir_files)) if dir_files else atime
|
||||
except Exception:
|
||||
pass
|
||||
return atime
|
||||
|
||||
folder = Path(get_cache_dir() / CacheManager._storage_manager_folder / self._context)
|
||||
folder_files = list(folder.iterdir())
|
||||
if len(folder_files) <= self._file_limit:
|
||||
return new_file.as_posix(), new_file_size
|
||||
return False
|
||||
|
||||
# first exclude lock files
|
||||
lock_files = dict()
|
||||
@ -269,8 +280,7 @@ class CacheManager(object):
|
||||
os.unlink(f)
|
||||
except BaseException:
|
||||
pass
|
||||
|
||||
return new_file.as_posix(), new_file_size
|
||||
return True
|
||||
|
||||
def lock_cache_folder(self, local_path):
|
||||
# type: (Union[str, Path]) -> ()
|
||||
|
@ -281,12 +281,16 @@ class SafeQueue(object):
|
||||
# Fix the python Queue and Use SimpleQueue write so it uses a single OS write,
|
||||
# making it atomic message passing
|
||||
self._q = SimpleQueue(*args, **kwargs)
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
# noinspection PyUnresolvedReferences,PyProtectedMember
|
||||
self._q._writer._send_bytes = partial(SafeQueue._pipe_override_send_bytes, self._q._writer)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# on Windows, queue communication is done via pipes, no need to override the _send_bytes method
|
||||
if sys.platform != 'win32':
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
# noinspection PyUnresolvedReferences,PyProtectedMember
|
||||
self._q._writer._send_bytes = partial(SafeQueue._pipe_override_send_bytes, self._q._writer)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
self._internal_q = None
|
||||
# Note we should Never! assign a new object to `self._q_size`, just work with the initial object
|
||||
self._q_size = [] # list of PIDs we pushed, so this is atomic.
|
||||
|
@ -8,7 +8,7 @@ Then, when running this example, it creates a router which binds to 0.0.0.0:9000
|
||||
A local route is then created, which will proxy all traffic from
|
||||
`http://<PRIVATE_IP>:9000/example_source` to `http://localhost:8000/serve`.
|
||||
|
||||
Trafic can be intercepted both on request and response via callbacks. See
|
||||
Traffic can be intercepted both on request and response via callbacks. See
|
||||
`request_callback` and `response_callback`.
|
||||
|
||||
By default, the route traffic is monitored and telemetry is sent to the ClearML
|
||||
|
Loading…
Reference in New Issue
Block a user