mirror of
https://github.com/clearml/clearml
synced 2025-01-31 00:56:57 +00:00
Fix slow handling of cached files with large cache_file_limit (#1352)
This commit is contained in:
parent
ba492dd65d
commit
fd01be6be3
@ -71,6 +71,8 @@ class CacheManager(object):
|
|||||||
if cached_size is not None and not force_download:
|
if cached_size is not None and not force_download:
|
||||||
CacheManager._add_remote_url(remote_url, cached_file)
|
CacheManager._add_remote_url(remote_url, cached_file)
|
||||||
return cached_file
|
return cached_file
|
||||||
|
|
||||||
|
self.clean_cache()
|
||||||
# we need to download the file:
|
# we need to download the file:
|
||||||
downloaded_file = helper.download_to_file(
|
downloaded_file = helper.download_to_file(
|
||||||
remote_url,
|
remote_url,
|
||||||
@ -163,25 +165,6 @@ class CacheManager(object):
|
|||||||
:param local_filename: if local_file is given, search for the local file/directory in the cache folder
|
:param local_filename: if local_file is given, search for the local file/directory in the cache folder
|
||||||
:return: full path to file name, current file size or None
|
:return: full path to file name, current file size or None
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def safe_time(x):
|
|
||||||
# noinspection PyBroadException
|
|
||||||
try:
|
|
||||||
return x.stat().st_mtime
|
|
||||||
except Exception:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
def sort_max_access_time(x):
|
|
||||||
atime = safe_time(x)
|
|
||||||
# noinspection PyBroadException
|
|
||||||
try:
|
|
||||||
if x.is_dir():
|
|
||||||
dir_files = list(x.iterdir())
|
|
||||||
atime = max(atime, max(safe_time(s) for s in dir_files)) if dir_files else atime
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
return atime
|
|
||||||
|
|
||||||
folder = Path(get_cache_dir() / CacheManager._storage_manager_folder / self._context)
|
folder = Path(get_cache_dir() / CacheManager._storage_manager_folder / self._context)
|
||||||
folder.mkdir(parents=True, exist_ok=True)
|
folder.mkdir(parents=True, exist_ok=True)
|
||||||
local_filename = local_filename or self.get_hashed_url_file(remote_url)
|
local_filename = local_filename or self.get_hashed_url_file(remote_url)
|
||||||
@ -201,9 +184,37 @@ class CacheManager(object):
|
|||||||
except Exception:
|
except Exception:
|
||||||
new_file_size = None
|
new_file_size = None
|
||||||
|
|
||||||
|
return new_file.as_posix(), new_file_size
|
||||||
|
|
||||||
|
def clean_cache(self):
|
||||||
|
# type: () -> bool
|
||||||
|
"""
|
||||||
|
If cache is full, clean it by deleting old/lock files
|
||||||
|
|
||||||
|
:return: True if the cache has been cleaned and False otherwise
|
||||||
|
"""
|
||||||
|
def safe_time(x):
|
||||||
|
# noinspection PyBroadException
|
||||||
|
try:
|
||||||
|
return x.stat().st_mtime
|
||||||
|
except Exception:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
def sort_max_access_time(x):
|
||||||
|
atime = safe_time(x)
|
||||||
|
# noinspection PyBroadException
|
||||||
|
try:
|
||||||
|
if x.is_dir():
|
||||||
|
dir_files = list(x.iterdir())
|
||||||
|
atime = max(atime, max(safe_time(s) for s in dir_files)) if dir_files else atime
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return atime
|
||||||
|
|
||||||
|
folder = Path(get_cache_dir() / CacheManager._storage_manager_folder / self._context)
|
||||||
folder_files = list(folder.iterdir())
|
folder_files = list(folder.iterdir())
|
||||||
if len(folder_files) <= self._file_limit:
|
if len(folder_files) <= self._file_limit:
|
||||||
return new_file.as_posix(), new_file_size
|
return False
|
||||||
|
|
||||||
# first exclude lock files
|
# first exclude lock files
|
||||||
lock_files = dict()
|
lock_files = dict()
|
||||||
@ -269,8 +280,7 @@ class CacheManager(object):
|
|||||||
os.unlink(f)
|
os.unlink(f)
|
||||||
except BaseException:
|
except BaseException:
|
||||||
pass
|
pass
|
||||||
|
return True
|
||||||
return new_file.as_posix(), new_file_size
|
|
||||||
|
|
||||||
def lock_cache_folder(self, local_path):
|
def lock_cache_folder(self, local_path):
|
||||||
# type: (Union[str, Path]) -> ()
|
# type: (Union[str, Path]) -> ()
|
||||||
|
@ -281,12 +281,16 @@ class SafeQueue(object):
|
|||||||
# Fix the python Queue and Use SimpleQueue write so it uses a single OS write,
|
# Fix the python Queue and Use SimpleQueue write so it uses a single OS write,
|
||||||
# making it atomic message passing
|
# making it atomic message passing
|
||||||
self._q = SimpleQueue(*args, **kwargs)
|
self._q = SimpleQueue(*args, **kwargs)
|
||||||
# noinspection PyBroadException
|
|
||||||
try:
|
# on Windows, queue communication is done via pipes, no need to override the _send_bytes method
|
||||||
# noinspection PyUnresolvedReferences,PyProtectedMember
|
if sys.platform != 'win32':
|
||||||
self._q._writer._send_bytes = partial(SafeQueue._pipe_override_send_bytes, self._q._writer)
|
# noinspection PyBroadException
|
||||||
except Exception:
|
try:
|
||||||
pass
|
# noinspection PyUnresolvedReferences,PyProtectedMember
|
||||||
|
self._q._writer._send_bytes = partial(SafeQueue._pipe_override_send_bytes, self._q._writer)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
self._internal_q = None
|
self._internal_q = None
|
||||||
# Note we should Never! assign a new object to `self._q_size`, just work with the initial object
|
# Note we should Never! assign a new object to `self._q_size`, just work with the initial object
|
||||||
self._q_size = [] # list of PIDs we pushed, so this is atomic.
|
self._q_size = [] # list of PIDs we pushed, so this is atomic.
|
||||||
|
@ -8,7 +8,7 @@ Then, when running this example, it creates a router which binds to 0.0.0.0:9000
|
|||||||
A local route is then created, which will proxy all traffic from
|
A local route is then created, which will proxy all traffic from
|
||||||
`http://<PRIVATE_IP>:9000/example_source` to `http://localhost:8000/serve`.
|
`http://<PRIVATE_IP>:9000/example_source` to `http://localhost:8000/serve`.
|
||||||
|
|
||||||
Trafic can be intercepted both on request and response via callbacks. See
|
Traffic can be intercepted both on request and response via callbacks. See
|
||||||
`request_callback` and `response_callback`.
|
`request_callback` and `response_callback`.
|
||||||
|
|
||||||
By default, the route traffic is monitored and telemetry is sent to the ClearML
|
By default, the route traffic is monitored and telemetry is sent to the ClearML
|
||||||
|
Loading…
Reference in New Issue
Block a user