mirror of
https://github.com/clearml/clearml
synced 2025-02-14 16:46:12 +00:00
Add support for renaming very long filenames to avoid file system errors when downloading files
This commit is contained in:
parent
df1c134a7a
commit
b84963fea0
@ -15,6 +15,7 @@ from ..config import get_cache_dir, deferred_config
|
|||||||
from ..debugging.log import LoggerRoot
|
from ..debugging.log import LoggerRoot
|
||||||
from ..utilities.locks.utils import Lock as FileLock
|
from ..utilities.locks.utils import Lock as FileLock
|
||||||
from ..utilities.locks.exceptions import LockException
|
from ..utilities.locks.exceptions import LockException
|
||||||
|
from ..utilities.files import get_filename_max_length
|
||||||
|
|
||||||
|
|
||||||
class CacheManager(object):
|
class CacheManager(object):
|
||||||
@ -40,6 +41,7 @@ class CacheManager(object):
|
|||||||
self._context = str(cache_context)
|
self._context = str(cache_context)
|
||||||
self._file_limit = int(default_cache_file_limit)
|
self._file_limit = int(default_cache_file_limit)
|
||||||
self._rlock = RLock()
|
self._rlock = RLock()
|
||||||
|
self._max_file_name_length = None
|
||||||
|
|
||||||
def set_cache_limit(self, cache_file_limit):
|
def set_cache_limit(self, cache_file_limit):
|
||||||
# type: (int) -> int
|
# type: (int) -> int
|
||||||
@ -108,6 +110,50 @@ class CacheManager(object):
|
|||||||
filename = url.split("/")[-1]
|
filename = url.split("/")[-1]
|
||||||
return "{}.{}".format(str_hash, quote_url(filename))
|
return "{}.{}".format(str_hash, quote_url(filename))
|
||||||
|
|
||||||
|
def _conform_filename(self, file_name):
    # type: (str) -> str
    """
    Shorten a file name that is too long for the cache folder's file system.

    Characters are dropped from the end of the base name, while the last
    (up to) two extensions, e.g. ".tar.gz", are kept. If the extensions alone
    do not fit, only their trailing characters are kept (prefixed with ".").

    :param file_name: base file name (without any directory part)
    :return: the shortened file name if the original exceeds the limit,
        otherwise the original file name unchanged
    """
    # The file system limit is looked up once and cached on the instance
    if self._max_file_name_length is None:
        self._max_file_name_length = get_filename_max_length(self.get_cache_folder())

    # Maximum character supported for filename
    # (FS limit) - (32 for temporary file name addition)
    allowed_length = self._max_file_name_length - 32

    if len(file_name) <= allowed_length:
        return file_name  # File name size is in limit

    if allowed_length < 1:
        # No room for even a single character: shortening cannot produce a usable name
        return file_name

    # Keep at most the last two extensions (e.g. ".tar.gz")
    raw_ext = "".join(Path(file_name).suffixes[-2:])

    # Split using the raw (unstripped) extension length. Slicing with an explicit
    # end index avoids the `[:-0]` pitfall that empties the base name when the
    # file has no extension at all.
    file_basename = file_name[:len(file_name) - len(raw_ext)].strip()
    file_ext = raw_ext.rstrip(" ")

    # Omit characters from the extensions when they alone exceed the limit
    if len(file_ext) > allowed_length:
        keep = allowed_length - 1  # one character is reserved for the leading "."
        # Explicit start index: `[-0:]` would return the whole string when keep == 0
        file_ext = "." + file_ext[len(file_ext) - keep:].lstrip(".")

    # Remaining room for the base name (never negative, see the truncation above)
    allowed_length -= len(file_ext)

    # Omit characters from filename (without extension)
    if len(file_basename) > allowed_length:
        file_basename = file_basename[:allowed_length].strip()

    new_file_name = file_basename + file_ext

    LoggerRoot.get_base_logger().warning(
        'Renaming file to "{}" due to filename length limit'.format(new_file_name)
    )

    return new_file_name
|
||||||
|
|
||||||
def get_cache_folder(self):
|
def get_cache_folder(self):
|
||||||
# type: () -> str
|
# type: () -> str
|
||||||
"""
|
"""
|
||||||
@ -153,6 +199,7 @@ class CacheManager(object):
|
|||||||
)
|
)
|
||||||
folder.mkdir(parents=True, exist_ok=True)
|
folder.mkdir(parents=True, exist_ok=True)
|
||||||
local_filename = local_filename or self.get_hashed_url_file(remote_url)
|
local_filename = local_filename or self.get_hashed_url_file(remote_url)
|
||||||
|
local_filename = self._conform_filename(local_filename)
|
||||||
new_file = folder / local_filename
|
new_file = folder / local_filename
|
||||||
new_file_exists = new_file.exists()
|
new_file_exists = new_file.exists()
|
||||||
if new_file_exists:
|
if new_file_exists:
|
||||||
|
23
clearml/utilities/files.py
Normal file
23
clearml/utilities/files.py
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
import os
|
||||||
|
from sys import platform
|
||||||
|
|
||||||
|
import pathlib2
|
||||||
|
import psutil
|
||||||
|
|
||||||
|
|
||||||
|
def get_filename_max_length(dir_path):
    # type: (str) -> int
    """
    Return the maximum file name length of the file system holding `dir_path`.

    On Windows the value is taken from the psutil disk partition whose device
    matches the drive of `dir_path`. On platforms that provide ``os.statvfs``
    it is taken from ``f_namemax``. When the limit cannot be determined
    (unsupported platform, lookup failure, or a non-positive reported value)
    255 is returned.

    :param dir_path: path of a directory on the file system to query
    :return: maximum length allowed for a single file name
    """
    default_max_length = 255  # Common filesystems like NTFS, EXT4 and HFS+ limited with 255

    try:
        abs_path = os.path.abspath(dir_path)
        if platform == "win32":
            # Drive letters are case-insensitive; an empty drive would match every partition
            dir_drive = os.path.splitdrive(abs_path)[0].lower()
            if dir_drive:
                for drv in psutil.disk_partitions():
                    if drv.device.lower().startswith(dir_drive):
                        # `maxfile` may be missing from the partition entry; fall back if so
                        max_length = getattr(drv, "maxfile", None)
                        if max_length and max_length > 0:
                            return int(max_length)
                        break
        elif hasattr(os, "statvfs"):
            # Covers "linux", "linux2" (Python 2), "darwin" and other POSIX platforms
            max_length = os.statvfs(abs_path).f_namemax
            if max_length > 0:
                return int(max_length)
    except Exception as err:
        # Best effort: report the failure and fall back to the common default
        import logging

        logging.getLogger(__name__).warning(
            "Failed getting the maximum filename length for %s: %s", dir_path, err
        )

    return default_max_length
|
||||||
|
|
Loading…
Reference in New Issue
Block a user