Add support for renaming very long filenames to avoid file system errors when downloading files

This commit is contained in:
allegroai 2022-06-28 21:22:51 +03:00
parent df1c134a7a
commit b84963fea0
2 changed files with 70 additions and 0 deletions

View File

@ -15,6 +15,7 @@ from ..config import get_cache_dir, deferred_config
from ..debugging.log import LoggerRoot
from ..utilities.locks.utils import Lock as FileLock
from ..utilities.locks.exceptions import LockException
from ..utilities.files import get_filename_max_length
class CacheManager(object):
@ -40,6 +41,7 @@ class CacheManager(object):
self._context = str(cache_context)
self._file_limit = int(default_cache_file_limit)
self._rlock = RLock()
self._max_file_name_length = None
def set_cache_limit(self, cache_file_limit):
# type: (int) -> int
@ -108,6 +110,50 @@ class CacheManager(object):
filename = url.split("/")[-1]
return "{}.{}".format(str_hash, quote_url(filename))
def _conform_filename(self, file_name):
    # type: (str) -> str
    """
    Shorten a file name that exceeds the file system's file-name length limit.

    The file system limit is queried once (via ``get_filename_max_length`` on the cache folder)
    and stored in ``self._max_file_name_length``. 32 characters of that limit are kept in reserve
    for the temporary file name addition.

    Up to the last two suffixes (e.g. ``.tar.gz``) are treated as the extension and preserved;
    characters are removed from the end of the base name. Only when the extension alone is longer
    than the allowed length is the extension itself cut, keeping its tail.

    :param file_name: base file name (without any directory part)
    :return: the shortened file name, or ``file_name`` unchanged if it already fits
    """
    if self._max_file_name_length is None:
        self._max_file_name_length = get_filename_max_length(self.get_cache_folder())

    # Maximum character supported for filename
    # (FS limit) - (32 for temporary file name addition)
    allowed_length = self._max_file_name_length - 32
    if len(file_name) <= allowed_length:
        return file_name  # File name size is in limit
    if allowed_length <= 0:
        # Nothing sensible can be produced within a non-positive budget, keep the name as is
        return file_name

    # Split off (at most) the last two suffixes as the extension
    raw_ext = "".join(Path(file_name).suffixes[-2:])
    if raw_ext and file_name.endswith(raw_ext):
        # Slice with an explicit end index, computed BEFORE stripping the extension,
        # so that the base name never contains a part of the extension
        file_basename = file_name[:len(file_name) - len(raw_ext)]
    else:
        # No extension: `file_name[:-0]` would be an empty string, so keep the whole name as base
        raw_ext = ""
        file_basename = file_name
    file_ext = raw_ext.rstrip(" ")
    file_basename = file_basename.strip()

    # Omit characters from the extension (keep its tail, one character is reserved for the dot)
    if len(file_ext) > allowed_length:
        # explicit start index: a `[-0:]` slice would return the entire extension
        file_ext = file_ext[len(file_ext) - (allowed_length - 1):]
        file_ext = "." + file_ext.lstrip(".")

    # Updating maximum character length (never negative, the extension fits by now)
    allowed_length -= len(file_ext)

    # Omit characters from filename (without extension)
    if len(file_basename) > allowed_length:
        file_basename = file_basename[:allowed_length].strip()

    new_file_name = file_basename + file_ext

    LoggerRoot.get_base_logger().warning(
        'Renaming file to "{}" due to filename length limit'.format(new_file_name)
    )

    return new_file_name
def get_cache_folder(self):
# type: () -> str
"""
@ -153,6 +199,7 @@ class CacheManager(object):
)
folder.mkdir(parents=True, exist_ok=True)
local_filename = local_filename or self.get_hashed_url_file(remote_url)
local_filename = self._conform_filename(local_filename)
new_file = folder / local_filename
new_file_exists = new_file.exists()
if new_file_exists:

View File

@ -0,0 +1,23 @@
import os
from sys import platform
import pathlib2
import psutil
def get_filename_max_length(dir_path):
    # type: (str) -> int
    """
    Return the maximum file name length supported by the file system holding ``dir_path``.

    On Windows the value is taken from the ``maxfile`` field of the matching
    ``psutil.disk_partitions()`` entry. On platforms providing ``os.statvfs`` it is taken from
    ``f_namemax``. If ``dir_path`` does not exist yet, its nearest existing parent is queried.
    Any failure is logged and the default of 255 is returned, i.e. this function does not raise.

    :param dir_path: directory whose file system should be queried
    :return: maximum number of characters allowed for a single file name
    """
    import logging  # local import, this module has no top-level logging import

    default_max_length = 255  # Common filesystems like NTFS, EXT4 and HFS+ limited with 255
    max_length = None
    try:
        abs_path = os.path.abspath(dir_path)
        if platform == "win32":
            # Drive letters are case-insensitive; an empty drive would match every partition
            dir_drive = os.path.splitdrive(abs_path)[0].lower()
            if dir_drive:
                for drv in psutil.disk_partitions():
                    if drv.device.lower().startswith(dir_drive):
                        max_length = drv.maxfile
                        break
        elif hasattr(os, "statvfs"):
            # statvfs needs an existing path, climb up to the nearest existing parent
            probe_path = abs_path
            while not os.path.exists(probe_path):
                parent_path = os.path.dirname(probe_path)
                if parent_path == probe_path:
                    break
                probe_path = parent_path
            # pass a plain string, path objects are not accepted by every interpreter version
            max_length = os.statvfs(probe_path).f_namemax
    except Exception as err:
        # best effort only: report the problem and fall back to the common default
        logging.getLogger(__name__).warning(
            'Failed getting file name length limit for "{}", using {}: {}'.format(
                dir_path, default_max_length, err
            )
        )

    # Some file systems report 0 (or nothing at all), this is not a usable limit
    if max_length is not None and max_length > 0:
        return max_length
    return default_max_length