Fix archive support: StorageManager should only try to extract .zip files, Model should not auto extract package

This commit is contained in:
allegroai 2020-05-22 11:54:46 +03:00
parent 2393ac5f7f
commit b457b9aaad
2 changed files with 54 additions and 40 deletions

View File

@ -415,7 +415,7 @@ class Model(IdObjectBase, AsyncManagerMixin, _StorageUriMixin):
# remove non existing model file
Model._local_model_to_id_uri.pop(dl_file, None)
local_download = StorageManager.get_local_copy(uri)
local_download = StorageManager.get_local_copy(uri, extract_archive=False)
# save local model, so we can later query what was the original one
if local_download is not None:

View File

@ -40,6 +40,59 @@ class StorageManager(object):
).get_local_copy(remote_url=remote_url)
if not extract_archive or not cached_file:
return cached_file
return cls._extract_to_cache(cached_file, name)
@classmethod
def upload_file(
    cls, local_file, remote_url, wait_for_upload=True
):  # type: (str, str, bool) -> str
    """
    Upload a local file to a remote location.

    ``remote_url`` is the final destination of the uploaded file and must
    include the target file name.

    Examples:
        upload_file('/tmp/artifact.yaml', 'http://localhost:8081/manual_artifacts/my_artifact.yaml')
        upload_file('/tmp/artifact.yaml', 's3://a_bucket/artifacts/my_artifact.yaml')
        upload_file('/tmp/artifact.yaml', '/mnt/share/folder/artifacts/my_artifact.yaml')

    :param str local_file: Full path of the local file to upload
    :param str remote_url: Full path or remote url to upload to (including file name)
    :param bool wait_for_upload: If False, return immediately and upload in the background.
        Default True.
    :return str: Newly uploaded remote url
    """
    # The upload is delegated to the default cache manager.
    manager = CacheManager.get_cache_manager()
    return manager.upload_file(
        local_file=local_file,
        remote_url=remote_url,
        wait_for_upload=wait_for_upload,
    )
@classmethod
def set_cache_file_limit(
    cls, cache_file_limit, cache_context=None
):  # type: (int, Optional[str]) -> int
    """
    Set the file limit of a cache context.

    The file limit is the maximum number of files the given cache context holds.
    Note that only the number of cached files is limited, not their size.

    :param int cache_file_limit: New maximum number of cached files
    :param str cache_context: Optional cache context identifier, default global context
    :return int: The new cache context file limit
    """
    # The limit is passed both when obtaining the manager and when setting it,
    # exactly as in the chained-call form.
    manager = CacheManager.get_cache_manager(
        cache_context=cache_context, cache_file_limit=cache_file_limit
    )
    return manager.set_cache_limit(cache_file_limit)
@classmethod
def _extract_to_cache(cls, cached_file, name):
"""
Extract cached file zip file to cache folder
:param str cached_file: local copy of archive file
:param str name: cache context
:return str: cached folder containing the extracted archive content
"""
# only zip files
if not cached_file or not str(cached_file).lower().endswith('.zip'):
return cached_file
archive_suffix = cached_file.rpartition(".")[0]
target_folder = Path("{0}_artifact_archive_{1}".format(archive_suffix, name))
base_logger = LoggerRoot.get_base_logger()
@ -82,42 +135,3 @@ class StorageManager(object):
pass
return cached_file
return target_folder
@classmethod
def upload_file(
    cls, local_file, remote_url, wait_for_upload=True
):  # type: (str, str, bool) -> str
    """
    Upload a local file to a remote location.

    ``remote_url`` is the final destination of the uploaded file and must
    include the target file name.

    Examples:
        upload_file('/tmp/artifact.yaml', 'http://localhost:8081/manual_artifacts/my_artifact.yaml')
        upload_file('/tmp/artifact.yaml', 's3://a_bucket/artifacts/my_artifact.yaml')
        upload_file('/tmp/artifact.yaml', '/mnt/share/folder/artifacts/my_artifact.yaml')

    :param str local_file: Full path of the local file to upload
    :param str remote_url: Full path or remote url to upload to (including file name)
    :param bool wait_for_upload: If False, return immediately and upload in the background.
        Default True.
    :return str: Newly uploaded remote url
    """
    # The upload is delegated to the default cache manager.
    manager = CacheManager.get_cache_manager()
    return manager.upload_file(
        local_file=local_file,
        remote_url=remote_url,
        wait_for_upload=wait_for_upload,
    )
@classmethod
def set_cache_file_limit(
    cls, cache_file_limit, cache_context=None
):  # type: (int, Optional[str]) -> int
    """
    Set the file limit of a cache context.

    The file limit is the maximum number of files the given cache context holds.
    Note that only the number of cached files is limited, not their size.

    :param int cache_file_limit: New maximum number of cached files
    :param str cache_context: Optional cache context identifier, default global context
    :return int: The new cache context file limit
    """
    # The limit is passed both when obtaining the manager and when setting it,
    # exactly as in the chained-call form.
    manager = CacheManager.get_cache_manager(
        cache_context=cache_context, cache_file_limit=cache_file_limit
    )
    return manager.set_cache_limit(cache_file_limit)