Add artifact support for folders/wildcards, including retrieval and extraction

This commit is contained in:
allegroai 2019-09-25 00:33:00 +03:00
parent e6ed09664f
commit 86655412b0
4 changed files with 31 additions and 9 deletions

View File

@ -31,7 +31,8 @@ task.upload_artifact('Numpy Eye', np.eye(100, 100))
# add and upload Image (stored as .png file)
im = Image.open('samples/dancing.jpg')
task.upload_artifact('pillow_image', im)
# add and upload a folder, artifact_object should be the folder path
task.upload_artifact('local folder', artifact_object='samples/')
# do something
sleep(1.)

View File

@ -114,12 +114,31 @@ class Artifact(object):
self._metadata = dict(artifact_api_object.display_data) if artifact_api_object.display_data else {}
self._preview = artifact_api_object.type_data.preview if artifact_api_object.type_data else None
def get_local_copy(self, extract_archive=True):
    """
    Download the artifact and return a local path to the downloaded copy.

    :param bool extract_archive: If True and the artifact is of type 'archive' (compressed folder),
        the downloaded zip file is extracted into a newly created temporary folder, the downloaded
        zip file is deleted, and the path of that temporary folder is returned.
    :return: a local path to a downloaded copy of the artifact:
        - the temporary folder holding the extracted content, if extraction was requested and succeeded
        - the downloaded file itself, if extraction was not requested / not applicable / failed
        - whatever falsy value StorageHelper.get_local_copy returned, if the download did not
          produce a path (presumably None - verify against StorageHelper)
    """
    # function-scope imports, following the existing local import of StorageHelper
    from shutil import rmtree
    from trains.storage.helper import StorageHelper

    local_path = StorageHelper.get_local_copy(self.url)

    # nothing to extract: download failed, extraction not requested, or not an archive
    if not (local_path and extract_archive and self.type == 'archive'):
        return local_path

    # pre-bind so the failure branch never touches an unbound name if mkdtemp itself raises
    temp_folder = None
    try:
        temp_folder = mkdtemp(prefix='artifact_', suffix='.archive_' + self.name)
        # context manager guarantees the archive file handle is closed
        with ZipFile(local_path) as archive:
            archive.extractall(path=temp_folder)
    except Exception:
        # best effort: extraction failed, fall back to returning the downloaded archive.
        # rmtree (unlike rmdir) also removes a partially extracted, non-empty folder.
        if temp_folder:
            rmtree(temp_folder, ignore_errors=True)
        return local_path

    # extraction succeeded, the downloaded archive is no longer needed (best-effort delete)
    try:
        Path(local_path).unlink()
    except Exception:
        pass
    return temp_folder
def __repr__(self):
return str({'name': self.name, 'size': self.size, 'type': self.type, 'mode': self.mode, 'url': self.url,
@ -294,7 +313,7 @@ class Artifacts(object):
os.close(fd)
artifact_object = zip_file
artifact_type = 'zip'
artifact_type = 'archive'
artifact_type_data.content_type = mimetypes.guess_type(artifact_object)[0]
local_filename = artifact_object
delete_after_upload = True

View File

@ -569,6 +569,8 @@ class StorageHelper(object):
# we won't think we have the entire file
temp_local_path = '{}_{}{}'.format(local_path, time(), self._temp_download_suffix)
obj = self._get_object(remote_path)
if not obj:
return None
# object size in bytes
total_size_mb = -1

View File

@ -686,10 +686,10 @@ class Task(_Task):
:param str name: Artifact name. Notice! It will override a previous artifact if the name already exists
:param object artifact_object: Artifact object to upload. Currently supports:
- string / pathlib2.Path are treated as path to artifact file to upload
If wildcard or a folder is passed, zip file containing the local files will be created and uploaded.
- dict will be stored as .json,
- pandas.DataFrame will be stored as .csv.gz (compressed CSV file),
- numpy.ndarray will be stored as .npz,
If a wildcard or a folder is passed, a zip file containing the local files will be created and uploaded
- dict will be stored as .json file and uploaded
- pandas.DataFrame will be stored as .csv.gz (compressed CSV file) and uploaded
- numpy.ndarray will be stored as .npz and uploaded
- PIL.Image will be stored to .png file and uploaded
:param dict metadata: Simple key/value dictionary to store on the artifact
:param bool delete_after_upload: If True local artifact will be deleted