Add pd.DataFrame support to upload_artifact

allegroai 2019-09-15 21:52:57 +03:00
parent 406536d105
commit 24c2f86741
5 changed files with 18 additions and 5 deletions
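
What the change amounts to for users: task.upload_artifact() now accepts a pandas.DataFrame directly and stores it as a compressed CSV snapshot. A minimal end-to-end sketch, assuming trains is installed and configured (project and task names are made up):

import pandas as pd
from trains import Task

task = Task.init(project_name='examples', task_name='dataframe artifact')
df = pd.DataFrame({'num_legs': [2, 4, 8, 0]},
                  index=['falcon', 'dog', 'spider', 'fish'])
# New in this commit: a DataFrame is accepted as-is and uploaded as .csv.gz
task.upload_artifact('Pandas', artifact_object=df)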

View File

@@ -12,7 +12,7 @@ df = pd.DataFrame({'num_legs': [2, 4, 8, 0],
                    'num_specimen_seen': [10, 2, 1, 8]},
                   index=['falcon', 'dog', 'spider', 'fish'])
-# register Pandas object as artifact to watch
+# Register Pandas object as artifact to watch
 # (it will be monitored in the background and automatically synced and uploaded)
 task.register_artifact('train', df, metadata={'counting': 'legs', 'max legs': 69})
 # change the artifact object
@@ -20,6 +20,8 @@ df.sample(frac=0.5, replace=True, random_state=1)
 # or access it from anywhere using the Task
 Task.current_task().artifacts['train'].sample(frac=0.5, replace=True, random_state=1)
+# add and upload pandas.DataFrame (one-time snapshot of the object)
+task.upload_artifact('Pandas', artifact_object=df)
 # add and upload local file artifact
 task.upload_artifact('local file', artifact_object='samples/dancing.jpg')
 # add and upload dictionary stored as JSON
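
The example now shows both artifact flows side by side: register_artifact() keeps watching the object and re-syncs it in the background, while upload_artifact() freezes a one-time snapshot. A short sketch of the difference (continuing the df from the example above):

# 'train' keeps tracking df: later changes are picked up by the background sync
task.register_artifact('train', df)
# 'Pandas' is serialized at call time: later changes are NOT reflected
task.upload_artifact('Pandas', artifact_object=df)
df['num_legs'] = df['num_legs'] * 2  # affects future syncs of 'train' only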

View File

@@ -243,7 +243,7 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
         value = value.rstrip('/') if value else None
         self._storage_uri = StorageHelper.conform_url(value)
         self.data.output.destination = self._storage_uri
-        self._edit(output_dest=self._storage_uri or '')
+        self._edit(output_dest=self._storage_uri or ('' if Session.check_min_api_version('2.3') else None))
         if self._storage_uri or self._output_model:
             self.output_model.upload_storage_uri = self._storage_uri
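
The guarded expression clears output_dest differently by server capability: servers that pass Session.check_min_api_version('2.3') accept an empty string, older ones expect the field to stay None. A stand-alone sketch of the pattern with a stub version check (check_min_api_version below is a hypothetical stand-in, not the real Session method):

def check_min_api_version(required, server='2.3'):
    # Stub: compare dotted version strings component-wise
    as_tuple = lambda v: tuple(int(p) for p in v.split('.'))
    return as_tuple(server) >= as_tuple(required)

storage_uri = None  # nothing configured
output_dest = storage_uri or ('' if check_min_api_version('2.3') else None)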

View File

@@ -47,8 +47,9 @@ def get_single_result(entity, query, results, log=None, show_results=10, raise_on_error=True):
             results = sorted(results, key=lambda x: int(x.created.strftime('%s') if x.created else 0), reverse=True)
-            for obj in (o if isinstance(o, dict) else o.to_dict() for o in results[:show_results]):
-                log.warn('Found {entity} `{obj[name]}` (id={obj[id]})'.format(**locals()))
+            for i, obj in enumerate(o if isinstance(o, dict) else o.to_dict() for o in results[:show_results]):
+                selected = 'Selected' if i == 0 else 'Additionally found'
+                log.warn('{selected} {entity} `{obj[name]}` (id={obj[id]})'.format(**locals()))
         if raise_on_error:
             raise ValueError('More than one {entity} found when searching for `{query}`'.format(**locals()))
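
The rewritten loop makes the log actionable: the newest match is flagged as the one actually used, the rest as alternatives. A self-contained sketch of the same reporting logic over plain dicts (stand-ins for the real API result objects):

import logging

logging.basicConfig(level=logging.WARNING)
log = logging.getLogger('example')

results = [
    {'name': 'train v1', 'id': 'a1', 'created': 100},
    {'name': 'train v2', 'id': 'b2', 'created': 200},
]
results = sorted(results, key=lambda r: r['created'], reverse=True)  # newest first
for i, obj in enumerate(results):
    selected = 'Selected' if i == 0 else 'Additionally found'
    log.warning('%s task `%s` (id=%s)', selected, obj['name'], obj['id'])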

View File

@@ -123,6 +123,15 @@ class Artifacts(object):
             np.savez_compressed(local_filename, **{name: artifact_object})
             delete_after_upload = True
             use_filename_in_uri = False
+        elif pd and isinstance(artifact_object, pd.DataFrame):
+            artifact_type = 'pandas'
+            artifact_type_data.content_type = 'text/csv'
+            artifact_type_data.preview = str(artifact_object.__repr__())
+            fd, local_filename = mkstemp(suffix=self._save_format)
+            os.close(fd)
+            artifact_object.to_csv(local_filename, compression=self._compression)
+            delete_after_upload = True
+            use_filename_in_uri = False
         elif isinstance(artifact_object, Image.Image):
             artifact_type = 'image'
             artifact_type_data.content_type = 'image/png'
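
Stripped of the class plumbing, the new branch writes the DataFrame to a gzip-compressed CSV in a temporary file and marks it for upload-then-delete. A stand-alone sketch with the instance attributes replaced by the values they presumably hold (the suffix and compression literals are assumptions):

import os
from tempfile import mkstemp

import pandas as pd

df = pd.DataFrame({'num_legs': [2, 4, 8, 0]},
                  index=['falcon', 'dog', 'spider', 'fish'])

fd, local_filename = mkstemp(suffix='.csv.gz')  # assumed self._save_format
os.close(fd)                                    # mkstemp returns an open fd; close before reuse
df.to_csv(local_filename, compression='gzip')   # assumed self._compression
# the uploader then ships local_filename and removes it (delete_after_upload = True)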

View File

@@ -672,6 +672,7 @@ class Task(_Task):
         :param object artifact_object: Artifact object to upload. Currently supports:
             - string / pathlib2.Path are treated as path to artifact file to upload
             - dict will be stored as .json,
+            - pandas.DataFrame will be stored as .csv.gz (compressed CSV file),
             - numpy.ndarray will be stored as .npz,
             - PIL.Image will be stored to .png file and uploaded
         :param dict metadata: Simple key/value dictionary to store on the artifact
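
With the docstring updated, every supported type maps to one serialization. A hedged round-up of the calls (artifact names are illustrative; task and df come from the earlier sketches):

import numpy as np

task.upload_artifact('local file', artifact_object='samples/dancing.jpg')  # path, uploaded as-is
task.upload_artifact('config', artifact_object={'lr': 0.01})               # dict -> .json
task.upload_artifact('table', artifact_object=df)                          # DataFrame -> .csv.gz
task.upload_artifact('weights', artifact_object=np.zeros((3, 3)))          # ndarray -> .npz
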
@@ -1260,7 +1261,7 @@ class Task(_Task):
                 only_fields=['id', 'name', 'last_update']
             )
         )
-        task = get_single_result(entity='task', query=task_name, results=res.response.tasks)
+        task = get_single_result(entity='task', query=task_name, results=res.response.tasks, raise_on_error=False)
         return cls(
             private=cls.__create_protection,
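
Finally, passing raise_on_error=False means a task lookup by name no longer throws when several tasks match; combined with the get_single_result change above, the newest match is returned and the alternatives are logged. Assuming this code path backs the public Task.get_task() helper (an assumption, not confirmed by the diff):

from trains import Task

# If several runs share this name, the most recent one is returned and the
# others are reported as warnings instead of raising ValueError.
task = Task.get_task(project_name='examples', task_name='artifacts example')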