mirror of
https://github.com/clearml/clearml
synced 2025-02-02 01:54:23 +00:00
Add support for artifacts with different formats (#634)
This commit is contained in:
parent
382d361bff
commit
42fa0dde65
@ -1,4 +1,5 @@
|
||||
import json
|
||||
import yaml
|
||||
import mimetypes
|
||||
import os
|
||||
import pickle
|
||||
@ -308,8 +309,8 @@ class Artifacts(object):
|
||||
self.flush()
|
||||
|
||||
def upload_artifact(self, name, artifact_object=None, metadata=None, preview=None,
|
||||
delete_after_upload=False, auto_pickle=True, wait_on_upload=False):
|
||||
# type: (str, Optional[object], Optional[dict], Optional[str], bool, bool, bool) -> bool
|
||||
delete_after_upload=False, auto_pickle=True, wait_on_upload=False, extension_name=None):
|
||||
# type: (str, Optional[object], Optional[dict], Optional[str], bool, bool, bool, Optional[str]) -> bool
|
||||
if not Session.check_min_api_version('2.3'):
|
||||
LoggerRoot.get_base_logger().warning('Artifacts not supported by your ClearML-server version, '
|
||||
'please upgrade to the latest server version')
|
||||
@ -354,65 +355,133 @@ class Artifacts(object):
|
||||
override_filename_in_uri = None
|
||||
override_filename_ext_in_uri = None
|
||||
uri = None
|
||||
|
||||
def get_extension(extension_name_, valid_extensions, default_extension, artifact_type_):
    """
    Decide which file extension an artifact is stored with.

    :param extension_name_: Extension requested by the caller. Any falsy value
        (``None``, empty string) means no specific extension was requested.
    :param valid_extensions: Collection of extensions accepted for this artifact type.
    :param default_extension: Extension returned when nothing was requested or
        when the requested extension is not accepted.
    :param artifact_type_: Artifact type label; used only in the warning message.
    :return: ``extension_name_`` if it is one of ``valid_extensions``,
        otherwise ``default_extension``.
    """
    selected = default_extension
    if extension_name_:
        if extension_name_ in valid_extensions:
            selected = extension_name_
        else:
            # A specific extension was requested but is not accepted for this
            # artifact type: tell the user, then fall back to the default.
            warning_template = (
                "{} artifact can not be uploaded with extension {}. Valid extensions are: {}. Defaulting to {}."
            )
            LoggerRoot.get_base_logger().warning(
                warning_template.format(
                    artifact_type_, extension_name_, ", ".join(valid_extensions), default_extension
                )
            )
    return selected
|
||||
|
||||
if np and isinstance(artifact_object, np.ndarray):
|
||||
artifact_type = 'numpy'
|
||||
artifact_type_data.content_type = 'application/numpy'
|
||||
artifact_type_data.preview = preview or str(artifact_object.__repr__())
|
||||
override_filename_ext_in_uri = '.npz'
|
||||
override_filename_ext_in_uri = get_extension(
|
||||
extension_name, [".npz", ".csv.gz"], ".npz", artifact_type
|
||||
)
|
||||
override_filename_in_uri = name + override_filename_ext_in_uri
|
||||
fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.', suffix=override_filename_ext_in_uri)
|
||||
os.close(fd)
|
||||
np.savez_compressed(local_filename, **{name: artifact_object})
|
||||
if override_filename_ext_in_uri == ".npz":
|
||||
artifact_type_data.content_type = "application/numpy"
|
||||
np.savez_compressed(local_filename, **{name: artifact_object})
|
||||
elif override_filename_ext_in_uri == ".csv.gz":
|
||||
artifact_type_data.content_type = "text/csv"
|
||||
np.savetxt(local_filename, artifact_object, delimiter=",")
|
||||
delete_after_upload = True
|
||||
elif pd and isinstance(artifact_object, pd.DataFrame):
|
||||
artifact_type = 'pandas'
|
||||
artifact_type_data.content_type = 'text/csv'
|
||||
artifact_type = "pandas"
|
||||
artifact_type_data.preview = preview or str(artifact_object.__repr__())
|
||||
override_filename_ext_in_uri = self._save_format
|
||||
override_filename_ext_in_uri = get_extension(
|
||||
extension_name, [".csv.gz", ".parquet", ".feather", ".pickle"], ".csv.gz", artifact_type
|
||||
)
|
||||
override_filename_in_uri = name
|
||||
fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.', suffix=override_filename_ext_in_uri)
|
||||
os.close(fd)
|
||||
artifact_object.to_csv(local_filename, compression=self._compression)
|
||||
if override_filename_ext_in_uri == ".csv.gz":
|
||||
artifact_type_data.content_type = "text/csv"
|
||||
artifact_object.to_csv(local_filename, compression=self._compression)
|
||||
elif override_filename_ext_in_uri == ".parquet":
|
||||
artifact_type_data.content_type = "application/parquet"
|
||||
artifact_object.to_parquet(local_filename)
|
||||
elif override_filename_ext_in_uri == ".feather":
|
||||
artifact_type_data.content_type = "application/feather"
|
||||
artifact_object.to_feather(local_filename)
|
||||
elif override_filename_ext_in_uri == ".pickle":
|
||||
artifact_type_data.content_type = "application/pickle"
|
||||
artifact_object.to_pickle(local_filename)
|
||||
delete_after_upload = True
|
||||
elif isinstance(artifact_object, Image.Image):
|
||||
artifact_type = 'image'
|
||||
artifact_type_data.content_type = 'image/png'
|
||||
artifact_type = "image"
|
||||
artifact_type_data.content_type = "image/png"
|
||||
desc = str(artifact_object.__repr__())
|
||||
artifact_type_data.preview = preview or desc[1:desc.find(' at ')]
|
||||
override_filename_ext_in_uri = '.png'
|
||||
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
if not Image.EXTENSION:
|
||||
Image.init()
|
||||
if not Image.EXTENSION:
|
||||
raise Exception()
|
||||
override_filename_ext_in_uri = get_extension(
|
||||
extension_name, Image.EXTENSION.keys(), ".png", artifact_type
|
||||
)
|
||||
except Exception:
|
||||
override_filename_ext_in_uri = ".png"
|
||||
if extension_name and extension_name != ".png":
|
||||
LoggerRoot.get_base_logger().warning(
|
||||
"image artifact can not be uploaded with extension {}. Defaulting to .png.".format(
|
||||
extension_name
|
||||
)
|
||||
)
|
||||
|
||||
override_filename_in_uri = name + override_filename_ext_in_uri
|
||||
artifact_type_data.content_type = "image/unknown-type"
|
||||
guessed_type = mimetypes.guess_type(override_filename_in_uri)[0]
|
||||
if guessed_type:
|
||||
artifact_type_data.content_type = guessed_type
|
||||
|
||||
fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.', suffix=override_filename_ext_in_uri)
|
||||
os.close(fd)
|
||||
artifact_object.save(local_filename)
|
||||
delete_after_upload = True
|
||||
elif isinstance(artifact_object, dict):
|
||||
artifact_type = 'JSON'
|
||||
artifact_type_data.content_type = 'application/json'
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
json_text = json.dumps(artifact_object, sort_keys=True, indent=4)
|
||||
except Exception:
|
||||
if not auto_pickle:
|
||||
raise
|
||||
LoggerRoot.get_base_logger().warning(
|
||||
"JSON serialization of artifact \'{}\' failed, reverting to pickle".format(name))
|
||||
store_as_pickle = True
|
||||
json_text = None
|
||||
artifact_type = "dict"
|
||||
override_filename_ext_in_uri = get_extension(extension_name, [".json", ".yaml"], ".json", artifact_type)
|
||||
if override_filename_ext_in_uri == ".json":
|
||||
artifact_type_data.content_type = "application/json"
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
serialized_text = json.dumps(artifact_object, sort_keys=True, indent=4)
|
||||
except Exception:
|
||||
if not auto_pickle:
|
||||
raise
|
||||
LoggerRoot.get_base_logger().warning(
|
||||
"JSON serialization of artifact \'{}\' failed, reverting to pickle".format(name))
|
||||
store_as_pickle = True
|
||||
serialized_text = None
|
||||
else:
|
||||
artifact_type_data.content_type = "application/yaml"
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
serialized_text = yaml.dump(artifact_object, sort_keys=True, indent=4)
|
||||
except Exception:
|
||||
if not auto_pickle:
|
||||
raise
|
||||
LoggerRoot.get_base_logger().warning(
|
||||
"YAML serialization of artifact \'{}\' failed, reverting to pickle".format(name))
|
||||
store_as_pickle = True
|
||||
serialized_text = None
|
||||
|
||||
if json_text is not None:
|
||||
override_filename_ext_in_uri = '.json'
|
||||
if serialized_text is not None:
|
||||
override_filename_in_uri = name + override_filename_ext_in_uri
|
||||
fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.', suffix=override_filename_ext_in_uri)
|
||||
os.write(fd, bytes(json_text.encode()))
|
||||
fd, local_filename = mkstemp(prefix=quote(name, safe="") + ".", suffix=override_filename_ext_in_uri)
|
||||
os.write(fd, bytes(serialized_text.encode()))
|
||||
os.close(fd)
|
||||
preview = preview or json_text
|
||||
preview = preview or serialized_text
|
||||
if len(preview) < self.max_preview_size_bytes:
|
||||
artifact_type_data.preview = preview
|
||||
else:
|
||||
artifact_type_data.preview = '# full json too large to store, storing first {}kb\n{}'.format(
|
||||
self.max_preview_size_bytes//1024, preview[:self.max_preview_size_bytes]
|
||||
artifact_type_data.preview = (
|
||||
"# full serialized dict too large to store, storing first {}kb\n{}".format(
|
||||
self.max_preview_size_bytes // 1024, preview[: self.max_preview_size_bytes]
|
||||
)
|
||||
)
|
||||
|
||||
delete_after_upload = True
|
||||
elif isinstance(artifact_object, pathlib_types):
|
||||
# check if single file
|
||||
|
@ -291,7 +291,7 @@ class StorageHelper(object):
|
||||
logger=None,
|
||||
retries=5,
|
||||
token=None,
|
||||
**kwargs,
|
||||
**kwargs
|
||||
):
|
||||
level = config.get("storage.log.level", None)
|
||||
|
||||
|
@ -1701,6 +1701,7 @@ class Task(_Task):
|
||||
auto_pickle=True, # type: bool
|
||||
preview=None, # type: Any
|
||||
wait_on_upload=False, # type: bool
|
||||
extension_name=None, # type: Optional[str]
|
||||
):
|
||||
# type: (...) -> bool
|
||||
"""
|
||||
@ -1710,10 +1711,12 @@ class Task(_Task):
|
||||
|
||||
- string / pathlib2.Path - A path to artifact file. If a wildcard or a folder is specified, then ClearML
|
||||
creates and uploads a ZIP file.
|
||||
- dict - ClearML stores a dictionary as ``.json`` file and uploads it.
|
||||
- pandas.DataFrame - ClearML stores a pandas.DataFrame as ``.csv.gz`` (compressed CSV) file and uploads it.
|
||||
- numpy.ndarray - ClearML stores a numpy.ndarray as ``.npz`` file and uploads it.
|
||||
- PIL.Image - ClearML stores a PIL.Image as ``.png`` file and uploads it.
|
||||
- dict - ClearML stores a dictionary as ``.json`` (or see ``extension_name``) file and uploads it.
|
||||
- pandas.DataFrame - ClearML stores a pandas.DataFrame as ``.csv.gz`` (compressed CSV)
|
||||
(or see ``extension_name``) file and uploads it.
|
||||
- numpy.ndarray - ClearML stores a numpy.ndarray as ``.npz`` (or see ``extension_name``)
|
||||
file and uploads it.
|
||||
- PIL.Image - ClearML stores a PIL.Image as ``.png`` (or see ``extension_name``) file and uploads it.
|
||||
- Any - If called with auto_pickle=True, the object will be pickled and uploaded.
|
||||
|
||||
:param str name: The artifact name.
|
||||
@ -1738,6 +1741,14 @@ class Task(_Task):
|
||||
:param bool wait_on_upload: Whether or not the upload should be synchronous, forcing the upload to complete
|
||||
before continuing.
|
||||
|
||||
:param str extension_name: File extension which indicates the format the artifact should be stored as.
|
||||
The following are supported, depending on the artifact type
|
||||
(default value applies when extension_name is None):
|
||||
- dict - ``.json``, ``.yaml`` (default ``.json``)
|
||||
- pandas.DataFrame - ``.csv.gz``, ``.parquet``, ``.feather``, ``.pickle`` (default ``.csv.gz``)
|
||||
- numpy.ndarray - ``.npz``, ``.csv.gz`` (default ``.npz``)
|
||||
- PIL.Image - whatever extensions PIL supports (default ``.png``)
|
||||
|
||||
:return: The status of the upload.
|
||||
|
||||
- ``True`` - Upload succeeded.
|
||||
@ -1747,7 +1758,7 @@ class Task(_Task):
|
||||
"""
|
||||
return self._artifacts_manager.upload_artifact(
|
||||
name=name, artifact_object=artifact_object, metadata=metadata, delete_after_upload=delete_after_upload,
|
||||
auto_pickle=auto_pickle, preview=preview, wait_on_upload=wait_on_upload)
|
||||
auto_pickle=auto_pickle, preview=preview, wait_on_upload=wait_on_upload, extension_name=extension_name)
|
||||
|
||||
def get_models(self):
|
||||
# type: () -> Mapping[str, Sequence[Model]]
|
||||
|
Loading…
Reference in New Issue
Block a user