Add support for artifacts with different formats (#634)
parent 382d361bff
commit 42fa0dde65
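The commit adds an optional extension_name argument to artifact upload, letting callers choose the serialization format per artifact type. A minimal usage sketch (the project/task names and data below are illustrative, not part of the commit; assumes a reachable ClearML server and numpy/pandas installed):

    import numpy as np
    import pandas as pd
    from clearml import Task

    task = Task.init(project_name="examples", task_name="artifact-formats")

    # numpy.ndarray: ".npz" by default, ".csv.gz" also supported
    task.upload_artifact("array", np.eye(3), extension_name=".csv.gz")

    # pandas.DataFrame: ".csv.gz" by default; ".parquet", ".feather", ".pickle" also supported
    task.upload_artifact("frame", pd.DataFrame({"a": [1, 2]}), extension_name=".parquet")

    # dict: ".json" by default, ".yaml" also supported
    task.upload_artifact("config", {"lr": 0.01}, extension_name=".yaml")

    # An unsupported extension logs a warning and falls back to the type default (".npz" here)
    task.upload_artifact("array_default", np.eye(3), extension_name=".foo")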
@@ -1,4 +1,5 @@
 import json
+import yaml
 import mimetypes
 import os
 import pickle
@@ -308,8 +309,8 @@ class Artifacts(object):
         self.flush()

     def upload_artifact(self, name, artifact_object=None, metadata=None, preview=None,
-                        delete_after_upload=False, auto_pickle=True, wait_on_upload=False):
-        # type: (str, Optional[object], Optional[dict], Optional[str], bool, bool, bool) -> bool
+                        delete_after_upload=False, auto_pickle=True, wait_on_upload=False, extension_name=None):
+        # type: (str, Optional[object], Optional[dict], Optional[str], bool, bool, bool, Optional[str]) -> bool
         if not Session.check_min_api_version('2.3'):
             LoggerRoot.get_base_logger().warning('Artifacts not supported by your ClearML-server version, '
                                                  'please upgrade to the latest server version')
@@ -354,65 +355,133 @@ class Artifacts(object):
         override_filename_in_uri = None
         override_filename_ext_in_uri = None
         uri = None

+        def get_extension(extension_name_, valid_extensions, default_extension, artifact_type_):
+            if not extension_name_:
+                return default_extension
+            if extension_name_ in valid_extensions:
+                return extension_name_
+            LoggerRoot.get_base_logger().warning(
+                "{} artifact can not be uploaded with extension {}. Valid extensions are: {}. Defaulting to {}.".format(
+                    artifact_type_, extension_name_, ", ".join(valid_extensions), default_extension
+                )
+            )
+            return default_extension
+
         if np and isinstance(artifact_object, np.ndarray):
             artifact_type = 'numpy'
-            artifact_type_data.content_type = 'application/numpy'
             artifact_type_data.preview = preview or str(artifact_object.__repr__())
-            override_filename_ext_in_uri = '.npz'
+            override_filename_ext_in_uri = get_extension(
+                extension_name, [".npz", ".csv.gz"], ".npz", artifact_type
+            )
             override_filename_in_uri = name + override_filename_ext_in_uri
             fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.', suffix=override_filename_ext_in_uri)
             os.close(fd)
-            np.savez_compressed(local_filename, **{name: artifact_object})
+            if override_filename_ext_in_uri == ".npz":
+                artifact_type_data.content_type = "application/numpy"
+                np.savez_compressed(local_filename, **{name: artifact_object})
+            elif override_filename_ext_in_uri == ".csv.gz":
+                artifact_type_data.content_type = "text/csv"
+                np.savetxt(local_filename, artifact_object, delimiter=",")
             delete_after_upload = True
         elif pd and isinstance(artifact_object, pd.DataFrame):
-            artifact_type = 'pandas'
-            artifact_type_data.content_type = 'text/csv'
+            artifact_type = "pandas"
             artifact_type_data.preview = preview or str(artifact_object.__repr__())
-            override_filename_ext_in_uri = self._save_format
+            override_filename_ext_in_uri = get_extension(
+                extension_name, [".csv.gz", ".parquet", ".feather", ".pickle"], ".csv.gz", artifact_type
+            )
             override_filename_in_uri = name
             fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.', suffix=override_filename_ext_in_uri)
             os.close(fd)
-            artifact_object.to_csv(local_filename, compression=self._compression)
+            if override_filename_ext_in_uri == ".csv.gz":
+                artifact_type_data.content_type = "text/csv"
+                artifact_object.to_csv(local_filename, compression=self._compression)
+            elif override_filename_ext_in_uri == ".parquet":
+                artifact_type_data.content_type = "application/parquet"
+                artifact_object.to_parquet(local_filename)
+            elif override_filename_ext_in_uri == ".feather":
+                artifact_type_data.content_type = "application/feather"
+                artifact_object.to_feather(local_filename)
+            elif override_filename_ext_in_uri == ".pickle":
+                artifact_type_data.content_type = "application/pickle"
+                artifact_object.to_pickle(local_filename)
             delete_after_upload = True
         elif isinstance(artifact_object, Image.Image):
-            artifact_type = 'image'
-            artifact_type_data.content_type = 'image/png'
+            artifact_type = "image"
+            artifact_type_data.content_type = "image/png"
             desc = str(artifact_object.__repr__())
             artifact_type_data.preview = preview or desc[1:desc.find(' at ')]
-            override_filename_ext_in_uri = '.png'
+            # noinspection PyBroadException
+            try:
+                if not Image.EXTENSION:
+                    Image.init()
+                    if not Image.EXTENSION:
+                        raise Exception()
+                override_filename_ext_in_uri = get_extension(
+                    extension_name, Image.EXTENSION.keys(), ".png", artifact_type
+                )
+            except Exception:
+                override_filename_ext_in_uri = ".png"
+                if extension_name and extension_name != ".png":
+                    LoggerRoot.get_base_logger().warning(
+                        "image artifact can not be uploaded with extension {}. Defaulting to .png.".format(
+                            extension_name
+                        )
+                    )
             override_filename_in_uri = name + override_filename_ext_in_uri
+            artifact_type_data.content_type = "image/unknown-type"
+            guessed_type = mimetypes.guess_type(override_filename_in_uri)[0]
+            if guessed_type:
+                artifact_type_data.content_type = guessed_type
+
             fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.', suffix=override_filename_ext_in_uri)
             os.close(fd)
             artifact_object.save(local_filename)
             delete_after_upload = True
         elif isinstance(artifact_object, dict):
-            artifact_type = 'JSON'
-            artifact_type_data.content_type = 'application/json'
-            # noinspection PyBroadException
-            try:
-                json_text = json.dumps(artifact_object, sort_keys=True, indent=4)
-            except Exception:
-                if not auto_pickle:
-                    raise
-                LoggerRoot.get_base_logger().warning(
-                    "JSON serialization of artifact '{}' failed, reverting to pickle".format(name))
-                store_as_pickle = True
-                json_text = None
+            artifact_type = "dict"
+            override_filename_ext_in_uri = get_extension(extension_name, [".json", ".yaml"], ".json", artifact_type)
+            if override_filename_ext_in_uri == ".json":
+                artifact_type_data.content_type = "application/json"
+                # noinspection PyBroadException
+                try:
+                    serialized_text = json.dumps(artifact_object, sort_keys=True, indent=4)
+                except Exception:
+                    if not auto_pickle:
+                        raise
+                    LoggerRoot.get_base_logger().warning(
+                        "JSON serialization of artifact '{}' failed, reverting to pickle".format(name))
+                    store_as_pickle = True
+                    serialized_text = None
+            else:
+                artifact_type_data.content_type = "application/yaml"
+                # noinspection PyBroadException
+                try:
+                    serialized_text = yaml.dump(artifact_object, sort_keys=True, indent=4)
+                except Exception:
+                    if not auto_pickle:
+                        raise
+                    LoggerRoot.get_base_logger().warning(
+                        "YAML serialization of artifact '{}' failed, reverting to pickle".format(name))
+                    store_as_pickle = True
+                    serialized_text = None

-            if json_text is not None:
-                override_filename_ext_in_uri = '.json'
+            if serialized_text is not None:
                 override_filename_in_uri = name + override_filename_ext_in_uri
-                fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.', suffix=override_filename_ext_in_uri)
-                os.write(fd, bytes(json_text.encode()))
+                fd, local_filename = mkstemp(prefix=quote(name, safe="") + ".", suffix=override_filename_ext_in_uri)
+                os.write(fd, bytes(serialized_text.encode()))
                 os.close(fd)
-                preview = preview or json_text
+                preview = preview or serialized_text
                 if len(preview) < self.max_preview_size_bytes:
                     artifact_type_data.preview = preview
                 else:
-                    artifact_type_data.preview = '# full json too large to store, storing first {}kb\n{}'.format(
-                        self.max_preview_size_bytes//1024, preview[:self.max_preview_size_bytes]
-                    )
+                    artifact_type_data.preview = (
+                        "# full serialized dict too large to store, storing first {}kb\n{}".format(
+                            self.max_preview_size_bytes // 1024, preview[: self.max_preview_size_bytes]
+                        )
+                    )

                 delete_after_upload = True
         elif isinstance(artifact_object, pathlib_types):
             # check if single file
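The extension selection above lives in a helper nested inside upload_artifact, so it cannot be called directly. A standalone sketch of its fallback behavior, with print standing in for the ClearML logger purely for illustration:

    def get_extension(extension_name, valid_extensions, default_extension, artifact_type):
        # Return the requested extension if supported; otherwise warn and fall back.
        if not extension_name:
            return default_extension
        if extension_name in valid_extensions:
            return extension_name
        print("{} artifact can not be uploaded with extension {}. Valid extensions are: {}. "
              "Defaulting to {}.".format(artifact_type, extension_name,
                                         ", ".join(valid_extensions), default_extension))
        return default_extension

    assert get_extension(None, [".npz", ".csv.gz"], ".npz", "numpy") == ".npz"
    assert get_extension(".csv.gz", [".npz", ".csv.gz"], ".npz", "numpy") == ".csv.gz"
    assert get_extension(".txt", [".npz", ".csv.gz"], ".npz", "numpy") == ".npz"  # warns, falls back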
@@ -291,7 +291,7 @@ class StorageHelper(object):
             logger=None,
             retries=5,
             token=None,
-            **kwargs,
+            **kwargs
     ):
         level = config.get("storage.log.level", None)

@@ -1701,6 +1701,7 @@ class Task(_Task):
             auto_pickle=True,  # type: bool
             preview=None,  # type: Any
             wait_on_upload=False,  # type: bool
+            extension_name=None,  # type: Optional[str]
     ):
         # type: (...) -> bool
         """
@@ -1710,10 +1711,12 @@ class Task(_Task):

         - string / pathlib2.Path - A path to artifact file. If a wildcard or a folder is specified, then ClearML
           creates and uploads a ZIP file.
-        - dict - ClearML stores a dictionary as ``.json`` file and uploads it.
-        - pandas.DataFrame - ClearML stores a pandas.DataFrame as ``.csv.gz`` (compressed CSV) file and uploads it.
-        - numpy.ndarray - ClearML stores a numpy.ndarray as ``.npz`` file and uploads it.
-        - PIL.Image - ClearML stores a PIL.Image as ``.png`` file and uploads it.
+        - dict - ClearML stores a dictionary as ``.json`` (or see ``extension_name``) file and uploads it.
+        - pandas.DataFrame - ClearML stores a pandas.DataFrame as ``.csv.gz`` (compressed CSV)
+          (or see ``extension_name``) file and uploads it.
+        - numpy.ndarray - ClearML stores a numpy.ndarray as ``.npz`` (or see ``extension_name``)
+          file and uploads it.
+        - PIL.Image - ClearML stores a PIL.Image as ``.png`` (or see ``extension_name``) file and uploads it.
         - Any - If called with auto_pickle=True, the object will be pickled and uploaded.

         :param str name: The artifact name.
@@ -1738,6 +1741,14 @@ class Task(_Task):
         :param bool wait_on_upload: Whether or not the upload should be synchronous, forcing the upload to complete
             before continuing.

+        :param str extension_name: File extension which indicates the format the artifact should be stored as.
+            The following are supported, depending on the artifact type
+            (default value applies when extension_name is None):
+            - dict - ``.json``, ``.yaml`` (default ``.json``)
+            - pandas.DataFrame - ``.csv.gz``, ``.parquet``, ``.feather``, ``.pickle`` (default ``.csv.gz``)
+            - numpy.ndarray - ``.npz``, ``.csv.gz`` (default ``.npz``)
+            - PIL.Image - whatever extensions PIL supports (default ``.png``)
+
         :return: The status of the upload.

         - ``True`` - Upload succeeded.
@@ -1747,7 +1758,7 @@ class Task(_Task):
         """
         return self._artifacts_manager.upload_artifact(
             name=name, artifact_object=artifact_object, metadata=metadata, delete_after_upload=delete_after_upload,
-            auto_pickle=auto_pickle, preview=preview, wait_on_upload=wait_on_upload)
+            auto_pickle=auto_pickle, preview=preview, wait_on_upload=wait_on_upload, extension_name=extension_name)

     def get_models(self):
         # type: () -> Mapping[str, Sequence[Model]]
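A hedged sketch of reading one of these artifacts back from another script. Task.get_task, the task.artifacts mapping, and Artifact.get_local_copy() are existing ClearML APIs; that the downloaded file carries the chosen extension (".parquet" here) is an assumption based on this commit, and the names reuse the illustrative example above:

    import pandas as pd
    from clearml import Task

    # Fetch the task that uploaded the artifacts
    prev_task = Task.get_task(project_name="examples", task_name="artifact-formats")

    # Download the raw artifact file and load it with the matching reader
    local_path = prev_task.artifacts["frame"].get_local_copy()  # e.g. ".../frame.parquet"
    df = pd.read_parquet(local_path)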