Add option to not sort yaml/json keys when uploading artifact

This commit is contained in:
clearml 2025-05-21 10:05:12 +03:00
parent 66a352b587
commit dc4241eb0c
3 changed files with 28 additions and 17 deletions

View File

@ -1276,6 +1276,7 @@ class PipelineController(object):
preview: Any = None, preview: Any = None,
wait_on_upload: bool = False, wait_on_upload: bool = False,
serialization_function: Optional[Callable[[Any], Union[bytes, bytearray]]] = None, serialization_function: Optional[Callable[[Any], Union[bytes, bytearray]]] = None,
sort_keys: bool = True,
) -> bool: ) -> bool:
""" """
Upload (add) an artifact to the main Pipeline Task object. Upload (add) an artifact to the main Pipeline Task object.
@ -1295,27 +1296,27 @@ class PipelineController(object):
- PIL.Image - ClearML stores a PIL.Image as ``.png`` file and uploads it. - PIL.Image - ClearML stores a PIL.Image as ``.png`` file and uploads it.
- Any - If called with auto_pickle=True, the object will be pickled and uploaded. - Any - If called with auto_pickle=True, the object will be pickled and uploaded.
:param str name: The artifact name. :param name: The artifact name.
.. warning:: .. warning::
If an artifact with the same name was previously uploaded, then it is overwritten. If an artifact with the same name was previously uploaded, then it is overwritten.
:param object artifact_object: The artifact object. :param artifact_object: The artifact object.
:param dict metadata: A dictionary of key-value pairs for any metadata. This dictionary appears with the :param metadata: A dictionary of key-value pairs for any metadata. This dictionary appears with the
experiment in the **ClearML Web-App (UI)**, **ARTIFACTS** tab. experiment in the **ClearML Web-App (UI)**, **ARTIFACTS** tab.
:param bool delete_after_upload: After the upload, delete the local copy of the artifact :param delete_after_upload: After the upload, delete the local copy of the artifact
- ``True`` - Delete the local copy of the artifact. - ``True`` - Delete the local copy of the artifact.
- ``False`` - Do not delete. (default) - ``False`` - Do not delete. (default)
:param bool auto_pickle: If True, and the artifact_object is not one of the following types: :param auto_pickle: If True, and the artifact_object is not one of the following types:
pathlib2.Path, dict, pandas.DataFrame, numpy.ndarray, PIL.Image, url (string), local_file (string) pathlib2.Path, dict, pandas.DataFrame, numpy.ndarray, PIL.Image, url (string), local_file (string)
the artifact_object will be pickled and uploaded as pickle file artifact (with file extension .pkl) the artifact_object will be pickled and uploaded as pickle file artifact (with file extension .pkl)
If set to None (default) the sdk.development.artifacts.auto_pickle configuration value will be used. If set to None (default) the sdk.development.artifacts.auto_pickle configuration value will be used.
:param object preview: The artifact preview :param preview: The artifact preview
:param bool wait_on_upload: Whether the upload should be synchronous, forcing the upload to complete :param wait_on_upload: Whether the upload should be synchronous, forcing the upload to complete
before continuing. before continuing.
:param serialization_function: A serialization function that takes one :param serialization_function: A serialization function that takes one
@ -1325,6 +1326,9 @@ class PipelineController(object):
(e.g. `pandas.DataFrame.to_csv`), even if possible. To deserialize this artifact when getting (e.g. `pandas.DataFrame.to_csv`), even if possible. To deserialize this artifact when getting
it using the `Artifact.get` method, use its `deserialization_function` argument. it using the `Artifact.get` method, use its `deserialization_function` argument.
:param sort_keys: If True (default), sort the keys when the artifact is serialized as yaml/json.
If False, the keys are written in the artifact's own order. Ignored if the artifact is not serialized as yaml/json.
:return: The status of the upload. :return: The status of the upload.
- ``True`` - Upload succeeded. - ``True`` - Upload succeeded.
@ -1342,6 +1346,7 @@ class PipelineController(object):
preview=preview, preview=preview,
wait_on_upload=wait_on_upload, wait_on_upload=wait_on_upload,
serialization_function=serialization_function, serialization_function=serialization_function,
sort_keys=sort_keys,
) )
def stop( def stop(

View File

@ -392,6 +392,7 @@ class Artifacts(object):
wait_on_upload: bool = False, wait_on_upload: bool = False,
extension_name: Optional[str] = None, extension_name: Optional[str] = None,
serialization_function: Optional[Callable[[Any], Union[bytes, bytearray]]] = None, serialization_function: Optional[Callable[[Any], Union[bytes, bytearray]]] = None,
sort_keys: bool = True,
) -> bool: ) -> bool:
if not Session.check_min_api_version("2.3"): if not Session.check_min_api_version("2.3"):
LoggerRoot.get_base_logger().warning( LoggerRoot.get_base_logger().warning(
@ -599,7 +600,7 @@ class Artifacts(object):
artifact_type_data.content_type = "application/json" artifact_type_data.content_type = "application/json"
# noinspection PyBroadException # noinspection PyBroadException
try: try:
serialized_text = json.dumps(artifact_object, sort_keys=True, indent=4) serialized_text = json.dumps(artifact_object, sort_keys=sort_keys, indent=4)
except Exception: except Exception:
if not auto_pickle: if not auto_pickle:
raise raise
@ -612,7 +613,7 @@ class Artifacts(object):
artifact_type_data.content_type = "application/yaml" artifact_type_data.content_type = "application/yaml"
# noinspection PyBroadException # noinspection PyBroadException
try: try:
serialized_text = yaml.dump(artifact_object, sort_keys=True, indent=4) serialized_text = yaml.dump(artifact_object, sort_keys=sort_keys, indent=4)
except Exception: except Exception:
if not auto_pickle: if not auto_pickle:
raise raise

View File

@ -2838,6 +2838,7 @@ class Task(_Task):
extension_name: Optional[str] = None, extension_name: Optional[str] = None,
serialization_function: Optional[Callable[[Any], Union[bytes, bytearray]]] = None, serialization_function: Optional[Callable[[Any], Union[bytes, bytearray]]] = None,
retries: int = 0, retries: int = 0,
sort_keys: bool = True,
) -> bool: ) -> bool:
""" """
Upload (add) a static artifact to a Task object. The artifact is uploaded in the background. Upload (add) a static artifact to a Task object. The artifact is uploaded in the background.
@ -2853,30 +2854,30 @@ class Task(_Task):
- PIL.Image - ClearML stores a PIL.Image as ``.png`` (or see ``extension_name``) file and uploads it. - PIL.Image - ClearML stores a PIL.Image as ``.png`` (or see ``extension_name``) file and uploads it.
- Any - If called with auto_pickle=True, the object will be pickled and uploaded. - Any - If called with auto_pickle=True, the object will be pickled and uploaded.
:param str name: The artifact name. :param name: The artifact name.
.. warning:: .. warning::
If an artifact with the same name was previously uploaded, then it is overwritten. If an artifact with the same name was previously uploaded, then it is overwritten.
:param object artifact_object: The artifact object. :param artifact_object: The artifact object.
:param dict metadata: A dictionary of key-value pairs for any metadata. This dictionary appears with the :param metadata: A dictionary of key-value pairs for any metadata. This dictionary appears with the
experiment in the **ClearML Web-App (UI)**, **ARTIFACTS** tab. experiment in the **ClearML Web-App (UI)**, **ARTIFACTS** tab.
:param bool delete_after_upload: After the upload, delete the local copy of the artifact :param bool delete_after_upload: After the upload, delete the local copy of the artifact
- ``True`` - Delete the local copy of the artifact. - ``True`` - Delete the local copy of the artifact.
- ``False`` - Do not delete. (default) - ``False`` - Do not delete. (default)
:param bool auto_pickle: If True and the artifact_object is not one of the following types: :param auto_pickle: If True and the artifact_object is not one of the following types:
pathlib2.Path, dict, pandas.DataFrame, numpy.ndarray, PIL.Image, url (string), local_file (string), pathlib2.Path, dict, pandas.DataFrame, numpy.ndarray, PIL.Image, url (string), local_file (string),
the artifact_object will be pickled and uploaded as pickle file artifact (with file extension .pkl). the artifact_object will be pickled and uploaded as pickle file artifact (with file extension .pkl).
If set to None (default) the sdk.development.artifacts.auto_pickle configuration value will be used. If set to None (default) the sdk.development.artifacts.auto_pickle configuration value will be used.
:param object preview: The artifact preview :param preview: The artifact preview
:param bool wait_on_upload: Whether the upload should be synchronous, forcing the upload to complete :param wait_on_upload: Whether the upload should be synchronous, forcing the upload to complete
before continuing. before continuing.
:param str extension_name: File extension which indicates the format the artifact should be stored as. :param extension_name: File extension which indicates the format the artifact should be stored as.
The following are supported, depending on the artifact type (default value applies when extension_name is None): The following are supported, depending on the artifact type (default value applies when extension_name is None):
- Any - ``.pkl`` if passed supersedes any other serialization type, and always pickles the object - Any - ``.pkl`` if passed supersedes any other serialization type, and always pickles the object
@ -2893,7 +2894,10 @@ class Task(_Task):
(e.g. `pandas.DataFrame.to_csv`), even if possible. To deserialize this artifact when getting (e.g. `pandas.DataFrame.to_csv`), even if possible. To deserialize this artifact when getting
it using the `Artifact.get` method, use its `deserialization_function` argument. it using the `Artifact.get` method, use its `deserialization_function` argument.
:param int retries: Number of retries before failing to upload artifact. If 0, the upload is not retried :param retries: Number of retries before failing to upload artifact. If 0, the upload is not retried
:param sort_keys: If True (default), sort the keys when the artifact is serialized as yaml/json.
If False, the keys are written in the artifact's own order. Ignored if the artifact is not serialized as yaml/json.
:return: The status of the upload. :return: The status of the upload.
@ -2916,6 +2920,7 @@ class Task(_Task):
wait_on_upload=wait_on_upload, wait_on_upload=wait_on_upload,
extension_name=extension_name, extension_name=extension_name,
serialization_function=serialization_function, serialization_function=serialization_function,
sort_keys=sort_keys,
): ):
return True return True
except Exception as e: except Exception as e: