mirror of
https://github.com/clearml/clearml
synced 2025-02-07 13:23:40 +00:00
Add support for a default extension name when uploading a pandas dataframe artifact (see sdk.development.artifacts.default_pandas_dataframe_extension_name
)
This commit is contained in:
parent
0086785372
commit
9a9ee54681
@ -28,6 +28,7 @@ from ..storage.helper import remote_driver_schemes
|
|||||||
from ..storage.util import sha256sum, format_size, get_common_path
|
from ..storage.util import sha256sum, format_size, get_common_path
|
||||||
from ..utilities.process.mp import SafeEvent, ForkSafeRLock
|
from ..utilities.process.mp import SafeEvent, ForkSafeRLock
|
||||||
from ..utilities.proxy_object import LazyEvalWrapper
|
from ..utilities.proxy_object import LazyEvalWrapper
|
||||||
|
from ..config import deferred_config
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
@ -262,6 +263,9 @@ class Artifacts(object):
|
|||||||
# hashing constants
|
# hashing constants
|
||||||
_hash_block_size = 65536
|
_hash_block_size = 65536
|
||||||
_pd_artifact_type = 'data-audit-table'
|
_pd_artifact_type = 'data-audit-table'
|
||||||
|
_default_pandas_dataframe_extension_name = deferred_config(
|
||||||
|
"development.artifacts.default_pandas_dataframe_extension_name", None
|
||||||
|
)
|
||||||
|
|
||||||
class _ProxyDictWrite(dict):
|
class _ProxyDictWrite(dict):
|
||||||
""" Dictionary wrapper that updates an arguments instance on any item set in the dictionary """
|
""" Dictionary wrapper that updates an arguments instance on any item set in the dictionary """
|
||||||
@ -464,19 +468,23 @@ class Artifacts(object):
|
|||||||
artifact_type_data.content_type = "text/csv"
|
artifact_type_data.content_type = "text/csv"
|
||||||
np.savetxt(local_filename, artifact_object, delimiter=",")
|
np.savetxt(local_filename, artifact_object, delimiter=",")
|
||||||
delete_after_upload = True
|
delete_after_upload = True
|
||||||
elif pd and isinstance(artifact_object, pd.DataFrame) \
|
|
||||||
and (isinstance(artifact_object.index, pd.MultiIndex) or
|
|
||||||
isinstance(artifact_object.columns, pd.MultiIndex)):
|
|
||||||
store_as_pickle = True
|
|
||||||
elif pd and isinstance(artifact_object, pd.DataFrame):
|
elif pd and isinstance(artifact_object, pd.DataFrame):
|
||||||
artifact_type = "pandas"
|
artifact_type = "pandas"
|
||||||
artifact_type_data.preview = preview or str(artifact_object.__repr__())
|
artifact_type_data.preview = preview or str(artifact_object.__repr__())
|
||||||
|
# we are making sure self._default_pandas_dataframe_extension_name is not deferred
|
||||||
|
extension_name = extension_name or str(self._default_pandas_dataframe_extension_name or "")
|
||||||
override_filename_ext_in_uri = get_extension(
|
override_filename_ext_in_uri = get_extension(
|
||||||
extension_name, [".csv.gz", ".parquet", ".feather", ".pickle"], ".csv.gz", artifact_type
|
extension_name, [".csv.gz", ".parquet", ".feather", ".pickle"], ".csv.gz", artifact_type
|
||||||
)
|
)
|
||||||
override_filename_in_uri = name
|
override_filename_in_uri = name
|
||||||
local_filename = self._push_temp_file(prefix=quote(name, safe="") + '.', suffix=override_filename_ext_in_uri)
|
local_filename = self._push_temp_file(
|
||||||
if override_filename_ext_in_uri == ".csv.gz":
|
prefix=quote(name, safe="") + ".", suffix=override_filename_ext_in_uri
|
||||||
|
)
|
||||||
|
if (
|
||||||
|
isinstance(artifact_object.index, pd.MultiIndex) or isinstance(artifact_object.columns, pd.MultiIndex)
|
||||||
|
) and not extension_name:
|
||||||
|
store_as_pickle = True
|
||||||
|
elif override_filename_ext_in_uri == ".csv.gz":
|
||||||
artifact_type_data.content_type = "text/csv"
|
artifact_type_data.content_type = "text/csv"
|
||||||
self._store_compressed_pd_csv(artifact_object, local_filename)
|
self._store_compressed_pd_csv(artifact_object, local_filename)
|
||||||
elif override_filename_ext_in_uri == ".parquet":
|
elif override_filename_ext_in_uri == ".parquet":
|
||||||
|
@ -240,5 +240,12 @@ sdk {
|
|||||||
# iteration reporting x-axis after starting to report "seconds from start"
|
# iteration reporting x-axis after starting to report "seconds from start"
|
||||||
# max_wait_for_first_iteration_to_start_sec: 1800
|
# max_wait_for_first_iteration_to_start_sec: 1800
|
||||||
}
|
}
|
||||||
|
|
||||||
|
artifacts {
|
||||||
|
# set default extension_name for pandas DataFrame objects
|
||||||
|
# valid values are: ``.csv.gz``, ``.parquet``, ``.feather``, ``.pickle``
|
||||||
|
# extension_name supplied to Task.upload_artifact is prioritized over this value
|
||||||
|
default_pandas_dataframe_extension_name: ""
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user