Fix large git diff preview (artifact auxiliary_git_diff) to show a single line per file; add option to control the artifact preview

This commit is contained in:
allegroai 2020-07-30 15:10:41 +03:00
parent 0a5c10b4b0
commit 1d277d01d3
2 changed files with 27 additions and 15 deletions

View File

@ -278,8 +278,13 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
# if the git is too large to store on the task, we must store it as artifact: # if the git is too large to store on the task, we must store it as artifact:
if result.auxiliary_git_diff: if result.auxiliary_git_diff:
diff_preview = "# git diff too large to handle, storing as artifact. git diff summary:\n"
diff_preview += '\n'.join(
line for line in result.auxiliary_git_diff.split('\n') if line.startswith('diff --git '))
self._artifacts_manager.upload_artifact( self._artifacts_manager.upload_artifact(
name='auxiliary_git_diff', artifact_object=result.auxiliary_git_diff) name='auxiliary_git_diff', artifact_object=result.auxiliary_git_diff,
preview=diff_preview,
)
# store original entry point # store original entry point
entry_point = result.script.get('entry_point') if result.script else None entry_point = result.script.get('entry_point') if result.script else None

View File

@ -302,8 +302,9 @@ class Artifacts(object):
self._unregister_request.add(name) self._unregister_request.add(name)
self.flush() self.flush()
def upload_artifact(self, name, artifact_object=None, metadata=None, delete_after_upload=False, auto_pickle=True): def upload_artifact(self, name, artifact_object=None, metadata=None, preview=None,
# type: (str, Optional[object], Optional[dict], bool, bool) -> bool delete_after_upload=False, auto_pickle=True):
# type: (str, Optional[object], Optional[dict], Optional[str], bool, bool) -> bool
if not Session.check_min_api_version('2.3'): if not Session.check_min_api_version('2.3'):
LoggerRoot.get_base_logger().warning('Artifacts not supported by your TRAINS-server version, ' LoggerRoot.get_base_logger().warning('Artifacts not supported by your TRAINS-server version, '
'please upgrade to the latest server version') 'please upgrade to the latest server version')
@ -312,6 +313,10 @@ class Artifacts(object):
if name in self._artifacts_container: if name in self._artifacts_container:
raise ValueError("Artifact by the name of {} is already registered, use register_artifact".format(name)) raise ValueError("Artifact by the name of {} is already registered, use register_artifact".format(name))
# cast preview to string
if preview:
preview = str(preview)
# convert string to object if try is a file/folder (dont try to serialize long texts # convert string to object if try is a file/folder (dont try to serialize long texts
if isinstance(artifact_object, six.string_types) and len(artifact_object) < 2048: if isinstance(artifact_object, six.string_types) and len(artifact_object) < 2048:
# noinspection PyBroadException # noinspection PyBroadException
@ -337,7 +342,7 @@ class Artifacts(object):
if np and isinstance(artifact_object, np.ndarray): if np and isinstance(artifact_object, np.ndarray):
artifact_type = 'numpy' artifact_type = 'numpy'
artifact_type_data.content_type = 'application/numpy' artifact_type_data.content_type = 'application/numpy'
artifact_type_data.preview = str(artifact_object.__repr__()) artifact_type_data.preview = preview or str(artifact_object.__repr__())
override_filename_ext_in_uri = '.npz' override_filename_ext_in_uri = '.npz'
override_filename_in_uri = name + override_filename_ext_in_uri override_filename_in_uri = name + override_filename_ext_in_uri
fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.', suffix=override_filename_ext_in_uri) fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.', suffix=override_filename_ext_in_uri)
@ -347,7 +352,7 @@ class Artifacts(object):
elif pd and isinstance(artifact_object, pd.DataFrame): elif pd and isinstance(artifact_object, pd.DataFrame):
artifact_type = 'pandas' artifact_type = 'pandas'
artifact_type_data.content_type = 'text/csv' artifact_type_data.content_type = 'text/csv'
artifact_type_data.preview = str(artifact_object.__repr__()) artifact_type_data.preview = preview or str(artifact_object.__repr__())
override_filename_ext_in_uri = self._save_format override_filename_ext_in_uri = self._save_format
override_filename_in_uri = name override_filename_in_uri = name
fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.', suffix=override_filename_ext_in_uri) fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.', suffix=override_filename_ext_in_uri)
@ -358,7 +363,7 @@ class Artifacts(object):
artifact_type = 'image' artifact_type = 'image'
artifact_type_data.content_type = 'image/png' artifact_type_data.content_type = 'image/png'
desc = str(artifact_object.__repr__()) desc = str(artifact_object.__repr__())
artifact_type_data.preview = desc[1:desc.find(' at ')] artifact_type_data.preview = preview or desc[1:desc.find(' at ')]
override_filename_ext_in_uri = '.png' override_filename_ext_in_uri = '.png'
override_filename_in_uri = name + override_filename_ext_in_uri override_filename_in_uri = name + override_filename_ext_in_uri
fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.', suffix=override_filename_ext_in_uri) fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.', suffix=override_filename_ext_in_uri)
@ -368,7 +373,7 @@ class Artifacts(object):
elif isinstance(artifact_object, dict): elif isinstance(artifact_object, dict):
artifact_type = 'JSON' artifact_type = 'JSON'
artifact_type_data.content_type = 'application/json' artifact_type_data.content_type = 'application/json'
preview = json.dumps(artifact_object, sort_keys=True, indent=4) preview = preview or json.dumps(artifact_object, sort_keys=True, indent=4)
override_filename_ext_in_uri = '.json' override_filename_ext_in_uri = '.json'
override_filename_in_uri = name + override_filename_ext_in_uri override_filename_in_uri = name + override_filename_ext_in_uri
fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.', suffix=override_filename_ext_in_uri) fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.', suffix=override_filename_ext_in_uri)
@ -412,13 +417,13 @@ class Artifacts(object):
) )
try: try:
artifact_type_data.content_type = 'application/zip' artifact_type_data.content_type = 'application/zip'
artifact_type_data.preview = 'Archive content {}:\n'.format(artifact_object.as_posix()) archive_preview = 'Archive content {}:\n'.format(artifact_object.as_posix())
with ZipFile(zip_file, 'w', allowZip64=True, compression=ZIP_DEFLATED) as zf: with ZipFile(zip_file, 'w', allowZip64=True, compression=ZIP_DEFLATED) as zf:
for filename in sorted(files): for filename in sorted(files):
if filename.is_file(): if filename.is_file():
relative_file_name = filename.relative_to(folder).as_posix() relative_file_name = filename.relative_to(folder).as_posix()
artifact_type_data.preview += '{} - {}\n'.format( archive_preview += '{} - {}\n'.format(
relative_file_name, humanfriendly.format_size(filename.stat().st_size)) relative_file_name, humanfriendly.format_size(filename.stat().st_size))
zf.write(filename.as_posix(), arcname=relative_file_name) zf.write(filename.as_posix(), arcname=relative_file_name)
except Exception as e: except Exception as e:
@ -428,7 +433,7 @@ class Artifacts(object):
return False return False
finally: finally:
os.close(fd) os.close(fd)
artifact_type_data.preview = preview or archive_preview
artifact_object = zip_file artifact_object = zip_file
artifact_type = 'archive' artifact_type = 'archive'
artifact_type_data.content_type = mimetypes.guess_type(artifact_object)[0] artifact_type_data.content_type = mimetypes.guess_type(artifact_object)[0]
@ -439,14 +444,14 @@ class Artifacts(object):
raise ValueError("Artifact file '{}' could not be found".format(artifact_object.as_posix())) raise ValueError("Artifact file '{}' could not be found".format(artifact_object.as_posix()))
override_filename_in_uri = artifact_object.parts[-1] override_filename_in_uri = artifact_object.parts[-1]
artifact_type_data.preview = '{} - {}\n'.format( artifact_type_data.preview = preview or '{} - {}\n'.format(
artifact_object, humanfriendly.format_size(artifact_object.stat().st_size)) artifact_object, humanfriendly.format_size(artifact_object.stat().st_size))
artifact_object = artifact_object.as_posix() artifact_object = artifact_object.as_posix()
artifact_type = 'custom' artifact_type = 'custom'
artifact_type_data.content_type = mimetypes.guess_type(artifact_object)[0] artifact_type_data.content_type = mimetypes.guess_type(artifact_object)[0]
local_filename = artifact_object local_filename = artifact_object
elif ( elif (
isinstance(artifact_object, six.string_types) isinstance(artifact_object, six.string_types) and len(artifact_object) < 4096
and urlparse(artifact_object).scheme in remote_driver_schemes and urlparse(artifact_object).scheme in remote_driver_schemes
): ):
# we should not upload this, just register # we should not upload this, just register
@ -458,7 +463,9 @@ class Artifacts(object):
# if we got here, we should store it as text file. # if we got here, we should store it as text file.
artifact_type = 'string' artifact_type = 'string'
artifact_type_data.content_type = 'text/plain' artifact_type_data.content_type = 'text/plain'
if len(artifact_object) < self.max_preview_size_bytes: if preview:
artifact_type_data.preview = preview
elif len(artifact_object) < self.max_preview_size_bytes:
artifact_type_data.preview = artifact_object artifact_type_data.preview = artifact_object
else: else:
artifact_type_data.preview = '# full text too large to store, storing first {}kb\n{}'.format( artifact_type_data.preview = '# full text too large to store, storing first {}kb\n{}'.format(
@ -483,9 +490,9 @@ class Artifacts(object):
artifact_type_data.content_type = 'application/pickle' artifact_type_data.content_type = 'application/pickle'
# noinspection PyBroadException # noinspection PyBroadException
try: try:
artifact_type_data.preview = str(artifact_object.__repr__())[:self.max_preview_size_bytes] artifact_type_data.preview = preview or str(artifact_object.__repr__())[:self.max_preview_size_bytes]
except Exception: except Exception:
artifact_type_data.preview = '' artifact_type_data.preview = preview or ''
delete_after_upload = True delete_after_upload = True
override_filename_ext_in_uri = '.pkl' override_filename_ext_in_uri = '.pkl'
override_filename_in_uri = name + override_filename_ext_in_uri override_filename_in_uri = name + override_filename_ext_in_uri