Fix handling of git diffs larger than 500kb: the git section will contain a warning, and the entire git diff will be uploaded as an artifact named auxiliary_git_diff

This commit is contained in:
allegroai 2020-07-04 22:53:26 +03:00
parent d1744785b1
commit 934771184d
4 changed files with 57 additions and 6 deletions

View File

@ -592,6 +592,7 @@ class ScriptInfo(object):
script_dir = scripts_dir[0]
script_path = scripts_path[0]
messages = []
auxiliary_git_diff = None
if not plugin:
log.info("No repository found, storing script code instead")
@ -625,7 +626,10 @@ class ScriptInfo(object):
messages.append(
"======> WARNING! Git diff to large to store "
"({}kb), skipping uncommitted changes <======".format(len(diff)//1024))
diff = ''
auxiliary_git_diff = diff
diff = '# WARNING! git diff too large to store, clear this section to execute without it.\n' \
'# full git diff available in Artifacts/auxiliary_git_diff\n' \
'# Clear the section before enqueueing Task!\n'
else:
diff = ''
@ -665,7 +669,7 @@ class ScriptInfo(object):
if not any(script_info.values()):
script_info = None
return (ScriptInfoResult(script=script_info, warning_messages=messages),
return (ScriptInfoResult(script=script_info, warning_messages=messages, auxiliary_git_diff=auxiliary_git_diff),
script_requirements)
@classmethod
@ -724,6 +728,7 @@ class ScriptInfo(object):
class ScriptInfoResult(object):
script = attr.ib(default=None)
warning_messages = attr.ib(factory=list)
auxiliary_git_diff = attr.ib(default=None)
class _JupyterHistoryLogger(object):

View File

@ -21,6 +21,7 @@ from collections import OrderedDict
from six.moves.urllib.parse import quote
from ...utilities.locks import RLock as FileRLock
from ...binding.artifacts import Artifacts
from ...backend_interface.task.development.worker import DevWorker
from ...backend_api import Session
from ...backend_api.services import tasks, models, events, projects
@ -154,6 +155,7 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
log_to_backend = False
self._log_to_backend = log_to_backend
self._setup_log(default_log_to_backend=log_to_backend)
self._artifacts_manager = Artifacts(self)
def _setup_log(self, default_log_to_backend=None, replace_existing=False):
"""
@ -264,6 +266,11 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
for msg in result.warning_messages:
self.get_logger().report_text(msg)
# if the git is too large to store on the task, we must store it as artifact:
if result.auxiliary_git_diff:
self._artifacts_manager.upload_artifact(
name='auxiliary_git_diff', artifact_object=result.auxiliary_git_diff)
# store original entry point
entry_point = result.script.get('entry_point') if result.script else None

View File

@ -161,6 +161,9 @@ class Artifact(object):
elif self.type == 'JSON':
with open(local_file, 'rt') as f:
self._object = json.load(f)
elif self.type == 'string':
with open(local_file, 'rt') as f:
self._object = f.read()
elif self.type == 'pickle':
with open(local_file, 'rb') as f:
self._object = pickle.load(f)
@ -316,10 +319,18 @@ class Artifacts(object):
artifact_path = Path(artifact_object)
if artifact_path.exists():
artifact_object = artifact_path
elif '*' in artifact_object or '?' in artifact_object:
# hackish, detect wildcard in tr files
folder = Path('').joinpath(*artifact_path.parts[:-1])
if folder.is_dir() and folder.parts:
wildcard = artifact_path.parts[-1]
if list(Path(folder).rglob(wildcard)):
artifact_object = artifact_path
except Exception:
pass
artifact_type_data = tasks.ArtifactTypeData()
artifact_type_data.preview = ''
override_filename_in_uri = None
override_filename_ext_in_uri = None
uri = None
@ -367,7 +378,7 @@ class Artifacts(object):
artifact_type_data.preview = preview
else:
artifact_type_data.preview = '# full json too large to store, storing first {}kb\n{}'.format(
len(preview)//1024, preview[:self.max_preview_size_bytes]
self.max_preview_size_bytes//1024, preview[:self.max_preview_size_bytes]
)
delete_after_upload = True
@ -428,6 +439,8 @@ class Artifacts(object):
raise ValueError("Artifact file '{}' could not be found".format(artifact_object.as_posix()))
override_filename_in_uri = artifact_object.parts[-1]
artifact_type_data.preview = '{} - {}\n'.format(
artifact_object, humanfriendly.format_size(artifact_object.stat().st_size))
artifact_object = artifact_object.as_posix()
artifact_type = 'custom'
artifact_type_data.content_type = mimetypes.guess_type(artifact_object)[0]
@ -441,11 +454,38 @@ class Artifacts(object):
uri = artifact_object
artifact_type = 'custom'
artifact_type_data.content_type = mimetypes.guess_type(artifact_object)[0]
elif isinstance(artifact_object, six.string_types):
# if we got here, we should store it as text file.
artifact_type = 'string'
artifact_type_data.content_type = 'text/plain'
if len(artifact_object) < self.max_preview_size_bytes:
artifact_type_data.preview = artifact_object
else:
artifact_type_data.preview = '# full text too large to store, storing first {}kb\n{}'.format(
self.max_preview_size_bytes//1024, artifact_object[:self.max_preview_size_bytes]
)
delete_after_upload = True
override_filename_ext_in_uri = '.txt'
override_filename_in_uri = name + override_filename_ext_in_uri
fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.', suffix=override_filename_ext_in_uri)
os.close(fd)
# noinspection PyBroadException
try:
with open(local_filename, 'wt') as f:
f.write(artifact_object)
except Exception as ex:
# cleanup and raise exception
os.unlink(local_filename)
raise
elif auto_pickle:
# if we are here it means we do not know what to do with the object, so we serialize it with pickle.
artifact_type = 'pickle'
artifact_type_data.content_type = 'application/pickle'
artifact_type_data.preview = str(artifact_object.__repr__())[:self.max_preview_size_bytes]
# noinspection PyBroadException
try:
artifact_type_data.preview = str(artifact_object.__repr__())[:self.max_preview_size_bytes]
except Exception:
artifact_type_data.preview = ''
delete_after_upload = True
override_filename_ext_in_uri = '.pkl'
override_filename_in_uri = name + override_filename_ext_in_uri
@ -455,7 +495,7 @@ class Artifacts(object):
try:
with open(local_filename, 'wb') as f:
pickle.dump(artifact_object, f)
except Exception:
except Exception as ex:
# cleanup and raise exception
os.unlink(local_filename)
raise

View File

@ -152,7 +152,6 @@ class Task(_Task):
self._connected_parameter_type = None
self._detect_repo_async_thread = None
self._resource_monitor = None
self._artifacts_manager = Artifacts(self)
self._calling_filename = None
# register atexit, so that we mark the task as stopped
self._at_exit_called = False