sth like this should work

This commit is contained in:
Niels ten Boom 2022-11-04 11:11:41 +01:00
parent 26e62da1a8
commit b318da7b7f
No known key found for this signature in database
GPG Key ID: BA06100106A9088F
2 changed files with 10 additions and 1 deletions

View File

@ -325,4 +325,9 @@
# into the file specified in CLEARML_CUSTOM_BUILD_OUTPUT, the agent will emit a warning and continue with the
# standard flow.
custom_build_script: ""
# When set to false, if the agent encounters an exception,
# it will catch and log the exception and continue running.
# When set to true, the exception will be propagated.
crash_on_exception: false
}

View File

@ -1548,10 +1548,14 @@ class Worker(ServiceCommandSection):
gpu_indexes=gpu_indexes,
gpu_queues=dynamic_gpus,
)
except Exception:
except Exception as e:
tb = six.text_type(traceback.format_exc())
print("FATAL ERROR:")
print(tb)
if self._session.config["agent.crash_on_exception"]:
raise e
crash_file, name = safe_mkstemp(prefix=".clearml_agent-crash", suffix=".log")
try:
with crash_file: