mirror of
https://github.com/clearml/clearml-agent
synced 2025-01-31 00:56:53 +00:00
Add option to crash agent on exception using agent.crash_on_exception
configuration setting (#123)
This commit is contained in:
parent
e4861fc0fb
commit
9eee213683
@ -327,4 +327,9 @@
|
||||
# into the file specified in CLEARML_CUSTOM_BUILD_OUTPUT, the agent will emit a warning and continue with the
|
||||
# standard flow.
|
||||
custom_build_script: ""
|
||||
|
||||
# Crash on exception: by default, when an exception is raised while running a task,
|
||||
# the agent will catch the exception, log it and continue running.
|
||||
# Set this to `true` to propagate exceptions and crash the agent.
|
||||
# crash_on_exception: true
|
||||
}
|
||||
|
@ -1548,10 +1548,14 @@ class Worker(ServiceCommandSection):
|
||||
gpu_indexes=gpu_indexes,
|
||||
gpu_queues=dynamic_gpus,
|
||||
)
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
tb = six.text_type(traceback.format_exc())
|
||||
print("FATAL ERROR:")
|
||||
print(tb)
|
||||
|
||||
if self._session.config.get("agent.crash_on_exception", False):
|
||||
raise e
|
||||
|
||||
crash_file, name = safe_mkstemp(prefix=".clearml_agent-crash", suffix=".log")
|
||||
try:
|
||||
with crash_file:
|
||||
|
@ -87,6 +87,7 @@ ENVIRONMENT_CONFIG = {
|
||||
"agent.cpu_only": EnvironmentConfig(
|
||||
names=("CLEARML_CPU_ONLY", "TRAINS_CPU_ONLY", "CPU_ONLY"), type=bool
|
||||
),
|
||||
"agent.crash_on_exception": EnvironmentConfig("CLEAMRL_AGENT_CRASH_ON_EXCEPTION", type=bool),
|
||||
"sdk.aws.s3.key": EnvironmentConfig("AWS_ACCESS_KEY_ID"),
|
||||
"sdk.aws.s3.secret": ENV_AWS_SECRET_KEY,
|
||||
"sdk.aws.s3.region": EnvironmentConfig("AWS_DEFAULT_REGION"),
|
||||
|
Loading…
Reference in New Issue
Block a user