From b318da7b7f56f528f0e6b07ea7ae6a15fdb28622 Mon Sep 17 00:00:00 2001 From: Niels ten Boom Date: Fri, 4 Nov 2022 11:11:41 +0100 Subject: [PATCH] sth like this should work --- clearml_agent/backend_api/config/default/agent.conf | 5 +++++ clearml_agent/commands/worker.py | 6 +++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/clearml_agent/backend_api/config/default/agent.conf b/clearml_agent/backend_api/config/default/agent.conf index 94e0e0d..a40b249 100644 --- a/clearml_agent/backend_api/config/default/agent.conf +++ b/clearml_agent/backend_api/config/default/agent.conf @@ -325,4 +325,9 @@ # into the file specified in CLEARML_CUSTOM_BUILD_OUTPUT, the agent will emit a warning and continue with the # standard flow. custom_build_script: "" + + # when set to false if the agent encounters an exception, + # it will catch and log the exception and continue running. + # When set to true the exception will be propagated. + crash_on_exception: false } diff --git a/clearml_agent/commands/worker.py b/clearml_agent/commands/worker.py index 24b1450..e4e2f63 100644 --- a/clearml_agent/commands/worker.py +++ b/clearml_agent/commands/worker.py @@ -1548,10 +1548,14 @@ class Worker(ServiceCommandSection): gpu_indexes=gpu_indexes, gpu_queues=dynamic_gpus, ) - except Exception: + except Exception as e: tb = six.text_type(traceback.format_exc()) print("FATAL ERROR:") print(tb) + + if self._session.config["agent.crash_on_exception"]: + raise e + crash_file, name = safe_mkstemp(prefix=".clearml_agent-crash", suffix=".log") try: with crash_file: