From 94dc32a9638d37ec77c65824eb491654eee4eb26 Mon Sep 17 00:00:00 2001 From: pollfly <75068813+pollfly@users.noreply.github.com> Date: Wed, 18 Aug 2021 10:29:24 +0300 Subject: [PATCH] add enabling/disabling docs for non-responsive watchdog (#39) --- docs/deploying_clearml/clearml_server_config.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/deploying_clearml/clearml_server_config.md b/docs/deploying_clearml/clearml_server_config.md index 5eb4b600..3d15d9ed 100644 --- a/docs/deploying_clearml/clearml_server_config.md +++ b/docs/deploying_clearml/clearml_server_config.md @@ -300,6 +300,7 @@ the watchdog marks them as `aborted`. The non-responsive experiment watchdog is Modify the following settings for the watchdog: +* Watchdog status - enabled / disabled * The time threshold (in seconds) of experiment inactivity (default value is 7200 seconds (2 hours)). * The time interval (in seconds) between watchdog cycles. @@ -312,6 +313,8 @@ Modify the following settings for the watchdog: tasks { non_responsive_tasks_watchdog { + enabled: true + # In-progress tasks that haven't been updated for at least 'value' seconds will be stopped by the watchdog threshold_sec: 7200