From 537b67e0cd6cc790ff5b3410b5100356f7a0f6c8 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Mon, 12 Apr 2021 23:00:59 +0300 Subject: [PATCH] Fix agent can return non-zero error code and pods will end up restarting forever (issue #56) --- clearml_agent/glue/k8s.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clearml_agent/glue/k8s.py b/clearml_agent/glue/k8s.py index 6ec5ff3..d00abd2 100644 --- a/clearml_agent/glue/k8s.py +++ b/clearml_agent/glue/k8s.py @@ -434,11 +434,12 @@ class K8sIntegration(Worker): script_encoded.encode('ascii') ).decode('ascii')) + # Notice: we always leave with exit code 0, so pods are never restarted container = self._merge_containers( container, dict(name=name, image=docker_image, command=['/bin/bash'], - args=['-c', '{} ; {}'.format(create_clearml_conf, create_init_script)]) + args=['-c', '{} ; {} ; exit 0'.format(create_clearml_conf, create_init_script)]) ) if template['spec']['containers']: