From 7292263f8682819097105e96b31404221673b608 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Tue, 23 Aug 2022 23:16:36 +0300 Subject: [PATCH] Add CLEARML_K8S_GLUE_START_AGENT_SCRIPT_PATH to allow customizing the agent startup script location for k8s glue agent --- clearml_agent/glue/definitions.py | 7 +++++++ clearml_agent/glue/k8s.py | 13 +++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) create mode 100644 clearml_agent/glue/definitions.py diff --git a/clearml_agent/glue/definitions.py b/clearml_agent/glue/definitions.py new file mode 100644 index 0000000..2c420b0 --- /dev/null +++ b/clearml_agent/glue/definitions.py @@ -0,0 +1,7 @@ +from clearml_agent.definitions import EnvironmentConfig + +ENV_START_AGENT_SCRIPT_PATH = EnvironmentConfig('CLEARML_K8S_GLUE_START_AGENT_SCRIPT_PATH') +""" +Script path to use when creating the bash script to run the agent inside the scheduled pod's docker container. +Script will be appended to the specified file. +""" diff --git a/clearml_agent/glue/k8s.py b/clearml_agent/glue/k8s.py index 3bdf486..a531345 100644 --- a/clearml_agent/glue/k8s.py +++ b/clearml_agent/glue/k8s.py @@ -27,6 +27,8 @@ from clearml_agent.helper.process import get_bash_output from clearml_agent.helper.resource_monitor import ResourceMonitor from clearml_agent.interface.base import ObjectID +from .definitions import ENV_START_AGENT_SCRIPT_PATH + class K8sIntegration(Worker): K8S_PENDING_QUEUE = "k8s_scheduler" @@ -644,12 +646,15 @@ class K8sIntegration(Worker): extra_bash_commands = list(create_clearml_conf or []) + start_agent_script_path = ENV_START_AGENT_SCRIPT_PATH.get() or "~/__start_agent__.sh" + extra_bash_commands.append( - "echo '{}' | base64 --decode >> ~/__start_agent__.sh ; " - "/bin/bash ~/__start_agent__.sh".format( - base64.b64encode( + "echo '{content}' | base64 --decode >> {script_path} ; /bin/bash {script_path}".format( + content=base64.b64encode( script_encoded.encode('ascii') - ).decode('ascii')) + ).decode('ascii'), + script_path=start_agent_script_path + ) ) # Notice: we always leave with exit code 0, so pods are never restarted