Add command line arguments for k8s_glue_example.py (#196)

Co-authored-by: Meshcheryakov Ilya <i.meshcheryakov@mts.ai>
Co-authored-by: Jake Henning <59198928+jkhenning@users.noreply.github.com>
This commit is contained in:
Ilia Meshcheriakov 2025-06-25 18:13:59 +03:00 committed by GitHub
parent 740f90c96f
commit 3e5153a068
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 41 additions and 3 deletions

View File

@ -148,6 +148,10 @@ class K8sIntegration(Worker):
:param str extra_bash_init_script: Additional bash script to run before starting the Task inside the container :param str extra_bash_init_script: Additional bash script to run before starting the Task inside the container
:param str namespace: K8S namespace to be used when creating the new pods (default: clearml) :param str namespace: K8S namespace to be used when creating the new pods (default: clearml)
:param int max_pods_limit: Maximum number of pods that K8S glue can run at the same time :param int max_pods_limit: Maximum number of pods that K8S glue can run at the same time
:param str pod_name_prefix: Define pod name prefix for k8s (default: clearml-id-)
:param str limit_pod_label: Define limit pod label for k8s (default: ai.allegro.agent.serial=pod-{pod_number})
:param bool force_system_packages: Whether to force the use of system packages; intended to be true when running tasks in containers (i.e. docker mode or k8s glue).
(default: true)
""" """
super(K8sIntegration, self).__init__() super(K8sIntegration, self).__init__()
self.kind = os.environ.get("CLEARML_K8S_GLUE_KIND", "pod").strip().lower() self.kind = os.environ.get("CLEARML_K8S_GLUE_KIND", "pod").strip().lower()

View File

@ -13,9 +13,19 @@ def parse_args():
group = parser.add_mutually_exclusive_group() group = parser.add_mutually_exclusive_group()
parser.add_argument( parser.add_argument(
"--queue", "--k8s-pending-queue-name", type=str,
type=str, help="Queue name to use when task is pending in the k8s scheduler (default: %(default)s)", default="k8s_scheduler"
help="Queues to pull tasks from. If multiple queues, use comma separated list, e.g. 'queue1,queue2'", )
parser.add_argument(
"--container-bash-script", type=str,
help="Path to the file with container bash script to be executed in k8s", default=None
)
parser.add_argument(
"--debug", action="store_true", default=False,
help="Switch logging on (default: %(default)s)"
)
parser.add_argument(
"--queue", type=str, help="Queues to pull tasks from. If multiple queues, use comma separated list, e.g. 'queue1,queue2'",
) )
group.add_argument( group.add_argument(
"--ports-mode", "--ports-mode",
@ -82,6 +92,18 @@ def parse_args():
help="Limit the maximum number of pods that this service can run at the same time." help="Limit the maximum number of pods that this service can run at the same time."
"Should not be used with ports-mode", "Should not be used with ports-mode",
) )
parser.add_argument(
"--pod-name-prefix", type=str,
help="Define pod name prefix for k8s (default: %(default)s)", default="clearml-id-"
)
parser.add_argument(
"--limit-pod-label", type=str,
help="Define limit pod label for k8s (default: %(default)s)", default="ai.allegro.agent.serial=pod-{pod_number}"
)
parser.add_argument(
"--no-system-packages", action="store_true", default=False,
help="False when running tasks in containers (default: %(default)s)"
)
parser.add_argument( parser.add_argument(
"--use-owner-token", "--use-owner-token",
action="store_true", action="store_true",
@ -113,7 +135,15 @@ def main():
user_props_cb = k8s_user_props_cb user_props_cb = k8s_user_props_cb
if args.container_bash_script:
with open(args.container_bash_script, "r") as file:
container_bash_script = file.read().splitlines()
else:
container_bash_script = None
k8s = K8sIntegration( k8s = K8sIntegration(
k8s_pending_queue_name=args.k8s_pending_queue_name,
container_bash_script=container_bash_script,
ports_mode=args.ports_mode, ports_mode=args.ports_mode,
num_of_services=args.num_of_services, num_of_services=args.num_of_services,
base_pod_num=args.base_pod_num, base_pod_num=args.base_pod_num,
@ -126,6 +156,10 @@ def main():
else None, else None,
namespace=args.namespace, namespace=args.namespace,
max_pods_limit=args.max_pods or None, max_pods_limit=args.max_pods or None,
pod_name_prefix=args.pod_name_prefix,
limit_pod_label=args.limit_pod_label,
force_system_packages=not args.no_system_packages,
debug=args.debug,
) )
queue = [q.strip() for q in args.queue.split(",") if q.strip()] if args.queue else None queue = [q.strip() for q in args.queue.split(",") if q.strip()] if args.queue else None