From e1e3c84a8d07b9924402dfe73af991e71088b554 Mon Sep 17 00:00:00 2001 From: Alex Burlacu Date: Thu, 24 Aug 2023 19:01:26 +0300 Subject: [PATCH] Update docker versions --- docker/k8s-glue/build-resources/clearml.conf | 2 +- .../build-resources/k8s_glue_example.py | 22 +++++++++++++++++-- docker/k8s-glue/glue-build/Dockerfile.alpine | 4 ++-- .../k8s-glue/glue-build/Dockerfile.bullseye | 2 +- .../k8s-glue/glue-build/k8s_glue_example.py | 6 ++++- examples/dynamic_cloud_cluster.ipynb | 4 ++-- 6 files changed, 31 insertions(+), 9 deletions(-) diff --git a/docker/k8s-glue/build-resources/clearml.conf b/docker/k8s-glue/build-resources/clearml.conf index 1500af1..13b9663 100644 --- a/docker/k8s-glue/build-resources/clearml.conf +++ b/docker/k8s-glue/build-resources/clearml.conf @@ -171,7 +171,7 @@ agent { default_docker: { # default docker image to use when running in docker mode - image: "nvidia/cuda:10.2-cudnn7-runtime-ubuntu18.04" + image: "nvidia/cuda:11.0.3-cudnn8-runtime-ubuntu20.04" # optional arguments to pass to docker image # arguments: ["--ipc=host", ] diff --git a/docker/k8s-glue/build-resources/k8s_glue_example.py b/docker/k8s-glue/build-resources/k8s_glue_example.py index dc69c37..80c82af 100644 --- a/docker/k8s-glue/build-resources/k8s_glue_example.py +++ b/docker/k8s-glue/build-resources/k8s_glue_example.py @@ -65,6 +65,19 @@ def parse_args(): help="Limit the maximum number of pods that this service can run at the same time." "Should not be used with ports-mode" ) + parser.add_argument( + "--use-owner-token", action="store_true", default=False, + help="Generate and use task owner token for the execution of each task" + ) + parser.add_argument( + "--standalone-mode", action="store_true", default=False, + help="Do not use any network connects, assume everything is pre-installed" + ) + parser.add_argument( + "--child-report-tags", type=str, nargs="+", default=None, + help="List of tags to send with the status reports from a worker that runs a task" + ) + return parser.parse_args() @@ -85,9 +98,14 @@ def main(): user_props_cb=user_props_cb, overrides_yaml=args.overrides_yaml, clearml_conf_file=args.pod_clearml_conf, template_yaml=args.template_yaml, extra_bash_init_script=K8sIntegration.get_ssh_server_bash( ssh_port_number=args.ssh_server_port) if args.ssh_server_port else None, - namespace=args.namespace, max_pods_limit=args.max_pods or None, + namespace=args.namespace, max_pods_limit=args.max_pods or None + ) + k8s.k8s_daemon( + args.queue, + use_owner_token=args.use_owner_token, + standalone_mode=args.standalone_mode, + child_report_tags=args.child_report_tags ) - k8s.k8s_daemon(args.queue) if __name__ == "__main__": diff --git a/docker/k8s-glue/glue-build/Dockerfile.alpine b/docker/k8s-glue/glue-build/Dockerfile.alpine index 186a976..a092608 100644 --- a/docker/k8s-glue/glue-build/Dockerfile.alpine +++ b/docker/k8s-glue/glue-build/Dockerfile.alpine @@ -1,4 +1,4 @@ -ARG TAG=3.7.12-alpine3.15 +ARG TAG=3.7.17-alpine3.18 FROM python:${TAG} as build @@ -20,7 +20,7 @@ FROM python:${TAG} as target WORKDIR /app -ARG KUBECTL_VERSION=1.22.4 +ARG KUBECTL_VERSION=1.24.0 # Not sure about these ENV vars # ENV LC_ALL=en_US.UTF-8 diff --git a/docker/k8s-glue/glue-build/Dockerfile.bullseye b/docker/k8s-glue/glue-build/Dockerfile.bullseye index 9725838..4443419 100644 --- a/docker/k8s-glue/glue-build/Dockerfile.bullseye +++ b/docker/k8s-glue/glue-build/Dockerfile.bullseye @@ -1,4 +1,4 @@ -ARG TAG=3.7.12-slim-bullseye +ARG TAG=3.7.17-slim-bullseye FROM python:${TAG} as target diff --git a/docker/k8s-glue/glue-build/k8s_glue_example.py b/docker/k8s-glue/glue-build/k8s_glue_example.py index dc69c37..7085238 100644 --- a/docker/k8s-glue/glue-build/k8s_glue_example.py +++ b/docker/k8s-glue/glue-build/k8s_glue_example.py @@ -65,6 +65,10 @@ def parse_args(): help="Limit the maximum number of pods that this service can run at the same time." "Should not be used with ports-mode" ) + parser.add_argument( + "--use-owner-token", action="store_true", default=False, + help="Generate and use task owner token for the execution of each task" + ) return parser.parse_args() @@ -87,7 +91,7 @@ def main(): ssh_port_number=args.ssh_server_port) if args.ssh_server_port else None, namespace=args.namespace, max_pods_limit=args.max_pods or None, ) - k8s.k8s_daemon(args.queue) + k8s.k8s_daemon(args.queue, use_owner_token=args.use_owner_token) if __name__ == "__main__": diff --git a/examples/dynamic_cloud_cluster.ipynb b/examples/dynamic_cloud_cluster.ipynb index 6928c71..b245aea 100644 --- a/examples/dynamic_cloud_cluster.ipynb +++ b/examples/dynamic_cloud_cluster.ipynb @@ -156,7 +156,7 @@ "TRAINS_GIT_PASS = \"\"\n", "\n", "# Additional fields for trains.conf file created on the remote instance\n", - "# for example: 'agent.default_docker.image: \"nvidia/cuda:10.0-cudnn7-runtime\"'\n", + "# for example: 'agent.default_docker.image: \"nvidia/cuda:11.0.3-cudnn8-runtime-ubuntu20.04\"'\n", "EXTRA_TRAINS_CONF = \"\"\"\n", "\"\"\"\n", "\n", @@ -584,4 +584,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +}