diff --git a/clearml_agent/glue/k8s.py b/clearml_agent/glue/k8s.py index a7f3cc6..7e984a2 100644 --- a/clearml_agent/glue/k8s.py +++ b/clearml_agent/glue/k8s.py @@ -455,6 +455,9 @@ class K8sIntegration(Worker): def ports_mode_supported_for_task(self, task_id: str, task_data): return self.ports_mode + def get_default_docker_image(self, session, queue: str) -> str: + return str(ENV_DOCKER_IMAGE.get() or session.config.get("agent.default_docker.image", "nvidia/cuda")) + def run_one_task(self, queue: Text, task_id: Text, worker_args=None, task_session=None, **_): print('Pulling task {} launching on kubernetes cluster'.format(task_id)) session = task_session or self._session @@ -509,9 +512,7 @@ class K8sIntegration(Worker): container = get_task_container(session, task_id) if not container.get('image'): - container['image'] = str( - ENV_DOCKER_IMAGE.get() or session.config.get("agent.default_docker.image", "nvidia/cuda") - ) + container['image'] = self.get_default_docker_image(session, queue) container['arguments'] = session.config.get("agent.default_docker.arguments", None) set_task_container( session, task_id, docker_image=container['image'], docker_arguments=container['arguments'] diff --git a/docker/k8s-glue/build-resources/clearml.conf b/docker/k8s-glue/build-resources/clearml.conf index 18c38cc..5fb1dcc 100644 --- a/docker/k8s-glue/build-resources/clearml.conf +++ b/docker/k8s-glue/build-resources/clearml.conf @@ -13,7 +13,7 @@ api { agent.git_user="" agent.git_pass="" -# extra_index_url: ["https://allegroai.jfrog.io/clearml/api/pypi/public/simple"] +# extra_index_url: ["https://clearml.jfrog.io/clearml/api/pypi/public/simple"] agent.package_manager.extra_index_url= [ ] @@ -68,7 +68,7 @@ agent { force_upgrade: false, # additional artifact repositories to use when installing python packages - # extra_index_url: ["https://allegroai.jfrog.io/clearmlai/api/pypi/public/simple"] + # extra_index_url: ["https://clearml.jfrog.io/clearmlai/api/pypi/public/simple"] # additional conda channels to use when installing with conda package manager conda_channels: ["pytorch", "conda-forge", "defaults", ] diff --git a/examples/dynamic_cloud_cluster.ipynb b/examples/dynamic_cloud_cluster.ipynb index c9e5b46..0c9414a 100644 --- a/examples/dynamic_cloud_cluster.ipynb +++ b/examples/dynamic_cloud_cluster.ipynb @@ -20,7 +20,7 @@ "This notebook defines a cloud budget (currently only AWS is supported, but feel free to expand with PRs), and spins an instance the minute a job is waiting for execution. It will also spin down idle machines, saving you some $$$ :)\n", "\n", "> **Note:**\n", - "> This is just an example of how you can use ClearML Agent to implement custom autoscaling. For a more structured autoscaler script, see [here](https://github.com/allegroai/clearml/blob/master/clearml/automation/auto_scaler.py).\n", + "> This is just an example of how you can use ClearML Agent to implement custom autoscaling. For a more structured autoscaler script, see [here](https://github.com/clearml/clearml/blob/master/clearml/automation/auto_scaler.py).\n", "\n", "Configuration steps:\n", "- Define maximum budget to be used (instance type / number of instances).\n", diff --git a/examples/k8s_glue_example.py b/examples/k8s_glue_example.py index c88bada..6275aca 100644 --- a/examples/k8s_glue_example.py +++ b/examples/k8s_glue_example.py @@ -1,6 +1,6 @@ """ This example assumes you have preconfigured services with selectors in the form of - "ai.allegro.agent.serial=pod-" and a targetPort of 10022. + "ai.clearml.agent.serial=pod-" and a targetPort of 10022. The K8sIntegration component will label each pod accordingly. """ from argparse import ArgumentParser @@ -22,7 +22,7 @@ def parse_args(): action="store_true", default=False, help="Ports-Mode will add a label to the pod which can be used as service, in order to expose ports" - "Should not be used with max-pods" + "Should not be used with max-pods", ) parser.add_argument( "--num-of-services", @@ -34,15 +34,15 @@ def parse_args(): "--base-port", type=int, help="Used in conjunction with ports-mode, specifies the base port exposed by the services. " - "For pod #X, the port will be +X. Note that pod number is calculated based on base-pod-num" - "e.g. if base-port=20000 and base-pod-num=3, the port for the first pod will be 20003" + "For pod #X, the port will be +X. Note that pod number is calculated based on base-pod-num" + "e.g. if base-port=20000 and base-pod-num=3, the port for the first pod will be 20003", ) parser.add_argument( "--base-pod-num", type=int, default=1, help="Used in conjunction with ports-mode and base-port, specifies the base pod number to be used by the " - "service (default: %(default)s)" + "service (default: %(default)s)", ) parser.add_argument( "--gateway-address", @@ -62,7 +62,7 @@ def parse_args(): "--template-yaml", type=str, help="YAML file containing pod template. If provided pod will be scheduled with kubectl apply " - "and overrides are ignored, otherwise it will be scheduled with kubectl run" + "and overrides are ignored, otherwise it will be scheduled with kubectl run", ) parser.add_argument( "--ssh-server-port", @@ -80,7 +80,7 @@ def parse_args(): "--max-pods", type=int, help="Limit the maximum number of pods that this service can run at the same time." - "Should not be used with ports-mode" + "Should not be used with ports-mode", ) parser.add_argument( "--use-owner-token",