clearml 2025-04-02 14:38:58 +03:00
parent 1926673951
commit 326ba81105
4 changed files with 14 additions and 13 deletions

View File

@@ -455,6 +455,9 @@ class K8sIntegration(Worker):
def ports_mode_supported_for_task(self, task_id: str, task_data):
return self.ports_mode
+ def get_default_docker_image(self, session, queue: str) -> str:
+ return str(ENV_DOCKER_IMAGE.get() or session.config.get("agent.default_docker.image", "nvidia/cuda"))
def run_one_task(self, queue: Text, task_id: Text, worker_args=None, task_session=None, **_):
print('Pulling task {} launching on kubernetes cluster'.format(task_id))
session = task_session or self._session
@@ -509,9 +512,7 @@ class K8sIntegration(Worker):
container = get_task_container(session, task_id)
if not container.get('image'):
- container['image'] = str(
- ENV_DOCKER_IMAGE.get() or session.config.get("agent.default_docker.image", "nvidia/cuda")
- )
+ container['image'] = self.get_default_docker_image(session, queue)
container['arguments'] = session.config.get("agent.default_docker.arguments", None)
set_task_container(
session, task_id, docker_image=container['image'], docker_arguments=container['arguments']
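The net effect of the two hunks above: the inline default-image lookup in run_one_task is extracted into a get_default_docker_image(session, queue) hook, so the default can be customized per queue without patching the launch path. A minimal sketch of such an override follows; the subclass name, the queue-to-image mapping, and the import path are illustrative assumptions, not part of this commit.

# Sketch only: overriding the new hook added in this commit.
# All names except get_default_docker_image are assumptions.
from clearml_agent.glue.k8s import K8sIntegration  # assumed import path

class PerQueueK8sIntegration(K8sIntegration):
    # hypothetical mapping from queue (name or ID, as passed to run_one_task) to image
    QUEUE_DEFAULT_IMAGES = {
        "gpu_queue": "nvidia/cuda:11.8.0-runtime-ubuntu22.04",
        "cpu_queue": "python:3.10-slim",
    }

    def get_default_docker_image(self, session, queue: str) -> str:
        # fall back to the stock lookup (ENV_DOCKER_IMAGE or agent.default_docker.image)
        return self.QUEUE_DEFAULT_IMAGES.get(queue) or super().get_default_docker_image(session, queue)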

View File

@@ -13,7 +13,7 @@ api {
agent.git_user=""
agent.git_pass=""
- # extra_index_url: ["https://allegroai.jfrog.io/clearml/api/pypi/public/simple"]
+ # extra_index_url: ["https://clearml.jfrog.io/clearml/api/pypi/public/simple"]
agent.package_manager.extra_index_url= [
]
@@ -68,7 +68,7 @@ agent {
force_upgrade: false,
# additional artifact repositories to use when installing python packages
- # extra_index_url: ["https://allegroai.jfrog.io/clearmlai/api/pypi/public/simple"]
+ # extra_index_url: ["https://clearml.jfrog.io/clearmlai/api/pypi/public/simple"]
# additional conda channels to use when installing with conda package manager
conda_channels: ["pytorch", "conda-forge", "defaults", ]
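Both hunks in this file only retarget the commented-out extra_index_url examples from allegroai.jfrog.io to clearml.jfrog.io. For context, a hedged sketch of enabling the option in this file's own syntax; only the URL comes from the updated comment, and turning it on at all is optional:

# Sketch only, not part of this commit: uncommenting the extra index URL
# (URL taken from the first hunk's updated comment).
agent.package_manager.extra_index_url= [
    "https://clearml.jfrog.io/clearml/api/pypi/public/simple"
]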

View File

@@ -20,7 +20,7 @@
"This notebook defines a cloud budget (currently only AWS is supported, but feel free to expand with PRs), and spins an instance the minute a job is waiting for execution. It will also spin down idle machines, saving you some $$$ :)\n",
"\n",
"> **Note:**\n",
"> This is just an example of how you can use ClearML Agent to implement custom autoscaling. For a more structured autoscaler script, see [here](https://github.com/allegroai/clearml/blob/master/clearml/automation/auto_scaler.py).\n",
"> This is just an example of how you can use ClearML Agent to implement custom autoscaling. For a more structured autoscaler script, see [here](https://github.com/clearml/clearml/blob/master/clearml/automation/auto_scaler.py).\n",
"\n",
"Configuration steps:\n",
"- Define maximum budget to be used (instance type / number of instances).\n",

View File

@@ -1,6 +1,6 @@
"""
This example assumes you have preconfigured services with selectors in the form of
"ai.allegro.agent.serial=pod-<number>" and a targetPort of 10022.
"ai.clearml.agent.serial=pod-<number>" and a targetPort of 10022.
The K8sIntegration component will label each pod accordingly.
"""
from argparse import ArgumentParser
@@ -22,7 +22,7 @@ def parse_args():
action="store_true",
default=False,
help="Ports-Mode will add a label to the pod which can be used as service, in order to expose ports"
"Should not be used with max-pods"
"Should not be used with max-pods",
)
parser.add_argument(
"--num-of-services",
@@ -34,15 +34,15 @@ def parse_args():
"--base-port",
type=int,
help="Used in conjunction with ports-mode, specifies the base port exposed by the services. "
"For pod #X, the port will be <base-port>+X. Note that pod number is calculated based on base-pod-num"
"e.g. if base-port=20000 and base-pod-num=3, the port for the first pod will be 20003"
"For pod #X, the port will be <base-port>+X. Note that pod number is calculated based on base-pod-num"
"e.g. if base-port=20000 and base-pod-num=3, the port for the first pod will be 20003",
)
parser.add_argument(
"--base-pod-num",
type=int,
default=1,
help="Used in conjunction with ports-mode and base-port, specifies the base pod number to be used by the "
"service (default: %(default)s)"
"service (default: %(default)s)",
)
parser.add_argument(
"--gateway-address",
@@ -62,7 +62,7 @@ def parse_args():
"--template-yaml",
type=str,
help="YAML file containing pod template. If provided pod will be scheduled with kubectl apply "
"and overrides are ignored, otherwise it will be scheduled with kubectl run"
"and overrides are ignored, otherwise it will be scheduled with kubectl run",
)
parser.add_argument(
"--ssh-server-port",
@@ -80,7 +80,7 @@ def parse_args():
"--max-pods",
type=int,
help="Limit the maximum number of pods that this service can run at the same time."
"Should not be used with ports-mode"
"Should not be used with ports-mode",
)
parser.add_argument(
"--use-owner-token",