mirror of
https://github.com/clearml/clearml-agent
synced 2025-04-03 12:20:58 +00:00
Refactor
This commit is contained in:
parent
1926673951
commit
326ba81105
@ -455,6 +455,9 @@ class K8sIntegration(Worker):
|
||||
def ports_mode_supported_for_task(self, task_id: str, task_data):
|
||||
return self.ports_mode
|
||||
|
||||
def get_default_docker_image(self, session, queue: str) -> str:
|
||||
return str(ENV_DOCKER_IMAGE.get() or session.config.get("agent.default_docker.image", "nvidia/cuda"))
|
||||
|
||||
def run_one_task(self, queue: Text, task_id: Text, worker_args=None, task_session=None, **_):
|
||||
print('Pulling task {} launching on kubernetes cluster'.format(task_id))
|
||||
session = task_session or self._session
|
||||
@ -509,9 +512,7 @@ class K8sIntegration(Worker):
|
||||
|
||||
container = get_task_container(session, task_id)
|
||||
if not container.get('image'):
|
||||
container['image'] = str(
|
||||
ENV_DOCKER_IMAGE.get() or session.config.get("agent.default_docker.image", "nvidia/cuda")
|
||||
)
|
||||
container['image'] = self.get_default_docker_image(session, queue)
|
||||
container['arguments'] = session.config.get("agent.default_docker.arguments", None)
|
||||
set_task_container(
|
||||
session, task_id, docker_image=container['image'], docker_arguments=container['arguments']
|
||||
|
@ -13,7 +13,7 @@ api {
|
||||
agent.git_user=""
|
||||
agent.git_pass=""
|
||||
|
||||
# extra_index_url: ["https://allegroai.jfrog.io/clearml/api/pypi/public/simple"]
|
||||
# extra_index_url: ["https://clearml.jfrog.io/clearml/api/pypi/public/simple"]
|
||||
agent.package_manager.extra_index_url= [
|
||||
|
||||
]
|
||||
@ -68,7 +68,7 @@ agent {
|
||||
force_upgrade: false,
|
||||
|
||||
# additional artifact repositories to use when installing python packages
|
||||
# extra_index_url: ["https://allegroai.jfrog.io/clearmlai/api/pypi/public/simple"]
|
||||
# extra_index_url: ["https://clearml.jfrog.io/clearmlai/api/pypi/public/simple"]
|
||||
|
||||
# additional conda channels to use when installing with conda package manager
|
||||
conda_channels: ["pytorch", "conda-forge", "defaults", ]
|
||||
|
@ -20,7 +20,7 @@
|
||||
"This notebook defines a cloud budget (currently only AWS is supported, but feel free to expand with PRs), and spins an instance the minute a job is waiting for execution. It will also spin down idle machines, saving you some $$$ :)\n",
|
||||
"\n",
|
||||
"> **Note:**\n",
|
||||
"> This is just an example of how you can use ClearML Agent to implement custom autoscaling. For a more structured autoscaler script, see [here](https://github.com/allegroai/clearml/blob/master/clearml/automation/auto_scaler.py).\n",
|
||||
"> This is just an example of how you can use ClearML Agent to implement custom autoscaling. For a more structured autoscaler script, see [here](https://github.com/clearml/clearml/blob/master/clearml/automation/auto_scaler.py).\n",
|
||||
"\n",
|
||||
"Configuration steps:\n",
|
||||
"- Define maximum budget to be used (instance type / number of instances).\n",
|
||||
|
@ -1,6 +1,6 @@
|
||||
"""
|
||||
This example assumes you have preconfigured services with selectors in the form of
|
||||
"ai.allegro.agent.serial=pod-<number>" and a targetPort of 10022.
|
||||
"ai.clearml.agent.serial=pod-<number>" and a targetPort of 10022.
|
||||
The K8sIntegration component will label each pod accordingly.
|
||||
"""
|
||||
from argparse import ArgumentParser
|
||||
@ -22,7 +22,7 @@ def parse_args():
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Ports-Mode will add a label to the pod which can be used as service, in order to expose ports"
|
||||
"Should not be used with max-pods"
|
||||
"Should not be used with max-pods",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--num-of-services",
|
||||
@ -34,15 +34,15 @@ def parse_args():
|
||||
"--base-port",
|
||||
type=int,
|
||||
help="Used in conjunction with ports-mode, specifies the base port exposed by the services. "
|
||||
"For pod #X, the port will be <base-port>+X. Note that pod number is calculated based on base-pod-num"
|
||||
"e.g. if base-port=20000 and base-pod-num=3, the port for the first pod will be 20003"
|
||||
"For pod #X, the port will be <base-port>+X. Note that pod number is calculated based on base-pod-num"
|
||||
"e.g. if base-port=20000 and base-pod-num=3, the port for the first pod will be 20003",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--base-pod-num",
|
||||
type=int,
|
||||
default=1,
|
||||
help="Used in conjunction with ports-mode and base-port, specifies the base pod number to be used by the "
|
||||
"service (default: %(default)s)"
|
||||
"service (default: %(default)s)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--gateway-address",
|
||||
@ -62,7 +62,7 @@ def parse_args():
|
||||
"--template-yaml",
|
||||
type=str,
|
||||
help="YAML file containing pod template. If provided pod will be scheduled with kubectl apply "
|
||||
"and overrides are ignored, otherwise it will be scheduled with kubectl run"
|
||||
"and overrides are ignored, otherwise it will be scheduled with kubectl run",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--ssh-server-port",
|
||||
@ -80,7 +80,7 @@ def parse_args():
|
||||
"--max-pods",
|
||||
type=int,
|
||||
help="Limit the maximum number of pods that this service can run at the same time."
|
||||
"Should not be used with ports-mode"
|
||||
"Should not be used with ports-mode",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--use-owner-token",
|
||||
|
Loading…
Reference in New Issue
Block a user