mirror of
https://github.com/clearml/clearml-agent
synced 2025-04-06 05:25:07 +00:00
Refactor
This commit is contained in:
parent
1926673951
commit
326ba81105
@ -455,6 +455,9 @@ class K8sIntegration(Worker):
|
|||||||
def ports_mode_supported_for_task(self, task_id: str, task_data):
|
def ports_mode_supported_for_task(self, task_id: str, task_data):
|
||||||
return self.ports_mode
|
return self.ports_mode
|
||||||
|
|
||||||
|
def get_default_docker_image(self, session, queue: str) -> str:
|
||||||
|
return str(ENV_DOCKER_IMAGE.get() or session.config.get("agent.default_docker.image", "nvidia/cuda"))
|
||||||
|
|
||||||
def run_one_task(self, queue: Text, task_id: Text, worker_args=None, task_session=None, **_):
|
def run_one_task(self, queue: Text, task_id: Text, worker_args=None, task_session=None, **_):
|
||||||
print('Pulling task {} launching on kubernetes cluster'.format(task_id))
|
print('Pulling task {} launching on kubernetes cluster'.format(task_id))
|
||||||
session = task_session or self._session
|
session = task_session or self._session
|
||||||
@ -509,9 +512,7 @@ class K8sIntegration(Worker):
|
|||||||
|
|
||||||
container = get_task_container(session, task_id)
|
container = get_task_container(session, task_id)
|
||||||
if not container.get('image'):
|
if not container.get('image'):
|
||||||
container['image'] = str(
|
container['image'] = self.get_default_docker_image(session, queue)
|
||||||
ENV_DOCKER_IMAGE.get() or session.config.get("agent.default_docker.image", "nvidia/cuda")
|
|
||||||
)
|
|
||||||
container['arguments'] = session.config.get("agent.default_docker.arguments", None)
|
container['arguments'] = session.config.get("agent.default_docker.arguments", None)
|
||||||
set_task_container(
|
set_task_container(
|
||||||
session, task_id, docker_image=container['image'], docker_arguments=container['arguments']
|
session, task_id, docker_image=container['image'], docker_arguments=container['arguments']
|
||||||
|
@ -13,7 +13,7 @@ api {
|
|||||||
agent.git_user=""
|
agent.git_user=""
|
||||||
agent.git_pass=""
|
agent.git_pass=""
|
||||||
|
|
||||||
# extra_index_url: ["https://allegroai.jfrog.io/clearml/api/pypi/public/simple"]
|
# extra_index_url: ["https://clearml.jfrog.io/clearml/api/pypi/public/simple"]
|
||||||
agent.package_manager.extra_index_url= [
|
agent.package_manager.extra_index_url= [
|
||||||
|
|
||||||
]
|
]
|
||||||
@ -68,7 +68,7 @@ agent {
|
|||||||
force_upgrade: false,
|
force_upgrade: false,
|
||||||
|
|
||||||
# additional artifact repositories to use when installing python packages
|
# additional artifact repositories to use when installing python packages
|
||||||
# extra_index_url: ["https://allegroai.jfrog.io/clearmlai/api/pypi/public/simple"]
|
# extra_index_url: ["https://clearml.jfrog.io/clearmlai/api/pypi/public/simple"]
|
||||||
|
|
||||||
# additional conda channels to use when installing with conda package manager
|
# additional conda channels to use when installing with conda package manager
|
||||||
conda_channels: ["pytorch", "conda-forge", "defaults", ]
|
conda_channels: ["pytorch", "conda-forge", "defaults", ]
|
||||||
|
@ -20,7 +20,7 @@
|
|||||||
"This notebook defines a cloud budget (currently only AWS is supported, but feel free to expand with PRs), and spins an instance the minute a job is waiting for execution. It will also spin down idle machines, saving you some $$$ :)\n",
|
"This notebook defines a cloud budget (currently only AWS is supported, but feel free to expand with PRs), and spins an instance the minute a job is waiting for execution. It will also spin down idle machines, saving you some $$$ :)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"> **Note:**\n",
|
"> **Note:**\n",
|
||||||
"> This is just an example of how you can use ClearML Agent to implement custom autoscaling. For a more structured autoscaler script, see [here](https://github.com/allegroai/clearml/blob/master/clearml/automation/auto_scaler.py).\n",
|
"> This is just an example of how you can use ClearML Agent to implement custom autoscaling. For a more structured autoscaler script, see [here](https://github.com/clearml/clearml/blob/master/clearml/automation/auto_scaler.py).\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Configuration steps:\n",
|
"Configuration steps:\n",
|
||||||
"- Define maximum budget to be used (instance type / number of instances).\n",
|
"- Define maximum budget to be used (instance type / number of instances).\n",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
"""
|
"""
|
||||||
This example assumes you have preconfigured services with selectors in the form of
|
This example assumes you have preconfigured services with selectors in the form of
|
||||||
"ai.allegro.agent.serial=pod-<number>" and a targetPort of 10022.
|
"ai.clearml.agent.serial=pod-<number>" and a targetPort of 10022.
|
||||||
The K8sIntegration component will label each pod accordingly.
|
The K8sIntegration component will label each pod accordingly.
|
||||||
"""
|
"""
|
||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
@ -22,7 +22,7 @@ def parse_args():
|
|||||||
action="store_true",
|
action="store_true",
|
||||||
default=False,
|
default=False,
|
||||||
help="Ports-Mode will add a label to the pod which can be used as service, in order to expose ports"
|
help="Ports-Mode will add a label to the pod which can be used as service, in order to expose ports"
|
||||||
"Should not be used with max-pods"
|
"Should not be used with max-pods",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--num-of-services",
|
"--num-of-services",
|
||||||
@ -34,15 +34,15 @@ def parse_args():
|
|||||||
"--base-port",
|
"--base-port",
|
||||||
type=int,
|
type=int,
|
||||||
help="Used in conjunction with ports-mode, specifies the base port exposed by the services. "
|
help="Used in conjunction with ports-mode, specifies the base port exposed by the services. "
|
||||||
"For pod #X, the port will be <base-port>+X. Note that pod number is calculated based on base-pod-num"
|
"For pod #X, the port will be <base-port>+X. Note that pod number is calculated based on base-pod-num"
|
||||||
"e.g. if base-port=20000 and base-pod-num=3, the port for the first pod will be 20003"
|
"e.g. if base-port=20000 and base-pod-num=3, the port for the first pod will be 20003",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--base-pod-num",
|
"--base-pod-num",
|
||||||
type=int,
|
type=int,
|
||||||
default=1,
|
default=1,
|
||||||
help="Used in conjunction with ports-mode and base-port, specifies the base pod number to be used by the "
|
help="Used in conjunction with ports-mode and base-port, specifies the base pod number to be used by the "
|
||||||
"service (default: %(default)s)"
|
"service (default: %(default)s)",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--gateway-address",
|
"--gateway-address",
|
||||||
@ -62,7 +62,7 @@ def parse_args():
|
|||||||
"--template-yaml",
|
"--template-yaml",
|
||||||
type=str,
|
type=str,
|
||||||
help="YAML file containing pod template. If provided pod will be scheduled with kubectl apply "
|
help="YAML file containing pod template. If provided pod will be scheduled with kubectl apply "
|
||||||
"and overrides are ignored, otherwise it will be scheduled with kubectl run"
|
"and overrides are ignored, otherwise it will be scheduled with kubectl run",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--ssh-server-port",
|
"--ssh-server-port",
|
||||||
@ -80,7 +80,7 @@ def parse_args():
|
|||||||
"--max-pods",
|
"--max-pods",
|
||||||
type=int,
|
type=int,
|
||||||
help="Limit the maximum number of pods that this service can run at the same time."
|
help="Limit the maximum number of pods that this service can run at the same time."
|
||||||
"Should not be used with ports-mode"
|
"Should not be used with ports-mode",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--use-owner-token",
|
"--use-owner-token",
|
||||||
|
Loading…
Reference in New Issue
Block a user