mirror of
https://github.com/clearml/clearml-agent
synced 2025-03-13 06:58:37 +00:00
Add additional k8s-glue dockerfiles (#94)
This commit is contained in:
parent
36073ad488
commit
92b5ce61a0
@ -60,6 +60,8 @@ It is a zero configuration fire-and-forget execution agent, providing a full ML/
|
|||||||
### Kubernetes Integration (Optional)
|
### Kubernetes Integration (Optional)
|
||||||
We think Kubernetes is awesome, but it should be a choice.
|
We think Kubernetes is awesome, but it should be a choice.
|
||||||
We designed `clearml-agent` so you can run bare-metal or inside a pod with any mix that fits your environment.
|
We designed `clearml-agent` so you can run bare-metal or inside a pod with any mix that fits your environment.
|
||||||
|
|
||||||
|
Find Dockerfiles in the [docker](./docker) directory and a Helm chart at https://github.com/allegroai/clearml-helm-charts
|
||||||
#### Benefits of integrating existing K8s with ClearML-Agent
|
#### Benefits of integrating existing K8s with ClearML-Agent
|
||||||
- ClearML-Agent adds the missing scheduling capabilities to K8s
|
- ClearML-Agent adds the missing scheduling capabilities to K8s
|
||||||
- Allowing for more flexible automation from code
|
- Allowing for more flexible automation from code
|
||||||
|
75
docker/k8s-glue/glue-build/Dockerfile.alpine
Normal file
75
docker/k8s-glue/glue-build/Dockerfile.alpine
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
# Base image tag. Declared before the first FROM so it can be used in FROM lines.
ARG TAG=3.7.12-alpine3.15

# Build stage: installs the Python packages under /install so they can be
# copied into a later stage without the compiler toolchain.
FROM python:${TAG} AS build

# Compiler and headers used while pip builds packages in this stage.
RUN apk add --no-cache \
    gcc \
    musl-dev \
    libffi-dev

# The version specifier must be quoted: RUN uses the shell form, and an unquoted
# ">" is parsed as an output redirection (the constraint would be dropped and
# pip's output written to a file named "=2.9").
RUN python3 \
    -m pip \
    install \
    --prefix=/install \
    --no-cache-dir \
    -U \
    clearml-agent \
    "cryptography>=2.9"
|
||||||
|
|
||||||
|
# Runtime stage: same base image plus the packages installed by the build stage.
FROM python:${TAG} AS target

WORKDIR /app

ARG KUBECTL_VERSION=1.22.4

# Not sure about these ENV vars
# ENV LC_ALL=en_US.UTF-8
# ENV LANG=en_US.UTF-8
# ENV LANGUAGE=en_US.UTF-8
# ENV PYTHONIOENCODING=UTF-8

# Bring in everything pip installed under /install in the build stage.
COPY --from=build /install /usr/local

# Download the kubectl binary (linux/amd64) for the requested version.
ADD https://storage.googleapis.com/kubernetes-release/release/v${KUBECTL_VERSION}/bin/linux/amd64/kubectl /usr/bin/

# Files fetched by ADD from a URL are not executable, so set the bit explicitly.
RUN chmod +x /usr/bin/kubectl

RUN apk add --no-cache \
    bash

COPY k8s_glue_example.py .

# Run the glue example by default, consistent with Dockerfile.bullseye; without
# this the image falls back to the base image's default command.
CMD ["python3", "k8s_glue_example.py"]
|
||||||
|
|
||||||
|
# AWS CLI
|
||||||
|
# https://github.com/kyleknap/aws-cli/blob/source-proposal/proposals/source-install.md#alpine-linux
|
||||||
|
# https://github.com/aws/aws-cli/issues/4685
|
||||||
|
# https://github.com/aws/aws-cli/pull/6352
|
||||||
|
|
||||||
|
# https://github.com/GoogleCloudPlatform/cloud-sdk-docker/blob/master/alpine/Dockerfile

# GCP stage: the target image plus the Google Cloud SDK.
FROM target AS gcp

ARG CLOUD_SDK_VERSION=371.0.0
ENV CLOUD_SDK_VERSION=$CLOUD_SDK_VERSION
# key=value form; the whitespace-separated ENV form is legacy syntax.
ENV PATH=/google-cloud-sdk/bin:$PATH

# The SDK tarball is unpacked relative to the working directory, so switch to /
# to have it land in /google-cloud-sdk (the directory added to PATH above).
WORKDIR /

# --fail makes curl exit non-zero on an HTTP error instead of saving the error
# page as the tarball.
RUN apk --no-cache add \
        curl \
        python3 \
        py3-crcmod \
        py3-openssl \
        bash \
        libc6-compat \
        openssh-client \
        git \
        gnupg \
    && curl --fail -O https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-${CLOUD_SDK_VERSION}-linux-x86_64.tar.gz && \
    tar xzf google-cloud-sdk-${CLOUD_SDK_VERSION}-linux-x86_64.tar.gz && \
    rm google-cloud-sdk-${CLOUD_SDK_VERSION}-linux-x86_64.tar.gz && \
    gcloud config set core/disable_usage_reporting true && \
    gcloud config set component_manager/disable_update_check true && \
    gcloud config set metrics/environment github_docker_image && \
    gcloud --version

# Restore the working directory used by the target stage.
WORKDIR /app
|
82
docker/k8s-glue/glue-build/Dockerfile.bullseye
Normal file
82
docker/k8s-glue/glue-build/Dockerfile.bullseye
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
# Base image tag. Declared before the first FROM so it can be used in FROM lines.
ARG TAG=3.7.12-slim-bullseye

# Base stage shared by the cloud-specific stages in this file.
FROM python:${TAG} AS target

ARG KUBECTL_VERSION=1.22.4

WORKDIR /app

# The version specifier must be quoted: RUN uses the shell form, and an unquoted
# ">" is parsed as an output redirection (the constraint would be dropped and
# pip's output written to a file named "=2.9").
RUN python3 \
    -m pip \
    install \
    --no-cache-dir \
    -U \
    clearml-agent \
    "cryptography>=2.9"

# Not sure about these ENV vars
# ENV LC_ALL=en_US.UTF-8
# ENV LANG=en_US.UTF-8
# ENV LANGUAGE=en_US.UTF-8
# ENV PYTHONIOENCODING=UTF-8

# Download the kubectl binary (linux/amd64) for the requested version.
ADD https://storage.googleapis.com/kubernetes-release/release/v${KUBECTL_VERSION}/bin/linux/amd64/kubectl /usr/bin/

# Files fetched by ADD from a URL are not executable, so set the bit explicitly.
RUN chmod +x /usr/bin/kubectl

COPY k8s_glue_example.py .

# Default command: run the glue example from /app.
CMD ["python3", "k8s_glue_example.py"]
|
||||||
|
|
||||||
|
# AWS stage: the target image plus the AWS CLI v2 bundle and aws-iam-authenticator.
FROM target AS aws

# https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html
# https://docs.aws.amazon.com/eks/latest/userguide/install-aws-iam-authenticator.html

# unzip is needed to unpack the AWS CLI bundle downloaded below.
RUN apt-get update -qqy \
    && apt-get install -qqy unzip \
    && rm -rf /var/lib/apt/lists/*

# Fetch the CLI installer bundle (into the working directory) and the
# authenticator binary (straight into /usr/local/bin).
ADD https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip awscliv2.zip
ADD https://amazon-eks.s3.us-west-2.amazonaws.com/1.21.2/2021-07-05/bin/linux/amd64/aws-iam-authenticator /usr/local/bin/aws-iam-authenticator

# Install the CLI, remove the installer files, make the authenticator
# executable, then run both tools once as a smoke test.
RUN unzip awscliv2.zip \
    && ./aws/install \
    && rm -r awscliv2.zip aws/ \
    && chmod +x /usr/local/bin/aws-iam-authenticator \
    && aws --version \
    && aws-iam-authenticator version
|
||||||
|
|
||||||
|
# https://github.com/GoogleCloudPlatform/cloud-sdk-docker/blob/master/debian_slim/Dockerfile

# GCP stage: the target image plus the Google Cloud SDK installed from Google's apt repository.
FROM target AS gcp

ARG CLOUD_SDK_VERSION=371.0.0
ENV CLOUD_SDK_VERSION=$CLOUD_SDK_VERSION

# key=value form; the whitespace-separated ENV form is legacy syntax.
ENV PATH="$PATH:/opt/google-cloud-sdk/bin/"

# Optional extra apt packages to install alongside google-cloud-sdk (empty by default).
ARG INSTALL_COMPONENTS
# NOTE(review): presumably a workaround for packages that expect the man1
# directory to exist on slim images - confirm before removing.
RUN mkdir -p /usr/share/man/man1/
# The apt index cleanup runs after the *last* apt-get install; doing it before
# the second "apt-get update" would leave the refreshed indexes in the layer.
RUN apt-get update -qqy && \
    apt-get install -qqy \
        curl \
        gcc \
        python3-dev \
        python3-pip \
        apt-transport-https \
        lsb-release \
        openssh-client \
        git \
        gnupg && \
    pip3 install -U crcmod && \
    export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" && \
    echo "deb https://packages.cloud.google.com/apt $CLOUD_SDK_REPO main" > /etc/apt/sources.list.d/google-cloud-sdk.list && \
    curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \
    apt-get update && apt-get install -y google-cloud-sdk=${CLOUD_SDK_VERSION}-0 $INSTALL_COMPONENTS && \
    rm -rf /var/lib/apt/lists/* && \
    gcloud config set core/disable_usage_reporting true && \
    gcloud config set component_manager/disable_update_check true && \
    gcloud config set metrics/environment github_docker_image && \
    gcloud --version
|
94
docker/k8s-glue/glue-build/k8s_glue_example.py
Normal file
94
docker/k8s-glue/glue-build/k8s_glue_example.py
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
"""
|
||||||
|
This example assumes you have preconfigured services with selectors in the form of
|
||||||
|
"ai.allegro.agent.serial=pod-<number>" and a targetPort of 10022.
|
||||||
|
The K8sIntegration component will label each pod accordingly.
|
||||||
|
"""
|
||||||
|
from argparse import ArgumentParser
|
||||||
|
|
||||||
|
from clearml_agent.glue.k8s import K8sIntegration
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args(argv=None):
    """Parse the command-line options of the k8s glue example.

    ``--ports-mode`` and ``--max-pods`` are registered in a mutually exclusive
    group, so supplying both makes argparse exit with a usage error.

    :param argv: Optional list of argument strings to parse. When ``None``
        (the default) argparse reads ``sys.argv[1:]``.
    :return: The ``argparse.Namespace`` holding the parsed options.
    """
    parser = ArgumentParser()
    group = parser.add_mutually_exclusive_group()

    parser.add_argument(
        "--queue", type=str, help="Queue to pull tasks from"
    )
    # Adjacent string literals are concatenated verbatim, so each help fragment
    # that continues a sentence carries its own separating space.
    group.add_argument(
        "--ports-mode", action='store_true', default=False,
        help="Ports-Mode will add a label to the pod which can be used as service, in order to expose ports. "
             "Should not be used with max-pods"
    )
    parser.add_argument(
        "--num-of-services", type=int, default=20,
        help="Specify the number of k8s services to be used. Use only with ports-mode."
    )
    parser.add_argument(
        "--base-port", type=int,
        help="Used in conjunction with ports-mode, specifies the base port exposed by the services. "
             "For pod #X, the port will be <base-port>+X. Note that pod number is calculated based on base-pod-num, "
             "e.g. if base-port=20000 and base-pod-num=3, the port for the first pod will be 20003"
    )
    parser.add_argument(
        "--base-pod-num", type=int, default=1,
        help="Used in conjunction with ports-mode and base-port, specifies the base pod number to be used by the "
             "service (default: %(default)s)"
    )
    parser.add_argument(
        "--gateway-address", type=str, default=None,
        help="Used in conjunction with ports-mode, specify the external address of the k8s ingress / ELB"
    )
    parser.add_argument(
        "--pod-clearml-conf", type=str,
        help="Configuration file to be used by the pod itself (if not provided, current configuration is used)"
    )
    parser.add_argument(
        "--overrides-yaml", type=str,
        help="YAML file containing pod overrides to be used when launching a new pod"
    )
    parser.add_argument(
        "--template-yaml", type=str,
        help="YAML file containing pod template. If provided pod will be scheduled with kubectl apply "
             "and overrides are ignored, otherwise it will be scheduled with kubectl run"
    )
    parser.add_argument(
        "--ssh-server-port", type=int, default=0,
        help="If non-zero, every pod will also start an SSH server on the selected port (default: zero, not active)"
    )
    parser.add_argument(
        "--namespace", type=str,
        help="Specify the namespace in which pods will be created (default: %(default)s)", default="clearml"
    )
    group.add_argument(
        "--max-pods", type=int,
        help="Limit the maximum number of pods that this service can run at the same time. "
             "Should not be used with ports-mode"
    )
    return parser.parse_args(argv)
|
||||||
|
|
||||||
|
|
||||||
|
def _make_user_props_cb(args):
    """Return the per-pod user-properties callback, or None when it does not apply.

    A callback is only produced when both ``ports_mode`` and ``base_port`` are
    truthy on ``args``.
    """
    if not (args.ports_mode and args.base_port):
        return None

    def k8s_user_props_cb(pod_number=0):
        # The advertised port is the base port offset by the pod number.
        props = {"k8s-pod-port": args.base_port + pod_number}
        if args.gateway_address:
            props["k8s-gateway-address"] = args.gateway_address
        return props

    return k8s_user_props_cb


def main():
    """Parse the command line, build a K8sIntegration and run its daemon on the queue."""
    args = parse_args()

    # Only request an SSH-server init script when a non-zero port was given.
    ssh_init_script = None
    if args.ssh_server_port:
        ssh_init_script = K8sIntegration.get_ssh_server_bash(
            ssh_port_number=args.ssh_server_port
        )

    glue = K8sIntegration(
        ports_mode=args.ports_mode,
        num_of_services=args.num_of_services,
        base_pod_num=args.base_pod_num,
        user_props_cb=_make_user_props_cb(args),
        overrides_yaml=args.overrides_yaml,
        clearml_conf_file=args.pod_clearml_conf,
        template_yaml=args.template_yaml,
        extra_bash_init_script=ssh_init_script,
        namespace=args.namespace,
        # A falsy --max-pods value (unset or 0) is passed on as None.
        max_pods_limit=args.max_pods or None,
    )
    glue.k8s_daemon(args.queue)


if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue
Block a user