Add additional k8s-glue dockerfiles (#94)

This commit is contained in:
Andrey Okhotnikov 2022-02-21 14:59:50 +01:00 committed by GitHub
parent 36073ad488
commit 92b5ce61a0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 253 additions and 0 deletions

View File

@ -60,6 +60,8 @@ It is a zero configuration fire-and-forget execution agent, providing a full ML/
### Kubernetes Integration (Optional)
We think Kubernetes is awesome, but it should be a choice.
We designed `clearml-agent` so you can run bare-metal or inside a pod with any mix that fits your environment.
Find Dockerfiles in [docker](./docker) dir and a helm Chart in https://github.com/allegroai/clearml-helm-charts
#### Benefits of integrating existing K8s with ClearML-Agent
- ClearML-Agent adds the missing scheduling capabilities to K8s
- Allowing for more flexible automation from code

View File

@ -0,0 +1,75 @@
ARG TAG=3.7.12-alpine3.15
FROM python:${TAG} as build
RUN apk add --no-cache \
gcc \
musl-dev \
libffi-dev
RUN python3 \
-m pip \
install \
--prefix=/install \
--no-cache-dir \
-U \
clearml-agent \
cryptography>=2.9
FROM python:${TAG} as target
WORKDIR /app
ARG KUBECTL_VERSION=1.22.4
# Not sure about these ENV vars
# ENV LC_ALL=en_US.UTF-8
# ENV LANG=en_US.UTF-8
# ENV LANGUAGE=en_US.UTF-8
# ENV PYTHONIOENCODING=UTF-8
COPY --from=build /install /usr/local
ADD https://storage.googleapis.com/kubernetes-release/release/v${KUBECTL_VERSION}/bin/linux/amd64/kubectl /usr/bin/
RUN chmod +x /usr/bin/kubectl
RUN apk add --no-cache \
bash
COPY k8s_glue_example.py .
# AWS CLI
# https://github.com/kyleknap/aws-cli/blob/source-proposal/proposals/source-install.md#alpine-linux
# https://github.com/aws/aws-cli/issues/4685
# https://github.com/aws/aws-cli/pull/6352
# https://github.com/GoogleCloudPlatform/cloud-sdk-docker/blob/master/alpine/Dockerfile
FROM target as gcp
ARG CLOUD_SDK_VERSION=371.0.0
ENV CLOUD_SDK_VERSION=$CLOUD_SDK_VERSION
ENV PATH /google-cloud-sdk/bin:$PATH
WORKDIR /
RUN apk --no-cache add \
curl \
python3 \
py3-crcmod \
py3-openssl \
bash \
libc6-compat \
openssh-client \
git \
gnupg \
&& curl -O https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-${CLOUD_SDK_VERSION}-linux-x86_64.tar.gz && \
tar xzf google-cloud-sdk-${CLOUD_SDK_VERSION}-linux-x86_64.tar.gz && \
rm google-cloud-sdk-${CLOUD_SDK_VERSION}-linux-x86_64.tar.gz && \
gcloud config set core/disable_usage_reporting true && \
gcloud config set component_manager/disable_update_check true && \
gcloud config set metrics/environment github_docker_image && \
gcloud --version
WORKDIR /app

View File

@ -0,0 +1,82 @@
ARG TAG=3.7.12-slim-bullseye
FROM python:${TAG} as target
ARG KUBECTL_VERSION=1.22.4
WORKDIR /app
RUN python3 \
-m pip \
install \
--no-cache-dir \
-U \
clearml-agent \
cryptography>=2.9
# Not sure about these ENV vars
# ENV LC_ALL=en_US.UTF-8
# ENV LANG=en_US.UTF-8
# ENV LANGUAGE=en_US.UTF-8
# ENV PYTHONIOENCODING=UTF-8
ADD https://storage.googleapis.com/kubernetes-release/release/v${KUBECTL_VERSION}/bin/linux/amd64/kubectl /usr/bin/
RUN chmod +x /usr/bin/kubectl
COPY k8s_glue_example.py .
CMD ["python3", "k8s_glue_example.py"]
FROM target as aws
# https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html
# https://docs.aws.amazon.com/eks/latest/userguide/install-aws-iam-authenticator.html
RUN apt-get update -qqy && \
apt-get install -qqy \
unzip && \
rm -rf /var/lib/apt/lists/*
ADD https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip awscliv2.zip
ADD https://amazon-eks.s3.us-west-2.amazonaws.com/1.21.2/2021-07-05/bin/linux/amd64/aws-iam-authenticator /usr/local/bin/aws-iam-authenticator
RUN unzip awscliv2.zip && \
./aws/install && \
rm -r awscliv2.zip aws/ && \
chmod +x /usr/local/bin/aws-iam-authenticator && \
aws --version && \
aws-iam-authenticator version
# https://github.com/GoogleCloudPlatform/cloud-sdk-docker/blob/master/debian_slim/Dockerfile
FROM target as gcp
ARG CLOUD_SDK_VERSION=371.0.0
ENV CLOUD_SDK_VERSION=$CLOUD_SDK_VERSION
ENV PATH "$PATH:/opt/google-cloud-sdk/bin/"
ARG INSTALL_COMPONENTS
RUN mkdir -p /usr/share/man/man1/
RUN apt-get update -qqy && \
apt-get install -qqy \
curl \
gcc \
python3-dev \
python3-pip \
apt-transport-https \
lsb-release \
openssh-client \
git \
gnupg && \
rm -rf /var/lib/apt/lists/* && \
pip3 install -U crcmod && \
export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" && \
echo "deb https://packages.cloud.google.com/apt $CLOUD_SDK_REPO main" > /etc/apt/sources.list.d/google-cloud-sdk.list && \
curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \
apt-get update && apt-get install -y google-cloud-sdk=${CLOUD_SDK_VERSION}-0 $INSTALL_COMPONENTS && \
gcloud config set core/disable_usage_reporting true && \
gcloud config set component_manager/disable_update_check true && \
gcloud config set metrics/environment github_docker_image && \
gcloud --version

View File

@ -0,0 +1,94 @@
"""
This example assumes you have preconfigured services with selectors in the form of
"ai.allegro.agent.serial=pod-<number>" and a targetPort of 10022.
The K8sIntegration component will label each pod accordingly.
"""
from argparse import ArgumentParser
from clearml_agent.glue.k8s import K8sIntegration
def parse_args():
parser = ArgumentParser()
group = parser.add_mutually_exclusive_group()
parser.add_argument(
"--queue", type=str, help="Queue to pull tasks from"
)
group.add_argument(
"--ports-mode", action='store_true', default=False,
help="Ports-Mode will add a label to the pod which can be used as service, in order to expose ports"
"Should not be used with max-pods"
)
parser.add_argument(
"--num-of-services", type=int, default=20,
help="Specify the number of k8s services to be used. Use only with ports-mode."
)
parser.add_argument(
"--base-port", type=int,
help="Used in conjunction with ports-mode, specifies the base port exposed by the services. "
"For pod #X, the port will be <base-port>+X. Note that pod number is calculated based on base-pod-num"
"e.g. if base-port=20000 and base-pod-num=3, the port for the first pod will be 20003"
)
parser.add_argument(
"--base-pod-num", type=int, default=1,
help="Used in conjunction with ports-mode and base-port, specifies the base pod number to be used by the "
"service (default: %(default)s)"
)
parser.add_argument(
"--gateway-address", type=str, default=None,
help="Used in conjunction with ports-mode, specify the external address of the k8s ingress / ELB"
)
parser.add_argument(
"--pod-clearml-conf", type=str,
help="Configuration file to be used by the pod itself (if not provided, current configuration is used)"
)
parser.add_argument(
"--overrides-yaml", type=str,
help="YAML file containing pod overrides to be used when launching a new pod"
)
parser.add_argument(
"--template-yaml", type=str,
help="YAML file containing pod template. If provided pod will be scheduled with kubectl apply "
"and overrides are ignored, otherwise it will be scheduled with kubectl run"
)
parser.add_argument(
"--ssh-server-port", type=int, default=0,
help="If non-zero, every pod will also start an SSH server on the selected port (default: zero, not active)"
)
parser.add_argument(
"--namespace", type=str,
help="Specify the namespace in which pods will be created (default: %(default)s)", default="clearml"
)
group.add_argument(
"--max-pods", type=int,
help="Limit the maximum number of pods that this service can run at the same time."
"Should not be used with ports-mode"
)
return parser.parse_args()
def main():
args = parse_args()
user_props_cb = None
if args.ports_mode and args.base_port:
def k8s_user_props_cb(pod_number=0):
user_prop = {"k8s-pod-port": args.base_port + pod_number}
if args.gateway_address:
user_prop["k8s-gateway-address"] = args.gateway_address
return user_prop
user_props_cb = k8s_user_props_cb
k8s = K8sIntegration(
ports_mode=args.ports_mode, num_of_services=args.num_of_services, base_pod_num=args.base_pod_num,
user_props_cb=user_props_cb, overrides_yaml=args.overrides_yaml, clearml_conf_file=args.pod_clearml_conf,
template_yaml=args.template_yaml, extra_bash_init_script=K8sIntegration.get_ssh_server_bash(
ssh_port_number=args.ssh_server_port) if args.ssh_server_port else None,
namespace=args.namespace, max_pods_limit=args.max_pods or None,
)
k8s.k8s_daemon(args.queue)
if __name__ == "__main__":
main()