From 2741bbae4b5d015f5629ba911e9844c5852abcb8 Mon Sep 17 00:00:00 2001 From: revital Date: Wed, 12 Feb 2025 08:07:20 +0200 Subject: [PATCH] Add ClearML Application Gateway overview and installation instructions --- .../appgw_install_compose.md | 0 docs/deploying_clearml/appgw_install_k8s.md | 0 .../enterprise_deploy/appgw.md | 40 ++++++ .../appgw_install_compose.md | 133 ++++++++++++++++++ .../enterprise_deploy/appgw_install_k8s.md | 91 ++++++++++++ sidebars.js | 5 +- 6 files changed, 267 insertions(+), 2 deletions(-) delete mode 100644 docs/deploying_clearml/appgw_install_compose.md delete mode 100644 docs/deploying_clearml/appgw_install_k8s.md create mode 100644 docs/deploying_clearml/enterprise_deploy/appgw.md create mode 100644 docs/deploying_clearml/enterprise_deploy/appgw_install_compose.md create mode 100644 docs/deploying_clearml/enterprise_deploy/appgw_install_k8s.md diff --git a/docs/deploying_clearml/appgw_install_compose.md b/docs/deploying_clearml/appgw_install_compose.md deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/deploying_clearml/appgw_install_k8s.md b/docs/deploying_clearml/appgw_install_k8s.md deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/deploying_clearml/enterprise_deploy/appgw.md b/docs/deploying_clearml/enterprise_deploy/appgw.md new file mode 100644 index 00000000..0e3cfdff --- /dev/null +++ b/docs/deploying_clearml/enterprise_deploy/appgw.md @@ -0,0 +1,40 @@ +--- +title: AI Application Gateway +--- + +Services running through a cluster orchestrator such as Kubernetes or a cloud hyperscaler require meticulous configuration +to make them available, as these environments do not expose their networks to external users. + +The ClearML AI Application Gateway facilitates setting up secure, authenticated access to jobs running on your compute +nodes from external networks. 
+ +Using the AI Application Gateway, services are allocated externally accessible, SSL-secured network routes that provide +access in adherence to ClearML RBAC privileges. The AI Application Gateway supports HTTP/S as well as raw TCP routing. + +The following ClearML UI applications make use of the AI Application Gateway to provide authenticated HTTPS access to +their instances: + +* GPUaaS + * [JupyterLab](../../webapp/applications/apps_jupyter_lab.md) + * [VS Code](../../webapp/applications/apps_vscode.md) + * [SSH Session](../../webapp/applications/apps_ssh_session.md) +* UI Dev + * [Gradio launcher](../../webapp/applications/apps_gradio.md) + * [Streamlit launcher](../../webapp/applications/apps_streamlit.md) +* Deploy + * [vLLM Deployment](../../webapp/applications/apps_model_deployment.md) + * [Embedding Model Deployment](../../webapp/applications/apps_embed_model_deployment.md) + * [Llama.cpp Model Deployment](../../webapp/applications/apps_llama_deployment.md) + +The AI Application Gateway is provided through an additional component to the ClearML Server deployment: the ClearML Task Traffic Router. +If your ClearML Deployment does not have the Task Traffic Router properly installed, these application instances may not be accessible. + +## Installation + +The Task Traffic Router supports two deployment options: + +* [Docker Compose](appgw_install_compose.md) +* [Kubernetes](appgw_install_k8s.md) + +The deployment configuration specifies the external and internal address and port mappings for routing requests. + diff --git a/docs/deploying_clearml/enterprise_deploy/appgw_install_compose.md b/docs/deploying_clearml/enterprise_deploy/appgw_install_compose.md new file mode 100644 index 00000000..c29a0570 --- /dev/null +++ b/docs/deploying_clearml/enterprise_deploy/appgw_install_compose.md @@ -0,0 +1,133 @@ +--- +title: Docker Compose Installation +--- + +Use docker-compose to deploy the Task Traffic Router. 
+ +## Requirements + +* Linux OS (x86) machine +* Root access +* Credentials for the ClearML Docker repository +* Valid ClearML Server installation + +## Host Configuration + +1. Install Docker (procedure may vary depending on your operating system). The code below is an example for Amazon Linux: + + ``` + sudo dnf -y install docker + DOCKER_CONFIG="/usr/local/lib/docker" + sudo mkdir -p $DOCKER_CONFIG/cli-plugins + sudo curl -SL https://github.com/docker/compose/releases/download/v2.17.3/docker-compose-linux-x86_64 -o $DOCKER_CONFIG/cli-plugins/docker-compose + sudo chmod +x $DOCKER_CONFIG/cli-plugins/docker-compose + sudo systemctl enable docker + sudo systemctl start docker + ``` + +1. Log in with credentials for the ClearML Docker Hub repository: + + ``` + sudo docker login + ``` + +## Docker Compose Configuration + + +1. Create a `docker-compose.yml` file. For example: + + ``` + version: '3.5' + services: + task_traffic_webserver: + image: allegroai/task-traffic-router-webserver:${TASK-TRAFFIC-ROUTER-WEBSERVER-TAG} + ports: + - "80:8080" + restart: unless-stopped + container_name: task_traffic_webserver + volumes: + - ./task_traffic_router/config/nginx:/etc/nginx/conf.d:ro + - ./task_traffic_router/config/lua:/usr/local/openresty/nginx/lua:ro + task_traffic_router: + image: allegroai/task-traffic-router:${TASK-TRAFFIC-ROUTER-TAG} + restart: unless-stopped + container_name: task_traffic_router + volumes: + - /var/run/docker.sock:/var/run/docker.sock + - ./task_traffic_router/config/nginx:/etc/nginx/conf.d:rw + - ./task_traffic_router/config/lua:/usr/local/openresty/nginx/lua:rw + environment: + - LOGGER_LEVEL=INFO + - CLEARML_API_HOST=${CLEARML_API_HOST:?err} + - CLEARML_API_ACCESS_KEY=${CLEARML_API_ACCESS_KEY:?err} + - CLEARML_API_SECRET_KEY=${CLEARML_API_SECRET_KEY:?err} + - ROUTER_URL=${ROUTER_URL:?err} + - ROUTER_NAME=${ROUTER_NAME:?err} + - AUTH_ENABLED=${AUTH_ENABLED:?err} + - SSL_VERIFY=${SSL_VERIFY:?err} + - AUTH_COOKIE_NAME=${AUTH_COOKIE_NAME:?err} + 
+ - AUTH_BASE64_JWKS_KEY=${AUTH_BASE64_JWKS_KEY:?err} + - LISTEN_QUEUE_NAME=${LISTEN_QUEUE_NAME} + - EXTRA_BASH_COMMAND=${EXTRA_BASH_COMMAND} + - TCP_ROUTER_ADDRESS=${TCP_ROUTER_ADDRESS} + - TCP_PORT_START=${TCP_PORT_START} + - TCP_PORT_END=${TCP_PORT_END} + ``` + +1. Create a `runtime.env` file with the following entries: + + ``` + TASK-TRAFFIC-ROUTER-WEBSERVER-TAG= + TASK-TRAFFIC-ROUTER-TAG= + CLEARML_API_HOST=https://api. + CLEARML_API_ACCESS_KEY= + CLEARML_API_SECRET_KEY= + ROUTER_URL= + ROUTER_NAME=main-router + AUTH_ENABLED=true + SSL_VERIFY=true + AUTH_COOKIE_NAME= + AUTH_BASE64_JWKS_KEY= + LISTEN_QUEUE_NAME= + EXTRA_BASH_COMMAND= + TCP_ROUTER_ADDRESS= + TCP_PORT_START= + TCP_PORT_END= + ``` + + Edit the `runtime.env` file: + * `CLEARML_API_HOST`: The URL of your ClearML API Server (i.e. starting with `https://api`). + * `CLEARML_API_ACCESS_KEY`: ClearML server API key + * `CLEARML_API_SECRET_KEY`: ClearML server API secret + * `ROUTER_URL`: The URL users will use to access the router, starting with `https://` + * `ROUTER_NAME`: The name for the router. Must be unique across the ClearML control plane scope + * `AUTH_ENABLED`: Whether to enable authentication of HTTP calls when the router is communicating with the ClearML Server + * `SSL_VERIFY`: Whether to enable SSL certificate validation when the router is communicating with the ClearML Server + * `AUTH_COOKIE_NAME`: The cookie used by the ClearML server to store the ClearML authentication token. 
This can + usually be found in the `value_prefix` key starting with `allegro_token` in the `envoy.yaml` file in the ClearML + Server installation (`/opt/allegro/config/envoy/envoy.yaml`) + * `AUTH_SECURE_ENABLED`: Enable the Set-Cookie `secure` parameter + * `AUTH_BASE64_JWKS_KEY`: Value from the `k` key in the `jwks.json` file in the ClearML server installation (see [JWKS key](#jwks-key)) + * `LISTEN_QUEUE_NAME`: The ClearML Server queue whose tasks the router will service (useful for setting up more than + one router in the same deployment, facilitating directing different routers to different tasks). Use `none` to have + the router service all tasks. + * `EXTRA_BASH_COMMAND`: Command to be launched before starting the router + * `TCP_ROUTER_ADDRESS`: The network address users will use for TCP connections to the router: IP address or hostname + (for the machine or a load balancer configured in front of it). + * `TCP_PORT_START` and `TCP_PORT_END`: The range of ports available for TCP connections to the router. Ensure that + the chosen range is open and accessible in your network configuration to allow proper routing. + +1. Start the router: + + ``` + sudo docker compose --env-file runtime.env up -d + ``` + +## JWKS Key + + +The **JSON Web Key Set** (JWKS) is a set of keys containing the public keys used to verify any JSON Web Token (JWT). + +For the `docker-compose` installation, the JWKS key is the value of the `CLEARML__secure__auth__token_secret` environment +variable in the API server component. + diff --git a/docs/deploying_clearml/enterprise_deploy/appgw_install_k8s.md b/docs/deploying_clearml/enterprise_deploy/appgw_install_k8s.md new file mode 100644 index 00000000..a5c7db6b --- /dev/null +++ b/docs/deploying_clearml/enterprise_deploy/appgw_install_k8s.md @@ -0,0 +1,91 @@ +--- +title: Kubernetes Installation +--- + +Use Kubernetes to deploy the Task Traffic Router. 
+ +## Requirements + +* Kubernetes cluster: `>= 1.21.0-0` `< 1.32.0-0` +* Helm installed and configured +* Helm token to access ClearML helm-chart repo +* Credentials for ClearML Docker repo +* Valid ClearML Server installation + +**Optional for HTTPS:** + +* Valid DNS entry for the new Task Router instance +* Valid SSL certificate + +## Helm Configuration + +1. Add the `allegroai-enterprise` Helm repository: + + ``` + helm repo add allegroai-enterprise \ + https://raw.githubusercontent.com/allegroai/clearml-enterprise-helm-charts/gh-pages \ + --username \ + --password + ``` + +1. Create a `task-traffic-router.values-override.yaml` file: + + ``` + imageCredentials: + password: "${dockerhub_token}" + clearml: + apiServerKey: "" + apiServerSecret: "" + apiServerUrlReference: "https://api." + jwksKey: "" + authCookieName: "" + ingress: + enabled: true + hostName: "task-router.dev" + tcpSession: + routerAddress: "" + portRange: + start: + end: + ``` + Edit the file according to these guidelines: + * `clearml.apiServerUrlReference`: URL starting with `https://api`. + * `clearml.apiServerKey`: ClearML Server API key + * `clearml.apiServerSecret`: ClearML Server API secret + * `ingress.hostName`: A unique URL users will use to access the router, starting with `https://` + * `clearml.sslVerify`: Whether to enable SSL certificate validation when the router is communicating with the ClearML + Server + * `clearml.authCookieName`: The cookie used by the ClearML server to store the ClearML authentication token. This + can usually be found in the `value_prefix` key starting with `allegro_token` in the `envoy.yaml` file in the ClearML + server installation (`/opt/allegro/config/envoy/envoy.yaml`) (see [JWKS Key](#jwks-key)) + * `clearml.jwksKey`: Value from the `k` key in the `jwks.json` file in the ClearML Server installation (see [JWKS Key](#jwks-key)). 
+ * `tcpSession.routerAddress`: The network address users will use for TCP connections to the router. This can be an IP address or hostname (for the machine or a load balancer configured in front of it). + * `tcpSession.portRange.start` and `tcpSession.portRange.end`: These ports define the range of ports available for TCP connections to the router. + + For a complete list of supported configurations, run: + ``` + helm show readme allegroai-enterprise/clearml-task-traffic-router + ``` + +3. Install the task traffic router component via Helm: + + ``` + helm upgrade --install \ + \ + -n \ + allegroai-enterprise/clearml-task-traffic-router \ + --version \ + -f task-traffic-router.values-override.yaml + ``` + +## JWKS Key + +The **JSON Web Key Set** (JWKS) is a set of keys containing the public keys used to verify any JSON Web Token (JWT). + +For the Kubernetes installation, use the following command to retrieve the **JWKS key**: + +``` +kubectl -n clearml get secret clearml-conf \ +-o jsonpath='{.data.secure_auth_token_secret}' \ +| base64 -d && echo +``` diff --git a/sidebars.js b/sidebars.js index 9e29f4fd..4900e486 100644 --- a/sidebars.js +++ b/sidebars.js @@ -635,9 +635,10 @@ module.exports = { collapsible: true, collapsed: true, label: 'ClearML Application Gateway', + link: {type: 'doc', id: 'deploying_clearml/enterprise_deploy/appgw'}, items: [ - 'deploying_clearml/appgw_install_compose', - 'deploying_clearml/appgw_install_k8s', + 'deploying_clearml/enterprise_deploy/appgw_install_compose', + 'deploying_clearml/enterprise_deploy/appgw_install_k8s', ] }, ]