From de583aa7d9c061af43a9cafcf3dcb805e87b8700 Mon Sep 17 00:00:00 2001 From: revital Date: Tue, 11 Mar 2025 14:18:29 +0200 Subject: [PATCH] Edit AI app gateway deployment instructions --- .../enterprise_deploy/appgw.md | 4 +- .../appgw_install_compose.md | 148 ++++++++++-------- .../enterprise_deploy/appgw_install_k8s.md | 94 +++++------ .../enterprise_deploy/multi_tenant_k8s.md | 11 +- .../apps_embed_model_deployment.md | 2 +- .../applications/apps_llama_deployment.md | 2 +- .../applications/apps_model_deployment.md | 2 +- 7 files changed, 146 insertions(+), 117 deletions(-) diff --git a/docs/deploying_clearml/enterprise_deploy/appgw.md b/docs/deploying_clearml/enterprise_deploy/appgw.md index 2679df85..85ce6fef 100644 --- a/docs/deploying_clearml/enterprise_deploy/appgw.md +++ b/docs/deploying_clearml/enterprise_deploy/appgw.md @@ -30,12 +30,12 @@ their instances: * [Embedding Model Deployment](../../webapp/applications/apps_embed_model_deployment.md) * [Llama.cpp Model Deployment](../../webapp/applications/apps_llama_deployment.md) -The AI Application Gateway is provided through an additional component to the ClearML Server deployment: The ClearML Task Traffic Router. +The AI Application Gateway requires an additional component to the ClearML Server deployment: The ClearML Task Traffic Router. If your ClearML Deployment does not have the Task Traffic Router properly installed, these application instances may not be accessible. 
#### Installation -The Task Traffic Router supports two deployment options: +The AI Application Gateway supports two deployment options: * [Docker Compose](appgw_install_compose.md) * [Kubernetes](appgw_install_k8s.md) diff --git a/docs/deploying_clearml/enterprise_deploy/appgw_install_compose.md b/docs/deploying_clearml/enterprise_deploy/appgw_install_compose.md index c77f4113..e4acc3b8 100644 --- a/docs/deploying_clearml/enterprise_deploy/appgw_install_compose.md +++ b/docs/deploying_clearml/enterprise_deploy/appgw_install_compose.md @@ -40,77 +40,65 @@ This is an example of the `docker-compose` file you will need: ``` version: '3.5' services: -task_traffic_webserver: - image: allegroai/task-traffic-router-webserver:${TASK-TRAFFIC-ROUTER-WEBSERVER-TAG} - ports: - - "80:8080" - restart: unless-stopped - container_name: task_traffic_webserver - volumes: - - ./task_traffic_router/config/nginx:/etc/nginx/conf.d:ro - - ./task_traffic_router/config/lua:/usr/local/openresty/nginx/lua:ro -task_traffic_router: - image: allegroai/task-traffic-router:${TASK-TRAFFIC-ROUTER-TAG} - restart: unless-stopped - container_name: task_traffic_router - volumes: - - /var/run/docker.sock:/var/run/docker.sock - - ./task_traffic_router/config/nginx:/etc/nginx/conf.d:rw - - ./task_traffic_router/config/lua:/usr/local/openresty/nginx/lua:rw - environment: - - LOGGER_LEVEL=INFO - - CLEARML_API_HOST=${CLEARML_API_HOST:?err} - - CLEARML_API_ACCESS_KEY=${CLEARML_API_ACCESS_KEY:?err} - - CLEARML_API_SECRET_KEY=${CLEARML_API_SECRET_KEY:?err} - - ROUTER_URL=${ROUTER_URL:?err} - - ROUTER_NAME=${ROUTER_NAME:?err} - - AUTH_ENABLED=${AUTH_ENABLED:?err} - - SSL_VERIFY=${SSL_VERIFY:?err} - - AUTH_COOKIE_NAME=${AUTH_COOKIE_NAME:?err} - - AUTH_BASE64_JWKS_KEY=${AUTH_BASE64_JWKS_KEY:?err} - - LISTEN_QUEUE_NAME=${LISTEN_QUEUE_NAME} - - EXTRA_BASH_COMMAND=${EXTRA_BASH_COMMAND} - - TCP_ROUTER_ADDRESS=${TCP_ROUTER_ADDRESS} - - TCP_PORT_START=${TCP_PORT_START} - - TCP_PORT_END=${TCP_PORT_END} - + 
task_traffic_webserver: + image: clearml/ai-gateway-proxy:${PROXY_TAG:?err} + network_mode: "host" + restart: unless-stopped + container_name: task_traffic_webserver + volumes: + - ./task_traffic_router/config/nginx:/etc/nginx/conf.d:ro + - ./task_traffic_router/config/lua:/usr/local/openresty/nginx/lua:ro + task_traffic_router: + image: clearml/ai-gateway-router:${ROUTER_TAG:?err} + restart: unless-stopped + container_name: task_traffic_router + volumes: + - /var/run/docker.sock:/var/run/docker.sock + - ./task_traffic_router/config/nginx:/etc/nginx/conf.d:rw + - ./task_traffic_router/config/lua:/usr/local/openresty/nginx/lua:rw + environment: + - ROUTER_NAME=${ROUTER_NAME:?err} + - ROUTER__WEBSERVER__SERVER_PORT=${ROUTER__WEBSERVER__SERVER_PORT:?err} + - ROUTER_URL=${ROUTER_URL:?err} + - CLEARML_API_HOST=${CLEARML_API_HOST:?err} + - CLEARML_API_ACCESS_KEY=${CLEARML_API_ACCESS_KEY:?err} + - CLEARML_API_SECRET_KEY=${CLEARML_API_SECRET_KEY:?err} + - AUTH_COOKIE_NAME=${AUTH_COOKIE_NAME:?err} + - AUTH_SECURE_ENABLED=${AUTH_SECURE_ENABLED} + - TCP_ROUTER_ADDRESS=${TCP_ROUTER_ADDRESS} + - TCP_PORT_START=${TCP_PORT_START} + - TCP_PORT_END=${TCP_PORT_END} ``` -Create a *runtime.env* file containing the following entries: +Create a `runtime.env` file containing the following entries: ``` -TASK-TRAFFIC-ROUTER-WEBSERVER-TAG= -TASK-TRAFFIC-ROUTER-TAG= -CLEARML_API_HOST=https://api. 
+PROXY_TAG= +ROUTER_TAG= +ROUTER_NAME=main-router +ROUTER__WEBSERVER__SERVER_PORT=8010 +ROUTER_URL= +CLEARML_API_HOST= CLEARML_API_ACCESS_KEY= CLEARML_API_SECRET_KEY= -ROUTER_URL= -ROUTER_NAME=main-router -AUTH_ENABLED=true -SSL_VERIFY=true AUTH_COOKIE_NAME= -AUTH_BASE64_JWKS_KEY= -LISTEN_QUEUE_NAME= -EXTRA_BASH_COMMAND= +AUTH_SECURE_ENABLED=true TCP_ROUTER_ADDRESS= TCP_PORT_START= TCP_PORT_END= ``` Edit it according to the following guidelines: - -* `CLEARML_API_HOST`: URL usually starting with `https://api.` -* `CLEARML_API_ACCESS_KEY`: ClearML server api key -* `CLEARML_API_SECRET_KEY`: ClearML server secret key -* `ROUTER_URL`: URL for this router that was previously configured in the load balancer starting with `https://` -* `ROUTER_NAME`: Unique name for this router -* `AUTH_ENABLED`: Enable or disable http calls authentication when the router is communicating with the ClearML server -* `SSL_VERIFY`: Enable or disable SSL certificate validation when the router is communicating with the ClearML server -* `AUTH_COOKIE_NAME`: Cookie name used by the ClearML server to store the ClearML authentication cookie. This can usually be found in the `value_prefix` key starting with `allegro_token` in `envoy.yaml` file in the ClearML server installation (`/opt/allegro/config/envoy/envoy.yaml`) (see below) -* `AUTH_SECURE_ENABLED`: Enable the Set-Cookie `secure` parameter -* `AUTH_BASE64_JWKS_KEY`: Value form `k` key in the `jwks.json` file in the ClearML server installation -* `LISTEN_QUEUE_NAME`: (*optional*) Name of queue to check for tasks (if none, every task is checked) -* `EXTRA_BASH_COMMAND`: Command to be launched before starting the router +* `PROXY_TAG`: AI Application Gateway Proxy tag. +* `ROUTER_TAG`: AI Application Gateway Router tag. +* `ROUTER_NAME`: Unique name for this router, needed in case of [multiple routers on the same tenant](#install-multiple-routers-for-the-same-tenant). +* `ROUTER__WEBSERVER__SERVER_PORT`: Webserver port. 
Default is 8080 but can be set differently based on network needs. +* `ROUTER_URL`: URL for this router that was previously configured in the load balancer starting with `https://`. +* `CLEARML_API_HOST`: ClearML API server URL usually starting with `https://api.` +* `CLEARML_API_ACCESS_KEY`: ClearML server API key. +* `CLEARML_API_SECRET_KEY`: ClearML server secret key. +* `AUTH_COOKIE_NAME`: Cookie name used by the ClearML server to store the ClearML authentication cookie. This can usually be found in the `value_prefix` key starting with `allegro_token` in `envoy.yaml` file in the ClearML server installation (`/opt/allegro/config/envoy/envoy.yaml`) +* `AUTH_SECURE_ENABLED`: Enable the Set-Cookie `secure` parameter. Set to `false` in case services are exposed with `http`. * `TCP_ROUTER_ADDRESS`: Router external address, can be an IP or the host machine or a load balancer hostname, depends on network configuration * `TCP_PORT_START`: Start port for the TCP Session feature * `TCP_PORT_END`: End port for the TCP Session feature @@ -121,12 +109,48 @@ Run the following command to start the router: sudo docker compose --env-file runtime.env up -d ``` -:::note How to find my jwkskey +### Advanced Configuration -The *JSON Web Key Set* (*JWKS*) is a set of keys containing the public keys used to verify any JSON Web Token (JWT). +#### Running without Certificates +When running on `docker-compose` with an HTTP interface without certificates, set the following entry in the +`runtime.env`: -In a `docker-compose` server installation, this can be found in the `CLEARML__secure__auth__token_secret` env var in the apiserver server component. +``` +AUTH_SECURE_ENABLED=false +``` + +#### Install Multiple Routers for the Same Tenant +To deploy multiple routers within the same tenant, you must configure each router to handle specific workloads. + +Using this setting, each router will only route tasks that originated from its assigned queues. 
This +is important in case you have multiple networks with different agents. For example: +* Tasks started by Agent A can only be reached by Router A (within the same network), but cannot be reached by Router B +* Agent B will handle a separate set of tasks which can only be reached by Router B + +The assumption in this case is that Agent A and Agent B will service different queues, and routers must be configured to +route tasks based on these queue definitions. + +Each router in the same tenant must have: +* A unique `ROUTER_NAME` +* A distinct set of queues listed in `LISTEN_QUEUE_NAME` + +For example: +* **Router-A** `runtime.env` + + ``` + ROUTER_NAME=router-a + LISTEN_QUEUE_NAME=queue1,queue2 + ``` + +* **Router-B** `runtime.env` + + ``` + ROUTER_NAME=router-b + LISTEN_QUEUE_NAME=queue3,queue4 + ``` + +Ensure that `LISTEN_QUEUE_NAME` is included in the [`docker-compose` environment variables](#docker-compose-file) for each router +instance. -::: diff --git a/docs/deploying_clearml/enterprise_deploy/appgw_install_k8s.md b/docs/deploying_clearml/enterprise_deploy/appgw_install_k8s.md index 906429c6..e23b191e 100644 --- a/docs/deploying_clearml/enterprise_deploy/appgw_install_k8s.md +++ b/docs/deploying_clearml/enterprise_deploy/appgw_install_k8s.md @@ -3,17 +3,25 @@ title: Kubernetes Deployment --- :::important Enterprise Feature -The Application Gateway is available under the ClearML Enterprise plan. +The AI Application Gateway is available under the ClearML Enterprise plan. +::: + +This guide details the installation of the ClearML AI Application Gateway. +The AI Application Gateway enables access to session-based applications like VSCode and Jupyter. +It acts as a proxy, discovering ClearML Tasks running within its namespace and configuring them for user access. + +:::important +The AI Application Gateway must be installed in the same namespace as a dedicated ClearML Agent. +It can only discover ClearML Tasks within its own namespace. 
+::: -This guide details the installation of the ClearML AI Application Gateway, specifically the ClearML Task Router Component. ## Requirements * Kubernetes cluster: `>= 1.21.0-0 < 1.32.0-0` * Helm installed and configured -* Helm token to access `allegroai` helm-chart repo -* Credentials for `allegroai` docker repo +* Helm token to access `clearml` helm-chart repo +* Credentials for `clearml` docker repo * A valid ClearML Server installation ## Optional for HTTPS @@ -26,59 +34,52 @@ This guide details the installation of the ClearML AI Application Gateway, speci ### Login ``` -helm repo add allegroai-enterprise \ +helm repo add clearml-enterprise \ https://raw.githubusercontent.com/clearml/clearml-enterprise-helm-charts/gh-pages \ --username <GITHUB_TOKEN> \ --password <GITHUB_TOKEN> ``` +Replace `<GITHUB_TOKEN>` with your valid GitHub token that has access to the ClearML Enterprise Helm charts repository. + ### Prepare Values -Before installing the TTR, create a `helm-override` files named `task-traffic-router.values-override.yaml`: +Before installing the AI Application Gateway, create a Helm override file (for example, `override.yaml`): ``` imageCredentials: - password: "" + password: "" clearml: - apiServerKey: "" - apiServerSecret: "" - apiServerUrlReference: "https://api." 
- jwksKey: "" - authCookieName: "" + apiServerKey: "" + apiServerSecret: "" + apiServerUrlReference: "" + authCookieName: "" + sslVerify: true ingress: - enabled: true - hostName: "task-router.dev" + enabled: true + hostName: "" tcpSession: - routerAddress: "" - portRange: - start: - end: + routerAddress: "" + service: + type: LoadBalancer + portRange: + start: + end: ``` -Edit it accordingly to these guidelines: +Edit it according to these guidelines: -* `clearml.apiServerUrlReference`: URL usually starting with `https://api.` -* `clearml.apiServerKey`: ClearML server api key -* `clearml.apiServerSecret`: ClearML server secret key -* `ingress.hostName`: URL of router we configured previously for load balancer starting with `https://` -* `clearml.sslVerify`: Enable or disable SSL certificate validation on apiserver calls check -* `clearml.authCookieName`: Value from `value_prefix` key starting with `allegro_token` in `envoy.yaml` file in ClearML server installation. -* `clearml.jwksKey`: Value form `k` key in `jwks.json` file in ClearML server installation (see below) -* `tcpSession.routerAddress`: Router external address can be an IP or the host machine or a load balancer hostname, depends on the network configuration -* `tcpSession.portRange.start`: Start port for the TCP Session feature -* `tcpSession.portRange.end`: End port for the TCP Session feature - -:::note How to find my jwkskey - -The *JSON Web Key Set* (*JWKS*) is a set of keys containing the public keys used to verify any JSON Web Token (JWT). - -``` -kubectl -n clearml get secret clearml-conf \ --o jsonpath='{.data.secure_auth_token_secret}' \ -| base64 -d && echo -``` - -::: +* `imageCredentials.password`: ClearML DockerHub Access Token. +* `clearml.apiServerKey`: ClearML server API key. +* `clearml.apiServerSecret`: ClearML server secret key. +* `clearml.apiServerUrlReference`: ClearML API server URL usually starting with `https://api.`. 
+* `clearml.authCookieName`: Cookie name used by the ClearML server to store the ClearML authentication cookie. +* `clearml.sslVerify`: Enable or disable SSL certificate validation for calls to the API server. +* `ingress.hostName`: Hostname of router used by the ingress controller to access it. +* `tcpSession.routerAddress`: The external router address (can be an IP, hostname, or load balancer address) depending on your network setup. Ensure this address is accessible for TCP connections. +* `tcpSession.service.type`: Service type used to expose TCP functionality, default is `NodePort`. +* `tcpSession.portRange.start`: Start port for the TCP Session feature. +* `tcpSession.portRange.end`: End port for the TCP Session feature. The whole list of supported configuration is available with the command: @@ -94,9 +95,14 @@ To install the TTR component via Helm use the following command: ``` helm upgrade --install \ <RELEASE_NAME> \ --n \ +-n <NAMESPACE> \ allegroai-enterprise/clearml-enterprise-task-traffic-router \ ---version \ --f task-traffic-router.values-override.yaml +--version <CHART_VERSION> \ +-f override.yaml ``` +Replace the placeholders with the following values: + +* `<RELEASE_NAME>` - Unique name for the AI Application Gateway within the namespace. This name will appear in the UI and be used for the redirection URL. +* `<NAMESPACE>` - Namespace that will be shared with a dedicated ClearML Agent. +* `<CHART_VERSION>` - Version recommended by the ClearML Support Team. 
\ No newline at end of file diff --git a/docs/deploying_clearml/enterprise_deploy/multi_tenant_k8s.md b/docs/deploying_clearml/enterprise_deploy/multi_tenant_k8s.md index 7ab41ef4..2dbeb7fe 100644 --- a/docs/deploying_clearml/enterprise_deploy/multi_tenant_k8s.md +++ b/docs/deploying_clearml/enterprise_deploy/multi_tenant_k8s.md @@ -513,31 +513,30 @@ Create a `NetworkPolicy` in the tenant namespace with the following configuratio - podSelector: {} ``` -### Install Task Traffic Router Chart +### Install AI Application Gateway Chart -Install the [Task Traffic Router](appgw.md) in your Kubernetes cluster, allowing it to manage and route tasks: +Install the [AI App Gateway](appgw.md) in your Kubernetes cluster, allowing it to manage and route tasks: 1. Prepare the `overrides.yaml` file with the following content: ``` imageCredentials: - password: "" + password: "" clearml: apiServerUrlReference: "" apiserverKey: "" apiserverSecret: "" - jwksKey: "" ingress: enabled: true hostName: "" ``` -2. Install Task Traffic Router in the specified tenant namespace: +2. Install AI Gateway Router in the specified tenant namespace: ``` helm install -n \\ clearml-ttr \\ - allegroai-enterprise/clearml-task-traffic-router \\ + clearml-enterprise/clearml-task-traffic-router \\ --create-namespace \\ -f overrides.yaml ``` diff --git a/docs/webapp/applications/apps_embed_model_deployment.md b/docs/webapp/applications/apps_embed_model_deployment.md index ee8be58c..83ba71a6 100644 --- a/docs/webapp/applications/apps_embed_model_deployment.md +++ b/docs/webapp/applications/apps_embed_model_deployment.md @@ -13,7 +13,7 @@ running, it serves your embedding model through a secure, publicly accessible ne endpoint activity and shuts down if the model remains inactive for a specified maximum idle time. 
:::info AI Application Gateway -The Embedding Model Deployment app makes use of the ClearML Traffic Router which implements a secure, authenticated +The Embedding Model Deployment app makes use of the AI Application Gateway which implements a secure, authenticated network endpoint for the model. If the ClearML AI application Gateway is not available, the model endpoint might not be accessible. diff --git a/docs/webapp/applications/apps_llama_deployment.md b/docs/webapp/applications/apps_llama_deployment.md index 442640b9..375ff0b1 100644 --- a/docs/webapp/applications/apps_llama_deployment.md +++ b/docs/webapp/applications/apps_llama_deployment.md @@ -12,7 +12,7 @@ running, it serves your model through a secure, publicly accessible network endp and shuts down if the model remains inactive for a specified maximum idle time. :::important AI Application Gateway -The llama.cpp Model Deployment app makes use of the ClearML Traffic Router which implements a secure, authenticated +The llama.cpp Model Deployment app makes use of the AI Application Gateway which implements a secure, authenticated network endpoint for the model. If the ClearML AI application Gateway is not available, the model endpoint might not be accessible. diff --git a/docs/webapp/applications/apps_model_deployment.md b/docs/webapp/applications/apps_model_deployment.md index eba05532..250ef0ae 100644 --- a/docs/webapp/applications/apps_model_deployment.md +++ b/docs/webapp/applications/apps_model_deployment.md @@ -13,7 +13,7 @@ it serves your model through a secure, publicly accessible network endpoint. The shuts down if the model remains inactive for a specified maximum idle time. :::info AI Application Gateway -The vLLM Model Deployment app makes use of the ClearML Traffic Router which implements a secure, authenticated +The vLLM Model Deployment app makes use of the AI Application Gateway which implements a secure, authenticated network endpoint for the model. 
If the ClearML AI application Gateway is not available, the model endpoint might not be accessible.