From b971dcaff932202ce46a26cdd06f9903fb5ec2c3 Mon Sep 17 00:00:00 2001 From: revital Date: Wed, 12 Mar 2025 09:18:07 +0200 Subject: [PATCH 01/17] add hosted app gateway --- .../appgw_intall_compose_hosted.md | 156 ++++++++++++++++++ 1 file changed, 156 insertions(+) create mode 100644 docs/deploying_clearml/enterprise_deploy/appgw_intall_compose_hosted.md diff --git a/docs/deploying_clearml/enterprise_deploy/appgw_intall_compose_hosted.md b/docs/deploying_clearml/enterprise_deploy/appgw_intall_compose_hosted.md new file mode 100644 index 00000000..7f64c9b1 --- /dev/null +++ b/docs/deploying_clearml/enterprise_deploy/appgw_intall_compose_hosted.md @@ -0,0 +1,156 @@ +--- +title: Installing AI Application Gateway with docker-compose - hosted server +--- + +:::important Enterprise Feature +The Application Gateway is available under the ClearML Enterprise plan. +::: + +The AI Application Gateway enables external access to ClearML tasks, and applications running on workload nodes that +require HTTP or TCP access. The gateway is configured with an endpoint or external address, making these services +accessible from the user's machine, outside the workload nodes’ network. + +This guide details the installation of the ClearML AI Application Gateway for ClearML users who use ClearML’s SaaS control +plane while hosting their own workload nodes. + +## Requirements + +* Linux OS (x86) machine with root access +* The machine needs to be reachable from your user network +* The machine needs to have network reachability to workload nodes +* Credentials for the ClearML docker repository +* A valid ClearML Server installation + +## Recommendations + +* For a secure connection, we recommend having a DNS entry and a valid SSL Certificate assigned to the machine IP. + +## Host Configuration + +### Docker Installation + +Installing `docker` and `docker-compose` might vary depending on the specific operating system you're using. 
Here is an +example for AmazonLinux: + +``` +sudo dnf -y install docker +DOCKER_CONFIG="/usr/local/lib/docker" +sudo mkdir -p $DOCKER_CONFIG/cli-plugins +sudo curl -SL https://github.com/docker/compose/releases/download/v2.17.3/docker-compose-linux-x86_64 -o $DOCKER_CONFIG/cli-plugins/docker-compose +sudo chmod +x $DOCKER_CONFIG/cli-plugins/docker-compose +sudo systemctl enable docker +sudo systemctl start docker + +sudo docker login +``` + +Use the ClearML docker hub credentials when prompted by docker login. + +### Docker-compose File + +This is an example of the docker-compose file you will need to create: + +``` +version: '3.5' +services: + task_traffic_webserver: + image: clearml/ai-gateway-proxy:${PROXY_TAG:?err} + network_mode: "host" + restart: unless-stopped + container_name: task_traffic_webserver + volumes: + - ./task_traffic_router/config/nginx:/etc/nginx/conf.d:ro + - ./task_traffic_router/config/lua:/usr/local/openresty/nginx/lua:ro + task_traffic_router: + image: clearml/ai-gateway-router:${ROUTER_TAG:?err} + restart: unless-stopped + container_name: task_traffic_router + volumes: + - /var/run/docker.sock:/var/run/docker.sock + - ./task_traffic_router/config/nginx:/etc/nginx/conf.d:rw + - ./task_traffic_router/config/lua:/usr/local/openresty/nginx/lua:rw + environment: + - LOGGER_LEVEL=INFO + - ROUTER__WEBSERVER__SERVER_PORT="8010" + - ROUTER_NAME=${ROUTER_NAME:?err} + - ROUTER_URL=${ROUTER_URL:?err} + - CLEARML_API_HOST=${CLEARML_API_HOST:?err} + - CLEARML_API_ACCESS_KEY=${CLEARML_API_ACCESS_KEY:?err} + - CLEARML_API_SECRET_KEY=${CLEARML_API_SECRET_KEY:?err} + - AUTH_COOKIE_NAME=${AUTH_COOKIE_NAME:?err} + - AUTH_SECURE_ENABLED=${AUTH_SECURE_ENABLED} + - TCP_ROUTER_ADDRESS=${TCP_ROUTER_ADDRESS} + - TCP_PORT_START=${TCP_PORT_START} + - TCP_PORT_END=${TCP_PORT_END} +``` + +### Configuration File + +You will be provided with a prefilled `runtime.env` file containing the following entries: + +``` +# PREFILLED SECTION, PROVIDED BY CLEARML +PROXY_TAG= 
+ROUTER_TAG= +CLEARML_API_HOST=https://api. +AUTH_COOKIE_NAME= + +# TO BE FILLED BY USER +ROUTER_NAME=main-router +ROUTER_URL=http://:8010 +CLEARML_API_ACCESS_KEY= +CLEARML_API_SECRET_KEY= +AUTH_SECURE_ENABLED=true +TCP_ROUTER_ADDRESS= +TCP_PORT_START= +TCP_PORT_END= +``` + +Edit it according to the following guidelines: + +* `ROUTER_NAME`: The name of the Router, which needs to be unique for each tenant. +* `CLEARML_API_ACCESS_KEY, CLEARML_API_SECRET_KEY:` API credentials created in the ClearML web UI, for Admin user or Service Account with admin privileges. Make sure to label these credentials clearly, so that they will not be revoked by mistake. +* `ROUTER_URL`: The URL for this router. This URL will be shown in the UI of any application for users to access (Like hosted Jupyter or LLM UI). +* `TCP_ROUTER_ADDRESS`: The TCP Router external address, which is an IP of the host machine or a load balancer hostname, depending on the customer network configuration. +* `TCP_PORT_START`: The start port for the TCP Tasks, chosen by the customer. Ensure that ports are open and can be allocated on the host. +* `TCP_PORT_END`: The end port for the TCP Tasks, chosen by the customer. Ensure that ports are open and can be allocated on the host. + +### Installation + +Run the following command to start the router: + +``` +sudo docker compose --env-file runtime.env up -d +``` + +### Advanced + +#### Running without Certificates + +When running on docker-compose with an HTTP interface and without certificates please set the following entry in the *runtime.env* as below: + +``` +AUTH_SECURE_ENABLED=false +``` + +#### Install Multiple Routers for the Same Tenant + +To install multiple routers in the same tenant it is necessary to set parameters to identify and split the workload. Using this setting, each router will only handle routing to tasks that have originated from the specific queues it was assigned. 
This is important in case you have two different networks with two different agents, and tasks started by Agent A can only be reached by Router A (in the same network), but simply cannot be reached by Router B. The assumption in this case is that Agent A and Agent B will service different queues, and the Routers handling routing to the tasks executed by each agent will need to match the queue definitions. +Multiple routers in the same tenant must have different `ROUTER_NAME` and listen to different queues (`LISTEN_QUEUE_NAME`). + +**Router-A** *runtime.env* + +``` +ROUTER_NAME=router-a +LISTEN_QUEUE_NAME=queue1,queue2 +``` + +**Router-2** *runtime.env* + +``` +ROUTER_NAME=router-b +LISTEN_QUEUE_NAME=queue3,queue4 +``` + +The environment variable `LISTEN_QUEUE_NAME` needs to be specified in the docker-compose file in case. +The `LISTEN_QUEUE_NAME` is a list of string names split by a comma. It supports wildcards. \ No newline at end of file From caa2ed6b4f4930fca26593f04d916c1a30a30a92 Mon Sep 17 00:00:00 2001 From: revital Date: Wed, 12 Mar 2025 11:18:04 +0200 Subject: [PATCH 02/17] Add AI App Gateway docker-compose deployment for hosted servers --- ...ted.md => appgw_install_compose_hosted.md} | 66 +++++++++++-------- sidebars.js | 1 + 2 files changed, 40 insertions(+), 27 deletions(-) rename docs/deploying_clearml/enterprise_deploy/{appgw_intall_compose_hosted.md => appgw_install_compose_hosted.md} (61%) diff --git a/docs/deploying_clearml/enterprise_deploy/appgw_intall_compose_hosted.md b/docs/deploying_clearml/enterprise_deploy/appgw_install_compose_hosted.md similarity index 61% rename from docs/deploying_clearml/enterprise_deploy/appgw_intall_compose_hosted.md rename to docs/deploying_clearml/enterprise_deploy/appgw_install_compose_hosted.md index 7f64c9b1..9ded7f78 100644 --- a/docs/deploying_clearml/enterprise_deploy/appgw_intall_compose_hosted.md +++ b/docs/deploying_clearml/enterprise_deploy/appgw_install_compose_hosted.md @@ -1,5 +1,5 @@ --- -title: 
Installing AI Application Gateway with docker-compose - hosted server +title: Docker-Compose - Hosted Server --- :::important Enterprise Feature @@ -21,9 +21,7 @@ plane while hosting their own workload nodes. * Credentials for the ClearML docker repository * A valid ClearML Server installation -## Recommendations - -* For a secure connection, we recommend having a DNS entry and a valid SSL Certificate assigned to the machine IP. +Additionally, for a secure connection, it is recommended to have a DNS entry and a valid SSL Certificate assigned to the machine IP. ## Host Configuration @@ -44,11 +42,11 @@ sudo systemctl start docker sudo docker login ``` -Use the ClearML docker hub credentials when prompted by docker login. +Use the ClearML docker hub credentials when prompted by `docker` login. ### Docker-compose File -This is an example of the docker-compose file you will need to create: +This is an example of the `docker-compose` file you will need to create: ``` version: '3.5' @@ -108,12 +106,13 @@ TCP_PORT_END= Edit it according to the following guidelines: -* `ROUTER_NAME`: The name of the Router, which needs to be unique for each tenant. -* `CLEARML_API_ACCESS_KEY, CLEARML_API_SECRET_KEY:` API credentials created in the ClearML web UI, for Admin user or Service Account with admin privileges. Make sure to label these credentials clearly, so that they will not be revoked by mistake. -* `ROUTER_URL`: The URL for this router. This URL will be shown in the UI of any application for users to access (Like hosted Jupyter or LLM UI). -* `TCP_ROUTER_ADDRESS`: The TCP Router external address, which is an IP of the host machine or a load balancer hostname, depending on the customer network configuration. -* `TCP_PORT_START`: The start port for the TCP Tasks, chosen by the customer. Ensure that ports are open and can be allocated on the host. -* `TCP_PORT_END`: The end port for the TCP Tasks, chosen by the customer. 
Ensure that ports are open and can be allocated on the host. +* `ROUTER_NAME`: Unique name for this router. +* `CLEARML_API_ACCESS_KEY, CLEARML_API_SECRET_KEY:` API credentials for Admin user or Service Account with admin privileges + created in the ClearML web UI. Make sure to label these credentials clearly, so that they will not be revoked by mistake. +* `ROUTER_URL`: The URL for this router. This URL will be shown in the UI of any application for users to access (e.g. hosted Jupyter or LLM UI). +* `TCP_ROUTER_ADDRESS`: Router external address, can be an IP or the host machine or a load balancer hostname, depends on network configuration. +* `TCP_PORT_START`: Start port for the TCP Tasks, chosen by the customer. Ensure that ports are open and can be allocated on the host. +* `TCP_PORT_END`: End port for the TCP Tasks, chosen by the customer. Ensure that ports are open and can be allocated on the host. ### Installation @@ -127,7 +126,7 @@ sudo docker compose --env-file runtime.env up -d #### Running without Certificates -When running on docker-compose with an HTTP interface and without certificates please set the following entry in the *runtime.env* as below: +When running on `docker-compose` with an HTTP interface and without certificates, set the following entry in the `runtime.env`: ``` AUTH_SECURE_ENABLED=false @@ -135,22 +134,35 @@ AUTH_SECURE_ENABLED=false #### Install Multiple Routers for the Same Tenant -To install multiple routers in the same tenant it is necessary to set parameters to identify and split the workload. Using this setting, each router will only handle routing to tasks that have originated from the specific queues it was assigned. This is important in case you have two different networks with two different agents, and tasks started by Agent A can only be reached by Router A (in the same network), but simply cannot be reached by Router B. 
The assumption in this case is that Agent A and Agent B will service different queues, and the Routers handling routing to the tasks executed by each agent will need to match the queue definitions. -Multiple routers in the same tenant must have different `ROUTER_NAME` and listen to different queues (`LISTEN_QUEUE_NAME`). +To deploy multiple routers within the same tenant, you must configure each router to handle specific workloads. -**Router-A** *runtime.env* +Using this setting, each router will only route tasks that originated from its assigned queues. This +is important in case you have multiple networks with different agents. For example: +* Tasks started by Agent A can only be reached by Router A (within the same network), but cannot be reached by Router B +* Agent B will handle a separate set of tasks which can only be reached by Router B -``` -ROUTER_NAME=router-a -LISTEN_QUEUE_NAME=queue1,queue2 -``` +The assumption in this case is that Agent A and Agent B will service different queues, and routers must be configured to +route tasks based on these queue definitions. -**Router-2** *runtime.env* +Each router in the same tenant must have: +* A unique `ROUTER_NAME` +* Distinct set of queues listed in `LISTEN_QUEUE_NAME`. It supports wildcards. -``` -ROUTER_NAME=router-b -LISTEN_QUEUE_NAME=queue3,queue4 -``` +For example: +* **Router-A** `runtime.env` + + ``` + ROUTER_NAME=router-a + LISTEN_QUEUE_NAME=queue1,queue2 + ``` + +* **Router-B** `runtime.env` + + ``` + ROUTER_NAME=router-b + LISTEN_QUEUE_NAME=queue3,queue4 + ```` + +Ensure that `LISTEN_QUEUE_NAME` is included in the [`docker-compose` environment variables](#docker-compose-file) for each router +instance. -The environment variable `LISTEN_QUEUE_NAME` needs to be specified in the docker-compose file in case. -The `LISTEN_QUEUE_NAME` is a list of string names split by a comma. It supports wildcards. 
\ No newline at end of file diff --git a/sidebars.js b/sidebars.js index d76a812e..502c1377 100644 --- a/sidebars.js +++ b/sidebars.js @@ -659,6 +659,7 @@ module.exports = { label: 'ClearML Application Gateway', items: [ 'deploying_clearml/enterprise_deploy/appgw_install_compose', + 'deploying_clearml/enterprise_deploy/appgw_install_compose_hosted', 'deploying_clearml/enterprise_deploy/appgw_install_k8s', ] }, From 57c98aae9362ed1ba83bb09a900105352ce74bdb Mon Sep 17 00:00:00 2001 From: revital Date: Wed, 12 Mar 2025 14:10:38 +0200 Subject: [PATCH 03/17] Add Configuring Options per tenant --- .../enterprise_deploy/multi_tenant_k8s.md | 58 +++++++++++++++++-- 1 file changed, 53 insertions(+), 5 deletions(-) diff --git a/docs/deploying_clearml/enterprise_deploy/multi_tenant_k8s.md b/docs/deploying_clearml/enterprise_deploy/multi_tenant_k8s.md index 7ab41ef4..5350a0e9 100644 --- a/docs/deploying_clearml/enterprise_deploy/multi_tenant_k8s.md +++ b/docs/deploying_clearml/enterprise_deploy/multi_tenant_k8s.md @@ -268,7 +268,7 @@ metadata: spec: podSelector: {} policyTypes: - - Ingress + - Ingress ingress: - from: - podSelector: {} @@ -337,7 +337,7 @@ must be substituted with valid domain names or values from responses. APISERVER_SECRET="" ``` -2. Create a *Tenant* (company): +2. Create a **Tenant** (company): ``` curl $APISERVER_URL/system.create_company \\ @@ -352,7 +352,7 @@ must be substituted with valid domain names or values from responses. curl -u $APISERVER_KEY:$APISERVER_SECRET $APISERVER_URL/system.get_companies ``` -3. Create an *Admin User*: +3. Create an **Admin User**: ``` curl $APISERVER_URL/auth.create_user \\ @@ -363,7 +363,7 @@ must be substituted with valid domain names or values from responses. This returns the new User ID (``). -4. Generate *Credentials* for the new Admin User: +4. 
Generate **Credentials** for the new Admin User: ``` curl $APISERVER_URL/auth.create_credentials \\ @@ -374,7 +374,7 @@ must be substituted with valid domain names or values from responses. This returns a set of key and secret credentials associated with the new Admin User. -5. Create an SSO Domain *Whitelist*. The `` is the email domain setup for users to access through SSO. +5. Create an SSO Domain **Whitelist**. The `` is the email domain setup for users to access through SSO. ``` curl $APISERVER_URL/login.set_domains \\ @@ -542,3 +542,51 @@ Install the [Task Traffic Router](appgw.md) in your Kubernetes cluster, allowing -f overrides.yaml ``` +## Configuring Options per Tenant + +### Override Options When Creating a New Tenant + +When creating a new tenant company, you can specify several tenant options. These include: + +* `features` - Add features to a company +* `exclude_features` - Exclude features from a company. +* `allowed_users` - Set the maximum number of users for a company. + +#### Example: Create a New Tenant with a Specific Feature Set + +``` +curl $APISERVER_URL/system.create_company \ +-H "Content-Type: application/json" \ +-u $APISERVER_KEY:$APISERVER_SECRET \ +-d '{"name":"", "defaults": { "allowed_users": "10", "features": ["experiments"], "exclude_features": ["app_management", "applications", "user_management"] }}' +``` + +**Note**: make sure to replace the `` placeholder. + +### Limit Features for all Users + +This Helm Chart value in the `overrides.yaml` will have priority over all tenants, and will limit the features +available to any user in the system. This means that even if the feature is enabled for the tenant, if it's not in this +list, the user will not see it. + +Example: all users will only have the `applications` feature enabled. 
+ +``` +apiserver: + extraEnvs: + - name: CLEARML__services__auth__default_groups__users__features + value: "[\"applications\"]" +``` + +**Available Features**: + +* `applications` - Viewing and running applications +* `data_management` - Working with hyper-datasets and dataviews +* `experiments` - Viewing experiment table and launching experiments +* `queues` - Viewing the queues screen +* `queue_management` - Creating and deleting queues +* `pipelines` - Viewing/managing pipelines in the system +* `reports` - Viewing and managing reports in the system +* `show_dashboard` - Show the dashboard screen +* `show_projects` - Show the projects menu option +* `resource_dashboard` - Display the resource dashboard in the orchestration page \ No newline at end of file From b0d9a1357ab9195ceb77c5283087ea04b00eff33 Mon Sep 17 00:00:00 2001 From: revital Date: Thu, 20 Mar 2025 09:15:50 +0200 Subject: [PATCH 04/17] Edits --- .../enterprise_deploy/appgw.md | 3 +- .../appgw_install_compose_hosted.md | 92 +++++++++---------- 2 files changed, 47 insertions(+), 48 deletions(-) diff --git a/docs/deploying_clearml/enterprise_deploy/appgw.md b/docs/deploying_clearml/enterprise_deploy/appgw.md index 647c575a..3466dd5a 100644 --- a/docs/deploying_clearml/enterprise_deploy/appgw.md +++ b/docs/deploying_clearml/enterprise_deploy/appgw.md @@ -35,9 +35,10 @@ If your ClearML Deployment does not have the App Gateway Router properly install #### Installation -The App Gateway Router supports two deployment options: +The App Gateway Router supports the following deployment options: * [Docker Compose](appgw_install_compose.md) +* [Docker Compose for hosted servers](appgw_install_compose_hosted.md) * [Kubernetes](appgw_install_k8s.md) The deployment configuration specifies the external and internal address and port mappings for routing requests. 
diff --git a/docs/deploying_clearml/enterprise_deploy/appgw_install_compose_hosted.md b/docs/deploying_clearml/enterprise_deploy/appgw_install_compose_hosted.md index 9ded7f78..ad6ce13e 100644 --- a/docs/deploying_clearml/enterprise_deploy/appgw_install_compose_hosted.md +++ b/docs/deploying_clearml/enterprise_deploy/appgw_install_compose_hosted.md @@ -3,14 +3,14 @@ title: Docker-Compose - Hosted Server --- :::important Enterprise Feature -The Application Gateway is available under the ClearML Enterprise plan. +The AI Application Gateway is available under the ClearML Enterprise plan. ::: -The AI Application Gateway enables external access to ClearML tasks, and applications running on workload nodes that +The AI Application Gateway enables external access to ClearML tasks, and applications running on workload nodes that require HTTP or TCP access. The gateway is configured with an endpoint or external address, making these services accessible from the user's machine, outside the workload nodes’ network. -This guide details the installation of the ClearML AI Application Gateway for ClearML users who use ClearML’s SaaS control +This guide details the installation of the App Gateway Router for ClearML users who use ClearML's SaaS control plane while hosting their own workload nodes. ## Requirements @@ -104,12 +104,15 @@ TCP_PORT_START= TCP_PORT_END= ``` -Edit it according to the following guidelines: +**Configuration Options:** -* `ROUTER_NAME`: Unique name for this router. +* `ROUTER_NAME`: In the case of [multiple routers on the same tenant](#multiple-router-in-the-same-tenant), each router + needs to have a unique name. * `CLEARML_API_ACCESS_KEY, CLEARML_API_SECRET_KEY:` API credentials for Admin user or Service Account with admin privileges created in the ClearML web UI. Make sure to label these credentials clearly, so that they will not be revoked by mistake. -* `ROUTER_URL`: The URL for this router. 
This URL will be shown in the UI of any application for users to access (e.g. hosted Jupyter or LLM UI). +* `ROUTER_URL`: External address to access the router. This can be the IP address or DNS of the node where the router + is running, or the address of a load balancer if the router operates behind a proxy/load balancer. This URL is used + to access AI workload applications (e.g. remote IDE, model deployment, etc.), so it must be reachable and resolvable for them. * `TCP_ROUTER_ADDRESS`: Router external address, can be an IP or the host machine or a load balancer hostname, depends on network configuration. * `TCP_PORT_START`: Start port for the TCP Tasks, chosen by the customer. Ensure that ports are open and can be allocated on the host. * `TCP_PORT_END`: End port for the TCP Tasks, chosen by the customer. Ensure that ports are open and can be allocated on the host. @@ -122,47 +125,42 @@ Run the following command to start the router: sudo docker compose --env-file runtime.env up -d ``` -### Advanced +### Advanced Configuration -#### Running without Certificates +#### Using Open HTTP -When running on `docker-compose` with an HTTP interface and without certificates, set the following entry in the `runtime.env`: - -``` -AUTH_SECURE_ENABLED=false -``` - -#### Install Multiple Routers for the Same Tenant - -To deploy multiple routers within the same tenant, you must configure each router to handle specific workloads. - -Using this setting, each router will only route tasks that originated from its assigned queues. This -is important in case you have multiple networks with different agents. 
For example: -* Tasks started by Agent A can only be reached by Router A (within the same network), but cannot be reached by Router B -* Agent B will handle a separate set of tasks which can only be reached by Router B - -The assumption in this case is that Agent A and Agent B will service different queues, and routers must be configured to -route tasks based on these queue definitions. - -Each router in the same tenant must have: -* A unique `ROUTER_NAME` -* Distinct set of queues listed in `LISTEN_QUEUE_NAME`. It supports wildcards. - -For example: -* **Router-A** `runtime.env` - - ``` - ROUTER_NAME=router-a - LISTEN_QUEUE_NAME=queue1,queue2 - ``` - -* **Router-B** `runtime.env` - - ``` - ROUTER_NAME=router-b - LISTEN_QUEUE_NAME=queue3,queue4 - ```` - -Ensure that `LISTEN_QUEUE_NAME` is included in the [`docker-compose` environment variables](#docker-compose-file) for each router -instance. +To deploy the App Gateway Router on open HTTP (without a certificate), set the `AUTH_SECURE_ENABLED` entry +to `false` in the `runtime.env` file. +#### Multiple Router in the Same Tenant + + If you have workloads running in separate networks that cannot communicate with each other, you need to deploy multiple + routers, one for each isolated environment. Each router will only process tasks from designated queues, ensuring that + tasks are correctly routed to agents within the same network. + + For example: + * If Agent A and Agent B are in separate networks, each must have its own router to receive tasks. + * Router A will handle tasks from Agent A’s queues. Router B will handle tasks from Agent B’s queues. + + To achieve this, each router must be configured with: + * A unique `ROUTER_NAME` + * A distinct set of queues defined in `LISTEN_QUEUE_NAME`. 
+ + ##### Example Configuration + Each router's `runtime.env` file should include: + + * Router A: + + ``` + ROUTER_NAME=router-a + LISTEN_QUEUE_NAME=queue1,queue2 + ``` + + * Router B: + + ``` + ROUTER_NAME=router-b + LISTEN_QUEUE_NAME=queue3,queue4 + ``` + + Make sure `LISTEN_QUEUE_NAME` is set in the [`docker-compose` environment variables](#docker-compose-file) for each router instance. \ No newline at end of file From 6dd0445ed31572dbe007d783836da4556e1e2c54 Mon Sep 17 00:00:00 2001 From: revital Date: Mon, 24 Mar 2025 07:47:42 +0200 Subject: [PATCH 05/17] Add Simple k8s deployment --- .../enterprise_deploy/k8s.md | 448 ++++++++++++++++++ sidebars.js | 6 +- 2 files changed, 452 insertions(+), 2 deletions(-) create mode 100644 docs/deploying_clearml/enterprise_deploy/k8s.md diff --git a/docs/deploying_clearml/enterprise_deploy/k8s.md b/docs/deploying_clearml/enterprise_deploy/k8s.md new file mode 100644 index 00000000..dacd53ca --- /dev/null +++ b/docs/deploying_clearml/enterprise_deploy/k8s.md @@ -0,0 +1,448 @@ +--- +title: Kubernetes +--- + + +This guide provides step-by-step instructions for installing the ClearML Enterprise setup in a Kubernetes cluster. + + +## Prerequisites + + +* A Kubernetes cluster +* An ingress controller (e.g. `nginx-ingress`) and the ability to create LoadBalancer services (e.g. MetalLB) if needed + to expose ClearML +* Credentials for ClearML Enterprise GitHub Helm chart repository +* Credentials for ClearML Enterprise DockerHub repository +* URL for downloading the ClearML Enterprise applications configuration + + +## Control Plane Installation + + +The following steps cover installing the control plane (server and required charts) and will +require some or all of the tokens/deliverables mentioned above. 
+ + +### Requirements + + +* Add the ClearML Enterprise repository: + + + ``` + helm repo add clearml-enterprise https://raw.githubusercontent.com/clearml/clearml-enterprise-helm-charts/gh-pages --username --password + ``` + + +* Update the repository locally: + + + ``` + helm repo update + ``` + + +### Install ClearML Enterprise Chart + + +#### Configuration + + +The Helm Chart must be installed with an `overrides.yaml` overriding values as follows: + + +:::note +In the following configuration, replace `` with a valid domain +that will have records pointing to the cluster’s ingress controller (see ingress details in the values below). +::: + + +``` +imageCredentials: + password: "" + + +clearml: + cookieDomain: "" + # Set values for improved security + apiserverKey: "" + apiserverSecret: "" + fileserverKey: "" + fileserverSecret: "" + secureAuthTokenSecret: "" + testUserKey: "" + testUserSecret: "" + + +apiserver: + ingress: + enabled: true + hostName: "api." + service: + type: ClusterIP + extraEnvs: + - name: CLEARML__services__organization__features__user_management_advanced + value: "true" + - name: CLEARML__services__auth__ui_features_per_role__user__show_datasets + value: "false" + - name: CLEARML__services__auth__ui_features_per_role__user__show_orchestration + value: "false" + - name: CLEARML__services__workers__resource_usages__supervisor_company + value: "" + - name: CLEARML__secure__credentials__supervisor__role + value: "system" + - name: CLEARML__secure__credentials__supervisor__allow_login + value: "true" + - name: CLEARML__secure__credentials__supervisor__user_key + value: "" + - name: CLEARML__secure__credentials__supervisor__user_secret + value: "" + - name: CLEARML__secure__credentials__supervisor__sec_groups + value: "[\"users\", \"admins\", \"queue_admins\"]" + - name: CLEARML__secure__credentials__supervisor__email + value: "\"\"" + - name: CLEARML__apiserver__company__unique_names + value: "true" + + +fileserver: + ingress: + enabled: true + 
hostName: "files." + service: + type: ClusterIP + + +webserver: + ingress: + enabled: true + hostName: "app." + service: + type: ClusterIP + + +clearmlApplications: + enabled: true +``` + + +The credentials specified in `` and `` can be used to login as the +supervisor user from the ClearML Web UI accessible using the URL `app.`. + + +Note that the `` value must be explicitly quoted. To do so, put `\"` around the quoted value. +For example `"\"email@example.com\""`. + + +#### Additional Configuration Options +##### Fixed Users (Simple Login) + + +Enable static login with username and password in `overrides.yaml`. + + +This is an optional step in case SSO (Identity provider) configuration will not be performed. + + +``` +apiserver: + additionalConfigs: + apiserver.conf: | + auth { + fixed_users { + enabled: true + pass_hashed: false + users: [ + { + username: "my_user" + password: "my_password" + name: "My User" + admin: true + }, + ] + } + } +``` + + +##### SSO (Identity Provider) + + +The following examples (Auth0 and Keycloak) show how to configure an identity provider on the ClearML server. + + +Add the following values configuring `extraEnvs` for `apiserver` in the `clearml-enterprise` values `overrides.yaml` file. + + +Substitute all ``s with the correct value for your configuration. 
+ + +##### Auth0 Identity Provider + + +``` +apiserver: + extraEnvs: + - name: CLEARML__secure__login__sso__oauth_client__auth0__client_id + value: "" + - name: CLEARML__secure__login__sso__oauth_client__auth0__client_secret + value: "" + - name: CLEARML__services__login__sso__oauth_client__auth0__base_url + value: "" + - name: CLEARML__services__login__sso__oauth_client__auth0__authorize_url + value: "" + - name: CLEARML__services__login__sso__oauth_client__auth0__access_token_url + value: "" + - name: CLEARML__services__login__sso__oauth_client__auth0__audience + value: "" +``` + + +##### Keycloak Identity Provider + + +``` +apiserver: + extraEnvs: + - name: CLEARML__secure__login__sso__oauth_client__keycloak__client_id + value: "" + - name: CLEARML__secure__login__sso__oauth_client__keycloak__client_secret + value: "" + - name: CLEARML__services__login__sso__oauth_client__keycloak__base_url + value: "/realms//" + - name: CLEARML__services__login__sso__oauth_client__keycloak__authorize_url + value: "/realms//protocol/openid-connect/auth" + - name: CLEARML__services__login__sso__oauth_client__keycloak__access_token_url + value: "/realms//protocol/openid-connect/token" + - name: CLEARML__services__login__sso__oauth_client__keycloak__idp_logout + value: "true" + + +``` + + +#### Installing the Chart + + +``` +helm install -n clearml \ + clearml \ + clearml-enterprise/clearml-enterprise \ + --create-namespace \ + -f overrides.yaml +``` + + +### Install ClearML Agent Chart + + +#### Configuration + + +To configure the agent you will need to choose a Redis password and use that when setting up Redis as well +(see [Shared Redis installation](multi_tenant_k8s.md#shared-redis-installation)). + + +The Helm Chart must be installed with `overrides.yaml`: + + +``` +imageCredentials: + password: "" +clearml: + agentk8sglueKey: "" + agentk8sglueSecret: "" +agentk8sglue: + apiServerUrlReference: "https://api." + fileServerUrlReference: "https://files." 
+ webServerUrlReference: "https://app." + defaultContainerImage: "python:3.9" +``` + + +#### Installing the Chart + + +``` +helm install -n \ + clearml-agent \ + clearml-enterprise/clearml-enterprise-agent \ + --create-namespace \ + -f overrides.yaml +``` + + +To create a queue by API: + + +``` +curl $APISERVER_URL/queues.create \ +-H "Content-Type: application/json" \ +-H "X-Clearml-Impersonate-As:" \ +-u $APISERVER_KEY:$APISERVER_SECRET \ +-d '{"name":"default"}' +``` + + +## ClearML AI Application Gateway Installation + + +### Configuring Chart + + +The Helm Chart must be installed with `overrides.yaml`: + + +``` +imageCredentials: + password: "" +clearml: + apiServerKey: "" + apiServerSecret: "" + apiServerUrlReference: "https://api." + authCookieName: "" +ingress: + enabled: true + hostName: "task-router.dev" +tcpSession: + routerAddress: "" + portRange: + start: + end: +``` + + +**Configuration options:** + + +* **`clearml.apiServerUrlReference`:** URL usually starting with `https://api.` +* **`clearml.apiServerKey`:** ClearML server API key +* **`clearml.apiServerSecret`:** ClearML server secret key +* **`ingress.hostName`:** URL of the router we configured previously for load balancer starting with `https://` +* **`clearml.sslVerify`:** Enable or disable SSL certificate validation on calls to the apiserver +* **`clearml.authCookieName`:** Value from `value_prefix` key starting with `allegro_token` in `envoy.yaml` file in ClearML server installation. 
+* **`tcpSession.routerAddress`**: External address of the router. This can be the IP address of the host machine or a load balancer hostname, depending on the network configuration +* **`tcpSession.portRange.start`**: Start port for the TCP Session feature +* **`tcpSession.portRange.end`**: End port for the TCP Session feature + + +### Installing the Chart + + +``` +helm install -n \ + clearml-ttr \ + clearml-enterprise/clearml-enterprise-task-traffic-router \ + --create-namespace \ + -f overrides.yaml +``` + + + + +## Applications Installation + + +To install the ClearML Applications on the newly installed ClearML Enterprise control-plane, download the applications +package using the URL provided by the ClearML staff. + + + + +### Download and Extract + + +``` +wget -O apps.zip "" +unzip apps.zip +``` + + +### Adjust Application Docker Images Location (Air-Gapped Systems) + + +ClearML applications use pre-built docker images provided by ClearML on the ClearML DockerHub +repository. If you are using an air-gapped system, these images must be available as part of your internal docker +registry, and the correct docker images location must be specified before installing the applications. + + +Use the following script to adjust the application packages accordingly before installing the applications: + + +``` +python convert_image_registry.py \ + --apps-dir /path/to/apps/ \ + --repo local_registry/clearml-apps +``` + + +The script will change the application zip files to point to the new registry, and will output the list of containers +that need to be copied to the local registry. 
For example: + + +``` +make sure allegroai/clearml-apps:hpo-1.10.0-1062 was added to local_registry/clearml-apps +``` + + +### Install Applications + + +Use the `upload_apps.py` script to upload the application packages to the ClearML server: + + +``` +python upload_apps.py \ + --host $APISERVER_ADDRESS \ + --user $APISERVER_USER --password $APISERVER_PASSWORD \ + --dir apps -ml +``` + + +## Configuring Shared Memory for Large Model Deployment + + +Deploying large models may fail due to shared memory size limitations. This issue commonly arises when the allocated +`/dev/shm` space is insufficient, producing errors such as: + + +``` +> 3d3e22c3066f:168:168 [0] misc/shmutils.cc:72 NCCL WARN Error: failed to extend /dev/shm/nccl-UbzKZ9 to 9637892 bytes +> 3d3e22c3066f:168:168 [0] misc/shmutils.cc:113 NCCL WARN Error while creating shared memory segment /dev/shm/nccl-UbzKZ9 (size 9637888) +> 3d3e22c3066f:168:168 [0] NCCL INFO transport/shm.cc:114 -> 2 +> 3d3e22c3066f:168:168 [0] NCCL INFO transport.cc:33 -> 2 +> 3d3e22c3066f:168:168 [0] NCCL INFO transport.cc:113 -> 2 +> 3d3e22c3066f:168:168 [0] NCCL INFO init.cc:1263 -> 2 +> 3d3e22c3066f:168:168 [0] NCCL INFO init.cc:1548 -> 2 +> 3d3e22c3066f:168:168 [0] NCCL INFO init.cc:1799 -> 2 +``` + + +To configure a proper SHM size, use the following configuration in the agent `overrides.yaml`. + + +Set `sizeLimit` to the desired memory allocation in GiB (a number followed by `Gi`), based on your model requirements. + + +This example configures a specific queue, but you can include this setting in the `basePodTemplate` if you need to +apply it to all tasks. 
+ + +``` +agentk8sglue: + queues: + GPUshm: + templateOverrides: + env: + - name: VLLM_SKIP_P2P_CHECK + value: "1" + volumeMounts: + - name: dshm + mountPath: /dev/shm + volumes: + - name: dshm + emptyDir: + medium: Memory + sizeLimit: Gi +``` diff --git a/sidebars.js b/sidebars.js index 0674d6a0..1a6b15e1 100644 --- a/sidebars.js +++ b/sidebars.js @@ -327,9 +327,10 @@ module.exports = { { 'Open Source': [ - 'release_notes/sdk/open_source/ver_1_17', + 'release_notes/sdk/open_source/ver_1_18', { 'Older Versions': [ + 'release_notes/sdk/open_source/ver_1_17', 'release_notes/sdk/open_source/ver_1_16', 'release_notes/sdk/open_source/ver_1_15', 'release_notes/sdk/open_source/ver_1_14', 'release_notes/sdk/open_source/ver_1_13', 'release_notes/sdk/open_source/ver_1_12', 'release_notes/sdk/open_source/ver_1_11', @@ -639,11 +640,12 @@ module.exports = { { 'Enterprise Server': { 'Deployment Options': [ + 'deploying_clearml/enterprise_deploy/k8s', 'deploying_clearml/enterprise_deploy/multi_tenant_k8s', 'deploying_clearml/enterprise_deploy/vpc_aws', 'deploying_clearml/enterprise_deploy/on_prem_ubuntu', ], - 'Maintenance': [ + 'Maintenance and Migration': [ 'deploying_clearml/enterprise_deploy/import_projects', 'deploying_clearml/enterprise_deploy/change_artifact_links', 'deploying_clearml/enterprise_deploy/delete_tenant', From 3fabb27803ca03d126ce25605a13d3a53eb3a973 Mon Sep 17 00:00:00 2001 From: pollfly <75068813+pollfly@users.noreply.github.com> Date: Wed, 26 Mar 2025 17:33:35 +0200 Subject: [PATCH 06/17] edits --- docs/deploying_clearml/enterprise_deploy/k8s.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/deploying_clearml/enterprise_deploy/k8s.md b/docs/deploying_clearml/enterprise_deploy/k8s.md index dacd53ca..3a217321 100644 --- a/docs/deploying_clearml/enterprise_deploy/k8s.md +++ b/docs/deploying_clearml/enterprise_deploy/k8s.md @@ -60,7 +60,7 @@ that will have records pointing to the cluster’s ingress controller (see ingre ``` 
imageCredentials: - password: "" + password: "" clearml: From dfde2585f30bb268d5bb8171128397da406bb4e5 Mon Sep 17 00:00:00 2001 From: revital Date: Thu, 27 Mar 2025 12:35:36 +0200 Subject: [PATCH 07/17] small edits --- .../enterprise_deploy/change_artifact_links.md | 17 +++++++++-------- docs/deploying_clearml/enterprise_deploy/k8s.md | 14 +++++++------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/docs/deploying_clearml/enterprise_deploy/change_artifact_links.md b/docs/deploying_clearml/enterprise_deploy/change_artifact_links.md index 34e67d5c..e8492c18 100644 --- a/docs/deploying_clearml/enterprise_deploy/change_artifact_links.md +++ b/docs/deploying_clearml/enterprise_deploy/change_artifact_links.md @@ -29,17 +29,18 @@ script changes the values in the databases, and can't be undone. ## Fixing MongoDB links 1. Access the `apiserver` Docker container: - * In `docker-compose:` + + * In `docker-compose:` - ```commandline - sudo docker exec -it allegro-apiserver /bin/bash - ``` + ```commandline + sudo docker exec -it allegro-apiserver /bin/bash + ``` - * In Kubernetes: + * In Kubernetes: - ```commandline - kubectl exec -it -n clearml -- bash - ``` + ```commandline + kubectl exec -it -n clearml -- bash + ``` 1. 
Navigate to the script location in the `upgrade` folder: diff --git a/docs/deploying_clearml/enterprise_deploy/k8s.md b/docs/deploying_clearml/enterprise_deploy/k8s.md index 3a217321..e237919b 100644 --- a/docs/deploying_clearml/enterprise_deploy/k8s.md +++ b/docs/deploying_clearml/enterprise_deploy/k8s.md @@ -66,13 +66,13 @@ imageCredentials: clearml: cookieDomain: "" # Set values for improved security - apiserverKey: "" - apiserverSecret: "" - fileserverKey: "" - fileserverSecret: "" - secureAuthTokenSecret: "" - testUserKey: "" - testUserSecret: "" + apiserverKey: "" + apiserverSecret: "" + fileserverKey: "" + fileserverSecret: "" + secureAuthTokenSecret: "" + testUserKey: "" + testUserSecret: "" apiserver: From caa22b179ac3844912366d38831831abc5c0b6c6 Mon Sep 17 00:00:00 2001 From: revital Date: Thu, 27 Mar 2025 12:38:01 +0200 Subject: [PATCH 08/17] small edits --- .../enterprise_deploy/appgw_install_compose_hosted.md | 2 +- docs/deploying_clearml/enterprise_deploy/multi_tenant_k8s.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/deploying_clearml/enterprise_deploy/appgw_install_compose_hosted.md b/docs/deploying_clearml/enterprise_deploy/appgw_install_compose_hosted.md index ad6ce13e..edda145e 100644 --- a/docs/deploying_clearml/enterprise_deploy/appgw_install_compose_hosted.md +++ b/docs/deploying_clearml/enterprise_deploy/appgw_install_compose_hosted.md @@ -10,7 +10,7 @@ The AI Application Gateway enables external access to ClearML tasks, and applic require HTTP or TCP access. The gateway is configured with an endpoint or external address, making these services accessible from the user's machine, outside the workload nodes’ network. -This guide details the installation of the App Gateway Router for ClearML users who use ClearML's SaaS control +This guide details the installation of the App Gateway Router for ClearML users who use ClearML's hosted control plane while hosting their own workload nodes. 
## Requirements diff --git a/docs/deploying_clearml/enterprise_deploy/multi_tenant_k8s.md b/docs/deploying_clearml/enterprise_deploy/multi_tenant_k8s.md index 25803059..8b1473f5 100644 --- a/docs/deploying_clearml/enterprise_deploy/multi_tenant_k8s.md +++ b/docs/deploying_clearml/enterprise_deploy/multi_tenant_k8s.md @@ -565,7 +565,7 @@ curl $APISERVER_URL/system.create_company \ ### Limit Features for all Users This Helm Chart value in the `overrides.yaml` will have priority over all tenants, and will limit the features -available to any user in the system. This means that even if the feature is enabled for the tenant, if it;s not in this +available to any user in the system. This means that even if the feature is enabled for the tenant, if it's not in this list, the user will not see it. Example: all users will only have the `applications` feature enabled. From 8a696d5557797492373a55560d724e0fccc1ffc5 Mon Sep 17 00:00:00 2001 From: revital Date: Thu, 27 Mar 2025 12:47:52 +0200 Subject: [PATCH 09/17] Add Enterprise Server 3.24.3 release notes --- docs/release_notes/clearml_server/enterprise/ver_3_24.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/release_notes/clearml_server/enterprise/ver_3_24.md b/docs/release_notes/clearml_server/enterprise/ver_3_24.md index 38c45470..3f1a58a1 100644 --- a/docs/release_notes/clearml_server/enterprise/ver_3_24.md +++ b/docs/release_notes/clearml_server/enterprise/ver_3_24.md @@ -3,6 +3,12 @@ title: Version 3.24 --- +### Enterprise Server 3.24.3 + +**New Features** +* Add option to limit UI application instance endpoint access to the application instance creator only +* Add custom user properties to multi-tenant usage reports + ### Enterprise Server 3.24.2 **New Features** From 131ddc47ff0dcf5a789b6caa2d2e3a163fa84e9c Mon Sep 17 00:00:00 2001 From: alnoam Date: Tue, 1 Apr 2025 15:19:46 +0300 Subject: [PATCH 10/17] Add release notes item GitHub link --- docs/release_notes/clearml_server/open_source/ver_2_0.md | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/release_notes/clearml_server/open_source/ver_2_0.md b/docs/release_notes/clearml_server/open_source/ver_2_0.md index 2ca3a97e..2e746088 100644 --- a/docs/release_notes/clearml_server/open_source/ver_2_0.md +++ b/docs/release_notes/clearml_server/open_source/ver_2_0.md @@ -18,7 +18,7 @@ Upgrading to ClearML Server v1.17 from a previous version: * New UI task creation options * Support bash as well as Python scripts * Support file upload -* New UI setting for configuring cloud storage credentials with which ClearML can clean up cloud storage artifacts on task deletion. +* New UI setting for configuring cloud storage credentials with which ClearML can clean up cloud storage artifacts on task deletion ([ClearML Server GitHub issue #144](https://github.com/clearml/clearml-server/issues/144)). * Add UI scalar plots presentation of plots in sections grouped by metrics. * Add UI batch export plot embed codes for all metric plots in a single click. 
* Add UI pipeline presentation of steps grouped into stages From b019dd220dff6c4b86929d3693dc7e6162706aff Mon Sep 17 00:00:00 2001 From: revital Date: Wed, 2 Apr 2025 09:27:29 +0300 Subject: [PATCH 11/17] Add ClearML Server 2.0.1 release notes --- .../clearml_server/open_source/ver_2_0.md | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/docs/release_notes/clearml_server/open_source/ver_2_0.md b/docs/release_notes/clearml_server/open_source/ver_2_0.md index 2e746088..04399774 100644 --- a/docs/release_notes/clearml_server/open_source/ver_2_0.md +++ b/docs/release_notes/clearml_server/open_source/ver_2_0.md @@ -2,6 +2,26 @@ title: Version 2.0 --- +### ClearML Server 2.0.1 + +**New Features** +* New UI task creation options + * Support bash as well as Python scripts + * Support file upload + +**Bug Fixes** +* Fix ctrl-f does not open a search bar in UI editor modals ([ClearML Web GitHub issue #99](https://github.com/clearml/clearml-web/issues/99)) +* Fix UI smoothed plots are dimmer than original plots in dark mode ([ClearML Server GitHub issue #270](https://github.com/clearml/clearml-server/issues/270)) +* Fix webserver configuration environment variables don't load with single-quoted strings +* Fix image plots sometimes not rendered in UI +* Fix "All" tag filter not working in UI model selection modal in comparison pages +* Fix manual refresh function sometimes does not work in UI task +* Fix UI embedded plot colors do not change upon UI theme change +* Fix deleting a parameter in the UI task creation modal incorrectly removes another parameter +* Fix UI global search displays aborted tasks as completed +* Fix can't show/hide specific UI plot variants +* Fix UI breadcrumbs sometimes do not display project name + ### ClearML Server 2.0.0 **Breaking Changes** From 72ff19ada8d385b01e4de0c0c1156f3e9f52e01a Mon Sep 17 00:00:00 2001 From: revital Date: Wed, 2 Apr 2025 09:36:26 +0300 Subject: [PATCH 12/17] Add ClearML Server 2.0.1 release notes --- 
docs/release_notes/clearml_server/open_source/ver_2_0.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/release_notes/clearml_server/open_source/ver_2_0.md b/docs/release_notes/clearml_server/open_source/ver_2_0.md index 04399774..3819c005 100644 --- a/docs/release_notes/clearml_server/open_source/ver_2_0.md +++ b/docs/release_notes/clearml_server/open_source/ver_2_0.md @@ -12,7 +12,7 @@ title: Version 2.0 **Bug Fixes** * Fix ctrl-f does not open a search bar in UI editor modals ([ClearML Web GitHub issue #99](https://github.com/clearml/clearml-web/issues/99)) * Fix UI smoothed plots are dimmer than original plots in dark mode ([ClearML Server GitHub issue #270](https://github.com/clearml/clearml-server/issues/270)) -* Fix webserver configuration environment variables don't load with single-quoted strings +* Fix webserver configuration environment variables don't load with single-quoted strings ([ClearML Server GitHub issue #271](https://github.com/clearml/clearml-server/issues/271)) * Fix image plots sometimes not rendered in UI * Fix "All" tag filter not working in UI model selection modal in comparison pages * Fix manual refresh function sometimes does not work in UI task From dd3e89219c5fed65efa21f6c882de51a41c104dc Mon Sep 17 00:00:00 2001 From: revital Date: Wed, 2 Apr 2025 09:38:11 +0300 Subject: [PATCH 13/17] Update ClearML Enterprise Server 3.24.0 release notes --- docs/release_notes/clearml_server/enterprise/ver_3_24.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/release_notes/clearml_server/enterprise/ver_3_24.md b/docs/release_notes/clearml_server/enterprise/ver_3_24.md index 3f1a58a1..9b7b97de 100644 --- a/docs/release_notes/clearml_server/enterprise/ver_3_24.md +++ b/docs/release_notes/clearml_server/enterprise/ver_3_24.md @@ -60,7 +60,8 @@ title: Version 3.24 **Bug Fixes** * Fix ctrl-f does not open a search bar in UI editor modals ([ClearML Web GitHub issue 
#99](https://github.com/clearml/clearml-web/issues/99)) -* Fix UI Incorrect project statistics in project page +* Fix webserver configuration environment variables don't load with single-quoted strings ([ClearML Server GitHub issue #271](https://github.com/clearml/clearml-server/issues/271)) +* Fix UI incorrect project statistics in project page * Fix UI Hyper-Dataset version's "Publish" function is sometimes unnecessarily disabled * Fix UI Task manual refresh function does not work in full screen mode * Fix links to tasks are broken in the Orchestration's Queues’ task lists @@ -79,5 +80,5 @@ title: Version 3.24 * Fix UI global search results display aborted tasks as completed * Fix UI breadcrumbs sometimes don't display project name of newly cloned task * Fix scroll sometimes doesn't work in UI global search results -* Fix Hyper-Dataset FrameGroup Details and FrameGroup Metadata sections are not expanding +* Fix Hyper-Dataset FrameGroup Details and Metadata sections are not expanding * Fix unsaved content is not discarded in UI Hyper-Dataset frame viewer when moving to another frame source From af0a43369064e3f7e63112731fe921b481695a33 Mon Sep 17 00:00:00 2001 From: revital Date: Sun, 6 Apr 2025 10:22:55 +0300 Subject: [PATCH 14/17] small edits --- docs/clearml_serving/clearml_serving_tutorial.md | 2 +- .../enterprise_deploy/change_artifact_links.md | 2 +- docs/getting_started/data_management.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/clearml_serving/clearml_serving_tutorial.md b/docs/clearml_serving/clearml_serving_tutorial.md index c13e81b9..a6e21ebe 100644 --- a/docs/clearml_serving/clearml_serving_tutorial.md +++ b/docs/clearml_serving/clearml_serving_tutorial.md @@ -7,7 +7,7 @@ In this tutorial, you will go over the model lifecycle -- from training to servi * Serving the model using **ClearML Serving** * Spinning the inference container -The tutorial also covers the following`clearml-serving` features: +The tutorial also 
covers the following `clearml-serving` features: * Automatic model deployment * Canary endpoints * Model performance monitoring diff --git a/docs/deploying_clearml/enterprise_deploy/change_artifact_links.md b/docs/deploying_clearml/enterprise_deploy/change_artifact_links.md index e8492c18..49ce8698 100644 --- a/docs/deploying_clearml/enterprise_deploy/change_artifact_links.md +++ b/docs/deploying_clearml/enterprise_deploy/change_artifact_links.md @@ -30,7 +30,7 @@ script changes the values in the databases, and can't be undone. 1. Access the `apiserver` Docker container: - * In `docker-compose:` + * In `docker-compose`: ```commandline sudo docker exec -it allegro-apiserver /bin/bash diff --git a/docs/getting_started/data_management.md b/docs/getting_started/data_management.md index 3064a51f..06ee9984 100644 --- a/docs/getting_started/data_management.md +++ b/docs/getting_started/data_management.md @@ -49,7 +49,7 @@ Your goal is to create an immutable copy of the data to be used by further steps The second step is to preprocess the data. First access the data, then modify it, and lastly create a new version of the data. -1. Create a task for you data preprocessing (not required): +1. 
Create a task for your data preprocessing (not required): ```python from clearml import Task, Dataset From 49b1b656887b9c40aa2d91a14dbcd7cca11c7ef6 Mon Sep 17 00:00:00 2001 From: revital Date: Sun, 6 Apr 2025 10:32:37 +0300 Subject: [PATCH 15/17] small edits --- .../enterprise_deploy/sso_active_directory.md | 4 ++-- docs/deploying_clearml/enterprise_deploy/sso_keycloak.md | 2 +- .../enterprise_deploy/sso_multi_tenant_login.md | 2 +- .../ml_ci_cd_using_github_actions_and_clearml.md | 2 +- docs/guides/advanced/execute_remotely.md | 2 +- docs/guides/frameworks/pytorch/pytorch_abseil.md | 2 +- docs/guides/frameworks/tensorflow/tensorflow_mnist.md | 2 +- docs/hyperdatasets/webapp/webapp_exp_track_visual.md | 2 +- docs/pipelines/pipelines_sdk_function_decorators.md | 4 ++-- docs/pipelines/pipelines_sdk_tasks.md | 4 ++-- 10 files changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/deploying_clearml/enterprise_deploy/sso_active_directory.md b/docs/deploying_clearml/enterprise_deploy/sso_active_directory.md index e3b4c9c2..03f91b31 100644 --- a/docs/deploying_clearml/enterprise_deploy/sso_active_directory.md +++ b/docs/deploying_clearml/enterprise_deploy/sso_active_directory.md @@ -30,12 +30,12 @@ To configure groups that should automatically become admins in ClearML set the f CLEARML__services__login__sso__saml_client__microsoft_ad__groups__admins=[, , ...] ``` -To change the the default Group Claim set the following environment variable: +To change the default Group Claim, set the following environment variable: ``` CLEARML__services__login__sso__saml_client__microsoft_ad__groups__claim=... 
``` -To make group matching case insensitive set the following environment variable: +To make group matching case-insensitive, set the following environment variable: ``` CLEARML__services__login__sso__saml_client__microsoft_ad__groups__case_sensitive=false ``` diff --git a/docs/deploying_clearml/enterprise_deploy/sso_keycloak.md b/docs/deploying_clearml/enterprise_deploy/sso_keycloak.md index 4462e1af..3674953b 100644 --- a/docs/deploying_clearml/enterprise_deploy/sso_keycloak.md +++ b/docs/deploying_clearml/enterprise_deploy/sso_keycloak.md @@ -10,7 +10,7 @@ browser). In the following sections, you will be instructed to set up different environment variables for the ClearML Server. If using a `docker-compose` deployment, these should be defined in your `docker-compose.override.yaml` file, under the -`apiserver` service’ environment variables, as follows: +`apiserver` service’s environment variables, as follows: ``` services: diff --git a/docs/deploying_clearml/enterprise_deploy/sso_multi_tenant_login.md b/docs/deploying_clearml/enterprise_deploy/sso_multi_tenant_login.md index b1508c4f..cdd3970c 100644 --- a/docs/deploying_clearml/enterprise_deploy/sso_multi_tenant_login.md +++ b/docs/deploying_clearml/enterprise_deploy/sso_multi_tenant_login.md @@ -15,7 +15,7 @@ ClearML tenant can be associated with a particular external tenant /login /login/ ``` -3. Make sure the external tenant ID and groups are returned as claims for a each user +3. 
Make sure the external tenant ID and groups are returned as claims for each user ## Configure ClearML to use Multi-Tenant Mode diff --git a/docs/getting_started/video_tutorials/hands-on_mlops_tutorials/ml_ci_cd_using_github_actions_and_clearml.md b/docs/getting_started/video_tutorials/hands-on_mlops_tutorials/ml_ci_cd_using_github_actions_and_clearml.md index 754415c2..3abecade 100644 --- a/docs/getting_started/video_tutorials/hands-on_mlops_tutorials/ml_ci_cd_using_github_actions_and_clearml.md +++ b/docs/getting_started/video_tutorials/hands-on_mlops_tutorials/ml_ci_cd_using_github_actions_and_clearml.md @@ -202,7 +202,7 @@ you'll get is the best performance here because our checks already run, so you s open the PR, so basically the dummy task here was found to be the best performance, and it has been tagged but that means that every single time I open a PR or I update a PR, it will search ClearML, and get this dummy task. It will get this one, and then we say if we find the best task, if not we'll just add the best performance anyway because you're the -first task in the list, you'll always be getting best performance, but if you're not then we'll get the best latest +first task in the list, you'll always be getting the best performance, but if you're not then we'll get the best latest metric. For example `get_reported_scalars().get('Performance Metric').get('Series 1').get('y')`, so the `y` value there so this could basically be the best or the highest map from a task or the highest F1 score from a task, or any some such. Then you have the best metric. We do the same thing for the current task as well, and then it's fairly easy. We diff --git a/docs/guides/advanced/execute_remotely.md b/docs/guides/advanced/execute_remotely.md index 171dffb4..0a0b709b 100644 --- a/docs/guides/advanced/execute_remotely.md +++ b/docs/guides/advanced/execute_remotely.md @@ -28,7 +28,7 @@ moved to be executed by a stronger machine. 
During the execution of the example script, the code does the following: * Uses ClearML's automatic and explicit logging. -* Creates an task named `Remote_execution PyTorch MNIST train` in the `examples` project. +* Creates a task named `Remote_execution PyTorch MNIST train` in the `examples` project. ## Scalars diff --git a/docs/guides/frameworks/pytorch/pytorch_abseil.md b/docs/guides/frameworks/pytorch/pytorch_abseil.md index da1a4cf7..0975cb91 100644 --- a/docs/guides/frameworks/pytorch/pytorch_abseil.md +++ b/docs/guides/frameworks/pytorch/pytorch_abseil.md @@ -9,7 +9,7 @@ The example script does the following: * Trains a simple deep neural network on the PyTorch built-in [MNIST](https://pytorch.org/vision/stable/datasets.html#mnist) dataset * Creates a task named `pytorch mnist train with abseil` in the `examples` project -* ClearML automatically logs the absl.flags, and the models (and their snapshots) created by PyTorch +* ClearML automatically logs the `absl.flags`, and the models (and their snapshots) created by PyTorch * Additional metrics are logged by calling [`Logger.report_scalar()`](../../../references/sdk/logger.md#report_scalar) ## Scalars diff --git a/docs/guides/frameworks/tensorflow/tensorflow_mnist.md b/docs/guides/frameworks/tensorflow/tensorflow_mnist.md index a4afd8f9..d5abe5f7 100644 --- a/docs/guides/frameworks/tensorflow/tensorflow_mnist.md +++ b/docs/guides/frameworks/tensorflow/tensorflow_mnist.md @@ -4,7 +4,7 @@ title: TensorFlow MNIST The [tensorflow_mnist.py](https://github.com/clearml/clearml/blob/master/examples/frameworks/tensorflow/tensorflow_mnist.py) example demonstrates the integration of ClearML into code that uses TensorFlow and Keras to train a neural network on -the Keras built-in [MNIST](https://www.tensorflow.org/api_docs/python/tf/keras/datasets/mnist) handwritten digits dataset. +the Keras built-in [MNIST](https://www.tensorflow.org/api_docs/python/tf/keras/datasets/mnist) handwritten digit dataset. 
When the script runs, it creates a task named `Tensorflow v2 mnist with summaries` in the `examples` project. diff --git a/docs/hyperdatasets/webapp/webapp_exp_track_visual.md b/docs/hyperdatasets/webapp/webapp_exp_track_visual.md index 569d1fff..57c7d8f6 100644 --- a/docs/hyperdatasets/webapp/webapp_exp_track_visual.md +++ b/docs/hyperdatasets/webapp/webapp_exp_track_visual.md @@ -9,7 +9,7 @@ Dataviews are available under the ClearML Enterprise plan. While a task is running, and any time after it finishes, results are tracked and can be visualized in the ClearML Enterprise WebApp (UI). -In addition to all of ClearML's offerings, ClearML Enterprise keeps track of the Dataviews associated with an +In addition to all of ClearML's offerings, ClearML Enterprise keeps track of the Dataviews associated with a task, which can be viewed and [modified](webapp_exp_modifying.md) in the WebApp. ## Viewing a Task's Dataviews diff --git a/docs/pipelines/pipelines_sdk_function_decorators.md b/docs/pipelines/pipelines_sdk_function_decorators.md index 5345d116..c97216de 100644 --- a/docs/pipelines/pipelines_sdk_function_decorators.md +++ b/docs/pipelines/pipelines_sdk_function_decorators.md @@ -167,8 +167,8 @@ Additionally, you can enable automatic logging of a step's metrics / artifacts / following arguments: * `monitor_metrics` (optional) - Automatically log the step's reported metrics also on the pipeline Task. The expected format is one of the following: - * List of pairs metric (title, series) to log: [(step_metric_title, step_metric_series), ]. Example: `[('test', 'accuracy'), ]` - * List of tuple pairs, to specify a different target metric to use on the pipeline Task: [((step_metric_title, step_metric_series), (target_metric_title, target_metric_series)), ]. + * List of pairs metric (title, series) to log: `[(step_metric_title, step_metric_series), ]`. 
Example: `[('test', 'accuracy'), ]` + * List of tuple pairs, to specify a different target metric to use on the pipeline Task: `[((step_metric_title, step_metric_series), (target_metric_title, target_metric_series)), ]`. Example: `[[('test', 'accuracy'), ('model', 'accuracy')], ]` * `monitor_artifacts` (optional) - Automatically log the step's artifacts on the pipeline Task. * Provided a list of diff --git a/docs/pipelines/pipelines_sdk_tasks.md b/docs/pipelines/pipelines_sdk_tasks.md index f1d03fd8..14068880 100644 --- a/docs/pipelines/pipelines_sdk_tasks.md +++ b/docs/pipelines/pipelines_sdk_tasks.md @@ -221,8 +221,8 @@ You can enable automatic logging of a step's metrics /artifacts / models to the * `monitor_metrics` (optional) - Automatically log the step's reported metrics also on the pipeline Task. The expected format is one of the following: - * List of pairs metric (title, series) to log: [(step_metric_title, step_metric_series), ]. Example: `[('test', 'accuracy'), ]` - * List of tuple pairs, to specify a different target metric to use on the pipeline Task: [((step_metric_title, step_metric_series), (target_metric_title, target_metric_series)), ]. + * List of pairs metric (title, series) to log: `[(step_metric_title, step_metric_series), ]`. Example: `[('test', 'accuracy'), ]` + * List of tuple pairs, to specify a different target metric to use on the pipeline Task: `[((step_metric_title, step_metric_series), (target_metric_title, target_metric_series)), ]`. Example: `[[('test', 'accuracy'), ('model', 'accuracy')], ]` * `monitor_artifacts` (optional) - Automatically log the step's artifacts on the pipeline Task. 
* Provided a list of artifact names created by the step function, these artifacts will be logged automatically also From 8db08052dcf1206c664b1e129133a2884443a275 Mon Sep 17 00:00:00 2001 From: revital Date: Sun, 6 Apr 2025 11:38:31 +0300 Subject: [PATCH 16/17] Add API references for pipelines, reports, serving to sidebars --- sidebars.js | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sidebars.js b/sidebars.js index f8362bf3..24cef3c1 100644 --- a/sidebars.js +++ b/sidebars.js @@ -455,14 +455,17 @@ module.exports = { {'Server API': [ 'references/api/index', 'references/api/definitions', - 'references/api/login', 'references/api/debug', + 'references/api/events', + 'references/api/login', + 'references/api/models', + 'references/api/pipelines', 'references/api/projects', 'references/api/queues', - 'references/api/workers', - 'references/api/events', - 'references/api/models', + 'references/api/reports', + 'references/api/serving', 'references/api/tasks', + 'references/api/workers', ]}, { type: 'category', From 60159fd04e5cb8fa558b7ecf34c33bd159b0987e Mon Sep 17 00:00:00 2001 From: revital Date: Sun, 6 Apr 2025 11:44:07 +0300 Subject: [PATCH 17/17] Add placeholders --- docs/references/api/pipelines.md | 5 +++++ docs/references/api/reports.md | 5 +++++ docs/references/api/serving.md | 5 +++++ 3 files changed, 15 insertions(+) create mode 100644 docs/references/api/pipelines.md create mode 100644 docs/references/api/reports.md create mode 100644 docs/references/api/serving.md diff --git a/docs/references/api/pipelines.md b/docs/references/api/pipelines.md new file mode 100644 index 00000000..25c83989 --- /dev/null +++ b/docs/references/api/pipelines.md @@ -0,0 +1,5 @@ +--- +title: pipelines +--- + +**AutoGenerated PlaceHolder** \ No newline at end of file diff --git a/docs/references/api/reports.md b/docs/references/api/reports.md new file mode 100644 index 00000000..07d25539 --- /dev/null +++ b/docs/references/api/reports.md @@ -0,0 +1,5 
@@ +--- +title: reports +--- + +**AutoGenerated PlaceHolder** \ No newline at end of file diff --git a/docs/references/api/serving.md b/docs/references/api/serving.md new file mode 100644 index 00000000..3482ecfa --- /dev/null +++ b/docs/references/api/serving.md @@ -0,0 +1,5 @@ +--- +title: serving +--- + +**AutoGenerated PlaceHolder** \ No newline at end of file