From 54229b2be3c7cabb166db6e6ae1ea35ca87dd165 Mon Sep 17 00:00:00 2001 From: revital Date: Thu, 22 May 2025 13:50:07 +0300 Subject: [PATCH] multi_tenant_k8s.md --- .../enterprise_deploy/multi_tenant_k8s.md | 948 ++++++------------ 1 file changed, 314 insertions(+), 634 deletions(-) diff --git a/docs/deploying_clearml/enterprise_deploy/multi_tenant_k8s.md b/docs/deploying_clearml/enterprise_deploy/multi_tenant_k8s.md index b6d2e19e..49dd9679 100644 --- a/docs/deploying_clearml/enterprise_deploy/multi_tenant_k8s.md +++ b/docs/deploying_clearml/enterprise_deploy/multi_tenant_k8s.md @@ -4,600 +4,252 @@ title: Multi-Tenant Service on Kubernetes This guide provides step-by-step instructions for installing a ClearML multi-tenant service on a Kubernetes cluster. -It covers the installation and configuration steps necessary to set up ClearML in a cloud environment, including -enabling specific features and setting up necessary components. - -## Prerequisites - -* A Kubernetes cluster -* Credentials for the ClearML Enterprise Helm chart repository -* Credentials for the ClearML Enterprise DockerHub repository -* Credentials for the ClearML billing DockerHub repository -* URL for downloading the ClearML Enterprise applications configuration -* ClearML Billing server Helm chart - -## Setting up ClearML Helm Repository - -You need to add the ClearML Enterprise Helm repository to your local Helm setup. This repository contains the Helm -charts required for deploying the ClearML Server and its components. - -To add the ClearML Enterprise repository using the following command. Replace `` with the private tokens sent to -you by ClearML: - -``` -helm repo add allegroai-enterprise --username --password -``` - -## Enabling Dynamic MIG GPUs - -Allocating GPU fractions dynamically make use of the NVIDIA GPU operator. - -1. Add the NVIDIA Helm repository: - - ``` - helm repo add nvidia - helm repo update - ``` - -2. 
Install the NVIDIA GPU operator with the following configuration: - - ``` - helm install -n gpu-operator \\ - gpu-operator \\ - nvidia/gpu-operator \\ - --create-namespace \\ - --set migManager.enabled=false \\ - --set mig.strategy=mixed - ``` - -## Install CDMO Chart - -The ClearML Dynamic MIG Operator (CDMO) enables running AI workloads on k8s with optimized hardware utilization and -workload performance by facilitating MIG GPUs partitioning. - -1. Prepare the `overrides.yaml` file so it will contain the following content. Replace `` - with the private token provided by ClearML: - - ``` - imageCredentials: - password: "" - ``` - -2. Install the CDMO chart: - - ``` - helm install -n cdmo-operator \\ - cdmo \\ - allegroai-enterprise/clearml-dynamic-mig-operator \\ - --create-namespace \\ - -f overrides.yaml - ``` - -### Enable MIG support - -1. Enable dynamic MIG support on your cluster by running the following command on **all nodes used for training** (run - for **each GPU** ID in your cluster): - - ``` - nvidia-smi -i -mig 1 - ``` - - This command can be issued from inside the `nvidia-device-plugin-daemonset` pod on the related node. - - If the result of the previous command indicates that a node reboot is necessary, perform the reboot. - -2. After enabling MIG support, label the MIG GPU nodes accordingly. This labeling helps in identifying nodes configured - with MIG support for resource management and scheduling: - - ``` - kubectl label nodes "cdmo.clear.ml/gpu-partitioning=mig" - ``` - -## Install ClearML Chart - -Install the ClearML chart with the required configuration: - -1. Prepare the `overrides.yaml` file and input the following content. Make sure to replace `` and `` - with a valid domain that will have records pointing to the ingress controller accordingly. - The credentials specified in `` and `` can be used to log in as the - supervisor user in the web UI. - - Note that the `` value must be explicitly quoted. 
To do so, put `\\"` around the quoted value. - For example `"\\"email@example.com\\””`. - - ``` - imageCredentials: - password: "" - clearml: - cookieDomain: "" - apiserver: - image: - tag: "3.21.6-1443" - ingress: - enabled: true - hostName: "api." - service: - type: ClusterIP - extraEnvs: - - name: CLEARML__billing__enabled: - value: "true" - - name: CLEARML__HOSTS__KAFKA__BILLING__HOST - value: "[clearml-billing-kafka.clearml-billing:9092]" - - name: CLEARML__HOSTS__REDIS__BILLING__HOST - value: clearml-billing-redis-master.clearml-billing - - name: CLEARML__HOSTS__REDIS__BILLING__DB - value: "2" - - name: CLEARML__SECURE__KAFKA__BILLING__security_protocol - value: SASL_PLAINTEXT - - name: CLEARML__SECURE__KAFKA__BILLING__sasl_mechanism - value: SCRAM-SHA-512 - - name: CLEARML__SECURE__KAFKA__BILLING__sasl_plain_username - value: billing - - name: CLEARML__SECURE__KAFKA__BILLING__sasl_plain_password - value: "jdhfKmsd1" - - name: CLEARML__secure__login__sso__oauth_client__auth0__client_id - value: "" - - name: CLEARML__secure__login__sso__oauth_client__auth0__client_secret - value: "" - - name: CLEARML__services__login__sso__oauth_client__auth0__base_url - value: "" - - name: CLEARML__services__login__sso__oauth_client__auth0__authorize_url - value: "" - - name: CLEARML__services__login__sso__oauth_client__auth0__access_token_url - value: "" - - name: CLEARML__services__login__sso__oauth_client__auth0__audience - value: "" - - name: CLEARML__services__organization__features__user_management_advanced - value: "true" - - name: CLEARML__services__auth__ui_features_per_role__user__show_datasets - value: "false" - - name: CLEARML__services__auth__ui_features_per_role__user__show_orchestration - value: "false" - - name: CLEARML__services__applications__max_running_apps_per_company - value: "3" - - name: CLEARML__services__auth__default_groups__users__features - value: "[\\"applications\\"]" - - name: CLEARML__services__auth__default_groups__admins__features - value: 
"[\\"config_vault\\", \\"experiments\\", \\"queues\\", \\"show_projects\\", \\"resource_dashboard\\", \\"user_management\\", \\"user_management_advanced\\", \\"app_management\\", \\"sso_management\\", \\"service_users\\", \\"resource_policy\\"]" - - name: CLEARML__services__workers__resource_usages__supervisor_company - value: "d1bd92a3b039400cbafc60a7a5b1e52b" # Default company - - name: CLEARML__secure__credentials__supervisor__role - value: "system" - - name: CLEARML__secure__credentials__supervisor__allow_login - value: "true" - - name: CLEARML__secure__credentials__supervisor__user_key - value: "" - - name: CLEARML__secure__credentials__supervisor__user_secret - value: "" - - name: CLEARML__secure__credentials__supervisor__sec_groups - value: "[\\"users\\", \\"admins\\", \\"queue_admins\\"]" - - name: CLEARML__secure__credentials__supervisor__email - value: "\\"\\"" - - name: CLEARML__apiserver__company__unique_names - value: "true" - fileserver: - ingress: - enabled: true - hostName: "file." - service: - type: ClusterIP - webserver: - image: - tag: "3.21.3-1657" - ingress: - enabled: true - hostName: "app." - service: - type: ClusterIP - clearmlApplications: - enabled: true - ``` - -2. Install ClearML: - - ``` - helm install -n clearml \\ - clearml \\ - allegroai-enterprise/clearml-enterprise \\ - --create-namespace \\ - -f overrides.yaml - ``` - -## Shared Redis installation - -Set up a shared Redis instance that multiple components of your ClearML deployment can use: - -1. lf not there already, add Bitnami repository: - - ``` - helm repo add bitnami - ``` - -2. Prepare the `overrides.yaml` with the following content: - - ``` - auth: - password: "sdkWoq23" - ``` - -3. Install Redis: - - ``` - helm install -n redis-shared \\ - redis \\ - bitnami/redis \\ - --create-namespace \\ - --version=17.8.3 \\ - -f overrides.yaml - ``` - -## Install Billing Chart - -The billing chart is not available as part of the ClearML private Helm repo. 
`clearml-billing-1.1.0.tgz` is directly -provided by the ClearML team. - -1. Prepare `values.override.yaml` - Create the file with the following content, replacing `` - with the appropriate value: - - ``` - imageCredentials: - username: dockerhubcustpocbillingaccess - password: "" - ``` - -1. Install the billing chart: - - ``` - helm install -n clearml-billing \\ - clearml-billing \\ - clearml-billing-1.0.0.tgz \\ - --create-namespace \\ - -f overrides.yaml - ``` - -## Namespace Isolation using Network Policies - -For enhanced security, isolate namespaces using the following NetworkPolicies: - -``` -apiVersion: networking.k8s.io/v1 -kind: NetworkPolicy -metadata: - name: default-deny-ingress - namespace: clearml -spec: - podSelector: {} - policyTypes: - - Ingress - ingress: - - from: - - podSelector: {} +Ready, missing links in TODOs --- -apiVersion: networking.k8s.io/v1 -kind: NetworkPolicy -metadata: - name: allow-clearml-ingress - namespace: clearml -spec: - podSelector: - matchLabels: - app.kubernetes.io/name: clearml-clearml-enterprise - policyTypes: - - Ingress - ingress: - - from: - - ipBlock: - cidr: 0.0.0.0/0 +TODO: +Control Plane: +- Link: SSO login +- Additional envs for control-plane multi-tenancy + +Workers: +- Link: basic Agent installation +- Link: basic AI App Gateway installation + --- -apiVersion: networking.k8s.io/v1 -kind: NetworkPolicy -metadata: - name: allow-clearml-ingress - namespace: clearml-billing -spec: - podSelector: {} - policyTypes: - - Ingress - ingress: - - from: - - podSelector: {} - - namespaceSelector: - matchLabels: - kubernetes.io/metadata.name: clearml + +## Control Plane + +For installing the ClearML control-plane, follow [this guide](k8s.md). 
+ +Update the Server's `clearml-values.override.yaml` with the following values: + +```yaml +apiserver: + extraEnvs: + - name: CLEARML__services__organization__features__user_management_advanced + value: "true" + - name: CLEARML__services__auth__ui_features_per_role__user__show_datasets + value: "false" + - name: CLEARML__services__auth__ui_features_per_role__user__show_orchestration + value: "false" + - name: CLEARML__services__workers__resource_usages__supervisor_company + value: "d1bd92a3b039400cbafc60a7a5b1e52b" # Default company + - name: CLEARML__secure__credentials__supervisor__role + value: "system" + - name: CLEARML__secure__credentials__supervisor__allow_login + value: "true" + - name: CLEARML__secure__credentials__supervisor__user_key + value: "" + - name: CLEARML__secure__credentials__supervisor__user_secret + value: "" + - name: CLEARML__secure__credentials__supervisor__sec_groups + value: "[\"users\", \"admins\", \"queue_admins\"]" + - name: CLEARML__secure__credentials__supervisor__email + value: "\"\"" + - name: CLEARML__apiserver__company__unique_names + value: "true" ``` -## Application Installation +The credentials specified in `` and `` can be used to log in as the +supervisor user from the ClearML Web UI accessible using the URL `app.`. -To install ClearML GUI applications: +Note that the `` value must be explicitly quoted. To do so, put `\"` around the quoted value. +Example `"\"email@example.com\""`. -1. Get the apps to install and the installation script by downloading and extracting the archive provided by ClearML +You will want to configure SSO as well. For this, follow the [SSO (Identity Provider) Setup guide](extra_configs/sso_login.md). - ``` - wget -O apps.zip "" - unzip apps.zip - ``` +### Create a Tenant -2. Install the apps: +The following section will address the steps required to create a new tenant in the ClearML Server using a series of API +calls. 
- ``` - python upload_apps.py \\ --host $APISERVER_ADDRESS \\ --user $APISERVER_USER --password $APISERVER_PASSWORD \\ --dir apps -ml - ``` +Note that placeholders (``) in the following configuration should be substituted with a valid domain based +on your installation values. -## Tenant Configuration +#### Create a New Tenant in the ClearML Control-plane -Create tenants and corresponding admin users, and set up an SSO domain whitelist for secure access. To configure tenants, -follow these steps (all requests must be authenticated by root or admin). Note that placeholders like `` -must be substituted with valid domain names or values from responses. +*Define variables to use in the next steps:* -1. Define the following variables: - - ``` + ```bash APISERVER_URL="https://api." APISERVER_KEY="" APISERVER_SECRET="" ``` -2. Create a **Tenant** (company): +:::note +The apiserver key and secret should be the same as those used for installing the ClearML Enterprise server Chart. +::: - ``` - curl $APISERVER_URL/system.create_company \\ - -H "Content-Type: application/json" \\ - -u $APISERVER_KEY:$APISERVER_SECRET \\ - -d '{"name":""}' +*Create a Tenant (company):* + + ```bash + curl $APISERVER_URL/system.create_company \ + -H "Content-Type: application/json" \ + -u $APISERVER_KEY:$APISERVER_SECRET \ + -d '{"name":""}' ``` - This returns the new Company ID (``). If needed, you can list all companies with the following command: +The result returns the new Company ID (``). - ``` +If needed, list existing tenants (companies) using: + + ```bash curl -u $APISERVER_KEY:$APISERVER_SECRET $APISERVER_URL/system.get_companies ``` -3. 
Create an **Admin User**: +*Create an Admin User for the new tenant:* - ``` - curl $APISERVER_URL/auth.create_user \\ - -H "Content-Type: application/json" \\ - -u $APISERVER_KEY:$APISERVER_SECRET \\ - -d '{"name":"","company":"","email":"","role":"admin"}' + ```bash + curl $APISERVER_URL/auth.create_user \ + -H "Content-Type: application/json" \ + -u $APISERVER_KEY:$APISERVER_SECRET \ + -d '{"name":"","company":"","email":"","role":"admin","internal":"true"}' ``` - This returns the new User ID (``). +The result returns the new User ID (``). -4. Generate **Credentials** for the new Admin User: +*Create Credentials for the new Admin User:* - ``` - curl $APISERVER_URL/auth.create_credentials \\ - -H "Content-Type: application/json" \\ - -H "X-Clearml-Impersonate-As: " \\ - -u $APISERVER_KEY:$APISERVER_SECRET + ```bash + curl $APISERVER_URL/auth.create_credentials \ + -H "Content-Type: application/json" \ + -H "X-Clearml-Impersonate-As: " \ + -u $APISERVER_KEY:$APISERVER_SECRET ``` - This returns a set of key and secret credentials associated with the new Admin User. +The result returns a set of key and secret credentials associated with the new Admin User. -5. Create an SSO Domain **Whitelist**. The `` is the email domain setup for users to access through SSO. +:::note +You can use this set of credentials to set up an Agent or App Gateway for the newly created Tenant. +::: - ``` - curl $APISERVER_URL/login.set_domains \\ - -H "Content-Type: application/json" \\ - -H "X-Clearml-Act-As: " \\ - -u $APISERVER_KEY:$APISERVER_SECRET \\ - -d '{"domains":[""]}' +#### Create IDP/SSO Sign-in Rules + +To map new users signing into the system to existing tenants, you can use one or more of the following route methods to route new users (based on their email address) to an existing tenant. 
+ +*Route an email to a tenant based on the email's domain:* + +This will instruct the server to assign any new user whose email domain matches the domain provided below to this specific tenant. + +Note that providing the same domain name for multiple tenants will result in unstable behavior and should be avoided. + + ```bash + curl $APISERVER_URL/login.set_domains \ + -H "Content-Type: application/json" \ + -H "X-Clearml-Act-As: " \ + -u $APISERVER_KEY:$APISERVER_SECRET \ + -d '{"domains":[""]}' ``` -### Install ClearML Agent Chart +`` is the email domain set up for users to access through SSO. -To install the ClearML Agent Chart, follow these steps: +*Route specific email(s) to a tenant:* -1. Prepare the `overrides.yaml` file with the following content. Make sure to replace placeholders like - ``, ``, and `` with the appropriate values: +This will instruct the server to assign any new user whose email is found in this list to this specific tenant. You can use the is_admin property to choose whether these users will be set as admins in this tenant upon login. - ``` - imageCredentials: - password: "" - clearml: - agentk8sglueKey: "-" # TODO --> Generate credentials from API in the new tenant - agentk8sglueSecret: "-" # TODO --> Generate credentials from API in the new tenant - agentk8sglue: - extraEnvs: - - name: CLEARML_K8S_SUPPORT_SUSPENSION - value: "1" - - name: CLEARML_K8S_PORTS_MODE_ON_REQUEST_ONLY - value: "1" - - name: CLEARML_AGENT_REDIS_HOST - value: "redis-master.redis-shared" - - name: CLEARML_AGENT_REDIS_PORT - value: "6379" - - name: CLEARML_AGENT_REDIS_DB - value: "0" - - name: CLEARML_AGENT_REDIS_PASSWORD - value: "sdkWoq23" - image: - tag: 1.24-1.8.1rc99-159 - monitoredResources: - maxResources: 3 - minResourcesFieldName: "metadata|labels|required-resources" - maxResourcesFieldName: "metadata|labels|required-resources" - apiServerUrlReference: "https://api." - fileServerUrlReference: "https://file." - webServerUrlReference: "https://app." 
- defaultContainerImage: "python:3.9" - debugMode: true - createQueues: true - queues: - default: - templateOverrides: - labels: - required-resources: "0.5" - billing-monitored: "true" - queueSettings: - maxPods: 10 - gpu-fraction-1_00: - templateOverrides: - labels: - required-resources: "1" - billing-monitored: "true" - resources: - limits: - nvidia.com/mig-7g.40gb: 1 - clear.ml/fraction-1: "1" - queueSettings: - maxPods: 10 - gpu-fraction-0_50: - templateOverrides: - labels: - required-resources: "0.5" - billing-monitored: "true" - resources: - limits: - nvidia.com/mig-3g.20gb: 1 - clear.ml/fraction-1: "0.5" - queueSettings: - maxPods: 10 - gpu-fraction-0_25: - templateOverrides: - labels: - required-resources: "0.25" - billing-monitored: "true" - resources: - limits: - nvidia.com/mig-2g.10gb: 1 - clear.ml/fraction-1: "0.25" - queueSettings: - maxPods: 10 - sessions: - portModeEnabled: false # set to true when using TCP ports mode - agentID: "" - externalIP: 0.0.0.0 # IP of one of the workers - startingPort: 31010 # be careful to not overlap other tenants (startingPort + maxServices) - maxServices: 10 - ``` +Note that you can create more than one list per tenant (using multiple API calls) to create one list for admin users and another for non-admin users. -2. Install the ClearML Agent Chart in the specified tenant namespace: +Note that including the same email address in more than a single tenant’s list will result in unstable behavior and should be avoided. - ``` - helm install -n \\ - clearml-agent \\ - allegroai-enterprise/clearml-enterprise-agent \\ - --create-namespace \\ - -f overrides.yaml - ``` - -3. 
Create a queue via the API: - - ``` - curl $APISERVER_URL/queues.create \\ - -H "Content-Type: application/json" \\ - -H "X-Clearml-Impersonate-As: 75557e2ab172405bbe153705e91d1782" \\ - -u $APISERVER_KEY:$APISERVER_SECRET \\ - -d '{"name":"default"}' - ``` - -### Tenant Namespace Isolation with NetworkPolicies - -To ensure network isolation for each tenant, you need to create a `NetworkPolicy` in the tenant namespace. This way -the entire namespace/tenant will not accept any connection from other namespaces. - -Create a `NetworkPolicy` in the tenant namespace with the following configuration: - - ``` - apiVersion: networking.k8s.io/v1 - kind: NetworkPolicy - metadata: - name: default-deny-ingress - spec: - podSelector: {} - policyTypes: - - Ingress - ingress: - - from: - - podSelector: {} - ``` - -### Install the App Gateway Router Chart - -Install the App Gateway Router in your Kubernetes cluster, allowing it to manage and route tasks: - -1. Prepare the `overrides.yaml` file with the following content: - - ``` - imageCredentials: - password: "" - clearml: - apiServerUrlReference: "" - apiserverKey: "" - apiserverSecret: "" - ingress: - enabled: true - hostName: "" - ``` - -2. Install App Gateway Router in the specified tenant namespace: - - ``` - helm install -n \\ - clearml-ttr \\ - clearml-enterprise/clearml-task-traffic-router \\ - --create-namespace \\ - -f overrides.yaml - ``` - -## Configuring Options per Tenant - -### Override Options When Creating a New Tenant - -When creating a new tenant company, you can specify several tenant options. These include: - -* `features` - Add features to a company -* `exclude_features` - Exclude features from a company. -* `allowed_users` - Set the maximum number of users for a company. 
- -#### Example: Create a New Tenant with a Specific Feature Set - -``` -curl $APISERVER_URL/system.create_company \ --H "Content-Type: application/json" \ --u $APISERVER_KEY:$APISERVER_SECRET \ --d '{"name":"", "defaults": { "allowed_users": "10", "features": ["experiments"], "exclude_features": ["app_management", "applications", "user_management"] }}' +```bash +curl $APISERVER_URL/login.add_whitelist_entries \ + -H "Content-Type: application/json" \ + -H "X-Clearml-Act-As: " \ + -u $APISERVER_KEY:$APISERVER_SECRET \ + -d '{"emails":["", "", ...],"is_admin":false}' ``` -**Note**: make sure to replace the `` placeholder. - -### Limit Features for all Users - -This Helm Chart value in the `overrides.yaml` will have priority over all tenants, and will limit the features -available to any user in the system. This means that even if the feature is enabled for the tenant, if it's not in this -list, the user will not see it. - -Example: all users will only have the `applications` feature enabled. +To remove existing email(s) from these lists, use the following API call. Note that this will not affect a user who has already logged in using one of these email addresses: +```bash +curl $APISERVER_URL/login.remove_whitelist_entries \ + -H "Content-Type: application/json" \ + -H "X-Clearml-Act-As: " \ + -u $APISERVER_KEY:$APISERVER_SECRET \ + -d '{"emails":["", "", ...]}' ``` + +*Get the current login routing settings:* + +To get the current IDP/SSO login rule settings for this tenant: + +```bash +curl $APISERVER_URL/login.get_settings \ + -H "X-Clearml-Act-As: " \ + -u $APISERVER_KEY:$APISERVER_SECRET +``` + +### Limit Features for all Users in a Group + +The server's `clearml-values.override.yaml` can control some tenant configurations, limiting the features available to some users or groups in the system. + +Example: with the following configuration, all users in the `users` group will only have the `applications` feature enabled. 
+ +```yaml apiserver: extraEnvs: - name: CLEARML__services__auth__default_groups__users__features value: "[\"applications\"]" ``` -**Available Features**: +The list of available features is in the Appendix of this page: [Available Features](#available-features) -* `applications` - Viewing and running applications -* `data_management` - Working with hyper-datasets and dataviews -* `experiments` - Viewing experiment table and launching experiments -* `queues` - Viewing the queues screen -* `queue_management` - Creating and deleting queues -* `pipelines` - Viewing/managing pipelines in the system -* `reports` - Viewing and managing reports in the system -* `show_dashboard` - Show the dashboard screen -* `show_projects` - Show the projects menu option -* `resource_dashboard` - Display the resource dashboard in the orchestration page +## Workers +Refer to the following pages for installing and configuring the [ClearML Enterprise Agent](agent_k8s.md) and [App Gateway](appgw.md). -## Configuring Groups +:::note +Make sure to set up Agent and App Gateway using a Tenant's admin user credentials created with the Tenant creation APIs described above. +::: + +### Tenants Separation + +In multi-tenant setups, you can separate the tenants' workers into different namespaces. + +Create a Kubernetes Namespace for each tenant and install a dedicated ClearML Agent and AI Application Gateway in each Namespace. + +A tenant Agent and Gateway need to be configured with credentials created on the ClearML server by a user of the same tenant. + +Additional network separation can be achieved via Kubernetes Network Policies. + +## Additional Configuration + +### Override Options When Creating a New Tenant + +When creating a new tenant company, you can specify several tenant options. These include: + +* `features` - Add features to a company. +* `exclude_features` - Exclude features from a company. +* `allowed_users` - Set the maximum number of users for a company. 
+ +```bash +curl $APISERVER_URL/system.create_company \ + -H "Content-Type: application/json" \ + -u $APISERVER_KEY:$APISERVER_SECRET \ + -d '{"name":"", "defaults": { "allowed_users": "10", "features": ["experiments"], "exclude_features": ["app_management", "applications", "user_management"] }}' +``` + +### Limit Features for all Users + +This value in the `clearml-values.override.yaml` will have priority over all tenants, and will limit the features available to any user in the system. This means that even if the feature is enabled for the tenant, if it's not in this list, the user will not see it. + +Example: all users will only have the applications feature enabled. + +```yaml +apiserver: + extraEnvs: + - name: CLEARML__services__auth__default_groups__users__features + value: "[\"applications\"]" +``` + +A list of available features is available at the Appendix of this page: [Available Features](#available-features) + +### Configuring Groups Groups in ClearML are used to manage user permissions and control access to specific features within the platform. -The following section explains the different types of groups and how to configure them, with a focus on configuration-based, -cross-tenant groups. +The following section explains the different types of groups and how to configure them, with a focus on configuration-based, cross-tenant groups. -### Types of Groups +#### Types of Groups ClearML utilizes several types of groups: * **Built-in Groups** - These groups exist by default in every ClearML installation: @@ -616,7 +268,7 @@ ClearML utilizes several types of groups: -### Configuring Cross-Tenant Groups +#### Configuring Cross-Tenant Groups To define a cross-tenant group, you need to set specific configuration variables. These are typically set as environment variables for the relevant ClearML services (like `apiserver`). The naming convention follows this @@ -624,7 +276,7 @@ pattern: `CLEARML__services__auth__default_groups____`. 
Replace `` with the desired name for your group (e.g., `my_group_name`, `Data_Scientists`, `MLOps_Engineers`). -#### Configuration Variables +##### Configuration Variables For each group you define in the configuration, you need to specify the following properties: @@ -660,12 +312,12 @@ For each group you define in the configuration, you need to specify the followin * Variable Name: `CLEARML__services__auth__default_groups____system` * Example Value: `"false"` -#### Example Configuration +##### Example Configuration The following example demonstrates how you would define a group named `my_group_name` with a specific set of features that cannot be assigned via the UI: -``` +```yaml # Example configuration snippet (e.g., in Helm values.yaml or docker-compose.yml environment section) # Unique group id for my_group_name @@ -689,6 +341,125 @@ that cannot be assigned via the UI: value: "false" ``` +### Feature Assignment Strategy + +#### Combining Features + +If a user belongs to multiple groups (e.g., the default `users` group and a custom `my_group_name` group), their +effective feature set is the **union** (combination) of all features from all groups they belong to. + +#### Configuring the Default 'users' Group + +Because all users belong to the `users` group, and features are combined, it's crucial to configure the `users` group +appropriately. You generally have two options: + +1. **Minimum Shared Features:** Assign only the absolute minimum set of features that *every single user* should have to + the `users` group. +2. **Empty Feature Set:** Assign an empty list (`[]`) to the `users` group's features. This means users only get features + explicitly granted by other groups they are members of. This is often the cleanest approach when using multiple custom groups. 
+ +**Example: Disabling all features by default for the `users` group:** + +```yaml +- name: CLEARML__services__auth__default_groups__users__features + value: '[]' +``` + +:::note +You typically don't need to define the id, name, assignable, or system properties for built-in groups like users unless +you need to override default behavior, but you do configure their features. +::: + + +#### Setting Server-Level or Tenant-level Features + +A feature must be enabled for the entire server or for the tenant before it can be set for specific groups. +Setting a server-wide feature is done using a different configuration pattern: `CLEARML__services__organization__features__`. + +Setting one of these variables to `"true"` enables the feature globally. + +**Example: Enabling `user_management_advanced` for the entire organization:** + +```yaml +- name: CLEARML__services__organization__features__user_management_advanced + value: "true" +``` + +To enable a feature for a specific tenant, use the following API call: + +```bash +curl $APISERVER_URL/system.update_company_settings \ + -H "Content-Type: application/json" \ + -u $APISERVER_KEY:$APISERVER_SECRET \ + -d '{ + "company": "", + "features": ["sso_management", "user_management_advanced", ...] +}' +``` + +By default, all users have access to all features, but this can be changed by setting a specific feature set per group as described above. + +#### Example: Defining Full Features for Admins + +While the `admins` group has inherent administrative privileges, you might want to explicitly ensure they have access to +*all* configurable features defined via the `features` list, especially if you've restricted the default `users` group +significantly. You might also need to enable certain features organization-wide. 
+ +```yaml +# Enable advanced user management for the whole organization +- name: CLEARML__services__organization__features__user_management_advanced + value: "true" + +# (Optional but good practice) Explicitly assign all features to the built-in admins group +- name: CLEARML__services__auth__default_groups__admins__features + value: '["user_management", "user_management_advanced", "permissions", "applications", "app_management", "queues", "queue_management", "data_management", "config_vault", "pipelines", "reports", "resource_dashboard", "sso_management", "service_users", "resource_policy", "model_serving", "show_dashboard", "show_model_view", "show_projects"]' # List all relevant features + +# You might still want to define other custom groups with fewer features... +# - name: CLEARML__services__auth__default_groups__my_group_name__id +# value: "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" # Replace with a newly generated UUID +# - name: CLEARML__services__auth__default_groups__my_group_name__name +# value: "my_group_name" +# - name: CLEARML__services__auth__default_groups__my_group_name__features +# value: '["some_feature", "another_feature"]' +# - name: CLEARML__services__auth__default_groups__my_group_name__assignable +# value: "false" +# - name: CLEARML__services__auth__default_groups__my_group_name__system +# value: "false" +``` + +By combining configuration-defined groups, careful management of the default users group features, and organization-level +settings, you can create a flexible and secure permission model tailored to your ClearML deployment. Remember to +restart the relevant ClearML services after applying configuration changes. + +### Per-Tenant Applications Settings + +You may want your users' applications in different tenants to have their own configuration and template on Kubernetes. For this reason, the ClearML Enterprise Server and Agent support different queue modes: + +- `global` (default) - A single Apps Agent on the control-plane. 
The application's controllers will start on the control-plane. +- `per_tenant` - Multiple Apps Agents, one per tenant (will need `agentk8sglue.appsQueue.enabled=true` on Agents). The application's controllers will start on the worker. + +Configure the Server's `clearml-values.override.yaml`: + +```yaml +clearmlApplications: + queueMode: "per_tenant" +``` + +Configure the Agent's `clearml-agent-values.override.yaml`: + +```yaml +agentk8sglue: + appsQueue: + enabled: true + # -- Here you can define queueSettings and templateOverrides as for other queues. + # queueSettings: + # templateOverrides: +``` + +**Note**: this feature requires the Agent to be configured using internal admin credentials as previously mentioned in the "Create an Admin User for the new tenant" section, making sure to pass `"internal":"true"` and using the output credentials for `clearml.agentk8sglueKey` and `clearml.agentk8sglueSecret` (or `existingAgentk8sglueSecret`). + +## Appendix + ### Available Features The following features can be assigned to groups via the `features` configuration variable: @@ -716,95 +487,4 @@ The following features can be assigned to groups via the `features` configuratio | `show_model_view` | Makes the "Models" menu item visible in the UI sidebar. | No | | `show_projects` | Makes the "Projects" menu item visible in the UI sidebar. | No | | `show_orchestration` | Makes the "Orchestration" menu item visible in the UI sidebar. | Available from apiserver version 3.25 | -| `show_datasets` | Makes the "Datasets" menu item visible in the UI sidebar. | Available from apiserver version 3.25 | - -### Feature Assignment Strategy - -#### Combining Features - -If a user belongs to multiple groups (e.g., the default `users` group and a custom `my_group_name` group), their -effective feature set is the **union** (combination) of all features from all groups they belong to. 
- -#### Configuring the Default 'users' Group - -Because all users belong to the `users` group, and features are combined, it's crucial to configure the `users` group -appropriately. You generally have two options: - -1. **Minimum Shared Features:** Assign only the absolute minimum set of features that *every single user* should have to - the `users` group. -2. **Empty Feature Set:** Assign an empty list (`[]`) to the `users` group's features. This means users only get features - explicitly granted by other groups they are members of. This is often the cleanest approach when using multiple custom groups. - -**Example: Disabling all features by default for the `users` group:** - -``` -- name: CLEARML__services__auth__default_groups__users__features - value: '[]' - -``` - -:::note -You typically don't need to define the id, name, assignable, or system properties for built-in groups like users unless -you need to override default behavior, but you do configure their features. -::: - - -### Setting Server-Level or Tenant-level Features - -Features must be enabled for the entire server or for the tenant in order to allow setting it for specific groups. -Setting server wide feature is done using a different configuration pattern: `CLEARML__services__organization__features__`. - -Setting one of these variables to `"true"` enables the feature globally. - -**Example: Enabling `user_management_advanced` for the entire organization:** - -``` -- name: CLEARML__services__organization__features__user_management_advanced - value: "true" -``` - -To enable a feature for a specific tenant, use the following API call: - -``` -curl $APISERVER_URL/system.update_company_settings \ - -H "Content-Type: application/json" \ - -u $APISERVER_KEY:$APISERVER_SECRET \ - -d '{ - "company": "", - "features": ["sso_management", "user_management_advanced", ...] 
-}' -``` - -By default, all users have access to all features, but this can be changed by setting specific features set per group as described above. - -### Example: Defining Full Features for Admins - -While the `admins` group has inherent administrative privileges, you might want to explicitly ensure they have access to -*all* configurable features defined via the `features` list, especially if you've restricted the default `users` group -significantly. You might also need to enable certain features organization-wide. - -``` -# Enable advanced user management for the whole organization -- name: CLEARML__services__organization__features__user_management_advanced - value: "true" - -# (Optional but good practice) Explicitly assign all features to the built-in admins group -- name: CLEARML__services__auth__default_groups__admins__features - value: '["user_management", "user_management_advanced", "permissions", "applications", "app_management", "queues", "queue_management", "data_management", "config_vault", "pipelines", "reports", "resource_dashboard", "sso_management", "service_users", "resource_policy", "model_serving", "show_dashboard", "show_model_view", "show_projects"]' # List all relevant features - -# You might still want to define other custom groups with fewer features... 
-# - name: CLEARML__services__auth__default_groups__my_group_name__id -# value: "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" # Replace with a newly generated UUID -# - name: CLEARML__services__auth__default_groups__my_group_name__name -# value: "my_group_name" -# - name: CLEARML__services__auth__default_groups__my_group_name__features -# value: '["some_feature", "another_feature"]' -# - name: CLEARML__services__auth__default_groups__my_group_name__assignable -# value: "false" -# - name: CLEARML__services__auth__default_groups__my_group_name__system -# value: "false" -``` - -By combining configuration-defined groups, careful management of the default users group features, and organization-level -settings, you can create a flexible and secure permission model tailored to your ClearML deployment. Remember to -restart the relevant ClearML services after applying configuration changes. +| `show_datasets` | Makes the "Datasets" menu item visible in the UI sidebar. | Available from apiserver version 3.25 | \ No newline at end of file