From 82cb5588c0ab8fe08826b3a7b49e8445da2d1839 Mon Sep 17 00:00:00 2001
From: pollfly <75068813+pollfly@users.noreply.github.com>
Date: Tue, 5 Apr 2022 14:30:30 +0300
Subject: [PATCH] Add ClearML Serving docs (#219)

---
 docs/clearml_serving/clearml_serving.md       | 153 +++++++++++
 docs/clearml_serving/clearml_serving_cli.md   | 129 ++++++++++
 .../clearml_serving_tutorial.md               | 238 ++++++++++++++++++
 sidebars.js                                   |   1 +
 4 files changed, 521 insertions(+)
 create mode 100644 docs/clearml_serving/clearml_serving.md
 create mode 100644 docs/clearml_serving/clearml_serving_cli.md
 create mode 100644 docs/clearml_serving/clearml_serving_tutorial.md

diff --git a/docs/clearml_serving/clearml_serving.md b/docs/clearml_serving/clearml_serving.md
new file mode 100644
index 00000000..00fef1f7
--- /dev/null
+++ b/docs/clearml_serving/clearml_serving.md
@@ -0,0 +1,153 @@
+---
+title: Introduction
+---
+
+`clearml-serving` is a command-line utility for model deployment and orchestration.
+It enables deploying models, including their serving and preprocessing code, to a Kubernetes cluster or a custom 
+container-based solution.
+
+
+## Features
+
+* Easy to deploy & configure
+    * Supports machine learning models (Scikit-Learn, XGBoost, LightGBM)
+    * Supports deep learning models (TensorFlow, PyTorch, ONNX)
+    * Customizable REST API for serving (i.e. allows per-model pre/post-processing for easy integration)
+* Flexible
+    * On-line model deployment
+    * On-line endpoint model/version deployment (i.e. no need to take the service down)
+    * Per model standalone preprocessing and postprocessing python code
+* Scalable
+    * Multiple models per container
+    * Multiple models per serving service
+    * Multi-service support (fully separated, multiple serving services running independently)
+    * Multi cluster support
+    * Out-of-the-box node auto-scaling based on load/usage
+* Efficient
+    * Multi-container resource utilization
+    * Support for CPU & GPU nodes
+    * Auto-batching for DL models
+* [Automatic deployment](clearml_serving_tutorial.md#automatic-model-deployment)
+    * Automatic model upgrades with canary support
+    * Programmable API for model deployment
+* [Canary A/B deployment](clearml_serving_tutorial.md#canary-endpoint-setup) - online Canary updates
+* [Model Monitoring](clearml_serving_tutorial.md#model-monitoring-and-performance-metrics)
+    * Usage metric reporting
+    * Metric dashboard
+    * Model performance metrics
+    * Model performance dashboard
+
+## Components
+
+![ClearML Serving](https://github.com/allegroai/clearml-serving/raw/main/docs/design_diagram.png?raw=true)
+
+* **CLI** - Secure configuration interface for on-line model upgrade/deployment on running Serving Services
+
+* **Serving Service Task** - Control plane object storing the configuration of all the endpoints. Supports multiple separate 
+  instances deployed on multiple clusters.
+
+* **Inference Services** - Inference containers, performing model serving pre/post processing. Also supports CPU model 
+  inferencing.
+
+* **Serving Engine Services** - Inference engine containers (e.g. Nvidia Triton, TorchServe etc.) used by the Inference 
+  Services for heavier model inference.
+
+* **Statistics Service** - Single instance per Serving Service collecting and broadcasting model serving & performance 
+  statistics
+
+* **Time-series DB** - Statistics collection service used by the Statistics Service, e.g. Prometheus
+
+* **Dashboards** - Customizable dashboard solution on top of the collected statistics, e.g. Grafana
+
+## Installation
+### Prerequisites
+
+* ClearML-Server: model repository, service health, control plane
+* Kubernetes / single-instance machine: deploying containers
+* CLI: configuration & model deployment interface
+
+### Initial Setup
+1. Set up your [ClearML Server](../deploying_clearml/clearml_server.md) or use the 
+  [free hosted service](https://app.clear.ml)
+1. Connect `clearml` SDK to the server, see instructions [here](../getting_started/ds/ds_first_steps.md#install-clearml)
+
+1. Install clearml-serving CLI:
+   
+   ```bash
+   pip3 install clearml-serving
+   ```
+
+1. Create the Serving Service Controller:
+   
+   ```bash
+   clearml-serving create --name "serving example"
+   ```
+   
+   The new serving service UID should be printed:
+   
+   ```console
+   New Serving Service created: id=aa11bb22aa11bb22
+   ```
+   
+   Write down the Serving Service UID.
+
+1. Clone the `clearml-serving` repository:
+   ```bash
+   git clone https://github.com/allegroai/clearml-serving.git
+   ```
+
+1. Edit the environment variables file (`docker/example.env`) with your `clearml-server` credentials and Serving Service UID. 
+   For example, you should have something like:
+   ```bash
+   cat docker/example.env
+   ```
+   
+   ```console 
+    CLEARML_WEB_HOST="https://app.clear.ml"
+    CLEARML_API_HOST="https://api.clear.ml"
+    CLEARML_FILES_HOST="https://files.clear.ml"
+    CLEARML_API_ACCESS_KEY="<access_key_here>"
+    CLEARML_API_SECRET_KEY="<secret_key_here>"
+    CLEARML_SERVING_TASK_ID="<serving_service_id_here>"
+   ```
+
+1. Spin up the `clearml-serving` containers with `docker-compose` (or, if running on Kubernetes, use the Helm chart):
+   
+   ```bash
+   cd docker && docker-compose --env-file example.env -f docker-compose.yml up
+   ```
+    
+   If you need Triton support (Keras/PyTorch/ONNX etc.), use the Triton docker-compose file:
+   ```bash
+   cd docker && docker-compose --env-file example.env -f docker-compose-triton.yml up 
+   ```
+   
+   If running on a GPU instance with Triton support (Keras/PyTorch/ONNX etc.), use the Triton GPU docker-compose file:
+   ```bash
+   cd docker && docker-compose --env-file example.env -f docker-compose-triton-gpu.yml up
+   ```
+    
+:::note
+Any model that is registered with the Triton engine will run its pre/post-processing code on the Inference Service container, 
+while the model inference itself will be executed on the Triton Engine container.
+:::
+
+### Advanced Setup - S3/GS/Azure Access (Optional)
+To add access credentials and allow the inference containers to download models from your S3/GS/Azure object storage, 
+add the respective environment variables to your env file (`example.env`). See further details on configuring storage 
+access [here](../integrations/storage.md#configuring-storage).
+
+```
+AWS_ACCESS_KEY_ID
+AWS_SECRET_ACCESS_KEY
+AWS_DEFAULT_REGION
+
+GOOGLE_APPLICATION_CREDENTIALS
+
+AZURE_STORAGE_ACCOUNT
+AZURE_STORAGE_KEY
+```
+
+## Tutorial
+
+For further details, see the ClearML Serving [Tutorial](clearml_serving_tutorial.md).
\ No newline at end of file
diff --git a/docs/clearml_serving/clearml_serving_cli.md b/docs/clearml_serving/clearml_serving_cli.md
new file mode 100644
index 00000000..da0ec42f
--- /dev/null
+++ b/docs/clearml_serving/clearml_serving_cli.md
@@ -0,0 +1,129 @@
+---
+title: CLI 
+--- 
+
+The `clearml-serving` utility is a CLI tool for model deployment and orchestration. 
+
+The following page provides a reference for `clearml-serving`'s CLI commands:
+* [list](#list) -  List running Serving Services
+* [create](#create) - Create a new Serving Service
+* [metrics](#metrics) - Configure inference metrics Service
+* [config](#config) - Configure a new Serving Service
+* [model](#model) - Configure Model endpoints for a running Service
+
+
+```bash
+clearml-serving [-h] [--debug] [--id ID] {list,create,metrics,config,model} 
+```
+
+**Parameters**
+
+<div className="tbl-cmd">
+
+|Name|Description|Optional|
+|---|---|---|
+|`--id`|Serving Service (control plane) Task ID to configure. If not provided, the running control plane Task is detected automatically | <img src="/docs/latest/icons/ico-optional-no.svg" alt="No" className="icon size-md center-md" /> |
+|`--debug` |  Print debug messages | <img src="/docs/latest/icons/ico-optional-yes.svg" alt="Yes" className="icon size-md center-md" /> |
+
+</div>
+
+:::info Service ID
+The Serving Service's ID (`--id`) is required to execute the `metrics`, `config`, and `model` commands.
+:::
+
+### list
+```bash
+clearml-serving list [-h]
+```
+
+List running Serving Services. 
+
+### create
+
+```bash
+clearml-serving create [-h] [--name NAME] [--tags TAGS [TAGS ...]] [--project PROJECT]
+```
+
+Create a new Serving Service
+
+**Parameters**
+
+<div className="tbl-cmd">
+
+|Name|Description|Optional|
+|---|---|---|
+|`--name` |Serving service's name. Default: `Serving-Service`| <img src="/docs/latest/icons/ico-optional-no.svg" alt="No" className="icon size-md center-md" /> |
+|`--project`|Serving service's project. Default: `DevOps`| <img src="/docs/latest/icons/ico-optional-no.svg" alt="No" className="icon size-md center-md" /> |
+|`--tags` |Serving service's user tags. The serving service can be labeled, which can be useful for organizing | <img src="/docs/latest/icons/ico-optional-yes.svg" alt="Yes" className="icon size-md center-md" />|
+
+</div>
+
+### metrics
+
+Configure inference metrics Service
+
+```bash
+clearml-serving metrics [-h] {add,remove,list}
+```
+
+**Parameters**
+
+<div className="tbl-cmd">
+
+|Name|Description|Optional|
+|---|---|---|
+|`add` | Add/modify metric for a specific endpoint| <img src="/docs/latest/icons/ico-optional-yes.svg" alt="Yes" className="icon size-md center-md" /> |
+|`remove` | Remove metric from a specific endpoint| <img src="/docs/latest/icons/ico-optional-yes.svg" alt="Yes" className="icon size-md center-md" /> |
+|`list` | List metrics logged on all endpoints | <img src="/docs/latest/icons/ico-optional-yes.svg" alt="Yes" className="icon size-md center-md" /> |
+
+</div>
+
+<br/>
+
+### config
+
+Configure a new Serving Service. 
+
+```bash
+clearml-serving config [-h] [--base-serving-url BASE_SERVING_URL] [--triton-grpc-server TRITON_GRPC_SERVER] [--kafka-metric-server KAFKA_METRIC_SERVER] [--metric-log-freq METRIC_LOG_FREQ]
+```
+
+**Parameters**
+
+<div className="tbl-cmd">
+
+|Name|Description|Optional|
+|---|---|---|
+|`--base-serving-url`|External base serving service URL. Example: `http://127.0.0.1:8080/serve`|<img src="/docs/latest/icons/ico-optional-yes.svg" alt="Yes" className="icon size-md center-md" />|
+|`--triton-grpc-server`|External ClearML-Triton serving container gRPC address. Example: `127.0.0.1:9001`|<img src="/docs/latest/icons/ico-optional-yes.svg" alt="Yes" className="icon size-md center-md" />|
+|`--kafka-metric-server`|External Kafka service URL. Example: `127.0.0.1:9092`|<img src="/docs/latest/icons/ico-optional-yes.svg" alt="Yes" className="icon size-md center-md" />|
+|`--metric-log-freq`|Set the default metric logging frequency. For example, 1.0 means that 100% of all requests are logged|<img src="/docs/latest/icons/ico-optional-yes.svg" alt="Yes" className="icon size-md center-md" />|
+
+</div>
+
+<br/>
+
+### model
+
+Configure Model endpoints for an already running Service
+
+```bash
+clearml-serving model [-h] {list,remove,upload,canary,auto-update,add}
+```
+
+**Parameters**
+
+<div className="tbl-cmd">
+
+|Name|Description|Optional|
+|---|---|---|
+|`list`| List current models| <img src="/docs/latest/icons/ico-optional-yes.svg" alt="Yes" className="icon size-md center-md" /> |
+|`remove`| Remove a model by its endpoint name | <img src="/docs/latest/icons/ico-optional-yes.svg" alt="Yes" className="icon size-md center-md" /> |
+|`upload` | Upload and register model files/folder | <img src="/docs/latest/icons/ico-optional-yes.svg" alt="Yes" className="icon size-md center-md" />|
+|`canary` | Add model Canary/A/B endpoint | <img src="/docs/latest/icons/ico-optional-yes.svg" alt="Yes" className="icon size-md center-md" />|
+|`auto-update` | Add/modify the model auto-update service | <img src="/docs/latest/icons/ico-optional-yes.svg" alt="Yes" className="icon size-md center-md" />|
+|`add` | Add/update a model | <img src="/docs/latest/icons/ico-optional-yes.svg" alt="Yes" className="icon size-md center-md" />|
+
+</div>
+
+<br/>
diff --git a/docs/clearml_serving/clearml_serving_tutorial.md b/docs/clearml_serving/clearml_serving_tutorial.md
new file mode 100644
index 00000000..548b9c33
--- /dev/null
+++ b/docs/clearml_serving/clearml_serving_tutorial.md
@@ -0,0 +1,238 @@
+---
+title: Tutorial
+---
+
+In this tutorial, we will go over the model lifecycle -- from training to serving -- in the following steps:
+* Training a model using the [sklearn example script](https://github.com/allegroai/clearml-serving/blob/main/examples/sklearn/train_model.py) 
+* Serving the model using **ClearML Serving**  
+* Spinning up the inference container
+
+The tutorial will also go over these additional options that you can use with `clearml-serving`: 
+* Automatic model deployment 
+* Canary endpoints
+* Model performance monitoring 
+
+## Prerequisite
+
+Before executing the steps below, make sure you have completed `clearml-serving`'s [initial setup](clearml_serving.md#initial-setup).
+
+##  Steps
+### Step 1: Train Model
+
+Train a model. Work from your local `clearml-serving` repository's root: 
+  - Create a Python virtual environment
+  - Install the script requirements: `pip3 install -r examples/sklearn/requirements.txt`
+  - Execute the [training script](https://github.com/allegroai/clearml-serving/blob/main/examples/sklearn/train_model.py): 
+    `python3 examples/sklearn/train_model.py`
+  
+During execution, ClearML automatically registers the sklearn model and uploads it to the model repository. 
+For manual model registration, see [Registering & Deploying New Models Manually](#registering--deploying-new-models-manually) below.
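+
+For reference, below is a condensed sketch of what the training script does (assuming `clearml`, `scikit-learn`, and 
+`joblib` are installed); the key point is that calling `Task.init()` with `output_uri=True` makes ClearML automatically 
+register and upload the model file saved by `joblib.dump()`. The data here is a placeholder, so see the example script 
+for the real code.
+
+```python
+import joblib
+import numpy as np
+from sklearn.linear_model import LogisticRegression
+from clearml import Task
+
+# Initializing a Task enables automatic model registration;
+# output_uri=True uploads saved model files to the ClearML file server
+task = Task.init(
+    project_name="serving examples",
+    task_name="train sklearn model",
+    output_uri=True,
+)
+
+# Placeholder training data (the example script uses its own dataset)
+X = np.random.rand(100, 2)
+y = (X[:, 0] + X[:, 1] > 1).astype(int)
+
+model = LogisticRegression().fit(X, y)
+
+# The saved file is picked up by ClearML and registered as an output model
+joblib.dump(model, "sklearn-model.pkl", compress=True)
+```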
+
+### Step 2: Register Model
+
+Register the new Model on the Serving Service. 
+
+```bash
+clearml-serving --id <service_id> model add --engine sklearn --endpoint "test_model_sklearn" --preprocess "examples/sklearn/preprocess.py" --name "train sklearn model" --project "serving examples"
+```
+
+:::info Service ID
+Make sure that you have executed `clearml-serving`'s
+[initial setup](clearml_serving.md#initial-setup), in which you create a Serving Service. 
+The Serving Service's ID is required to register a model, and to execute `clearml-serving`'s `metrics` and `config` commands. 
+:::
+
+  
+:::note
+The preprocessing Python code is packaged and uploaded to the Serving Service, to be used by any inference container, 
+and is downloaded in real time when updated.
+:::
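+
+For orientation, here is a minimal sketch of what such preprocessing code can look like, modeled on 
+`examples/sklearn/preprocess.py`. The exact method signatures vary between `clearml-serving` versions, so treat this as 
+illustrative and check [`preprocess_template.py`](https://github.com/allegroai/clearml-serving/blob/main/clearml_serving/preprocess/preprocess_template.py) 
+for the authoritative interface.
+
+```python
+from typing import Any
+
+
+class Preprocess(object):
+    """Per-endpoint pre/post-processing; note the class must be named Preprocess."""
+
+    def __init__(self):
+        # Called once when the endpoint is loaded, not per request
+        pass
+
+    def preprocess(self, body: dict, *args, **kwargs) -> Any:
+        # Convert the JSON request body into the model's input format
+        return [[body.get("x0"), body.get("x1")]]
+
+    def postprocess(self, data: Any, *args, **kwargs) -> dict:
+        # Wrap the raw model prediction in a JSON-serializable response
+        return {"y": data.tolist() if hasattr(data, "tolist") else data}
+```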
+
+### Step 3: Spin Inference Container
+
+Spin up the Inference Container:
+  - Customize the container [Dockerfile](https://github.com/allegroai/clearml-serving/blob/main/clearml_serving/serving/Dockerfile) if needed
+  - Build the container:
+    ```bash
+    docker build --tag clearml-serving-inference:latest -f clearml_serving/serving/Dockerfile .
+    ```
+  - Spin the inference container: 
+    ```bash
+    docker run -v ~/clearml.conf:/root/clearml.conf -p 8080:8080 -e CLEARML_SERVING_TASK_ID=<service_id> -e CLEARML_SERVING_POLL_FREQ=5 clearml-serving-inference:latest
+    ``` 
+
+Now, test the new model inference endpoint:
+```bash
+curl -X POST "http://127.0.0.1:8080/serve/test_model_sklearn" -H "accept: application/json" -H "Content-Type: application/json" -d '{"x0": 1, "x1": 2}'
+```  
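+
+The same test request can also be issued from Python application code; it is an ordinary HTTP POST (this snippet assumes 
+the `requests` package is installed and the inference container is listening on `localhost:8080`):
+
+```python
+import requests
+
+# POST the same JSON payload as the curl example above
+response = requests.post(
+    "http://127.0.0.1:8080/serve/test_model_sklearn",
+    json={"x0": 1, "x1": 2},
+)
+print(response.json())
+```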
+
+Now that you have an inference container running, you can add new model inference endpoints directly with the CLI. The 
+inference container will automatically sync once every 5 minutes. On the first few requests, the inference container 
+needs to download the model file and the preprocessing Python code, so those requests may take a little longer. Once 
+everything is cached, responses return almost immediately.
+
+:::note
+Review the model repository in the ClearML web UI, under the "serving examples" Project on your ClearML 
+account/server ([free hosted](https://app.clear.ml) or [self-deployed](https://github.com/allegroai/clearml-server)).
+
+Inference service status, console outputs, and machine metrics are available in the ClearML UI in the Serving Service 
+project (default: "DevOps" project).
+:::
+
+## Registering & Deploying New Models Manually 
+
+Uploading an existing model file into the model repository can be done via the `clearml` REST API, the Python interface, 
+or the `clearml-serving` CLI. 
+
+1. Upload the model file to the `clearml-server` file storage and register it. The `--path` parameter specifies 
+   the path to a local model file.
+
+   ```bash
+   clearml-serving --id <service_id> model upload --name "manual sklearn model" --project "serving examples" --framework "scikit-learn" --path examples/sklearn/sklearn-model.pkl
+   ```
+    
+   You now have a new Model named `manual sklearn model` in the `serving examples` project. The CLI output prints 
+   the UID of the new model, which you will use to register a new endpoint. 
+
+   In the [ClearML web UI](../webapp/webapp_overview.md), the new model is listed under the **Models** tab of its project. 
+   You can also download the model file itself directly from the web UI. 
+
+1. Register a new endpoint with the new model
+   ```bash
+   clearml-serving --id <service_id> model add --engine sklearn --endpoint "test_model_sklearn" --preprocess "examples/sklearn/preprocess.py" --model-id <newly_created_model_id_here>
+   ```
+
+:::info Model Storage
+You can also provide a different storage destination for the model, such as S3/GS/Azure, by passing
+`--destination="s3://bucket/folder"`, `gs://bucket/folder`, or `azure://bucket/folder`. There is no need to provide a unique 
+path in the destination argument; the model will be stored in a unique path based on the serving service ID and the 
+model name.
+:::
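+
+If you prefer the Python interface mentioned above to the CLI upload, a rough sketch of the same step might look like the 
+following (the `OutputModel` arguments here are illustrative; consult the `clearml` SDK reference for details):
+
+```python
+from clearml import Task, OutputModel
+
+# A task to attach the uploaded model to
+task = Task.init(project_name="serving examples", task_name="manual model upload")
+
+# Create a model entry and upload the local weights file to the ClearML file server
+model = OutputModel(task=task, name="manual sklearn model", framework="ScikitLearn")
+model.update_weights(weights_filename="examples/sklearn/sklearn-model.pkl")
+
+# Use this ID with: clearml-serving model add --model-id <id>
+print(model.id)
+```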
+
+## Additional Options 
+
+### Automatic Model Deployment
+
+The ClearML Serving Service supports automatic model deployment and upgrades, directly connected to the model 
+repository and API. When model auto-deploy is configured, new model versions will be automatically deployed when you 
+`publish` or `tag` a new model in the ClearML model repository. This automation interface allows for a simpler CI/CD model 
+deployment process, as a single API call can automatically deploy (or remove) a model from the Serving Service.
+
+#### Automatic Model Deployment Example
+
+1. Configure the model auto-update on the Serving Service
+   
+   ```bash
+   clearml-serving --id <service_id> model auto-update --engine sklearn --endpoint "test_model_sklearn_auto" --preprocess "preprocess.py" --name "train sklearn model" --project "serving examples" --max-versions 2
+   ```
+1. Deploy the Inference container (if not already deployed)
+1. Publish a new model to the model repository in one of the following ways:
+    - Go to the "serving examples" project in the ClearML web UI, click the Models tab, search for "train sklearn model", right-click it, and select "Publish"
+    - Use the RestAPI (see [details](https://clear.ml/docs/latest/docs/references/api/models#post-modelspublish_many))
+    - Use Python interface:
+   
+    ```python
+    from clearml import Model
+    Model(model_id="unique_model_id_here").publish()
+    ```
+1. The new model is now available on a new endpoint version (`1`). Test it with: 
+   ```bash
+   curl -X POST "http://127.0.0.1:8080/serve/test_model_sklearn_auto/1" -H "accept: application/json" -H "Content-Type: application/json" -d '{"x0": 1, "x1": 2}'
+   ```
+
+### Canary Endpoint Setup
+
+Canary endpoint deployment adds a new endpoint where the actual requests are sent to a preconfigured set of endpoints 
+according to a pre-provided distribution. For example, let's create a new endpoint, "test_model_sklearn_canary", and provide 
+a list of endpoints and probabilities (weights).
+
+```bash
+clearml-serving --id <service_id> model canary --endpoint "test_model_sklearn_canary" --weights 0.1 0.9 --input-endpoints test_model_sklearn/2 test_model_sklearn/1
+```
+This means that any request coming to `/test_model_sklearn_canary/` will be routed with a probability of 90% to
+`/test_model_sklearn/1/` and with a probability of 10% to `/test_model_sklearn/2/`. 
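+
+To make the weighting concrete, the following toy snippet (plain Python, not part of `clearml-serving`) simulates how 
+10,000 requests would be distributed between the two endpoint versions:
+
+```python
+import random
+
+endpoints = ["test_model_sklearn/1", "test_model_sklearn/2"]
+weights = [0.9, 0.1]  # 90% to version 1, 10% to version 2
+
+counts = {endpoint: 0 for endpoint in endpoints}
+for _ in range(10_000):
+    counts[random.choices(endpoints, weights=weights)[0]] += 1
+
+print(counts)  # roughly 9,000 requests to version 1 and 1,000 to version 2
+```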
+
+:::note
+As with any other Serving Service configuration, you can configure the Canary endpoint while the Inference containers are 
+already running and deployed; they will be updated in their next update cycle (default: once every 5 minutes).
+:::
+
+You can also prepare a "fixed" canary endpoint, always splitting the load between the last two deployed models:
+
+```bash
+clearml-serving --id <service_id> model canary --endpoint "test_model_sklearn_canary" --weights 0.1 0.9 --input-endpoints-prefix test_model_sklearn/
+```
+
+This means that you have two model inference endpoints: `/test_model_sklearn/1/` and `/test_model_sklearn/2/`. The 10% 
+probability (weight 0.1) will match the last (ordered by version number) endpoint, i.e. `/test_model_sklearn/2/`, and the 
+90% (weight 0.9) will match `/test_model_sklearn/1/`. When you add a new model endpoint version, e.g. `/test_model_sklearn/3/`, 
+the canary distribution will automatically match the 90% probability to `/test_model_sklearn/2/` and the 10% to the new 
+endpoint `/test_model_sklearn/3/`.  
+
+Example:
+1. Add two endpoints:
+   ```bash
+   clearml-serving --id <service_id> model add --engine sklearn --endpoint "test_model_sklearn" --preprocess "examples/sklearn/preprocess.py" --name "train sklearn model" --version 1 --project "serving examples"
+   ```
+   ```bash
+   clearml-serving --id <service_id> model add --engine sklearn --endpoint "test_model_sklearn" --preprocess "examples/sklearn/preprocess.py" --name "train sklearn model" --version 2 --project "serving examples"
+   ```  
+
+1. Add Canary endpoint:
+   ```bash
+   clearml-serving --id <service_id> model canary --endpoint "test_model_sklearn_canary" --weights 0.1 0.9 --input-endpoints test_model_sklearn/2 test_model_sklearn/1
+   ```  
+
+1. Test Canary endpoint:
+   ```bash
+   curl -X POST "http://127.0.0.1:8080/serve/test_model_sklearn_canary" -H "accept: application/json" -H "Content-Type: application/json" -d '{"x0": 1, "x1": 2}'
+   ```
+
+### Model Monitoring and Performance Metrics
+
+![Grafana Screenshot](https://github.com/allegroai/clearml-serving/raw/main/docs/grafana_screenshot.png)
+
+ClearML Serving instances automatically send serving statistics (count/latency) to Prometheus, and Grafana can be used 
+to visualize them and create live dashboards. 
+
+The default docker-compose installation is preconfigured with Prometheus and Grafana. Notice that, by default, the data 
+of both containers is *not* persistent. To add persistence, we recommend adding a volume mount.
+
+You can also add many custom metrics on the inputs/predictions of your models. Once a model endpoint is registered, 
+adding custom metrics can be done using the CLI.
+
+For example, assume the mock scikit-learn model is deployed on the endpoint `test_model_sklearn`. You can log the request 
+inputs and outputs (see the examples/sklearn/preprocess.py example):
+
+```bash
+clearml-serving --id <serving_service_id_here> metrics add --endpoint test_model_sklearn \
+    --variable-scalar x0=0,0.1,0.5,1,10 x1=0,0.1,0.5,1,10 y=0,0.1,0.5,0.75,1
+```
+
+This will create a distribution histogram (buckets specified via a list of less-than-or-equal values after the `=` sign), 
+which you will be able to visualize in Grafana.
+
+:::info time-series values
+You can also log time-series values with `--variable-value x2` or discrete results (e.g. classification strings) with 
+`--variable-enum animal=cat,dog,sheep`. Additional custom variables can be added in the preprocess and postprocess code with 
+a call to `collect_custom_statistics_fn({'new_var': 1.337})`. See [`preprocess_template.py`](https://github.com/allegroai/clearml-serving/blob/main/clearml_serving/preprocess/preprocess_template.py).
+:::
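+
+As a sketch of that last option, reporting an extra value from your preprocessing code could look like this. The callback 
+is supplied by the serving container at request time; a `print` stands in for it so the snippet runs standalone, and the 
+method signature may differ between `clearml-serving` versions:
+
+```python
+from typing import Any
+
+
+class Preprocess(object):
+    def preprocess(self, body: dict, collect_custom_statistics_fn=None, **kwargs) -> Any:
+        if collect_custom_statistics_fn:
+            # Logged alongside the built-in metrics and visible in Grafana
+            collect_custom_statistics_fn({"new_var": 1.337})
+        return [[body.get("x0"), body.get("x1")]]
+
+
+if __name__ == "__main__":
+    Preprocess().preprocess({"x0": 1, "x1": 2}, collect_custom_statistics_fn=print)
+```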
+
+With the new metrics logged, you can create a visualization dashboard over the latency of the calls, and the output distribution. 
+
+#### Grafana Model Performance Example
+
+1. Browse to `http://localhost:3000`
+1. Log in with: admin/admin
+1. Create a new dashboard
+1. Select Prometheus as data source
+1. Add a query: `100 * increase(test_model_sklearn:_latency_bucket[1m]) / increase(test_model_sklearn:_latency_sum[1m])`
+1. Change the graph type to heatmap, and on the right-hand side, under "Data Format", select "Time series buckets". You now have 
+   the latency distribution over time.
+1. Repeat the same process for `x0`; the query would be `100 * increase(test_model_sklearn:x0_bucket[1m]) / increase(test_model_sklearn:x0_sum[1m])`
+
+:::note
+By default, all serving requests are logged. This can be changed with the `CLEARML_DEFAULT_METRIC_LOG_FREQ` 
+environment variable. For example, `CLEARML_DEFAULT_METRIC_LOG_FREQ=0.2` means that only 20% of all requests will be logged. 
+You can also specify a per-endpoint log frequency with the `clearml-serving` CLI. See [`clearml-serving metrics`](clearml_serving_cli.md#metrics).
+:::
\ No newline at end of file
diff --git a/sidebars.js b/sidebars.js
index d771ace8..4f97819c 100644
--- a/sidebars.js
+++ b/sidebars.js
@@ -19,6 +19,7 @@ module.exports = {
         'clearml_agent',
         {'ClearML Data': ['clearml_data/clearml_data', 'clearml_data/clearml_data_cli', 'clearml_data/clearml_data_sdk', 'clearml_data/best_practices',
                 {'Workflows': ['clearml_data/data_management_examples/workflows', 'clearml_data/data_management_examples/data_man_simple', 'clearml_data/data_management_examples/data_man_folder_sync', 'clearml_data/data_management_examples/data_man_cifar_classification', 'clearml_data/data_management_examples/data_man_python']},]},
+        {'ClearML Serving':['clearml_serving/clearml_serving', 'clearml_serving/clearml_serving_cli', 'clearml_serving/clearml_serving_tutorial']},
         {'CLI Tools': ['apps/clearml_session', 'apps/clearml_task']},
         'integrations/libraries',
         'integrations/storage',