Mirror of https://github.com/clearml/clearml-serving (synced 2025-06-26 18:16:00 +00:00)

Commit 20bd0a5c34: Merge remote-tracking branch 'origin/update-fork'
@@ -5,7 +5,7 @@
 **ClearML Serving - Model deployment made easy**

-## **`clearml-serving v1.2` </br> :sparkles: Model Serving (ML/DL) Made Easy :tada:**
+## **`clearml-serving v1.3` </br> :sparkles: Model Serving (ML/DL) Made Easy :tada:** <br> :fire: NEW version 1.3 :rocket: 20% faster !
 [](https://img.shields.io/github/license/allegroai/clearml-serving.svg)
@@ -420,7 +420,7 @@ def cli():
        help='Optional: Add tags to the newly created model')
    parser_model_upload.add_argument(
        '--project', type=str, required=True,
-       help='Specifying the project for the model tp be registered in')
+       help='Specifying the project for the model to be registered in')
    parser_model_upload.add_argument(
        '--framework', type=str,
        choices=[p for p in Framework.__dict__.keys()
@@ -1,17 +1,17 @@
-FROM nvcr.io/nvidia/tritonserver:22.04-py3
+FROM nvcr.io/nvidia/tritonserver:22.08-py3

 ENV LC_ALL=C.UTF-8

 # install base package
-RUN pip3 install --no-cache-dir clearml-serving
+RUN python3 -m pip install --no-cache-dir -U pip clearml-serving

 # get latest execution code from the git repository
 # RUN cd $HOME && git clone https://github.com/allegroai/clearml-serving.git
 COPY clearml_serving /root/clearml/clearml_serving

-RUN pip3 install --no-cache-dir -r /root/clearml/clearml_serving/engines/triton/requirements.txt
+RUN python3 -m pip install --no-cache-dir -r /root/clearml/clearml_serving/engines/triton/requirements.txt

 # default serving port
 EXPOSE 8001
@@ -1,6 +1,6 @@
 clearml >= 1.3.1
 clearml-serving
-tritonclient[grpc]>=2.25,<2.26
+tritonclient[grpc]>=2.32,<2.33
 starlette
 grpcio
 Pillow>=9.0.1,<10
@@ -24,12 +24,18 @@ class Preprocess(object):
         # it will also set the internal model_endpoint to reference the specific model endpoint object being served
         self.model_endpoint = None  # type: clearml_serving.serving.endpoints.ModelEndpoint

-    def load(self, local_file_name: str) -> Optional[Any]:  # noqa
+    def load(self, local_file_name: str) -> Any:  # noqa
         """
         Optional: provide loading method for the model
         useful if we need to load a model in a specific way for the prediction engine to work
+
+        Notice! When used with specific engines (i.e. not Custom)
+        The returned object will be passed as is to the inference engine,
+        this means it must not be None, otherwise the endpoint will be ignored!
+
         :param local_file_name: file name / path to read load the model from
-        :return: Object that will be called with .predict() method for inference
+
+        :return: Object that will be called with .predict() method for inference.
         """
         pass
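For the Custom engine, whatever `load()` returns is the object later handed to `process()`; for the built-in engines it is passed straight to the inference engine and must not be None. A minimal sketch of a concrete `load()`, assuming a joblib-serialized scikit-learn style model (the library choice and file format are illustrative, not part of this commit):

```python
from typing import Any

import joblib  # assumed to be available in the serving container


class Preprocess(object):
    def load(self, local_file_name: str) -> Any:
        # Deserialize the model file that clearml-serving downloaded locally.
        # The returned object is what the engine (or process()) will call .predict() on,
        # so it must not be None.
        return joblib.load(local_file_name)
```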
@@ -1,5 +1,5 @@
 # works with python 3.8 as well.
-ARG PYTHON_VERSION=3.9
+ARG PYTHON_VERSION=3.11

 FROM python:${PYTHON_VERSION}-bullseye
@@ -9,7 +9,6 @@ import itertools
 import threading
 from multiprocessing import Lock
 import asyncio
-from numpy import isin
 from numpy.random import choice

 from clearml import Task, Model
@@ -288,6 +287,9 @@ class ModelRequestProcessor(object):
                 name=preprocess_artifact_name, artifact_object=Path(preprocess_code), wait_on_upload=True)
             endpoint.preprocess_artifact = preprocess_artifact_name

+        # register the model
+        self._add_registered_input_model(endpoint_url=endpoint.serving_url, model_id=endpoint.model_id)
+
         self._endpoints[url] = endpoint
         return url
@@ -348,6 +350,7 @@ class ModelRequestProcessor(object):
         if endpoint_url not in self._endpoints:
             return False
         self._endpoints.pop(endpoint_url, None)
+        self._remove_registered_input_model(endpoint_url)
         return True

     def add_canary_endpoint(
@@ -688,17 +691,14 @@ class ModelRequestProcessor(object):
                 if not model:
                     # this should never happen
                     continue
-                ep = ModelEndpoint(
-                    engine_type=model.engine_type,
-                    serving_url=serving_base_url,
-                    model_id=model_id,
-                    version=str(version),
-                    preprocess_artifact=model.preprocess_artifact,
-                    input_size=model.input_size,
-                    input_type=model.input_type,
-                    output_size=model.output_size,
-                    output_type=model.output_type
-                )
+                model_endpoint_config = {
+                    i: j for i, j in model.as_dict(remove_null_entries=True).items()
+                    if hasattr(ModelEndpoint.__attrs_attrs__, i)
+                }
+                model_endpoint_config["serving_url"] = serving_base_url
+                model_endpoint_config["model_id"] = model_id
+                model_endpoint_config["version"] = str(version)
+                ep = ModelEndpoint(**model_endpoint_config)
                 self._model_monitoring_endpoints[url] = ep
                 dirty = True
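The rewritten construction above filters the monitored model's serialized configuration down to the fields that `ModelEndpoint` (an attrs class) actually declares, then overrides the per-version values before instantiating it. A toy sketch of the filtering idea, using a made-up attrs class in place of `ModelEndpoint`:

```python
import attr


@attr.s
class ToyEndpoint(object):
    # stand-in for ModelEndpoint; only these fields are declared
    serving_url = attr.ib(default="")
    model_id = attr.ib(default="")
    version = attr.ib(default="")


raw_config = {"serving_url": "base", "model_id": "abc123", "obsolete_field": 42}

# __attrs_attrs__ exposes the declared attributes by name, so unknown keys are dropped
filtered = {k: v for k, v in raw_config.items() if hasattr(ToyEndpoint.__attrs_attrs__, k)}
ep = ToyEndpoint(**filtered)
print(ep)  # ToyEndpoint(serving_url='base', model_id='abc123', version='')
```

This keeps endpoint creation robust when `model.as_dict()` carries keys that `ModelEndpoint` no longer (or does not yet) define.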
@@ -706,6 +706,7 @@ class ModelRequestProcessor(object):
         for ep_url in list(self._model_monitoring_endpoints.keys()):
             if not any(True for url in self._model_monitoring_versions if ep_url.startswith(url+"/")):
                 self._model_monitoring_endpoints.pop(ep_url, None)
+                self._remove_registered_input_model(ep_url)
                 dirty = True

         # reset flag
@@ -714,6 +715,9 @@ class ModelRequestProcessor(object):
         if dirty:
             config_dict = {k: v.as_dict(remove_null_entries=True) for k, v in self._model_monitoring_endpoints.items()}
             self._task.set_configuration_object(name='model_monitoring_eps', config_dict=config_dict)
+            for m in self._model_monitoring_endpoints.values():
+                # log us on the main task
+                self._add_registered_input_model(endpoint_url=m.serving_url, model_id=m.model_id)

         return dirty
@@ -1299,3 +1303,37 @@ class ModelRequestProcessor(object):
         if not endpoint.auxiliary_cfg and missing:
             raise ValueError("Triton engine requires input description - missing values in {}".format(missing))
         return True
+
+    def _add_registered_input_model(self, endpoint_url: str, model_id: str) -> bool:
+        """
+        Add registered endpoint url, return True if successful
+        """
+        if not self._task or not model_id or not endpoint_url:
+            return False
+
+        # noinspection PyBroadException
+        try:
+            self._task.set_input_model(model_id=model_id, name=endpoint_url.strip("/"))
+        except Exception:
+            return False
+
+        return True
+
+    def _remove_registered_input_model(self, endpoint_url: str) -> bool:
+        """
+        Remove registered endpoint url, return True if successful
+        """
+        if not self._task or not endpoint_url:
+            return False
+
+        # noinspection PyBroadException
+        try:
+            # we assume we have the API version to support it
+            from clearml.backend_api.services import tasks
+            self._task.send(tasks.DeleteModelsRequest(
+                task=self._task.id, models=[dict(name=endpoint_url.strip("/"), type=tasks.ModelTypeEnum.input)]
+            ))
+        except Exception:
+            return False
+
+        return True
@@ -16,6 +16,7 @@ from .endpoints import ModelEndpoint
 class BasePreprocessRequest(object):
     __preprocessing_lookup = {}
     __preprocessing_modules = set()
+    _grpc_env_conf_prefix = "CLEARML_GRPC_"
     _default_serving_base_url = "http://127.0.0.1:8080/serve/"
     _server_config = {}  # externally configured by the serving inference service
     _timeout = None  # timeout in seconds for the entire request, set in __init__
@@ -246,13 +247,14 @@ class BasePreprocessRequest(object):
 @BasePreprocessRequest.register_engine("triton", modules=["grpc", "tritonclient"])
 class TritonPreprocessRequest(BasePreprocessRequest):
     _content_lookup = {
+        getattr(np, 'int', int): 'int_contents',
         np.uint8: 'uint_contents',
         np.int8: 'int_contents',
         np.int64: 'int64_contents',
         np.uint64: 'uint64_contents',
         np.int32: 'int_contents',
         np.uint: 'uint_contents',
-        np.bool: 'bool_contents',
+        getattr(np, 'bool', bool): 'bool_contents',
         np.float32: 'fp32_contents',
         np.float64: 'fp64_contents',
     }
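The `getattr` fallbacks line up with the `numpy>=1.24,<1.25` pin later in this commit: NumPy 1.24 removed the deprecated `np.int` and `np.bool` aliases, so the lookup keys fall back to the builtin `int` and `bool` on new NumPy while keeping the old aliases on older releases. A minimal illustration:

```python
import numpy as np

# On NumPy >= 1.24 the np.int / np.bool aliases no longer exist, so getattr
# returns the builtin types; on older NumPy the original aliases are returned.
int_key = getattr(np, 'int', int)
bool_key = getattr(np, 'bool', bool)
print(int_key, bool_key)
```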
@@ -325,8 +327,20 @@ class TritonPreprocessRequest(BasePreprocessRequest):
         if self._grpc_stub.get(tid):
             grpc_stub = self._grpc_stub.get(tid)
         else:
+            channel_opt = []
+            for k, v in os.environ.items():
+                if str(k).startswith(self._grpc_env_conf_prefix):
+                    try:
+                        v = int(v)
+                    except:  # noqa
+                        try:
+                            v = float(v)
+                        except:  # noqa
+                            pass
+                    channel_opt.append(('grpc.{}'.format(k[len(self._grpc_env_conf_prefix):]), v))
+
             try:
-                channel = self._ext_grpc.aio.insecure_channel(triton_server_address)
+                channel = self._ext_grpc.aio.insecure_channel(triton_server_address, options=channel_opt or None)
                 grpc_stub = self._ext_service_pb2_grpc.GRPCInferenceServiceStub(channel)
                 self._grpc_stub[tid] = grpc_stub
             except Exception as ex:
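The new loop turns every `CLEARML_GRPC_*` environment variable into a gRPC channel option (the prefix is stripped and `grpc.` is prepended), so the Triton client channel can be tuned without code changes. A minimal sketch, assuming you want to raise the standard gRPC message-size limits (these option names are ordinary gRPC channel arguments, not flags defined by clearml-serving):

```python
import os

# Hypothetical tuning values, set before the serving inference process starts.
os.environ["CLEARML_GRPC_max_send_message_length"] = str(512 * 1024 * 1024)
os.environ["CLEARML_GRPC_max_receive_message_length"] = str(512 * 1024 * 1024)

# The loop above would convert these into channel options roughly equivalent to:
#   [("grpc.max_send_message_length", 536870912),
#    ("grpc.max_receive_message_length", 536870912)]
# and pass them as `options=` to grpc.aio.insecure_channel().
```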
@@ -1,19 +1,19 @@
-clearml>=1.3.1
+clearml>=1.10.1,<2
 attrs>=20.3.0,<21
-fastapi[all]>=0.75.0,<0.76
+fastapi[all]>=0.95.0,<0.96
 uvicorn[standard]
 gunicorn>=20.1.0,<20.2
-asyncio>=3.4.3,<3.5
-aiocache>=0.11.1,<0.12
-tritonclient[grpc]>=2.25,<2.26
+asyncio>=3.4.3,<3.5 ; python_version < '3.10'
+aiocache>=0.12,<0.13
+tritonclient[grpc]>=2.32,<2.33
 starlette
-numpy>=1.20,<1.24
-scikit-learn>=1.0.2,<1.1
-pandas>=1.0.5,<1.5
+numpy>=1.24,<1.25
+scikit-learn>=1.2.2,<1.3
+pandas>=1.5.3,<1.6
 grpcio
 Pillow>=9.0.1,<10
-xgboost>=1.5.2,<1.6
+xgboost>=1.7.5,<1.8
 lightgbm>=3.3.2,<3.4
-requests>=2.25.1,<2.26
+requests>=2.28.2,<2.29
 kafka-python>=2.0.2,<2.1
 lz4>=4.0.0,<5
@@ -1,4 +1,4 @@
-FROM python:3.9-bullseye
+FROM python:3.11-bullseye

 ENV LC_ALL=C.UTF-8
@@ -1 +1 @@
-__version__ = '1.2.0'
+__version__ = '1.3.0'
@@ -98,6 +98,12 @@ services:
       CLEARML_USE_GUNICORN: ${CLEARML_USE_GUNICORN:-}
       CLEARML_SERVING_NUM_PROCESS: ${CLEARML_SERVING_NUM_PROCESS:-}
       CLEARML_EXTRA_PYTHON_PACKAGES: ${CLEARML_EXTRA_PYTHON_PACKAGES:-}
+      AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID:-}
+      AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY:-}
+      AWS_DEFAULT_REGION: ${AWS_DEFAULT_REGION:-}
+      GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS:-}
+      AZURE_STORAGE_ACCOUNT: ${AZURE_STORAGE_ACCOUNT:-}
+      AZURE_STORAGE_KEY: ${AZURE_STORAGE_KEY:-}
     depends_on:
       - kafka
       - clearml-serving-triton
@@ -122,6 +128,13 @@ services:
       CLEARML_SERVING_TASK_ID: ${CLEARML_SERVING_TASK_ID:-}
      CLEARML_TRITON_POLL_FREQ: ${CLEARML_TRITON_POLL_FREQ:-1.0}
      CLEARML_TRITON_METRIC_FREQ: ${CLEARML_TRITON_METRIC_FREQ:-1.0}
+      CLEARML_EXTRA_PYTHON_PACKAGES: ${CLEARML_EXTRA_PYTHON_PACKAGES:-}
+      AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID:-}
+      AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY:-}
+      AWS_DEFAULT_REGION: ${AWS_DEFAULT_REGION:-}
+      GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS:-}
+      AZURE_STORAGE_ACCOUNT: ${AZURE_STORAGE_ACCOUNT:-}
+      AZURE_STORAGE_KEY: ${AZURE_STORAGE_KEY:-}
     depends_on:
       - kafka
     networks:
@@ -98,6 +98,12 @@ services:
       CLEARML_USE_GUNICORN: ${CLEARML_USE_GUNICORN:-}
       CLEARML_SERVING_NUM_PROCESS: ${CLEARML_SERVING_NUM_PROCESS:-}
       CLEARML_EXTRA_PYTHON_PACKAGES: ${CLEARML_EXTRA_PYTHON_PACKAGES:-}
+      AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID:-}
+      AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY:-}
+      AWS_DEFAULT_REGION: ${AWS_DEFAULT_REGION:-}
+      GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS:-}
+      AZURE_STORAGE_ACCOUNT: ${AZURE_STORAGE_ACCOUNT:-}
+      AZURE_STORAGE_KEY: ${AZURE_STORAGE_KEY:-}
     depends_on:
       - kafka
       - clearml-serving-triton
@@ -122,6 +128,13 @@ services:
       CLEARML_SERVING_TASK_ID: ${CLEARML_SERVING_TASK_ID:-}
       CLEARML_TRITON_POLL_FREQ: ${CLEARML_TRITON_POLL_FREQ:-1.0}
       CLEARML_TRITON_METRIC_FREQ: ${CLEARML_TRITON_METRIC_FREQ:-1.0}
+      CLEARML_EXTRA_PYTHON_PACKAGES: ${CLEARML_EXTRA_PYTHON_PACKAGES:-}
+      AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID:-}
+      AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY:-}
+      AWS_DEFAULT_REGION: ${AWS_DEFAULT_REGION:-}
+      GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS:-}
+      AZURE_STORAGE_ACCOUNT: ${AZURE_STORAGE_ACCOUNT:-}
+      AZURE_STORAGE_KEY: ${AZURE_STORAGE_KEY:-}
     depends_on:
       - kafka
     networks:
@@ -98,12 +98,12 @@ services:
       CLEARML_USE_GUNICORN: ${CLEARML_USE_GUNICORN:-}
       CLEARML_SERVING_NUM_PROCESS: ${CLEARML_SERVING_NUM_PROCESS:-}
       CLEARML_EXTRA_PYTHON_PACKAGES: ${CLEARML_EXTRA_PYTHON_PACKAGES:-}
-      AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID}
-      AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY}
-      AWS_DEFAULT_REGION: ${AWS_DEFAULT_REGION}
-      GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS}
-      AZURE_STORAGE_ACCOUNT: ${AZURE_STORAGE_ACCOUNT}
-      AZURE_STORAGE_KEY: ${AZURE_STORAGE_KEY}
+      AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID:-}
+      AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY:-}
+      AWS_DEFAULT_REGION: ${AWS_DEFAULT_REGION:-}
+      GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS:-}
+      AZURE_STORAGE_ACCOUNT: ${AZURE_STORAGE_ACCOUNT:-}
+      AZURE_STORAGE_KEY: ${AZURE_STORAGE_KEY:-}
     depends_on:
       - kafka
     networks:
@@ -31,7 +31,7 @@ class Preprocess(object):
         image = Image.open(local_file)

         image = ImageOps.grayscale(image).resize((28, 28))
-        return np.array([np.array(image).flatten()])
+        return np.array([np.array(image)])

     def postprocess(self, data: Any, state: dict, collect_custom_statistics_fn=None) -> dict:
         # post process the data returned from the model inference engine
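The change stops flattening the 28x28 grayscale image, so the array handed to the engine keeps its 2-D shape instead of a 784-element vector. A quick shape check (illustrative only):

```python
import numpy as np
from PIL import Image, ImageOps

image = ImageOps.grayscale(Image.new("RGB", (100, 100))).resize((28, 28))
print(np.array([np.array(image).flatten()]).shape)  # (1, 784)    - old behaviour
print(np.array([np.array(image)]).shape)            # (1, 28, 28) - new behaviour
```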
@@ -17,12 +17,12 @@ Prerequisites, Keras/Tensorflow models require Triton engine support, please use
 1. Create serving Service: `clearml-serving create --name "serving example"` (write down the service ID)
 2. Create model endpoint:

-`clearml-serving --id <service_id> model add --engine triton --endpoint "test_model_keras" --preprocess "examples/keras/preprocess.py" --name "train keras model" --project "serving examples" --input-size 1 784 --input-name "dense_input" --input-type float32 --output-size -1 10 --output-name "activation_2" --output-type float32
+`clearml-serving --id <service_id> model add --engine triton --endpoint "test_model_keras" --preprocess "examples/keras/preprocess.py" --name "train keras model - serving_model" --project "serving examples" --input-size 1 784 --input-name "dense_input" --input-type float32 --output-size -1 10 --output-name "activation_2" --output-type float32
 `

 Or auto update

-`clearml-serving --id <service_id> model auto-update --engine triton --endpoint "test_model_auto" --preprocess "examples/keras/preprocess.py" --name "train keras model" --project "serving examples" --max-versions 2
+`clearml-serving --id <service_id> model auto-update --engine triton --endpoint "test_model_auto" --preprocess "examples/keras/preprocess.py" --name "train keras model - serving_model" --project "serving examples" --max-versions 2
 --input-size 1 784 --input-name "dense_input" --input-type float32
 --output-size -1 10 --output-name "activation_2" --output-type float32`
@@ -16,11 +16,11 @@ The output will be a model created on the project "serving examples", by the nam

 2. Create model endpoint:

-`clearml-serving --id <service_id> model add --engine lightgbm --endpoint "test_model_lgbm" --preprocess "examples/lightgbm/preprocess.py" --name "train lightgbm model" --project "serving examples"`
+`clearml-serving --id <service_id> model add --engine lightgbm --endpoint "test_model_lgbm" --preprocess "examples/lightgbm/preprocess.py" --name "train lightgbm model - lgbm_model" --project "serving examples"`

 Or auto-update

-`clearml-serving --id <service_id> model auto-update --engine lightgbm --endpoint "test_model_auto" --preprocess "examples/lightgbm/preprocess.py" --name "train lightgbm model" --project "serving examples" --max-versions 2`
+`clearml-serving --id <service_id> model auto-update --engine lightgbm --endpoint "test_model_auto" --preprocess "examples/lightgbm/preprocess.py" --name "train lightgbm model - lgbm_model" --project "serving examples" --max-versions 2`

 Or add Canary endpoint
examples/preprocess_template/preprocess_template.py (new file, 153 lines)
@@ -0,0 +1,153 @@
from typing import Any, Optional, Callable, Union


# Preprocess class Must be named "Preprocess"
# No need to inherit or to implement all methods
class Preprocess(object):
    """
    Preprocess class Must be named "Preprocess"
    Otherwise there are No limitations, No need to inherit or to implement all methods
    Notice! This is not thread safe! the same instance may be accessed from multiple threads simultaneously
    to store data in a safe way push it into the `state` dict argument of preprocessing/postprocessing functions

    Notice the execution flow is synchronous as follows:

    1. RestAPI(...) -> body: Union[bytes, dict]
    2. preprocess(body: Union[bytes, dict], ...) -> data: Any
    3. process(data: Any, ...) -> data: Any
    4. postprocess(data: Any, ...) -> result: dict
    5. RestAPI(result: dict) -> returned request
    """

    def __init__(self):
        # set internal state, this will be called only once. (i.e. not per request)
        # it will also set the internal model_endpoint to reference the specific model endpoint object being served
        self.model_endpoint = None  # type: clearml_serving.serving.endpoints.ModelEndpoint

    def load(self, local_file_name: str) -> Any:  # noqa
        """
        Optional: provide loading method for the model
        useful if we need to load a model in a specific way for the prediction engine to work

        Notice! When used with specific engines (i.e. not Custom)
        The returned object will be passed as is to the inference engine,
        this means it must not be None, otherwise the endpoint will be ignored!

        :param local_file_name: file name / path to load the model from

        :return: Object that will be called with .predict() method for inference.
        """
        pass

    def preprocess(
            self,
            body: Union[bytes, dict],
            state: dict,
            collect_custom_statistics_fn: Optional[Callable[[dict], None]],
    ) -> Any:  # noqa
        """
        Optional: do something with the request data, return any type of object.
        The returned object will be passed as is to the inference engine

        :param body: dictionary or bytes as received from the RestAPI
        :param state: Use state dict to store data passed to the post-processing function call.
            This is a per-request state dict (meaning a new empty dict will be passed per request)
            Usage example:
            >>> def preprocess(..., state):
                    state['preprocess_aux_data'] = [1,2,3]
            >>> def postprocess(..., state):
                    print(state['preprocess_aux_data'])
        :param collect_custom_statistics_fn: Optional, if provided allows to send a custom set of key/values
            to the statistics collector service.
            None is passed if the statistics collector is not configured, or if the current request should not be collected

        Usage example:
            >>> print(body)
            {"x0": 1, "x1": 2}
            >>> if collect_custom_statistics_fn:
            >>>   collect_custom_statistics_fn({"x0": 1, "x1": 2})

        :return: Object to be passed directly to the model inference
        """
        return body

    def postprocess(
            self,
            data: Any,
            state: dict,
            collect_custom_statistics_fn: Optional[Callable[[dict], None]],
    ) -> dict:  # noqa
        """
        Optional: post process the data returned from the model inference engine
        returned dict will be passed back as the request result as is.

        :param data: object as received from the inference model function
        :param state: Use state dict to store data passed to the post-processing function call.
            This is a per-request state dict (meaning a dict instance per request)
            Usage example:
            >>> def preprocess(..., state):
                    state['preprocess_aux_data'] = [1,2,3]
            >>> def postprocess(..., state):
                    print(state['preprocess_aux_data'])
        :param collect_custom_statistics_fn: Optional, if provided allows to send a custom set of key/values
            to the statistics collector service.
            None is passed if the statistics collector is not configured, or if the current request should not be collected

        Usage example:
            >>> if collect_custom_statistics_fn:
            >>>   collect_custom_statistics_fn({"y": 1})

        :return: Dictionary passed directly as the returned result of the RestAPI
        """
        return data

    def process(
            self,
            data: Any,
            state: dict,
            collect_custom_statistics_fn: Optional[Callable[[dict], None]],
    ) -> Any:  # noqa
        """
        Optional: do something with the actual data, return any type of object.
        The returned object will be passed as is to the postprocess function engine

        :param data: object as received from the preprocessing function
        :param state: Use state dict to store data passed to the post-processing function call.
            This is a per-request state dict (meaning a dict instance per request)
            Usage example:
            >>> def preprocess(..., state):
                    state['preprocess_aux_data'] = [1,2,3]
            >>> def postprocess(..., state):
                    print(state['preprocess_aux_data'])
        :param collect_custom_statistics_fn: Optional, if provided allows to send a custom set of key/values
            to the statistics collector service.
            None is passed if the statistics collector is not configured, or if the current request should not be collected

        Usage example:
            >>> if collect_custom_statistics_fn:
            >>>   collect_custom_statistics_fn({"type": "classification"})

        :return: Object to be passed to the post-processing function
        """
        return data

    def send_request(  # noqa
            self,
            endpoint: str,
            version: Optional[str] = None,
            data: Optional[dict] = None
    ) -> Optional[dict]:
        """
        NOTICE: This method will be replaced in runtime, by the inference service

        Helper method to send model inference requests to the inference service itself.
        This is designed to help with model ensemble, model pipelines, etc.
        On request error return None, otherwise the request result data dictionary

        Usage example:

        >>> x0, x1 = 1, 2
        >>> result = self.send_request(endpoint="test_model_sklearn", version="1", data={"x0": x0, "x1": x1})
        >>> y = result["y"]
        """
        pass
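As a usage illustration of the template above, here is a minimal concrete `Preprocess` for a simple tabular model; the `{"x0", "x1"}` input and `{"y"}` output field names follow the `send_request` docstring and the sklearn example, and are otherwise assumptions:

```python
from typing import Any, Callable, Optional, Union

import numpy as np


class Preprocess(object):
    def __init__(self):
        self.model_endpoint = None

    def preprocess(
            self,
            body: Union[bytes, dict],
            state: dict,
            collect_custom_statistics_fn: Optional[Callable[[dict], None]],
    ) -> Any:
        # Turn the JSON body {"x0": ..., "x1": ...} into the 2-D array the model expects
        if collect_custom_statistics_fn:
            collect_custom_statistics_fn({"x0": body["x0"], "x1": body["x1"]})
        return np.array([[body["x0"], body["x1"]]], dtype=float)

    def postprocess(
            self,
            data: Any,
            state: dict,
            collect_custom_statistics_fn: Optional[Callable[[dict], None]],
    ) -> dict:
        # Wrap the raw prediction back into the JSON dictionary returned by the RestAPI
        return {"y": data.tolist() if hasattr(data, "tolist") else data}
```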
@@ -31,7 +31,7 @@ class Preprocess(object):
         image = Image.open(local_file)

         image = ImageOps.grayscale(image).resize((28, 28))
-        return np.array([np.array(image).flatten()])
+        return np.array([np.array(image)])

     def postprocess(self, data: Any, state: dict, collect_custom_statistics_fn=None) -> dict:
         # post process the data returned from the model inference engine
@@ -14,11 +14,11 @@ The output will be a model created on the project "serving examples", by the nam

 1. Create serving Service: `clearml-serving create --name "serving example"` (write down the service ID)
 2. Create model endpoint:
-`clearml-serving --id <service_id> model add --engine sklearn --endpoint "test_model_sklearn" --preprocess "examples/sklearn/preprocess.py" --name "train sklearn model" --project "serving examples"`
+`clearml-serving --id <service_id> model add --engine sklearn --endpoint "test_model_sklearn" --preprocess "examples/sklearn/preprocess.py" --name "train sklearn model - sklearn-model" --project "serving examples"`

 Or auto update

-`clearml-serving --id <service_id> model auto-update --engine sklearn --endpoint "test_model_sklearn_auto" --preprocess "examples/sklearn/preprocess.py" --name "train sklearn model" --project "serving examples" --max-versions 2`
+`clearml-serving --id <service_id> model auto-update --engine sklearn --endpoint "test_model_sklearn_auto" --preprocess "examples/sklearn/preprocess.py" --name "train sklearn model - sklearn-model" --project "serving examples" --max-versions 2`

 Or add Canary endpoint
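Once the endpoint is up, the same request can be sent programmatically. A minimal sketch, assuming the default serving base URL `http://127.0.0.1:8080/serve/` seen in `BasePreprocessRequest` above and the `{"x0", "x1"} -> {"y"}` payload convention of this example:

```python
import requests

# Assumed local docker-compose deployment; adjust host/port to your inference container.
response = requests.post(
    "http://127.0.0.1:8080/serve/test_model_sklearn",
    json={"x0": 1, "x1": 2},
    timeout=10,
)
response.raise_for_status()
print(response.json())  # e.g. {"y": [...]}
```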
@@ -15,11 +15,11 @@ The output will be a model created on the project "serving examples", by the nam
 1. Create serving Service: `clearml-serving create --name "serving example"` (write down the service ID)
 2. Create model endpoint:

-3. `clearml-serving --id <service_id> model add --engine xgboost --endpoint "test_model_xgb" --preprocess "examples/xgboost/preprocess.py" --name "train xgboost model" --project "serving examples"`
+3. `clearml-serving --id <service_id> model add --engine xgboost --endpoint "test_model_xgb" --preprocess "examples/xgboost/preprocess.py" --name "train xgboost model - xgb_model" --project "serving examples"`

 Or auto update

-`clearml-serving --id <service_id> model auto-update --engine xgboost --endpoint "test_model_xgb_auto" --preprocess "examples/xgboost/preprocess.py" --name "train xgboost model" --project "serving examples" --max-versions 2`
+`clearml-serving --id <service_id> model auto-update --engine xgboost --endpoint "test_model_xgb_auto" --preprocess "examples/xgboost/preprocess.py" --name "train xgboost model - xgb_model" --project "serving examples" --max-versions 2`

 Or add Canary endpoint