From e4c07c756ad1d5992c272c0bd7a302001dc5f6f2 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Sat, 23 Sep 2023 17:35:21 +0300 Subject: [PATCH 1/9] Add traceback for failing to load preprocess class (#57) --- clearml_serving/serving/preprocess_service.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/clearml_serving/serving/preprocess_service.py b/clearml_serving/serving/preprocess_service.py index af4d96d..8e244c0 100644 --- a/clearml_serving/serving/preprocess_service.py +++ b/clearml_serving/serving/preprocess_service.py @@ -1,6 +1,7 @@ import os import sys import threading +import traceback from pathlib import Path from typing import Optional, Any, Callable, List @@ -48,8 +49,8 @@ class BasePreprocessRequest(object): try: self._instantiate_custom_preprocess_cls(task) except Exception as ex: - raise ValueError("Error: Failed loading preprocess code for \'{}\': {}".format( - self.model_endpoint.preprocess_artifact, ex)) + raise ValueError("Error: Failed loading preprocess code for \'{}\': {}\n\n{}".format( + self.model_endpoint.preprocess_artifact, ex, traceback.format_exc())) def _instantiate_custom_preprocess_cls(self, task: Task) -> None: path = task.artifacts[self.model_endpoint.preprocess_artifact].get_local_copy(extract_archive=False) From 58d826e427ab9e53da5d2b9c4b6912fb3520dec4 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Sat, 23 Sep 2023 17:36:01 +0300 Subject: [PATCH 2/9] Fail-safe Kafka pulling --- clearml_serving/statistics/metrics.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/clearml_serving/statistics/metrics.py b/clearml_serving/statistics/metrics.py index afdd095..6ef0878 100644 --- a/clearml_serving/statistics/metrics.py +++ b/clearml_serving/statistics/metrics.py @@ -240,13 +240,22 @@ class StatisticsController(object): sleep(30) # we will never leave this loop - for message in consumer: + while True: + # noinspection PyBroadException + try: + message = next(consumer) + except Exception: + print("Warning: failed to pull kafka consumer pipe") + sleep(5) + continue + # noinspection PyBroadException try: list_data = json.loads(message.value.decode("utf-8")) except Exception: print("Warning: failed to decode kafka stats message") continue + for data in list_data: try: url = data.pop("_url", None) From 083635c8035c777a92b1831ad7cb2c11fa644028 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Sat, 23 Sep 2023 17:36:28 +0300 Subject: [PATCH 3/9] Add `str` type to Triton type conversion --- clearml_serving/engines/triton/triton_helper.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clearml_serving/engines/triton/triton_helper.py b/clearml_serving/engines/triton/triton_helper.py index 533d06a..ff606cb 100644 --- a/clearml_serving/engines/triton/triton_helper.py +++ b/clearml_serving/engines/triton/triton_helper.py @@ -422,6 +422,8 @@ class TritonHelper(object): return "FP32" elif np_dtype == np.float64: return "FP64" + elif np_dtype == str: + return "STRING" elif np_dtype == np.object_ or np_dtype.type == np.bytes_: return "BYTES" return None From 96b335e3c23db1db6a40a871cc993e1e86c7e5c1 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Sat, 23 Sep 2023 17:40:54 +0300 Subject: [PATCH 4/9] Fix ignore auto detected `platform` when passing config.pbtxt with `platform` entry --- .gitignore | 6 ++++++ clearml_serving/__main__.py | 11 +++++++++-- clearml_serving/engines/triton/triton_helper.py | 16 ++++++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index f3038c1..32ff9ae 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,9 @@ build/ # JetBrains IDE .idea/ +.vscode/ + +tests/huggingface # Generated by MacOS .DS_Store @@ -20,6 +23,9 @@ build/ # Generated by Windows Thumbs.db +# Virtual environment +.venv + # Applications *.app *.exe diff --git a/clearml_serving/__main__.py b/clearml_serving/__main__.py index 4ac78e6..fe61bc6 100644 --- a/clearml_serving/__main__.py +++ b/clearml_serving/__main__.py @@ -6,15 +6,22 @@ from pathlib import Path from clearml.model import Framework -from clearml_serving.serving.model_request_processor import ModelRequestProcessor, CanaryEP from clearml_serving.serving.endpoints import ModelMonitoring, ModelEndpoint, EndpointMetricLogging +from clearml_serving.serving.model_request_processor import ModelRequestProcessor, CanaryEP +from clearml_serving.version import __version__ + +# noinspection PyBroadException +try: + from clearml.backend_api import Session + Session.add_client(__package__.partition(".")[0].replace("_", "-"), __version__) # noqa +except Exception: + pass verbosity = False answer_yes = False def verify_session_version(request_processor): - from clearml_serving.version import __version__ current_v = float('.'.join(str(__version__).split(".")[:2])) stored_v = float('.'.join(str(request_processor.get_version()).split(".")[:2])) if stored_v != current_v: diff --git a/clearml_serving/engines/triton/triton_helper.py b/clearml_serving/engines/triton/triton_helper.py index ff606cb..c765627 100644 --- a/clearml_serving/engines/triton/triton_helper.py +++ b/clearml_serving/engines/triton/triton_helper.py @@ -359,6 +359,22 @@ class TritonHelper(object): for i, s in enumerate(endpoint.output_name or []): config_dict.put("output.{}.name".format(i), "\"{}\"".format(s)) + # check if we have platform in the auxiliary config pbtxt + if platform and final_config_pbtxt: + # noinspection PyBroadException + try: + final_config_pbtxt_dict = ConfigFactory.parse_string(final_config_pbtxt) + # if we found it, null the requested platform and use the auxiliary config pbtxt platform `value` + if final_config_pbtxt_dict.get("platform", None): + print( + "WARNING: ignoring auto-detecetd `platform={}` " + "and using auxiliary pbtxt `platform={}`".format( + str(platform).lower(), final_config_pbtxt_dict.get("platform"))) + platform = None + except Exception: + # we failed parsing the auxiliary pbtxt + pass + if platform and not config_dict.get("platform", None) and not config_dict.get("backend", None): platform = str(platform).lower() if platform.startswith("tensorflow") or platform.startswith("keras"): From 82ade1e24af2485278e8719903d9ec4e0aafa2a3 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Sat, 23 Sep 2023 17:42:57 +0300 Subject: [PATCH 5/9] Fix check triton config.pbtxt for missing values or colliding specifications (#62) --- .../engines/triton/triton_helper.py | 30 ++++++++++ .../serving/model_request_processor.py | 60 ++++++++++++++++++- 2 files changed, 89 insertions(+), 1 deletion(-) diff --git a/clearml_serving/engines/triton/triton_helper.py b/clearml_serving/engines/triton/triton_helper.py index c765627..879174b 100644 --- a/clearml_serving/engines/triton/triton_helper.py +++ b/clearml_serving/engines/triton/triton_helper.py @@ -444,6 +444,36 @@ class TritonHelper(object): return "BYTES" return None + @staticmethod + def triton_to_np_dtype(dtype): + if dtype == "BOOL": + return bool + elif dtype == "INT8": + return np.int8 + elif dtype == "INT16": + return np.int16 + elif dtype == "INT32": + return np.int32 + elif dtype == "INT64": + return np.int64 + elif dtype == "UINT8": + return np.uint8 + elif dtype == "UINT16": + return np.uint16 + elif dtype == "UINT32": + return np.uint32 + elif dtype == "UINT64": + return np.uint64 + elif dtype == "FP16": + return np.float16 + elif dtype == "FP32": + return np.float32 + elif dtype == "FP64": + return np.float64 + elif dtype == "BYTES": + return np.object_ + return None + def main(): title = 'clearml-serving - Nvidia Triton Engine Controller' diff --git a/clearml_serving/serving/model_request_processor.py b/clearml_serving/serving/model_request_processor.py index 2bac29f..e7d1103 100644 --- a/clearml_serving/serving/model_request_processor.py +++ b/clearml_serving/serving/model_request_processor.py @@ -1292,6 +1292,63 @@ class ModelRequestProcessor(object): Raise exception if validation fails, otherwise return True """ if endpoint.engine_type in ("triton", ): + if endpoint.auxiliary_cfg: + aux_config_dict = {} + + if isinstance(endpoint.auxiliary_cfg, dict): + aux_config_dict = endpoint.auxiliary_cfg + elif isinstance(endpoint.auxiliary_cfg, str): + from clearml.utilities.pyhocon import ConfigFactory + + # noinspection PyBroadException + try: + aux_config_dict = ConfigFactory.parse_string(endpoint.auxiliary_cfg) + except Exception: + # we failed parsing the auxiliary pbtxt + aux_config_dict = {} + + if aux_config_dict.get("input", None) or aux_config_dict.get("output", None): + # noinspection PyBroadException + try: + from ..engines.triton.triton_helper import TritonHelper + + suggested_cli_in = {"name": [], "dims": [], "data_type": []} + suggested_cli_out = {"name": [], "dims": [], "data_type": []} + for layer in aux_config_dict.get("input", None) or []: + suggested_cli_in["name"] += ['"{}"'.format(layer["name"])] + suggested_cli_in["data_type"] += [ + TritonHelper.triton_to_np_dtype(layer["data_type"].replace("TYPE_", "", 1)).__name__] + suggested_cli_in["dims"] += ['"{}"'.format(layer["dims"])] + + for layer in aux_config_dict.get("output", None) or []: + suggested_cli_out["name"] += ['"{}"'.format(layer["name"])] + suggested_cli_out["data_type"] += [ + TritonHelper.triton_to_np_dtype(layer["data_type"].replace("TYPE_", "", 1)).__name__] + suggested_cli_out["dims"] += ['"{}"'.format(layer["dims"])] + + suggested_cli = "Add to your command line: "\ + "--input-name {} --input-type {} --input-size {} " \ + "--output-name {} --output-type {} --output-size {} ".format( + " ".join(suggested_cli_in["name"]), + " ".join(suggested_cli_in["data_type"]), + " ".join(suggested_cli_in["dims"]), + " ".join(suggested_cli_out["name"]), + " ".join(suggested_cli_out["data_type"]), + " ".join(suggested_cli_out["dims"]), + ) + except Exception: + suggested_cli = "?" + + raise ValueError( + "Triton engine requires *manual* input/output specification, " + "You input/output in your pbtxt, please remove them and specify manually.\n" + "{}".format(suggested_cli) + ) + + if aux_config_dict.get("default_model_filename", None): + raise ValueError("ERROR: You have `default_model_filename` in your config pbtxt, " + "please remove it. It will be added automatically by the system.") + # verify we have all the info we need d = endpoint.as_dict() missing = [ @@ -1300,7 +1357,8 @@ class ModelRequestProcessor(object): 'output_type', 'output_size', 'output_name', ] if not d.get(k) ] - if not endpoint.auxiliary_cfg and missing: + + if missing: raise ValueError("Triton engine requires input description - missing values in {}".format(missing)) return True From cc5823cfc672d05e6a9ba8336d653182e78d905e Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Sat, 23 Sep 2023 17:48:25 +0300 Subject: [PATCH 6/9] Update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5eb4a4d..1857bec 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ [![PyPI pyversions](https://img.shields.io/pypi/pyversions/clearml-serving.svg)](https://img.shields.io/pypi/pyversions/clearml-serving.svg) [![PyPI version shields.io](https://img.shields.io/pypi/v/clearml-serving.svg)](https://img.shields.io/pypi/v/clearml-serving.svg) [![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/allegroai)](https://artifacthub.io/packages/helm/allegroai/clearml-serving) -[![Slack Channel](https://img.shields.io/badge/slack-%23clearml--community-blueviolet?logo=slack)](https://join.slack.com/t/clearml/shared_invite/zt-1v74jzwkn-~XsuWB0btXOlfFQCh8DJQw4) +[![Slack Channel](https://img.shields.io/badge/slack-%23clearml--community-blueviolet?logo=slack)](https://joinslack.clear.ml) From 05cbfade2a35c40e29f67e64090695dbfdacdebb Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Sat, 23 Sep 2023 18:03:24 +0300 Subject: [PATCH 7/9] Update requirements --- clearml_serving/serving/requirements.txt | 2 +- clearml_serving/statistics/requirements.txt | 2 +- examples/ensemble/requirements.txt | 1 + examples/huggingface/requirements.txt | 3 +-- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/clearml_serving/serving/requirements.txt b/clearml_serving/serving/requirements.txt index f15e056..2b5d33b 100644 --- a/clearml_serving/serving/requirements.txt +++ b/clearml_serving/serving/requirements.txt @@ -14,6 +14,6 @@ grpcio Pillow>=9.0.1,<10 xgboost>=1.7.5,<1.8 lightgbm>=3.3.2,<3.4 -requests>=2.28.2,<2.29 +requests>=2.31.0,<2.29 kafka-python>=2.0.2,<2.1 lz4>=4.0.0,<5 diff --git a/clearml_serving/statistics/requirements.txt b/clearml_serving/statistics/requirements.txt index 1f153d9..216aabc 100644 --- a/clearml_serving/statistics/requirements.txt +++ b/clearml_serving/statistics/requirements.txt @@ -1,6 +1,6 @@ clearml>=1.3.1 numpy>=1.20,<1.24 -requests>=2.25.1,<2.26 +requests>=2.31.0,<2.26 kafka-python>=2.0.2,<2.1 prometheus_client>=0.13.1,<0.14 lz4>=4.0.0,<5 diff --git a/examples/ensemble/requirements.txt b/examples/ensemble/requirements.txt index bf1ac39..74a9e46 100644 --- a/examples/ensemble/requirements.txt +++ b/examples/ensemble/requirements.txt @@ -1,2 +1,3 @@ clearml >= 1.1.6 scikit-learn >= 1.0.2 +numpy>=1.22.2 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/examples/huggingface/requirements.txt b/examples/huggingface/requirements.txt index d2d51d5..834df23 100644 --- a/examples/huggingface/requirements.txt +++ b/examples/huggingface/requirements.txt @@ -1,2 +1 @@ -clearml-serving -jupyter \ No newline at end of file +clearml-serving \ No newline at end of file From c20bbd66b963e7e6259b293eb900acddd591020c Mon Sep 17 00:00:00 2001 From: Jake Henning <59198928+jkhenning@users.noreply.github.com> Date: Wed, 4 Oct 2023 13:22:46 +0300 Subject: [PATCH 8/9] Fix Pillow vulnerability "libwebp: OOB write in BuildHuffmanTable" --- clearml_serving/serving/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clearml_serving/serving/requirements.txt b/clearml_serving/serving/requirements.txt index 2b5d33b..a5bfe8c 100644 --- a/clearml_serving/serving/requirements.txt +++ b/clearml_serving/serving/requirements.txt @@ -11,7 +11,7 @@ numpy>=1.24,<1.25 scikit-learn>=1.2.2,<1.3 pandas>=1.5.3,<1.6 grpcio -Pillow>=9.0.1,<10 +Pillow>=10.0.1 xgboost>=1.7.5,<1.8 lightgbm>=3.3.2,<3.4 requests>=2.31.0,<2.29 From 6c4bece6638a7341388507a77d6993f447e8c088 Mon Sep 17 00:00:00 2001 From: Jake Henning <59198928+jkhenning@users.noreply.github.com> Date: Wed, 4 Oct 2023 13:23:42 +0300 Subject: [PATCH 9/9] Fix Pillow vulnerability "libwebp: OOB write in BuildHuffmanTable" --- clearml_serving/engines/triton/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clearml_serving/engines/triton/requirements.txt b/clearml_serving/engines/triton/requirements.txt index 82ed161..e91cda7 100644 --- a/clearml_serving/engines/triton/requirements.txt +++ b/clearml_serving/engines/triton/requirements.txt @@ -3,5 +3,5 @@ clearml-serving tritonclient[grpc]>=2.32,<2.33 starlette grpcio -Pillow>=9.0.1,<10 +Pillow>=10.0.1 pathlib2