diff --git a/.gitignore b/.gitignore index f3038c1..32ff9ae 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,9 @@ build/ # JetBrains IDE .idea/ +.vscode/ + +tests/huggingface # Generated by MacOS .DS_Store @@ -20,6 +23,9 @@ build/ # Generated by Windows Thumbs.db +# Virtual environment +.venv + # Applications *.app *.exe diff --git a/clearml_serving/__main__.py b/clearml_serving/__main__.py index 4ac78e6..fe61bc6 100644 --- a/clearml_serving/__main__.py +++ b/clearml_serving/__main__.py @@ -6,15 +6,22 @@ from pathlib import Path from clearml.model import Framework -from clearml_serving.serving.model_request_processor import ModelRequestProcessor, CanaryEP from clearml_serving.serving.endpoints import ModelMonitoring, ModelEndpoint, EndpointMetricLogging +from clearml_serving.serving.model_request_processor import ModelRequestProcessor, CanaryEP +from clearml_serving.version import __version__ + +# noinspection PyBroadException +try: + from clearml.backend_api import Session + Session.add_client(__package__.partition(".")[0].replace("_", "-"), __version__) # noqa +except Exception: + pass verbosity = False answer_yes = False def verify_session_version(request_processor): - from clearml_serving.version import __version__ current_v = float('.'.join(str(__version__).split(".")[:2])) stored_v = float('.'.join(str(request_processor.get_version()).split(".")[:2])) if stored_v != current_v: diff --git a/clearml_serving/engines/triton/requirements.txt b/clearml_serving/engines/triton/requirements.txt index 82ed161..e91cda7 100644 --- a/clearml_serving/engines/triton/requirements.txt +++ b/clearml_serving/engines/triton/requirements.txt @@ -3,5 +3,5 @@ clearml-serving tritonclient[grpc]>=2.32,<2.33 starlette grpcio -Pillow>=9.0.1,<10 +Pillow>=10.0.1 pathlib2 diff --git a/clearml_serving/engines/triton/triton_helper.py b/clearml_serving/engines/triton/triton_helper.py index 533d06a..879174b 100644 --- a/clearml_serving/engines/triton/triton_helper.py +++ 
b/clearml_serving/engines/triton/triton_helper.py @@ -359,6 +359,22 @@ class TritonHelper(object): for i, s in enumerate(endpoint.output_name or []): config_dict.put("output.{}.name".format(i), "\"{}\"".format(s)) + # check if we have platform in the auxiliary config pbtxt + if platform and final_config_pbtxt: + # noinspection PyBroadException + try: + final_config_pbtxt_dict = ConfigFactory.parse_string(final_config_pbtxt) + # if we found it, null the requested platform and use the auxiliary config pbtxt platform `value` + if final_config_pbtxt_dict.get("platform", None): + print( + "WARNING: ignoring auto-detected `platform={}` " + "and using auxiliary pbtxt `platform={}`".format( + str(platform).lower(), final_config_pbtxt_dict.get("platform"))) + platform = None + except Exception: + # we failed parsing the auxiliary pbtxt + pass + if platform and not config_dict.get("platform", None) and not config_dict.get("backend", None): platform = str(platform).lower() if platform.startswith("tensorflow") or platform.startswith("keras"): @@ -422,10 +438,42 @@ class TritonHelper(object): return "FP32" elif np_dtype == np.float64: return "FP64" + elif np_dtype == str: + return "STRING" elif np_dtype == np.object_ or np_dtype.type == np.bytes_: return "BYTES" return None + @staticmethod + def triton_to_np_dtype(dtype): + if dtype == "BOOL": + return bool + elif dtype == "INT8": + return np.int8 + elif dtype == "INT16": + return np.int16 + elif dtype == "INT32": + return np.int32 + elif dtype == "INT64": + return np.int64 + elif dtype == "UINT8": + return np.uint8 + elif dtype == "UINT16": + return np.uint16 + elif dtype == "UINT32": + return np.uint32 + elif dtype == "UINT64": + return np.uint64 + elif dtype == "FP16": + return np.float16 + elif dtype == "FP32": + return np.float32 + elif dtype == "FP64": + return np.float64 + elif dtype == "BYTES": + return np.object_ + return None + def main(): title = 'clearml-serving - Nvidia Triton Engine Controller' diff --git 
a/clearml_serving/serving/model_request_processor.py b/clearml_serving/serving/model_request_processor.py index 2bac29f..e7d1103 100644 --- a/clearml_serving/serving/model_request_processor.py +++ b/clearml_serving/serving/model_request_processor.py @@ -1292,6 +1292,63 @@ class ModelRequestProcessor(object): Raise exception if validation fails, otherwise return True """ if endpoint.engine_type in ("triton", ): + if endpoint.auxiliary_cfg: + aux_config_dict = {} + + if isinstance(endpoint.auxiliary_cfg, dict): + aux_config_dict = endpoint.auxiliary_cfg + elif isinstance(endpoint.auxiliary_cfg, str): + from clearml.utilities.pyhocon import ConfigFactory + + # noinspection PyBroadException + try: + aux_config_dict = ConfigFactory.parse_string(endpoint.auxiliary_cfg) + except Exception: + # we failed parsing the auxiliary pbtxt + aux_config_dict = {} + + if aux_config_dict.get("input", None) or aux_config_dict.get("output", None): + # noinspection PyBroadException + try: + from ..engines.triton.triton_helper import TritonHelper + + suggested_cli_in = {"name": [], "dims": [], "data_type": []} + suggested_cli_out = {"name": [], "dims": [], "data_type": []} + for layer in aux_config_dict.get("input", None) or []: + suggested_cli_in["name"] += ['"{}"'.format(layer["name"])] + suggested_cli_in["data_type"] += [ + TritonHelper.triton_to_np_dtype(layer["data_type"].replace("TYPE_", "", 1)).__name__] + suggested_cli_in["dims"] += ['"{}"'.format(layer["dims"])] + + for layer in aux_config_dict.get("output", None) or []: + suggested_cli_out["name"] += ['"{}"'.format(layer["name"])] + suggested_cli_out["data_type"] += [ + TritonHelper.triton_to_np_dtype(layer["data_type"].replace("TYPE_", "", 1)).__name__] + suggested_cli_out["dims"] += ['"{}"'.format(layer["dims"])] + + suggested_cli = "Add to your command line: "\ + "--input-name {} --input-type {} --input-size {} " \ + "--output-name {} --output-type {} --output-size {} ".format( + " ".join(suggested_cli_in["name"]), + " 
".join(suggested_cli_in["data_type"]), + " ".join(suggested_cli_in["dims"]), + " ".join(suggested_cli_out["name"]), + " ".join(suggested_cli_out["data_type"]), + " ".join(suggested_cli_out["dims"]), + ) + except Exception: + suggested_cli = "?" + + raise ValueError( + "Triton engine requires *manual* input/output specification, " + "You input/output in your pbtxt, please remove them and specify manually.\n" + "{}".format(suggested_cli) + ) + + if aux_config_dict.get("default_model_filename", None): + raise ValueError("ERROR: You have `default_model_filename` in your config pbtxt, " + "please remove it. It will be added automatically by the system.") + # verify we have all the info we need d = endpoint.as_dict() missing = [ @@ -1300,7 +1357,8 @@ class ModelRequestProcessor(object): 'output_type', 'output_size', 'output_name', ] if not d.get(k) ] - if not endpoint.auxiliary_cfg and missing: + + if missing: raise ValueError("Triton engine requires input description - missing values in {}".format(missing)) return True diff --git a/clearml_serving/serving/preprocess_service.py b/clearml_serving/serving/preprocess_service.py index af4d96d..8e244c0 100644 --- a/clearml_serving/serving/preprocess_service.py +++ b/clearml_serving/serving/preprocess_service.py @@ -1,6 +1,7 @@ import os import sys import threading +import traceback from pathlib import Path from typing import Optional, Any, Callable, List @@ -48,8 +49,8 @@ class BasePreprocessRequest(object): try: self._instantiate_custom_preprocess_cls(task) except Exception as ex: - raise ValueError("Error: Failed loading preprocess code for \'{}\': {}".format( - self.model_endpoint.preprocess_artifact, ex)) + raise ValueError("Error: Failed loading preprocess code for \'{}\': {}\n\n{}".format( + self.model_endpoint.preprocess_artifact, ex, traceback.format_exc())) def _instantiate_custom_preprocess_cls(self, task: Task) -> None: path = 
task.artifacts[self.model_endpoint.preprocess_artifact].get_local_copy(extract_archive=False) diff --git a/clearml_serving/serving/requirements.txt b/clearml_serving/serving/requirements.txt index f15e056..a5bfe8c 100644 --- a/clearml_serving/serving/requirements.txt +++ b/clearml_serving/serving/requirements.txt @@ -11,9 +11,9 @@ numpy>=1.24,<1.25 scikit-learn>=1.2.2,<1.3 pandas>=1.5.3,<1.6 grpcio -Pillow>=9.0.1,<10 +Pillow>=10.0.1 xgboost>=1.7.5,<1.8 lightgbm>=3.3.2,<3.4 -requests>=2.28.2,<2.29 +requests>=2.31.0,<2.32 kafka-python>=2.0.2,<2.1 lz4>=4.0.0,<5 diff --git a/clearml_serving/statistics/metrics.py b/clearml_serving/statistics/metrics.py index afdd095..6ef0878 100644 --- a/clearml_serving/statistics/metrics.py +++ b/clearml_serving/statistics/metrics.py @@ -240,13 +240,22 @@ class StatisticsController(object): sleep(30) # we will never leave this loop - for message in consumer: + while True: + # noinspection PyBroadException + try: + message = next(consumer) + except Exception: + print("Warning: failed to pull kafka consumer pipe") + sleep(5) + continue + # noinspection PyBroadException try: list_data = json.loads(message.value.decode("utf-8")) except Exception: print("Warning: failed to decode kafka stats message") continue + for data in list_data: try: url = data.pop("_url", None) diff --git a/clearml_serving/statistics/requirements.txt b/clearml_serving/statistics/requirements.txt index 1f153d9..216aabc 100644 --- a/clearml_serving/statistics/requirements.txt +++ b/clearml_serving/statistics/requirements.txt @@ -1,6 +1,6 @@ clearml>=1.3.1 numpy>=1.20,<1.24 -requests>=2.25.1,<2.26 +requests>=2.31.0,<2.32 kafka-python>=2.0.2,<2.1 prometheus_client>=0.13.1,<0.14 lz4>=4.0.0,<5 diff --git a/examples/ensemble/requirements.txt b/examples/ensemble/requirements.txt index bf1ac39..74a9e46 100644 --- a/examples/ensemble/requirements.txt +++ b/examples/ensemble/requirements.txt @@ -1,2 +1,3 @@ clearml >= 1.1.6 scikit-learn >= 1.0.2 +numpy>=1.22.2 # not directly 
required, pinned by Snyk to avoid a vulnerability diff --git a/examples/huggingface/requirements.txt b/examples/huggingface/requirements.txt index d2d51d5..834df23 100644 --- a/examples/huggingface/requirements.txt +++ b/examples/huggingface/requirements.txt @@ -1,2 +1 @@ -clearml-serving -jupyter \ No newline at end of file +clearml-serving \ No newline at end of file