Merge branch 'allegroai:main' into main

commit 91dc126ab3
Authored by pollfly on 2023-11-28 13:39:39 +02:00, committed by GitHub
11 changed files with 141 additions and 12 deletions

.gitignore (vendored)

@@ -13,6 +13,9 @@ build/
# JetBrains IDE
.idea/
.vscode/
tests/huggingface
# Generated by MacOS
.DS_Store
@@ -20,6 +23,9 @@ build/
# Generated by Windows
Thumbs.db
# Virtual environment
.venv
# Applications
*.app
*.exe


@@ -6,15 +6,22 @@ from pathlib import Path
from clearml.model import Framework
from clearml_serving.serving.model_request_processor import ModelRequestProcessor, CanaryEP
from clearml_serving.serving.endpoints import ModelMonitoring, ModelEndpoint, EndpointMetricLogging
from clearml_serving.version import __version__
# noinspection PyBroadException
try:
    from clearml.backend_api import Session
    Session.add_client(__package__.partition(".")[0].replace("_", "-"), __version__)  # noqa
except Exception:
    pass

verbosity = False
answer_yes = False


def verify_session_version(request_processor):
    from clearml_serving.version import __version__
    current_v = float('.'.join(str(__version__).split(".")[:2]))
    stored_v = float('.'.join(str(request_processor.get_version()).split(".")[:2]))
    if stored_v != current_v:

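The hunk truncates before the body of the `if`; a minimal sketch of the major.minor comparison it relies on, using hypothetical version strings (not from this commit):

# Minimal sketch, not part of the commit: how the major.minor comparison behaves.
def _major_minor(version):
    # "1.3.1" -> 1.3, "1.3" -> 1.3
    return float('.'.join(str(version).split(".")[:2]))

assert _major_minor("1.3.1") == _major_minor("1.3.0")  # patch releases compare equal
assert _major_minor("1.3.1") != _major_minor("1.4.0")  # minor bump is a mismatch

Note that the float comparison collapses "1.1" and "1.10" to the same value, so it only discriminates while minor versions stay below 10.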
View File

@@ -3,5 +3,5 @@ clearml-serving
tritonclient[grpc]>=2.32,<2.33
starlette
grpcio
Pillow>=9.0.1,<10
Pillow>=10.0.1
pathlib2


@@ -359,6 +359,22 @@ class TritonHelper(object):
        for i, s in enumerate(endpoint.output_name or []):
            config_dict.put("output.{}.name".format(i), "\"{}\"".format(s))

        # check if we have platform in the auxiliary config pbtxt
        if platform and final_config_pbtxt:
            # noinspection PyBroadException
            try:
                final_config_pbtxt_dict = ConfigFactory.parse_string(final_config_pbtxt)
                # if we found it, null the requested platform and use the auxiliary config pbtxt platform `value`
                if final_config_pbtxt_dict.get("platform", None):
                    print(
                        "WARNING: ignoring auto-detected `platform={}` "
                        "and using auxiliary pbtxt `platform={}`".format(
                            str(platform).lower(), final_config_pbtxt_dict.get("platform")))
                    platform = None
            except Exception:
                # we failed parsing the auxiliary pbtxt
                pass

        if platform and not config_dict.get("platform", None) and not config_dict.get("backend", None):
            platform = str(platform).lower()
            if platform.startswith("tensorflow") or platform.startswith("keras"):
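The hunk ends here. For context, a minimal sketch of the override logic above; the pbtxt content and the auto-detected platform below are hypothetical, and `ConfigFactory` is clearml's vendored pyhocon, as imported elsewhere in this commit:

# Hypothetical illustration, not part of the commit:
from clearml.utilities.pyhocon import ConfigFactory

platform = "tensorflow"                              # assumed auto-detected value
final_config_pbtxt = 'platform: "onnxruntime_onnx"'  # assumed auxiliary pbtxt content
cfg = ConfigFactory.parse_string(final_config_pbtxt)
if cfg.get("platform", None):
    platform = None  # defer to the pbtxt's platform, as the hunk above does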
@@ -422,10 +438,42 @@ class TritonHelper(object):
return "FP32"
elif np_dtype == np.float64:
return "FP64"
elif np_dtype == str:
return "STRING"
elif np_dtype == np.object_ or np_dtype.type == np.bytes_:
return "BYTES"
return None
@staticmethod
def triton_to_np_dtype(dtype):
if dtype == "BOOL":
return bool
elif dtype == "INT8":
return np.int8
elif dtype == "INT16":
return np.int16
elif dtype == "INT32":
return np.int32
elif dtype == "INT64":
return np.int64
elif dtype == "UINT8":
return np.uint8
elif dtype == "UINT16":
return np.uint16
elif dtype == "UINT32":
return np.uint32
elif dtype == "UINT64":
return np.uint64
elif dtype == "FP16":
return np.float16
elif dtype == "FP32":
return np.float32
elif dtype == "FP64":
return np.float64
elif dtype == "BYTES":
return np.object_
return None
def main():
title = 'clearml-serving - Nvidia Triton Engine Controller'

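A small usage sketch of the new static mapping (assumed behavior, inferred from the code above; the import path matches the one referenced later in this commit):

import numpy as np
from clearml_serving.engines.triton.triton_helper import TritonHelper

assert TritonHelper.triton_to_np_dtype("FP32") is np.float32
assert TritonHelper.triton_to_np_dtype("BYTES") is np.object_
assert TritonHelper.triton_to_np_dtype("COMPLEX64") is None  # unknown names fall through to None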

@@ -1292,6 +1292,63 @@ class ModelRequestProcessor(object):
        Raise exception if validation fails, otherwise return True
        """
        if endpoint.engine_type in ("triton", ):
            if endpoint.auxiliary_cfg:
                aux_config_dict = {}
                if isinstance(endpoint.auxiliary_cfg, dict):
                    aux_config_dict = endpoint.auxiliary_cfg
                elif isinstance(endpoint.auxiliary_cfg, str):
                    from clearml.utilities.pyhocon import ConfigFactory
                    # noinspection PyBroadException
                    try:
                        aux_config_dict = ConfigFactory.parse_string(endpoint.auxiliary_cfg)
                    except Exception:
                        # we failed parsing the auxiliary pbtxt
                        aux_config_dict = {}

                if aux_config_dict.get("input", None) or aux_config_dict.get("output", None):
                    # noinspection PyBroadException
                    try:
                        from ..engines.triton.triton_helper import TritonHelper

                        suggested_cli_in = {"name": [], "dims": [], "data_type": []}
                        suggested_cli_out = {"name": [], "dims": [], "data_type": []}
                        for layer in aux_config_dict.get("input", None) or []:
                            suggested_cli_in["name"] += ['"{}"'.format(layer["name"])]
                            suggested_cli_in["data_type"] += [
                                TritonHelper.triton_to_np_dtype(layer["data_type"].replace("TYPE_", "", 1)).__name__]
                            suggested_cli_in["dims"] += ['"{}"'.format(layer["dims"])]

                        for layer in aux_config_dict.get("output", None) or []:
                            suggested_cli_out["name"] += ['"{}"'.format(layer["name"])]
                            suggested_cli_out["data_type"] += [
                                TritonHelper.triton_to_np_dtype(layer["data_type"].replace("TYPE_", "", 1)).__name__]
                            suggested_cli_out["dims"] += ['"{}"'.format(layer["dims"])]

                        suggested_cli = "Add to your command line: " \
                            "--input-name {} --input-type {} --input-size {} " \
                            "--output-name {} --output-type {} --output-size {} ".format(
                                " ".join(suggested_cli_in["name"]),
                                " ".join(suggested_cli_in["data_type"]),
                                " ".join(suggested_cli_in["dims"]),
                                " ".join(suggested_cli_out["name"]),
                                " ".join(suggested_cli_out["data_type"]),
                                " ".join(suggested_cli_out["dims"]),
                            )
                    except Exception:
                        suggested_cli = "?"

                    raise ValueError(
                        "Triton engine requires *manual* input/output specification, "
                        "you have input/output sections in your pbtxt, please remove them and specify them manually.\n"
                        "{}".format(suggested_cli)
                    )

                if aux_config_dict.get("default_model_filename", None):
                    raise ValueError("ERROR: You have `default_model_filename` in your config pbtxt, "
                                     "please remove it. It will be added automatically by the system.")

            # verify we have all the info we need
            d = endpoint.as_dict()
            missing = [
@@ -1300,7 +1357,8 @@ class ModelRequestProcessor(object):
                'output_type', 'output_size', 'output_name',
            ] if not d.get(k)
            ]
            if not endpoint.auxiliary_cfg and missing:
            if missing:
                raise ValueError("Triton engine requires input description - missing values in {}".format(missing))
        return True

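To illustrate the new validation, a hypothetical auxiliary config (dict form, which the code above also accepts) and the command-line suggestion the raised error would carry; every name and dim below is invented:

aux_config_dict = {
    "input":  [{"name": "INPUT__0",  "data_type": "TYPE_FP32", "dims": [1, 28, 28]}],
    "output": [{"name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": [-1, 10]}],
}
# "TYPE_FP32" -> "FP32" -> np.float32 -> "float32", so the ValueError would suggest:
#   Add to your command line: --input-name "INPUT__0" --input-type float32
#   --input-size "[1, 28, 28]" --output-name "OUTPUT__0" --output-type float32
#   --output-size "[-1, 10]"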

@@ -1,6 +1,7 @@
import os
import sys
import threading
import traceback
from pathlib import Path
from typing import Optional, Any, Callable, List
@@ -48,8 +49,8 @@ class BasePreprocessRequest(object):
        try:
            self._instantiate_custom_preprocess_cls(task)
        except Exception as ex:
            raise ValueError("Error: Failed loading preprocess code for \'{}\': {}".format(
                self.model_endpoint.preprocess_artifact, ex))
            raise ValueError("Error: Failed loading preprocess code for \'{}\': {}\n\n{}".format(
                self.model_endpoint.preprocess_artifact, ex, traceback.format_exc()))

    def _instantiate_custom_preprocess_cls(self, task: Task) -> None:
        path = task.artifacts[self.model_endpoint.preprocess_artifact].get_local_copy(extract_archive=False)

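The change above appends the full traceback of the original failure to the re-raised error. A minimal sketch of the effect; the failing function here is a stand-in:

import traceback

def load():
    raise RuntimeError("boom")  # stand-in for a failing preprocess import

try:
    load()
except Exception as ex:
    msg = "Error: Failed loading preprocess code for '{}': {}\n\n{}".format(
        "preprocess.py", ex, traceback.format_exc())
    # msg now ends with the full "Traceback (most recent call last): ..." block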

@@ -11,9 +11,9 @@ numpy>=1.24,<1.25
scikit-learn>=1.2.2,<1.3
pandas>=1.5.3,<1.6
grpcio
Pillow>=9.0.1,<10
Pillow>=10.0.1
xgboost>=1.7.5,<1.8
lightgbm>=3.3.2,<3.4
requests>=2.28.2,<2.29
requests>=2.31.0,<2.29
kafka-python>=2.0.2,<2.1
lz4>=4.0.0,<5


@@ -240,13 +240,22 @@ class StatisticsController(object):
                sleep(30)

        # we will never leave this loop
        for message in consumer:
        while True:
            # noinspection PyBroadException
            try:
                message = next(consumer)
            except Exception:
                print("Warning: failed to pull kafka consumer pipe")
                sleep(5)
                continue

            # noinspection PyBroadException
            try:
                list_data = json.loads(message.value.decode("utf-8"))
            except Exception:
                print("Warning: failed to decode kafka stats message")
                continue

            for data in list_data:
                try:
                    url = data.pop("_url", None)

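A minimal sketch of the hardened consume loop above, assuming kafka-python's `KafkaConsumer`; the topic and broker address are placeholders:

import json
from time import sleep
from kafka import KafkaConsumer  # kafka-python

consumer = KafkaConsumer("stats-topic", bootstrap_servers="localhost:9092")  # placeholder names
while True:
    try:
        message = next(consumer)  # replaces `for message in consumer:` so a broker error cannot kill the loop
    except Exception:
        sleep(5)  # transient hiccup: back off and retry
        continue
    try:
        list_data = json.loads(message.value.decode("utf-8"))
    except Exception:
        continue  # skip undecodable payloads instead of crashing
    print(list_data)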

@@ -1,6 +1,6 @@ clearml>=1.3.1
clearml>=1.3.1
numpy>=1.20,<1.24
requests>=2.25.1,<2.26
requests>=2.31.0,<2.26
kafka-python>=2.0.2,<2.1
prometheus_client>=0.13.1,<0.14
lz4>=4.0.0,<5


@@ -1,2 +1,3 @@
clearml >= 1.1.6
scikit-learn >= 1.0.2
numpy>=1.22.2 # not directly required, pinned by Snyk to avoid a vulnerability


@@ -1,2 +1 @@
clearml-serving
jupyter
clearml-serving