Fix gRPC errors printing stack traces and full verbose details. Add support for controlling error printouts using CLEARML_SERVING_AIO_RPC_IGNORE_ERRORS and CLEARML_SERVING_AIO_RPC_VERBOSE_ERRORS (pass a whitespace-separated list of error codes or error names)

clearml 2024-12-12 23:57:21 +02:00
parent 724c99c605
commit aff27c62b8
3 changed files with 80 additions and 26 deletions
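For illustration, a minimal sketch (not part of the commit) of how the two variables are interpreted: values are whitespace-separated, and each entry may be a numeric gRPC status code or a case-insensitive status name ("-" and "_" are treated as spaces), resolved by the new parse_grpc_errors helper added in the second file below:

    # Hypothetical values, shown only to illustrate the accepted formats:
    #   CLEARML_SERVING_AIO_RPC_IGNORE_ERRORS="UNAVAILABLE deadline-exceeded 8"
    #   CLEARML_SERVING_AIO_RPC_VERBOSE_ERRORS="internal"
    from clearml_serving.serving.utils import parse_grpc_errors  # helper added in this commit

    ignore = parse_grpc_errors("UNAVAILABLE deadline-exceeded 8".split())
    # -> {StatusCode.UNAVAILABLE, StatusCode.DEADLINE_EXCEEDED, StatusCode.RESOURCE_EXHAUSTED}
    verbose = parse_grpc_errors("internal".split())
    # -> {StatusCode.INTERNAL}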

View File

@@ -1,4 +1,5 @@
 import os
+import shlex
 import traceback
 import gzip
 import asyncio
@@ -6,6 +7,7 @@ import asyncio
 from fastapi import FastAPI, Request, Response, APIRouter, HTTPException
 from fastapi.routing import APIRoute
 from fastapi.responses import PlainTextResponse
+from grpc.aio import AioRpcError
 from starlette.background import BackgroundTask
@@ -13,8 +15,14 @@ from typing import Optional, Dict, Any, Callable, Union
 from clearml_serving.version import __version__
 from clearml_serving.serving.init import setup_task
-from clearml_serving.serving.model_request_processor import ModelRequestProcessor, EndpointNotFoundException, \
-    EndpointBackendEngineException, EndpointModelLoadException, ServingInitializationException
+from clearml_serving.serving.model_request_processor import (
+    ModelRequestProcessor,
+    EndpointNotFoundException,
+    EndpointBackendEngineException,
+    EndpointModelLoadException,
+    ServingInitializationException,
+)
+from clearml_serving.serving.utils import parse_grpc_errors


 class GzipRequest(Request):
@@ -52,10 +60,16 @@ try:
 except (ValueError, TypeError):
     pass

+grpc_aio_ignore_errors = parse_grpc_errors(shlex.split(os.environ.get("CLEARML_SERVING_AIO_RPC_IGNORE_ERRORS", "")))
+grpc_aio_verbose_errors = parse_grpc_errors(shlex.split(os.environ.get("CLEARML_SERVING_AIO_RPC_VERBOSE_ERRORS", "")))
+

 class CUDAException(Exception):
     def __init__(self, exception: str):
         self.exception = exception


 # start FastAPI app
 app = FastAPI(title="ClearML Serving Service", version=__version__, description="ClearML Service Service router")
@@ -65,26 +79,31 @@ async def startup_event():
     global processor

     if processor:
-        print("ModelRequestProcessor already initialized [pid={}] [service_id={}]".format(
-            os.getpid(), serving_service_task_id))
+        print(
+            "ModelRequestProcessor already initialized [pid={}] [service_id={}]".format(
+                os.getpid(), serving_service_task_id
+            )
+        )
     else:
-        print("Starting up ModelRequestProcessor [pid={}] [service_id={}]".format(
-            os.getpid(), serving_service_task_id))
+        print("Starting up ModelRequestProcessor [pid={}] [service_id={}]".format(os.getpid(), serving_service_task_id))
         processor = ModelRequestProcessor(
-            task_id=serving_service_task_id, update_lock_guard=singleton_sync_lock,
+            task_id=serving_service_task_id,
+            update_lock_guard=singleton_sync_lock,
         )
         print("ModelRequestProcessor [id={}] loaded".format(processor.get_id()))
         processor.launch(poll_frequency_sec=model_sync_frequency_secs * 60)


-@app.on_event('shutdown')
+@app.on_event("shutdown")
 def shutdown_event():
-    print('RESTARTING INFERENCE SERVICE!')
+    print("RESTARTING INFERENCE SERVICE!")


 async def exit_app():
     loop = asyncio.get_running_loop()
     loop.stop()


 @app.exception_handler(CUDAException)
 async def cuda_exception_handler(request, exc):
     task = BackgroundTask(exit_app)
@@ -105,31 +124,49 @@ router = APIRouter(
 @router.post("/{model_id}")
 async def serve_model(model_id: str, version: Optional[str] = None, request: Union[bytes, Dict[Any, Any]] = None):
     try:
-        return_value = await processor.process_request(
-            base_url=model_id,
-            version=version,
-            request_body=request
-        )
+        return_value = await processor.process_request(base_url=model_id, version=version, request_body=request)
     except EndpointNotFoundException as ex:
         raise HTTPException(status_code=404, detail="Error processing request, endpoint was not found: {}".format(ex))
     except (EndpointModelLoadException, EndpointBackendEngineException) as ex:
-        session_logger.report_text("[{}] Exception [{}] {} while processing request: {}\n{}".format(
-            instance_id, type(ex), ex, request, "".join(traceback.format_exc())))
+        session_logger.report_text(
+            "[{}] Exception [{}] {} while processing request: {}\n{}".format(
+                instance_id, type(ex), ex, request, "".join(traceback.format_exc())
+            )
+        )
         raise HTTPException(status_code=422, detail="Error [{}] processing request: {}".format(type(ex), ex))
     except ServingInitializationException as ex:
-        session_logger.report_text("[{}] Exception [{}] {} while loading serving inference: {}\n{}".format(
-            instance_id, type(ex), ex, request, "".join(traceback.format_exc())))
+        session_logger.report_text(
+            "[{}] Exception [{}] {} while loading serving inference: {}\n{}".format(
+                instance_id, type(ex), ex, request, "".join(traceback.format_exc())
+            )
+        )
         raise HTTPException(status_code=500, detail="Error [{}] processing request: {}".format(type(ex), ex))
     except ValueError as ex:
-        session_logger.report_text("[{}] Exception [{}] {} while processing request: {}\n{}".format(
-            instance_id, type(ex), ex, request, "".join(traceback.format_exc())))
+        session_logger.report_text(
+            "[{}] Exception [{}] {} while processing request: {}\n{}".format(
+                instance_id, type(ex), ex, request, "".join(traceback.format_exc())
+            )
+        )
         if "CUDA out of memory. " in str(ex) or "NVML_SUCCESS == r INTERNAL ASSERT FAILED" in str(ex):
             raise CUDAException(exception=ex)
         else:
             raise HTTPException(status_code=422, detail="Error [{}] processing request: {}".format(type(ex), ex))
+    except AioRpcError as ex:
+        if grpc_aio_verbose_errors and ex.code() in grpc_aio_verbose_errors:
+            session_logger.report_text(
+                "[{}] Exception [AioRpcError] {} while processing request: {}".format(instance_id, ex, request)
+            )
+        elif not grpc_aio_ignore_errors or ex.code() not in grpc_aio_ignore_errors:
+            session_logger.report_text("[{}] Exception [AioRpcError] status={} ".format(instance_id, ex.code()))
+        raise HTTPException(
+            status_code=500, detail="Error [AioRpcError] processing request: status={}".format(ex.code())
+        )
     except Exception as ex:
-        session_logger.report_text("[{}] Exception [{}] {} while processing request: {}\n{}".format(
-            instance_id, type(ex), ex, request, "".join(traceback.format_exc())))
+        session_logger.report_text(
+            "[{}] Exception [{}] {} while processing request: {}\n{}".format(
+                instance_id, type(ex), ex, request, "".join(traceback.format_exc())
+            )
+        )
         raise HTTPException(status_code=500, detail="Error [{}] processing request: {}".format(type(ex), ex))
     return return_value

View File

@@ -0,0 +1,17 @@
+from typing import List, Set
+
+import grpc
+
+
+def parse_grpc_errors(errors: List[str]) -> Set[grpc.StatusCode]:
+    try:
+        typed_errors = {
+            int(e) if e.isdigit() else e.lower().replace("-", " ").replace("_", " ")
+            for e in errors
+        }
+        if len(typed_errors) == 1 and next(iter(typed_errors)) in ("true", "false"):
+            return set(grpc.StatusCode if next(iter(typed_errors)) == "true" else [])
+        return {e for e in grpc.StatusCode if typed_errors.intersection(e.value)}
+    except (ValueError, TypeError):
+        pass
+    return set()
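
A brief usage sketch (based on the implementation above, not part of the commit): a single "true"/"false" value toggles all status codes on or off, names and numbers resolve to the same StatusCode members, and unparsable input falls back to an empty set so nothing is suppressed by default.

    import grpc
    from clearml_serving.serving.utils import parse_grpc_errors

    parse_grpc_errors(["true"])            # all grpc.StatusCode members
    parse_grpc_errors(["false"])           # empty set
    parse_grpc_errors(["not_found", "5"])  # {grpc.StatusCode.NOT_FOUND} -- name and number map to the same code
    parse_grpc_errors([])                  # empty set (env var unset or empty)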

View File

@@ -1 +1 @@
-__version__ = '1.3.0'
+__version__ = '1.3.1'