From 6a8e61625682bc2101ca0707bddcefa850096610 Mon Sep 17 00:00:00 2001
From: Eugen Ajechiloae
Date: Wed, 14 Aug 2024 19:43:44 +0300
Subject: [PATCH] not tested - exit on cuda oom

---
Review notes (this area is ignored by `git am` / `git apply`):
 * The hunk for clearml_serving/serving/model_request_processor.py was
   dropped: its only change was a debug print() of a hard-coded
   "clearml-serving --id ... model remove" command line.
 * Leading whitespace of the context lines below was reconstructed; if it
   does not match the target tree, apply with
   `git apply --ignore-whitespace`.
 * NOTE(review): sys.exit() raises SystemExit inside the request coroutine;
   confirm the ASGI server really terminates the worker instead of
   catching it and answering with a 500.
 * NOTE(review): PyTorch normally reports CUDA OOM as a RuntimeError
   subclass; confirm the ValueError branch is where it actually surfaces,
   or repeat the check in the generic `except Exception` branch.

 clearml_serving/preprocess/preprocess_template.py | 3 +--
 clearml_serving/serving/entrypoint.sh             | 3 +++
 clearml_serving/serving/main.py                   | 4 ++++
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/clearml_serving/preprocess/preprocess_template.py b/clearml_serving/preprocess/preprocess_template.py
index bd9b936..373204a 100644
--- a/clearml_serving/preprocess/preprocess_template.py
+++ b/clearml_serving/preprocess/preprocess_template.py
@@ -41,8 +41,7 @@ class Preprocess(object):
         """
         pass
 
-
-    def unload(self) -> None:  # noqa
+    def unload(self) -> None:
         """
         OPTIONAL: provide unloading method for the model
         For example:
diff --git a/clearml_serving/serving/entrypoint.sh b/clearml_serving/serving/entrypoint.sh
index 634c281..c68fa02 100755
--- a/clearml_serving/serving/entrypoint.sh
+++ b/clearml_serving/serving/entrypoint.sh
@@ -69,8 +69,11 @@ while : ; do
             $GUNICORN_EXTRA_ARGS
     fi
 
+    echo "[DEBUG] ~~~~~~~~~~~~ Check if we restart here server ~~~~~~~~~~~~"
     if [ -z "$CLEARML_SERVING_RESTART_ON_FAILURE" ]
     then
+        echo "[DEBUG] ~~~~~~~~~~~~ Not restarting ~~~~~~~~~~~~"
         break
     fi
+    echo "[DEBUG] ~~~~~~~~~~~~ Restarting server ~~~~~~~~~~~~"
 done
diff --git a/clearml_serving/serving/main.py b/clearml_serving/serving/main.py
index 6865c93..d75e60b 100644
--- a/clearml_serving/serving/main.py
+++ b/clearml_serving/serving/main.py
@@ -1,6 +1,7 @@
 import os
 import traceback
 import gzip
+import sys
 
 from fastapi import FastAPI, Request, Response, APIRouter, HTTPException
 from fastapi.routing import APIRoute
@@ -102,6 +103,9 @@ async def serve_model(model_id: str, version: Optional[str] = None, request: Uni
     except ValueError as ex:
         session_logger.report_text("[{}] Exception [{}] {} while processing request: {}\n{}".format(
             instance_id, type(ex), ex, request, "".join(traceback.format_exc())))
+        if "CUDA out of memory. " in str(ex) or "NVML_SUCCESS == r INTERNAL ASSERT FAILED" in str(ex):
+            # can't always recover from this - prefer to exit the program such that it can be restarted
+            sys.exit(1)
         raise HTTPException(status_code=422, detail="Error [{}] processing request: {}".format(type(ex), ex))
     except Exception as ex:
         session_logger.report_text("[{}] Exception [{}] {} while processing request: {}\n{}".format(