diff --git a/clearml_serving/serving/main.py b/clearml_serving/serving/main.py index 1896c88..1e3f8fc 100644 --- a/clearml_serving/serving/main.py +++ b/clearml_serving/serving/main.py @@ -104,7 +104,7 @@ async def serve_model(model_id: str, version: Optional[str] = None, request: Uni instance_id, type(ex), ex, request, "".join(traceback.format_exc()))) if "CUDA out of memory. " in str(ex) or "NVML_SUCCESS == r INTERNAL ASSERT FAILED" in str(ex): # can't always recover from this - prefer to exit the program such that it can be restarted - os._exit() + os._exit(1) raise HTTPException(status_code=422, detail="Error [{}] processing request: {}".format(type(ex), ex)) except Exception as ex: session_logger.report_text("[{}] Exception [{}] {} while processing request: {}\n{}".format(