Mirror of https://github.com/clearml/clearml-serving (synced 2025-06-26 18:16:00 +00:00)
fix response import
commit 77e1f95dbd (parent cadd48f672)
@@ -214,7 +214,7 @@ async def openai_serve_model(
 ):
     combined_request = {"request": request, "raw_request": raw_request}
     return_value = await process_with_exceptions(
-        base_url=request.get("model", None),
+        base_url=request.model,
         version=None,
         request_body=combined_request,
         serve_type=endpoint_type
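The functional change in this hunk is the base_url lookup: once the incoming request is a typed (Pydantic) request object rather than a dict, dict-style .get() is not available and attribute access is the correct form. A minimal sketch of the difference, using a hypothetical Pydantic stand-in rather than the project's actual request class:

# Sketch only: CompletionRequest here is a simplified stand-in, not the real schema.
from pydantic import BaseModel

class CompletionRequest(BaseModel):
    model: str
    prompt: str

req = CompletionRequest(model="my-endpoint", prompt="hello")

# Dict-style access fails on a Pydantic model:
# req.get("model", None)  -> AttributeError: 'CompletionRequest' object has no attribute 'get'

# Attribute access, which the fix switches to, returns the field value:
base_url = req.model  # "my-endpoint"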
@@ -637,12 +637,14 @@ class VllmPreprocessRequest(BasePreprocessRequest):
             ChatCompletionRequest,
             ChatCompletionResponse,
             CompletionRequest,
+            CompletionResponse,
             ErrorResponse
         )
         self._vllm = {}
         self._vllm["chat_completion_request"] = ChatCompletionRequest
         self._vllm["chat_completion_response"] = ChatCompletionResponse
         self._vllm["completion_request"] = CompletionRequest
+        self._vllm["completion_response"] = CompletionResponse
         self._vllm["error_response"] = ErrorResponse
 
         if self._fastapi is None:
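This hunk adds CompletionResponse to the lazily imported vLLM protocol types and registers it in the self._vllm lookup table, so the completion handler below can recognize a finished (non-streaming) completion result. A minimal sketch of the resulting mapping, assuming the types come from vLLM's OpenAI-compatible protocol module (the exact import path is an assumption and may shift between vLLM versions):

# Sketch only: the module path is assumed from vLLM's OpenAI-compatible server layout.
from vllm.entrypoints.openai.protocol import (
    ChatCompletionRequest,
    ChatCompletionResponse,
    CompletionRequest,
    CompletionResponse,  # newly imported by this commit
    ErrorResponse,
)

# The classes are cached in a dict so later code can run isinstance checks
# without touching vLLM imports at call time.
vllm_types = {
    "chat_completion_request": ChatCompletionRequest,
    "chat_completion_response": ChatCompletionResponse,
    "completion_request": CompletionRequest,
    "completion_response": CompletionResponse,  # registered alongside the new import
    "error_response": ErrorResponse,
}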
@@ -741,7 +743,7 @@ class VllmPreprocessRequest(BasePreprocessRequest):
         generator = await handler.create_completion(request=request, raw_request=raw_request)
         if isinstance(generator, self._vllm["error_response"]):
             return self._fastapi["json_response"](content=generator.model_dump(), status_code=generator.code)
-        elif isinstance(generator, self._vllm["chat_completion_response"]):
+        elif isinstance(generator, self._vllm["completion_response"]):
             return self._fastapi["json_response"](content=generator.model_dump())
         return self._fastapi["streaming_response"](content=generator, media_type="text/event-stream")
 
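Previously the isinstance check in the completion path compared the result against the chat completion response type, which a plain /completions call never produces, so non-streaming completion results fell through to the streaming branch. With the check pointed at completion_response they are returned as ordinary JSON. A rough sketch of the dispatch order after the fix, using FastAPI's response classes directly instead of the class's _fastapi lookup table:

# Rough sketch of the response dispatch; JSONResponse/StreamingResponse are real
# FastAPI classes, vllm_types is the lookup dict sketched above.
from fastapi.responses import JSONResponse, StreamingResponse

def dispatch(generator, vllm_types):
    # 1. vLLM reported an error: surface it with its own status code.
    if isinstance(generator, vllm_types["error_response"]):
        return JSONResponse(content=generator.model_dump(), status_code=generator.code)
    # 2. Fully materialized completion: return the JSON body.
    #    Before the fix this checked the chat completion type, which never matched here.
    if isinstance(generator, vllm_types["completion_response"]):
        return JSONResponse(content=generator.model_dump())
    # 3. Anything else is an async generator of SSE chunks: stream it.
    return StreamingResponse(content=generator, media_type="text/event-stream")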
|