Mirror of https://github.com/clearml/clearml-serving (synced 2025-06-26 18:16:00 +00:00)
fix response import

commit 77e1f95dbd (parent cadd48f672)
@@ -214,7 +214,7 @@ async def openai_serve_model(
 ):
     combined_request = {"request": request, "raw_request": raw_request}
     return_value = await process_with_exceptions(
-        base_url=request.get("model", None),
+        base_url=request.model,
         version=None,
         request_body=combined_request,
         serve_type=endpoint_type
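Here `request` arrives as a Pydantic model (one of the vllm OpenAI-protocol request classes), not a dict, so dict-style `.get("model", None)` raises AttributeError; the fix reads the `model` field as an attribute. A minimal sketch of the difference, using a hypothetical stand-in model:

    from pydantic import BaseModel

    class FakeChatRequest(BaseModel):  # stand-in for vllm's ChatCompletionRequest
        model: str

    request = FakeChatRequest(model="my-endpoint")
    # request.get("model", None)  # AttributeError: 'FakeChatRequest' object has no attribute 'get'
    print(request.model)          # "my-endpoint", as the fixed line reads it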
@@ -637,12 +637,14 @@ class VllmPreprocessRequest(BasePreprocessRequest):
             ChatCompletionRequest,
+            ChatCompletionResponse,
             CompletionRequest,
+            CompletionResponse,
             ErrorResponse
         )
         self._vllm = {}
         self._vllm["chat_completion_request"] = ChatCompletionRequest
         self._vllm["chat_completion_response"] = ChatCompletionResponse
         self._vllm["completion_request"] = CompletionRequest
         self._vllm["completion_response"] = CompletionResponse
         self._vllm["error_response"] = ErrorResponse

         if self._fastapi is None:
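The response classes were already referenced when populating `self._vllm`, but only the request classes were imported, so instantiating the preprocessor raised NameError; the commit adds the two missing imports. A sketch of the lazy-import pattern this code uses, assuming the classes come from `vllm.entrypoints.openai.protocol`:

    class VllmPreprocessRequest:
        def __init__(self):
            # Import inside __init__ so vllm is only required when this
            # preprocessor is actually instantiated.
            from vllm.entrypoints.openai.protocol import (
                ChatCompletionRequest,
                ChatCompletionResponse,  # was missing -> NameError below
                CompletionRequest,
                CompletionResponse,      # was missing -> NameError below
                ErrorResponse,
            )
            # Stash the classes so other methods can dispatch on them
            # without a module-level vllm dependency.
            self._vllm = {
                "chat_completion_request": ChatCompletionRequest,
                "chat_completion_response": ChatCompletionResponse,
                "completion_request": CompletionRequest,
                "completion_response": CompletionResponse,
                "error_response": ErrorResponse,
            }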
@@ -741,7 +743,7 @@ class VllmPreprocessRequest(BasePreprocessRequest):
         generator = await handler.create_completion(request=request, raw_request=raw_request)
         if isinstance(generator, self._vllm["error_response"]):
             return self._fastapi["json_response"](content=generator.model_dump(), status_code=generator.code)
-        elif isinstance(generator, self._vllm["chat_completion_response"]):
+        elif isinstance(generator, self._vllm["completion_response"]):
             return self._fastapi["json_response"](content=generator.model_dump())
         return self._fastapi["streaming_response"](content=generator, media_type="text/event-stream")
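For non-streaming requests, `create_completion()` returns a `CompletionResponse`, not a `ChatCompletionResponse`, so the old isinstance check never matched and finished completions fell through to the streaming branch; the fix dispatches on the correct class. Why the old test always failed, sketched with stand-in classes (the real ones are the vllm protocol types stashed above):

    class ChatCompletionResponse: pass  # stand-in
    class CompletionResponse: pass      # stand-in

    result = CompletionResponse()
    print(isinstance(result, ChatCompletionResponse))  # False -> old branch skipped
    print(isinstance(result, CompletionResponse))      # True  -> fixed branch taken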