fix response import

IlyaMescheryakov1402 2025-03-09 22:53:44 +03:00
parent cadd48f672
commit 77e1f95dbd
2 changed files with 4 additions and 2 deletions

View File

@@ -214,7 +214,7 @@ async def openai_serve_model(
 ):
     combined_request = {"request": request, "raw_request": raw_request}
     return_value = await process_with_exceptions(
-        base_url=request.get("model", None),
+        base_url=request.model,
         version=None,
         request_body=combined_request,
         serve_type=endpoint_type
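The first hunk replaces dict-style access with attribute access: `request` here is presumably one of vLLM's OpenAI-protocol Pydantic models rather than a dict, so `request.get("model", None)` raises AttributeError and the model name has to be read as `request.model`. A minimal sketch of the difference, using a hypothetical stand-in class:

    from pydantic import BaseModel

    class DummyCompletionRequest(BaseModel):  # stand-in for the real request class
        model: str

    req = DummyCompletionRequest(model="my-llm")
    print(req.model)          # "my-llm" -- attribute access, as in the fixed line
    # req.get("model", None)  # AttributeError: BaseModel defines no .get() method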

View File

@@ -637,12 +637,14 @@ class VllmPreprocessRequest(BasePreprocessRequest):
             ChatCompletionRequest,
             ChatCompletionResponse,
             CompletionRequest,
+            CompletionResponse,
             ErrorResponse
         )
         self._vllm = {}
         self._vllm["chat_completion_request"] = ChatCompletionRequest
         self._vllm["chat_completion_response"] = ChatCompletionResponse
         self._vllm["completion_request"] = CompletionRequest
+        self._vllm["completion_response"] = CompletionResponse
         self._vllm["error_response"] = ErrorResponse
         if self._fastapi is None:
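This hunk extends the lazy-import block: the vLLM OpenAI-protocol classes are imported inside the class and cached in the self._vllm dict, and CompletionResponse is now included so the completion path can type-check its result. A sketch of the same pattern as a standalone helper, assuming the classes come from vllm.entrypoints.openai.protocol (the import line itself is not shown in the hunk):

    def load_vllm_protocol():
        # Deferred import: vllm only has to be installed when this code path is used.
        from vllm.entrypoints.openai.protocol import (
            ChatCompletionRequest,
            ChatCompletionResponse,
            CompletionRequest,
            CompletionResponse,
            ErrorResponse,
        )
        return {
            "chat_completion_request": ChatCompletionRequest,
            "chat_completion_response": ChatCompletionResponse,
            "completion_request": CompletionRequest,
            "completion_response": CompletionResponse,
            "error_response": ErrorResponse,
        }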
@@ -741,7 +743,7 @@ class VllmPreprocessRequest(BasePreprocessRequest):
         generator = await handler.create_completion(request=request, raw_request=raw_request)
         if isinstance(generator, self._vllm["error_response"]):
             return self._fastapi["json_response"](content=generator.model_dump(), status_code=generator.code)
-        elif isinstance(generator, self._vllm["chat_completion_response"]):
+        elif isinstance(generator, self._vllm["completion_response"]):
             return self._fastapi["json_response"](content=generator.model_dump())
         return self._fastapi["streaming_response"](content=generator, media_type="text/event-stream")
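The last hunk is the actual fix: handler.create_completion(...) can return an ErrorResponse, a fully materialized CompletionResponse (non-streaming), or an async generator of SSE chunks (streaming). The old code checked the non-streaming result against chat_completion_response, which a CompletionResponse never matches, so plain completions fell through to the streaming branch. A small check illustrating why, again assuming vLLM's protocol module:

    from vllm.entrypoints.openai.protocol import ChatCompletionResponse, CompletionResponse

    # The two response types are separate Pydantic models; neither subclasses
    # the other, so the old isinstance() check was always False for /v1/completions.
    assert not issubclass(CompletionResponse, ChatCompletionResponse)
    assert not issubclass(ChatCompletionResponse, CompletionResponse)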