refac: ollama non-stream response

Timothy J. Baek 2024-06-30 01:30:19 -07:00
parent bb53282cb0
commit f6efda9e2f


@@ -153,7 +153,7 @@ async def cleanup_response(
     await session.close()


-async def post_streaming_url(url: str, payload: str):
+async def post_streaming_url(url: str, payload: str, stream: bool = True):
     r = None
     try:
         session = aiohttp.ClientSession(
@@ -162,12 +162,20 @@ async def post_streaming_url(url: str, payload: str):
         r = await session.post(url, data=payload)
         r.raise_for_status()
 
-        return StreamingResponse(
-            r.content,
-            status_code=r.status,
-            headers=dict(r.headers),
-            background=BackgroundTask(cleanup_response, response=r, session=session),
-        )
+        if stream:
+            return StreamingResponse(
+                r.content,
+                status_code=r.status,
+                headers=dict(r.headers),
+                background=BackgroundTask(
+                    cleanup_response, response=r, session=session
+                ),
+            )
+        else:
+            res = await r.json()
+            await cleanup_response(r, session)
+            return res
+
     except Exception as e:
         error_detail = "Open WebUI: Server Connection Error"
         if r is not None:
@@ -963,7 +971,11 @@ async def generate_openai_chat_completion(
     url = app.state.config.OLLAMA_BASE_URLS[url_idx]
     log.info(f"url: {url}")
 
-    return await post_streaming_url(f"{url}/v1/chat/completions", json.dumps(payload))
+    return await post_streaming_url(
+        f"{url}/v1/chat/completions",
+        json.dumps(payload),
+        stream=payload.get("stream", False),
+    )
 
 
 @app.get("/v1/models")
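For reference, a minimal, self-contained sketch of the behaviour this commit introduces. This is not the verbatim Open WebUI code: the AIOHTTP_CLIENT_TIMEOUT configuration and the detailed error handling are omitted, the localhost Ollama URL is only an illustrative default, and the chat_completions wrapper endpoint below is hypothetical. When stream=True the upstream body is proxied as a StreamingResponse with cleanup deferred to a BackgroundTask; when stream=False the JSON body is read eagerly, the session is closed immediately, and the parsed dict is returned.

import json

import aiohttp
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from starlette.background import BackgroundTask

app = FastAPI()


async def cleanup_response(response, session):
    # Release the upstream response and close the aiohttp session.
    if response:
        response.close()
    await session.close()


async def post_streaming_url(url: str, payload: str, stream: bool = True):
    session = aiohttp.ClientSession(trust_env=True)
    r = await session.post(url, data=payload)
    r.raise_for_status()

    if stream:
        # Streaming: proxy the upstream body as-is and defer cleanup
        # until the client has finished consuming the stream.
        return StreamingResponse(
            r.content,
            status_code=r.status,
            headers=dict(r.headers),
            background=BackgroundTask(cleanup_response, response=r, session=session),
        )
    else:
        # Non-streaming: read the whole JSON body now, clean up immediately,
        # and return the parsed dict as a normal JSON response.
        res = await r.json()
        await cleanup_response(r, session)
        return res


@app.post("/v1/chat/completions")
async def chat_completions(payload: dict):
    # Hypothetical caller: whether we stream is driven by the client's own
    # "stream" flag, mirroring the change to generate_openai_chat_completion.
    return await post_streaming_url(
        "http://localhost:11434/v1/chat/completions",
        json.dumps(payload),
        stream=payload.get("stream", False),
    )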