From 0b5aa6dd60c5502ad98a0bea903142763a1e3f91 Mon Sep 17 00:00:00 2001
From: Classic298 <27028174+Classic298@users.noreply.github.com>
Date: Sun, 11 Jan 2026 20:34:23 +0100
Subject: [PATCH] fix(db): release connection before LLM call in Ollama /api/chat (#20571)

Remove Depends(get_session) from the /api/chat endpoint to prevent
database connections from being held during the entire duration of LLM
calls (30-60+ seconds for streaming responses).

Previously, the database session was acquired at request start and held
until the streaming response completed. Under concurrent load, this
exhausted the connection pool, causing QueuePool timeout errors for
other database operations.

The fix allows Models.get_model_by_id() and has_access() to manage their
own short-lived sessions internally, releasing the connection
immediately after the quick authorization checks complete - before the
slow external LLM API call begins.
---
 backend/open_webui/routers/ollama.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/backend/open_webui/routers/ollama.py b/backend/open_webui/routers/ollama.py
index 0af35de38..b269aa329 100644
--- a/backend/open_webui/routers/ollama.py
+++ b/backend/open_webui/routers/ollama.py
@@ -1257,8 +1257,11 @@ async def generate_chat_completion(
     user=Depends(get_verified_user),
     bypass_filter: Optional[bool] = False,
     bypass_system_prompt: bool = False,
-    db: Session = Depends(get_session),
 ):
+    # NOTE: We intentionally do NOT use Depends(get_session) here.
+    # Database operations (get_model_by_id, has_access) manage their own short-lived sessions.
+    # This prevents holding a connection during the entire LLM call (30-60+ seconds),
+    # which would exhaust the connection pool under concurrent load.
     if BYPASS_MODEL_ACCESS_CONTROL:
         bypass_filter = True
 
@@ -1279,7 +1282,7 @@ async def generate_chat_completion(
         del payload["metadata"]
 
     model_id = payload["model"]
-    model_info = Models.get_model_by_id(model_id, db=db)
+    model_info = Models.get_model_by_id(model_id)
 
     if model_info:
         if model_info.base_model_id:
@@ -1307,7 +1310,6 @@ async def generate_chat_completion(
                     user.id,
                     type="read",
                     access_control=model_info.access_control,
-                    db=db,
                 )
             ):
                 raise HTTPException(