diff --git a/backend/open_webui/routers/ollama.py b/backend/open_webui/routers/ollama.py index 0af35de38..b269aa329 100644 --- a/backend/open_webui/routers/ollama.py +++ b/backend/open_webui/routers/ollama.py @@ -1257,8 +1257,11 @@ async def generate_chat_completion( user=Depends(get_verified_user), bypass_filter: Optional[bool] = False, bypass_system_prompt: bool = False, - db: Session = Depends(get_session), ): + # NOTE: We intentionally do NOT use Depends(get_session) here. + # Database operations (get_model_by_id, has_access) manage their own short-lived sessions. + # This prevents holding a connection during the entire LLM call (30-60+ seconds), + # which would exhaust the connection pool under concurrent load. if BYPASS_MODEL_ACCESS_CONTROL: bypass_filter = True @@ -1279,7 +1282,7 @@ async def generate_chat_completion( del payload["metadata"] model_id = payload["model"] - model_info = Models.get_model_by_id(model_id, db=db) + model_info = Models.get_model_by_id(model_id) if model_info: if model_info.base_model_id: @@ -1307,7 +1310,6 @@ async def generate_chat_completion( user.id, type="read", access_control=model_info.access_control, - db=db, ) ): raise HTTPException(