refac: byebye litellm

2024-11-16 05:24:02 +00:00 · 2024-05-25 14:43:35 -07:00 · 2024-05-25 14:43:35 -07:00 · 79f440f302
commit 79f440f302
parent 50951459c9
13 changed files with 14 additions and 808 deletions
--- a/.env.example
+++ b/.env.example
@ -11,7 +11,3 @@ OPENAI_API_KEY=''
 SCARF_NO_ANALYTICS=true
 DO_NOT_TRACK=true
 ANONYMIZED_TELEMETRY=false
 # Use locally bundled version of the LiteLLM cost map json
 # to avoid repetitive startup connections
 LITELLM_LOCAL_MODEL_COST_MAP="True"
--- a/13
+++ b/13
@ -59,11 +59,6 @@ ENV OPENAI_API_KEY="" \
    DO_NOT_TRACK=true \
    ANONYMIZED_TELEMETRY=false
 # Use locally bundled version of the LiteLLM cost map json
 # to avoid repetitive startup connections
 ENV LITELLM_LOCAL_MODEL_COST_MAP="True"
 #### Other models #########################################################
 ## whisper STT model settings ##
 ENV WHISPER_MODEL="base" \
@ -83,10 +78,10 @@ WORKDIR /app/backend
 ENV HOME /root
 # Create user and group if not root
 RUN if [ $UID -ne 0 ]; then \
-      if [ $GID -ne 0 ]; then \
+    if [ $GID -ne 0 ]; then \
-        addgroup --gid $GID app; \
+    addgroup --gid $GID app; \
-      fi; \
+    fi; \
-      adduser --uid $UID --gid $GID --home $HOME --disabled-password --no-create-home app; \
+    adduser --uid $UID --gid $GID --home $HOME --disabled-password --no-create-home app; \
    fi
 RUN mkdir -p $HOME/.cache/chroma
--- a/backend/apps/litellm/main.py
+++ b/backend/apps/litellm/main.py
@ -1,388 +0,0 @@
 import sys
 from contextlib import asynccontextmanager
 from fastapi import FastAPI, Depends, HTTPException
 from fastapi.routing import APIRoute
 from fastapi.middleware.cors import CORSMiddleware
 import logging
 from fastapi import FastAPI, Request, Depends, status, Response
 from fastapi.responses import JSONResponse
 from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
 from starlette.responses import StreamingResponse
 import json
 import time
 import requests
 from pydantic import BaseModel, ConfigDict
 from typing import Optional, List
 from apps.web.models.models import Models
 from utils.utils import get_verified_user, get_current_user, get_admin_user
 from config import SRC_LOG_LEVELS
 from constants import MESSAGES
 import os
 log = logging.getLogger(__name__)
 log.setLevel(SRC_LOG_LEVELS["LITELLM"])
 from config import (
    ENABLE_LITELLM,
    ENABLE_MODEL_FILTER,
    MODEL_FILTER_LIST,
    DATA_DIR,
    LITELLM_PROXY_PORT,
    LITELLM_PROXY_HOST,
 )
 import warnings
 warnings.simplefilter("ignore")
 from litellm.utils import get_llm_provider
 import asyncio
 import subprocess
 import yaml
@asynccontextmanager
 async def lifespan(app: FastAPI):
    """Lifespan hook: schedule the LiteLLM launcher when the application starts.

    The launcher coroutine is scheduled as a task instead of being awaited, so
    startup does not wait for the proxy subprocess. Nothing is done after the
    ``yield`` (no shutdown handling in this hook).
    """
    log.info("startup_event")
    # TODO: Check config.yaml file and create one
    asyncio.create_task(start_litellm_background())
    yield
 app = FastAPI(lifespan=lifespan)
 origins = ["*"]
 app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
 )
 LITELLM_CONFIG_DIR = f"{DATA_DIR}/litellm/config.yaml"
 with open(LITELLM_CONFIG_DIR, "r") as file:
    litellm_config = yaml.safe_load(file)
 app.state.ENABLE_MODEL_FILTER = ENABLE_MODEL_FILTER.value
 app.state.MODEL_FILTER_LIST = MODEL_FILTER_LIST.value
 app.state.MODEL_CONFIG = Models.get_all_models()
 app.state.ENABLE = ENABLE_LITELLM
 app.state.CONFIG = litellm_config
 # Global variable to store the subprocess reference
 background_process = None
 CONFLICT_ENV_VARS = [
    # Uvicorn uses PORT, so LiteLLM might use it as well
    "PORT",
    # LiteLLM uses DATABASE_URL for Prisma connections
    "DATABASE_URL",
 ]
 async def run_background_process(command):
    """Run ``command`` as a child process and relay its output to the log.

    The started process is stored in the module-level ``background_process``
    so that it can be terminated later. The coroutine only returns once the
    child process has exited.

    Args:
        command: Program name followed by its arguments; unpacked into
            ``asyncio.create_subprocess_exec``.

    Raises:
        Exception: Any error raised while starting or supervising the
            process is logged and re-raised.
    """
    global background_process
    log.info("run_background_process")

    async def _relay(stream, prefix):
        # Forward one pipe to the log, line by line, until EOF.
        async for line in stream:
            text = line.decode(errors="replace").strip()
            log.info(f"{prefix}{text}")

    try:
        # Log the command to be executed
        log.info(f"Executing command: {command}")
        # Filter environment variables known to conflict with litellm
        env = {k: v for k, v in os.environ.items() if k not in CONFLICT_ENV_VARS}
        # Execute the command and create a subprocess
        process = await asyncio.create_subprocess_exec(
            *command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env
        )
        background_process = process
        log.info("Subprocess started successfully.")
        # Drain both pipes concurrently. Reading stderr to EOF before touching
        # stdout blocks until the child exits and can deadlock once the child
        # fills the stdout pipe buffer.
        await asyncio.gather(
            _relay(process.stdout, ""),
            _relay(process.stderr, "Subprocess STDERR: "),
        )
        # Wait for the process to finish
        returncode = await process.wait()
        log.info(f"Subprocess exited with return code {returncode}")
    except Exception as e:
        log.error(f"Failed to start subprocess: {e}")
        raise  # Propagate so the caller/task records the failure
 async def start_litellm_background():
    """Launch the ``litellm`` CLI with the configured port, host and config file.

    Awaits ``run_background_process``, so this coroutine lasts as long as the
    subprocess does.
    """
    log.info("start_litellm_background")
    # Assemble the argument vector one flag/value pair at a time.
    litellm_cli = ["litellm"]
    litellm_cli += ["--port", str(LITELLM_PROXY_PORT)]
    litellm_cli += ["--host", LITELLM_PROXY_HOST]
    litellm_cli += ["--telemetry", "False"]
    litellm_cli += ["--config", LITELLM_CONFIG_DIR]
    await run_background_process(litellm_cli)
 async def shutdown_litellm_background():
    """Terminate the LiteLLM subprocess, if one was started, and forget it.

    Safe to call when no process exists or when the process has already
    exited on its own (for example after a crash).
    """
    global background_process
    log.info("shutdown_litellm_background")
    process = background_process
    if process is None:
        return
    # Only signal a process that is still running: terminate() raises
    # ProcessLookupError once the child has been reaped.
    if process.returncode is None:
        try:
            process.terminate()
        except ProcessLookupError:
            # The child exited between the check above and the signal.
            log.info("Subprocess already exited")
        await process.wait()  # Ensure the process has terminated
    log.info("Subprocess terminated")
    background_process = None
@app.get("/")
 async def get_status():
    """Return a constant ``{"status": True}`` payload for the root route."""
    return {"status": True}
 async def restart_litellm():
    """
    Restart the litellm background service.

    Helper shared by the ``/restart`` route and by the config/model mutation
    routes: it stops the current subprocess (if any) and schedules a new one.

    Returns:
        dict: ``{"status": "success", "message": ...}`` once the new launcher
        task has been scheduled.

    Raises:
        HTTPException: 500 with the error text if shutdown or scheduling fails.
    """
    log.info("Requested restart of litellm service.")
    try:
        # Shut down the existing process if it is running
        await shutdown_litellm_background()
        log.info("litellm service shutdown complete.")
        # The launcher runs for as long as the subprocess lives, so it is
        # scheduled rather than awaited; "complete" here means "scheduled".
        asyncio.create_task(start_litellm_background())
        log.info("litellm service restart complete.")
        return {
            "status": "success",
            "message": "litellm service restarted successfully.",
        }
    except Exception as e:
        # Log at error level with traceback; INFO hid failures in production.
        log.exception(f"Error restarting litellm service: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)
        ) from e
@app.get("/restart")
 async def restart_litellm_handler(user=Depends(get_admin_user)):
    """Route handler that delegates to ``restart_litellm``; guarded by the ``get_admin_user`` dependency."""
    return await restart_litellm()
@app.get("/config")
 async def get_config(user=Depends(get_admin_user)):
    """Return the LiteLLM configuration currently held in ``app.state.CONFIG``."""
    return app.state.CONFIG
 class LiteLLMConfigForm(BaseModel):
    """Request body for replacing the LiteLLM configuration; every section is optional."""
    general_settings: Optional[dict] = None
    litellm_settings: Optional[dict] = None
    model_list: Optional[List[dict]] = None
    router_settings: Optional[dict] = None
    # Clears pydantic's protected namespaces; presumably needed because the
    # ``model_list`` field uses the ``model_`` prefix -- confirm against pydantic docs.
    model_config = ConfigDict(protected_namespaces=())
@app.post("/config/update")
 async def update_config(form_data: LiteLLMConfigForm, user=Depends(get_admin_user)):
    """Replace the in-memory config, persist it as YAML and restart LiteLLM.

    Sections left as ``None`` in the form are omitted from the stored config.
    Returns the configuration held in ``app.state.CONFIG`` after the restart.
    """
    new_config = form_data.model_dump(exclude_none=True)
    app.state.CONFIG = new_config
    with open(LITELLM_CONFIG_DIR, "w") as config_file:
        yaml.dump(new_config, config_file)
    await restart_litellm()
    return app.state.CONFIG
@app.get("/models")
@app.get("/v1/models")
 async def get_models(user=Depends(get_current_user)):
    """Return the available models as an OpenAI-style ``{"data": [...], "object": "list"}``.

    When LiteLLM is disabled the list is empty. Otherwise the running proxy is
    queried; if that fails, the list is rebuilt from the ``model_list`` section
    of the stored configuration. When model filtering is enabled, users with
    role ``"user"`` only see models whose id is in ``MODEL_FILTER_LIST``.
    """
    if not app.state.ENABLE:
        return {
            "data": [],
            "object": "list",
        }
    # Wait until the launcher has stored a subprocess handle.
    # NOTE(review): this loop has no upper bound -- confirm that is intended.
    while not background_process:
        await asyncio.sleep(0.1)
    url = f"http://localhost:{LITELLM_PROXY_PORT}/v1"
    r = None
    try:
        r = requests.request(method="GET", url=f"{url}/models")
        r.raise_for_status()
        data = r.json()
        if app.state.ENABLE_MODEL_FILTER and user and user.role == "user":
            data["data"] = [
                model
                for model in data["data"]
                if model["id"] in app.state.MODEL_FILTER_LIST
            ]
        return data
    except Exception as e:
        log.exception(e)
        error_detail = "Open WebUI: Server Connection Error"
        if r is not None:
            try:
                res = r.json()
                if "error" in res:
                    error_detail = f"External: {res['error']}"
            except Exception:
                error_detail = f"External: {e}"
        # The detail used to be computed and silently dropped; surface it.
        log.warning(f"Falling back to configured model list: {error_detail}")
        # Tolerate a config without (or with an empty) ``model_list`` section.
        configured_models = (app.state.CONFIG or {}).get("model_list") or []
        return {
            "data": [
                {
                    "id": model["model_name"],
                    "object": "model",
                    "created": int(time.time()),
                    "owned_by": "openai",
                    "custom_info": next(
                        (
                            item
                            for item in app.state.MODEL_CONFIG
                            if item.id == model["model_name"]
                        ),
                        None,
                    ),
                }
                for model in configured_models
            ],
            "object": "list",
        }
@app.get("/model/info")
 async def get_model_list(user=Depends(get_admin_user)):
    """Return the ``model_list`` section of the stored config under a ``data`` key."""
    return {"data": app.state.CONFIG["model_list"]}
 class AddLiteLLMModelForm(BaseModel):
    """Request body for adding one entry to the config's ``model_list``."""
    model_name: str  # stored as the entry's ``model_name``
    litellm_params: dict  # stored verbatim as the entry's ``litellm_params``
    # Clears pydantic's protected namespaces; presumably needed because
    # ``model_name`` uses the ``model_`` prefix -- confirm against pydantic docs.
    model_config = ConfigDict(protected_namespaces=())
@app.post("/model/new")
 async def add_model_to_config(
    form_data: AddLiteLLMModelForm, user=Depends(get_admin_user)
 ):
    """Append a model entry to the config, persist it and restart LiteLLM.

    Returns:
        dict: ``{"message": ...}`` built from ``MESSAGES.MODEL_ADDED``.

    Raises:
        HTTPException: 500 with the error text if validation, the file write
            or the restart fails.
    """
    try:
        # NOTE(review): this validates the display name rather than
        # ``litellm_params["model"]`` -- confirm that is the intended check.
        get_llm_provider(model=form_data.model_name)
        # Tolerate a config whose ``model_list`` section is missing or null.
        model_list = app.state.CONFIG.get("model_list") or []
        model_list.append(form_data.model_dump())
        app.state.CONFIG["model_list"] = model_list
        with open(LITELLM_CONFIG_DIR, "w") as file:
            yaml.dump(app.state.CONFIG, file)
        await restart_litellm()
        return {"message": MESSAGES.MODEL_ADDED(form_data.model_name)}
    except Exception as e:
        # Use the module logger (with traceback) instead of print().
        log.exception(e)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)
        ) from e
 class DeleteLiteLLMModelForm(BaseModel):
    """Request body for deleting a model; ``id`` is compared against each entry's ``model_name``."""
    id: str
@app.post("/model/delete")
 async def delete_model_from_config(
    form_data: DeleteLiteLLMModelForm, user=Depends(get_admin_user)
 ):
    """Drop every config entry whose ``model_name`` equals ``form_data.id``.

    The trimmed config is written back to disk and LiteLLM is restarted.
    Returns ``{"message": ...}`` built from ``MESSAGES.MODEL_DELETED``.
    """
    remaining = []
    for entry in app.state.CONFIG["model_list"]:
        if entry["model_name"] != form_data.id:
            remaining.append(entry)
    app.state.CONFIG["model_list"] = remaining
    with open(LITELLM_CONFIG_DIR, "w") as config_file:
        yaml.dump(app.state.CONFIG, config_file)
    await restart_litellm()
    return {"message": MESSAGES.MODEL_DELETED(form_data.id)}
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
 async def proxy(path: str, request: Request, user=Depends(get_verified_user)):
    """Forward the request to the local LiteLLM proxy and relay its response.

    Server-sent-event responses are streamed back in chunks; any other
    response is parsed as JSON and returned.

    Raises:
        HTTPException: With the upstream status code when the proxy answered
            with an error status, otherwise 500 (connection failure or an
            unparseable successful response).
    """
    body = await request.body()
    target_url = f"http://localhost:{LITELLM_PROXY_PORT}/{path}"
    headers = {"Content-Type": "application/json"}
    r = None
    try:
        r = requests.request(
            method=request.method,
            url=target_url,
            data=body,
            headers=headers,
            stream=True,
        )
        r.raise_for_status()
        # Check if response is SSE
        if "text/event-stream" in r.headers.get("Content-Type", ""):
            return StreamingResponse(
                r.iter_content(chunk_size=8192),
                status_code=r.status_code,
                headers=dict(r.headers),
            )
        return r.json()
    except Exception as e:
        log.exception(e)
        error_detail = "Open WebUI: Server Connection Error"
        if r is not None:
            try:
                res = r.json()
                if "error" in res:
                    error = res["error"]
                    # ``error`` may be a plain string or a dict with "message".
                    if isinstance(error, dict) and "message" in error:
                        error = error["message"]
                    error_detail = f"External: {error}"
            except Exception:
                error_detail = f"External: {e}"
        # A requests.Response is falsy for 4xx/5xx, so ``if r`` would discard
        # exactly the upstream error codes that should be relayed.
        upstream_failed = r is not None and not r.ok
        raise HTTPException(
            status_code=r.status_code if upstream_failed else 500,
            detail=error_detail,
        )
--- a/backend/apps/web/models/modelfiles.py
+++ b/backend/apps/web/models/modelfiles.py
@ -1,144 +0,0 @@
 ################################################################################
 #                              DEPRECATION NOTICE                              #
 #                                                                              #
 # This file has been deprecated since version 0.2.0.                           #
 #                                                                              #
 ################################################################################
 from pydantic import BaseModel
 from peewee import *
 from playhouse.shortcuts import model_to_dict
 from typing import List, Union, Optional
 import time
 from utils.utils import decode_token
 from utils.misc import get_gravatar_url
 from apps.web.internal.db import DB
 import json
 ####################
 # Modelfile DB Schema
 ####################
 class Modelfile(Model):
    """Peewee model for one stored modelfile row, bound to ``DB``."""
    tag_name = CharField(unique=True)  # unique lookup key
    user_id = CharField()
    modelfile = TextField()  # JSON text (written with json.dumps by ModelfilesTable)
    timestamp = BigIntegerField()  # epoch seconds (written as int(time.time()))
    class Meta:
        database = DB
 class ModelfileModel(BaseModel):
    """Pydantic view of a ``Modelfile`` row, with ``modelfile`` kept as its raw string."""
    tag_name: str
    user_id: str
    modelfile: str
    timestamp: int  # timestamp in epoch
 ####################
 # Forms
 ####################
 class ModelfileForm(BaseModel):
    """Form carrying a modelfile as a dict; inserts require a ``"tagName"`` key in it."""
    modelfile: dict
 class ModelfileTagNameForm(BaseModel):
    """Form identifying a modelfile by its tag name."""
    tag_name: str
 class ModelfileUpdateForm(ModelfileForm, ModelfileTagNameForm):
    """Update form combining ``tag_name`` with the ``modelfile`` dict from both bases."""
    pass
 class ModelfileResponse(BaseModel):
    """Response shape for a modelfile, with ``modelfile`` decoded to a dict."""
    tag_name: str
    user_id: str
    modelfile: dict
    timestamp: int  # timestamp in epoch
 class ModelfilesTable:
    """Data-access helper for ``Modelfile`` rows.

    Insert, lookup, update and delete swallow database errors and report
    failure through their return value (``None`` or ``False``) rather than
    raising. ``get_modelfiles`` does not catch errors.
    """

    def __init__(self, db):
        """Keep the database handle and create the ``Modelfile`` table via ``create_tables``."""
        self.db = db
        self.db.create_tables([Modelfile])

    def insert_new_modelfile(
        self, user_id: str, form_data: ModelfileForm
    ) -> Optional[ModelfileModel]:
        """Store a new modelfile keyed by ``form_data.modelfile["tagName"]``.

        Returns the stored model, or ``None`` when the form has no
        ``"tagName"`` key or the insert fails (e.g. duplicate tag name).
        """
        if "tagName" not in form_data.modelfile:
            return None
        modelfile = ModelfileModel(
            user_id=user_id,
            tag_name=form_data.modelfile["tagName"],
            modelfile=json.dumps(form_data.modelfile),
            timestamp=int(time.time()),
        )
        try:
            result = Modelfile.create(**modelfile.model_dump())
        except Exception:
            return None
        return modelfile if result else None

    def get_modelfile_by_tag_name(self, tag_name: str) -> Optional[ModelfileModel]:
        """Return the modelfile with this tag name, or ``None`` if the lookup fails."""
        try:
            row = Modelfile.get(Modelfile.tag_name == tag_name)
            return ModelfileModel(**model_to_dict(row))
        except Exception:
            return None

    def get_modelfiles(self, skip: int = 0, limit: int = 50) -> List[ModelfileResponse]:
        """Return every stored modelfile with its JSON payload decoded.

        ``skip`` and ``limit`` are accepted but currently ignored: pagination
        is commented out in the query below.
        """
        return [
            ModelfileResponse(
                **{
                    **model_to_dict(row),
                    "modelfile": json.loads(row.modelfile),
                }
            )
            for row in Modelfile.select()
            # .limit(limit).offset(skip)
        ]

    def update_modelfile_by_tag_name(
        self, tag_name: str, modelfile: dict
    ) -> Optional[ModelfileModel]:
        """Replace the payload and timestamp of the row with this tag name.

        Returns the refreshed model, or ``None`` when the update fails or no
        row with that tag name can be read back.
        """
        try:
            query = Modelfile.update(
                modelfile=json.dumps(modelfile),
                timestamp=int(time.time()),
            ).where(Modelfile.tag_name == tag_name)
            query.execute()
            # Separate name so the ``modelfile`` argument is not shadowed.
            updated_row = Modelfile.get(Modelfile.tag_name == tag_name)
            return ModelfileModel(**model_to_dict(updated_row))
        except Exception:
            return None

    def delete_modelfile_by_tag_name(self, tag_name: str) -> bool:
        """Delete rows with this tag name.

        Returns ``True`` when the query ran (even if it matched no row) and
        ``False`` when it raised.
        """
        try:
            query = Modelfile.delete().where((Modelfile.tag_name == tag_name))
            query.execute()  # Remove the rows, return number of rows removed.
            return True
        except Exception:
            return False
 Modelfiles = ModelfilesTable(DB)
--- a/backend/config.py
+++ b/backend/config.py
@ -56,7 +56,6 @@ log_sources = [
    "CONFIG",
    "DB",
    "IMAGES",
    "LITELLM",
    "MAIN",
    "MODELS",
    "OLLAMA",
@ -374,10 +373,10 @@ def create_config_file(file_path):
 LITELLM_CONFIG_PATH = f"{DATA_DIR}/litellm/config.yaml"
-if not os.path.exists(LITELLM_CONFIG_PATH):
+# if not os.path.exists(LITELLM_CONFIG_PATH):
-    log.info("Config file doesn't exist. Creating...")
+#     log.info("Config file doesn't exist. Creating...")
-    create_config_file(LITELLM_CONFIG_PATH)
+#     create_config_file(LITELLM_CONFIG_PATH)
-    log.info("Config file created successfully.")
+#     log.info("Config file created successfully.")
 ####################################
@ -826,18 +825,6 @@ AUDIO_OPENAI_API_VOICE = PersistentConfig(
    os.getenv("AUDIO_OPENAI_API_VOICE", "alloy"),
 )
 ####################################
 # LiteLLM
 ####################################
 ENABLE_LITELLM = os.environ.get("ENABLE_LITELLM", "True").lower() == "true"
 LITELLM_PROXY_PORT = int(os.getenv("LITELLM_PROXY_PORT", "14365"))
 if LITELLM_PROXY_PORT < 0 or LITELLM_PROXY_PORT > 65535:
    raise ValueError("Invalid port number for LITELLM_PROXY_PORT")
 LITELLM_PROXY_HOST = os.getenv("LITELLM_PROXY_HOST", "127.0.0.1")
 ####################################
 # Database
--- a/backend/main.py
+++ b/backend/main.py
@ -22,13 +22,6 @@ from starlette.responses import StreamingResponse, Response
 from apps.ollama.main import app as ollama_app, get_all_models as get_ollama_models
 from apps.openai.main import app as openai_app, get_all_models as get_openai_models
 from apps.litellm.main import (
    app as litellm_app,
    start_litellm_background,
    shutdown_litellm_background,
 )
 from apps.audio.main import app as audio_app
 from apps.images.main import app as images_app
 from apps.rag.main import app as rag_app
@ -55,7 +48,6 @@ from config import (
    STATIC_DIR,
    ENABLE_OPENAI_API,
    ENABLE_OLLAMA_API,
    ENABLE_LITELLM,
    ENABLE_MODEL_FILTER,
    MODEL_FILTER_LIST,
    GLOBAL_LOG_LEVEL,
@ -100,11 +92,7 @@ https://github.com/open-webui/open-webui
@asynccontextmanager
 async def lifespan(app: FastAPI):
    if ENABLE_LITELLM:
        asyncio.create_task(start_litellm_background())
    yield
    if ENABLE_LITELLM:
        await shutdown_litellm_background()
 app = FastAPI(
@ -262,9 +250,6 @@ async def update_embedding_function(request: Request, call_next):
    return response
 # TODO: Deprecate LiteLLM
 app.mount("/litellm/api", litellm_app)
 app.mount("/ollama", ollama_app)
 app.mount("/openai", openai_app)
@ -407,9 +392,6 @@ async def update_model_filter_config(
    openai_app.state.config.ENABLE_MODEL_FILTER = app.state.config.ENABLE_MODEL_FILTER
    openai_app.state.config.MODEL_FILTER_LIST = app.state.config.MODEL_FILTER_LIST
    litellm_app.state.ENABLE_MODEL_FILTER = app.state.config.ENABLE_MODEL_FILTER
    litellm_app.state.MODEL_FILTER_LIST = app.state.config.MODEL_FILTER_LIST
    return {
        "enabled": app.state.config.ENABLE_MODEL_FILTER,
        "models": app.state.config.MODEL_FILTER_LIST,
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@ -18,8 +18,6 @@ psycopg2-binary==2.9.9
 PyMySQL==1.1.1
 bcrypt==4.1.3
 litellm[proxy]==1.37.20
 boto3==1.34.110
 argon2-cffi==23.1.0
--- a/backend/space/litellm_config.yaml
+++ b/backend/space/litellm_config.yaml
@ -1,43 +0,0 @@
 litellm_settings:
  drop_params: true
 model_list:
  - model_name: 'HuggingFace: Mistral: Mistral 7B Instruct v0.1'
    litellm_params:
      model: huggingface/mistralai/Mistral-7B-Instruct-v0.1
      api_key: os.environ/HF_TOKEN
      max_tokens: 1024
  - model_name: 'HuggingFace: Mistral: Mistral 7B Instruct v0.2'
    litellm_params:
      model: huggingface/mistralai/Mistral-7B-Instruct-v0.2
      api_key: os.environ/HF_TOKEN
      max_tokens: 1024
  - model_name: 'HuggingFace: Meta: Llama 3 8B Instruct'
    litellm_params:
      model: huggingface/meta-llama/Meta-Llama-3-8B-Instruct
      api_key: os.environ/HF_TOKEN
      max_tokens: 2047
  - model_name: 'HuggingFace: Mistral: Mixtral 8x7B Instruct v0.1'
    litellm_params:
      model: huggingface/mistralai/Mixtral-8x7B-Instruct-v0.1
      api_key: os.environ/HF_TOKEN
      max_tokens: 8192
  - model_name: 'HuggingFace: Microsoft: Phi-3 Mini-4K-Instruct'
    litellm_params:
      model: huggingface/microsoft/Phi-3-mini-4k-instruct
      api_key: os.environ/HF_TOKEN
      max_tokens: 1024
  - model_name: 'HuggingFace: Google: Gemma 7B 1.1'
    litellm_params:
      model: huggingface/google/gemma-1.1-7b-it
      api_key: os.environ/HF_TOKEN
      max_tokens: 1024
  - model_name: 'HuggingFace: Yi-1.5 34B Chat'
    litellm_params:
      model: huggingface/01-ai/Yi-1.5-34B-Chat
      api_key: os.environ/HF_TOKEN
      max_tokens: 1024
  - model_name: 'HuggingFace: Nous Research: Nous Hermes 2 Mixtral 8x7B DPO'
    litellm_params:
      model: huggingface/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO
      api_key: os.environ/HF_TOKEN
      max_tokens: 2048
--- a/backend/start.sh
+++ b/backend/start.sh
@ -34,11 +34,6 @@ fi
 # Check if SPACE_ID is set, if so, configure for space
 if [ -n "$SPACE_ID" ]; then
  echo "Configuring for HuggingFace Space deployment"
  # Copy litellm_config.yaml with specified ownership
  echo "Copying litellm_config.yaml to the desired location with specified ownership..."
  cp -f ./space/litellm_config.yaml ./data/litellm/config.yaml
  if [ -n "$ADMIN_USER_EMAIL" ] && [ -n "$ADMIN_USER_PASSWORD" ]; then
    echo "Admin user configured, creating"
    WEBUI_SECRET_KEY="$WEBUI_SECRET_KEY" uvicorn main:app --host "$HOST" --port "$PORT" --forwarded-allow-ips '*' &
--- a/src/lib/apis/litellm/index.ts
+++ b/src/lib/apis/litellm/index.ts
@ -1,151 +0,0 @@
 import { LITELLM_API_BASE_URL } from '$lib/constants';
 /**
  * Fetch the models exposed by the LiteLLM API and normalise them for the UI.
  *
  * @param token Bearer token; the authorization header is omitted when empty.
  * @returns Models sorted by name, or `null` when the response carries no list.
  * @throws A `LiteLLM: ...` string describing the failure.
  */
 export const getLiteLLMModels = async (token: string = '') => {
 	let error = null;
 	const res = await fetch(`${LITELLM_API_BASE_URL}/v1/models`, {
 		method: 'GET',
 		headers: {
 			Accept: 'application/json',
 			'Content-Type': 'application/json',
 			...(token && { authorization: `Bearer ${token}` })
 		}
 	})
 		.then(async (res) => {
 			if (!res.ok) throw await res.json();
 			return res.json();
 		})
 		.catch((err) => {
 			console.log(err);
 			// The backend raises HTTPException, whose JSON body carries `detail`;
 			// keep `error.message` as a fallback for OpenAI-style error bodies.
 			error = `LiteLLM: ${err?.detail ?? err?.error?.message ?? 'Network Problem'}`;
 			return [];
 		});
 	if (error) {
 		throw error;
 	}
 	const models = Array.isArray(res) ? res : res?.data ?? null;
 	return models
 		? models
 				.map((model) => ({
 					id: model.id,
 					name: model.name ?? model.id,
 					external: true,
 					source: 'LiteLLM',
 					custom_info: model.custom_info
 				}))
 				.sort((a, b) => {
 					return a.name.localeCompare(b.name);
 				})
 		: models;
 };
 /**
  * Fetch the raw `model_list` entries from the LiteLLM API (`/model/info`).
  *
  * @param token Bearer token; the authorization header is omitted when empty.
  * @returns The entries, or `null` when the response carries no list.
  * @throws A `LiteLLM: ...` string describing the failure.
  */
 export const getLiteLLMModelInfo = async (token: string = '') => {
 	let error = null;
 	const res = await fetch(`${LITELLM_API_BASE_URL}/model/info`, {
 		method: 'GET',
 		headers: {
 			Accept: 'application/json',
 			'Content-Type': 'application/json',
 			...(token && { authorization: `Bearer ${token}` })
 		}
 	})
 		.then(async (res) => {
 			if (!res.ok) throw await res.json();
 			return res.json();
 		})
 		.catch((err) => {
 			console.log(err);
 			// The backend raises HTTPException, whose JSON body carries `detail`;
 			// keep `error.message` as a fallback for OpenAI-style error bodies.
 			error = `LiteLLM: ${err?.detail ?? err?.error?.message ?? 'Network Problem'}`;
 			return [];
 		});
 	if (error) {
 		throw error;
 	}
 	const models = Array.isArray(res) ? res : res?.data ?? null;
 	return models;
 };
 // Raw form values for adding a model; all fields arrive as strings from the UI.
 type AddLiteLLMModelForm = {
 	name: string; // sent as `model_name`
 	model: string; // sent as `litellm_params.model`
 	api_base: string; // omitted from the request when empty
 	api_key: string; // omitted from the request when empty
 	rpm: string; // parsed with parseInt; omitted when not numeric
 	max_tokens: string; // omitted from the request when empty
 };
 /**
  * Register a new model with the LiteLLM API (`/model/new`).
  *
  * Empty optional fields are left out of `litellm_params`; `rpm` and
  * `max_tokens` are sent as integers and dropped when they are not numeric.
  *
  * @param token Bearer token; the authorization header is omitted when empty.
  * @param payload Raw form values describing the model.
  * @returns The parsed JSON response.
  * @throws A `LiteLLM: ...` string describing the failure.
  */
 export const addLiteLLMModel = async (token: string = '', payload: AddLiteLLMModelForm) => {
 	let error = null;
 	const rpm = parseInt(payload.rpm, 10);
 	// Parse like `rpm` so the stored config holds a number, not a string.
 	const maxTokens = parseInt(payload.max_tokens, 10);
 	const res = await fetch(`${LITELLM_API_BASE_URL}/model/new`, {
 		method: 'POST',
 		headers: {
 			Accept: 'application/json',
 			'Content-Type': 'application/json',
 			...(token && { authorization: `Bearer ${token}` })
 		},
 		body: JSON.stringify({
 			model_name: payload.name,
 			litellm_params: {
 				model: payload.model,
 				...(payload.api_base === '' ? {} : { api_base: payload.api_base }),
 				...(payload.api_key === '' ? {} : { api_key: payload.api_key }),
 				...(isNaN(rpm) ? {} : { rpm: rpm }),
 				...(isNaN(maxTokens) ? {} : { max_tokens: maxTokens })
 			}
 		})
 	})
 		.then(async (res) => {
 			if (!res.ok) throw await res.json();
 			return res.json();
 		})
 		.catch((err) => {
 			console.log(err);
 			// The backend raises HTTPException, whose JSON body carries `detail`;
 			// keep `error.message` as a fallback for OpenAI-style error bodies.
 			error = `LiteLLM: ${err?.detail ?? err?.error?.message ?? 'Network Problem'}`;
 			return [];
 		});
 	if (error) {
 		throw error;
 	}
 	return res;
 };
 /**
  * Remove a model from the LiteLLM configuration (`/model/delete`).
  *
  * @param token Bearer token; the authorization header is omitted when empty.
  * @param id Model name to delete; sent as `id` in the request body.
  * @returns The parsed JSON response.
  * @throws A `LiteLLM: ...` string describing the failure.
  */
 export const deleteLiteLLMModel = async (token: string = '', id: string) => {
 	let error = null;
 	const res = await fetch(`${LITELLM_API_BASE_URL}/model/delete`, {
 		method: 'POST',
 		headers: {
 			Accept: 'application/json',
 			'Content-Type': 'application/json',
 			...(token && { authorization: `Bearer ${token}` })
 		},
 		body: JSON.stringify({
 			id: id
 		})
 	})
 		.then(async (res) => {
 			if (!res.ok) throw await res.json();
 			return res.json();
 		})
 		.catch((err) => {
 			console.log(err);
 			// The backend raises HTTPException, whose JSON body carries `detail`;
 			// keep `error.message` as a fallback for OpenAI-style error bodies.
 			error = `LiteLLM: ${err?.detail ?? err?.error?.message ?? 'Network Problem'}`;
 			return [];
 		});
 	if (error) {
 		throw error;
 	}
 	return res;
 };
--- a/src/lib/components/chat/Chat.svelte
+++ b/src/lib/components/chat/Chat.svelte
@ -35,12 +35,7 @@
 	import MessageInput from '$lib/components/chat/MessageInput.svelte';
 	import Messages from '$lib/components/chat/Messages.svelte';
 	import Navbar from '$lib/components/layout/Navbar.svelte';
-	import {
+	import { OLLAMA_API_BASE_URL, OPENAI_API_BASE_URL, WEBUI_BASE_URL } from '$lib/constants';
 		LITELLM_API_BASE_URL,
 		OLLAMA_API_BASE_URL,
 		OPENAI_API_BASE_URL,
 		WEBUI_BASE_URL
 	} from '$lib/constants';
 	import { createOpenAITextStream } from '$lib/apis/streaming';
 	import { queryMemory } from '$lib/apis/memories';
 	import type { Writable } from 'svelte/store';
@ -733,9 +728,7 @@
 					docs: docs.length > 0 ? docs : undefined,
 					citations: docs.length > 0
 				},
-				model?.source?.toLowerCase() === 'litellm'
+				`${OPENAI_API_BASE_URL}`
 					? `${LITELLM_API_BASE_URL}/v1`
 					: `${OPENAI_API_BASE_URL}`
 			);
 			// Wait until history/message have been updated
--- a/src/lib/components/workspace/Playground.svelte
+++ b/src/lib/components/workspace/Playground.svelte
@ -5,12 +5,7 @@
 	import { toast } from 'svelte-sonner';
-	import {
+	import { OLLAMA_API_BASE_URL, OPENAI_API_BASE_URL, WEBUI_API_BASE_URL } from '$lib/constants';
 		LITELLM_API_BASE_URL,
 		OLLAMA_API_BASE_URL,
 		OPENAI_API_BASE_URL,
 		WEBUI_API_BASE_URL
 	} from '$lib/constants';
 	import { WEBUI_NAME, config, user, models, settings } from '$lib/stores';
 	import { cancelOllamaRequest, generateChatCompletion } from '$lib/apis/ollama';
@ -79,11 +74,7 @@
 					}
 				]
 			},
-			model.external
+			model?.owned_by === 'openai' ? `${OPENAI_API_BASE_URL}` : `${OLLAMA_API_BASE_URL}/v1`
 				? model.source === 'litellm'
 					? `${LITELLM_API_BASE_URL}/v1`
 					: `${OPENAI_API_BASE_URL}`
 				: `${OLLAMA_API_BASE_URL}/v1`
 		);
 		if (res && res.ok) {
@ -150,11 +141,7 @@
 					...messages
 				].filter((message) => message)
 			},
-			model.external
+			model?.owned_by === 'openai' ? `${OPENAI_API_BASE_URL}` : `${OLLAMA_API_BASE_URL}/v1`
 				? model.source === 'litellm'
 					? `${LITELLM_API_BASE_URL}/v1`
 					: `${OPENAI_API_BASE_URL}`
 				: `${OLLAMA_API_BASE_URL}/v1`
 		);
 		let responseMessage;
--- a/src/lib/constants.ts
+++ b/src/lib/constants.ts
@ -6,7 +6,6 @@ export const WEBUI_BASE_URL = browser ? (dev ? `http://${location.hostname}:8080
 export const WEBUI_API_BASE_URL = `${WEBUI_BASE_URL}/api/v1`;
 export const LITELLM_API_BASE_URL = `${WEBUI_BASE_URL}/litellm/api`;
 export const OLLAMA_API_BASE_URL = `${WEBUI_BASE_URL}/ollama`;
 export const OPENAI_API_BASE_URL = `${WEBUI_BASE_URL}/openai`;
 export const AUDIO_API_BASE_URL = `${WEBUI_BASE_URL}/audio/api/v1`;