From efa258c69504aff9fab6c47f62e16dce42665318 Mon Sep 17 00:00:00 2001 From: Jun Siang Cheah Date: Sat, 20 Apr 2024 20:03:52 +0100 Subject: [PATCH 01/38] feat: split large openai responses into smaller chunkers --- src/lib/apis/streaming/index.ts | 65 ++++++++++++++++++++++++++++ src/routes/(app)/+page.svelte | 37 +++++----------- src/routes/(app)/c/[id]/+page.svelte | 43 ++++++------------ 3 files changed, 90 insertions(+), 55 deletions(-) create mode 100644 src/lib/apis/streaming/index.ts diff --git a/src/lib/apis/streaming/index.ts b/src/lib/apis/streaming/index.ts new file mode 100644 index 000000000..4d1d2ecec --- /dev/null +++ b/src/lib/apis/streaming/index.ts @@ -0,0 +1,65 @@ +type TextStreamUpdate = { + done: boolean; + value: string; +}; + +// createOpenAITextStream takes a ReadableStreamDefaultReader from an SSE response, +// and returns an async generator that emits delta updates with large deltas chunked into random sized chunks +export async function createOpenAITextStream( + messageStream: ReadableStreamDefaultReader +): Promise> { + return streamLargeDeltasAsRandomChunks(openAIStreamToIterator(messageStream)); +} + +async function* openAIStreamToIterator( + reader: ReadableStreamDefaultReader +): AsyncGenerator { + while (true) { + const { value, done } = await reader.read(); + if (done) { + yield { done: true, value: '' }; + break; + } + const lines = value.split('\n'); + for (const line of lines) { + if (line !== '') { + console.log(line); + if (line === 'data: [DONE]') { + yield { done: true, value: '' }; + } else { + const data = JSON.parse(line.replace(/^data: /, '')); + console.log(data); + + yield { done: false, value: data.choices[0].delta.content ?? '' }; + } + } + } + } +} + +// streamLargeDeltasAsRandomChunks will chunk large deltas (length > 5) into random sized chunks between 1-3 characters +// This is to simulate a more fluid streaming, even though some providers may send large chunks of text at once +async function* streamLargeDeltasAsRandomChunks( + iterator: AsyncGenerator +): AsyncGenerator { + for await (const textStreamUpdate of iterator) { + if (textStreamUpdate.done) { + yield textStreamUpdate; + return; + } + let content = textStreamUpdate.value; + if (content.length < 5) { + yield { done: false, value: content }; + continue; + } + while (content != '') { + const chunkSize = Math.min(Math.floor(Math.random() * 3) + 1, content.length); + const chunk = content.slice(0, chunkSize); + yield { done: false, value: chunk }; + await sleep(5); + content = content.slice(chunkSize); + } + } +} + +const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms)); diff --git a/src/routes/(app)/+page.svelte b/src/routes/(app)/+page.svelte index bdeff6d7a..bd8676985 100644 --- a/src/routes/(app)/+page.svelte +++ b/src/routes/(app)/+page.svelte @@ -39,6 +39,7 @@ import { RAGTemplate } from '$lib/utils/rag'; import { LITELLM_API_BASE_URL, OLLAMA_API_BASE_URL, OPENAI_API_BASE_URL } from '$lib/constants'; import { WEBUI_BASE_URL } from '$lib/constants'; + import { createOpenAITextStream } from '$lib/apis/streaming'; const i18n = getContext('i18n'); @@ -599,38 +600,22 @@ .pipeThrough(splitStream('\n')) .getReader(); - while (true) { - const { value, done } = await reader.read(); + const textStream = await createOpenAITextStream(reader); + console.log(textStream); + + for await (const update of textStream) { + const { value, done } = update; if (done || stopResponseFlag || _chatId !== $chatId) { responseMessage.done = true; messages = messages; break; } - try { - let lines = value.split('\n'); - - for (const line of lines) { - if (line !== '') { - console.log(line); - if (line === 'data: [DONE]') { - responseMessage.done = true; - messages = messages; - } else { - let data = JSON.parse(line.replace(/^data: /, '')); - console.log(data); - - if (responseMessage.content == '' && data.choices[0].delta.content == '\n') { - continue; - } else { - responseMessage.content += data.choices[0].delta.content ?? ''; - messages = messages; - } - } - } - } - } catch (error) { - console.log(error); + if (responseMessage.content == '' && value == '\n') { + continue; + } else { + responseMessage.content += value; + messages = messages; } if ($settings.notificationEnabled && !document.hasFocus()) { diff --git a/src/routes/(app)/c/[id]/+page.svelte b/src/routes/(app)/c/[id]/+page.svelte index 7502f3c4e..2f8ad7d0b 100644 --- a/src/routes/(app)/c/[id]/+page.svelte +++ b/src/routes/(app)/c/[id]/+page.svelte @@ -42,6 +42,7 @@ OLLAMA_API_BASE_URL, WEBUI_BASE_URL } from '$lib/constants'; + import { createOpenAITextStream } from '$lib/apis/streaming'; const i18n = getContext('i18n'); @@ -551,9 +552,9 @@ messages: [ $settings.system ? { - role: 'system', - content: $settings.system - } + role: 'system', + content: $settings.system + } : undefined, ...messages ] @@ -611,38 +612,22 @@ .pipeThrough(splitStream('\n')) .getReader(); - while (true) { - const { value, done } = await reader.read(); + const textStream = await createOpenAITextStream(reader); + console.log(textStream); + + for await (const update of textStream) { + const { value, done } = update; if (done || stopResponseFlag || _chatId !== $chatId) { responseMessage.done = true; messages = messages; break; } - try { - let lines = value.split('\n'); - - for (const line of lines) { - if (line !== '') { - console.log(line); - if (line === 'data: [DONE]') { - responseMessage.done = true; - messages = messages; - } else { - let data = JSON.parse(line.replace(/^data: /, '')); - console.log(data); - - if (responseMessage.content == '' && data.choices[0].delta.content == '\n') { - continue; - } else { - responseMessage.content += data.choices[0].delta.content ?? ''; - messages = messages; - } - } - } - } - } catch (error) { - console.log(error); + if (responseMessage.content == '' && value == '\n') { + continue; + } else { + responseMessage.content += value; + messages = messages; } if ($settings.notificationEnabled && !document.hasFocus()) { From 38321355d3476657126e0386b494fcdf517b20fb Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 20 Apr 2024 20:37:18 -0500 Subject: [PATCH 02/38] fix --- backend/config.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/backend/config.py b/backend/config.py index 6ca2c67bf..fb9063eb7 100644 --- a/backend/config.py +++ b/backend/config.py @@ -322,9 +322,14 @@ OPENAI_API_BASE_URLS = [ ] OPENAI_API_KEY = "" -OPENAI_API_KEY = OPENAI_API_KEYS[ - OPENAI_API_BASE_URLS.index("https://api.openai.com/v1") -] + +try: + OPENAI_API_KEY = OPENAI_API_KEYS[ + OPENAI_API_BASE_URLS.index("https://api.openai.com/v1") + ] +except: + pass + OPENAI_API_BASE_URL = "https://api.openai.com/v1" From 1e919abda30e500c07d60f9a893f9a1184745c63 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 20 Apr 2024 20:49:16 -0500 Subject: [PATCH 03/38] fix: settings getModels issue --- backend/apps/openai/main.py | 1 + src/lib/components/chat/SettingsModal.svelte | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/backend/apps/openai/main.py b/backend/apps/openai/main.py index 4647d7489..0fbbd365e 100644 --- a/backend/apps/openai/main.py +++ b/backend/apps/openai/main.py @@ -80,6 +80,7 @@ async def get_openai_urls(user=Depends(get_admin_user)): @app.post("/urls/update") async def update_openai_urls(form_data: UrlsUpdateForm, user=Depends(get_admin_user)): + await get_all_models() app.state.OPENAI_API_BASE_URLS = form_data.urls return {"OPENAI_API_BASE_URLS": app.state.OPENAI_API_BASE_URLS} diff --git a/src/lib/components/chat/SettingsModal.svelte b/src/lib/components/chat/SettingsModal.svelte index fd3910e90..95becea12 100644 --- a/src/lib/components/chat/SettingsModal.svelte +++ b/src/lib/components/chat/SettingsModal.svelte @@ -3,7 +3,7 @@ import { toast } from 'svelte-sonner'; import { models, settings, user } from '$lib/stores'; - import { getModels } from '$lib/utils'; + import { getModels as _getModels } from '$lib/utils'; import Modal from '../common/Modal.svelte'; import Account from './Settings/Account.svelte'; @@ -23,10 +23,14 @@ const saveSettings = async (updated) => { console.log(updated); await settings.set({ ...$settings, ...updated }); - await models.set(await getModels(localStorage.token)); + await models.set(await getModels()); localStorage.setItem('settings', JSON.stringify($settings)); }; + const getModels = async () => { + return await _getModels(localStorage.token); + }; + let selectedTab = 'general'; From fe3291acb5416c1ef3f67bc037fe8803e14a69a7 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 20 Apr 2024 21:12:59 -0500 Subject: [PATCH 04/38] fix: multiuser duplicate tag issue --- backend/apps/web/models/tags.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/apps/web/models/tags.py b/backend/apps/web/models/tags.py index 196551b7b..409a788ab 100644 --- a/backend/apps/web/models/tags.py +++ b/backend/apps/web/models/tags.py @@ -136,7 +136,9 @@ class TagTable: return [ TagModel(**model_to_dict(tag)) - for tag in Tag.select().where(Tag.name.in_(tag_names)) + for tag in Tag.select() + .where(Tag.user_id == user_id) + .where(Tag.name.in_(tag_names)) ] def get_tags_by_chat_id_and_user_id( From 1cf4fa96c1bf614c760319de74785e564c0b26cb Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 20 Apr 2024 21:15:39 -0500 Subject: [PATCH 05/38] fix --- backend/apps/web/models/tags.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/apps/web/models/tags.py b/backend/apps/web/models/tags.py index 409a788ab..02de5b9d7 100644 --- a/backend/apps/web/models/tags.py +++ b/backend/apps/web/models/tags.py @@ -153,7 +153,9 @@ class TagTable: return [ TagModel(**model_to_dict(tag)) - for tag in Tag.select().where(Tag.name.in_(tag_names)) + for tag in Tag.select() + .where(Tag.user_id == user_id) + .where(Tag.name.in_(tag_names)) ] def get_chat_ids_by_tag_name_and_user_id( From 7a1f1d36a12ca10715c92b87bb78cdf7a1f461ef Mon Sep 17 00:00:00 2001 From: Silentoplayz <50341825+Silentoplayz@users.noreply.github.com> Date: Sun, 21 Apr 2024 03:12:48 +0000 Subject: [PATCH 06/38] Update README.md Updated features list --- README.md | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index cd06bc384..d4d31ba64 100644 --- a/README.md +++ b/README.md @@ -25,22 +25,28 @@ Open WebUI is an extensible, feature-rich, and user-friendly self-hosted WebUI d - 🚀 **Effortless Setup**: Install seamlessly using Docker or Kubernetes (kubectl, kustomize or helm) for a hassle-free experience. +- 🌈 **Theme Customization**: Choose from a variety of themes to personalize your Open WebUI experience. + - 💻 **Code Syntax Highlighting**: Enjoy enhanced code readability with our syntax highlighting feature. - ✒️🔢 **Full Markdown and LaTeX Support**: Elevate your LLM experience with comprehensive Markdown and LaTeX capabilities for enriched interaction. - 📚 **Local RAG Integration**: Dive into the future of chat interactions with the groundbreaking Retrieval Augmented Generation (RAG) support. This feature seamlessly integrates document interactions into your chat experience. You can load documents directly into the chat or add files to your document library, effortlessly accessing them using `#` command in the prompt. In its alpha phase, occasional issues may arise as we actively refine and enhance this feature to ensure optimal performance and reliability. +- 🔍 **RAG Embedding Support**: Change the RAG embedding model directly in document settings, enhancing document processing. This feature supports Ollama and OpenAI models. + - 🌐 **Web Browsing Capability**: Seamlessly integrate websites into your chat experience using the `#` command followed by the URL. This feature allows you to incorporate web content directly into your conversations, enhancing the richness and depth of your interactions. - 📜 **Prompt Preset Support**: Instantly access preset prompts using the `/` command in the chat input. Load predefined conversation starters effortlessly and expedite your interactions. Effortlessly import prompts through [Open WebUI Community](https://openwebui.com/) integration. -- 👍👎 **RLHF Annotation**: Empower your messages by rating them with thumbs up and thumbs down, facilitating the creation of datasets for Reinforcement Learning from Human Feedback (RLHF). Utilize your messages to train or fine-tune models, all while ensuring the confidentiality of locally saved data. +- 👍👎 **RLHF Annotation**: Empower your messages by rating them with thumbs up and thumbs down, followed by the option to provide textual feedback, facilitating the creation of datasets for Reinforcement Learning from Human Feedback (RLHF). Utilize your messages to train or fine-tune models, all while ensuring the confidentiality of locally saved data. - 🏷️ **Conversation Tagging**: Effortlessly categorize and locate specific chats for quick reference and streamlined data collection. - 📥🗑️ **Download/Delete Models**: Easily download or remove models directly from the web UI. +- 🔄 **Update All Ollama Models**: Easily update locally installed models all at once with a convenient button, streamlining model management. + - ⬆️ **GGUF File Model Creation**: Effortlessly create Ollama models by uploading GGUF files directly from the web UI. Streamlined process with options to upload from your machine or download GGUF files from Hugging Face. - 🤖 **Multiple Model Support**: Seamlessly switch between different chat models for diverse interactions. @@ -53,28 +59,42 @@ Open WebUI is an extensible, feature-rich, and user-friendly self-hosted WebUI d - 💬 **Collaborative Chat**: Harness the collective intelligence of multiple models by seamlessly orchestrating group conversations. Use the `@` command to specify the model, enabling dynamic and diverse dialogues within your chat interface. Immerse yourself in the collective intelligence woven into your chat environment. +- 🗨️ **Local Chat Sharing**: Generate and share chat links seamlessly between users, enhancing collaboration and communication. + - 🔄 **Regeneration History Access**: Easily revisit and explore your entire regeneration history. - 📜 **Chat History**: Effortlessly access and manage your conversation history. +- 📬 **Archive Chats**: Effortlessly store away completed conversations with LLMs for future reference, maintaining a tidy and clutter-free chat interface while allowing for easy retrieval and reference. + - 📤📥 **Import/Export Chat History**: Seamlessly move your chat data in and out of the platform. - 🗣️ **Voice Input Support**: Engage with your model through voice interactions; enjoy the convenience of talking to your model directly. Additionally, explore the option for sending voice input automatically after 3 seconds of silence for a streamlined experience. +- 🔊 **Configurable Text-to-Speech Endpoint**: Customize your Text-to-Speech experience with configurable OpenAI endpoints. + - ⚙️ **Fine-Tuned Control with Advanced Parameters**: Gain a deeper level of control by adjusting parameters such as temperature and defining your system prompts to tailor the conversation to your specific preferences and needs. -- 🎨🤖 **Image Generation Integration**: Seamlessly incorporate image generation capabilities using AUTOMATIC1111 API (local) and DALL-E, enriching your chat experience with dynamic visual content. +- 🎨🤖 **Image Generation Integration**: Seamlessly incorporate image generation capabilities using options such as AUTOMATIC1111 API (local), ComfyUI (local), and DALL-E, enriching your chat experience with dynamic visual content. - 🤝 **OpenAI API Integration**: Effortlessly integrate OpenAI-compatible API for versatile conversations alongside Ollama models. Customize the API Base URL to link with **LMStudio, Mistral, OpenRouter, and more**. - ✨ **Multiple OpenAI-Compatible API Support**: Seamlessly integrate and customize various OpenAI-compatible APIs, enhancing the versatility of your chat interactions. +- 🔑 **API Key Generation Support**: Generate secret keys to leverage Open WebUI with OpenAI libraries, simplifying integration and development. + - 🔗 **External Ollama Server Connection**: Seamlessly link to an external Ollama server hosted on a different address by configuring the environment variable. - 🔀 **Multiple Ollama Instance Load Balancing**: Effortlessly distribute chat requests across multiple Ollama instances for enhanced performance and reliability. - 👥 **Multi-User Management**: Easily oversee and administer users via our intuitive admin panel, streamlining user management processes. +- 🔗 **Webhook Integration**: Subscribe to new user sign-up events via webhook (compatible with Google Chat and Microsoft Teams), providing real-time notifications and automation capabilities. + +- 🛡️ **Model Whitelisting**: Admins can whitelist models for users with the 'user' role, enhancing security and access control. + +- 📧 **Trusted Email Authentication**: Authenticate using a trusted email header, adding an additional layer of security and authentication. + - 🔐 **Role-Based Access Control (RBAC)**: Ensure secure access with restricted permissions; only authorized individuals can access your Ollama, and exclusive model creation/pulling rights are reserved for administrators. - 🔒 **Backend Reverse Proxy Support**: Bolster security through direct communication between Open WebUI backend and Ollama. This key feature eliminates the need to expose Ollama over LAN. Requests made to the '/ollama/api' route from the web UI are seamlessly redirected to Ollama from the backend, enhancing overall system security. From 98369fba225da109834369f712248d638a44dab6 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 20 Apr 2024 22:53:00 -0500 Subject: [PATCH 07/38] fix --- backend/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/main.py b/backend/main.py index 4b1809a25..8b5fd76bc 100644 --- a/backend/main.py +++ b/backend/main.py @@ -117,8 +117,8 @@ class RAGMiddleware(BaseHTTPMiddleware): rag_app.state.RAG_EMBEDDING_ENGINE, rag_app.state.RAG_EMBEDDING_MODEL, rag_app.state.sentence_transformer_ef, - rag_app.state.RAG_OPENAI_API_KEY, - rag_app.state.RAG_OPENAI_API_BASE_URL, + rag_app.state.OPENAI_API_KEY, + rag_app.state.OPENAI_API_BASE_URL, ) del data["docs"] From 948f2e913e30a0726dc1a8033913362dc68b0e9e Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 20 Apr 2024 23:53:08 -0500 Subject: [PATCH 08/38] chore: litellm bump --- backend/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/requirements.txt b/backend/requirements.txt index c815d93da..5f41137c9 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -17,7 +17,7 @@ peewee peewee-migrate bcrypt -litellm==1.30.7 +litellm==1.35.17 boto3 argon2-cffi From 5e458d490acf8c57f5a09d50310a58fc1ffe57c9 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sun, 21 Apr 2024 00:52:27 -0500 Subject: [PATCH 09/38] fix: run litellm as subprocess --- backend/apps/litellm/main.py | 71 +++++++++++++++++++++++++++++------- backend/main.py | 7 +--- 2 files changed, 58 insertions(+), 20 deletions(-) diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py index a9922aad7..39f348141 100644 --- a/backend/apps/litellm/main.py +++ b/backend/apps/litellm/main.py @@ -1,8 +1,8 @@ +from fastapi import FastAPI, Depends +from fastapi.routing import APIRoute +from fastapi.middleware.cors import CORSMiddleware + import logging - -from litellm.proxy.proxy_server import ProxyConfig, initialize -from litellm.proxy.proxy_server import app - from fastapi import FastAPI, Request, Depends, status, Response from fastapi.responses import JSONResponse @@ -23,24 +23,39 @@ from config import ( ) -proxy_config = ProxyConfig() +import asyncio +import subprocess -async def config(): - router, model_list, general_settings = await proxy_config.load_config( - router=None, config_file_path="./data/litellm/config.yaml" +app = FastAPI() + +origins = ["*"] + +app.add_middleware( + CORSMiddleware, + allow_origins=origins, + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + + +async def run_background_process(command): + process = await asyncio.create_subprocess_exec( + *command.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE ) - - await initialize(config="./data/litellm/config.yaml", telemetry=False) + return process -async def startup(): - await config() +async def start_litellm_background(): + # Command to run in the background + command = "litellm --config ./data/litellm/config.yaml" + await run_background_process(command) @app.on_event("startup") -async def on_startup(): - await startup() +async def startup_event(): + asyncio.create_task(start_litellm_background()) app.state.MODEL_FILTER_ENABLED = MODEL_FILTER_ENABLED @@ -63,6 +78,11 @@ async def auth_middleware(request: Request, call_next): return response +@app.get("/") +async def get_status(): + return {"status": True} + + class ModifyModelsResponseMiddleware(BaseHTTPMiddleware): async def dispatch( self, request: Request, call_next: RequestResponseEndpoint @@ -98,3 +118,26 @@ class ModifyModelsResponseMiddleware(BaseHTTPMiddleware): app.add_middleware(ModifyModelsResponseMiddleware) + + +# from litellm.proxy.proxy_server import ProxyConfig, initialize +# from litellm.proxy.proxy_server import app + +# proxy_config = ProxyConfig() + + +# async def config(): +# router, model_list, general_settings = await proxy_config.load_config( +# router=None, config_file_path="./data/litellm/config.yaml" +# ) + +# await initialize(config="./data/litellm/config.yaml", telemetry=False) + + +# async def startup(): +# await config() + + +# @app.on_event("startup") +# async def on_startup(): +# await startup() diff --git a/backend/main.py b/backend/main.py index 8b5fd76bc..b5aa7e7d0 100644 --- a/backend/main.py +++ b/backend/main.py @@ -20,7 +20,7 @@ from starlette.middleware.base import BaseHTTPMiddleware from apps.ollama.main import app as ollama_app from apps.openai.main import app as openai_app -from apps.litellm.main import app as litellm_app, startup as litellm_app_startup +from apps.litellm.main import app as litellm_app from apps.audio.main import app as audio_app from apps.images.main import app as images_app from apps.rag.main import app as rag_app @@ -168,11 +168,6 @@ async def check_url(request: Request, call_next): return response -@app.on_event("startup") -async def on_startup(): - await litellm_app_startup() - - app.mount("/api/v1", webui_app) app.mount("/litellm/api", litellm_app) From a41b195f466d7c62eae700186ccc7cc30453c7be Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sun, 21 Apr 2024 01:13:24 -0500 Subject: [PATCH 10/38] DO NOT TRACK ME >:( --- backend/apps/litellm/main.py | 185 ++++++++++++++++++++++------------- 1 file changed, 119 insertions(+), 66 deletions(-) diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py index 39f348141..947456881 100644 --- a/backend/apps/litellm/main.py +++ b/backend/apps/litellm/main.py @@ -1,4 +1,4 @@ -from fastapi import FastAPI, Depends +from fastapi import FastAPI, Depends, HTTPException from fastapi.routing import APIRoute from fastapi.middleware.cors import CORSMiddleware @@ -9,9 +9,11 @@ from fastapi.responses import JSONResponse from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint from starlette.responses import StreamingResponse import json +import requests -from utils.utils import get_http_authorization_cred, get_current_user +from utils.utils import get_verified_user, get_current_user from config import SRC_LOG_LEVELS, ENV +from constants import ERROR_MESSAGES log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["LITELLM"]) @@ -49,12 +51,13 @@ async def run_background_process(command): async def start_litellm_background(): # Command to run in the background - command = "litellm --config ./data/litellm/config.yaml" + command = "litellm --telemetry False --config ./data/litellm/config.yaml" await run_background_process(command) @app.on_event("startup") async def startup_event(): + # TODO: Check config.yaml file and create one asyncio.create_task(start_litellm_background()) @@ -62,82 +65,132 @@ app.state.MODEL_FILTER_ENABLED = MODEL_FILTER_ENABLED app.state.MODEL_FILTER_LIST = MODEL_FILTER_LIST -@app.middleware("http") -async def auth_middleware(request: Request, call_next): - auth_header = request.headers.get("Authorization", "") - request.state.user = None - - try: - user = get_current_user(get_http_authorization_cred(auth_header)) - log.debug(f"user: {user}") - request.state.user = user - except Exception as e: - return JSONResponse(status_code=400, content={"detail": str(e)}) - - response = await call_next(request) - return response - - @app.get("/") async def get_status(): return {"status": True} -class ModifyModelsResponseMiddleware(BaseHTTPMiddleware): - async def dispatch( - self, request: Request, call_next: RequestResponseEndpoint - ) -> Response: +@app.get("/models") +@app.get("/v1/models") +async def get_models(user=Depends(get_current_user)): + url = "http://localhost:4000/v1" + r = None + try: + r = requests.request(method="GET", url=f"{url}/models") + r.raise_for_status() - response = await call_next(request) - user = request.state.user + data = r.json() - if "/models" in request.url.path: - if isinstance(response, StreamingResponse): - # Read the content of the streaming response - body = b"" - async for chunk in response.body_iterator: - body += chunk + if app.state.MODEL_FILTER_ENABLED: + if user and user.role == "user": + data["data"] = list( + filter( + lambda model: model["id"] in app.state.MODEL_FILTER_LIST, + data["data"], + ) + ) - data = json.loads(body.decode("utf-8")) + return data + except Exception as e: + log.exception(e) + error_detail = "Open WebUI: Server Connection Error" + if r is not None: + try: + res = r.json() + if "error" in res: + error_detail = f"External: {res['error']}" + except: + error_detail = f"External: {e}" - if app.state.MODEL_FILTER_ENABLED: - if user and user.role == "user": - data["data"] = list( - filter( - lambda model: model["id"] - in app.state.MODEL_FILTER_LIST, - data["data"], - ) - ) - - # Modified Flag - data["modified"] = True - return JSONResponse(content=data) - - return response + raise HTTPException( + status_code=r.status_code if r else 500, + detail=error_detail, + ) -app.add_middleware(ModifyModelsResponseMiddleware) +@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"]) +async def proxy(path: str, request: Request, user=Depends(get_verified_user)): + body = await request.body() + + url = "http://localhost:4000/v1" + + target_url = f"{url}/{path}" + + headers = {} + # headers["Authorization"] = f"Bearer {key}" + headers["Content-Type"] = "application/json" + + r = None + + try: + r = requests.request( + method=request.method, + url=target_url, + data=body, + headers=headers, + stream=True, + ) + + r.raise_for_status() + + # Check if response is SSE + if "text/event-stream" in r.headers.get("Content-Type", ""): + return StreamingResponse( + r.iter_content(chunk_size=8192), + status_code=r.status_code, + headers=dict(r.headers), + ) + else: + response_data = r.json() + return response_data + except Exception as e: + log.exception(e) + error_detail = "Open WebUI: Server Connection Error" + if r is not None: + try: + res = r.json() + if "error" in res: + error_detail = f"External: {res['error']['message'] if 'message' in res['error'] else res['error']}" + except: + error_detail = f"External: {e}" + + raise HTTPException( + status_code=r.status_code if r else 500, detail=error_detail + ) -# from litellm.proxy.proxy_server import ProxyConfig, initialize -# from litellm.proxy.proxy_server import app +# class ModifyModelsResponseMiddleware(BaseHTTPMiddleware): +# async def dispatch( +# self, request: Request, call_next: RequestResponseEndpoint +# ) -> Response: -# proxy_config = ProxyConfig() +# response = await call_next(request) +# user = request.state.user + +# if "/models" in request.url.path: +# if isinstance(response, StreamingResponse): +# # Read the content of the streaming response +# body = b"" +# async for chunk in response.body_iterator: +# body += chunk + +# data = json.loads(body.decode("utf-8")) + +# if app.state.MODEL_FILTER_ENABLED: +# if user and user.role == "user": +# data["data"] = list( +# filter( +# lambda model: model["id"] +# in app.state.MODEL_FILTER_LIST, +# data["data"], +# ) +# ) + +# # Modified Flag +# data["modified"] = True +# return JSONResponse(content=data) + +# return response -# async def config(): -# router, model_list, general_settings = await proxy_config.load_config( -# router=None, config_file_path="./data/litellm/config.yaml" -# ) - -# await initialize(config="./data/litellm/config.yaml", telemetry=False) - - -# async def startup(): -# await config() - - -# @app.on_event("startup") -# async def on_startup(): -# await startup() +# app.add_middleware(ModifyModelsResponseMiddleware) From 8651bec915ae23f26f02f07b34d52f9099097148 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sun, 21 Apr 2024 01:22:02 -0500 Subject: [PATCH 11/38] pwned :) --- backend/apps/litellm/main.py | 11 ++++++++++- backend/main.py | 8 +++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py index 947456881..5a8b37f47 100644 --- a/backend/apps/litellm/main.py +++ b/backend/apps/litellm/main.py @@ -43,20 +43,29 @@ app.add_middleware( async def run_background_process(command): + # Start the process process = await asyncio.create_subprocess_exec( *command.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE ) - return process + # Read output asynchronously + async for line in process.stdout: + print(line.decode().strip()) # Print stdout line by line + + await process.wait() # Wait for the subprocess to finish async def start_litellm_background(): + print("start_litellm_background") # Command to run in the background command = "litellm --telemetry False --config ./data/litellm/config.yaml" + await run_background_process(command) @app.on_event("startup") async def startup_event(): + + print("startup_event") # TODO: Check config.yaml file and create one asyncio.create_task(start_litellm_background()) diff --git a/backend/main.py b/backend/main.py index b5aa7e7d0..48e14f1dd 100644 --- a/backend/main.py +++ b/backend/main.py @@ -20,12 +20,13 @@ from starlette.middleware.base import BaseHTTPMiddleware from apps.ollama.main import app as ollama_app from apps.openai.main import app as openai_app -from apps.litellm.main import app as litellm_app +from apps.litellm.main import app as litellm_app, start_litellm_background from apps.audio.main import app as audio_app from apps.images.main import app as images_app from apps.rag.main import app as rag_app from apps.web.main import app as webui_app +import asyncio from pydantic import BaseModel from typing import List @@ -168,6 +169,11 @@ async def check_url(request: Request, call_next): return response +@app.on_event("startup") +async def on_startup(): + asyncio.create_task(start_litellm_background()) + + app.mount("/api/v1", webui_app) app.mount("/litellm/api", litellm_app) From 3c382d4c6cbea0352a4ad4bc3a90ed8f339a148b Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sun, 21 Apr 2024 01:46:09 -0500 Subject: [PATCH 12/38] refac: close subprocess gracefully --- backend/apps/litellm/main.py | 51 +++++++++++++++++++++++++++++------- backend/main.py | 11 +++++++- 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py index 5a8b37f47..68e48858b 100644 --- a/backend/apps/litellm/main.py +++ b/backend/apps/litellm/main.py @@ -42,16 +42,40 @@ app.add_middleware( ) -async def run_background_process(command): - # Start the process - process = await asyncio.create_subprocess_exec( - *command.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE - ) - # Read output asynchronously - async for line in process.stdout: - print(line.decode().strip()) # Print stdout line by line +# Global variable to store the subprocess reference +background_process = None - await process.wait() # Wait for the subprocess to finish + +async def run_background_process(command): + global background_process + print("run_background_process") + + try: + # Log the command to be executed + print(f"Executing command: {command}") + # Execute the command and create a subprocess + process = await asyncio.create_subprocess_exec( + *command.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + background_process = process + print("Subprocess started successfully.") + + # Capture STDERR for debugging purposes + stderr_output = await process.stderr.read() + stderr_text = stderr_output.decode().strip() + if stderr_text: + print(f"Subprocess STDERR: {stderr_text}") + + # Print output line by line + async for line in process.stdout: + print(line.decode().strip()) + + # Wait for the process to finish + returncode = await process.wait() + print(f"Subprocess exited with return code {returncode}") + except Exception as e: + log.error(f"Failed to start subprocess: {e}") + raise # Optionally re-raise the exception if you want it to propagate async def start_litellm_background(): @@ -62,6 +86,15 @@ async def start_litellm_background(): await run_background_process(command) +async def shutdown_litellm_background(): + print("shutdown_litellm_background") + global background_process + if background_process: + background_process.terminate() + await background_process.wait() # Ensure the process has terminated + print("Subprocess terminated") + + @app.on_event("startup") async def startup_event(): diff --git a/backend/main.py b/backend/main.py index 48e14f1dd..579ff2ee0 100644 --- a/backend/main.py +++ b/backend/main.py @@ -20,7 +20,11 @@ from starlette.middleware.base import BaseHTTPMiddleware from apps.ollama.main import app as ollama_app from apps.openai.main import app as openai_app -from apps.litellm.main import app as litellm_app, start_litellm_background +from apps.litellm.main import ( + app as litellm_app, + start_litellm_background, + shutdown_litellm_background, +) from apps.audio.main import app as audio_app from apps.images.main import app as images_app from apps.rag.main import app as rag_app @@ -316,3 +320,8 @@ app.mount( SPAStaticFiles(directory=FRONTEND_BUILD_DIR, html=True), name="spa-static-files", ) + + +@app.on_event("shutdown") +async def shutdown_event(): + await shutdown_litellm_background() From a59fb6b9eb6bcbe438d15e2020b31d2ef6cdf580 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sun, 21 Apr 2024 01:47:35 -0500 Subject: [PATCH 13/38] fix --- backend/apps/litellm/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py index 68e48858b..486ae4736 100644 --- a/backend/apps/litellm/main.py +++ b/backend/apps/litellm/main.py @@ -154,7 +154,7 @@ async def get_models(user=Depends(get_current_user)): async def proxy(path: str, request: Request, user=Depends(get_verified_user)): body = await request.body() - url = "http://localhost:4000/v1" + url = "http://localhost:4000" target_url = f"{url}/{path}" From 51191168bc77b50165e5d937cbb54f592d71d1e2 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sun, 21 Apr 2024 01:51:38 -0500 Subject: [PATCH 14/38] feat: restart subprocess route --- backend/apps/litellm/main.py | 65 +++++++++++++++--------------------- 1 file changed, 27 insertions(+), 38 deletions(-) diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py index 486ae4736..531e96494 100644 --- a/backend/apps/litellm/main.py +++ b/backend/apps/litellm/main.py @@ -11,7 +11,7 @@ from starlette.responses import StreamingResponse import json import requests -from utils.utils import get_verified_user, get_current_user +from utils.utils import get_verified_user, get_current_user, get_admin_user from config import SRC_LOG_LEVELS, ENV from constants import ERROR_MESSAGES @@ -112,6 +112,32 @@ async def get_status(): return {"status": True} +@app.get("/restart") +async def restart_litellm(user=Depends(get_admin_user)): + """ + Endpoint to restart the litellm background service. + """ + log.info("Requested restart of litellm service.") + try: + # Shut down the existing process if it is running + await shutdown_litellm_background() + log.info("litellm service shutdown complete.") + + # Restart the background service + await start_litellm_background() + log.info("litellm service restart complete.") + + return { + "status": "success", + "message": "litellm service restarted successfully.", + } + except Exception as e: + log.error(f"Error restarting litellm service: {e}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e) + ) + + @app.get("/models") @app.get("/v1/models") async def get_models(user=Depends(get_current_user)): @@ -199,40 +225,3 @@ async def proxy(path: str, request: Request, user=Depends(get_verified_user)): raise HTTPException( status_code=r.status_code if r else 500, detail=error_detail ) - - -# class ModifyModelsResponseMiddleware(BaseHTTPMiddleware): -# async def dispatch( -# self, request: Request, call_next: RequestResponseEndpoint -# ) -> Response: - -# response = await call_next(request) -# user = request.state.user - -# if "/models" in request.url.path: -# if isinstance(response, StreamingResponse): -# # Read the content of the streaming response -# body = b"" -# async for chunk in response.body_iterator: -# body += chunk - -# data = json.loads(body.decode("utf-8")) - -# if app.state.MODEL_FILTER_ENABLED: -# if user and user.role == "user": -# data["data"] = list( -# filter( -# lambda model: model["id"] -# in app.state.MODEL_FILTER_LIST, -# data["data"], -# ) -# ) - -# # Modified Flag -# data["modified"] = True -# return JSONResponse(content=data) - -# return response - - -# app.add_middleware(ModifyModelsResponseMiddleware) From 2717fe7c207b3a0e19e23113e647ec8b6e78e4bc Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sun, 21 Apr 2024 02:00:03 -0500 Subject: [PATCH 15/38] fix --- backend/apps/litellm/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py index 531e96494..68ae54fbc 100644 --- a/backend/apps/litellm/main.py +++ b/backend/apps/litellm/main.py @@ -124,7 +124,7 @@ async def restart_litellm(user=Depends(get_admin_user)): log.info("litellm service shutdown complete.") # Restart the background service - await start_litellm_background() + start_litellm_background() log.info("litellm service restart complete.") return { From 743bbae5d1c3573b7104a7b589e76c7cdc385ed1 Mon Sep 17 00:00:00 2001 From: Entaigner Date: Sun, 21 Apr 2024 11:54:30 +0200 Subject: [PATCH 16/38] Bugfix: FileReader can't be resused so init one per image --- src/lib/components/chat/MessageInput.svelte | 47 ++++++++++----------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/src/lib/components/chat/MessageInput.svelte b/src/lib/components/chat/MessageInput.svelte index ebf8e9713..2793a8d4b 100644 --- a/src/lib/components/chat/MessageInput.svelte +++ b/src/lib/components/chat/MessageInput.svelte @@ -316,24 +316,22 @@ console.log(e); if (e.dataTransfer?.files) { - let reader = new FileReader(); - - reader.onload = (event) => { - files = [ - ...files, - { - type: 'image', - url: `${event.target.result}` - } - ]; - }; - const inputFiles = Array.from(e.dataTransfer?.files); if (inputFiles && inputFiles.length > 0) { inputFiles.forEach((file) => { console.log(file, file.name.split('.').at(-1)); if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) { + let reader = new FileReader(); + reader.onload = (event) => { + files = [ + ...files, + { + type: 'image', + url: `${event.target.result}` + } + ]; + }; reader.readAsDataURL(file); } else if ( SUPPORTED_FILE_TYPE.includes(file['type']) || @@ -470,23 +468,22 @@ hidden multiple on:change={async () => { - let reader = new FileReader(); - reader.onload = (event) => { - files = [ - ...files, - { - type: 'image', - url: `${event.target.result}` - } - ]; - inputFiles = null; - filesInputElement.value = ''; - }; - if (inputFiles && inputFiles.length > 0) { const _inputFiles = Array.from(inputFiles); _inputFiles.forEach((file) => { if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) { + let reader = new FileReader(); + reader.onload = (event) => { + files = [ + ...files, + { + type: 'image', + url: `${event.target.result}` + } + ]; + inputFiles = null; + filesInputElement.value = ''; + }; reader.readAsDataURL(file); } else if ( SUPPORTED_FILE_TYPE.includes(file['type']) || From 67df928c7ae953e4b725c548de08c0b61ce7d1e6 Mon Sep 17 00:00:00 2001 From: Jun Siang Cheah Date: Sun, 21 Apr 2024 10:45:07 +0100 Subject: [PATCH 17/38] feat: make chunk splitting a configurable option --- src/lib/apis/streaming/index.ts | 9 ++++-- .../components/chat/Settings/Interface.svelte | 28 +++++++++++++++++++ src/lib/i18n/locales/en-US/translation.json | 1 + src/routes/(app)/+page.svelte | 2 +- src/routes/(app)/c/[id]/+page.svelte | 8 +++--- 5 files changed, 41 insertions(+), 7 deletions(-) diff --git a/src/lib/apis/streaming/index.ts b/src/lib/apis/streaming/index.ts index 4d1d2ecec..5b89a4668 100644 --- a/src/lib/apis/streaming/index.ts +++ b/src/lib/apis/streaming/index.ts @@ -6,9 +6,14 @@ type TextStreamUpdate = { // createOpenAITextStream takes a ReadableStreamDefaultReader from an SSE response, // and returns an async generator that emits delta updates with large deltas chunked into random sized chunks export async function createOpenAITextStream( - messageStream: ReadableStreamDefaultReader + messageStream: ReadableStreamDefaultReader, + splitLargeDeltas: boolean ): Promise> { - return streamLargeDeltasAsRandomChunks(openAIStreamToIterator(messageStream)); + let iterator = openAIStreamToIterator(messageStream); + if (splitLargeDeltas) { + iterator = streamLargeDeltasAsRandomChunks(iterator); + } + return iterator; } async function* openAIStreamToIterator( diff --git a/src/lib/components/chat/Settings/Interface.svelte b/src/lib/components/chat/Settings/Interface.svelte index ad9e05e7f..37d7fa4ea 100644 --- a/src/lib/components/chat/Settings/Interface.svelte +++ b/src/lib/components/chat/Settings/Interface.svelte @@ -17,11 +17,17 @@ let titleAutoGenerateModelExternal = ''; let fullScreenMode = false; let titleGenerationPrompt = ''; + let splitLargeChunks = false; // Interface let promptSuggestions = []; let showUsername = false; + const toggleSplitLargeChunks = async () => { + splitLargeChunks = !splitLargeChunks; + saveSettings({ splitLargeChunks: splitLargeChunks }); + }; + const toggleFullScreenMode = async () => { fullScreenMode = !fullScreenMode; saveSettings({ fullScreenMode: fullScreenMode }); @@ -197,6 +203,28 @@ + +
+
+
+ {$i18n.t('Fluidly stream large external response chunks')} +
+ + +
+

diff --git a/src/lib/i18n/locales/en-US/translation.json b/src/lib/i18n/locales/en-US/translation.json index be89b1b01..fdfe804ba 100644 --- a/src/lib/i18n/locales/en-US/translation.json +++ b/src/lib/i18n/locales/en-US/translation.json @@ -152,6 +152,7 @@ "File Mode": "", "File not found.": "", "Fingerprint spoofing detected: Unable to use initials as avatar. Defaulting to default profile image.": "", + "Fluidly stream large external response chunks": "", "Focus chat input": "", "Format your variables using square brackets like this:": "", "From (Base Model)": "", diff --git a/src/routes/(app)/+page.svelte b/src/routes/(app)/+page.svelte index bd8676985..9fc261773 100644 --- a/src/routes/(app)/+page.svelte +++ b/src/routes/(app)/+page.svelte @@ -600,7 +600,7 @@ .pipeThrough(splitStream('\n')) .getReader(); - const textStream = await createOpenAITextStream(reader); + const textStream = await createOpenAITextStream(reader, $settings.splitLargeChunks); console.log(textStream); for await (const update of textStream) { diff --git a/src/routes/(app)/c/[id]/+page.svelte b/src/routes/(app)/c/[id]/+page.svelte index 2f8ad7d0b..c230eb5c1 100644 --- a/src/routes/(app)/c/[id]/+page.svelte +++ b/src/routes/(app)/c/[id]/+page.svelte @@ -552,9 +552,9 @@ messages: [ $settings.system ? { - role: 'system', - content: $settings.system - } + role: 'system', + content: $settings.system + } : undefined, ...messages ] @@ -612,7 +612,7 @@ .pipeThrough(splitStream('\n')) .getReader(); - const textStream = await createOpenAITextStream(reader); + const textStream = await createOpenAITextStream(reader, $settings.splitLargeChunks); console.log(textStream); for await (const update of textStream) { From 81b7cdfed7cc129962dc686edc8b5568312e2186 Mon Sep 17 00:00:00 2001 From: Jun Siang Cheah Date: Sun, 21 Apr 2024 11:41:18 +0100 Subject: [PATCH 18/38] fix: add typescript types for models --- src/lib/stores/index.ts | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/src/lib/stores/index.ts b/src/lib/stores/index.ts index fc58db6bd..038c34195 100644 --- a/src/lib/stores/index.ts +++ b/src/lib/stores/index.ts @@ -1,5 +1,5 @@ import { APP_NAME } from '$lib/constants'; -import { writable } from 'svelte/store'; +import { type Writable, writable } from 'svelte/store'; // Backend export const WEBUI_NAME = writable(APP_NAME); @@ -14,7 +14,7 @@ export const chatId = writable(''); export const chats = writable([]); export const tags = writable([]); -export const models = writable([]); +export const models: Writable = writable([]); export const modelfiles = writable([]); export const prompts = writable([]); @@ -36,3 +36,34 @@ export const documents = writable([ export const settings = writable({}); export const showSettings = writable(false); export const showChangelog = writable(false); + +type Model = OpenAIModel | OllamaModel; + +type OpenAIModel = { + id: string; + name: string; + external: boolean; + source?: string; +} + +type OllamaModel = { + id: string; + name: string; + + // Ollama specific fields + details: OllamaModelDetails; + size: number; + description: string; + model: string; + modified_at: string; + digest: string; +} + +type OllamaModelDetails = { + parent_model: string; + format: string; + family: string; + families: string[] | null; + parameter_size: string; + quantization_level: string; +}; From 489c45ffdf33ef9c9b97e3c52a016d6bf40b09cb Mon Sep 17 00:00:00 2001 From: dyamagishi Date: Mon, 22 Apr 2024 01:19:34 +0900 Subject: [PATCH 19/38] fix: Update websocket protocol based on the original schema. --- backend/apps/images/utils/comfyui.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/apps/images/utils/comfyui.py b/backend/apps/images/utils/comfyui.py index 393333255..05df1c166 100644 --- a/backend/apps/images/utils/comfyui.py +++ b/backend/apps/images/utils/comfyui.py @@ -195,7 +195,7 @@ class ImageGenerationPayload(BaseModel): def comfyui_generate_image( model: str, payload: ImageGenerationPayload, client_id, base_url ): - host = base_url.replace("http://", "").replace("https://", "") + ws_url = base_url.replace("http://", "ws://").replace("https://", "wss://") comfyui_prompt = json.loads(COMFYUI_DEFAULT_PROMPT) @@ -217,7 +217,7 @@ def comfyui_generate_image( try: ws = websocket.WebSocket() - ws.connect(f"ws://{host}/ws?clientId={client_id}") + ws.connect(f"{ws_url}/ws?clientId={client_id}") log.info("WebSocket connection established.") except Exception as e: log.exception(f"Failed to connect to WebSocket server: {e}") From 302c5074e9e6cbfaa7fb47c46fd646d59596c29f Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sun, 21 Apr 2024 12:50:14 -0500 Subject: [PATCH 20/38] revert: litellm bump --- backend/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/requirements.txt b/backend/requirements.txt index 5f41137c9..c815d93da 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -17,7 +17,7 @@ peewee peewee-migrate bcrypt -litellm==1.35.17 +litellm==1.30.7 boto3 argon2-cffi From bfdefbf6e773ebf2ee4cde536b67ef7196d37647 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sun, 21 Apr 2024 13:02:26 -0500 Subject: [PATCH 21/38] fix: archived chats modal styling --- src/lib/components/layout/Sidebar/ArchivedChatsModal.svelte | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib/components/layout/Sidebar/ArchivedChatsModal.svelte b/src/lib/components/layout/Sidebar/ArchivedChatsModal.svelte index b8f6c4b56..70cb0676c 100644 --- a/src/lib/components/layout/Sidebar/ArchivedChatsModal.svelte +++ b/src/lib/components/layout/Sidebar/ArchivedChatsModal.svelte @@ -75,7 +75,7 @@ > {$i18n.t('Name')} - {$i18n.t('Created At')} + {$i18n.t('Created At')} @@ -93,7 +93,7 @@ - + {dayjs(chat.created_at * 1000).format($i18n.t('MMMM DD, YYYY HH:mm'))} From 6f6be2c03f152b5c91f35b1cd7100094b8d871aa Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sun, 21 Apr 2024 13:16:45 -0500 Subject: [PATCH 22/38] fix: styling --- .../layout/Sidebar/ArchivedChatsModal.svelte | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/lib/components/layout/Sidebar/ArchivedChatsModal.svelte b/src/lib/components/layout/Sidebar/ArchivedChatsModal.svelte index 70cb0676c..32ec9c6b0 100644 --- a/src/lib/components/layout/Sidebar/ArchivedChatsModal.svelte +++ b/src/lib/components/layout/Sidebar/ArchivedChatsModal.svelte @@ -67,7 +67,7 @@
{#if chats.length > 0} -
+
- {#each chats as chat, idx} + {#each [...chats, ...chats, ...chats] as chat, idx} - - {#each [...chats, ...chats, ...chats] as chat, idx} + {#each chats as chat, idx} Date: Sun, 21 Apr 2024 13:24:46 -0500 Subject: [PATCH 24/38] refac: styling --- src/lib/components/layout/Sidebar/ArchivedChatsModal.svelte | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/components/layout/Sidebar/ArchivedChatsModal.svelte b/src/lib/components/layout/Sidebar/ArchivedChatsModal.svelte index 6ae5286b4..51bcf1ad6 100644 --- a/src/lib/components/layout/Sidebar/ArchivedChatsModal.svelte +++ b/src/lib/components/layout/Sidebar/ArchivedChatsModal.svelte @@ -67,7 +67,7 @@
{#if chats.length > 0} -
+
From 4148d70ec029401b152ccbb7da4ad887489dd68c Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sun, 21 Apr 2024 13:19:48 -0500 Subject: [PATCH 23/38] fix --- src/lib/components/layout/Sidebar/ArchivedChatsModal.svelte | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/components/layout/Sidebar/ArchivedChatsModal.svelte b/src/lib/components/layout/Sidebar/ArchivedChatsModal.svelte index 32ec9c6b0..6ae5286b4 100644 --- a/src/lib/components/layout/Sidebar/ArchivedChatsModal.svelte +++ b/src/lib/components/layout/Sidebar/ArchivedChatsModal.svelte @@ -80,7 +80,7 @@
Date: Sun, 21 Apr 2024 14:32:45 -0500 Subject: [PATCH 25/38] refac: port number update --- backend/apps/litellm/main.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py index 68ae54fbc..8d1132bb4 100644 --- a/backend/apps/litellm/main.py +++ b/backend/apps/litellm/main.py @@ -81,7 +81,9 @@ async def run_background_process(command): async def start_litellm_background(): print("start_litellm_background") # Command to run in the background - command = "litellm --telemetry False --config ./data/litellm/config.yaml" + command = ( + "litellm --port 14365 --telemetry False --config ./data/litellm/config.yaml" + ) await run_background_process(command) @@ -141,7 +143,7 @@ async def restart_litellm(user=Depends(get_admin_user)): @app.get("/models") @app.get("/v1/models") async def get_models(user=Depends(get_current_user)): - url = "http://localhost:4000/v1" + url = "http://localhost:14365/v1" r = None try: r = requests.request(method="GET", url=f"{url}/models") @@ -180,7 +182,7 @@ async def get_models(user=Depends(get_current_user)): async def proxy(path: str, request: Request, user=Depends(get_verified_user)): body = await request.body() - url = "http://localhost:4000" + url = "http://localhost:14365" target_url = f"{url}/{path}" From 8422d3ea79c134ff12e9120c3f27220a7ac2bd57 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sun, 21 Apr 2024 14:43:51 -0500 Subject: [PATCH 26/38] Update requirements.txt --- backend/requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/requirements.txt b/backend/requirements.txt index 5f41137c9..0b5e90433 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -18,6 +18,8 @@ peewee-migrate bcrypt litellm==1.35.17 +litellm['proxy']==1.35.17 + boto3 argon2-cffi From f83eb7326f7b4fcaf54493c61bc0344855429617 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sun, 21 Apr 2024 14:44:28 -0500 Subject: [PATCH 27/38] Update requirements.txt --- backend/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/requirements.txt b/backend/requirements.txt index 0b5e90433..e04551567 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -18,7 +18,7 @@ peewee-migrate bcrypt litellm==1.35.17 -litellm['proxy']==1.35.17 +litellm[proxy]==1.35.17 boto3 From 31124d9deb08c8283247b7b95313be59646fa7e0 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sun, 21 Apr 2024 16:10:01 -0500 Subject: [PATCH 28/38] feat: litellm config update --- backend/apps/litellm/main.py | 75 ++++++++++++++++++++++++++---------- 1 file changed, 55 insertions(+), 20 deletions(-) diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py index 8d1132bb4..5696b6945 100644 --- a/backend/apps/litellm/main.py +++ b/backend/apps/litellm/main.py @@ -11,6 +11,9 @@ from starlette.responses import StreamingResponse import json import requests +from pydantic import BaseModel +from typing import Optional, List + from utils.utils import get_verified_user, get_current_user, get_admin_user from config import SRC_LOG_LEVELS, ENV from constants import ERROR_MESSAGES @@ -19,15 +22,12 @@ log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["LITELLM"]) -from config import ( - MODEL_FILTER_ENABLED, - MODEL_FILTER_LIST, -) +from config import MODEL_FILTER_ENABLED, MODEL_FILTER_LIST, DATA_DIR import asyncio import subprocess - +import yaml app = FastAPI() @@ -42,44 +42,51 @@ app.add_middleware( ) +LITELLM_CONFIG_DIR = f"{DATA_DIR}/litellm/config.yaml" + +with open(LITELLM_CONFIG_DIR, "r") as file: + litellm_config = yaml.safe_load(file) + +app.state.CONFIG = litellm_config + # Global variable to store the subprocess reference background_process = None async def run_background_process(command): global background_process - print("run_background_process") + log.info("run_background_process") try: # Log the command to be executed - print(f"Executing command: {command}") + log.info(f"Executing command: {command}") # Execute the command and create a subprocess process = await asyncio.create_subprocess_exec( *command.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE ) background_process = process - print("Subprocess started successfully.") + log.info("Subprocess started successfully.") # Capture STDERR for debugging purposes stderr_output = await process.stderr.read() stderr_text = stderr_output.decode().strip() if stderr_text: - print(f"Subprocess STDERR: {stderr_text}") + log.info(f"Subprocess STDERR: {stderr_text}") - # Print output line by line + # log.info output line by line async for line in process.stdout: - print(line.decode().strip()) + log.info(line.decode().strip()) # Wait for the process to finish returncode = await process.wait() - print(f"Subprocess exited with return code {returncode}") + log.info(f"Subprocess exited with return code {returncode}") except Exception as e: log.error(f"Failed to start subprocess: {e}") raise # Optionally re-raise the exception if you want it to propagate async def start_litellm_background(): - print("start_litellm_background") + log.info("start_litellm_background") # Command to run in the background command = ( "litellm --port 14365 --telemetry False --config ./data/litellm/config.yaml" @@ -89,18 +96,18 @@ async def start_litellm_background(): async def shutdown_litellm_background(): - print("shutdown_litellm_background") + log.info("shutdown_litellm_background") global background_process if background_process: background_process.terminate() await background_process.wait() # Ensure the process has terminated - print("Subprocess terminated") + log.info("Subprocess terminated") @app.on_event("startup") async def startup_event(): - print("startup_event") + log.info("startup_event") # TODO: Check config.yaml file and create one asyncio.create_task(start_litellm_background()) @@ -114,8 +121,7 @@ async def get_status(): return {"status": True} -@app.get("/restart") -async def restart_litellm(user=Depends(get_admin_user)): +async def restart_litellm(): """ Endpoint to restart the litellm background service. """ @@ -126,7 +132,8 @@ async def restart_litellm(user=Depends(get_admin_user)): log.info("litellm service shutdown complete.") # Restart the background service - start_litellm_background() + + asyncio.create_task(start_litellm_background()) log.info("litellm service restart complete.") return { @@ -134,12 +141,40 @@ async def restart_litellm(user=Depends(get_admin_user)): "message": "litellm service restarted successfully.", } except Exception as e: - log.error(f"Error restarting litellm service: {e}") + log.info(f"Error restarting litellm service: {e}") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e) ) +@app.get("/restart") +async def restart_litellm_handler(user=Depends(get_admin_user)): + return await restart_litellm() + + +@app.get("/config") +async def get_config(user=Depends(get_admin_user)): + return app.state.CONFIG + + +class LiteLLMConfigForm(BaseModel): + general_settings: Optional[dict] = None + litellm_settings: Optional[dict] = None + model_list: Optional[List[dict]] = None + router_settings: Optional[dict] = None + + +@app.post("/config/update") +async def update_config(form_data: LiteLLMConfigForm, user=Depends(get_admin_user)): + app.state.CONFIG = form_data.model_dump(exclude_none=True) + + with open(LITELLM_CONFIG_DIR, "w") as file: + yaml.dump(app.state.CONFIG, file) + + await restart_litellm() + return app.state.CONFIG + + @app.get("/models") @app.get("/v1/models") async def get_models(user=Depends(get_current_user)): From e627b8bf21d2eb5f78f753ed6896ea9255d9e2eb Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sun, 21 Apr 2024 17:26:22 -0500 Subject: [PATCH 29/38] feat: litellm model add/delete --- backend/apps/litellm/main.py | 50 +++++++++++++++++++ .../components/chat/Settings/Models.svelte | 12 ++--- 2 files changed, 56 insertions(+), 6 deletions(-) diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py index 5696b6945..9bc08598f 100644 --- a/backend/apps/litellm/main.py +++ b/backend/apps/litellm/main.py @@ -102,6 +102,7 @@ async def shutdown_litellm_background(): background_process.terminate() await background_process.wait() # Ensure the process has terminated log.info("Subprocess terminated") + background_process = None @app.on_event("startup") @@ -178,6 +179,9 @@ async def update_config(form_data: LiteLLMConfigForm, user=Depends(get_admin_use @app.get("/models") @app.get("/v1/models") async def get_models(user=Depends(get_current_user)): + while not background_process: + await asyncio.sleep(0.1) + url = "http://localhost:14365/v1" r = None try: @@ -213,6 +217,52 @@ async def get_models(user=Depends(get_current_user)): ) +@app.get("/model/info") +async def get_model_list(user=Depends(get_admin_user)): + return {"data": app.state.CONFIG["model_list"]} + + +class AddLiteLLMModelForm(BaseModel): + model_name: str + litellm_params: dict + + +@app.post("/model/new") +async def add_model_to_config( + form_data: AddLiteLLMModelForm, user=Depends(get_admin_user) +): + app.state.CONFIG["model_list"].append(form_data.model_dump()) + + with open(LITELLM_CONFIG_DIR, "w") as file: + yaml.dump(app.state.CONFIG, file) + + await restart_litellm() + + return {"message": "model added"} + + +class DeleteLiteLLMModelForm(BaseModel): + id: str + + +@app.post("/model/delete") +async def delete_model_from_config( + form_data: DeleteLiteLLMModelForm, user=Depends(get_admin_user) +): + app.state.CONFIG["model_list"] = [ + model + for model in app.state.CONFIG["model_list"] + if model["model_name"] != form_data.id + ] + + with open(LITELLM_CONFIG_DIR, "w") as file: + yaml.dump(app.state.CONFIG, file) + + await restart_litellm() + + return {"message": "model deleted"} + + @app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"]) async def proxy(path: str, request: Request, user=Depends(get_verified_user)): body = await request.body() diff --git a/src/lib/components/chat/Settings/Models.svelte b/src/lib/components/chat/Settings/Models.svelte index 15b054024..688774d78 100644 --- a/src/lib/components/chat/Settings/Models.svelte +++ b/src/lib/components/chat/Settings/Models.svelte @@ -35,7 +35,7 @@ let liteLLMRPM = ''; let liteLLMMaxTokens = ''; - let deleteLiteLLMModelId = ''; + let deleteLiteLLMModelName = ''; $: liteLLMModelName = liteLLMModel; @@ -472,7 +472,7 @@ }; const deleteLiteLLMModelHandler = async () => { - const res = await deleteLiteLLMModel(localStorage.token, deleteLiteLLMModelId).catch( + const res = await deleteLiteLLMModel(localStorage.token, deleteLiteLLMModelName).catch( (error) => { toast.error(error); return null; @@ -485,7 +485,7 @@ } } - deleteLiteLLMModelId = ''; + deleteLiteLLMModelName = ''; liteLLMModelInfo = await getLiteLLMModelInfo(localStorage.token); models.set(await getModels()); }; @@ -1099,14 +1099,14 @@