From efa258c69504aff9fab6c47f62e16dce42665318 Mon Sep 17 00:00:00 2001
From: Jun Siang Cheah <git@jscheah.me>
Date: Sat, 20 Apr 2024 20:03:52 +0100
Subject: [PATCH 01/40] feat: split large openai responses into smaller
 chunks

---
 src/lib/apis/streaming/index.ts      | 65 ++++++++++++++++++++++++++++
 src/routes/(app)/+page.svelte        | 37 +++++-----------
 src/routes/(app)/c/[id]/+page.svelte | 43 ++++++------------
 3 files changed, 90 insertions(+), 55 deletions(-)
 create mode 100644 src/lib/apis/streaming/index.ts
diff --git a/src/lib/apis/streaming/index.ts b/src/lib/apis/streaming/index.ts
new file mode 100644
index 000000000..4d1d2ecec
--- /dev/null
+++ b/src/lib/apis/streaming/index.ts
@@ -0,0 +1,65 @@
+type TextStreamUpdate = {
+	done: boolean;
+	value: string;
+};
+
+// createOpenAITextStream takes a ReadableStreamDefaultReader from an SSE response,
+// and returns an async generator that emits delta updates with large deltas chunked into random sized chunks
+export async function createOpenAITextStream(
+	messageStream: ReadableStreamDefaultReader
+): Promise<AsyncGenerator<TextStreamUpdate>> {
+	return streamLargeDeltasAsRandomChunks(openAIStreamToIterator(messageStream));
+}
+
+async function* openAIStreamToIterator(
+	reader: ReadableStreamDefaultReader
+): AsyncGenerator<TextStreamUpdate> {
+	while (true) {
+		const { value, done } = await reader.read();
+		if (done) {
+			yield { done: true, value: '' };
+			break;
+		}
+		const lines = value.split('\n');
+		for (const line of lines) {
+			if (line !== '') {
+				console.log(line);
+				if (line === 'data: [DONE]') {
+					yield { done: true, value: '' };
+				} else {
+					const data = JSON.parse(line.replace(/^data: /, ''));
+					console.log(data);
+
+					yield { done: false, value: data.choices[0].delta.content ?? '' };
+				}
+			}
+		}
+	}
+}
+
+// streamLargeDeltasAsRandomChunks will chunk large deltas (length > 5) into random sized chunks between 1-3 characters
+// This is to simulate a more fluid streaming, even though some providers may send large chunks of text at once
+async function* streamLargeDeltasAsRandomChunks(
+	iterator: AsyncGenerator<TextStreamUpdate>
+): AsyncGenerator<TextStreamUpdate> {
+	for await (const textStreamUpdate of iterator) {
+		if (textStreamUpdate.done) {
+			yield textStreamUpdate;
+			return;
+		}
+		let content = textStreamUpdate.value;
+		if (content.length < 5) {
+			yield { done: false, value: content };
+			continue;
+		}
+		while (content != '') {
+			const chunkSize = Math.min(Math.floor(Math.random() * 3) + 1, content.length);
+			const chunk = content.slice(0, chunkSize);
+			yield { done: false, value: chunk };
+			await sleep(5);
+			content = content.slice(chunkSize);
+		}
+	}
+}
+
+const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
diff --git a/src/routes/(app)/+page.svelte b/src/routes/(app)/+page.svelte
index bdeff6d7a..bd8676985 100644
--- a/src/routes/(app)/+page.svelte
+++ b/src/routes/(app)/+page.svelte
@@ -39,6 +39,7 @@
 	import { RAGTemplate } from '$lib/utils/rag';
 	import { LITELLM_API_BASE_URL, OLLAMA_API_BASE_URL, OPENAI_API_BASE_URL } from '$lib/constants';
 	import { WEBUI_BASE_URL } from '$lib/constants';
+	import { createOpenAITextStream } from '$lib/apis/streaming';
 
 	const i18n = getContext('i18n');
 
@@ -599,38 +600,22 @@
 				.pipeThrough(splitStream('\n'))
 				.getReader();
 
-			while (true) {
-				const { value, done } = await reader.read();
+			const textStream = await createOpenAITextStream(reader);
+			console.log(textStream);
+
+			for await (const update of textStream) {
+				const { value, done } = update;
 				if (done || stopResponseFlag || _chatId !== $chatId) {
 					responseMessage.done = true;
 					messages = messages;
 					break;
 				}
 
-				try {
-					let lines = value.split('\n');
-
-					for (const line of lines) {
-						if (line !== '') {
-							console.log(line);
-							if (line === 'data: [DONE]') {
-								responseMessage.done = true;
-								messages = messages;
-							} else {
-								let data = JSON.parse(line.replace(/^data: /, ''));
-								console.log(data);
-
-								if (responseMessage.content == '' && data.choices[0].delta.content == '\n') {
-									continue;
-								} else {
-									responseMessage.content += data.choices[0].delta.content ?? '';
-									messages = messages;
-								}
-							}
-						}
-					}
-				} catch (error) {
-					console.log(error);
+				if (responseMessage.content == '' && value == '\n') {
+					continue;
+				} else {
+					responseMessage.content += value;
+					messages = messages;
 				}
 
 				if ($settings.notificationEnabled && !document.hasFocus()) {
diff --git a/src/routes/(app)/c/[id]/+page.svelte b/src/routes/(app)/c/[id]/+page.svelte
index 7502f3c4e..2f8ad7d0b 100644
--- a/src/routes/(app)/c/[id]/+page.svelte
+++ b/src/routes/(app)/c/[id]/+page.svelte
@@ -42,6 +42,7 @@
 		OLLAMA_API_BASE_URL,
 		WEBUI_BASE_URL
 	} from '$lib/constants';
+	import { createOpenAITextStream } from '$lib/apis/streaming';
 
 	const i18n = getContext('i18n');
 
@@ -551,9 +552,9 @@
 				messages: [
 					$settings.system
 						? {
-								role: 'system',
-								content: $settings.system
-						  }
+							role: 'system',
+							content: $settings.system
+						}
 						: undefined,
 					...messages
 				]
@@ -611,38 +612,22 @@
 				.pipeThrough(splitStream('\n'))
 				.getReader();
 
-			while (true) {
-				const { value, done } = await reader.read();
+			const textStream = await createOpenAITextStream(reader);
+			console.log(textStream);
+
+			for await (const update of textStream) {
+				const { value, done } = update;
 				if (done || stopResponseFlag || _chatId !== $chatId) {
 					responseMessage.done = true;
 					messages = messages;
 					break;
 				}
 
-				try {
-					let lines = value.split('\n');
-
-					for (const line of lines) {
-						if (line !== '') {
-							console.log(line);
-							if (line === 'data: [DONE]') {
-								responseMessage.done = true;
-								messages = messages;
-							} else {
-								let data = JSON.parse(line.replace(/^data: /, ''));
-								console.log(data);
-
-								if (responseMessage.content == '' && data.choices[0].delta.content == '\n') {
-									continue;
-								} else {
-									responseMessage.content += data.choices[0].delta.content ?? '';
-									messages = messages;
-								}
-							}
-						}
-					}
-				} catch (error) {
-					console.log(error);
+				if (responseMessage.content == '' && value == '\n') {
+					continue;
+				} else {
+					responseMessage.content += value;
+					messages = messages;
 				}
 
 				if ($settings.notificationEnabled && !document.hasFocus()) {

From 5e458d490acf8c57f5a09d50310a58fc1ffe57c9 Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Sun, 21 Apr 2024 00:52:27 -0500
Subject: [PATCH 02/40] fix: run litellm as subprocess

---
 backend/apps/litellm/main.py | 71 +++++++++++++++++++++++++++++-------
 backend/main.py              |  7 +---
 2 files changed, 58 insertions(+), 20 deletions(-)

diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py
index a9922aad7..39f348141 100644
--- a/backend/apps/litellm/main.py
+++ b/backend/apps/litellm/main.py
@@ -1,8 +1,8 @@
+from fastapi import FastAPI, Depends
+from fastapi.routing import APIRoute
+from fastapi.middleware.cors import CORSMiddleware
+
 import logging
-
-from litellm.proxy.proxy_server import ProxyConfig, initialize
-from litellm.proxy.proxy_server import app
-
 from fastapi import FastAPI, Request, Depends, status, Response
 from fastapi.responses import JSONResponse
 
@@ -23,24 +23,39 @@ from config import (
 )
 
 
-proxy_config = ProxyConfig()
+import asyncio
+import subprocess
 
 
-async def config():
-    router, model_list, general_settings = await proxy_config.load_config(
-        router=None, config_file_path="./data/litellm/config.yaml"
+app = FastAPI()
+
+origins = ["*"]
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+
+async def run_background_process(command):
+    process = await asyncio.create_subprocess_exec(
+        *command.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE
     )
-
-    await initialize(config="./data/litellm/config.yaml", telemetry=False)
+    return process
 
 
-async def startup():
-    await config()
+async def start_litellm_background():
+    # Command to run in the background
+    command = "litellm --config ./data/litellm/config.yaml"
+    await run_background_process(command)
 
 
 @app.on_event("startup")
-async def on_startup():
-    await startup()
+async def startup_event():
+    asyncio.create_task(start_litellm_background())
 
 
 app.state.MODEL_FILTER_ENABLED = MODEL_FILTER_ENABLED
@@ -63,6 +78,11 @@ async def auth_middleware(request: Request, call_next):
     return response
 
 
+@app.get("/")
+async def get_status():
+    return {"status": True}
+
+
 class ModifyModelsResponseMiddleware(BaseHTTPMiddleware):
     async def dispatch(
         self, request: Request, call_next: RequestResponseEndpoint
@@ -98,3 +118,26 @@ class ModifyModelsResponseMiddleware(BaseHTTPMiddleware):
 
 
 app.add_middleware(ModifyModelsResponseMiddleware)
+
+
+# from litellm.proxy.proxy_server import ProxyConfig, initialize
+# from litellm.proxy.proxy_server import app
+
+# proxy_config = ProxyConfig()
+
+
+# async def config():
+#     router, model_list, general_settings = await proxy_config.load_config(
+#         router=None, config_file_path="./data/litellm/config.yaml"
+#     )
+
+#     await initialize(config="./data/litellm/config.yaml", telemetry=False)
+
+
+# async def startup():
+#     await config()
+
+
+# @app.on_event("startup")
+# async def on_startup():
+#     await startup()
diff --git a/backend/main.py b/backend/main.py
index 8b5fd76bc..b5aa7e7d0 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -20,7 +20,7 @@ from starlette.middleware.base import BaseHTTPMiddleware
 from apps.ollama.main import app as ollama_app
 from apps.openai.main import app as openai_app
 
-from apps.litellm.main import app as litellm_app, startup as litellm_app_startup
+from apps.litellm.main import app as litellm_app
 from apps.audio.main import app as audio_app
 from apps.images.main import app as images_app
 from apps.rag.main import app as rag_app
@@ -168,11 +168,6 @@ async def check_url(request: Request, call_next):
     return response
 
 
-@app.on_event("startup")
-async def on_startup():
-    await litellm_app_startup()
-
-
 app.mount("/api/v1", webui_app)
 app.mount("/litellm/api", litellm_app)
 

From a41b195f466d7c62eae700186ccc7cc30453c7be Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Sun, 21 Apr 2024 01:13:24 -0500
Subject: [PATCH 03/40] fix: disable litellm telemetry, add HTTP proxy routes

---
 backend/apps/litellm/main.py | 185 ++++++++++++++++++++++-------------
 1 file changed, 119 insertions(+), 66 deletions(-)

diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py
index 39f348141..947456881 100644
--- a/backend/apps/litellm/main.py
+++ b/backend/apps/litellm/main.py
@@ -1,4 +1,4 @@
-from fastapi import FastAPI, Depends
+from fastapi import FastAPI, Depends, HTTPException
 from fastapi.routing import APIRoute
 from fastapi.middleware.cors import CORSMiddleware
 
@@ -9,9 +9,11 @@ from fastapi.responses import JSONResponse
 from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
 from starlette.responses import StreamingResponse
 import json
+import requests
 
-from utils.utils import get_http_authorization_cred, get_current_user
+from utils.utils import get_verified_user, get_current_user
 from config import SRC_LOG_LEVELS, ENV
+from constants import ERROR_MESSAGES
 
 log = logging.getLogger(__name__)
 log.setLevel(SRC_LOG_LEVELS["LITELLM"])
@@ -49,12 +51,13 @@ async def run_background_process(command):
 
 async def start_litellm_background():
     # Command to run in the background
-    command = "litellm --config ./data/litellm/config.yaml"
+    command = "litellm --telemetry False --config ./data/litellm/config.yaml"
     await run_background_process(command)
 
 
 @app.on_event("startup")
 async def startup_event():
+    # TODO: Check config.yaml file and create one
     asyncio.create_task(start_litellm_background())
 
 
@@ -62,82 +65,132 @@ app.state.MODEL_FILTER_ENABLED = MODEL_FILTER_ENABLED
 app.state.MODEL_FILTER_LIST = MODEL_FILTER_LIST
 
 
-@app.middleware("http")
-async def auth_middleware(request: Request, call_next):
-    auth_header = request.headers.get("Authorization", "")
-    request.state.user = None
-
-    try:
-        user = get_current_user(get_http_authorization_cred(auth_header))
-        log.debug(f"user: {user}")
-        request.state.user = user
-    except Exception as e:
-        return JSONResponse(status_code=400, content={"detail": str(e)})
-
-    response = await call_next(request)
-    return response
-
-
 @app.get("/")
 async def get_status():
     return {"status": True}
 
 
-class ModifyModelsResponseMiddleware(BaseHTTPMiddleware):
-    async def dispatch(
-        self, request: Request, call_next: RequestResponseEndpoint
-    ) -> Response:
+@app.get("/models")
+@app.get("/v1/models")
+async def get_models(user=Depends(get_current_user)):
+    url = "http://localhost:4000/v1"
+    r = None
+    try:
+        r = requests.request(method="GET", url=f"{url}/models")
+        r.raise_for_status()
 
-        response = await call_next(request)
-        user = request.state.user
+        data = r.json()
 
-        if "/models" in request.url.path:
-            if isinstance(response, StreamingResponse):
-                # Read the content of the streaming response
-                body = b""
-                async for chunk in response.body_iterator:
-                    body += chunk
+        if app.state.MODEL_FILTER_ENABLED:
+            if user and user.role == "user":
+                data["data"] = list(
+                    filter(
+                        lambda model: model["id"] in app.state.MODEL_FILTER_LIST,
+                        data["data"],
+                    )
+                )
 
-                data = json.loads(body.decode("utf-8"))
+        return data
+    except Exception as e:
+        log.exception(e)
+        error_detail = "Open WebUI: Server Connection Error"
+        if r is not None:
+            try:
+                res = r.json()
+                if "error" in res:
+                    error_detail = f"External: {res['error']}"
+            except:
+                error_detail = f"External: {e}"
 
-                if app.state.MODEL_FILTER_ENABLED:
-                    if user and user.role == "user":
-                        data["data"] = list(
-                            filter(
-                                lambda model: model["id"]
-                                in app.state.MODEL_FILTER_LIST,
-                                data["data"],
-                            )
-                        )
-
-                # Modified Flag
-                data["modified"] = True
-                return JSONResponse(content=data)
-
-        return response
+        raise HTTPException(
+            status_code=r.status_code if r else 500,
+            detail=error_detail,
+        )
 
 
-app.add_middleware(ModifyModelsResponseMiddleware)
+@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
+async def proxy(path: str, request: Request, user=Depends(get_verified_user)):
+    body = await request.body()
+
+    url = "http://localhost:4000/v1"
+
+    target_url = f"{url}/{path}"
+
+    headers = {}
+    # headers["Authorization"] = f"Bearer {key}"
+    headers["Content-Type"] = "application/json"
+
+    r = None
+
+    try:
+        r = requests.request(
+            method=request.method,
+            url=target_url,
+            data=body,
+            headers=headers,
+            stream=True,
+        )
+
+        r.raise_for_status()
+
+        # Check if response is SSE
+        if "text/event-stream" in r.headers.get("Content-Type", ""):
+            return StreamingResponse(
+                r.iter_content(chunk_size=8192),
+                status_code=r.status_code,
+                headers=dict(r.headers),
+            )
+        else:
+            response_data = r.json()
+            return response_data
+    except Exception as e:
+        log.exception(e)
+        error_detail = "Open WebUI: Server Connection Error"
+        if r is not None:
+            try:
+                res = r.json()
+                if "error" in res:
+                    error_detail = f"External: {res['error']['message'] if 'message' in res['error'] else res['error']}"
+            except:
+                error_detail = f"External: {e}"
+
+        raise HTTPException(
+            status_code=r.status_code if r else 500, detail=error_detail
+        )
 
 
-# from litellm.proxy.proxy_server import ProxyConfig, initialize
-# from litellm.proxy.proxy_server import app
+# class ModifyModelsResponseMiddleware(BaseHTTPMiddleware):
+#     async def dispatch(
+#         self, request: Request, call_next: RequestResponseEndpoint
+#     ) -> Response:
 
-# proxy_config = ProxyConfig()
+#         response = await call_next(request)
+#         user = request.state.user
+
+#         if "/models" in request.url.path:
+#             if isinstance(response, StreamingResponse):
+#                 # Read the content of the streaming response
+#                 body = b""
+#                 async for chunk in response.body_iterator:
+#                     body += chunk
+
+#                 data = json.loads(body.decode("utf-8"))
+
+#                 if app.state.MODEL_FILTER_ENABLED:
+#                     if user and user.role == "user":
+#                         data["data"] = list(
+#                             filter(
+#                                 lambda model: model["id"]
+#                                 in app.state.MODEL_FILTER_LIST,
+#                                 data["data"],
+#                             )
+#                         )
+
+#                 # Modified Flag
+#                 data["modified"] = True
+#                 return JSONResponse(content=data)
+
+#         return response
 
 
-# async def config():
-#     router, model_list, general_settings = await proxy_config.load_config(
-#         router=None, config_file_path="./data/litellm/config.yaml"
-#     )
-
-#     await initialize(config="./data/litellm/config.yaml", telemetry=False)
-
-
-# async def startup():
-#     await config()
-
-
-# @app.on_event("startup")
-# async def on_startup():
-#     await startup()
+# app.add_middleware(ModifyModelsResponseMiddleware)

From 8651bec915ae23f26f02f07b34d52f9099097148 Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Sun, 21 Apr 2024 01:22:02 -0500
Subject: [PATCH 04/40] fix: start litellm on app startup and print its stdout

---
 backend/apps/litellm/main.py | 11 ++++++++++-
 backend/main.py              |  8 +++++++-
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py
index 947456881..5a8b37f47 100644
--- a/backend/apps/litellm/main.py
+++ b/backend/apps/litellm/main.py
@@ -43,20 +43,29 @@ app.add_middleware(
 
 
 async def run_background_process(command):
+    # Start the process
     process = await asyncio.create_subprocess_exec(
         *command.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE
     )
-    return process
+    # Read output asynchronously
+    async for line in process.stdout:
+        print(line.decode().strip())  # Print stdout line by line
+
+    await process.wait()  # Wait for the subprocess to finish
 
 
 async def start_litellm_background():
+    print("start_litellm_background")
     # Command to run in the background
     command = "litellm --telemetry False --config ./data/litellm/config.yaml"
+
     await run_background_process(command)
 
 
 @app.on_event("startup")
 async def startup_event():
+
+    print("startup_event")
     # TODO: Check config.yaml file and create one
     asyncio.create_task(start_litellm_background())
 
diff --git a/backend/main.py b/backend/main.py
index b5aa7e7d0..48e14f1dd 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -20,12 +20,13 @@ from starlette.middleware.base import BaseHTTPMiddleware
 from apps.ollama.main import app as ollama_app
 from apps.openai.main import app as openai_app
 
-from apps.litellm.main import app as litellm_app
+from apps.litellm.main import app as litellm_app, start_litellm_background
 from apps.audio.main import app as audio_app
 from apps.images.main import app as images_app
 from apps.rag.main import app as rag_app
 from apps.web.main import app as webui_app
 
+import asyncio
 from pydantic import BaseModel
 from typing import List
 
@@ -168,6 +169,11 @@ async def check_url(request: Request, call_next):
     return response
 
 
+@app.on_event("startup")
+async def on_startup():
+    asyncio.create_task(start_litellm_background())
+
+
 app.mount("/api/v1", webui_app)
 app.mount("/litellm/api", litellm_app)
 

From 3c382d4c6cbea0352a4ad4bc3a90ed8f339a148b Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Sun, 21 Apr 2024 01:46:09 -0500
Subject: [PATCH 05/40] refac: close subprocess gracefully

---
 backend/apps/litellm/main.py | 51 +++++++++++++++++++++++++++++-------
 backend/main.py              | 11 +++++++-
 2 files changed, 52 insertions(+), 10 deletions(-)

diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py
index 5a8b37f47..68e48858b 100644
--- a/backend/apps/litellm/main.py
+++ b/backend/apps/litellm/main.py
@@ -42,16 +42,40 @@ app.add_middleware(
 )
 
 
-async def run_background_process(command):
-    # Start the process
-    process = await asyncio.create_subprocess_exec(
-        *command.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE
-    )
-    # Read output asynchronously
-    async for line in process.stdout:
-        print(line.decode().strip())  # Print stdout line by line
+# Global variable to store the subprocess reference
+background_process = None
 
-    await process.wait()  # Wait for the subprocess to finish
+
+async def run_background_process(command):
+    global background_process
+    print("run_background_process")
+
+    try:
+        # Log the command to be executed
+        print(f"Executing command: {command}")
+        # Execute the command and create a subprocess
+        process = await asyncio.create_subprocess_exec(
+            *command.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE
+        )
+        background_process = process
+        print("Subprocess started successfully.")
+
+        # Capture STDERR for debugging purposes
+        stderr_output = await process.stderr.read()
+        stderr_text = stderr_output.decode().strip()
+        if stderr_text:
+            print(f"Subprocess STDERR: {stderr_text}")
+
+        # Print output line by line
+        async for line in process.stdout:
+            print(line.decode().strip())
+
+        # Wait for the process to finish
+        returncode = await process.wait()
+        print(f"Subprocess exited with return code {returncode}")
+    except Exception as e:
+        log.error(f"Failed to start subprocess: {e}")
+        raise  # Optionally re-raise the exception if you want it to propagate
 
 
 async def start_litellm_background():
@@ -62,6 +86,15 @@ async def start_litellm_background():
     await run_background_process(command)
 
 
+async def shutdown_litellm_background():
+    print("shutdown_litellm_background")
+    global background_process
+    if background_process:
+        background_process.terminate()
+        await background_process.wait()  # Ensure the process has terminated
+        print("Subprocess terminated")
+
+
 @app.on_event("startup")
 async def startup_event():
 
diff --git a/backend/main.py b/backend/main.py
index 48e14f1dd..579ff2ee0 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -20,7 +20,11 @@ from starlette.middleware.base import BaseHTTPMiddleware
 from apps.ollama.main import app as ollama_app
 from apps.openai.main import app as openai_app
 
-from apps.litellm.main import app as litellm_app, start_litellm_background
+from apps.litellm.main import (
+    app as litellm_app,
+    start_litellm_background,
+    shutdown_litellm_background,
+)
 from apps.audio.main import app as audio_app
 from apps.images.main import app as images_app
 from apps.rag.main import app as rag_app
@@ -316,3 +320,8 @@ app.mount(
     SPAStaticFiles(directory=FRONTEND_BUILD_DIR, html=True),
     name="spa-static-files",
 )
+
+
+@app.on_event("shutdown")
+async def shutdown_event():
+    await shutdown_litellm_background()

From a59fb6b9eb6bcbe438d15e2020b31d2ef6cdf580 Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Sun, 21 Apr 2024 01:47:35 -0500
Subject: [PATCH 06/40] fix: drop /v1 from litellm proxy base URL

---
 backend/apps/litellm/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py
index 68e48858b..486ae4736 100644
--- a/backend/apps/litellm/main.py
+++ b/backend/apps/litellm/main.py
@@ -154,7 +154,7 @@ async def get_models(user=Depends(get_current_user)):
 async def proxy(path: str, request: Request, user=Depends(get_verified_user)):
     body = await request.body()
 
-    url = "http://localhost:4000/v1"
+    url = "http://localhost:4000"
 
     target_url = f"{url}/{path}"
 

From 51191168bc77b50165e5d937cbb54f592d71d1e2 Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Sun, 21 Apr 2024 01:51:38 -0500
Subject: [PATCH 07/40] feat: restart subprocess route

---
 backend/apps/litellm/main.py | 65 +++++++++++++++---------------------
 1 file changed, 27 insertions(+), 38 deletions(-)

diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py
index 486ae4736..531e96494 100644
--- a/backend/apps/litellm/main.py
+++ b/backend/apps/litellm/main.py
@@ -11,7 +11,7 @@ from starlette.responses import StreamingResponse
 import json
 import requests
 
-from utils.utils import get_verified_user, get_current_user
+from utils.utils import get_verified_user, get_current_user, get_admin_user
 from config import SRC_LOG_LEVELS, ENV
 from constants import ERROR_MESSAGES
 
@@ -112,6 +112,32 @@ async def get_status():
     return {"status": True}
 
 
+@app.get("/restart")
+async def restart_litellm(user=Depends(get_admin_user)):
+    """
+    Endpoint to restart the litellm background service.
+    """
+    log.info("Requested restart of litellm service.")
+    try:
+        # Shut down the existing process if it is running
+        await shutdown_litellm_background()
+        log.info("litellm service shutdown complete.")
+
+        # Restart the background service
+        await start_litellm_background()
+        log.info("litellm service restart complete.")
+
+        return {
+            "status": "success",
+            "message": "litellm service restarted successfully.",
+        }
+    except Exception as e:
+        log.error(f"Error restarting litellm service: {e}")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)
+        )
+
+
 @app.get("/models")
 @app.get("/v1/models")
 async def get_models(user=Depends(get_current_user)):
@@ -199,40 +225,3 @@ async def proxy(path: str, request: Request, user=Depends(get_verified_user)):
         raise HTTPException(
             status_code=r.status_code if r else 500, detail=error_detail
         )
-
-
-# class ModifyModelsResponseMiddleware(BaseHTTPMiddleware):
-#     async def dispatch(
-#         self, request: Request, call_next: RequestResponseEndpoint
-#     ) -> Response:
-
-#         response = await call_next(request)
-#         user = request.state.user
-
-#         if "/models" in request.url.path:
-#             if isinstance(response, StreamingResponse):
-#                 # Read the content of the streaming response
-#                 body = b""
-#                 async for chunk in response.body_iterator:
-#                     body += chunk
-
-#                 data = json.loads(body.decode("utf-8"))
-
-#                 if app.state.MODEL_FILTER_ENABLED:
-#                     if user and user.role == "user":
-#                         data["data"] = list(
-#                             filter(
-#                                 lambda model: model["id"]
-#                                 in app.state.MODEL_FILTER_LIST,
-#                                 data["data"],
-#                             )
-#                         )
-
-#                 # Modified Flag
-#                 data["modified"] = True
-#                 return JSONResponse(content=data)
-
-#         return response
-
-
-# app.add_middleware(ModifyModelsResponseMiddleware)

From 2717fe7c207b3a0e19e23113e647ec8b6e78e4bc Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Sun, 21 Apr 2024 02:00:03 -0500
Subject: [PATCH 08/40] fix: do not await litellm start in restart route

---
 backend/apps/litellm/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py
index 531e96494..68ae54fbc 100644
--- a/backend/apps/litellm/main.py
+++ b/backend/apps/litellm/main.py
@@ -124,7 +124,7 @@ async def restart_litellm(user=Depends(get_admin_user)):
         log.info("litellm service shutdown complete.")
 
         # Restart the background service
-        await start_litellm_background()
+        start_litellm_background()
         log.info("litellm service restart complete.")
 
         return {

From 67df928c7ae953e4b725c548de08c0b61ce7d1e6 Mon Sep 17 00:00:00 2001
From: Jun Siang Cheah <git@jscheah.me>
Date: Sun, 21 Apr 2024 10:45:07 +0100
Subject: [PATCH 09/40] feat: make chunk splitting a configurable option

---
 src/lib/apis/streaming/index.ts               |  9 ++++--
 .../components/chat/Settings/Interface.svelte | 28 +++++++++++++++++++
 src/lib/i18n/locales/en-US/translation.json   |  1 +
 src/routes/(app)/+page.svelte                 |  2 +-
 src/routes/(app)/c/[id]/+page.svelte          |  8 +++---
 5 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/src/lib/apis/streaming/index.ts b/src/lib/apis/streaming/index.ts
index 4d1d2ecec..5b89a4668 100644
--- a/src/lib/apis/streaming/index.ts
+++ b/src/lib/apis/streaming/index.ts
@@ -6,9 +6,14 @@ type TextStreamUpdate = {
 // createOpenAITextStream takes a ReadableStreamDefaultReader from an SSE response,
 // and returns an async generator that emits delta updates with large deltas chunked into random sized chunks
 export async function createOpenAITextStream(
-	messageStream: ReadableStreamDefaultReader
+	messageStream: ReadableStreamDefaultReader,
+	splitLargeDeltas: boolean
 ): Promise<AsyncGenerator<TextStreamUpdate>> {
-	return streamLargeDeltasAsRandomChunks(openAIStreamToIterator(messageStream));
+	let iterator = openAIStreamToIterator(messageStream);
+	if (splitLargeDeltas) {
+		iterator = streamLargeDeltasAsRandomChunks(iterator);
+	}
+	return iterator;
 }
 
 async function* openAIStreamToIterator(
diff --git a/src/lib/components/chat/Settings/Interface.svelte b/src/lib/components/chat/Settings/Interface.svelte
index ad9e05e7f..37d7fa4ea 100644
--- a/src/lib/components/chat/Settings/Interface.svelte
+++ b/src/lib/components/chat/Settings/Interface.svelte
@@ -17,11 +17,17 @@
 	let titleAutoGenerateModelExternal = '';
 	let fullScreenMode = false;
 	let titleGenerationPrompt = '';
+	let splitLargeChunks = false;
 
 	// Interface
 	let promptSuggestions = [];
 	let showUsername = false;
 
+	const toggleSplitLargeChunks = async () => {
+		splitLargeChunks = !splitLargeChunks;
+		saveSettings({ splitLargeChunks: splitLargeChunks });
+	};
+
 	const toggleFullScreenMode = async () => {
 		fullScreenMode = !fullScreenMode;
 		saveSettings({ fullScreenMode: fullScreenMode });
@@ -197,6 +203,28 @@
 					</button>
 				</div>
 			</div>
+
+			<div>
+				<div class=" py-0.5 flex w-full justify-between">
+					<div class=" self-center text-xs font-medium">
+						{$i18n.t('Fluidly stream large external response chunks')}
+					</div>
+
+					<button
+						class="p-1 px-3 text-xs flex rounded transition"
+						on:click={() => {
+							toggleSplitLargeChunks();
+						}}
+						type="button"
+					>
+						{#if splitLargeChunks === true}
+							<span class="ml-2 self-center">{$i18n.t('On')}</span>
+						{:else}
+							<span class="ml-2 self-center">{$i18n.t('Off')}</span>
+						{/if}
+					</button>
+				</div>
+			</div>
 		</div>
 
 		<hr class=" dark:border-gray-700" />
diff --git a/src/lib/i18n/locales/en-US/translation.json b/src/lib/i18n/locales/en-US/translation.json
index be89b1b01..fdfe804ba 100644
--- a/src/lib/i18n/locales/en-US/translation.json
+++ b/src/lib/i18n/locales/en-US/translation.json
@@ -152,6 +152,7 @@
 	"File Mode": "",
 	"File not found.": "",
 	"Fingerprint spoofing detected: Unable to use initials as avatar. Defaulting to default profile image.": "",
+	"Fluidly stream large external response chunks": "",
 	"Focus chat input": "",
 	"Format your variables using square brackets like this:": "",
 	"From (Base Model)": "",
diff --git a/src/routes/(app)/+page.svelte b/src/routes/(app)/+page.svelte
index bd8676985..9fc261773 100644
--- a/src/routes/(app)/+page.svelte
+++ b/src/routes/(app)/+page.svelte
@@ -600,7 +600,7 @@
 				.pipeThrough(splitStream('\n'))
 				.getReader();
 
-			const textStream = await createOpenAITextStream(reader);
+			const textStream = await createOpenAITextStream(reader, $settings.splitLargeChunks);
 			console.log(textStream);
 
 			for await (const update of textStream) {
diff --git a/src/routes/(app)/c/[id]/+page.svelte b/src/routes/(app)/c/[id]/+page.svelte
index 2f8ad7d0b..c230eb5c1 100644
--- a/src/routes/(app)/c/[id]/+page.svelte
+++ b/src/routes/(app)/c/[id]/+page.svelte
@@ -552,9 +552,9 @@
 				messages: [
 					$settings.system
 						? {
-							role: 'system',
-							content: $settings.system
-						}
+								role: 'system',
+								content: $settings.system
+						  }
 						: undefined,
 					...messages
 				]
@@ -612,7 +612,7 @@
 				.pipeThrough(splitStream('\n'))
 				.getReader();
 
-			const textStream = await createOpenAITextStream(reader);
+			const textStream = await createOpenAITextStream(reader, $settings.splitLargeChunks);
 			console.log(textStream);
 
 			for await (const update of textStream) {

From 81b7cdfed7cc129962dc686edc8b5568312e2186 Mon Sep 17 00:00:00 2001
From: Jun Siang Cheah <git@jscheah.me>
Date: Sun, 21 Apr 2024 11:41:18 +0100
Subject: [PATCH 10/40] fix: add typescript types for models

---
 src/lib/stores/index.ts | 35 +++++++++++++++++++++++++++++++++--
 1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/src/lib/stores/index.ts b/src/lib/stores/index.ts
index fc58db6bd..038c34195 100644
--- a/src/lib/stores/index.ts
+++ b/src/lib/stores/index.ts
@@ -1,5 +1,5 @@
 import { APP_NAME } from '$lib/constants';
-import { writable } from 'svelte/store';
+import { type Writable, writable } from 'svelte/store';
 
 // Backend
 export const WEBUI_NAME = writable(APP_NAME);
@@ -14,7 +14,7 @@ export const chatId = writable('');
 
 export const chats = writable([]);
 export const tags = writable([]);
-export const models = writable([]);
+export const models: Writable<Model[]> = writable([]);
 
 export const modelfiles = writable([]);
 export const prompts = writable([]);
@@ -36,3 +36,38 @@ export const documents = writable([
 export const settings = writable({});
 export const showSettings = writable(false);
 export const showChangelog = writable(false);
+
+/** Element type of the `models` store: one of the two model shapes below. */
+export type Model = OpenAIModel | OllamaModel;
+
+/** Model shape that carries the `external` flag and an optional `source`. */
+type OpenAIModel = {
+	id: string;
+	name: string;
+	external: boolean;
+	source?: string;
+};
+
+/** Model shape that carries the Ollama-specific fields in addition to id/name. */
+type OllamaModel = {
+	id: string;
+	name: string;
+
+	// Ollama specific fields
+	details: OllamaModelDetails;
+	size: number;
+	description: string;
+	model: string;
+	modified_at: string;
+	digest: string;
+};
+
+/** Type of the `details` field on `OllamaModel`. */
+type OllamaModelDetails = {
+	parent_model: string;
+	format: string;
+	family: string;
+	families: string[] | null;
+	parameter_size: string;
+	quantization_level: string;
+};

From 7d4f9134bc4bbb87239e536f5bf9d5a2fdcf9c6b Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Sun, 21 Apr 2024 13:24:46 -0500
Subject: [PATCH 11/40] refac: styling

---
 src/lib/components/layout/Sidebar/ArchivedChatsModal.svelte | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lib/components/layout/Sidebar/ArchivedChatsModal.svelte b/src/lib/components/layout/Sidebar/ArchivedChatsModal.svelte
index 6ae5286b4..51bcf1ad6 100644
--- a/src/lib/components/layout/Sidebar/ArchivedChatsModal.svelte
+++ b/src/lib/components/layout/Sidebar/ArchivedChatsModal.svelte
@@ -67,7 +67,7 @@
 		<div class="flex flex-col md:flex-row w-full px-5 py-4 md:space-x-4 dark:text-gray-200">
 			<div class=" flex flex-col w-full sm:flex-row sm:justify-center sm:space-x-6">
 				{#if chats.length > 0}
-					<div class="text-left text-sm w-full mb-4 h-[22rem] overflow-y-scroll">
+					<div class="text-left text-sm w-full mb-4 max-h-[22rem] overflow-y-scroll">
 						<div class="relative overflow-x-auto">
 							<table class="w-full text-sm text-left text-gray-500 dark:text-gray-400 table-auto">
 								<thead

From 77426266d24464d51334909ca77474f566ca1c6b Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Sun, 21 Apr 2024 14:32:45 -0500
Subject: [PATCH 12/40] refac: port number update

---
 backend/apps/litellm/main.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py
index 68ae54fbc..8d1132bb4 100644
--- a/backend/apps/litellm/main.py
+++ b/backend/apps/litellm/main.py
@@ -81,7 +81,9 @@ async def run_background_process(command):
 async def start_litellm_background():
     print("start_litellm_background")
     # Command to run in the background
-    command = "litellm --telemetry False --config ./data/litellm/config.yaml"
+    command = (
+        "litellm --port 14365 --telemetry False --config ./data/litellm/config.yaml"
+    )
 
     await run_background_process(command)
 
@@ -141,7 +143,7 @@ async def restart_litellm(user=Depends(get_admin_user)):
 @app.get("/models")
 @app.get("/v1/models")
 async def get_models(user=Depends(get_current_user)):
-    url = "http://localhost:4000/v1"
+    url = "http://localhost:14365/v1"
     r = None
     try:
         r = requests.request(method="GET", url=f"{url}/models")
@@ -180,7 +182,7 @@ async def get_models(user=Depends(get_current_user)):
 async def proxy(path: str, request: Request, user=Depends(get_verified_user)):
     body = await request.body()
 
-    url = "http://localhost:4000"
+    url = "http://localhost:14365"
 
     target_url = f"{url}/{path}"
 

From 8422d3ea79c134ff12e9120c3f27220a7ac2bd57 Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Sun, 21 Apr 2024 14:43:51 -0500
Subject: [PATCH 13/40] Update requirements.txt

---
 backend/requirements.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/backend/requirements.txt b/backend/requirements.txt
index 5f41137c9..0b5e90433 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -18,6 +18,8 @@ peewee-migrate
 bcrypt
 
 litellm==1.35.17
+litellm['proxy']==1.35.17
+
 boto3
 
 argon2-cffi

From f83eb7326f7b4fcaf54493c61bc0344855429617 Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Sun, 21 Apr 2024 14:44:28 -0500
Subject: [PATCH 14/40] Update requirements.txt

---
 backend/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/requirements.txt b/backend/requirements.txt
index 0b5e90433..e04551567 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -18,7 +18,7 @@ peewee-migrate
 bcrypt
 
 litellm==1.35.17
-litellm['proxy']==1.35.17
+litellm[proxy]==1.35.17
 
 boto3
 

From 31124d9deb08c8283247b7b95313be59646fa7e0 Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Sun, 21 Apr 2024 16:10:01 -0500
Subject: [PATCH 15/40] feat: litellm config update

---
 backend/apps/litellm/main.py | 75 ++++++++++++++++++++++++++----------
 1 file changed, 55 insertions(+), 20 deletions(-)

diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py
index 8d1132bb4..5696b6945 100644
--- a/backend/apps/litellm/main.py
+++ b/backend/apps/litellm/main.py
@@ -11,6 +11,9 @@ from starlette.responses import StreamingResponse
 import json
 import requests
 
+from pydantic import BaseModel
+from typing import Optional, List
+
 from utils.utils import get_verified_user, get_current_user, get_admin_user
 from config import SRC_LOG_LEVELS, ENV
 from constants import ERROR_MESSAGES
@@ -19,15 +22,12 @@ log = logging.getLogger(__name__)
 log.setLevel(SRC_LOG_LEVELS["LITELLM"])
 
 
-from config import (
-    MODEL_FILTER_ENABLED,
-    MODEL_FILTER_LIST,
-)
+from config import MODEL_FILTER_ENABLED, MODEL_FILTER_LIST, DATA_DIR
 
 
 import asyncio
 import subprocess
-
+import yaml
 
 app = FastAPI()
 
@@ -42,44 +42,51 @@ app.add_middleware(
 )
 
 
+LITELLM_CONFIG_DIR = f"{DATA_DIR}/litellm/config.yaml"
+
+with open(LITELLM_CONFIG_DIR, "r") as file:
+    litellm_config = yaml.safe_load(file)
+
+app.state.CONFIG = litellm_config
+
 # Global variable to store the subprocess reference
 background_process = None
 
 
 async def run_background_process(command):
     global background_process
-    print("run_background_process")
+    log.info("run_background_process")
 
     try:
         # Log the command to be executed
-        print(f"Executing command: {command}")
+        log.info(f"Executing command: {command}")
         # Execute the command and create a subprocess
         process = await asyncio.create_subprocess_exec(
             *command.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE
         )
         background_process = process
-        print("Subprocess started successfully.")
+        log.info("Subprocess started successfully.")
 
         # Capture STDERR for debugging purposes
         stderr_output = await process.stderr.read()
         stderr_text = stderr_output.decode().strip()
         if stderr_text:
-            print(f"Subprocess STDERR: {stderr_text}")
+            log.info(f"Subprocess STDERR: {stderr_text}")
 
-        # Print output line by line
+        # Log the subprocess's stdout line by line
         async for line in process.stdout:
-            print(line.decode().strip())
+            log.info(line.decode().strip())
 
         # Wait for the process to finish
         returncode = await process.wait()
-        print(f"Subprocess exited with return code {returncode}")
+        log.info(f"Subprocess exited with return code {returncode}")
     except Exception as e:
         log.error(f"Failed to start subprocess: {e}")
         raise  # Optionally re-raise the exception if you want it to propagate
 
 
 async def start_litellm_background():
-    print("start_litellm_background")
+    log.info("start_litellm_background")
     # Command to run in the background
     command = (
         "litellm --port 14365 --telemetry False --config ./data/litellm/config.yaml"
@@ -89,18 +96,18 @@ async def start_litellm_background():
 
 
 async def shutdown_litellm_background():
-    print("shutdown_litellm_background")
+    log.info("shutdown_litellm_background")
     global background_process
     if background_process:
         background_process.terminate()
         await background_process.wait()  # Ensure the process has terminated
-        print("Subprocess terminated")
+        log.info("Subprocess terminated")
 
 
 @app.on_event("startup")
 async def startup_event():
 
-    print("startup_event")
+    log.info("startup_event")
     # TODO: Check config.yaml file and create one
     asyncio.create_task(start_litellm_background())
 
@@ -114,8 +121,7 @@ async def get_status():
     return {"status": True}
 
 
-@app.get("/restart")
-async def restart_litellm(user=Depends(get_admin_user)):
+async def restart_litellm():
     """
     Endpoint to restart the litellm background service.
     """
@@ -126,7 +132,8 @@ async def restart_litellm(user=Depends(get_admin_user)):
         log.info("litellm service shutdown complete.")
 
         # Restart the background service
-        start_litellm_background()
+
+        asyncio.create_task(start_litellm_background())
         log.info("litellm service restart complete.")
 
         return {
@@ -134,12 +141,40 @@ async def restart_litellm(user=Depends(get_admin_user)):
             "message": "litellm service restarted successfully.",
         }
     except Exception as e:
         log.error(f"Error restarting litellm service: {e}")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)
         )
 
 
+@app.get("/restart")
+async def restart_litellm_handler(user=Depends(get_admin_user)):
+    return await restart_litellm()
+
+
+@app.get("/config")
+async def get_config(user=Depends(get_admin_user)):
+    return app.state.CONFIG
+
+
+class LiteLLMConfigForm(BaseModel):
+    general_settings: Optional[dict] = None
+    litellm_settings: Optional[dict] = None
+    model_list: Optional[List[dict]] = None
+    router_settings: Optional[dict] = None
+
+
+@app.post("/config/update")
+async def update_config(form_data: LiteLLMConfigForm, user=Depends(get_admin_user)):
+    app.state.CONFIG = form_data.model_dump(exclude_none=True)
+
+    with open(LITELLM_CONFIG_DIR, "w") as file:
+        yaml.dump(app.state.CONFIG, file)
+
+    await restart_litellm()
+    return app.state.CONFIG
+
+
 @app.get("/models")
 @app.get("/v1/models")
 async def get_models(user=Depends(get_current_user)):

From e627b8bf21d2eb5f78f753ed6896ea9255d9e2eb Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Sun, 21 Apr 2024 17:26:22 -0500
Subject: [PATCH 16/40] feat: litellm model add/delete

---
 backend/apps/litellm/main.py                  | 50 +++++++++++++++++++
 .../components/chat/Settings/Models.svelte    | 12 ++---
 2 files changed, 56 insertions(+), 6 deletions(-)

diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py
index 5696b6945..9bc08598f 100644
--- a/backend/apps/litellm/main.py
+++ b/backend/apps/litellm/main.py
@@ -102,6 +102,7 @@ async def shutdown_litellm_background():
         background_process.terminate()
         await background_process.wait()  # Ensure the process has terminated
         log.info("Subprocess terminated")
+        background_process = None
 
 
 @app.on_event("startup")
@@ -178,6 +179,9 @@ async def update_config(form_data: LiteLLMConfigForm, user=Depends(get_admin_use
 @app.get("/models")
 @app.get("/v1/models")
 async def get_models(user=Depends(get_current_user)):
+    while not background_process:
+        await asyncio.sleep(0.1)
+
     url = "http://localhost:14365/v1"
     r = None
     try:
@@ -213,6 +217,52 @@ async def get_models(user=Depends(get_current_user)):
         )
 
 
+@app.get("/model/info")
+async def get_model_list(user=Depends(get_admin_user)):
+    return {"data": app.state.CONFIG["model_list"]}
+
+
+class AddLiteLLMModelForm(BaseModel):
+    model_name: str
+    litellm_params: dict
+
+
+@app.post("/model/new")
+async def add_model_to_config(
+    form_data: AddLiteLLMModelForm, user=Depends(get_admin_user)
+):
+    app.state.CONFIG["model_list"].append(form_data.model_dump())
+
+    with open(LITELLM_CONFIG_DIR, "w") as file:
+        yaml.dump(app.state.CONFIG, file)
+
+    await restart_litellm()
+
+    return {"message": "model added"}
+
+
+class DeleteLiteLLMModelForm(BaseModel):
+    id: str
+
+
+@app.post("/model/delete")
+async def delete_model_from_config(
+    form_data: DeleteLiteLLMModelForm, user=Depends(get_admin_user)
+):
+    app.state.CONFIG["model_list"] = [
+        model
+        for model in app.state.CONFIG["model_list"]
+        if model["model_name"] != form_data.id
+    ]
+
+    with open(LITELLM_CONFIG_DIR, "w") as file:
+        yaml.dump(app.state.CONFIG, file)
+
+    await restart_litellm()
+
+    return {"message": "model deleted"}
+
+
 @app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
 async def proxy(path: str, request: Request, user=Depends(get_verified_user)):
     body = await request.body()
diff --git a/src/lib/components/chat/Settings/Models.svelte b/src/lib/components/chat/Settings/Models.svelte
index 15b054024..688774d78 100644
--- a/src/lib/components/chat/Settings/Models.svelte
+++ b/src/lib/components/chat/Settings/Models.svelte
@@ -35,7 +35,7 @@
 	let liteLLMRPM = '';
 	let liteLLMMaxTokens = '';
 
-	let deleteLiteLLMModelId = '';
+	let deleteLiteLLMModelName = '';
 
 	$: liteLLMModelName = liteLLMModel;
 
@@ -472,7 +472,7 @@
 	};
 
 	const deleteLiteLLMModelHandler = async () => {
-		const res = await deleteLiteLLMModel(localStorage.token, deleteLiteLLMModelId).catch(
+		const res = await deleteLiteLLMModel(localStorage.token, deleteLiteLLMModelName).catch(
 			(error) => {
 				toast.error(error);
 				return null;
@@ -485,7 +485,7 @@
 			}
 		}
 
-		deleteLiteLLMModelId = '';
+		deleteLiteLLMModelName = '';
 		liteLLMModelInfo = await getLiteLLMModelInfo(localStorage.token);
 		models.set(await getModels());
 	};
@@ -1099,14 +1099,14 @@
 								<div class="flex-1 mr-2">
 									<select
 										class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
-										bind:value={deleteLiteLLMModelId}
+										bind:value={deleteLiteLLMModelName}
 										placeholder={$i18n.t('Select a model')}
 									>
-										{#if !deleteLiteLLMModelId}
+										{#if !deleteLiteLLMModelName}
 											<option value="" disabled selected>{$i18n.t('Select a model')}</option>
 										{/if}
 										{#each liteLLMModelInfo as model}
-											<option value={model.model_info.id} class="bg-gray-100 dark:bg-gray-700"
+											<option value={model.model_name} class="bg-gray-100 dark:bg-gray-700"
 												>{model.model_name}</option
 											>
 										{/each}

From 760c62739a391af5db111eb3cb6497c8ba5326ac Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Sun, 21 Apr 2024 17:37:59 -0500
Subject: [PATCH 17/40] refac: improved error handling

---
 backend/apps/litellm/main.py | 26 +++++++++++++++++++-------
 backend/constants.py         |  4 ++++
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py
index 9bc08598f..40619be2f 100644
--- a/backend/apps/litellm/main.py
+++ b/backend/apps/litellm/main.py
@@ -9,6 +9,7 @@ from fastapi.responses import JSONResponse
 from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
 from starlette.responses import StreamingResponse
 import json
+import time
 import requests
 
 from pydantic import BaseModel
@@ -16,7 +17,7 @@ from typing import Optional, List
 
 from utils.utils import get_verified_user, get_current_user, get_admin_user
 from config import SRC_LOG_LEVELS, ENV
-from constants import ERROR_MESSAGES
+from constants import MESSAGES
 
 log = logging.getLogger(__name__)
 log.setLevel(SRC_LOG_LEVELS["LITELLM"])
@@ -201,6 +202,7 @@ async def get_models(user=Depends(get_current_user)):
 
         return data
     except Exception as e:
+
         log.exception(e)
         error_detail = "Open WebUI: Server Connection Error"
         if r is not None:
@@ -211,10 +213,18 @@ async def get_models(user=Depends(get_current_user)):
             except:
                 error_detail = f"External: {e}"
 
-        raise HTTPException(
-            status_code=r.status_code if r else 500,
-            detail=error_detail,
-        )
+        return {
+            "data": [
+                {
+                    "id": model["model_name"],
+                    "object": "model",
+                    "created": int(time.time()),
+                    "owned_by": "openai",
+                }
+                for model in app.state.CONFIG["model_list"]
+            ],
+            "object": "list",
+        }
 
 
 @app.get("/model/info")
@@ -231,6 +241,8 @@ class AddLiteLLMModelForm(BaseModel):
 async def add_model_to_config(
     form_data: AddLiteLLMModelForm, user=Depends(get_admin_user)
 ):
+    # TODO: Validate model form
+
     app.state.CONFIG["model_list"].append(form_data.model_dump())
 
     with open(LITELLM_CONFIG_DIR, "w") as file:
@@ -238,7 +250,7 @@ async def add_model_to_config(
 
     await restart_litellm()
 
-    return {"message": "model added"}
+    return {"message": MESSAGES.MODEL_ADDED(form_data.model_name)}
 
 
 class DeleteLiteLLMModelForm(BaseModel):
@@ -260,7 +272,7 @@ async def delete_model_from_config(
 
     await restart_litellm()
 
-    return {"message": "model deleted"}
+    return {"message": MESSAGES.MODEL_DELETED(form_data.id)}
 
 
 @app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
diff --git a/backend/constants.py b/backend/constants.py
index da1ee0b3f..310c13311 100644
--- a/backend/constants.py
+++ b/backend/constants.py
@@ -3,6 +3,10 @@ from enum import Enum
 
 class MESSAGES(str, Enum):
     DEFAULT = lambda msg="": f"{msg if msg else ''}"
+    MODEL_ADDED = lambda model="": f"The model '{model}' has been added successfully."
+    MODEL_DELETED = (
+        lambda model="": f"The model '{model}' has been deleted successfully."
+    )
 
 
 class WEBHOOK_MESSAGES(str, Enum):

From 4651db8c09d90383fc3c8df5670ebd914c68b8e2 Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Sun, 21 Apr 2024 18:25:53 -0500
Subject: [PATCH 18/40] refac: litellm model name validation

---
 backend/apps/litellm/main.py | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py
index 40619be2f..52e0c7002 100644
--- a/backend/apps/litellm/main.py
+++ b/backend/apps/litellm/main.py
@@ -12,7 +12,7 @@ import json
 import time
 import requests
 
-from pydantic import BaseModel
+from pydantic import BaseModel, ConfigDict
 from typing import Optional, List
 
 from utils.utils import get_verified_user, get_current_user, get_admin_user
@@ -25,6 +25,7 @@ log.setLevel(SRC_LOG_LEVELS["LITELLM"])
 
 from config import MODEL_FILTER_ENABLED, MODEL_FILTER_LIST, DATA_DIR
 
+from litellm.utils import get_llm_provider
 
 import asyncio
 import subprocess
@@ -165,6 +166,8 @@ class LiteLLMConfigForm(BaseModel):
     model_list: Optional[List[dict]] = None
     router_settings: Optional[dict] = None
 
+    model_config = ConfigDict(protected_namespaces=())
+
 
 @app.post("/config/update")
 async def update_config(form_data: LiteLLMConfigForm, user=Depends(get_admin_user)):
@@ -236,21 +239,28 @@ class AddLiteLLMModelForm(BaseModel):
     model_name: str
     litellm_params: dict
 
+    model_config = ConfigDict(protected_namespaces=())
+
 
 @app.post("/model/new")
 async def add_model_to_config(
     form_data: AddLiteLLMModelForm, user=Depends(get_admin_user)
 ):
-    # TODO: Validate model form
+    try:
+        get_llm_provider(model=form_data.model_name)
+        app.state.CONFIG["model_list"].append(form_data.model_dump())
 
-    app.state.CONFIG["model_list"].append(form_data.model_dump())
+        with open(LITELLM_CONFIG_DIR, "w") as file:
+            yaml.dump(app.state.CONFIG, file)
 
-    with open(LITELLM_CONFIG_DIR, "w") as file:
-        yaml.dump(app.state.CONFIG, file)
+        await restart_litellm()
 
-    await restart_litellm()
-
-    return {"message": MESSAGES.MODEL_ADDED(form_data.model_name)}
+        return {"message": MESSAGES.MODEL_ADDED(form_data.model_name)}
+    except Exception as e:
+        log.exception(e)  # module logger (with traceback) instead of bare print
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)
+        )
 
 
 class DeleteLiteLLMModelForm(BaseModel):

From 8e94618c514b3c67ceeea7d2fb3c0f43ef7d22d7 Mon Sep 17 00:00:00 2001
From: Danny Liu <dannyjialiliu@gmail.com>
Date: Mon, 22 Apr 2024 00:16:05 -0700
Subject: [PATCH 19/40] fix: <br> is not escaped in output text

---
 src/lib/utils/index.ts | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/lib/utils/index.ts b/src/lib/utils/index.ts
index a24834c33..99f2a5048 100644
--- a/src/lib/utils/index.ts
+++ b/src/lib/utils/index.ts
@@ -35,13 +35,12 @@ export const sanitizeResponseContent = (content: string) => {
 		.replace(/<\|[a-z]+\|$/, '')
 		.replace(/<$/, '')
 		.replaceAll(/<\|[a-z]+\|>/g, ' ')
-		.replaceAll(/<br\s?\/?>/gi, '\n')
 		.replaceAll('<', '&lt;')
 		.trim();
 };
 
 export const revertSanitizedResponseContent = (content: string) => {
-	return content.replaceAll('&lt;', '<');
+    return content.replaceAll('&lt;', '<');
 };
 
 export const capitalizeFirstLetter = (string) => {

From 40c1b49e6db6abb7a13d4172b076364e86d59a65 Mon Sep 17 00:00:00 2001
From: Danny Liu <dannyjialiliu@gmail.com>
Date: Mon, 22 Apr 2024 00:17:43 -0700
Subject: [PATCH 20/40] chore: run format

---
 src/lib/utils/index.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lib/utils/index.ts b/src/lib/utils/index.ts
index 99f2a5048..04cc22079 100644
--- a/src/lib/utils/index.ts
+++ b/src/lib/utils/index.ts
@@ -40,7 +40,7 @@ export const sanitizeResponseContent = (content: string) => {
 };
 
 export const revertSanitizedResponseContent = (content: string) => {
-    return content.replaceAll('&lt;', '<');
+	return content.replaceAll('&lt;', '<');
 };
 
 export const capitalizeFirstLetter = (string) => {

From 190b934ab5176f3c0ed95ac621638ee7741ee500 Mon Sep 17 00:00:00 2001
From: Jun Siang Cheah <git@jscheah.me>
Date: Wed, 17 Apr 2024 09:33:22 +0100
Subject: [PATCH 21/40] feat: add ALLOW_ADMIN_EXPORT to disable exporting of
 chats and the db

---
 backend/apps/web/routers/chats.py             | 10 +++-
 backend/apps/web/routers/utils.py             |  6 ++-
 backend/config.py                             |  2 +
 backend/main.py                               |  2 +
 .../components/admin/Settings/Database.svelte | 47 ++++++++++---------
 src/lib/components/chat/Settings/Chats.svelte |  2 +-
 6 files changed, 44 insertions(+), 25 deletions(-)

diff --git a/backend/apps/web/routers/chats.py b/backend/apps/web/routers/chats.py
index 678c9aea7..07f483698 100644
--- a/backend/apps/web/routers/chats.py
+++ b/backend/apps/web/routers/chats.py
@@ -28,7 +28,10 @@ from apps.web.models.tags import (
 
 from constants import ERROR_MESSAGES
 
-from config import SRC_LOG_LEVELS
+from config import (
+    SRC_LOG_LEVELS,
+    ALLOW_ADMIN_EXPORT
+)
 
 log = logging.getLogger(__name__)
 log.setLevel(SRC_LOG_LEVELS["MODELS"])
@@ -79,6 +82,11 @@ async def get_all_user_chats(user=Depends(get_current_user)):
 
 @router.get("/all/db", response_model=List[ChatResponse])
 async def get_all_user_chats_in_db(user=Depends(get_admin_user)):
+    if not ALLOW_ADMIN_EXPORT:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail=ERROR_MESSAGES.ACCESS_PROHIBITED,
+        )
     return [
         ChatResponse(**{**chat.model_dump(), "chat": json.loads(chat.chat)})
         for chat in Chats.get_all_chats()
diff --git a/backend/apps/web/routers/utils.py b/backend/apps/web/routers/utils.py
index 0ee75cfe6..d40a56d54 100644
--- a/backend/apps/web/routers/utils.py
+++ b/backend/apps/web/routers/utils.py
@@ -91,7 +91,11 @@ async def download_chat_as_pdf(
 
 @router.get("/db/download")
 async def download_db(user=Depends(get_admin_user)):
-
+    if not ALLOW_ADMIN_EXPORT:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail=ERROR_MESSAGES.ACCESS_PROHIBITED,
+        )
     return FileResponse(
         f"{DATA_DIR}/webui.db",
         media_type="application/octet-stream",
diff --git a/backend/config.py b/backend/config.py
index fb9063eb7..c6e05811b 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -382,6 +382,8 @@ MODEL_FILTER_LIST = [model.strip() for model in MODEL_FILTER_LIST.split(";")]
 
 WEBHOOK_URL = os.environ.get("WEBHOOK_URL", "")
 
+ALLOW_ADMIN_EXPORT = os.environ.get("ALLOW_ADMIN_EXPORT", "True").lower() == "true"
+
 ####################################
 # WEBUI_VERSION
 ####################################
diff --git a/backend/main.py b/backend/main.py
index 579ff2ee0..0ee1ad18c 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -52,6 +52,7 @@ from config import (
     GLOBAL_LOG_LEVEL,
     SRC_LOG_LEVELS,
     WEBHOOK_URL,
+    ALLOW_ADMIN_EXPORT,
 )
 from constants import ERROR_MESSAGES
 
@@ -207,6 +208,7 @@ async def get_app_config():
         "default_models": webui_app.state.DEFAULT_MODELS,
         "default_prompt_suggestions": webui_app.state.DEFAULT_PROMPT_SUGGESTIONS,
         "trusted_header_auth": bool(webui_app.state.AUTH_TRUSTED_EMAIL_HEADER),
+        "allow_admin_export": ALLOW_ADMIN_EXPORT,
     }
 
 
diff --git a/src/lib/components/admin/Settings/Database.svelte b/src/lib/components/admin/Settings/Database.svelte
index 7d3a34444..483f83251 100644
--- a/src/lib/components/admin/Settings/Database.svelte
+++ b/src/lib/components/admin/Settings/Database.svelte
@@ -1,6 +1,7 @@
 <script lang="ts">
 	import { downloadDatabase } from '$lib/apis/utils';
 	import { onMount, getContext } from 'svelte';
+	import { config } from '$lib/stores';
 
 	const i18n = getContext('i18n');
 
@@ -24,32 +25,34 @@
 			<div class="  flex w-full justify-between">
 				<!-- <div class=" self-center text-xs font-medium">{$i18n.t('Allow Chat Deletion')}</div> -->
 
-				<button
-					class=" flex rounded-md py-1.5 px-3 w-full hover:bg-gray-200 dark:hover:bg-gray-800 transition"
-					type="button"
-					on:click={() => {
+				{#if $config?.allow_admin_export ?? true}
+					<button
+						class=" flex rounded-md py-1.5 px-3 w-full hover:bg-gray-200 dark:hover:bg-gray-800 transition"
+						type="button"
+						on:click={() => {
 						// exportAllUserChats();
 
 						downloadDatabase(localStorage.token);
 					}}
-				>
-					<div class=" self-center mr-3">
-						<svg
-							xmlns="http://www.w3.org/2000/svg"
-							viewBox="0 0 16 16"
-							fill="currentColor"
-							class="w-4 h-4"
-						>
-							<path d="M2 3a1 1 0 0 1 1-1h10a1 1 0 0 1 1 1v1a1 1 0 0 1-1 1H3a1 1 0 0 1-1-1V3Z" />
-							<path
-								fill-rule="evenodd"
-								d="M13 6H3v6a2 2 0 0 0 2 2h6a2 2 0 0 0 2-2V6ZM8.75 7.75a.75.75 0 0 0-1.5 0v2.69L6.03 9.22a.75.75 0 0 0-1.06 1.06l2.5 2.5a.75.75 0 0 0 1.06 0l2.5-2.5a.75.75 0 1 0-1.06-1.06l-1.22 1.22V7.75Z"
-								clip-rule="evenodd"
-							/>
-						</svg>
-					</div>
-					<div class=" self-center text-sm font-medium">{$i18n.t('Download Database')}</div>
-				</button>
+					>
+						<div class=" self-center mr-3">
+							<svg
+								xmlns="http://www.w3.org/2000/svg"
+								viewBox="0 0 16 16"
+								fill="currentColor"
+								class="w-4 h-4"
+							>
+								<path d="M2 3a1 1 0 0 1 1-1h10a1 1 0 0 1 1 1v1a1 1 0 0 1-1 1H3a1 1 0 0 1-1-1V3Z" />
+								<path
+									fill-rule="evenodd"
+									d="M13 6H3v6a2 2 0 0 0 2 2h6a2 2 0 0 0 2-2V6ZM8.75 7.75a.75.75 0 0 0-1.5 0v2.69L6.03 9.22a.75.75 0 0 0-1.06 1.06l2.5 2.5a.75.75 0 0 0 1.06 0l2.5-2.5a.75.75 0 1 0-1.06-1.06l-1.22 1.22V7.75Z"
+									clip-rule="evenodd"
+								/>
+							</svg>
+						</div>
+						<div class=" self-center text-sm font-medium">{$i18n.t('Download Database')}</div>
+					</button>
+				{/if}
 			</div>
 		</div>
 	</div>
diff --git a/src/lib/components/chat/Settings/Chats.svelte b/src/lib/components/chat/Settings/Chats.svelte
index 191e64d35..c704d3832 100644
--- a/src/lib/components/chat/Settings/Chats.svelte
+++ b/src/lib/components/chat/Settings/Chats.svelte
@@ -301,7 +301,7 @@
 			</button>
 		{/if}
 
-		{#if $user?.role === 'admin'}
+		{#if $user?.role === 'admin' && ($config?.allow_admin_export ?? true)}
 			<hr class=" dark:border-gray-700" />
 
 			<button

From e2a8ad5fca0bae1d619cae9b436a9a9c3c0ab50e Mon Sep 17 00:00:00 2001
From: Jun Siang Cheah <git@jscheah.me>
Date: Mon, 22 Apr 2024 19:55:46 +0100
Subject: [PATCH 22/40] address comments, rename to ENABLE_ADMIN_EXPORT

---
 backend/apps/web/routers/chats.py                 | 7 ++-----
 backend/apps/web/routers/utils.py                 | 2 +-
 backend/config.py                                 | 2 +-
 backend/main.py                                   | 4 ++--
 src/lib/components/admin/Settings/Database.svelte | 8 ++++----
 src/lib/components/chat/Settings/Chats.svelte     | 2 +-
 6 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/backend/apps/web/routers/chats.py b/backend/apps/web/routers/chats.py
index 07f483698..bbe3d84b9 100644
--- a/backend/apps/web/routers/chats.py
+++ b/backend/apps/web/routers/chats.py
@@ -28,10 +28,7 @@ from apps.web.models.tags import (
 
 from constants import ERROR_MESSAGES
 
-from config import (
-    SRC_LOG_LEVELS,
-    ALLOW_ADMIN_EXPORT
-)
+from config import SRC_LOG_LEVELS, ENABLE_ADMIN_EXPORT
 
 log = logging.getLogger(__name__)
 log.setLevel(SRC_LOG_LEVELS["MODELS"])
@@ -82,7 +79,7 @@ async def get_all_user_chats(user=Depends(get_current_user)):
 
 @router.get("/all/db", response_model=List[ChatResponse])
 async def get_all_user_chats_in_db(user=Depends(get_admin_user)):
-    if not ALLOW_ADMIN_EXPORT:
+    if not ENABLE_ADMIN_EXPORT:
         raise HTTPException(
             status_code=status.HTTP_401_UNAUTHORIZED,
             detail=ERROR_MESSAGES.ACCESS_PROHIBITED,
diff --git a/backend/apps/web/routers/utils.py b/backend/apps/web/routers/utils.py
index d40a56d54..284f350a0 100644
--- a/backend/apps/web/routers/utils.py
+++ b/backend/apps/web/routers/utils.py
@@ -91,7 +91,7 @@ async def download_chat_as_pdf(
 
 @router.get("/db/download")
 async def download_db(user=Depends(get_admin_user)):
-    if not ALLOW_ADMIN_EXPORT:
+    if not ENABLE_ADMIN_EXPORT:
         raise HTTPException(
             status_code=status.HTTP_401_UNAUTHORIZED,
             detail=ERROR_MESSAGES.ACCESS_PROHIBITED,
diff --git a/backend/config.py b/backend/config.py
index c6e05811b..db226183e 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -382,7 +382,7 @@ MODEL_FILTER_LIST = [model.strip() for model in MODEL_FILTER_LIST.split(";")]
 
 WEBHOOK_URL = os.environ.get("WEBHOOK_URL", "")
 
-ALLOW_ADMIN_EXPORT = os.environ.get("ALLOW_ADMIN_EXPORT", "True").lower() == "true"
+ENABLE_ADMIN_EXPORT = os.environ.get("ENABLE_ADMIN_EXPORT", "True").lower() == "true"
 
 ####################################
 # WEBUI_VERSION
diff --git a/backend/main.py b/backend/main.py
index 0ee1ad18c..c7c78e18d 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -52,7 +52,7 @@ from config import (
     GLOBAL_LOG_LEVEL,
     SRC_LOG_LEVELS,
     WEBHOOK_URL,
-    ALLOW_ADMIN_EXPORT,
+    ENABLE_ADMIN_EXPORT,
 )
 from constants import ERROR_MESSAGES
 
@@ -208,7 +208,7 @@ async def get_app_config():
         "default_models": webui_app.state.DEFAULT_MODELS,
         "default_prompt_suggestions": webui_app.state.DEFAULT_PROMPT_SUGGESTIONS,
         "trusted_header_auth": bool(webui_app.state.AUTH_TRUSTED_EMAIL_HEADER),
-        "allow_admin_export": ALLOW_ADMIN_EXPORT,
+        "admin_export_enabled": ENABLE_ADMIN_EXPORT,
     }
 
 
diff --git a/src/lib/components/admin/Settings/Database.svelte b/src/lib/components/admin/Settings/Database.svelte
index 483f83251..06a0d595c 100644
--- a/src/lib/components/admin/Settings/Database.svelte
+++ b/src/lib/components/admin/Settings/Database.svelte
@@ -25,15 +25,15 @@
 			<div class="  flex w-full justify-between">
 				<!-- <div class=" self-center text-xs font-medium">{$i18n.t('Allow Chat Deletion')}</div> -->
 
-				{#if $config?.allow_admin_export ?? true}
+				{#if $config?.admin_export_enabled ?? true}
 					<button
 						class=" flex rounded-md py-1.5 px-3 w-full hover:bg-gray-200 dark:hover:bg-gray-800 transition"
 						type="button"
 						on:click={() => {
-						// exportAllUserChats();
+							// exportAllUserChats();
 
-						downloadDatabase(localStorage.token);
-					}}
+							downloadDatabase(localStorage.token);
+						}}
 					>
 						<div class=" self-center mr-3">
 							<svg
diff --git a/src/lib/components/chat/Settings/Chats.svelte b/src/lib/components/chat/Settings/Chats.svelte
index c704d3832..26d263625 100644
--- a/src/lib/components/chat/Settings/Chats.svelte
+++ b/src/lib/components/chat/Settings/Chats.svelte
@@ -301,7 +301,7 @@
 			</button>
 		{/if}
 
-		{#if $user?.role === 'admin' && ($config?.allow_admin_export ?? true)}
+		{#if $user?.role === 'admin' && ($config?.admin_export_enabled ?? true)}
 			<hr class=" dark:border-gray-700" />
 
 			<button

From ed13da8aba4cb767a7afc7b6036461f48a382e9a Mon Sep 17 00:00:00 2001
From: Jun Siang Cheah <git@jscheah.me>
Date: Mon, 22 Apr 2024 18:15:07 +0100
Subject: [PATCH 23/40] feat: add types to some frontend stores

---
 src/lib/stores/index.ts | 96 +++++++++++++++++++++++++++++++++++------
 1 file changed, 84 insertions(+), 12 deletions(-)

diff --git a/src/lib/stores/index.ts b/src/lib/stores/index.ts
index 038c34195..1d1826e3f 100644
--- a/src/lib/stores/index.ts
+++ b/src/lib/stores/index.ts
@@ -3,8 +3,8 @@ import { type Writable, writable } from 'svelte/store';
 
 // Backend
 export const WEBUI_NAME = writable(APP_NAME);
-export const config = writable(undefined);
-export const user = writable(undefined);
+export const config: Writable<Config | undefined> = writable(undefined);
+export const user: Writable<SessionUser | undefined> = writable(undefined);
 
 // Frontend
 export const MODEL_DOWNLOAD_POOL = writable({});
@@ -17,7 +17,7 @@ export const tags = writable([]);
 export const models: Writable<Model[]> = writable([]);
 
 export const modelfiles = writable([]);
-export const prompts = writable([]);
+export const prompts: Writable<Prompt[]> = writable([]);
 export const documents = writable([
 	{
 		collection_name: 'collection_name',
@@ -33,7 +33,7 @@ export const documents = writable([
 	}
 ]);
 
-export const settings = writable({});
+export const settings: Writable<Settings> = writable({});
 export const showSettings = writable(false);
 export const showChangelog = writable(false);
 
@@ -44,7 +44,7 @@ type OpenAIModel = {
 	name: string;
 	external: boolean;
 	source?: string;
-}
+};
 
 type OllamaModel = {
 	id: string;
@@ -57,13 +57,85 @@ type OllamaModel = {
 	model: string;
 	modified_at: string;
 	digest: string;
-}
+};
 
 type OllamaModelDetails = {
-  parent_model: string;
-  format: string;
-  family: string;
-  families: string[] | null;
-  parameter_size: string;
-  quantization_level: string;
+	parent_model: string;
+	format: string;
+	family: string;
+	families: string[] | null;
+	parameter_size: string;
+	quantization_level: string;
+};
+
+type Settings = {
+	models?: string[];
+	conversationMode?: boolean;
+	speechAutoSend?: boolean;
+	responseAutoPlayback?: boolean;
+	audio?: AudioSettings;
+	showUsername?: boolean;
+	saveChatHistory?: boolean;
+	notificationEnabled?: boolean;
+	title?: TitleSettings;
+
+	system?: string;
+	requestFormat?: string;
+	keepAlive?: string;
+	seed?: number;
+	temperature?: string;
+	repeat_penalty?: string;
+	top_k?: string;
+	top_p?: string;
+	num_ctx?: string;
+	options?: ModelOptions;
+};
+
+type ModelOptions = {
+	stop?: boolean;
+};
+
+type AudioSettings = {
+	STTEngine?: string;
+	TTSEngine?: string;
+	speaker?: string;
+};
+
+type TitleSettings = {
+	auto?: boolean;
+	model?: string;
+	modelExternal?: string;
+	prompt?: string;
+};
+
+type Prompt = {
+	command: string;
+	user_id: string;
+	title: string;
+	content: string;
+	timestamp: number;
+};
+
+type Config = {
+	status?: boolean;
+	name?: string;
+	version?: string;
+	default_locale?: string;
+	images?: boolean;
+	default_models?: string[];
+	default_prompt_suggestions?: PromptSuggestion[];
+	trusted_header_auth?: boolean;
+};
+
+type PromptSuggestion = {
+	content: string;
+	title: [string, string];
+};
+
+type SessionUser = {
+	id: string;
+	email: string;
+	name: string;
+	role: string;
+	profile_image_url: string;
 };

From f3e5700d49d5c7fe609aa16530b1b5d83ae10b90 Mon Sep 17 00:00:00 2001
From: Steven Kreitzer <Steve.Kreitzer@GDIT.com>
Date: Mon, 22 Apr 2024 13:27:43 -0500
Subject: [PATCH 24/40] feat: move to native sentence_transformer

---
 CHANGELOG.md                                  |   6 +
 Dockerfile                                    |  12 +-
 backend/apps/rag/main.py                      | 206 ++++++++----------
 backend/apps/rag/utils.py                     | 182 ++++------------
 backend/config.py                             |  11 +-
 backend/requirements.txt                      |   2 +
 .../documents/Settings/General.svelte         |   2 +-
 7 files changed, 153 insertions(+), 268 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index dad583399..1eaffc692 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.1.121] - 2024-04-22
+
+### Added
+
+- **🛠️ Improved Embedding Model Support**: You can now use any embedding model `sentence_transformers` supports.
+
 ## [0.1.120] - 2024-04-20
 
 ### Added
diff --git a/Dockerfile b/Dockerfile
index f19952909..a8f664ada 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -8,8 +8,8 @@ ARG USE_CUDA_VER=cu121
 # any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers
 # Leaderboard: https://huggingface.co/spaces/mteb/leaderboard 
 # for better performance and multilangauge support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB)
-# IMPORTANT: If you change the default model (all-MiniLM-L6-v2) and vice versa, you aren't able to use RAG Chat with your previous documents loaded in the WebUI! You need to re-embed them.
-ARG USE_EMBEDDING_MODEL=all-MiniLM-L6-v2
+# IMPORTANT: If you change the default model (sentence-transformers/all-MiniLM-L6-v2) and vice versa, you aren't able to use RAG Chat with your previous documents loaded in the WebUI! You need to re-embed them.
+ARG USE_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
 
 ######## WebUI frontend ########
 FROM --platform=$BUILDPLATFORM node:21-alpine3.19 as build
@@ -98,13 +98,13 @@ RUN pip3 install uv && \
         # If you use CUDA the whisper and embedding model will be downloaded on first use
         pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir && \
         uv pip install --system -r requirements.txt --no-cache-dir && \
-        python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \
-        python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='cpu')"; \
+        python -c "import os; from sentence_transformers import SentenceTransformer; SentenceTransformer(os.environ['RAG_EMBEDDING_MODEL'], device='cpu')" && \
+        python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"; \
     else \
         pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \
         uv pip install --system -r requirements.txt --no-cache-dir && \
-        python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \
-        python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='cpu')"; \
+        python -c "import os; from sentence_transformers import SentenceTransformer; SentenceTransformer(os.environ['RAG_EMBEDDING_MODEL'], device='cpu')" && \
+        python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"; \
     fi
 
 
diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py
index ac8410dbe..5da7489f1 100644
--- a/backend/apps/rag/main.py
+++ b/backend/apps/rag/main.py
@@ -13,7 +13,6 @@ import os, shutil, logging, re
 from pathlib import Path
 from typing import List
 
-from chromadb.utils import embedding_functions
 from chromadb.utils.batch_utils import create_batches
 
 from langchain_community.document_loaders import (
@@ -38,6 +37,7 @@ import mimetypes
 import uuid
 import json
 
+import sentence_transformers
 
 from apps.ollama.main import generate_ollama_embeddings, GenerateEmbeddingsForm
 
@@ -48,11 +48,8 @@ from apps.web.models.documents import (
 )
 
 from apps.rag.utils import (
-    query_doc,
     query_embeddings_doc,
-    query_collection,
     query_embeddings_collection,
-    get_embedding_model_path,
     generate_openai_embeddings,
 )
 
@@ -69,7 +66,7 @@ from config import (
     DOCS_DIR,
     RAG_EMBEDDING_ENGINE,
     RAG_EMBEDDING_MODEL,
-    RAG_EMBEDDING_MODEL_AUTO_UPDATE,
+    RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE,
     RAG_OPENAI_API_BASE_URL,
     RAG_OPENAI_API_KEY,
     DEVICE_TYPE,
@@ -101,15 +98,12 @@ app.state.OPENAI_API_KEY = RAG_OPENAI_API_KEY
 
 app.state.PDF_EXTRACT_IMAGES = False
 
-
-app.state.sentence_transformer_ef = (
-    embedding_functions.SentenceTransformerEmbeddingFunction(
-        model_name=get_embedding_model_path(
-            app.state.RAG_EMBEDDING_MODEL, RAG_EMBEDDING_MODEL_AUTO_UPDATE
-        ),
+if app.state.RAG_EMBEDDING_ENGINE == "":
+    app.state.sentence_transformer_ef = sentence_transformers.SentenceTransformer(
+        app.state.RAG_EMBEDDING_MODEL,
         device=DEVICE_TYPE,
+        trust_remote_code=RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE,
     )
-)
 
 
 origins = ["*"]
@@ -185,13 +179,10 @@ async def update_embedding_config(
                 app.state.OPENAI_API_BASE_URL = form_data.openai_config.url
                 app.state.OPENAI_API_KEY = form_data.openai_config.key
         else:
-            sentence_transformer_ef = (
-                embedding_functions.SentenceTransformerEmbeddingFunction(
-                    model_name=get_embedding_model_path(
-                        form_data.embedding_model, True
-                    ),
-                    device=DEVICE_TYPE,
-                )
+            sentence_transformer_ef = sentence_transformers.SentenceTransformer(
+                app.state.RAG_EMBEDDING_MODEL,
+                device=DEVICE_TYPE,
+                trust_remote_code=RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE,
             )
             app.state.RAG_EMBEDDING_MODEL = form_data.embedding_model
             app.state.sentence_transformer_ef = sentence_transformer_ef
@@ -294,38 +285,34 @@ def query_doc_handler(
     form_data: QueryDocForm,
     user=Depends(get_current_user),
 ):
-
     try:
         if app.state.RAG_EMBEDDING_ENGINE == "":
-            return query_doc(
-                collection_name=form_data.collection_name,
-                query=form_data.query,
-                k=form_data.k if form_data.k else app.state.TOP_K,
-                embedding_function=app.state.sentence_transformer_ef,
+            query_embeddings = app.state.sentence_transformer_ef.encode(
+                form_data.query
+            ).tolist()
+        elif app.state.RAG_EMBEDDING_ENGINE == "ollama":
+            query_embeddings = generate_ollama_embeddings(
+                GenerateEmbeddingsForm(
+                    **{
+                        "model": app.state.RAG_EMBEDDING_MODEL,
+                        "prompt": form_data.query,
+                    }
+                )
+            )
+        elif app.state.RAG_EMBEDDING_ENGINE == "openai":
+            query_embeddings = generate_openai_embeddings(
+                model=app.state.RAG_EMBEDDING_MODEL,
+                text=form_data.query,
+                key=app.state.OPENAI_API_KEY,
+                url=app.state.OPENAI_API_BASE_URL,
             )
-        else:
-            if app.state.RAG_EMBEDDING_ENGINE == "ollama":
-                query_embeddings = generate_ollama_embeddings(
-                    GenerateEmbeddingsForm(
-                        **{
-                            "model": app.state.RAG_EMBEDDING_MODEL,
-                            "prompt": form_data.query,
-                        }
-                    )
-                )
-            elif app.state.RAG_EMBEDDING_ENGINE == "openai":
-                query_embeddings = generate_openai_embeddings(
-                    model=app.state.RAG_EMBEDDING_MODEL,
-                    text=form_data.query,
-                    key=app.state.OPENAI_API_KEY,
-                    url=app.state.OPENAI_API_BASE_URL,
-                )
 
-            return query_embeddings_doc(
-                collection_name=form_data.collection_name,
-                query_embeddings=query_embeddings,
-                k=form_data.k if form_data.k else app.state.TOP_K,
-            )
+        return query_embeddings_doc(
+            collection_name=form_data.collection_name,
+            query=form_data.query,
+            query_embeddings=query_embeddings,
+            k=form_data.k if form_data.k else app.state.TOP_K,
+        )
 
     except Exception as e:
         log.exception(e)
@@ -348,36 +335,31 @@ def query_collection_handler(
 ):
     try:
         if app.state.RAG_EMBEDDING_ENGINE == "":
-            return query_collection(
-                collection_names=form_data.collection_names,
-                query=form_data.query,
-                k=form_data.k if form_data.k else app.state.TOP_K,
-                embedding_function=app.state.sentence_transformer_ef,
-            )
-        else:
-
-            if app.state.RAG_EMBEDDING_ENGINE == "ollama":
-                query_embeddings = generate_ollama_embeddings(
-                    GenerateEmbeddingsForm(
-                        **{
-                            "model": app.state.RAG_EMBEDDING_MODEL,
-                            "prompt": form_data.query,
-                        }
-                    )
+            query_embeddings = app.state.sentence_transformer_ef.encode(
+                form_data.query
+            ).tolist()
+        elif app.state.RAG_EMBEDDING_ENGINE == "ollama":
+            query_embeddings = generate_ollama_embeddings(
+                GenerateEmbeddingsForm(
+                    **{
+                        "model": app.state.RAG_EMBEDDING_MODEL,
+                        "prompt": form_data.query,
+                    }
                 )
-            elif app.state.RAG_EMBEDDING_ENGINE == "openai":
-                query_embeddings = generate_openai_embeddings(
-                    model=app.state.RAG_EMBEDDING_MODEL,
-                    text=form_data.query,
-                    key=app.state.OPENAI_API_KEY,
-                    url=app.state.OPENAI_API_BASE_URL,
-                )
-
-            return query_embeddings_collection(
-                collection_names=form_data.collection_names,
-                query_embeddings=query_embeddings,
-                k=form_data.k if form_data.k else app.state.TOP_K,
             )
+        elif app.state.RAG_EMBEDDING_ENGINE == "openai":
+            query_embeddings = generate_openai_embeddings(
+                model=app.state.RAG_EMBEDDING_MODEL,
+                text=form_data.query,
+                key=app.state.OPENAI_API_KEY,
+                url=app.state.OPENAI_API_BASE_URL,
+            )
+
+        return query_embeddings_collection(
+            collection_names=form_data.collection_names,
+            query_embeddings=query_embeddings,
+            k=form_data.k if form_data.k else app.state.TOP_K,
+        )
 
     except Exception as e:
         log.exception(e)
@@ -445,6 +427,8 @@ def store_docs_in_vector_db(docs, collection_name, overwrite: bool = False) -> b
     log.info(f"store_docs_in_vector_db {docs} {collection_name}")
 
     texts = [doc.page_content for doc in docs]
+    texts = list(map(lambda x: x.replace("\n", " "), texts))
+
     metadatas = [doc.metadata for doc in docs]
 
     try:
@@ -454,52 +438,38 @@ def store_docs_in_vector_db(docs, collection_name, overwrite: bool = False) -> b
                     log.info(f"deleting existing collection {collection_name}")
                     CHROMA_CLIENT.delete_collection(name=collection_name)
 
+        collection = CHROMA_CLIENT.create_collection(name=collection_name)
+
         if app.state.RAG_EMBEDDING_ENGINE == "":
-
-            collection = CHROMA_CLIENT.create_collection(
-                name=collection_name,
-                embedding_function=app.state.sentence_transformer_ef,
-            )
-
-            for batch in create_batches(
-                api=CHROMA_CLIENT,
-                ids=[str(uuid.uuid1()) for _ in texts],
-                metadatas=metadatas,
-                documents=texts,
-            ):
-                collection.add(*batch)
-
-        else:
-            collection = CHROMA_CLIENT.create_collection(name=collection_name)
-
-            if app.state.RAG_EMBEDDING_ENGINE == "ollama":
-                embeddings = [
-                    generate_ollama_embeddings(
-                        GenerateEmbeddingsForm(
-                            **{"model": app.state.RAG_EMBEDDING_MODEL, "prompt": text}
-                        )
+            embeddings = app.state.sentence_transformer_ef.encode(texts).tolist()
+        elif app.state.RAG_EMBEDDING_ENGINE == "ollama":
+            embeddings = [
+                generate_ollama_embeddings(
+                    GenerateEmbeddingsForm(
+                        **{"model": app.state.RAG_EMBEDDING_MODEL, "prompt": text}
                     )
-                    for text in texts
-                ]
-            elif app.state.RAG_EMBEDDING_ENGINE == "openai":
-                embeddings = [
-                    generate_openai_embeddings(
-                        model=app.state.RAG_EMBEDDING_MODEL,
-                        text=text,
-                        key=app.state.OPENAI_API_KEY,
-                        url=app.state.OPENAI_API_BASE_URL,
-                    )
-                    for text in texts
-                ]
+                )
+                for text in texts
+            ]
+        elif app.state.RAG_EMBEDDING_ENGINE == "openai":
+            embeddings = [
+                generate_openai_embeddings(
+                    model=app.state.RAG_EMBEDDING_MODEL,
+                    text=text,
+                    key=app.state.OPENAI_API_KEY,
+                    url=app.state.OPENAI_API_BASE_URL,
+                )
+                for text in texts
+            ]
 
-            for batch in create_batches(
-                api=CHROMA_CLIENT,
-                ids=[str(uuid.uuid1()) for _ in texts],
-                metadatas=metadatas,
-                embeddings=embeddings,
-                documents=texts,
-            ):
-                collection.add(*batch)
+        for batch in create_batches(
+            api=CHROMA_CLIENT,
+            ids=[str(uuid.uuid1()) for _ in texts],
+            metadatas=metadatas,
+            embeddings=embeddings,
+            documents=texts,
+        ):
+            collection.add(*batch)
 
         return True
     except Exception as e:
diff --git a/backend/apps/rag/utils.py b/backend/apps/rag/utils.py
index f4d1246c7..0ce299279 100644
--- a/backend/apps/rag/utils.py
+++ b/backend/apps/rag/utils.py
@@ -1,13 +1,12 @@
-import os
-import re
 import logging
-from typing import List
 import requests
 
+from typing import List
 
-from huggingface_hub import snapshot_download
-from apps.ollama.main import generate_ollama_embeddings, GenerateEmbeddingsForm
-
+from apps.ollama.main import (
+    generate_ollama_embeddings,
+    GenerateEmbeddingsForm,
+)
 
 from config import SRC_LOG_LEVELS, CHROMA_CLIENT
 
@@ -16,29 +15,12 @@ log = logging.getLogger(__name__)
 log.setLevel(SRC_LOG_LEVELS["RAG"])
 
 
-def query_doc(collection_name: str, query: str, k: int, embedding_function):
-    try:
-        # if you use docker use the model from the environment variable
-        collection = CHROMA_CLIENT.get_collection(
-            name=collection_name,
-            embedding_function=embedding_function,
-        )
-        result = collection.query(
-            query_texts=[query],
-            n_results=k,
-        )
-        return result
-    except Exception as e:
-        raise e
-
-
-def query_embeddings_doc(collection_name: str, query_embeddings, k: int):
+def query_embeddings_doc(collection_name: str, query: str, query_embeddings, k: int):
     try:
         # if you use docker use the model from the environment variable
         log.info(f"query_embeddings_doc {query_embeddings}")
-        collection = CHROMA_CLIENT.get_collection(
-            name=collection_name,
-        )
+        collection = CHROMA_CLIENT.get_collection(name=collection_name)
+
         result = collection.query(
             query_embeddings=[query_embeddings],
             n_results=k,
@@ -95,43 +77,20 @@ def merge_and_sort_query_results(query_results, k):
     return merged_query_results
 
 
-def query_collection(
-    collection_names: List[str], query: str, k: int, embedding_function
+def query_embeddings_collection(
+    collection_names: List[str], query: str, query_embeddings, k: int
 ):
 
-    results = []
-
-    for collection_name in collection_names:
-        try:
-            # if you use docker use the model from the environment variable
-            collection = CHROMA_CLIENT.get_collection(
-                name=collection_name,
-                embedding_function=embedding_function,
-            )
-
-            result = collection.query(
-                query_texts=[query],
-                n_results=k,
-            )
-            results.append(result)
-        except:
-            pass
-
-    return merge_and_sort_query_results(results, k)
-
-
-def query_embeddings_collection(collection_names: List[str], query_embeddings, k: int):
-
     results = []
     log.info(f"query_embeddings_collection {query_embeddings}")
 
     for collection_name in collection_names:
         try:
-            collection = CHROMA_CLIENT.get_collection(name=collection_name)
-
-            result = collection.query(
-                query_embeddings=[query_embeddings],
-                n_results=k,
+            result = query_embeddings_doc(
+                collection_name=collection_name,
+                query=query,
+                query_embeddings=query_embeddings,
+                k=k,
             )
             results.append(result)
         except:
@@ -197,51 +156,38 @@ def rag_messages(
                 context = doc["content"]
             else:
                 if embedding_engine == "":
-                    if doc["type"] == "collection":
-                        context = query_collection(
-                            collection_names=doc["collection_names"],
-                            query=query,
-                            k=k,
-                            embedding_function=embedding_function,
-                        )
-                    else:
-                        context = query_doc(
-                            collection_name=doc["collection_name"],
-                            query=query,
-                            k=k,
-                            embedding_function=embedding_function,
+                    query_embeddings = embedding_function.encode(query).tolist()
+                elif embedding_engine == "ollama":
+                    query_embeddings = generate_ollama_embeddings(
+                        GenerateEmbeddingsForm(
+                            **{
+                                "model": embedding_model,
+                                "prompt": query,
+                            }
                         )
+                    )
+                elif embedding_engine == "openai":
+                    query_embeddings = generate_openai_embeddings(
+                        model=embedding_model,
+                        text=query,
+                        key=openai_key,
+                        url=openai_url,
+                    )
 
+                if doc["type"] == "collection":
+                    context = query_embeddings_collection(
+                        collection_names=doc["collection_names"],
+                        query=query,
+                        query_embeddings=query_embeddings,
+                        k=k,
+                    )
                 else:
-                    if embedding_engine == "ollama":
-                        query_embeddings = generate_ollama_embeddings(
-                            GenerateEmbeddingsForm(
-                                **{
-                                    "model": embedding_model,
-                                    "prompt": query,
-                                }
-                            )
-                        )
-                    elif embedding_engine == "openai":
-                        query_embeddings = generate_openai_embeddings(
-                            model=embedding_model,
-                            text=query,
-                            key=openai_key,
-                            url=openai_url,
-                        )
-
-                    if doc["type"] == "collection":
-                        context = query_embeddings_collection(
-                            collection_names=doc["collection_names"],
-                            query_embeddings=query_embeddings,
-                            k=k,
-                        )
-                    else:
-                        context = query_embeddings_doc(
-                            collection_name=doc["collection_name"],
-                            query_embeddings=query_embeddings,
-                            k=k,
-                        )
+                    context = query_embeddings_doc(
+                        collection_name=doc["collection_name"],
+                        query=query,
+                        query_embeddings=query_embeddings,
+                        k=k,
+                    )
 
         except Exception as e:
             log.exception(e)
@@ -283,46 +229,6 @@ def rag_messages(
     return messages
 
 
-def get_embedding_model_path(
-    embedding_model: str, update_embedding_model: bool = False
-):
-    # Construct huggingface_hub kwargs with local_files_only to return the snapshot path
-    cache_dir = os.getenv("SENTENCE_TRANSFORMERS_HOME")
-
-    local_files_only = not update_embedding_model
-
-    snapshot_kwargs = {
-        "cache_dir": cache_dir,
-        "local_files_only": local_files_only,
-    }
-
-    log.debug(f"embedding_model: {embedding_model}")
-    log.debug(f"snapshot_kwargs: {snapshot_kwargs}")
-
-    # Inspiration from upstream sentence_transformers
-    if (
-        os.path.exists(embedding_model)
-        or ("\\" in embedding_model or embedding_model.count("/") > 1)
-        and local_files_only
-    ):
-        # If fully qualified path exists, return input, else set repo_id
-        return embedding_model
-    elif "/" not in embedding_model:
-        # Set valid repo_id for model short-name
-        embedding_model = "sentence-transformers" + "/" + embedding_model
-
-    snapshot_kwargs["repo_id"] = embedding_model
-
-    # Attempt to query the huggingface_hub library to determine the local path and/or to update
-    try:
-        embedding_model_repo_path = snapshot_download(**snapshot_kwargs)
-        log.debug(f"embedding_model_repo_path: {embedding_model_repo_path}")
-        return embedding_model_repo_path
-    except Exception as e:
-        log.exception(f"Cannot determine embedding model snapshot path: {e}")
-        return embedding_model
-
-
 def generate_openai_embeddings(
     model: str, text: str, key: str, url: str = "https://api.openai.com/v1"
 ):
diff --git a/backend/config.py b/backend/config.py
index 6ca2c67bf..17f8f91bf 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -411,18 +411,19 @@ if WEBUI_AUTH and WEBUI_SECRET_KEY == "":
 ####################################
 
 CHROMA_DATA_PATH = f"{DATA_DIR}/vector_db"
-# this uses the model defined in the Dockerfile ENV variable. If you dont use docker or docker based deployments such as k8s, the default embedding model will be used (all-MiniLM-L6-v2)
+# this uses the model defined in the Dockerfile ENV variable. If you dont use docker or docker based deployments such as k8s, the default embedding model will be used (sentence-transformers/all-MiniLM-L6-v2)
 
 RAG_EMBEDDING_ENGINE = os.environ.get("RAG_EMBEDDING_ENGINE", "")
 
-RAG_EMBEDDING_MODEL = os.environ.get("RAG_EMBEDDING_MODEL", "all-MiniLM-L6-v2")
+RAG_EMBEDDING_MODEL = os.environ.get(
+    "RAG_EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2"
+)
 log.info(f"Embedding model set: {RAG_EMBEDDING_MODEL}"),
 
-RAG_EMBEDDING_MODEL_AUTO_UPDATE = (
-    os.environ.get("RAG_EMBEDDING_MODEL_AUTO_UPDATE", "").lower() == "true"
+RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE = (
+    os.environ.get("RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE", "").lower() == "true"
 )
 
-
 # device type embedding models - "cpu" (default), "cuda" (nvidia gpu required) or "mps" (apple silicon) - choosing this right can lead to better performance
 USE_CUDA = os.environ.get("USE_CUDA_DOCKER", "false")
 
diff --git a/backend/requirements.txt b/backend/requirements.txt
index c815d93da..d5c179d86 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -25,6 +25,7 @@ apscheduler
 google-generativeai
 
 langchain
+langchain-chroma
 langchain-community
 fake_useragent
 chromadb
@@ -43,6 +44,7 @@ opencv-python-headless
 rapidocr-onnxruntime
 
 fpdf2
+rank_bm25
 
 faster-whisper
 
diff --git a/src/lib/components/documents/Settings/General.svelte b/src/lib/components/documents/Settings/General.svelte
index a2bbec852..6abdda5af 100644
--- a/src/lib/components/documents/Settings/General.svelte
+++ b/src/lib/components/documents/Settings/General.svelte
@@ -180,7 +180,7 @@
 							}
 						}}
 					>
-						<option value="">{$i18n.t('Default (SentenceTransformer)')}</option>
+						<option value="">{$i18n.t('Default (SentenceTransformers)')}</option>
 						<option value="ollama">{$i18n.t('Ollama')}</option>
 						<option value="openai">{$i18n.t('OpenAI')}</option>
 					</select>

From e3d253b0403cd55950a37a95a24e1dee1a96bc94 Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Tue, 23 Apr 2024 06:53:04 -0400
Subject: [PATCH 25/40] feat: image env var

---
 backend/apps/images/main.py | 8 ++++----
 backend/config.py           | 4 ++++
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/backend/apps/images/main.py b/backend/apps/images/main.py
index a3939d206..5f7ce388b 100644
--- a/backend/apps/images/main.py
+++ b/backend/apps/images/main.py
@@ -35,8 +35,8 @@ from config import (
     ENABLE_IMAGE_GENERATION,
     AUTOMATIC1111_BASE_URL,
     COMFYUI_BASE_URL,
-    OPENAI_API_BASE_URL,
-    OPENAI_API_KEY,
+    IMAGE_OPENAI_API_BASE_URL,
+    IMAGE_OPENAI_API_KEY,
 )
 
 
@@ -58,8 +58,8 @@ app.add_middleware(
 app.state.ENGINE = ""
 app.state.ENABLED = ENABLE_IMAGE_GENERATION
 
-app.state.OPENAI_API_BASE_URL = OPENAI_API_BASE_URL
-app.state.OPENAI_API_KEY = OPENAI_API_KEY
+app.state.OPENAI_API_BASE_URL = IMAGE_OPENAI_API_BASE_URL
+app.state.OPENAI_API_KEY = IMAGE_OPENAI_API_KEY
 
 app.state.MODEL = ""
 
diff --git a/backend/config.py b/backend/config.py
index 199037e4f..4d05a001e 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -487,6 +487,10 @@ AUTOMATIC1111_BASE_URL = os.getenv("AUTOMATIC1111_BASE_URL", "")
 COMFYUI_BASE_URL = os.getenv("COMFYUI_BASE_URL", "")
 
 
+IMAGE_OPENAI_API_BASE_URL = os.getenv("IMAGE_OPENAI_API_BASE_URL", OPENAI_API_BASE_URL)
+IMAGE_OPENAI_API_KEY = os.getenv("IMAGE_OPENAI_API_KEY", OPENAI_API_KEY)
+
+
 ####################################
 # Audio
 ####################################

From aa489be53b737be69b8f0880f13484d599d730b4 Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Tue, 23 Apr 2024 06:58:57 -0400
Subject: [PATCH 26/40] Update config.py

---
 backend/config.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/backend/config.py b/backend/config.py
index 4d05a001e..489e80647 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -487,8 +487,10 @@ AUTOMATIC1111_BASE_URL = os.getenv("AUTOMATIC1111_BASE_URL", "")
 COMFYUI_BASE_URL = os.getenv("COMFYUI_BASE_URL", "")
 
 
-IMAGE_OPENAI_API_BASE_URL = os.getenv("IMAGE_OPENAI_API_BASE_URL", OPENAI_API_BASE_URL)
-IMAGE_OPENAI_API_KEY = os.getenv("IMAGE_OPENAI_API_KEY", OPENAI_API_KEY)
+IMAGES_OPENAI_API_BASE_URL = os.getenv(
+    "IMAGES_OPENAI_API_BASE_URL", OPENAI_API_BASE_URL
+)
+IMAGES_OPENAI_API_KEY = os.getenv("IMAGES_OPENAI_API_KEY", OPENAI_API_KEY)
 
 
 ####################################

From 25d09363dfc511cb25d634aa45c9f769aa243495 Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Tue, 23 Apr 2024 07:14:31 -0400
Subject: [PATCH 27/40] feat: editable openai url for images

---
 backend/apps/images/main.py                   | 32 +++++++++++--------
 src/lib/apis/images/index.ts                  | 13 ++++----
 .../components/chat/Settings/Images.svelte    | 29 ++++++++++++-----
 static/manifest.json                          |  1 +
 4 files changed, 48 insertions(+), 27 deletions(-)
 create mode 100644 static/manifest.json

diff --git a/backend/apps/images/main.py b/backend/apps/images/main.py
index 5f7ce388b..2059ac3c0 100644
--- a/backend/apps/images/main.py
+++ b/backend/apps/images/main.py
@@ -35,8 +35,8 @@ from config import (
     ENABLE_IMAGE_GENERATION,
     AUTOMATIC1111_BASE_URL,
     COMFYUI_BASE_URL,
-    IMAGE_OPENAI_API_BASE_URL,
-    IMAGE_OPENAI_API_KEY,
+    IMAGES_OPENAI_API_BASE_URL,
+    IMAGES_OPENAI_API_KEY,
 )
 
 
@@ -58,8 +58,8 @@ app.add_middleware(
 app.state.ENGINE = ""
 app.state.ENABLED = ENABLE_IMAGE_GENERATION
 
-app.state.OPENAI_API_BASE_URL = IMAGE_OPENAI_API_BASE_URL
-app.state.OPENAI_API_KEY = IMAGE_OPENAI_API_KEY
+app.state.OPENAI_API_BASE_URL = IMAGES_OPENAI_API_BASE_URL
+app.state.OPENAI_API_KEY = IMAGES_OPENAI_API_KEY
 
 app.state.MODEL = ""
 
@@ -135,27 +135,33 @@ async def update_engine_url(
     }
 
 
-class OpenAIKeyUpdateForm(BaseModel):
+class OpenAIConfigUpdateForm(BaseModel):
+    url: str
     key: str
 
 
-@app.get("/key")
-async def get_openai_key(user=Depends(get_admin_user)):
-    return {"OPENAI_API_KEY": app.state.OPENAI_API_KEY}
+@app.get("/openai/config")
+async def get_openai_config(user=Depends(get_admin_user)):
+    return {
+        "OPENAI_API_BASE_URL": app.state.OPENAI_API_BASE_URL,
+        "OPENAI_API_KEY": app.state.OPENAI_API_KEY,
+    }
 
 
-@app.post("/key/update")
-async def update_openai_key(
-    form_data: OpenAIKeyUpdateForm, user=Depends(get_admin_user)
+@app.post("/openai/config/update")
+async def update_openai_config(
+    form_data: OpenAIConfigUpdateForm, user=Depends(get_admin_user)
 ):
-
     if form_data.key == "":
         raise HTTPException(status_code=400, detail=ERROR_MESSAGES.API_KEY_NOT_FOUND)
 
+    app.state.OPENAI_API_BASE_URL = form_data.url
     app.state.OPENAI_API_KEY = form_data.key
+
     return {
-        "OPENAI_API_KEY": app.state.OPENAI_API_KEY,
         "status": True,
+        "OPENAI_API_BASE_URL": app.state.OPENAI_API_BASE_URL,
+        "OPENAI_API_KEY": app.state.OPENAI_API_KEY,
     }
 
 
diff --git a/src/lib/apis/images/index.ts b/src/lib/apis/images/index.ts
index aadfafd14..3f624704e 100644
--- a/src/lib/apis/images/index.ts
+++ b/src/lib/apis/images/index.ts
@@ -72,10 +72,10 @@ export const updateImageGenerationConfig = async (
 	return res;
 };
 
-export const getOpenAIKey = async (token: string = '') => {
+export const getOpenAIConfig = async (token: string = '') => {
 	let error = null;
 
-	const res = await fetch(`${IMAGES_API_BASE_URL}/key`, {
+	const res = await fetch(`${IMAGES_API_BASE_URL}/openai/config`, {
 		method: 'GET',
 		headers: {
 			Accept: 'application/json',
@@ -101,13 +101,13 @@ export const getOpenAIKey = async (token: string = '') => {
 		throw error;
 	}
 
-	return res.OPENAI_API_KEY;
+	return res;
 };
 
-export const updateOpenAIKey = async (token: string = '', key: string) => {
+export const updateOpenAIConfig = async (token: string = '', url: string, key: string) => {
 	let error = null;
 
-	const res = await fetch(`${IMAGES_API_BASE_URL}/key/update`, {
+	const res = await fetch(`${IMAGES_API_BASE_URL}/openai/config/update`, {
 		method: 'POST',
 		headers: {
 			Accept: 'application/json',
@@ -115,6 +115,7 @@ export const updateOpenAIKey = async (token: string = '', key: string) => {
 			...(token && { authorization: `Bearer ${token}` })
 		},
 		body: JSON.stringify({
+			url: url,
 			key: key
 		})
 	})
@@ -136,7 +137,7 @@ export const updateOpenAIKey = async (token: string = '', key: string) => {
 		throw error;
 	}
 
-	return res.OPENAI_API_KEY;
+	return res;
 };
 
 export const getImageGenerationEngineUrls = async (token: string = '') => {
diff --git a/src/lib/components/chat/Settings/Images.svelte b/src/lib/components/chat/Settings/Images.svelte
index 7282c184a..b1a31f8b9 100644
--- a/src/lib/components/chat/Settings/Images.svelte
+++ b/src/lib/components/chat/Settings/Images.svelte
@@ -15,8 +15,8 @@
 		updateImageSize,
 		getImageSteps,
 		updateImageSteps,
-		getOpenAIKey,
-		updateOpenAIKey
+		getOpenAIConfig,
+		updateOpenAIConfig
 	} from '$lib/apis/images';
 	import { getBackendConfig } from '$lib/apis';
 	const dispatch = createEventDispatcher();
@@ -33,6 +33,7 @@
 	let AUTOMATIC1111_BASE_URL = '';
 	let COMFYUI_BASE_URL = '';
 
+	let OPENAI_API_BASE_URL = '';
 	let OPENAI_API_KEY = '';
 
 	let selectedModel = '';
@@ -131,7 +132,10 @@
 			AUTOMATIC1111_BASE_URL = URLS.AUTOMATIC1111_BASE_URL;
 			COMFYUI_BASE_URL = URLS.COMFYUI_BASE_URL;
 
-			OPENAI_API_KEY = await getOpenAIKey(localStorage.token);
+			const config = await getOpenAIConfig(localStorage.token);
+
+			OPENAI_API_KEY = config.OPENAI_API_KEY;
+			OPENAI_API_BASE_URL = config.OPENAI_API_BASE_URL;
 
 			imageSize = await getImageSize(localStorage.token);
 			steps = await getImageSteps(localStorage.token);
@@ -149,7 +153,7 @@
 		loading = true;
 
 		if (imageGenerationEngine === 'openai') {
-			await updateOpenAIKey(localStorage.token, OPENAI_API_KEY);
+			await updateOpenAIConfig(localStorage.token, OPENAI_API_BASE_URL, OPENAI_API_KEY);
 		}
 
 		await updateDefaultImageGenerationModel(localStorage.token, selectedModel);
@@ -300,13 +304,22 @@
 				</button>
 			</div>
 		{:else if imageGenerationEngine === 'openai'}
-			<div class=" mb-2.5 text-sm font-medium">{$i18n.t('OpenAI API Key')}</div>
-			<div class="flex w-full">
-				<div class="flex-1 mr-2">
+			<div>
+				<div class=" mb-1.5 text-sm font-medium">{$i18n.t('OpenAI API Config')}</div>
+
+				<div class="flex gap-2 mb-1">
 					<input
 						class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
-						placeholder={$i18n.t('Enter API Key')}
+						placeholder={$i18n.t('API Base URL')}
+						bind:value={OPENAI_API_BASE_URL}
+						required
+					/>
+
+					<input
+						class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
+						placeholder={$i18n.t('API Key')}
 						bind:value={OPENAI_API_KEY}
+						required
 					/>
 				</div>
 			</div>
diff --git a/static/manifest.json b/static/manifest.json
new file mode 100644
index 000000000..9e26dfeeb
--- /dev/null
+++ b/static/manifest.json
@@ -0,0 +1 @@
+{}
\ No newline at end of file

From b1d204fdd40b1d1f8240e8954001636ea9f17d7e Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Tue, 23 Apr 2024 07:20:24 -0400
Subject: [PATCH 28/40] feat: allow custom model name

---
 .../components/chat/Settings/Images.svelte    | 46 +++++++++++++------
 1 file changed, 33 insertions(+), 13 deletions(-)

diff --git a/src/lib/components/chat/Settings/Images.svelte b/src/lib/components/chat/Settings/Images.svelte
index b1a31f8b9..19e050ca8 100644
--- a/src/lib/components/chat/Settings/Images.svelte
+++ b/src/lib/components/chat/Settings/Images.svelte
@@ -332,19 +332,39 @@
 				<div class=" mb-2.5 text-sm font-medium">{$i18n.t('Set Default Model')}</div>
 				<div class="flex w-full">
 					<div class="flex-1 mr-2">
-						<select
-							class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
-							bind:value={selectedModel}
-							placeholder={$i18n.t('Select a model')}
-							required
-						>
-							{#if !selectedModel}
-								<option value="" disabled selected>{$i18n.t('Select a model')}</option>
-							{/if}
-							{#each models ?? [] as model}
-								<option value={model.id} class="bg-gray-100 dark:bg-gray-700">{model.name}</option>
-							{/each}
-						</select>
+						{#if imageGenerationEngine === 'openai' && !OPENAI_API_BASE_URL.includes('https://api.openai.com')}
+							<div class="flex w-full">
+								<div class="flex-1">
+									<input
+										list="model-list"
+										class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
+										bind:value={selectedModel}
+										placeholder="Select a model"
+									/>
+
+									<datalist id="model-list">
+										{#each models ?? [] as model}
+											<option value={model.id}>{model.name}</option>
+										{/each}
+									</datalist>
+								</div>
+							</div>
+						{:else}
+							<select
+								class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
+								bind:value={selectedModel}
+								placeholder={$i18n.t('Select a model')}
+								required
+							>
+								{#if !selectedModel}
+									<option value="" disabled selected>{$i18n.t('Select a model')}</option>
+								{/if}
+								{#each models ?? [] as model}
+									<option value={model.id} class="bg-gray-100 dark:bg-gray-700">{model.name}</option
+									>
+								{/each}
+							</select>
+						{/if}
 					</div>
 				</div>
 			</div>

From 4809d363b3ceb61fbea6b802748fc95b8422f396 Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Tue, 23 Apr 2024 07:21:20 -0400
Subject: [PATCH 29/40] Update manifest.json

---
 static/manifest.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/static/manifest.json b/static/manifest.json
index 9e26dfeeb..0967ef424 100644
--- a/static/manifest.json
+++ b/static/manifest.json
@@ -1 +1 @@
-{}
\ No newline at end of file
+{}

From cc3312157b3a7be80b4b2f841e4c157bc67cabb8 Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Tue, 23 Apr 2024 07:36:46 -0400
Subject: [PATCH 30/40] refac: model download

---
 .../components/chat/Settings/Models.svelte    | 248 +++++++++---------
 1 file changed, 119 insertions(+), 129 deletions(-)

diff --git a/src/lib/components/chat/Settings/Models.svelte b/src/lib/components/chat/Settings/Models.svelte
index 688774d78..821d0fc4c 100644
--- a/src/lib/components/chat/Settings/Models.svelte
+++ b/src/lib/components/chat/Settings/Models.svelte
@@ -13,7 +13,7 @@
 		uploadModel
 	} from '$lib/apis/ollama';
 	import { WEBUI_API_BASE_URL, WEBUI_BASE_URL } from '$lib/constants';
-	import { WEBUI_NAME, models, user } from '$lib/stores';
+	import { WEBUI_NAME, models, MODEL_DOWNLOAD_POOL, user } from '$lib/stores';
 	import { splitStream } from '$lib/utils';
 	import { onMount, getContext } from 'svelte';
 	import { addLiteLLMModel, deleteLiteLLMModel, getLiteLLMModelInfo } from '$lib/apis/litellm';
@@ -50,12 +50,6 @@
 	let showExperimentalOllama = false;
 	let ollamaVersion = '';
 	const MAX_PARALLEL_DOWNLOADS = 3;
-	const modelDownloadQueue = queue(
-		(task: { modelName: string }, cb) =>
-			pullModelHandlerProcessor({ modelName: task.modelName, callback: cb }),
-		MAX_PARALLEL_DOWNLOADS
-	);
-	let modelDownloadStatus: Record<string, any> = {};
 
 	let modelTransferring = false;
 	let modelTag = '';
@@ -140,7 +134,8 @@
 
 	const pullModelHandler = async () => {
 		const sanitizedModelTag = modelTag.trim().replace(/^ollama\s+(run|pull)\s+/, '');
-		if (modelDownloadStatus[sanitizedModelTag]) {
+		console.log($MODEL_DOWNLOAD_POOL);
+		if ($MODEL_DOWNLOAD_POOL[sanitizedModelTag]) {
 			toast.error(
 				$i18n.t(`Model '{{modelTag}}' is already in queue for downloading.`, {
 					modelTag: sanitizedModelTag
@@ -148,40 +143,117 @@
 			);
 			return;
 		}
-		if (Object.keys(modelDownloadStatus).length === 3) {
+		if (Object.keys($MODEL_DOWNLOAD_POOL).length === MAX_PARALLEL_DOWNLOADS) {
 			toast.error(
 				$i18n.t('Maximum of 3 models can be downloaded simultaneously. Please try again later.')
 			);
 			return;
 		}
 
-		modelTransferring = true;
+		const res = await pullModel(localStorage.token, sanitizedModelTag, '0').catch((error) => {
+			toast.error(error);
+			return null;
+		});
 
-		modelDownloadQueue.push(
-			{ modelName: sanitizedModelTag },
-			async (data: { modelName: string; success: boolean; error?: Error }) => {
-				const { modelName } = data;
-				// Remove the downloaded model
-				delete modelDownloadStatus[modelName];
+		if (res) {
+			const reader = res.body
+				.pipeThrough(new TextDecoderStream())
+				.pipeThrough(splitStream('\n'))
+				.getReader();
 
-				modelDownloadStatus = { ...modelDownloadStatus };
+			while (true) {
+				try {
+					const { value, done } = await reader.read();
+					if (done) break;
 
-				if (!data.success) {
-					toast.error(data.error);
-				} else {
-					toast.success(
-						$i18n.t(`Model '{{modelName}}' has been successfully downloaded.`, { modelName })
-					);
+					let lines = value.split('\n');
 
-					const notification = new Notification($WEBUI_NAME, {
-						body: $i18n.t(`Model '{{modelName}}' has been successfully downloaded.`, { modelName }),
-						icon: `${WEBUI_BASE_URL}/static/favicon.png`
-					});
+					for (const line of lines) {
+						if (line !== '') {
+							let data = JSON.parse(line);
+							console.log(data);
+							if (data.error) {
+								throw data.error;
+							}
+							if (data.detail) {
+								throw data.detail;
+							}
 
-					models.set(await getModels());
+							if (data.id) {
+								MODEL_DOWNLOAD_POOL.set({
+									...$MODEL_DOWNLOAD_POOL,
+									[sanitizedModelTag]: {
+										...$MODEL_DOWNLOAD_POOL[sanitizedModelTag],
+										requestId: data.id,
+										reader,
+										done: false
+									}
+								});
+								console.log(data);
+							}
+
+							if (data.status) {
+								if (data.digest) {
+									let downloadProgress = 0;
+									if (data.completed) {
+										downloadProgress = Math.round((data.completed / data.total) * 1000) / 10;
+									} else {
+										downloadProgress = 100;
+									}
+
+									MODEL_DOWNLOAD_POOL.set({
+										...$MODEL_DOWNLOAD_POOL,
+										[sanitizedModelTag]: {
+											...$MODEL_DOWNLOAD_POOL[sanitizedModelTag],
+											pullProgress: downloadProgress,
+											digest: data.digest
+										}
+									});
+								} else {
+									toast.success(data.status);
+
+									MODEL_DOWNLOAD_POOL.set({
+										...$MODEL_DOWNLOAD_POOL,
+										[sanitizedModelTag]: {
+											...$MODEL_DOWNLOAD_POOL[sanitizedModelTag],
+											done: data.status === 'success'
+										}
+									});
+								}
+							}
+						}
+					}
+				} catch (error) {
+					console.log(error);
+					if (typeof error !== 'string') {
+						error = error.message;
+					}
+
+					toast.error(error);
+					// opts.callback({ success: false, error, modelName: opts.modelName });
 				}
 			}
-		);
+
+			console.log($MODEL_DOWNLOAD_POOL[sanitizedModelTag]);
+
+			if ($MODEL_DOWNLOAD_POOL[sanitizedModelTag].done) {
+				toast.success(
+					$i18n.t(`Model '{{modelName}}' has been successfully downloaded.`, {
+						modelName: sanitizedModelTag
+					})
+				);
+
+				models.set(await getModels(localStorage.token));
+			} else {
+				toast.error('Download canceled');
+			}
+
+			delete $MODEL_DOWNLOAD_POOL[sanitizedModelTag];
+
+			MODEL_DOWNLOAD_POOL.set({
+				...$MODEL_DOWNLOAD_POOL
+			});
+		}
 
 		modelTag = '';
 		modelTransferring = false;
@@ -352,88 +424,18 @@
 		models.set(await getModels());
 	};
 
-	const pullModelHandlerProcessor = async (opts: { modelName: string; callback: Function }) => {
-		const res = await pullModel(localStorage.token, opts.modelName, selectedOllamaUrlIdx).catch(
-			(error) => {
-				opts.callback({ success: false, error, modelName: opts.modelName });
-				return null;
-			}
-		);
+	const cancelModelPullHandler = async (model: string) => {
+		const { reader, requestId } = $MODEL_DOWNLOAD_POOL[model];
+		if (reader) {
+			await reader.cancel();
 
-		if (res) {
-			const reader = res.body
-				.pipeThrough(new TextDecoderStream())
-				.pipeThrough(splitStream('\n'))
-				.getReader();
-
-			while (true) {
-				try {
-					const { value, done } = await reader.read();
-					if (done) break;
-
-					let lines = value.split('\n');
-
-					for (const line of lines) {
-						if (line !== '') {
-							let data = JSON.parse(line);
-							console.log(data);
-							if (data.error) {
-								throw data.error;
-							}
-							if (data.detail) {
-								throw data.detail;
-							}
-
-							if (data.id) {
-								modelDownloadStatus[opts.modelName] = {
-									...modelDownloadStatus[opts.modelName],
-									requestId: data.id,
-									reader,
-									done: false
-								};
-								console.log(data);
-							}
-
-							if (data.status) {
-								if (data.digest) {
-									let downloadProgress = 0;
-									if (data.completed) {
-										downloadProgress = Math.round((data.completed / data.total) * 1000) / 10;
-									} else {
-										downloadProgress = 100;
-									}
-									modelDownloadStatus[opts.modelName] = {
-										...modelDownloadStatus[opts.modelName],
-										pullProgress: downloadProgress,
-										digest: data.digest
-									};
-								} else {
-									toast.success(data.status);
-
-									modelDownloadStatus[opts.modelName] = {
-										...modelDownloadStatus[opts.modelName],
-										done: data.status === 'success'
-									};
-								}
-							}
-						}
-					}
-				} catch (error) {
-					console.log(error);
-					if (typeof error !== 'string') {
-						error = error.message;
-					}
-					opts.callback({ success: false, error, modelName: opts.modelName });
-				}
-			}
-
-			console.log(modelDownloadStatus[opts.modelName]);
-
-			if (modelDownloadStatus[opts.modelName].done) {
-				opts.callback({ success: true, modelName: opts.modelName });
-			} else {
-				opts.callback({ success: false, error: 'Download canceled', modelName: opts.modelName });
-			}
+			await cancelOllamaRequest(localStorage.token, requestId);
+			delete $MODEL_DOWNLOAD_POOL[model];
+			MODEL_DOWNLOAD_POOL.set({
+				...$MODEL_DOWNLOAD_POOL
+			});
+			await deleteModel(localStorage.token, model);
+			toast.success(`${model} download has been canceled`);
 		}
 	};
 
@@ -503,18 +505,6 @@
 		ollamaVersion = await getOllamaVersion(localStorage.token).catch((error) => false);
 		liteLLMModelInfo = await getLiteLLMModelInfo(localStorage.token);
 	});
-
-	const cancelModelPullHandler = async (model: string) => {
-		const { reader, requestId } = modelDownloadStatus[model];
-		if (reader) {
-			await reader.cancel();
-
-			await cancelOllamaRequest(localStorage.token, requestId);
-			delete modelDownloadStatus[model];
-			await deleteModel(localStorage.token, model);
-			toast.success(`${model} download has been canceled`);
-		}
-	};
 </script>
 
 <div class="flex flex-col h-full justify-between text-sm">
@@ -643,9 +633,9 @@
 							>
 						</div>
 
-						{#if Object.keys(modelDownloadStatus).length > 0}
-							{#each Object.keys(modelDownloadStatus) as model}
-								{#if 'pullProgress' in modelDownloadStatus[model]}
+						{#if Object.keys($MODEL_DOWNLOAD_POOL).length > 0}
+							{#each Object.keys($MODEL_DOWNLOAD_POOL) as model}
+								{#if 'pullProgress' in $MODEL_DOWNLOAD_POOL[model]}
 									<div class="flex flex-col">
 										<div class="font-medium mb-1">{model}</div>
 										<div class="">
@@ -655,10 +645,10 @@
 														class="dark:bg-gray-600 bg-gray-500 text-xs font-medium text-gray-100 text-center p-0.5 leading-none rounded-full"
 														style="width: {Math.max(
 															15,
-															modelDownloadStatus[model].pullProgress ?? 0
+															$MODEL_DOWNLOAD_POOL[model].pullProgress ?? 0
 														)}%"
 													>
-														{modelDownloadStatus[model].pullProgress ?? 0}%
+														{$MODEL_DOWNLOAD_POOL[model].pullProgress ?? 0}%
 													</div>
 												</div>
 
@@ -689,9 +679,9 @@
 													</button>
 												</Tooltip>
 											</div>
-											{#if 'digest' in modelDownloadStatus[model]}
+											{#if 'digest' in $MODEL_DOWNLOAD_POOL[model]}
 												<div class="mt-1 text-xs dark:text-gray-500" style="font-size: 0.5rem;">
-													{modelDownloadStatus[model].digest}
+													{$MODEL_DOWNLOAD_POOL[model].digest}
 												</div>
 											{/if}
 										</div>

From 858f5ae1fe6a97f7fc9d4bbe394f5fe0f7fe6af8 Mon Sep 17 00:00:00 2001
From: Axodouble <53979495+Axodouble@users.noreply.github.com>
Date: Tue, 23 Apr 2024 17:46:17 +0200
Subject: [PATCH 31/40] Fixed a single translation key for nl-NL

Fixed a single translation key.
---
 src/lib/i18n/locales/nl-NL/translation.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lib/i18n/locales/nl-NL/translation.json b/src/lib/i18n/locales/nl-NL/translation.json
index 35f33ad0d..881ef59f8 100644
--- a/src/lib/i18n/locales/nl-NL/translation.json
+++ b/src/lib/i18n/locales/nl-NL/translation.json
@@ -62,7 +62,7 @@
 	"Click here to check other modelfiles.": "Klik hier om andere modelfiles te controleren.",
 	"Click here to select": "Klik hier om te selecteren",
 	"Click here to select documents.": "Klik hier om documenten te selecteren",
-	"click here.": "click here.",
+	"click here.": "klik hier.",
 	"Click on the user role button to change a user's role.": "Klik op de gebruikersrol knop om de rol van een gebruiker te wijzigen.",
 	"Close": "Sluiten",
 	"Collection": "Verzameling",

From 0ea9e19d793c3b3aecd9532b220932f647ee0f31 Mon Sep 17 00:00:00 2001
From: Jun Siang Cheah <git@jscheah.me>
Date: Tue, 23 Apr 2024 19:14:01 +0100
Subject: [PATCH 32/40] feat: add LITELLM_PROXY_PORT to configure internal
 proxy port

---
 backend/apps/litellm/main.py | 16 +++++++++-------
 backend/config.py            |  8 ++++++++
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py
index 52e0c7002..ad5c2afd5 100644
--- a/backend/apps/litellm/main.py
+++ b/backend/apps/litellm/main.py
@@ -23,7 +23,12 @@ log = logging.getLogger(__name__)
 log.setLevel(SRC_LOG_LEVELS["LITELLM"])
 
 
-from config import MODEL_FILTER_ENABLED, MODEL_FILTER_LIST, DATA_DIR
+from config import (
+    MODEL_FILTER_ENABLED,
+    MODEL_FILTER_LIST,
+    DATA_DIR,
+    LITELLM_PROXY_PORT,
+)
 
 from litellm.utils import get_llm_provider
 
@@ -90,9 +95,7 @@ async def run_background_process(command):
 async def start_litellm_background():
     log.info("start_litellm_background")
     # Command to run in the background
-    command = (
-        "litellm --port 14365 --telemetry False --config ./data/litellm/config.yaml"
-    )
+    command = f"litellm --port {LITELLM_PROXY_PORT} --telemetry False --config ./data/litellm/config.yaml"
 
     await run_background_process(command)
 
@@ -109,7 +112,6 @@ async def shutdown_litellm_background():
 
 @app.on_event("startup")
 async def startup_event():
-
     log.info("startup_event")
     # TODO: Check config.yaml file and create one
     asyncio.create_task(start_litellm_background())
@@ -186,7 +188,7 @@ async def get_models(user=Depends(get_current_user)):
     while not background_process:
         await asyncio.sleep(0.1)
 
-    url = "http://localhost:14365/v1"
+    url = f"http://localhost:{LITELLM_PROXY_PORT}/v1"
     r = None
     try:
         r = requests.request(method="GET", url=f"{url}/models")
@@ -289,7 +291,7 @@ async def delete_model_from_config(
 async def proxy(path: str, request: Request, user=Depends(get_verified_user)):
     body = await request.body()
 
-    url = "http://localhost:14365"
+    url = f"http://localhost:{LITELLM_PROXY_PORT}"
 
     target_url = f"{url}/{path}"
 
diff --git a/backend/config.py b/backend/config.py
index 489e80647..3f67c00d2 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -499,3 +499,11 @@ IMAGES_OPENAI_API_KEY = os.getenv("IMAGES_OPENAI_API_KEY", OPENAI_API_KEY)
 
 AUDIO_OPENAI_API_BASE_URL = os.getenv("AUDIO_OPENAI_API_BASE_URL", OPENAI_API_BASE_URL)
 AUDIO_OPENAI_API_KEY = os.getenv("AUDIO_OPENAI_API_KEY", OPENAI_API_KEY)
+
+####################################
+# LiteLLM
+####################################
+
+LITELLM_PROXY_PORT = int(os.getenv("LITELLM_PROXY_PORT", "14365"))
+if LITELLM_PROXY_PORT < 0 or LITELLM_PROXY_PORT > 65535:
+    raise ValueError("Invalid port number for LITELLM_PROXY_PORT")

From 9e9306fd2b3e2398966c4884f4e0ede370ed5efc Mon Sep 17 00:00:00 2001
From: Jun Siang Cheah <git@jscheah.me>
Date: Tue, 23 Apr 2024 19:19:16 +0100
Subject: [PATCH 33/40] feat: add LITELLM_PROXY_HOST to configure address
 litellm listens on

---
 backend/apps/litellm/main.py | 3 ++-
 backend/config.py            | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py
index ad5c2afd5..b1752f8c6 100644
--- a/backend/apps/litellm/main.py
+++ b/backend/apps/litellm/main.py
@@ -28,6 +28,7 @@ from config import (
     MODEL_FILTER_LIST,
     DATA_DIR,
     LITELLM_PROXY_PORT,
+    LITELLM_PROXY_HOST,
 )
 
 from litellm.utils import get_llm_provider
@@ -95,7 +96,7 @@ async def run_background_process(command):
 async def start_litellm_background():
     log.info("start_litellm_background")
     # Command to run in the background
-    command = f"litellm --port {LITELLM_PROXY_PORT} --telemetry False --config ./data/litellm/config.yaml"
+    command = f"litellm --port {LITELLM_PROXY_PORT} --host {LITELLM_PROXY_HOST} --telemetry False --config ./data/litellm/config.yaml"
 
     await run_background_process(command)
 
diff --git a/backend/config.py b/backend/config.py
index 3f67c00d2..f421c8aea 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -507,3 +507,4 @@ AUDIO_OPENAI_API_KEY = os.getenv("AUDIO_OPENAI_API_KEY", OPENAI_API_KEY)
 LITELLM_PROXY_PORT = int(os.getenv("LITELLM_PROXY_PORT", "14365"))
 if LITELLM_PROXY_PORT < 0 or LITELLM_PROXY_PORT > 65535:
     raise ValueError("Invalid port number for LITELLM_PROXY_PORT")
+LITELLM_PROXY_HOST = os.getenv("LITELLM_PROXY_HOST", "127.0.0.1")

From 58bead039892136ac16e601d37e0dd87a3a75bf3 Mon Sep 17 00:00:00 2001
From: Jun Siang Cheah <git@jscheah.me>
Date: Tue, 23 Apr 2024 19:22:41 +0100
Subject: [PATCH 34/40] fix: DATA_DIR was not respected when loading litellm
 configs

---
 backend/apps/litellm/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py
index b1752f8c6..bdf70615c 100644
--- a/backend/apps/litellm/main.py
+++ b/backend/apps/litellm/main.py
@@ -96,7 +96,7 @@ async def run_background_process(command):
 async def start_litellm_background():
     log.info("start_litellm_background")
     # Command to run in the background
-    command = f"litellm --port {LITELLM_PROXY_PORT} --host {LITELLM_PROXY_HOST} --telemetry False --config ./data/litellm/config.yaml"
+    command = f"litellm --port {LITELLM_PROXY_PORT} --host {LITELLM_PROXY_HOST} --telemetry False --config {LITELLM_CONFIG_DIR}"
 
     await run_background_process(command)
 

From 5245d037aca3dd3e885be0bd723767cce69b087d Mon Sep 17 00:00:00 2001
From: Jun Siang Cheah <git@jscheah.me>
Date: Tue, 23 Apr 2024 19:25:43 +0100
Subject: [PATCH 35/40] fix: harden litellm exec command to prevent unintended
 commands

Previously the command string was split on whitespace to build the argument list, so if either of the user-controlled variables LITELLM_PROXY_HOST or DATA_DIR contained spaces, the arguments would be split incorrectly. The command is now built as an explicit argument list.
---
 backend/apps/litellm/main.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py
index bdf70615c..119e9107e 100644
--- a/backend/apps/litellm/main.py
+++ b/backend/apps/litellm/main.py
@@ -1,3 +1,5 @@
+import sys
+
 from fastapi import FastAPI, Depends, HTTPException
 from fastapi.routing import APIRoute
 from fastapi.middleware.cors import CORSMiddleware
@@ -70,7 +72,7 @@ async def run_background_process(command):
         log.info(f"Executing command: {command}")
         # Execute the command and create a subprocess
         process = await asyncio.create_subprocess_exec(
-            *command.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE
+            *command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
         )
         background_process = process
         log.info("Subprocess started successfully.")
@@ -96,7 +98,17 @@ async def run_background_process(command):
 async def start_litellm_background():
     log.info("start_litellm_background")
     # Command to run in the background
-    command = f"litellm --port {LITELLM_PROXY_PORT} --host {LITELLM_PROXY_HOST} --telemetry False --config {LITELLM_CONFIG_DIR}"
+    command = [
+        "litellm",
+        "--port",
+        str(LITELLM_PROXY_PORT),
+        "--host",
+        LITELLM_PROXY_HOST,
+        "--telemetry",
+        "False",
+        "--config",
+        LITELLM_CONFIG_DIR,
+    ]
 
     await run_background_process(command)
 

From 589de36af791c54dc0be775eff74c1084c78755f Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Tue, 23 Apr 2024 15:56:09 -0400
Subject: [PATCH 36/40] fix: #1705

---
 src/lib/components/chat/Settings/Audio.svelte | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/lib/components/chat/Settings/Audio.svelte b/src/lib/components/chat/Settings/Audio.svelte
index 32a71dc18..71fb7957e 100644
--- a/src/lib/components/chat/Settings/Audio.svelte
+++ b/src/lib/components/chat/Settings/Audio.svelte
@@ -75,14 +75,16 @@
 	};
 
 	const updateConfigHandler = async () => {
-		const res = await updateAudioConfig(localStorage.token, {
-			url: OpenAIUrl,
-			key: OpenAIKey
-		});
+		if (TTSEngine === 'openai') {
+			const res = await updateAudioConfig(localStorage.token, {
+				url: OpenAIUrl,
+				key: OpenAIKey
+			});
 
-		if (res) {
-			OpenAIUrl = res.OPENAI_API_BASE_URL;
-			OpenAIKey = res.OPENAI_API_KEY;
+			if (res) {
+				OpenAIUrl = res.OPENAI_API_BASE_URL;
+				OpenAIKey = res.OPENAI_API_KEY;
+			}
 		}
 	};
 

From e318f921778112aedc92a60c308631ed9238b5c7 Mon Sep 17 00:00:00 2001
From: velaton <98014243+velaton618@users.noreply.github.com>
Date: Wed, 24 Apr 2024 17:12:02 +0800
Subject: [PATCH 37/40] Update translation.json

Fixed some grammar mistakes in the Russian (ru-RU) translation.
---
 src/lib/i18n/locales/ru-RU/translation.json | 26 ++++++++++-----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/lib/i18n/locales/ru-RU/translation.json b/src/lib/i18n/locales/ru-RU/translation.json
index 3b4c551a9..431d53766 100644
--- a/src/lib/i18n/locales/ru-RU/translation.json
+++ b/src/lib/i18n/locales/ru-RU/translation.json
@@ -2,39 +2,39 @@
 	"'s', 'm', 'h', 'd', 'w' or '-1' for no expiration.": "'s', 'm', 'h', 'd', 'w' или '-1' для не истечение.",
 	"(Beta)": "(бета)",
 	"(e.g. `sh webui.sh --api`)": "(например: `sh webui.sh --api`)",
-	"(latest)": "(новый)",
-	"{{modelName}} is thinking...": "{{modelName}} это думает...",
+	"(latest)": "(последний)",
+	"{{modelName}} is thinking...": "{{modelName}} думает...",
 	"{{webUIName}} Backend Required": "{{webUIName}} бэкенд требуемый",
-	"a user": "юзер",
-	"About": "Относительно",
+	"a user": "пользователь",
+	"About": "Об",
 	"Account": "Аккаунт",
 	"Action": "Действие",
 	"Add a model": "Добавьте модель",
-	"Add a model tag name": "Добавьте тэг модели имя",
-	"Add a short description about what this modelfile does": "Добавьте краткое описание, что делает этот моделифайл",
-	"Add a short title for this prompt": "Добавьте краткое название для этого взаимодействия",
+	"Add a model tag name": "Добавьте имя тэга модели",
+	"Add a short description about what this modelfile does": "Добавьте краткое описание, что делает этот моделфайл",
+	"Add a short title for this prompt": "Добавьте краткий заголовок для этого ввода",
 	"Add a tag": "Добавьте тэг",
 	"Add Docs": "Добавьте документы",
 	"Add Files": "Добавьте файлы",
-	"Add message": "Добавьте message",
+	"Add message": "Добавьте сообщение",
 	"add tags": "Добавьте тэгы",
-	"Adjusting these settings will apply changes universally to all users.": "Регулирующий этих настроек приведет к изменениям для все юзеры.",
+	"Adjusting these settings will apply changes universally to all users.": "Регулирующий этих настроек приведет к изменениям для все пользователей.",
 	"admin": "админ",
 	"Admin Panel": "Панель админ",
 	"Admin Settings": "Настройки админ",
 	"Advanced Parameters": "Расширенные Параметры",
 	"all": "всё",
-	"All Users": "Всё юзеры",
-	"Allow": "Дозволять",
+	"All Users": "Все пользователи",
+	"Allow": "Разрешить",
 	"Allow Chat Deletion": "Дозволять удаление чат",
 	"alphanumeric characters and hyphens": "буквенно цифровые символы и дефисы",
-	"Already have an account?": "у вас есть аккаунт уже?",
+	"Already have an account?": "у вас уже есть аккаунт?",
 	"an assistant": "ассистент",
 	"and": "и",
 	"API Base URL": "Базовый адрес API",
 	"API Key": "Ключ API",
 	"API RPM": "API RPM",
-	"are allowed - Activate this command by typing": "разрешено - активируйте эту команду набором",
+	"are allowed - Activate this command by typing": "разрешено - активируйте эту команду вводом",
 	"Are you sure?": "Вы уверены?",
 	"Audio": "Аудио",
 	"Auto-playback response": "Автоматическое воспроизведение ответа",

From ed326f02c05cc2f1d1fbe70dc71f593761b19968 Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Wed, 24 Apr 2024 15:23:09 -0400
Subject: [PATCH 38/40] doc: changelog

---
 CHANGELOG.md      | 13 ++++++++++---
 package-lock.json |  4 ++--
 package.json      |  4 ++--
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1eaffc692..f9eb0d966 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,11 +5,18 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [0.1.121] - 2024-04-22
+## [0.1.121] - 2024-04-24
 
-### Added
+### Fixed
 
-- **🛠️ Improved Embedding Model Support**: You can now use any embedding model `sentence_transformers` supports.
+- **🔧 Translation Issues**: Addressed various translation discrepancies.
+- **🔒 LiteLLM Security Fix**: Updated LiteLLM version to resolve a security vulnerability.
+- **🖥️ HTML Tag Display**: Rectified the issue where the '<br>' tag wasn't displaying correctly.
+- **🔗 WebSocket Connection**: Resolved the failure of WebSocket connection under HTTPS security for ComfyUI server.
+- **📜 FileReader Optimization**: Implemented FileReader initialization per image in multi-file drag & drop to ensure reusability.
+- **🏷️ Tag Display**: Corrected tag display inconsistencies.
+- **📦 Archived Chat Styling**: Fixed styling issues in archived chat.
+- **🔖 Safari Copy Button Bug**: Addressed the bug where the copy button failed to copy links in Safari.
 
 ## [0.1.120] - 2024-04-20
 
diff --git a/package-lock.json b/package-lock.json
index a310c609d..55b35dd58 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
 {
 	"name": "open-webui",
-	"version": "0.1.120",
+	"version": "0.1.121",
 	"lockfileVersion": 3,
 	"requires": true,
 	"packages": {
 		"": {
 			"name": "open-webui",
-			"version": "0.1.120",
+			"version": "0.1.121",
 			"dependencies": {
 				"@sveltejs/adapter-node": "^1.3.1",
 				"async": "^3.2.5",
diff --git a/package.json b/package.json
index 12afea0f4..12f537c5f 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "open-webui",
-	"version": "0.1.120",
+	"version": "0.1.121",
 	"private": true,
 	"scripts": {
 		"dev": "vite dev --host",
@@ -60,4 +60,4 @@
 		"tippy.js": "^6.3.7",
 		"uuid": "^9.0.1"
 	}
-}
+}
\ No newline at end of file

From 08f7c2fd630b2bea11de39bbbab0572c45c25b9b Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Wed, 24 Apr 2024 15:24:21 -0400
Subject: [PATCH 39/40] chore: format

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 12f537c5f..777f0f07b 100644
--- a/package.json
+++ b/package.json
@@ -60,4 +60,4 @@
 		"tippy.js": "^6.3.7",
 		"uuid": "^9.0.1"
 	}
-}
\ No newline at end of file
+}

From 348186c405a983c701cc7397242862919b8a0442 Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Wed, 24 Apr 2024 15:28:50 -0400
Subject: [PATCH 40/40] Update CHANGELOG.md

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f9eb0d966..505ded309 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,7 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - **🔧 Translation Issues**: Addressed various translation discrepancies.
 - **🔒 LiteLLM Security Fix**: Updated LiteLLM version to resolve a security vulnerability.
-- **🖥️ HTML Tag Display**: Rectified the issue where the '<br>' tag wasn't displaying correctly.
+- **🖥️ HTML Tag Display**: Rectified the issue where the '< br >' tag wasn't displaying correctly.
 - **🔗 WebSocket Connection**: Resolved the failure of WebSocket connection under HTTPS security for ComfyUI server.
 - **📜 FileReader Optimization**: Implemented FileReader initialization per image in multi-file drag & drop to ensure reusability.
 - **🏷️ Tag Display**: Corrected tag display inconsistencies.