From 2952e6116762f1d1b43466fa92bf8762bf09e447 Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek"
Date: Sun, 14 Apr 2024 17:55:00 -0400
Subject: [PATCH 1/3] feat: external embeddings support

---
 backend/apps/ollama/main.py                   |  49 +++++
 backend/apps/rag/main.py                      | 121 ++++++++---
 backend/apps/rag/utils.py                     |  36 ++++
 backend/config.py                             |   3 +
 src/lib/apis/ollama/index.ts                  |  26 +++
 .../documents/Settings/General.svelte         | 195 ++++++++++--------
 6 files changed, 312 insertions(+), 118 deletions(-)

diff --git a/backend/apps/ollama/main.py b/backend/apps/ollama/main.py
index 7140cad9d..0132179f5 100644
--- a/backend/apps/ollama/main.py
+++ b/backend/apps/ollama/main.py
@@ -654,6 +654,55 @@ async def generate_embeddings(
     )
 
 
+def generate_ollama_embeddings(
+    form_data: GenerateEmbeddingsForm,
+    url_idx: Optional[int] = None,
+):
+    if url_idx is None:
+        model = form_data.model
+
+        if ":" not in model:
+            model = f"{model}:latest"
+
+        if model in app.state.MODELS:
+            url_idx = random.choice(app.state.MODELS[model]["urls"])
+        else:
+            raise HTTPException(
+                status_code=400,
+                detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.model),
+            )
+
+    url = app.state.OLLAMA_BASE_URLS[url_idx]
+    log.info(f"url: {url}")
+
+    try:
+        r = requests.request(
+            method="POST",
+            url=f"{url}/api/embeddings",
+            data=form_data.model_dump_json(exclude_none=True).encode(),
+        )
+        r.raise_for_status()
+
+        data = r.json()
+
+        if "embedding" in data:
+            return data["embedding"]
+        else:
+            raise Exception("Something went wrong :/")
+    except Exception as e:
+        log.exception(e)
+        error_detail = "Open WebUI: Server Connection Error"
+        if r is not None:
+            try:
+                res = r.json()
+                if "error" in res:
+                    error_detail = f"Ollama: {res['error']}"
+            except:
+                error_detail = f"Ollama: {e}"
+
+        raise Exception(error_detail)
+
+
 class GenerateCompletionForm(BaseModel):
     model: str
     prompt: str
diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py
index f03aa4b7f..423f1e032 100644
--- a/backend/apps/rag/main.py
+++ b/backend/apps/rag/main.py
@@ -39,13 +39,21 @@ import uuid
 import json
 
+from apps.ollama.main import generate_ollama_embeddings
+
 from apps.web.models.documents import (
     Documents,
     DocumentForm,
     DocumentResponse,
 )
 
-from apps.rag.utils import query_doc, query_collection, get_embedding_model_path
+from apps.rag.utils import (
+    query_doc,
+    query_embeddings_doc,
+    query_collection,
+    query_embeddings_collection,
+    get_embedding_model_path,
+)
 
 from utils.misc import (
     calculate_sha256,
@@ -58,6 +66,7 @@ from config import (
     SRC_LOG_LEVELS,
     UPLOAD_DIR,
     DOCS_DIR,
+    RAG_EMBEDDING_ENGINE,
     RAG_EMBEDDING_MODEL,
     RAG_EMBEDDING_MODEL_AUTO_UPDATE,
     DEVICE_TYPE,
@@ -74,17 +83,20 @@ log.setLevel(SRC_LOG_LEVELS["RAG"])
 
 app = FastAPI()
 
-app.state.PDF_EXTRACT_IMAGES = False
+app.state.TOP_K = 4
 app.state.CHUNK_SIZE = CHUNK_SIZE
 app.state.CHUNK_OVERLAP = CHUNK_OVERLAP
+
+app.state.RAG_EMBEDDING_ENGINE = RAG_EMBEDDING_ENGINE
+app.state.RAG_EMBEDDING_MODEL = RAG_EMBEDDING_MODEL
 app.state.RAG_TEMPLATE = RAG_TEMPLATE
-app.state.RAG_EMBEDDING_MODEL = RAG_EMBEDDING_MODEL
+app.state.PDF_EXTRACT_IMAGES = False
 
-app.state.TOP_K = 4
-
 app.state.sentence_transformer_ef = (
     embedding_functions.SentenceTransformerEmbeddingFunction(
         model_name=get_embedding_model_path(
@@ -121,6 +133,7 @@ async def get_status():
         "chunk_size": app.state.CHUNK_SIZE,
         "chunk_overlap": app.state.CHUNK_OVERLAP,
         "template": app.state.RAG_TEMPLATE,
+        "embedding_engine": app.state.RAG_EMBEDDING_ENGINE,
         "embedding_model": app.state.RAG_EMBEDDING_MODEL,
     }
 
@@ -252,12 +265,23 @@ def query_doc_handler(
 ):
     try:
-        return query_doc(
-            collection_name=form_data.collection_name,
-            query=form_data.query,
-            k=form_data.k if form_data.k else app.state.TOP_K,
-            embedding_function=app.state.sentence_transformer_ef,
-        )
+        if app.state.RAG_EMBEDDING_ENGINE == "ollama":
+            query_embeddings = generate_ollama_embeddings(
+                {"model": app.state.RAG_EMBEDDING_MODEL, "prompt": form_data.query}
+            )
+
+            return query_embeddings_doc(
+                collection_name=form_data.collection_name,
+                query_embeddings=query_embeddings,
+                k=form_data.k if form_data.k else app.state.TOP_K,
+            )
+        else:
+            return query_doc(
+                collection_name=form_data.collection_name,
+                query=form_data.query,
+                k=form_data.k if form_data.k else app.state.TOP_K,
+                embedding_function=app.state.sentence_transformer_ef,
+            )
     except Exception as e:
         log.exception(e)
         raise HTTPException(
@@ -277,12 +301,30 @@ def query_collection_handler(
     form_data: QueryCollectionsForm,
     user=Depends(get_current_user),
 ):
-    return query_collection(
-        collection_names=form_data.collection_names,
-        query=form_data.query,
-        k=form_data.k if form_data.k else app.state.TOP_K,
-        embedding_function=app.state.sentence_transformer_ef,
-    )
+    try:
+        if app.state.RAG_EMBEDDING_ENGINE == "ollama":
+            query_embeddings = generate_ollama_embeddings(
+                {"model": app.state.RAG_EMBEDDING_MODEL, "prompt": form_data.query}
+            )
+
+            return query_embeddings_collection(
+                collection_names=form_data.collection_names,
+                query_embeddings=query_embeddings,
+                k=form_data.k if form_data.k else app.state.TOP_K,
+            )
+        else:
+            return query_collection(
+                collection_names=form_data.collection_names,
+                query=form_data.query,
+                k=form_data.k if form_data.k else app.state.TOP_K,
+                embedding_function=app.state.sentence_transformer_ef,
+            )
+    except Exception as e:
+        log.exception(e)
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=ERROR_MESSAGES.DEFAULT(e),
+        )
 
 
 @app.post("/web")
@@ -317,6 +359,7 @@ def store_data_in_vector_db(data, collection_name, overwrite: bool = False) -> b
         chunk_overlap=app.state.CHUNK_OVERLAP,
         add_start_index=True,
     )
+
     docs = text_splitter.split_documents(data)
 
     if len(docs) > 0:
@@ -337,7 +380,9 @@ def store_text_in_vector_db(
     return store_docs_in_vector_db(docs, collection_name, overwrite)
 
 
-def store_docs_in_vector_db(docs, collection_name, overwrite: bool = False) -> bool:
+async def store_docs_in_vector_db(
+    docs, collection_name, overwrite: bool = False
+) -> bool:
     texts = [doc.page_content for doc in docs]
     metadatas = [doc.metadata for doc in docs]
 
@@ -349,20 +394,36 @@ def store_docs_in_vector_db(docs, collection_name, overwrite: bool = False) -> b
             log.info(f"deleting existing collection {collection_name}")
             CHROMA_CLIENT.delete_collection(name=collection_name)
 
-        collection = CHROMA_CLIENT.create_collection(
-            name=collection_name,
-            embedding_function=app.state.sentence_transformer_ef,
-        )
+        if app.state.RAG_EMBEDDING_ENGINE == "ollama":
+            collection = CHROMA_CLIENT.create_collection(name=collection_name)
 
-        for batch in create_batches(
-            api=CHROMA_CLIENT,
-            ids=[str(uuid.uuid1()) for _ in texts],
-            metadatas=metadatas,
-            documents=texts,
-        ):
-            collection.add(*batch)
+            for batch in create_batches(
+                api=CHROMA_CLIENT,
+                ids=[str(uuid.uuid1()) for _ in texts],
+                metadatas=metadatas,
+                embeddings=[
+                    generate_ollama_embeddings(
+                        {"model": RAG_EMBEDDING_MODEL, "prompt": text}
+                    )
+                    for text in texts
+                ],
+            ):
+                collection.add(*batch)
+        else:
+            collection = CHROMA_CLIENT.create_collection(
+                name=collection_name,
+                embedding_function=app.state.sentence_transformer_ef,
+            )
 
-        return True
+            for batch in create_batches(
+                api=CHROMA_CLIENT,
+                ids=[str(uuid.uuid1()) for _ in texts],
+                metadatas=metadatas,
+                documents=texts,
+            ):
+                collection.add(*batch)
+
+        return True
     except Exception as e:
         log.exception(e)
         if e.__class__.__name__ == "UniqueConstraintError":
diff --git a/backend/apps/rag/utils.py b/backend/apps/rag/utils.py
index 7bbfe0b88..301c63b99 100644
--- a/backend/apps/rag/utils.py
+++ b/backend/apps/rag/utils.py
@@ -2,6 +2,9 @@ import os
 import re
 import logging
 from typing import List
+import requests
+
+
 from huggingface_hub import snapshot_download
 
 from config import SRC_LOG_LEVELS, CHROMA_CLIENT
@@ -26,6 +29,21 @@ def query_doc(collection_name: str, query: str, k: int, embedding_function):
         raise e
 
 
+def query_embeddings_doc(collection_name: str, query_embeddings, k: int):
+    try:
+        # if you use docker use the model from the environment variable
+        collection = CHROMA_CLIENT.get_collection(
+            name=collection_name,
+        )
+        result = collection.query(
+            query_embeddings=[query_embeddings],
+            n_results=k,
+        )
+        return result
+    except Exception as e:
+        raise e
+
+
 def merge_and_sort_query_results(query_results, k):
     # Initialize lists to store combined data
     combined_ids = []
@@ -96,6 +114,24 @@ def query_collection(
     return merge_and_sort_query_results(results, k)
 
 
+def query_embeddings_collection(collection_names: List[str], query_embeddings, k: int):
+
+    results = []
+    for collection_name in collection_names:
+        try:
+            collection = CHROMA_CLIENT.get_collection(name=collection_name)
+
+            result = collection.query(
+                query_embeddings=[query_embeddings],
+                n_results=k,
+            )
+            results.append(result)
+        except:
+            pass
+
+    return merge_and_sort_query_results(results, k)
+
+
 def rag_template(template: str, context: str, query: str):
     template = template.replace("[context]", context)
     template = template.replace("[query]", query)
diff --git a/backend/config.py b/backend/config.py
index 6d93115bb..938df9961 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -405,6 +405,9 @@ if WEBUI_AUTH and WEBUI_SECRET_KEY == "":
 CHROMA_DATA_PATH = f"{DATA_DIR}/vector_db"
 # this uses the model defined in the Dockerfile ENV variable. If you don't use Docker or a Docker-based deployment such as k8s, the default embedding model (all-MiniLM-L6-v2) will be used.
+
+RAG_EMBEDDING_ENGINE = os.environ.get("RAG_EMBEDDING_ENGINE", "")
+
 RAG_EMBEDDING_MODEL = os.environ.get("RAG_EMBEDDING_MODEL", "all-MiniLM-L6-v2")
 log.info(f"Embedding model set: {RAG_EMBEDDING_MODEL}")
diff --git a/src/lib/apis/ollama/index.ts b/src/lib/apis/ollama/index.ts
index 4618acc4d..a94aceace 100644
--- a/src/lib/apis/ollama/index.ts
+++ b/src/lib/apis/ollama/index.ts
@@ -220,6 +220,32 @@ export const generatePrompt = async (token: string = '', model: string, conversa
 	return res;
 };
 
+export const generateEmbeddings = async (token: string = '', model: string, text: string) => {
+	let error = null;
+
+	const res = await fetch(`${OLLAMA_API_BASE_URL}/api/embeddings`, {
+		method: 'POST',
+		headers: {
+			Accept: 'application/json',
+			'Content-Type': 'application/json',
+			Authorization: `Bearer ${token}`
+		},
+		body: JSON.stringify({
+			model: model,
+			prompt: text
+		})
+	}).catch((err) => {
+		error = err;
+		return null;
+	});
+
+	if (error) {
+		throw error;
+	}
+
+	return res;
+};
+
 export const generateTextCompletion = async (token: string = '', model: string, text: string) => {
 	let error = null;
 
diff --git a/src/lib/components/documents/Settings/General.svelte b/src/lib/components/documents/Settings/General.svelte
index c94c1250b..85df678c0 100644
--- a/src/lib/components/documents/Settings/General.svelte
+++ b/src/lib/components/documents/Settings/General.svelte
@@ -26,6 +26,7 @@
 	let showResetConfirm = false;
 
+	let embeddingEngine = '';
 	let chunkSize = 0;
 	let chunkOverlap = 0;
 	let pdfExtractImages = true;
@@ -118,81 +119,119 @@
[Template hunk: the Svelte/HTML markup was stripped during extraction and cannot be reconstructed verbatim. Recoverable changes: under the "General Settings" heading, a new "Embedding Engine" select bound to `embeddingEngine` is added; the "Update Embedding Model" text input, its update button, and the notice "Warning: If you update or change your embedding model, you will need to re-import all documents." are moved below the engine selector; and the "Scan for documents from /data/docs" button is repositioned after the embedding settings.]
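Taken together, patch 1 gives the RAG backend two embedding paths: the existing SentenceTransformers embedding function that Chroma invokes itself, and an "ollama" engine where the server fetches vectors from Ollama's `/api/embeddings` and hands Chroma precomputed embeddings, as `generate_ollama_embeddings` and the "ollama" branch of `store_docs_in_vector_db` do above. A minimal standalone sketch of that second path, outside Open WebUI entirely; the base URL, the `nomic-embed-text` model name, and the in-memory `chromadb` client are assumptions for illustration, not values from the patch:

```python
# Sketch: embed via Ollama, store/query in Chroma with precomputed vectors.
# Assumes a local Ollama at http://localhost:11434 with an embedding-capable
# model already pulled, and the chromadb package installed.
import requests
import chromadb

OLLAMA_URL = "http://localhost:11434"  # assumption: default Ollama port
MODEL = "nomic-embed-text"             # assumption: any embedding model works


def embed(text: str) -> list[float]:
    # Mirrors generate_ollama_embeddings: POST the prompt, return data["embedding"].
    r = requests.post(f"{OLLAMA_URL}/api/embeddings", json={"model": MODEL, "prompt": text})
    r.raise_for_status()
    return r.json()["embedding"]


client = chromadb.Client()
collection = client.get_or_create_collection(name="demo")

# No embedding_function is passed, so Chroma never embeds anything itself;
# the caller supplies the vectors, exactly like the "ollama" branch above.
docs = ["Ollama runs models locally.", "Chroma stores embedding vectors."]
collection.add(
    ids=[str(i) for i in range(len(docs))],
    documents=docs,
    embeddings=[embed(d) for d in docs],
)

result = collection.query(query_embeddings=[embed("Where do models run?")], n_results=1)
print(result["documents"])
```

The design point the patch relies on is visible here: once the collection is created without an embedding function, both ingestion and querying must consistently supply vectors from the same external model, which is why the query handlers grow a matching "ollama" branch.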
From 9cdb5bf9fe656fb26d1e0a2fc7551af1e08cbfb2 Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek"
Date: Sun, 14 Apr 2024 18:31:40 -0400
Subject: [PATCH 2/3] feat: frontend integration

---
 backend/apps/rag/main.py                      |  34 +++--
 src/lib/apis/rag/index.ts                     |   9 +-
 .../documents/Settings/General.svelte         | 124 +++++++++++++++---
 3 files changed, 134 insertions(+), 33 deletions(-)

diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py
index 423f1e032..e1a5e6eb8 100644
--- a/backend/apps/rag/main.py
+++ b/backend/apps/rag/main.py
@@ -138,20 +138,22 @@ async def get_status():
     }
 
 
-@app.get("/embedding/model")
-async def get_embedding_model(user=Depends(get_admin_user)):
+@app.get("/embedding")
+async def get_embedding_config(user=Depends(get_admin_user)):
     return {
         "status": True,
+        "embedding_engine": app.state.RAG_EMBEDDING_ENGINE,
         "embedding_model": app.state.RAG_EMBEDDING_MODEL,
     }
 
 
 class EmbeddingModelUpdateForm(BaseModel):
+    embedding_engine: str
     embedding_model: str
 
 
-@app.post("/embedding/model/update")
-async def update_embedding_model(
+@app.post("/embedding/update")
+async def update_embedding_config(
     form_data: EmbeddingModelUpdateForm, user=Depends(get_admin_user)
 ):
 
@@ -160,18 +162,26 @@ async def update_embedding_model(
     )
 
     try:
-        sentence_transformer_ef = (
-            embedding_functions.SentenceTransformerEmbeddingFunction(
-                model_name=get_embedding_model_path(form_data.embedding_model, True),
-                device=DEVICE_TYPE,
-            )
-        )
+        app.state.RAG_EMBEDDING_ENGINE = form_data.embedding_engine
 
-        app.state.RAG_EMBEDDING_MODEL = form_data.embedding_model
-        app.state.sentence_transformer_ef = sentence_transformer_ef
+        if app.state.RAG_EMBEDDING_ENGINE == "ollama":
+            app.state.RAG_EMBEDDING_MODEL = form_data.embedding_model
+            app.state.sentence_transformer_ef = None
+        else:
+            sentence_transformer_ef = (
+                embedding_functions.SentenceTransformerEmbeddingFunction(
+                    model_name=get_embedding_model_path(
+                        form_data.embedding_model, True
+                    ),
+                    device=DEVICE_TYPE,
+                )
+            )
+            app.state.RAG_EMBEDDING_MODEL = form_data.embedding_model
+            app.state.sentence_transformer_ef = sentence_transformer_ef
 
     return {
         "status": True,
+        "embedding_engine": app.state.RAG_EMBEDDING_ENGINE,
         "embedding_model": app.state.RAG_EMBEDDING_MODEL,
     }
diff --git a/src/lib/apis/rag/index.ts b/src/lib/apis/rag/index.ts
index 33c70e2b1..bfcee55fc 100644
--- a/src/lib/apis/rag/index.ts
+++ b/src/lib/apis/rag/index.ts
@@ -346,10 +346,10 @@ export const resetVectorDB = async (token: string) => {
 	return res;
 };
 
-export const getEmbeddingModel = async (token: string) => {
+export const getEmbeddingConfig = async (token: string) => {
 	let error = null;
 
-	const res = await fetch(`${RAG_API_BASE_URL}/embedding/model`, {
+	const res = await fetch(`${RAG_API_BASE_URL}/embedding`, {
 		method: 'GET',
 		headers: {
 			'Content-Type': 'application/json',
@@ -374,13 +374,14 @@ export const getEmbeddingModel = async (token: string) => {
 };
 
 type EmbeddingModelUpdateForm = {
+	embedding_engine: string;
 	embedding_model: string;
 };
 
-export const updateEmbeddingModel = async (token: string, payload: EmbeddingModelUpdateForm) => {
+export const updateEmbeddingConfig = async (token: string, payload: EmbeddingModelUpdateForm) => {
 	let error = null;
 
-	const res = await fetch(`${RAG_API_BASE_URL}/embedding/model/update`, {
+	const res = await fetch(`${RAG_API_BASE_URL}/embedding/update`, {
 		method: 'POST',
 		headers: {
 			'Content-Type': 'application/json',
diff --git a/src/lib/components/documents/Settings/General.svelte b/src/lib/components/documents/Settings/General.svelte
index 85df678c0..c9142fbe5 100644
--- a/src/lib/components/documents/Settings/General.svelte
+++ b/src/lib/components/documents/Settings/General.svelte
@@ -7,11 +7,11 @@
 	scanDocs,
 	updateQuerySettings,
 	resetVectorDB,
-	getEmbeddingModel,
-	updateEmbeddingModel
+	getEmbeddingConfig,
+	updateEmbeddingConfig
 } from '$lib/apis/rag';
 
-	import { documents } from '$lib/stores';
+	import { documents, models } from '$lib/stores';
 	import { onMount, getContext } from 'svelte';
 	import { toast } from 'svelte-sonner';
@@ -27,6 +27,8 @@
 	let showResetConfirm = false;
 
 	let embeddingEngine = '';
+	let embeddingModel = '';
+
 	let chunkSize = 0;
 	let chunkOverlap = 0;
 	let pdfExtractImages = true;
@@ -36,8 +38,6 @@
 		k: 4
 	};
 
-	let embeddingModel = '';
-
 	const scanHandler = async () => {
 		scanDirLoading = true;
 		const res = await scanDocs(localStorage.token);
@@ -50,7 +50,16 @@
 	};
 
 	const embeddingModelUpdateHandler = async () => {
-		if (embeddingModel.split('/').length - 1 > 1) {
+		if (embeddingModel === '') {
+			toast.error(
+				$i18n.t(
+					'Model filesystem path detected. Model shortname is required for update, cannot continue.'
+				)
+			);
+			return;
+		}
+
+		if (embeddingEngine === '' && embeddingModel.split('/').length - 1 > 1) {
 			toast.error(
 				$i18n.t(
 					'Model filesystem path detected. Model shortname is required for update, cannot continue.'
@@ -62,11 +71,17 @@
 		console.log('Update embedding model attempt:', embeddingModel);
 
 		updateEmbeddingModelLoading = true;
-		const res = await updateEmbeddingModel(localStorage.token, {
+		const res = await updateEmbeddingConfig(localStorage.token, {
+			embedding_engine: embeddingEngine,
 			embedding_model: embeddingModel
 		}).catch(async (error) => {
 			toast.error(error);
-			embeddingModel = (await getEmbeddingModel(localStorage.token)).embedding_model;
+
+			const embeddingConfig = await getEmbeddingConfig(localStorage.token);
+			if (embeddingConfig) {
+				embeddingEngine = embeddingConfig.embedding_engine;
+				embeddingModel = embeddingConfig.embedding_model;
+			}
 			return null;
 		});
 		updateEmbeddingModelLoading = false;
@@ -102,7 +117,12 @@
 		chunkOverlap = res.chunk.chunk_overlap;
 	}
 
-	embeddingModel = (await getEmbeddingModel(localStorage.token)).embedding_model;
+	const embeddingConfig = await getEmbeddingConfig(localStorage.token);
+
+	if (embeddingConfig) {
+		embeddingEngine = embeddingConfig.embedding_engine;
+		embeddingModel = embeddingConfig.embedding_model;
+	}
 
 	querySettings = await getQuerySettings(localStorage.token);
 });
@@ -126,6 +146,9 @@
 			class="dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
 			bind:value={embeddingEngine}
 			placeholder="Select an embedding engine"
+			on:change={() => {
+				embeddingModel = '';
+			}}
 		>
@@ -136,10 +159,77 @@
[Template hunk: markup stripped in extraction. Recoverable changes: the "Update Embedding Model" row becomes conditional on the engine. When `embeddingEngine === 'ollama'`, it renders a select of available Ollama models (from the `$models` store) bound to `embeddingModel`; otherwise it keeps the free-text model-name input. The update button and the re-import warning ("Warning: If you update or change your embedding model, you will need to re-import all documents.") follow the conditional block.]
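Patch 2 collapses the old `/embedding/model` and `/embedding/model/update` routes into `/embedding` (GET) and `/embedding/update` (POST), each carrying both the engine and the model. A hedged sketch of exercising the new routes with plain `requests`; the base path `/rag/api/v1`, the port, and the token value are assumptions, not values confirmed by the patch:

```python
# Sketch: read and switch the RAG embedding config via the reworked endpoints.
# Assumes Open WebUI is reachable on localhost:8080, the RAG router is mounted
# at /rag/api/v1 (assumption), and ADMIN_TOKEN is a valid admin API token.
import requests

BASE = "http://localhost:8080/rag/api/v1"          # assumption
HEADERS = {"Authorization": "Bearer ADMIN_TOKEN"}  # placeholder token

# GET /embedding returns the active engine and model, per get_embedding_config.
print(requests.get(f"{BASE}/embedding", headers=HEADERS).json())
# e.g. {'status': True, 'embedding_engine': '', 'embedding_model': 'all-MiniLM-L6-v2'}

# POST /embedding/update switches to the Ollama engine; the body matches
# EmbeddingModelUpdateForm. Model name here is an illustrative assumption.
res = requests.post(
    f"{BASE}/embedding/update",
    headers=HEADERS,
    json={"embedding_engine": "ollama", "embedding_model": "nomic-embed-text"},
)
print(res.json())
```

Note the backend's behavior on switching: for the "ollama" engine it sets `sentence_transformer_ef` to `None` rather than loading a local model, so documents embedded under one engine are not queryable under the other until re-imported, which is exactly what the UI warning says.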
From 36ce157907b8800bed25fd671d60359ce97c93c7 Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek"
Date: Sun, 14 Apr 2024 18:47:45 -0400
Subject: [PATCH 3/3] fix: integration

---
 backend/apps/ollama/main.py |  5 +++++
 backend/apps/rag/main.py    | 27 ++++++++++++++++++++-------
 backend/apps/rag/utils.py   |  3 +++
 3 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/backend/apps/ollama/main.py b/backend/apps/ollama/main.py
index 0132179f5..387ff05da 100644
--- a/backend/apps/ollama/main.py
+++ b/backend/apps/ollama/main.py
@@ -658,6 +658,9 @@ def generate_ollama_embeddings(
     form_data: GenerateEmbeddingsForm,
     url_idx: Optional[int] = None,
 ):
+
+    log.info(f"generate_ollama_embeddings {form_data}")
+
     if url_idx is None:
         model = form_data.model
 
@@ -685,6 +688,8 @@ def generate_ollama_embeddings(
 
         data = r.json()
 
+        log.info(f"generate_ollama_embeddings {data}")
+
         if "embedding" in data:
             return data["embedding"]
         else:
diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py
index e1a5e6eb8..976c7735b 100644
--- a/backend/apps/rag/main.py
+++ b/backend/apps/rag/main.py
@@ -39,7 +39,7 @@ import uuid
 import json
 
-from apps.ollama.main import generate_ollama_embeddings
+from apps.ollama.main import generate_ollama_embeddings, GenerateEmbeddingsForm
 
 from apps.web.models.documents import (
     Documents,
@@ -277,7 +277,12 @@ def query_doc_handler(
     try:
         if app.state.RAG_EMBEDDING_ENGINE == "ollama":
             query_embeddings = generate_ollama_embeddings(
-                {"model": app.state.RAG_EMBEDDING_MODEL, "prompt": form_data.query}
+                GenerateEmbeddingsForm(
+                    **{
+                        "model": app.state.RAG_EMBEDDING_MODEL,
+                        "prompt": form_data.query,
+                    }
+                )
             )
 
             return query_embeddings_doc(
@@ -314,7 +319,12 @@ def query_collection_handler(
     try:
         if app.state.RAG_EMBEDDING_ENGINE == "ollama":
             query_embeddings = generate_ollama_embeddings(
-                {"model": app.state.RAG_EMBEDDING_MODEL, "prompt": form_data.query}
+                GenerateEmbeddingsForm(
+                    **{
+                        "model": app.state.RAG_EMBEDDING_MODEL,
+                        "prompt": form_data.query,
+                    }
+                )
             )
 
             return query_embeddings_collection(
@@ -373,6 +383,7 @@ def store_data_in_vector_db(data, collection_name, overwrite: bool = False) -> b
     docs = text_splitter.split_documents(data)
 
     if len(docs) > 0:
+        log.info("store_data_in_vector_db -> store_docs_in_vector_db")
         return store_docs_in_vector_db(docs, collection_name, overwrite), None
     else:
         raise ValueError(ERROR_MESSAGES.EMPTY_CONTENT)
@@ -390,9 +401,8 @@ def store_text_in_vector_db(
     return store_docs_in_vector_db(docs, collection_name, overwrite)
 
 
-async def store_docs_in_vector_db(
-    docs, collection_name, overwrite: bool = False
-) -> bool:
+def store_docs_in_vector_db(docs, collection_name, overwrite: bool = False) -> bool:
+    log.info(f"store_docs_in_vector_db {docs} {collection_name}")
     texts = [doc.page_content for doc in docs]
     metadatas = [doc.metadata for doc in docs]
 
@@ -413,13 +423,16 @@ async def store_docs_in_vector_db(
             metadatas=metadatas,
             embeddings=[
                 generate_ollama_embeddings(
-                    {"model": RAG_EMBEDDING_MODEL, "prompt": text}
+                    GenerateEmbeddingsForm(
+                        **{"model": RAG_EMBEDDING_MODEL, "prompt": text}
+                    )
                 )
                 for text in texts
             ],
         ):
             collection.add(*batch)
     else:
+
         collection = CHROMA_CLIENT.create_collection(
             name=collection_name,
             embedding_function=app.state.sentence_transformer_ef,
diff --git a/backend/apps/rag/utils.py b/backend/apps/rag/utils.py
index 301c63b99..17d8e4a9a 100644
--- a/backend/apps/rag/utils.py
+++ b/backend/apps/rag/utils.py
@@ -32,6 +32,7 @@ def query_doc(collection_name: str, query: str, k: int, embedding_function):
 def query_embeddings_doc(collection_name: str, query_embeddings, k: int):
     try:
         # if you use docker use the model from the environment variable
+        log.info(f"query_embeddings_doc {query_embeddings}")
         collection = CHROMA_CLIENT.get_collection(
             name=collection_name,
         )
@@ -117,6 +118,8 @@ def query_collection(
 def query_embeddings_collection(collection_names: List[str], query_embeddings, k: int):
 
     results = []
+    log.info(f"query_embeddings_collection {query_embeddings}")
+
     for collection_name in collection_names:
         try:
             collection = CHROMA_CLIENT.get_collection(name=collection_name)
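The substantive fix in patch 3 is wrapping the payload in `GenerateEmbeddingsForm(**{...})`: `generate_ollama_embeddings` reads `form_data.model` and calls `form_data.model_dump_json(exclude_none=True)`, so the plain dicts passed in patches 1 and 2 would have raised `AttributeError` at request time. A self-contained sketch of the same pattern with pydantic v2; only `model` and `prompt` are confirmed by the diffs, the other fields are assumptions:

```python
# Sketch: why a pydantic form, not a dict, must be passed to the helper.
from typing import Optional

from pydantic import BaseModel, ConfigDict


class GenerateEmbeddingsForm(BaseModel):
    # Silences pydantic v2's protected-namespace warning for a field
    # literally named "model", which this schema needs.
    model_config = ConfigDict(protected_namespaces=())

    model: str
    prompt: str
    options: Optional[dict] = None      # assumption: optional Ollama fields
    keep_alive: Optional[str] = None    # assumption


form = GenerateEmbeddingsForm(**{"model": "nomic-embed-text", "prompt": "hello"})

# The attribute access and serialization generate_ollama_embeddings relies on:
print(form.model)                               # "nomic-embed-text"
print(form.model_dump_json(exclude_none=True))  # {"model":"nomic-embed-text","prompt":"hello"}

# A plain dict supports neither, which is why the dict-passing call sites in
# patches 1-2 would have crashed:
#   {"model": "m", "prompt": "p"}.model  ->  AttributeError
```

Note that the dict-to-form call sites inside `store_docs_in_vector_db` are also converted here, so after patch 3 every caller of `generate_ollama_embeddings` hands it a proper `GenerateEmbeddingsForm`.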