From 41a4cf7106639b46b68a3ef4117129e14b54c633 Mon Sep 17 00:00:00 2001
From: Marko Henning <marko.henning@posteo.de>
Date: Thu, 6 Mar 2025 10:47:57 +0100
Subject: [PATCH 1/7] Added new k_reranker parameter

---
 backend/open_webui/config.py                   |  5 +++++
 backend/open_webui/main.py                     |  2 ++
 backend/open_webui/retrieval/utils.py          |  7 ++++++-
 backend/open_webui/routers/retrieval.py        |  8 ++++++++
 backend/open_webui/utils/middleware.py         |  1 +
 .../components/admin/Settings/Documents.svelte | 18 ++++++++++++++++++
 6 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py
index 1e265f2ce..c832b88a2 100644
--- a/backend/open_webui/config.py
+++ b/backend/open_webui/config.py
@@ -1646,6 +1646,11 @@ BYPASS_EMBEDDING_AND_RETRIEVAL = PersistentConfig(
 RAG_TOP_K = PersistentConfig(
     "RAG_TOP_K", "rag.top_k", int(os.environ.get("RAG_TOP_K", "3"))
 )
+RAG_TOP_K_RERANKER = PersistentConfig(
+    "RAG_TOP_K_RERANKER",
+    "rag.top_k_reranker",
+    int(os.environ.get("RAG_TOP_K_RERANKER", "3"))
+)
 RAG_RELEVANCE_THRESHOLD = PersistentConfig(
     "RAG_RELEVANCE_THRESHOLD",
     "rag.relevance_threshold",
diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py
index 416460837..3c83aba11 100644
--- a/backend/open_webui/main.py
+++ b/backend/open_webui/main.py
@@ -189,6 +189,7 @@ from open_webui.config import (
     DOCUMENT_INTELLIGENCE_ENDPOINT,
     DOCUMENT_INTELLIGENCE_KEY,
     RAG_TOP_K,
+    RAG_TOP_K_RERANKER,
     RAG_TEXT_SPLITTER,
     TIKTOKEN_ENCODING_NAME,
     PDF_EXTRACT_IMAGES,
@@ -535,6 +536,7 @@ app.state.FUNCTIONS = {}
 
 
 app.state.config.TOP_K = RAG_TOP_K
+app.state.config.TOP_K_RERANKER = RAG_TOP_K_RERANKER
 app.state.config.RELEVANCE_THRESHOLD = RAG_RELEVANCE_THRESHOLD
 app.state.config.FILE_MAX_SIZE = RAG_FILE_MAX_SIZE
 app.state.config.FILE_MAX_COUNT = RAG_FILE_MAX_COUNT
diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py
index 029a33a56..965b49b88 100644
--- a/backend/open_webui/retrieval/utils.py
+++ b/backend/open_webui/retrieval/utils.py
@@ -106,6 +106,7 @@ def query_doc_with_hybrid_search(
     embedding_function,
     k: int,
     reranking_function,
+    k_reranker: int,
     r: float,
 ) -> dict:
     try:
@@ -128,7 +129,7 @@ def query_doc_with_hybrid_search(
         )
         compressor = RerankCompressor(
             embedding_function=embedding_function,
-            top_n=k,
+            top_n=k_reranker,
             reranking_function=reranking_function,
             r_score=r,
         )
@@ -267,6 +268,7 @@ def query_collection_with_hybrid_search(
     embedding_function,
     k: int,
     reranking_function,
+    k_reranker: int,
     r: float,
 ) -> dict:
     results = []
@@ -280,6 +282,7 @@ def query_collection_with_hybrid_search(
                     embedding_function=embedding_function,
                     k=k,
                     reranking_function=reranking_function,
+                    k_reranker=k_reranker,
                     r=r,
                 )
                 results.append(result)
@@ -345,6 +348,7 @@ def get_sources_from_files(
     embedding_function,
     k,
     reranking_function,
+    k_reranker,
     r,
     hybrid_search,
     full_context=False,
@@ -461,6 +465,7 @@ def get_sources_from_files(
                                     embedding_function=embedding_function,
                                     k=k,
                                     reranking_function=reranking_function,
+                                    k_reranker=k_reranker,
                                     r=r,
                                 )
                             except Exception as e:
diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py
index ac38c236e..9ab28fd39 100644
--- a/backend/open_webui/routers/retrieval.py
+++ b/backend/open_webui/routers/retrieval.py
@@ -713,6 +713,7 @@ async def get_query_settings(request: Request, user=Depends(get_admin_user)):
         "status": True,
         "template": request.app.state.config.RAG_TEMPLATE,
         "k": request.app.state.config.TOP_K,
+        "k_reranker": request.app.state.config.TOP_K_RERANKER,
         "r": request.app.state.config.RELEVANCE_THRESHOLD,
         "hybrid": request.app.state.config.ENABLE_RAG_HYBRID_SEARCH,
     }
@@ -720,6 +721,7 @@ async def get_query_settings(request: Request, user=Depends(get_admin_user)):
 
 class QuerySettingsForm(BaseModel):
     k: Optional[int] = None
+    k_reranker: Optional[int] = None
     r: Optional[float] = None
     template: Optional[str] = None
     hybrid: Optional[bool] = None
@@ -731,6 +733,7 @@ async def update_query_settings(
 ):
     request.app.state.config.RAG_TEMPLATE = form_data.template
     request.app.state.config.TOP_K = form_data.k if form_data.k else 4
+    request.app.state.config.TOP_K_RERANKER = form_data.k_reranker or 4
     request.app.state.config.RELEVANCE_THRESHOLD = form_data.r if form_data.r else 0.0
 
     request.app.state.config.ENABLE_RAG_HYBRID_SEARCH = (
@@ -741,6 +744,7 @@ async def update_query_settings(
         "status": True,
         "template": request.app.state.config.RAG_TEMPLATE,
         "k": request.app.state.config.TOP_K,
+        "k_reranker": request.app.state.config.TOP_K_RERANKER,
         "r": request.app.state.config.RELEVANCE_THRESHOLD,
         "hybrid": request.app.state.config.ENABLE_RAG_HYBRID_SEARCH,
     }
@@ -1488,6 +1492,7 @@ class QueryDocForm(BaseModel):
     collection_name: str
     query: str
     k: Optional[int] = None
+    k_reranker: Optional[int] = None
     r: Optional[float] = None
     hybrid: Optional[bool] = None
 
@@ -1508,6 +1513,7 @@ def query_doc_handler(
                 ),
                 k=form_data.k if form_data.k else request.app.state.config.TOP_K,
                 reranking_function=request.app.state.rf,
+                k_reranker=form_data.k_reranker or request.app.state.config.TOP_K_RERANKER,
                 r=(
                     form_data.r
                     if form_data.r
@@ -1536,6 +1542,7 @@ class QueryCollectionsForm(BaseModel):
     collection_names: list[str]
     query: str
     k: Optional[int] = None
+    k_reranker: Optional[int] = None
     r: Optional[float] = None
     hybrid: Optional[bool] = None
 
@@ -1556,6 +1563,7 @@ def query_collection_handler(
                 ),
                 k=form_data.k if form_data.k else request.app.state.config.TOP_K,
                 reranking_function=request.app.state.rf,
+                k_reranker=form_data.k_reranker or request.app.state.config.TOP_K_RERANKER,
                 r=(
                     form_data.r
                     if form_data.r
diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py
index 289d887df..0ec034b8f 100644
--- a/backend/open_webui/utils/middleware.py
+++ b/backend/open_webui/utils/middleware.py
@@ -567,6 +567,7 @@ async def chat_completion_files_handler(
                         ),
                         k=request.app.state.config.TOP_K,
                         reranking_function=request.app.state.rf,
+                        k_reranker=request.app.state.config.TOP_K_RERANKER,
                         r=request.app.state.config.RELEVANCE_THRESHOLD,
                         hybrid_search=request.app.state.config.ENABLE_RAG_HYBRID_SEARCH,
                         full_context=request.app.state.config.RAG_FULL_CONTEXT,
diff --git a/src/lib/components/admin/Settings/Documents.svelte b/src/lib/components/admin/Settings/Documents.svelte
index 0d911af89..1835f330a 100644
--- a/src/lib/components/admin/Settings/Documents.svelte
+++ b/src/lib/components/admin/Settings/Documents.svelte
@@ -74,6 +74,7 @@
 		template: '',
 		r: 0.0,
 		k: 4,
+		k_reranker: 4,
 		hybrid: false
 	};
 
@@ -738,6 +739,23 @@
 						</div>
 					</div>
 
+					{#if querySettings.hybrid === true}
+						<div class="mb-2.5 flex w-full justify-between">
+							<div class="self-center text-xs font-medium">{$i18n.t('Top K Reranker')}</div>
+							<div class="flex items-center relative">
+								<input
+									class="flex-1 w-full rounded-lg text-sm bg-transparent outline-hidden"
+									type="number"
+									placeholder={$i18n.t('Enter Top K Reranker')}
+									bind:value={querySettings.k_reranker}
+									autocomplete="off"
+									min="0"
+								/>
+							</div>
+						</div>
+					{/if}
+
+
 					{#if querySettings.hybrid === true}
 						<div class="  mb-2.5 flex flex-col w-full justify-between">
 							<div class=" flex w-full justify-between">

From 8b5b3f165adc22050d2f8c86cd6f172f0c7eda06 Mon Sep 17 00:00:00 2001
From: Marko Henning <marko.henning@posteo.de>
Date: Mon, 10 Mar 2025 12:35:44 +0100
Subject: [PATCH 2/7] Add translation (partially)

---
 src/lib/i18n/locales/de-DE/translation.json | 1 +
 src/lib/i18n/locales/en-GB/translation.json | 1 +
 src/lib/i18n/locales/en-US/translation.json | 1 +
 3 files changed, 3 insertions(+)

diff --git a/src/lib/i18n/locales/de-DE/translation.json b/src/lib/i18n/locales/de-DE/translation.json
index 7e721e838..15abaa70b 100644
--- a/src/lib/i18n/locales/de-DE/translation.json
+++ b/src/lib/i18n/locales/de-DE/translation.json
@@ -429,6 +429,7 @@
 	"Enter timeout in seconds": "",
 	"Enter to Send": "",
 	"Enter Top K": "Geben Sie Top K ein",
+	"Enter Top K Reranker": "Geben Sie Top K für Reranker ein",
 	"Enter URL (e.g. http://127.0.0.1:7860/)": "Geben Sie die URL ein (z. B. http://127.0.0.1:7860/)",
 	"Enter URL (e.g. http://localhost:11434)": "Geben Sie die URL ein (z. B. http://localhost:11434)",
 	"Enter your current password": "Geben Sie Ihr aktuelles Passwort ein",
diff --git a/src/lib/i18n/locales/en-GB/translation.json b/src/lib/i18n/locales/en-GB/translation.json
index e1f06f335..dc00a5836 100644
--- a/src/lib/i18n/locales/en-GB/translation.json
+++ b/src/lib/i18n/locales/en-GB/translation.json
@@ -429,6 +429,7 @@
 	"Enter timeout in seconds": "",
 	"Enter to Send": "",
 	"Enter Top K": "",
+	"Enter Top K Reranker": "",
 	"Enter URL (e.g. http://127.0.0.1:7860/)": "",
 	"Enter URL (e.g. http://localhost:11434)": "",
 	"Enter your current password": "",
diff --git a/src/lib/i18n/locales/en-US/translation.json b/src/lib/i18n/locales/en-US/translation.json
index e1f06f335..dc00a5836 100644
--- a/src/lib/i18n/locales/en-US/translation.json
+++ b/src/lib/i18n/locales/en-US/translation.json
@@ -429,6 +429,7 @@
 	"Enter timeout in seconds": "",
 	"Enter to Send": "",
 	"Enter Top K": "",
+	"Enter Top K Reranker": "",
 	"Enter URL (e.g. http://127.0.0.1:7860/)": "",
 	"Enter URL (e.g. http://localhost:11434)": "",
 	"Enter your current password": "",

From c877b59cbcf6e6c141be2241201b7726e5f78af9 Mon Sep 17 00:00:00 2001
From: Marko Henning <marko.henning@posteo.de>
Date: Tue, 18 Mar 2025 11:31:17 +0100
Subject: [PATCH 3/7] Address edge case with k < k_reranker, sort results for
 cutting off

---
 backend/open_webui/retrieval/utils.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py
index 965b49b88..df53da78d 100644
--- a/backend/open_webui/retrieval/utils.py
+++ b/backend/open_webui/retrieval/utils.py
@@ -139,10 +139,20 @@ def query_doc_with_hybrid_search(
         )
 
         result = compression_retriever.invoke(query)
+
+        distances = [d.metadata.get("score") for d in result]
+        documents = [d.page_content for d in result]
+        metadatas = [d.metadata for d in result]
+
+        # retrieve only min(k, k_reranker) items, sort and cut by distance if k < k_reranker
+        if k < k_reranker:
+            sorted_items = sorted(zip(distances, metadatas, documents), key=lambda x: x[0], reverse=True)
+            sorted_items = sorted_items[:k]
+            distances, documents, metadatas = map(list, zip(*sorted_items))
         result = {
-            "distances": [[d.metadata.get("score") for d in result]],
-            "documents": [[d.page_content for d in result]],
-            "metadatas": [[d.metadata for d in result]],
+            "distances": [distances],
+            "documents": [documents]
+            "metadatas": [metadatas],
         }
 
         log.info(

From f13948d805c973194955bb043ffbd306472edc63 Mon Sep 17 00:00:00 2001
From: Marko Henning <marko.henning@posteo.de>
Date: Tue, 18 Mar 2025 12:14:59 +0100
Subject: [PATCH 4/7] Fixed typo

---
 backend/open_webui/retrieval/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py
index df53da78d..106c9da06 100644
--- a/backend/open_webui/retrieval/utils.py
+++ b/backend/open_webui/retrieval/utils.py
@@ -151,7 +151,7 @@ def query_doc_with_hybrid_search(
             distances, documents, metadatas = map(list, zip(*sorted_items))
         result = {
             "distances": [distances],
-            "documents": [documents]
+            "documents": [documents],
             "metadatas": [metadatas],
         }
 

From ba676b7ed6a4ce141474d7c31797ea2fd8aa513a Mon Sep 17 00:00:00 2001
From: Marko Henning <marko.henning@posteo.de>
Date: Tue, 18 Mar 2025 16:25:24 +0100
Subject: [PATCH 5/7] Use k_reranker also for result merge, and add special
 sorting use case for ChromaDB

---
 backend/open_webui/retrieval/utils.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py
index 106c9da06..9b8d58352 100644
--- a/backend/open_webui/retrieval/utils.py
+++ b/backend/open_webui/retrieval/utils.py
@@ -146,7 +146,10 @@ def query_doc_with_hybrid_search(
 
         # retrieve only min(k, k_reranker) items, sort and cut by distance if k < k_reranker
         if k < k_reranker:
-            sorted_items = sorted(zip(distances, metadatas, documents), key=lambda x: x[0], reverse=True)
+            if VECTOR_DB == "chroma":
+                sorted_items = sorted(zip(distances, metadatas, documents), key=lambda x: x[0], reverse=False)
+            else:
+                sorted_items = sorted(zip(distances, metadatas, documents), key=lambda x: x[0], reverse=True)
             sorted_items = sorted_items[:k]
             distances, documents, metadatas = map(list, zip(*sorted_items))
         result = {
@@ -310,9 +313,9 @@ def query_collection_with_hybrid_search(
     if VECTOR_DB == "chroma":
         # Chroma uses unconventional cosine similarity, so we don't need to reverse the results
         # https://docs.trychroma.com/docs/collections/configure#configuring-chroma-collections
-        return merge_and_sort_query_results(results, k=k, reverse=False)
+        return merge_and_sort_query_results(results, k=k_reranker, reverse=False)
     else:
-        return merge_and_sort_query_results(results, k=k, reverse=True)
+        return merge_and_sort_query_results(results, k=k_reranker, reverse=True)
 
 
 def get_embedding_function(

From 5ab789e83e124af5383a06a80dd3e55cef713f2b Mon Sep 17 00:00:00 2001
From: Marko Henning <marko.henning@posteo.de>
Date: Tue, 18 Mar 2025 16:44:58 +0100
Subject: [PATCH 6/7] Add documentation on chroma special case

---
 backend/open_webui/retrieval/utils.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py
index 9b8d58352..1afb333b1 100644
--- a/backend/open_webui/retrieval/utils.py
+++ b/backend/open_webui/retrieval/utils.py
@@ -147,6 +147,8 @@ def query_doc_with_hybrid_search(
         # retrieve only min(k, k_reranker) items, sort and cut by distance if k < k_reranker
         if k < k_reranker:
             if VECTOR_DB == "chroma":
+                # Chroma uses unconventional cosine similarity, so we don't need to reverse the results
+                # https://docs.trychroma.com/docs/collections/configure#configuring-chroma-collections
                 sorted_items = sorted(zip(distances, metadatas, documents), key=lambda x: x[0], reverse=False)
             else:
                 sorted_items = sorted(zip(distances, metadatas, documents), key=lambda x: x[0], reverse=True)

From 5f48af5b9114d480d7c9bea71c298f1fde001563 Mon Sep 17 00:00:00 2001
From: Marko Henning <marko.henning@posteo.de>
Date: Wed, 19 Mar 2025 17:04:45 +0100
Subject: [PATCH 7/7] Revert the ordering change with chromadb, not necessary
 with reranker results

---
 backend/open_webui/retrieval/utils.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py
index 1afb333b1..d50d4d44c 100644
--- a/backend/open_webui/retrieval/utils.py
+++ b/backend/open_webui/retrieval/utils.py
@@ -146,12 +146,7 @@ def query_doc_with_hybrid_search(
 
         # retrieve only min(k, k_reranker) items, sort and cut by distance if k < k_reranker
         if k < k_reranker:
-            if VECTOR_DB == "chroma":
-                # Chroma uses unconventional cosine similarity, so we don't need to reverse the results
-                # https://docs.trychroma.com/docs/collections/configure#configuring-chroma-collections
-                sorted_items = sorted(zip(distances, metadatas, documents), key=lambda x: x[0], reverse=False)
-            else:
-                sorted_items = sorted(zip(distances, metadatas, documents), key=lambda x: x[0], reverse=True)
+            sorted_items = sorted(zip(distances, documents, metadatas), key=lambda x: x[0], reverse=True)
             sorted_items = sorted_items[:k]
             distances, documents, metadatas = map(list, zip(*sorted_items))
         result = {