From 41a4cf7106639b46b68a3ef4117129e14b54c633 Mon Sep 17 00:00:00 2001 From: Marko Henning <marko.henning@posteo.de> Date: Thu, 6 Mar 2025 10:47:57 +0100 Subject: [PATCH 1/7] Added new k_reranker parameter --- backend/open_webui/config.py | 5 +++++ backend/open_webui/main.py | 2 ++ backend/open_webui/retrieval/utils.py | 7 ++++++- backend/open_webui/routers/retrieval.py | 8 ++++++++ backend/open_webui/utils/middleware.py | 1 + .../components/admin/Settings/Documents.svelte | 18 ++++++++++++++++++ 6 files changed, 40 insertions(+), 1 deletion(-) diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index 1e265f2ce..c832b88a2 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -1646,6 +1646,11 @@ BYPASS_EMBEDDING_AND_RETRIEVAL = PersistentConfig( RAG_TOP_K = PersistentConfig( "RAG_TOP_K", "rag.top_k", int(os.environ.get("RAG_TOP_K", "3")) ) +RAG_TOP_K_RERANKER = PersistentConfig( + "RAG_TOP_K_RERANKER", + "rag.top_k_reranker", + int(os.environ.get("RAG_TOP_K_RERANKER", "3")) +) RAG_RELEVANCE_THRESHOLD = PersistentConfig( "RAG_RELEVANCE_THRESHOLD", "rag.relevance_threshold", diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 416460837..3c83aba11 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -189,6 +189,7 @@ from open_webui.config import ( DOCUMENT_INTELLIGENCE_ENDPOINT, DOCUMENT_INTELLIGENCE_KEY, RAG_TOP_K, + RAG_TOP_K_RERANKER, RAG_TEXT_SPLITTER, TIKTOKEN_ENCODING_NAME, PDF_EXTRACT_IMAGES, @@ -535,6 +536,7 @@ app.state.FUNCTIONS = {} app.state.config.TOP_K = RAG_TOP_K +app.state.config.TOP_K_RERANKER = RAG_TOP_K_RERANKER app.state.config.RELEVANCE_THRESHOLD = RAG_RELEVANCE_THRESHOLD app.state.config.FILE_MAX_SIZE = RAG_FILE_MAX_SIZE app.state.config.FILE_MAX_COUNT = RAG_FILE_MAX_COUNT diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py index 029a33a56..965b49b88 100644 --- a/backend/open_webui/retrieval/utils.py +++ 
b/backend/open_webui/retrieval/utils.py @@ -106,6 +106,7 @@ def query_doc_with_hybrid_search( embedding_function, k: int, reranking_function, + k_reranker: int, r: float, ) -> dict: try: @@ -128,7 +129,7 @@ def query_doc_with_hybrid_search( ) compressor = RerankCompressor( embedding_function=embedding_function, - top_n=k, + top_n=k_reranker, reranking_function=reranking_function, r_score=r, ) @@ -267,6 +268,7 @@ def query_collection_with_hybrid_search( embedding_function, k: int, reranking_function, + k_reranker: int, r: float, ) -> dict: results = [] @@ -280,6 +282,7 @@ def query_collection_with_hybrid_search( embedding_function=embedding_function, k=k, reranking_function=reranking_function, + k_reranker=k_reranker, r=r, ) results.append(result) @@ -345,6 +348,7 @@ def get_sources_from_files( embedding_function, k, reranking_function, + k_reranker, r, hybrid_search, full_context=False, @@ -461,6 +465,7 @@ def get_sources_from_files( embedding_function=embedding_function, k=k, reranking_function=reranking_function, + k_reranker=k_reranker, r=r, ) except Exception as e: diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index ac38c236e..9ab28fd39 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -713,6 +713,7 @@ async def get_query_settings(request: Request, user=Depends(get_admin_user)): "status": True, "template": request.app.state.config.RAG_TEMPLATE, "k": request.app.state.config.TOP_K, + "k_reranker": request.app.state.config.TOP_K_RERANKER, "r": request.app.state.config.RELEVANCE_THRESHOLD, "hybrid": request.app.state.config.ENABLE_RAG_HYBRID_SEARCH, } @@ -720,6 +721,7 @@ async def get_query_settings(request: Request, user=Depends(get_admin_user)): class QuerySettingsForm(BaseModel): k: Optional[int] = None + k_reranker: Optional[int] = None r: Optional[float] = None template: Optional[str] = None hybrid: Optional[bool] = None @@ -731,6 +733,7 @@ async def 
update_query_settings( ): request.app.state.config.RAG_TEMPLATE = form_data.template request.app.state.config.TOP_K = form_data.k if form_data.k else 4 + request.app.state.config.TOP_K_RERANKER = form_data.k_reranker or 4 request.app.state.config.RELEVANCE_THRESHOLD = form_data.r if form_data.r else 0.0 request.app.state.config.ENABLE_RAG_HYBRID_SEARCH = ( @@ -741,6 +744,7 @@ async def update_query_settings( "status": True, "template": request.app.state.config.RAG_TEMPLATE, "k": request.app.state.config.TOP_K, + "k_reranker": request.app.state.config.TOP_K_RERANKER, "r": request.app.state.config.RELEVANCE_THRESHOLD, "hybrid": request.app.state.config.ENABLE_RAG_HYBRID_SEARCH, } @@ -1488,6 +1492,7 @@ class QueryDocForm(BaseModel): collection_name: str query: str k: Optional[int] = None + k_reranker: Optional[int] = None r: Optional[float] = None hybrid: Optional[bool] = None @@ -1508,6 +1513,7 @@ def query_doc_handler( ), k=form_data.k if form_data.k else request.app.state.config.TOP_K, reranking_function=request.app.state.rf, + k_reranker=form_data.k_reranker or request.app.state.config.TOP_K_RERANKER, r=( form_data.r if form_data.r @@ -1536,6 +1542,7 @@ class QueryCollectionsForm(BaseModel): collection_names: list[str] query: str k: Optional[int] = None + k_reranker: Optional[int] = None r: Optional[float] = None hybrid: Optional[bool] = None @@ -1556,6 +1563,7 @@ def query_collection_handler( ), k=form_data.k if form_data.k else request.app.state.config.TOP_K, reranking_function=request.app.state.rf, + k_reranker=form_data.k_reranker or request.app.state.config.TOP_K_RERANKER, r=( form_data.r if form_data.r diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py index 289d887df..0ec034b8f 100644 --- a/backend/open_webui/utils/middleware.py +++ b/backend/open_webui/utils/middleware.py @@ -567,6 +567,7 @@ async def chat_completion_files_handler( ), k=request.app.state.config.TOP_K, reranking_function=request.app.state.rf, + 
k_reranker=request.app.state.config.TOP_K_RERANKER, r=request.app.state.config.RELEVANCE_THRESHOLD, hybrid_search=request.app.state.config.ENABLE_RAG_HYBRID_SEARCH, full_context=request.app.state.config.RAG_FULL_CONTEXT, diff --git a/src/lib/components/admin/Settings/Documents.svelte b/src/lib/components/admin/Settings/Documents.svelte index 0d911af89..1835f330a 100644 --- a/src/lib/components/admin/Settings/Documents.svelte +++ b/src/lib/components/admin/Settings/Documents.svelte @@ -74,6 +74,7 @@ template: '', r: 0.0, k: 4, + k_reranker: 4, hybrid: false }; @@ -738,6 +739,23 @@ </div> </div> + {#if querySettings.hybrid === true} + <div class="mb-2.5 flex w-full justify-between"> + <div class="self-center text-xs font-medium">{$i18n.t('Top K Reranker')}</div> + <div class="flex items-center relative"> + <input + class="flex-1 w-full rounded-lg text-sm bg-transparent outline-hidden" + type="number" + placeholder={$i18n.t('Enter Top K Reranker')} + bind:value={querySettings.k_reranker} + autocomplete="off" + min="0" + /> + </div> + </div> + {/if} + + {#if querySettings.hybrid === true} <div class=" mb-2.5 flex flex-col w-full justify-between"> <div class=" flex w-full justify-between"> From 8b5b3f165adc22050d2f8c86cd6f172f0c7eda06 Mon Sep 17 00:00:00 2001 From: Marko Henning <marko.henning@posteo.de> Date: Mon, 10 Mar 2025 12:35:44 +0100 Subject: [PATCH 2/7] Add translation (partially) --- src/lib/i18n/locales/de-DE/translation.json | 1 + src/lib/i18n/locales/en-GB/translation.json | 1 + src/lib/i18n/locales/en-US/translation.json | 1 + 3 files changed, 3 insertions(+) diff --git a/src/lib/i18n/locales/de-DE/translation.json b/src/lib/i18n/locales/de-DE/translation.json index 7e721e838..15abaa70b 100644 --- a/src/lib/i18n/locales/de-DE/translation.json +++ b/src/lib/i18n/locales/de-DE/translation.json @@ -429,6 +429,7 @@ "Enter timeout in seconds": "", "Enter to Send": "", "Enter Top K": "Geben Sie Top K ein", + "Enter Top K Reranker": "Geben Sie Top K für Reranker 
ein", "Enter URL (e.g. http://127.0.0.1:7860/)": "Geben Sie die URL ein (z. B. http://127.0.0.1:7860/)", "Enter URL (e.g. http://localhost:11434)": "Geben Sie die URL ein (z. B. http://localhost:11434)", "Enter your current password": "Geben Sie Ihr aktuelles Passwort ein", diff --git a/src/lib/i18n/locales/en-GB/translation.json b/src/lib/i18n/locales/en-GB/translation.json index e1f06f335..dc00a5836 100644 --- a/src/lib/i18n/locales/en-GB/translation.json +++ b/src/lib/i18n/locales/en-GB/translation.json @@ -429,6 +429,7 @@ "Enter timeout in seconds": "", "Enter to Send": "", "Enter Top K": "", + "Enter Top K Reranker": "", "Enter URL (e.g. http://127.0.0.1:7860/)": "", "Enter URL (e.g. http://localhost:11434)": "", "Enter your current password": "", diff --git a/src/lib/i18n/locales/en-US/translation.json b/src/lib/i18n/locales/en-US/translation.json index e1f06f335..dc00a5836 100644 --- a/src/lib/i18n/locales/en-US/translation.json +++ b/src/lib/i18n/locales/en-US/translation.json @@ -429,6 +429,7 @@ "Enter timeout in seconds": "", "Enter to Send": "", "Enter Top K": "", + "Enter Top K Reranker": "", "Enter URL (e.g. http://127.0.0.1:7860/)": "", "Enter URL (e.g. 
http://localhost:11434)": "", "Enter your current password": "", From c877b59cbcf6e6c141be2241201b7726e5f78af9 Mon Sep 17 00:00:00 2001 From: Marko Henning <marko.henning@posteo.de> Date: Tue, 18 Mar 2025 11:31:17 +0100 Subject: [PATCH 3/7] Address edge case with k < k_reranker, sort results for cutting off --- backend/open_webui/retrieval/utils.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py index 965b49b88..df53da78d 100644 --- a/backend/open_webui/retrieval/utils.py +++ b/backend/open_webui/retrieval/utils.py @@ -139,10 +139,20 @@ def query_doc_with_hybrid_search( ) result = compression_retriever.invoke(query) + + distances = [d.metadata.get("score") for d in result] + documents = [d.page_content for d in result] + metadatas = [d.metadata for d in result] + + # retrieve only min(k, k_reranker) items, sort and cut by distance if k < k_reranker + if k < k_reranker: + sorted_items = sorted(zip(distances, metadatas, documents), key=lambda x: x[0], reverse=True) + sorted_items = sorted_items[:k] + distances, documents, metadatas = map(list, zip(*sorted_items)) result = { - "distances": [[d.metadata.get("score") for d in result]], - "documents": [[d.page_content for d in result]], - "metadatas": [[d.metadata for d in result]], + "distances": [distances], + "documents": [documents] + "metadatas": [metadatas], } log.info( From f13948d805c973194955bb043ffbd306472edc63 Mon Sep 17 00:00:00 2001 From: Marko Henning <marko.henning@posteo.de> Date: Tue, 18 Mar 2025 12:14:59 +0100 Subject: [PATCH 4/7] Fixed typo --- backend/open_webui/retrieval/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py index df53da78d..106c9da06 100644 --- a/backend/open_webui/retrieval/utils.py +++ b/backend/open_webui/retrieval/utils.py @@ -151,7 +151,7 @@ def 
query_doc_with_hybrid_search( distances, documents, metadatas = map(list, zip(*sorted_items)) result = { "distances": [distances], - "documents": [documents] + "documents": [documents], "metadatas": [metadatas], } From ba676b7ed6a4ce141474d7c31797ea2fd8aa513a Mon Sep 17 00:00:00 2001 From: Marko Henning <marko.henning@posteo.de> Date: Tue, 18 Mar 2025 16:25:24 +0100 Subject: [PATCH 5/7] Use k_reranker also for result merge, and add special sorting use case for ChromaDB --- backend/open_webui/retrieval/utils.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py index 106c9da06..9b8d58352 100644 --- a/backend/open_webui/retrieval/utils.py +++ b/backend/open_webui/retrieval/utils.py @@ -146,7 +146,10 @@ def query_doc_with_hybrid_search( # retrieve only min(k, k_reranker) items, sort and cut by distance if k < k_reranker if k < k_reranker: - sorted_items = sorted(zip(distances, metadatas, documents), key=lambda x: x[0], reverse=True) + if VECTOR_DB == "chroma": + sorted_items = sorted(zip(distances, metadatas, documents), key=lambda x: x[0], reverse=False) + else: + sorted_items = sorted(zip(distances, metadatas, documents), key=lambda x: x[0], reverse=True) sorted_items = sorted_items[:k] distances, documents, metadatas = map(list, zip(*sorted_items)) result = { @@ -310,9 +313,9 @@ def query_collection_with_hybrid_search( if VECTOR_DB == "chroma": # Chroma uses unconventional cosine similarity, so we don't need to reverse the results # https://docs.trychroma.com/docs/collections/configure#configuring-chroma-collections - return merge_and_sort_query_results(results, k=k, reverse=False) + return merge_and_sort_query_results(results, k=k_reranker, reverse=False) else: - return merge_and_sort_query_results(results, k=k, reverse=True) + return merge_and_sort_query_results(results, k=k_reranker, reverse=True) def get_embedding_function( From 
5ab789e83e124af5383a06a80dd3e55cef713f2b Mon Sep 17 00:00:00 2001 From: Marko Henning <marko.henning@posteo.de> Date: Tue, 18 Mar 2025 16:44:58 +0100 Subject: [PATCH 6/7] Add documentation on chroma special case --- backend/open_webui/retrieval/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py index 9b8d58352..1afb333b1 100644 --- a/backend/open_webui/retrieval/utils.py +++ b/backend/open_webui/retrieval/utils.py @@ -147,6 +147,8 @@ def query_doc_with_hybrid_search( # retrieve only min(k, k_reranker) items, sort and cut by distance if k < k_reranker if k < k_reranker: if VECTOR_DB == "chroma": + # Chroma uses unconventional cosine similarity, so we don't need to reverse the results + # https://docs.trychroma.com/docs/collections/configure#configuring-chroma-collections sorted_items = sorted(zip(distances, metadatas, documents), key=lambda x: x[0], reverse=False) else: sorted_items = sorted(zip(distances, metadatas, documents), key=lambda x: x[0], reverse=True) From 5f48af5b9114d480d7c9bea71c298f1fde001563 Mon Sep 17 00:00:00 2001 From: Marko Henning <marko.henning@posteo.de> Date: Wed, 19 Mar 2025 17:04:45 +0100 Subject: [PATCH 7/7] Revert the ordering change with chromadb, not necessary with reranker results --- backend/open_webui/retrieval/utils.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py index 1afb333b1..d50d4d44c 100644 --- a/backend/open_webui/retrieval/utils.py +++ b/backend/open_webui/retrieval/utils.py @@ -146,12 +146,7 @@ def query_doc_with_hybrid_search( # retrieve only min(k, k_reranker) items, sort and cut by distance if k < k_reranker if k < k_reranker: - if VECTOR_DB == "chroma": - # Chroma uses unconventional cosine similarity, so we don't need to reverse the results - # https://docs.trychroma.com/docs/collections/configure#configuring-chroma-collections - 
sorted_items = sorted(zip(distances, metadatas, documents), key=lambda x: x[0], reverse=False) - else: - sorted_items = sorted(zip(distances, metadatas, documents), key=lambda x: x[0], reverse=True) + sorted_items = sorted(zip(distances, metadatas, documents), key=lambda x: x[0], reverse=True) sorted_items = sorted_items[:k] distances, documents, metadatas = map(list, zip(*sorted_items)) result = {