mirror of
https://github.com/open-webui/open-webui
synced 2025-06-08 23:47:14 +00:00
Run hybrid_search in parallel
This commit is contained in:
parent
6ac153c989
commit
9c64310db5
@ -4,6 +4,7 @@ from typing import Optional, Union
|
|||||||
|
|
||||||
import requests
|
import requests
|
||||||
import hashlib
|
import hashlib
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
|
||||||
from huggingface_hub import snapshot_download
|
from huggingface_hub import snapshot_download
|
||||||
from langchain.retrievers import ContextualCompressionRetriever, EnsembleRetriever
|
from langchain.retrievers import ContextualCompressionRetriever, EnsembleRetriever
|
||||||
@ -298,9 +299,9 @@ def query_collection_with_hybrid_search(
|
|||||||
log.exception(f"Failed to fetch collection {collection_name}: {e}")
|
log.exception(f"Failed to fetch collection {collection_name}: {e}")
|
||||||
collection_results[collection_name] = None
|
collection_results[collection_name] = None
|
||||||
|
|
||||||
for collection_name in collection_names:
|
log.info(f"Starting hybrid search for {len(queries)} queries in {len(collection_names)} collections...")
|
||||||
|
def process_query(collection_name, query):
|
||||||
try:
|
try:
|
||||||
for query in queries:
|
|
||||||
result = query_doc_with_hybrid_search(
|
result = query_doc_with_hybrid_search(
|
||||||
collection_name=collection_name,
|
collection_name=collection_name,
|
||||||
collection_result=collection_results[collection_name],
|
collection_result=collection_results[collection_name],
|
||||||
@ -311,17 +312,26 @@ def query_collection_with_hybrid_search(
|
|||||||
k_reranker=k_reranker,
|
k_reranker=k_reranker,
|
||||||
r=r,
|
r=r,
|
||||||
)
|
)
|
||||||
results.append(result)
|
return result, None
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.exception(
|
log.exception(f"Error when querying the collection with hybrid_search: {e}")
|
||||||
"Error when querying the collection with " f"hybrid_search: {e}"
|
return None, e
|
||||||
)
|
|
||||||
error = True
|
tasks = [(collection_name, query) for collection_name in collection_names for query in queries]
|
||||||
|
|
||||||
|
with ThreadPoolExecutor() as executor:
|
||||||
|
future_results = [executor.submit(process_query, cn, q) for cn, q in tasks]
|
||||||
|
task_results = [future.result() for future in future_results]
|
||||||
|
|
||||||
|
for result, err in task_results:
|
||||||
|
if err is not None:
|
||||||
|
error = True
|
||||||
|
elif result is not None:
|
||||||
|
results.append(result)
|
||||||
|
|
||||||
|
if error and not results:
|
||||||
|
raise Exception("Hybrid search failed for all collections. Using Non-hybrid search as fallback.")
|
||||||
|
|
||||||
if error:
|
|
||||||
raise Exception(
|
|
||||||
"Hybrid search failed for all collections. Using Non hybrid search as fallback."
|
|
||||||
)
|
|
||||||
return merge_and_sort_query_results(results, k=k)
|
return merge_and_sort_query_results(results, k=k)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user