mirror of
https://github.com/open-webui/open-webui
synced 2025-05-30 10:32:53 +00:00
Merge pull request #12239 from Phlogi/dev-threads-on-hybrid
perf: parallelize hybrid search
This commit is contained in:
commit
3ba12e7a43
@ -4,6 +4,7 @@ from typing import Optional, Union
|
|||||||
|
|
||||||
import requests
|
import requests
|
||||||
import hashlib
|
import hashlib
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
|
||||||
from huggingface_hub import snapshot_download
|
from huggingface_hub import snapshot_download
|
||||||
from langchain.retrievers import ContextualCompressionRetriever, EnsembleRetriever
|
from langchain.retrievers import ContextualCompressionRetriever, EnsembleRetriever
|
||||||
@ -298,30 +299,39 @@ def query_collection_with_hybrid_search(
|
|||||||
log.exception(f"Failed to fetch collection {collection_name}: {e}")
|
log.exception(f"Failed to fetch collection {collection_name}: {e}")
|
||||||
collection_results[collection_name] = None
|
collection_results[collection_name] = None
|
||||||
|
|
||||||
for collection_name in collection_names:
|
log.info(f"Starting hybrid search for {len(queries)} queries in {len(collection_names)} collections...")
|
||||||
|
def process_query(collection_name, query):
|
||||||
try:
|
try:
|
||||||
for query in queries:
|
result = query_doc_with_hybrid_search(
|
||||||
result = query_doc_with_hybrid_search(
|
collection_name=collection_name,
|
||||||
collection_name=collection_name,
|
collection_result=collection_results[collection_name],
|
||||||
collection_result=collection_results[collection_name],
|
query=query,
|
||||||
query=query,
|
embedding_function=embedding_function,
|
||||||
embedding_function=embedding_function,
|
k=k,
|
||||||
k=k,
|
reranking_function=reranking_function,
|
||||||
reranking_function=reranking_function,
|
k_reranker=k_reranker,
|
||||||
k_reranker=k_reranker,
|
r=r,
|
||||||
r=r,
|
|
||||||
)
|
|
||||||
results.append(result)
|
|
||||||
except Exception as e:
|
|
||||||
log.exception(
|
|
||||||
"Error when querying the collection with " f"hybrid_search: {e}"
|
|
||||||
)
|
)
|
||||||
error = True
|
return result, None
|
||||||
|
except Exception as e:
|
||||||
|
log.exception(f"Error when querying the collection with hybrid_search: {e}")
|
||||||
|
return None, e
|
||||||
|
|
||||||
if error:
|
tasks = [(collection_name, query) for collection_name in collection_names for query in queries]
|
||||||
raise Exception(
|
|
||||||
"Hybrid search failed for all collections. Using Non hybrid search as fallback."
|
with ThreadPoolExecutor() as executor:
|
||||||
)
|
future_results = [executor.submit(process_query, cn, q) for cn, q in tasks]
|
||||||
|
task_results = [future.result() for future in future_results]
|
||||||
|
|
||||||
|
for result, err in task_results:
|
||||||
|
if err is not None:
|
||||||
|
error = True
|
||||||
|
elif result is not None:
|
||||||
|
results.append(result)
|
||||||
|
|
||||||
|
if error and not results:
|
||||||
|
raise Exception("Hybrid search failed for all collections. Using Non-hybrid search as fallback.")
|
||||||
|
|
||||||
return merge_and_sort_query_results(results, k=k)
|
return merge_and_sort_query_results(results, k=k)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user