~ call knowledge searches in parallel in non-hybrid mode

This commit is contained in:
Alexander Grimm 2025-04-23 11:17:12 +02:00
parent 5030041683
commit d182155fac

View File

@ -260,23 +260,47 @@ def query_collection(
k: int, k: int,
) -> dict: ) -> dict:
results = [] results = []
for query in queries: error = False
log.debug(f"query_collection:query {query}")
query_embedding = embedding_function(query, prefix=RAG_EMBEDDING_QUERY_PREFIX) def process_query_collection(collection_name, query_embedding):
for collection_name in collection_names: try:
if collection_name: if collection_name:
try: result = query_doc(
result = query_doc( collection_name=collection_name,
collection_name=collection_name, k=k,
k=k, query_embedding=query_embedding,
query_embedding=query_embedding, )
) if result is not None:
if result is not None: return result.model_dump(), None
results.append(result.model_dump()) return None, None
except Exception as e: except Exception as e:
log.exception(f"Error when querying the collection: {e}") log.exception(f"Error when querying the collection: {e}")
else: return None, e
pass
# Generate all query embeddings (in one call)
query_embeddings = embedding_function(queries, prefix=RAG_EMBEDDING_QUERY_PREFIX)
log.debug(
f"query_collection: processing {len(queries)} queries across {len(collection_names)} collections"
)
with ThreadPoolExecutor() as executor:
future_results = []
for query_embedding in query_embeddings:
for collection_name in collection_names:
result = executor.submit(
process_query_collection, collection_name, query_embedding
)
future_results.append(result)
task_results = [future.result() for future in future_results]
for result, err in task_results:
if err is not None:
error = True
elif result is not None:
results.append(result)
if error and not results:
log.warning("All collection queries failed. No results returned.")
return merge_and_sort_query_results(results, k=k) return merge_and_sort_query_results(results, k=k)