Merge pull request #5829 from jannikstdl/query-embedding-perf-fix

fix: performance issues on large collections
This commit is contained in:
Timothy Jaeryang Baek 2024-10-04 10:01:17 +02:00 committed by GitHub
commit 0876c9b5ef
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -65,14 +65,13 @@ class VectorSearchRetriever(BaseRetriever):
def query_doc( def query_doc(
collection_name: str, collection_name: str,
query: str, query_embedding: list[float],
embedding_function,
k: int, k: int,
): ):
try: try:
result = VECTOR_DB_CLIENT.search( result = VECTOR_DB_CLIENT.search(
collection_name=collection_name, collection_name=collection_name,
vectors=[embedding_function(query)], vectors=[query_embedding],
limit=k, limit=k,
) )
@@ -182,15 +181,17 @@ def query_collection(
embedding_function, embedding_function,
k: int, k: int,
) -> dict: ) -> dict:
results = [] results = []
query_embedding = embedding_function(query)
for collection_name in collection_names: for collection_name in collection_names:
if collection_name: if collection_name:
try: try:
result = query_doc( result = query_doc(
collection_name=collection_name, collection_name=collection_name,
query=query,
k=k, k=k,
embedding_function=embedding_function, query_embedding=query_embedding,
) )
results.append(result.model_dump()) results.append(result.model_dump())
except Exception as e: except Exception as e: