Merge pull request #12615 from robert-norberg/additional-RAG-logging

chore: Add debug logging to RAG utils
This commit is contained in:
Tim Jaeryang Baek 2025-04-08 13:03:38 -07:00 committed by GitHub
commit 8c3146f6f8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -77,6 +77,7 @@ def query_doc(
collection_name: str, query_embedding: list[float], k: int, user: UserModel = None
):
try:
log.debug(f"query_doc:doc {collection_name}")
result = VECTOR_DB_CLIENT.search(
collection_name=collection_name,
vectors=[query_embedding],
@ -94,6 +95,7 @@ def query_doc(
def get_doc(collection_name: str, user: UserModel = None):
try:
log.debug(f"get_doc:doc {collection_name}")
result = VECTOR_DB_CLIENT.get(collection_name=collection_name)
if result:
@ -116,6 +118,7 @@ def query_doc_with_hybrid_search(
r: float,
) -> dict:
try:
log.debug(f"query_doc_with_hybrid_search:doc {collection_name}")
bm25_retriever = BM25Retriever.from_texts(
texts=collection_result.documents[0],
metadatas=collection_result.metadatas[0],
@ -168,6 +171,7 @@ def query_doc_with_hybrid_search(
)
return result
except Exception as e:
log.exception(f"Error querying doc {collection_name} with hybrid search: {e}")
raise e
@ -257,6 +261,7 @@ def query_collection(
) -> dict:
results = []
for query in queries:
log.debug(f"query_collection:query {query}")
query_embedding = embedding_function(query, prefix=RAG_EMBEDDING_QUERY_PREFIX)
for collection_name in collection_names:
if collection_name:
@ -292,6 +297,7 @@ def query_collection_with_hybrid_search(
collection_results = {}
for collection_name in collection_names:
try:
log.debug(f"query_collection_with_hybrid_search:VECTOR_DB_CLIENT.get:collection {collection_name}")
collection_results[collection_name] = VECTOR_DB_CLIENT.get(
collection_name=collection_name
)
@ -613,6 +619,7 @@ def generate_openai_batch_embeddings(
user: UserModel = None,
) -> Optional[list[list[float]]]:
try:
log.debug(f"generate_openai_batch_embeddings:model {model} batch size: {len(texts)}")
json_data = {"input": texts, "model": model}
if isinstance(RAG_EMBEDDING_PREFIX_FIELD_NAME, str) and isinstance(prefix, str):
json_data[RAG_EMBEDDING_PREFIX_FIELD_NAME] = prefix
@ -655,6 +662,7 @@ def generate_ollama_batch_embeddings(
user: UserModel = None,
) -> Optional[list[list[float]]]:
try:
log.debug(f"generate_ollama_batch_embeddings:model {model} batch size: {len(texts)}")
json_data = {"input": texts, "model": model}
if isinstance(RAG_EMBEDDING_PREFIX_FIELD_NAME, str) and isinstance(prefix, str):
json_data[RAG_EMBEDDING_PREFIX_FIELD_NAME] = prefix