diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py index 12d48f869..a00e6982c 100644 --- a/backend/open_webui/retrieval/utils.py +++ b/backend/open_webui/retrieval/utils.py @@ -77,6 +77,7 @@ def query_doc( collection_name: str, query_embedding: list[float], k: int, user: UserModel = None ): try: + log.debug(f"query_doc:doc {collection_name}") result = VECTOR_DB_CLIENT.search( collection_name=collection_name, vectors=[query_embedding], @@ -94,6 +95,7 @@ def query_doc( def get_doc(collection_name: str, user: UserModel = None): try: + log.debug(f"get_doc:doc {collection_name}") result = VECTOR_DB_CLIENT.get(collection_name=collection_name) if result: @@ -116,6 +118,7 @@ def query_doc_with_hybrid_search( r: float, ) -> dict: try: + log.debug(f"query_doc_with_hybrid_search:doc {collection_name}") bm25_retriever = BM25Retriever.from_texts( texts=collection_result.documents[0], metadatas=collection_result.metadatas[0], @@ -168,6 +171,7 @@ def query_doc_with_hybrid_search( ) return result except Exception as e: + log.exception(f"Error querying doc {collection_name} with hybrid search: {e}") raise e @@ -257,6 +261,7 @@ def query_collection( ) -> dict: results = [] for query in queries: + log.debug(f"query_collection:query {query}") query_embedding = embedding_function(query, prefix=RAG_EMBEDDING_QUERY_PREFIX) for collection_name in collection_names: if collection_name: @@ -292,6 +297,7 @@ def query_collection_with_hybrid_search( collection_results = {} for collection_name in collection_names: try: + log.debug(f"query_collection_with_hybrid_search:VECTOR_DB_CLIENT.get:collection {collection_name}") collection_results[collection_name] = VECTOR_DB_CLIENT.get( collection_name=collection_name ) @@ -613,6 +619,7 @@ def generate_openai_batch_embeddings( user: UserModel = None, ) -> Optional[list[list[float]]]: try: + log.debug(f"generate_openai_batch_embeddings:model {model} batch size: {len(texts)}") json_data = {"input": texts, "model": model} if isinstance(RAG_EMBEDDING_PREFIX_FIELD_NAME, str) and isinstance(prefix, str): json_data[RAG_EMBEDDING_PREFIX_FIELD_NAME] = prefix @@ -655,6 +662,7 @@ def generate_ollama_batch_embeddings( user: UserModel = None, ) -> Optional[list[list[float]]]: try: + log.debug(f"generate_ollama_batch_embeddings:model {model} batch size: {len(texts)}") json_data = {"input": texts, "model": model} if isinstance(RAG_EMBEDDING_PREFIX_FIELD_NAME, str) and isinstance(prefix, str): json_data[RAG_EMBEDDING_PREFIX_FIELD_NAME] = prefix