diff --git a/backend/open_webui/apps/retrieval/main.py b/backend/open_webui/apps/retrieval/main.py index 0f81d6573..21953b4d7 100644 --- a/backend/open_webui/apps/retrieval/main.py +++ b/backend/open_webui/apps/retrieval/main.py @@ -637,6 +637,25 @@ async def update_query_settings( #################################### +def _get_docs_info( + docs: list[Document] +) -> str: + docs_info = set() + + # Trying to select relevant metadata identifying the document. + for doc in docs: + metadata = getattr(doc, 'metadata', {}) + doc_name = metadata.get('name', '') + if not doc_name: + doc_name = metadata.get('title', '') + if not doc_name: + doc_name = metadata.get('source', '') + if doc_name: + docs_info.add(doc_name) + + return ', '.join(docs_info) + + def save_docs_to_vector_db( docs, collection_name, @@ -645,7 +664,7 @@ def save_docs_to_vector_db( split: bool = True, add: bool = False, ) -> bool: - log.info(f"save_docs_to_vector_db {docs} {collection_name}") + log.info(f"save_docs_to_vector_db: document {_get_docs_info(docs)} {collection_name}") # Check if entries with the same hash (metadata.hash) already exist if metadata and "hash" in metadata: diff --git a/backend/open_webui/apps/retrieval/utils.py b/backend/open_webui/apps/retrieval/utils.py index 153bd804f..1866d6d2f 100644 --- a/backend/open_webui/apps/retrieval/utils.py +++ b/backend/open_webui/apps/retrieval/utils.py @@ -76,7 +76,7 @@ def query_doc( limit=k, ) - log.info(f"query_doc:result {result}") + log.info(f"query_doc:result {result.ids} {result.metadatas}") return result except Exception as e: print(e) @@ -127,7 +127,10 @@ def query_doc_with_hybrid_search( "metadatas": [[d.metadata for d in result]], } - log.info(f"query_doc_with_hybrid_search:result {result}") + log.info( + "query_doc_with_hybrid_search:result " + + f"{result.metadatas} {result.distances}" + ) return result except Exception as e: raise e