From bc7622c0fe33c08014e77d83002e2afda33e8d4a Mon Sep 17 00:00:00 2001
From: execgit
Date: Tue, 29 Oct 2024 14:31:47 +0200
Subject: [PATCH] Avoid logging file contents at level INFO

I had problems with document handling in rootless containers. Long
documents caused the container to hang. Reducing the verbosity of
logging from retrieval.main seemed to fix the issues I was
experiencing.
---
 backend/open_webui/apps/retrieval/main.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/backend/open_webui/apps/retrieval/main.py b/backend/open_webui/apps/retrieval/main.py
index e67d1df23..fe99c566e 100644
--- a/backend/open_webui/apps/retrieval/main.py
+++ b/backend/open_webui/apps/retrieval/main.py
@@ -636,6 +636,25 @@ async def update_query_settings(
 ####################################
 
 
+def _get_docs_info(
+    docs: list[Document]
+) -> str:
+    docs_info = set()
+
+    # Trying to select relevant metadata identifying the document.
+    for doc in docs:
+        metadata = getattr(doc, 'metadata', {})
+        doc_name = metadata.get('name', '')
+        if not doc_name:
+            doc_name = metadata.get('title', '')
+        if not doc_name:
+            doc_name = metadata.get('source', '')
+        if doc_name:
+            docs_info.add(doc_name)
+
+    return ', '.join(docs_info)
+
+
 def save_docs_to_vector_db(
     docs,
     collection_name,
@@ -644,7 +663,7 @@ def save_docs_to_vector_db(
     split: bool = True,
     add: bool = False,
 ) -> bool:
-    log.info(f"save_docs_to_vector_db {docs} {collection_name}")
+    log.info(f"save_docs_to_vector_db: document {_get_docs_info(docs)} {collection_name}")
 
     # Check if entries with the same hash (metadata.hash) already exist
     if metadata and "hash" in metadata:
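
For illustration only, outside the patch itself: a minimal standalone sketch of the before/after logging behaviour. It assumes a langchain-style Document object exposing a `metadata` dict; the `SimpleDocument` class and the collection name below are hypothetical stand-ins, not names from the repository.

# Sketch: interpolating `docs` into an f-string calls repr() on each Document,
# which embeds the full page_content in the log line; summarising metadata
# keeps the log line short.
import logging
from dataclasses import dataclass, field

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("retrieval")


@dataclass
class SimpleDocument:
    # Stand-in for a langchain-style Document: text plus a metadata dict.
    page_content: str
    metadata: dict = field(default_factory=dict)


def _get_docs_info(docs) -> str:
    # Mirrors the helper added by the patch: prefer 'name', then 'title',
    # then 'source', and join the distinct identifiers found.
    docs_info = set()
    for doc in docs:
        metadata = getattr(doc, "metadata", {})
        doc_name = (
            metadata.get("name", "")
            or metadata.get("title", "")
            or metadata.get("source", "")
        )
        if doc_name:
            docs_info.add(doc_name)
    return ", ".join(docs_info)


docs = [SimpleDocument("x" * 1_000_000, {"name": "report.pdf"})]

# Before the patch: repr(docs) would put the full million-character
# page_content into the INFO log line.
# log.info(f"save_docs_to_vector_db {docs} my_collection")

# After the patch: only the identifying metadata is logged.
log.info(f"save_docs_to_vector_db: document {_get_docs_info(docs)} my_collection")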