Avoid logging file contents at level INFO

I had problems with document handling in rootless containers. Long documents caused the container to hang. Reducing the verbosity of logging from retrieval.main seemed to fix the issues I was experiencing.
This commit is contained in:
execgit 2024-10-29 14:31:47 +02:00
parent 09935d191f
commit bc7622c0fe

View File

@@ -636,6 +636,25 @@ async def update_query_settings(
#################################### ####################################
def _get_docs_info(
    docs: list[Document]
) -> str:
    """Return a short, log-friendly summary naming the given documents.

    For each document the first non-empty metadata value among ``name``,
    ``title`` and ``source`` is used as its label. Labels are de-duplicated
    and reported in first-seen order, so the resulting text is stable
    between runs.

    Args:
        docs: Documents to describe. ``None`` is treated as an empty list.
            Objects without a ``metadata`` attribute, or whose metadata is
            empty or ``None``, are skipped.

    Returns:
        The distinct labels joined with ``", "``, or an empty string when
        no document carries a usable label.
    """
    # A dict serves as an ordered set: unlike ``set``, its iteration order
    # does not depend on string hash randomisation.
    labels: dict[str, None] = {}
    for doc in docs or ():
        # ``metadata`` may be missing or explicitly None; both mean "no info".
        metadata = getattr(doc, "metadata", None) or {}
        # Trying to select relevant metadata identifying the document.
        label = (
            metadata.get("name")
            or metadata.get("title")
            or metadata.get("source")
        )
        if label:
            # Metadata values are not guaranteed to be strings, and
            # ``str.join`` raises TypeError on anything else.
            labels[str(label)] = None
    return ", ".join(labels)
def save_docs_to_vector_db( def save_docs_to_vector_db(
docs, docs,
collection_name, collection_name,
@@ -644,7 +663,7 @@ def save_docs_to_vector_db(
split: bool = True, split: bool = True,
add: bool = False, add: bool = False,
) -> bool: ) -> bool:
log.info(f"save_docs_to_vector_db {docs} {collection_name}") log.info(f"save_docs_to_vector_db: document {_get_docs_info(docs)} {collection_name}")
# Check if entries with the same hash (metadata.hash) already exist # Check if entries with the same hash (metadata.hash) already exist
if metadata and "hash" in metadata: if metadata and "hash" in metadata: