Merge pull request #6543 from execgit/main
Some checks are pending
Deploy to HuggingFace Spaces / check-secret (push) Waiting to run
Deploy to HuggingFace Spaces / deploy (push) Blocked by required conditions
Create and publish Docker images with specific build args / build-main-image (linux/amd64) (push) Waiting to run
Create and publish Docker images with specific build args / build-main-image (linux/arm64) (push) Waiting to run
Create and publish Docker images with specific build args / build-cuda-image (linux/amd64) (push) Waiting to run
Create and publish Docker images with specific build args / build-cuda-image (linux/arm64) (push) Waiting to run
Create and publish Docker images with specific build args / build-ollama-image (linux/amd64) (push) Waiting to run
Create and publish Docker images with specific build args / build-ollama-image (linux/arm64) (push) Waiting to run
Create and publish Docker images with specific build args / merge-main-images (push) Blocked by required conditions
Create and publish Docker images with specific build args / merge-cuda-images (push) Blocked by required conditions
Create and publish Docker images with specific build args / merge-ollama-images (push) Blocked by required conditions
Python CI / Format Backend (3.11) (push) Waiting to run
Frontend Build / Format & Build Frontend (push) Waiting to run
Frontend Build / Frontend Unit Tests (push) Waiting to run
Integration Test / Run Cypress Integration Tests (push) Waiting to run
Integration Test / Run Migration Tests (push) Waiting to run

fix: omit document contents in logging document embedding
This commit is contained in:
Timothy Jaeryang Baek 2024-10-29 19:17:53 -07:00 committed by GitHub
commit adfce28732
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 25 additions and 3 deletions

View File

@ -637,6 +637,25 @@ async def update_query_settings(
####################################
def _get_docs_info(
    docs: "list[Document]"
) -> str:
    """Return a comma-separated list of names identifying ``docs``.

    For each document the first non-empty metadata value among ``name``,
    ``title`` and ``source`` (checked in that order) is used as its
    identifier; documents that provide none of them are skipped. Only
    these metadata fields are read, so the result can be logged without
    including the documents' contents.

    Args:
        docs: Iterable of objects that may carry a ``metadata`` mapping.
            ``None`` or an empty iterable yields an empty string.

    Returns:
        The distinct identifiers joined by ``", "``, in first-seen order.
    """
    # A dict serves as an insertion-ordered set: duplicates collapse while the
    # output order stays deterministic (a plain set would make the log line
    # vary from run to run).
    identifiers: dict[str, None] = {}
    for doc in docs or ():
        # ``metadata`` may be absent *or* explicitly None; treat both as empty.
        metadata = getattr(doc, "metadata", None) or {}
        for key in ("name", "title", "source"):
            value = metadata.get(key)
            if value:
                # Coerce to str so a non-string value (e.g. a Path used as
                # ``source``) cannot make the final join raise.
                identifiers[str(value)] = None
                break
    return ", ".join(identifiers)
def save_docs_to_vector_db(
docs,
collection_name,
@ -645,7 +664,7 @@ def save_docs_to_vector_db(
split: bool = True,
add: bool = False,
) -> bool:
log.info(f"save_docs_to_vector_db {docs} {collection_name}")
log.info(f"save_docs_to_vector_db: document {_get_docs_info(docs)} {collection_name}")
# Check if entries with the same hash (metadata.hash) already exist
if metadata and "hash" in metadata:

View File

@ -76,7 +76,7 @@ def query_doc(
limit=k,
)
log.info(f"query_doc:result {result}")
log.info(f"query_doc:result {result.ids} {result.metadatas}")
return result
except Exception as e:
print(e)
@ -127,7 +127,10 @@ def query_doc_with_hybrid_search(
"metadatas": [[d.metadata for d in result]],
}
log.info(f"query_doc_with_hybrid_search:result {result}")
log.info(
"query_doc_with_hybrid_search:result " +
f"{result.metadatas} {result.distances}"
)
return result
except Exception as e:
raise e