fix: fix reindex not working due to unnecessary dupe check (#20857)
* Update retrieval.py
* Update knowledge.py
* Update retrieval.py
* Update knowledge.py
This commit is contained in:
@@ -1425,8 +1425,16 @@ def save_docs_to_vector_db(
|
|||||||
if result is not None and result.ids and len(result.ids) > 0:
|
if result is not None and result.ids and len(result.ids) > 0:
|
||||||
existing_doc_ids = result.ids[0]
|
existing_doc_ids = result.ids[0]
|
||||||
if existing_doc_ids:
|
if existing_doc_ids:
|
||||||
log.info(f"Document with hash {metadata['hash']} already exists")
|
# Check if the existing document belongs to the same file
|
||||||
raise ValueError(ERROR_MESSAGES.DUPLICATE_CONTENT)
|
# If same file_id, this is a re-add/reindex - allow it
|
||||||
|
# If different file_id, this is a duplicate - block it
|
||||||
|
existing_file_id = None
|
||||||
|
if result.metadatas and result.metadatas[0]:
|
||||||
|
existing_file_id = result.metadatas[0][0].get("file_id")
|
||||||
|
|
||||||
|
if existing_file_id != metadata.get("file_id"):
|
||||||
|
log.info(f"Document with hash {metadata['hash']} already exists")
|
||||||
|
raise ValueError(ERROR_MESSAGES.DUPLICATE_CONTENT)
|
||||||
|
|
||||||
if split:
|
if split:
|
||||||
if request.app.state.config.ENABLE_MARKDOWN_HEADER_TEXT_SPLITTER:
|
if request.app.state.config.ENABLE_MARKDOWN_HEADER_TEXT_SPLITTER:
|
||||||
|
|||||||
Reference in New Issue
Block a user