enh: show extracted file content

This commit is contained in:
Timothy J. Baek
2024-09-28 10:53:25 +02:00
parent 9636913de0
commit 90ec458c4c
6 changed files with 123 additions and 16 deletions

View File

@@ -725,13 +725,15 @@ def process_file(
PDF_EXTRACT_IMAGES=app.state.config.PDF_EXTRACT_IMAGES,
)
docs = loader.load(file.filename, file.meta.get("content_type"), file_path)
raw_text_content = " ".join([doc.page_content for doc in docs])
text_content = " ".join([doc.page_content for doc in docs])
log.debug(f"text_content: {text_content}")
Files.update_files_metadata_by_id(
form_data.file_id,
{
"content": {
"text": raw_text_content,
"text": text_content,
}
},
)
@@ -751,6 +753,7 @@ def process_file(
"status": True,
"collection_name": collection_name,
"filename": file.meta.get("name", file.filename),
"content": text_content,
}
except Exception as e:
raise HTTPException(