This commit is contained in:
Timothy J. Baek
2024-09-28 02:56:56 +02:00
parent b8b994a820
commit 9d2ed3d2be
4 changed files with 34 additions and 4 deletions

View File

@@ -2,7 +2,6 @@ import requests
import logging
import ftfy
from langchain_community.document_loaders import (
BSHTMLLoader,
CSVLoader,
@@ -24,7 +23,6 @@ from open_webui.env import SRC_LOG_LEVELS
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
known_source_ext = [
"go",
"py",

View File

@@ -725,8 +725,16 @@ def process_file(
PDF_EXTRACT_IMAGES=app.state.config.PDF_EXTRACT_IMAGES,
)
docs = loader.load(file.filename, file.meta.get("content_type"), file_path)
raw_content = " ".join([doc.page_content for doc in docs])
print(raw_content)
raw_text_content = " ".join([doc.page_content for doc in docs])
Files.update_files_metadata_by_id(
form_data.file_id,
{
"content": {
"text": raw_text_content,
}
},
)
try:
result = save_docs_to_vector_db(