mirror of
https://github.com/open-webui/open-webui
synced 2025-06-26 18:26:48 +00:00
refac
This commit is contained in:
@@ -2,7 +2,6 @@ import requests
|
||||
import logging
|
||||
import ftfy
|
||||
|
||||
|
||||
from langchain_community.document_loaders import (
|
||||
BSHTMLLoader,
|
||||
CSVLoader,
|
||||
@@ -24,7 +23,6 @@ from open_webui.env import SRC_LOG_LEVELS
|
||||
log = logging.getLogger(__name__)
|
||||
log.setLevel(SRC_LOG_LEVELS["RAG"])
|
||||
|
||||
|
||||
known_source_ext = [
|
||||
"go",
|
||||
"py",
|
||||
|
||||
@@ -725,8 +725,16 @@ def process_file(
|
||||
PDF_EXTRACT_IMAGES=app.state.config.PDF_EXTRACT_IMAGES,
|
||||
)
|
||||
docs = loader.load(file.filename, file.meta.get("content_type"), file_path)
|
||||
raw_content = " ".join([doc.page_content for doc in docs])
|
||||
print(raw_content)
|
||||
raw_text_content = " ".join([doc.page_content for doc in docs])
|
||||
|
||||
Files.update_files_metadata_by_id(
|
||||
form_data.file_id,
|
||||
{
|
||||
"content": {
|
||||
"text": raw_text_content,
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
try:
|
||||
result = save_docs_to_vector_db(
|
||||
|
||||
Reference in New Issue
Block a user