diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py index 99aa69594..45ad69707 100644 --- a/backend/apps/rag/main.py +++ b/backend/apps/rag/main.py @@ -425,7 +425,7 @@ def get_loader(filename: str, file_content_type: str, file_path: str): ] if file_ext == "pdf": - loader = PyPDFLoader(file_path) + loader = PyPDFLoader(file_path, extract_images=True) elif file_ext == "csv": loader = CSVLoader(file_path) elif file_ext == "rst": diff --git a/backend/requirements.txt b/backend/requirements.txt index 190d12980..fd38df0b0 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -35,6 +35,7 @@ openpyxl pyxlsb xlrd rapidocr-onnxruntime +opencv-python-headless faster-whisper