mirror of
https://github.com/open-webui/open-webui
synced 2024-11-16 13:40:55 +00:00
feat: added ocr functionality to the pdf loader
This commit is contained in:
parent
eb51ad14e4
commit
089a63e0c6
@@ -419,7 +419,7 @@ def get_loader(filename: str, file_content_type: str, file_path: str):
|
|||||||
]
|
]
|
||||||
|
|
||||||
if file_ext == "pdf":
|
if file_ext == "pdf":
|
||||||
loader = PyPDFLoader(file_path)
|
loader = PyPDFLoader(file_path, extract_images=True)
|
||||||
elif file_ext == "csv":
|
elif file_ext == "csv":
|
||||||
loader = CSVLoader(file_path)
|
loader = CSVLoader(file_path)
|
||||||
elif file_ext == "rst":
|
elif file_ext == "rst":
|
||||||
|
@@ -33,6 +33,7 @@ pandas
|
|||||||
openpyxl
|
openpyxl
|
||||||
pyxlsb
|
pyxlsb
|
||||||
xlrd
|
xlrd
|
||||||
|
rapidocr-onnxruntime
|
||||||
|
|
||||||
faster-whisper
|
faster-whisper
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user