Merge remote-tracking branch 'upstream/dev' into feat/model-config

This commit is contained in:
Jun Siang Cheah
2024-05-21 19:17:23 +01:00
52 changed files with 3964 additions and 4840 deletions

View File

@@ -28,6 +28,7 @@ from langchain_community.document_loaders import (
UnstructuredXMLLoader,
UnstructuredRSTLoader,
UnstructuredExcelLoader,
UnstructuredPowerPointLoader,
YoutubeLoader,
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -768,6 +769,11 @@ def get_loader(filename: str, file_content_type: str, file_path: str):
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
] or file_ext in ["xls", "xlsx"]:
loader = UnstructuredExcelLoader(file_path)
elif file_content_type in [
"application/vnd.ms-powerpoint",
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
] or file_ext in ["ppt", "pptx"]:
loader = UnstructuredPowerPointLoader(file_path)
elif file_ext in known_source_ext or (
file_content_type and file_content_type.find("text/") >= 0
):