feat: odt file parse support

This commit is contained in:
Timothy Jaeryang Baek 2025-06-19 18:39:00 +04:00
parent 4f8f349c8f
commit 81b8267e85

View File

@ -14,7 +14,7 @@ from langchain_community.document_loaders import (
TextLoader,
UnstructuredEPubLoader,
UnstructuredExcelLoader,
UnstructuredMarkdownLoader,
UnstructuredODTLoader,
UnstructuredPowerPointLoader,
UnstructuredRSTLoader,
UnstructuredXMLLoader,
@ -389,6 +389,8 @@ class Loader:
loader = UnstructuredPowerPointLoader(file_path)
elif file_ext == "msg":
loader = OutlookMessageLoader(file_path)
elif file_ext == "odt":
loader = UnstructuredODTLoader(file_path)
elif self._is_text_file(file_ext, file_content_type):
loader = TextLoader(file_path, autodetect_encoding=True)
else: