feat: odt file parse support

This commit is contained in:
Timothy Jaeryang Baek 2025-06-19 18:39:00 +04:00
parent 4f8f349c8f
commit 81b8267e85

View File

@@ -14,7 +14,7 @@ from langchain_community.document_loaders import (
TextLoader, TextLoader,
UnstructuredEPubLoader, UnstructuredEPubLoader,
UnstructuredExcelLoader, UnstructuredExcelLoader,
UnstructuredMarkdownLoader, UnstructuredODTLoader,
UnstructuredPowerPointLoader, UnstructuredPowerPointLoader,
UnstructuredRSTLoader, UnstructuredRSTLoader,
UnstructuredXMLLoader, UnstructuredXMLLoader,
@@ -389,6 +389,8 @@ class Loader:
loader = UnstructuredPowerPointLoader(file_path) loader = UnstructuredPowerPointLoader(file_path)
elif file_ext == "msg": elif file_ext == "msg":
loader = OutlookMessageLoader(file_path) loader = OutlookMessageLoader(file_path)
elif file_ext == "odt":
loader = UnstructuredODTLoader(file_path)
elif self._is_text_file(file_ext, file_content_type): elif self._is_text_file(file_ext, file_content_type):
loader = TextLoader(file_path, autodetect_encoding=True) loader = TextLoader(file_path, autodetect_encoding=True)
else: else: