From 3688955c776c5c03afd94aa86636f1f8f80de738 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Mon, 25 Mar 2024 23:50:52 -0700 Subject: [PATCH] fix: encoding issue --- backend/apps/rag/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py index d87f7bc73..da7bb307d 100644 --- a/backend/apps/rag/main.py +++ b/backend/apps/rag/main.py @@ -411,7 +411,7 @@ def get_loader(filename: str, file_content_type: str, file_path: str): elif file_ext == "xml": loader = UnstructuredXMLLoader(file_path) elif file_ext in ["htm", "html"]: - loader = BSHTMLLoader(file_path) + loader = BSHTMLLoader(file_path, open_encoding="unicode_escape") elif file_ext == "md": loader = UnstructuredMarkdownLoader(file_path) elif file_content_type == "application/epub+zip":