diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py index 4cde679e8..6e4f5c09c 100644 --- a/backend/apps/rag/main.py +++ b/backend/apps/rag/main.py @@ -18,6 +18,7 @@ from langchain_community.document_loaders import ( TextLoader, PyPDFLoader, CSVLoader, + Docx2txtLoader, ) from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_community.vectorstores import Chroma @@ -135,7 +136,12 @@ def store_doc( ): # "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm" - if file.content_type not in ["application/pdf", "text/plain", "text/csv"]: + if file.content_type not in [ + "application/pdf", + "text/plain", + "text/csv", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + ]: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED, @@ -156,6 +162,11 @@ def store_doc( if file.content_type == "application/pdf": loader = PyPDFLoader(file_path) + elif ( + file.content_type + == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + ): + loader = Docx2txtLoader(file_path) elif file.content_type == "text/plain": loader = TextLoader(file_path) elif file.content_type == "text/csv": diff --git a/backend/requirements.txt b/backend/requirements.txt index d3355b5fd..06af0efc6 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -16,12 +16,12 @@ aiohttp peewee bcrypt - langchain langchain-community chromadb sentence_transformers pypdf +docx2txt PyJWT pyjwt[crypto] diff --git a/src/lib/components/chat/MessageInput.svelte b/src/lib/components/chat/MessageInput.svelte index f76a74d6f..54ccc8f44 100644 --- a/src/lib/components/chat/MessageInput.svelte +++ b/src/lib/components/chat/MessageInput.svelte @@ -143,7 +143,14 @@ const file = inputFiles[0]; if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) { reader.readAsDataURL(file); - } else if (['application/pdf', 'text/plain', 'text/csv'].includes(file['type'])) { + } else if ( + [ + 'application/pdf', + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + 'text/plain', + 'text/csv' + ].includes(file['type']) + ) { uploadDoc(file); } else { toast.error(`Unsupported File Type '${file['type']}'.`); @@ -249,7 +256,14 @@ const file = inputFiles[0]; if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) { reader.readAsDataURL(file); - } else if (['application/pdf', 'text/plain', 'text/csv'].includes(file['type'])) { + } else if ( + [ + 'application/pdf', + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + 'text/plain', + 'text/csv' + ].includes(file['type']) + ) { uploadDoc(file); filesInputElement.value = ''; } else {