mirror of
https://github.com/open-webui/open-webui
synced 2025-01-18 16:51:17 +00:00
Merge pull request #2923 from mindspawn/outlook-msg
Support Outlook Message File Format
This commit is contained in:
commit
dbde628141
@ -9,6 +9,7 @@ from fastapi import (
|
|||||||
)
|
)
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
import os, shutil, logging, re
|
import os, shutil, logging, re
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List, Union, Sequence
|
from typing import List, Union, Sequence
|
||||||
@ -30,6 +31,7 @@ from langchain_community.document_loaders import (
|
|||||||
UnstructuredExcelLoader,
|
UnstructuredExcelLoader,
|
||||||
UnstructuredPowerPointLoader,
|
UnstructuredPowerPointLoader,
|
||||||
YoutubeLoader,
|
YoutubeLoader,
|
||||||
|
OutlookMessageLoader,
|
||||||
)
|
)
|
||||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||||
|
|
||||||
@ -879,6 +881,13 @@ def store_docs_in_vector_db(docs, collection_name, overwrite: bool = False) -> b
|
|||||||
texts = [doc.page_content for doc in docs]
|
texts = [doc.page_content for doc in docs]
|
||||||
metadatas = [doc.metadata for doc in docs]
|
metadatas = [doc.metadata for doc in docs]
|
||||||
|
|
||||||
|
# ChromaDB does not like datetime formats
|
||||||
|
# for metadata, so convert them to strings.
|
||||||
|
for metadata in metadatas:
|
||||||
|
for key, value in metadata.items():
|
||||||
|
if isinstance(value, datetime):
|
||||||
|
metadata[key] = str(value)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if overwrite:
|
if overwrite:
|
||||||
for collection in CHROMA_CLIENT.list_collections():
|
for collection in CHROMA_CLIENT.list_collections():
|
||||||
@ -965,6 +974,7 @@ def get_loader(filename: str, file_content_type: str, file_path: str):
|
|||||||
"swift",
|
"swift",
|
||||||
"vue",
|
"vue",
|
||||||
"svelte",
|
"svelte",
|
||||||
|
"msg",
|
||||||
]
|
]
|
||||||
|
|
||||||
if file_ext == "pdf":
|
if file_ext == "pdf":
|
||||||
@ -999,6 +1009,8 @@ def get_loader(filename: str, file_content_type: str, file_path: str):
|
|||||||
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||||
] or file_ext in ["ppt", "pptx"]:
|
] or file_ext in ["ppt", "pptx"]:
|
||||||
loader = UnstructuredPowerPointLoader(file_path)
|
loader = UnstructuredPowerPointLoader(file_path)
|
||||||
|
elif file_ext == "msg":
|
||||||
|
loader = OutlookMessageLoader(file_path)
|
||||||
elif file_ext in known_source_ext or (
|
elif file_ext in known_source_ext or (
|
||||||
file_content_type and file_content_type.find("text/") >= 0
|
file_content_type and file_content_type.find("text/") >= 0
|
||||||
):
|
):
|
||||||
|
@ -56,4 +56,6 @@ PyJWT[crypto]==2.8.0
|
|||||||
black==24.4.2
|
black==24.4.2
|
||||||
langfuse==2.33.0
|
langfuse==2.33.0
|
||||||
youtube-transcript-api==0.6.2
|
youtube-transcript-api==0.6.2
|
||||||
pytube==15.0.0
|
pytube==15.0.0
|
||||||
|
|
||||||
|
extract_msg
|
||||||
|
@ -89,7 +89,8 @@ export const SUPPORTED_FILE_EXTENSIONS = [
|
|||||||
'xls',
|
'xls',
|
||||||
'xlsx',
|
'xlsx',
|
||||||
'pptx',
|
'pptx',
|
||||||
'ppt'
|
'ppt',
|
||||||
|
'msg'
|
||||||
];
|
];
|
||||||
|
|
||||||
// Source: https://kit.svelte.dev/docs/modules#$env-static-public
|
// Source: https://kit.svelte.dev/docs/modules#$env-static-public
|
||||||
|
Loading…
Reference in New Issue
Block a user