mirror of
https://github.com/open-webui/open-webui
synced 2025-04-07 22:25:05 +00:00
refac: documents file handling
This commit is contained in:
parent
5e8a74ef74
commit
dbc352f01b
@ -930,7 +930,9 @@ def store_web_search(form_data: SearchForm, user=Depends(get_verified_user)):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def store_data_in_vector_db(data, collection_name, overwrite: bool = False) -> bool:
|
def store_data_in_vector_db(
|
||||||
|
data, collection_name, metadata: Optional[dict] = None, overwrite: bool = False
|
||||||
|
) -> bool:
|
||||||
|
|
||||||
text_splitter = RecursiveCharacterTextSplitter(
|
text_splitter = RecursiveCharacterTextSplitter(
|
||||||
chunk_size=app.state.config.CHUNK_SIZE,
|
chunk_size=app.state.config.CHUNK_SIZE,
|
||||||
@ -942,7 +944,7 @@ def store_data_in_vector_db(data, collection_name, overwrite: bool = False) -> b
|
|||||||
|
|
||||||
if len(docs) > 0:
|
if len(docs) > 0:
|
||||||
log.info(f"store_data_in_vector_db {docs}")
|
log.info(f"store_data_in_vector_db {docs}")
|
||||||
return store_docs_in_vector_db(docs, collection_name, overwrite), None
|
return store_docs_in_vector_db(docs, collection_name, metadata, overwrite), None
|
||||||
else:
|
else:
|
||||||
raise ValueError(ERROR_MESSAGES.EMPTY_CONTENT)
|
raise ValueError(ERROR_MESSAGES.EMPTY_CONTENT)
|
||||||
|
|
||||||
@ -956,14 +958,16 @@ def store_text_in_vector_db(
|
|||||||
add_start_index=True,
|
add_start_index=True,
|
||||||
)
|
)
|
||||||
docs = text_splitter.create_documents([text], metadatas=[metadata])
|
docs = text_splitter.create_documents([text], metadatas=[metadata])
|
||||||
return store_docs_in_vector_db(docs, collection_name, overwrite)
|
return store_docs_in_vector_db(docs, collection_name, overwrite=overwrite)
|
||||||
|
|
||||||
|
|
||||||
def store_docs_in_vector_db(docs, collection_name, overwrite: bool = False) -> bool:
|
def store_docs_in_vector_db(
|
||||||
|
docs, collection_name, metadata: Optional[dict] = None, overwrite: bool = False
|
||||||
|
) -> bool:
|
||||||
log.info(f"store_docs_in_vector_db {docs} {collection_name}")
|
log.info(f"store_docs_in_vector_db {docs} {collection_name}")
|
||||||
|
|
||||||
texts = [doc.page_content for doc in docs]
|
texts = [doc.page_content for doc in docs]
|
||||||
metadatas = [doc.metadata for doc in docs]
|
metadatas = [{**doc.metadata, **(metadata if metadata else {})} for doc in docs]
|
||||||
|
|
||||||
# ChromaDB does not like datetime formats
|
# ChromaDB does not like datetime formats
|
||||||
# for meta-data so convert them to string.
|
# for meta-data so convert them to string.
|
||||||
@ -1237,13 +1241,21 @@ def process_doc(
|
|||||||
data = loader.load()
|
data = loader.load()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
result = store_data_in_vector_db(data, collection_name)
|
result = store_data_in_vector_db(
|
||||||
|
data,
|
||||||
|
collection_name,
|
||||||
|
{
|
||||||
|
"file_id": form_data.file_id,
|
||||||
|
"name": file.meta.get("name", file.filename),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
if result:
|
if result:
|
||||||
return {
|
return {
|
||||||
"status": True,
|
"status": True,
|
||||||
"collection_name": collection_name,
|
"collection_name": collection_name,
|
||||||
"known_type": known_type,
|
"known_type": known_type,
|
||||||
|
"filename": file.meta.get("name", file.filename),
|
||||||
}
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
|
@ -58,6 +58,7 @@ def upload_file(file: UploadFile = File(...), user=Depends(get_verified_user)):
|
|||||||
|
|
||||||
# replace filename with uuid
|
# replace filename with uuid
|
||||||
id = str(uuid.uuid4())
|
id = str(uuid.uuid4())
|
||||||
|
name = filename
|
||||||
filename = f"{id}_{filename}"
|
filename = f"{id}_{filename}"
|
||||||
file_path = f"{UPLOAD_DIR}/{filename}"
|
file_path = f"{UPLOAD_DIR}/{filename}"
|
||||||
|
|
||||||
@ -73,6 +74,7 @@ def upload_file(file: UploadFile = File(...), user=Depends(get_verified_user)):
|
|||||||
"id": id,
|
"id": id,
|
||||||
"filename": filename,
|
"filename": filename,
|
||||||
"meta": {
|
"meta": {
|
||||||
|
"name": name,
|
||||||
"content_type": file.content_type,
|
"content_type": file.content_type,
|
||||||
"size": len(contents),
|
"size": len(contents),
|
||||||
"path": file_path,
|
"path": file_path,
|
||||||
|
@ -57,12 +57,12 @@
|
|||||||
{#if document.source?.name}
|
{#if document.source?.name}
|
||||||
<div class="text-sm dark:text-gray-400">
|
<div class="text-sm dark:text-gray-400">
|
||||||
<a
|
<a
|
||||||
href={document?.source?.url
|
href={document?.metadata?.file_id
|
||||||
? `${document?.source?.url}/content`
|
? `/api/v1/files/${document?.metadata?.file_id}/content`
|
||||||
: document.source.name}
|
: document.source.name}
|
||||||
target="_blank"
|
target="_blank"
|
||||||
>
|
>
|
||||||
{document.source.name}
|
{document?.metadata?.name ?? document.source.name}
|
||||||
</a>
|
</a>
|
||||||
</div>
|
</div>
|
||||||
{:else}
|
{:else}
|
||||||
|
@ -8,14 +8,16 @@
|
|||||||
import { createNewDoc, deleteDocByName, getDocs } from '$lib/apis/documents';
|
import { createNewDoc, deleteDocByName, getDocs } from '$lib/apis/documents';
|
||||||
|
|
||||||
import { SUPPORTED_FILE_TYPE, SUPPORTED_FILE_EXTENSIONS } from '$lib/constants';
|
import { SUPPORTED_FILE_TYPE, SUPPORTED_FILE_EXTENSIONS } from '$lib/constants';
|
||||||
import { uploadDocToVectorDB } from '$lib/apis/rag';
|
import { processDocToVectorDB, uploadDocToVectorDB } from '$lib/apis/rag';
|
||||||
import { transformFileName } from '$lib/utils';
|
import { blobToFile, transformFileName } from '$lib/utils';
|
||||||
|
|
||||||
import Checkbox from '$lib/components/common/Checkbox.svelte';
|
import Checkbox from '$lib/components/common/Checkbox.svelte';
|
||||||
|
|
||||||
import EditDocModal from '$lib/components/documents/EditDocModal.svelte';
|
import EditDocModal from '$lib/components/documents/EditDocModal.svelte';
|
||||||
import AddFilesPlaceholder from '$lib/components/AddFilesPlaceholder.svelte';
|
import AddFilesPlaceholder from '$lib/components/AddFilesPlaceholder.svelte';
|
||||||
import AddDocModal from '$lib/components/documents/AddDocModal.svelte';
|
import AddDocModal from '$lib/components/documents/AddDocModal.svelte';
|
||||||
|
import { transcribeAudio } from '$lib/apis/audio';
|
||||||
|
import { uploadFile } from '$lib/apis/files';
|
||||||
|
|
||||||
const i18n = getContext('i18n');
|
const i18n = getContext('i18n');
|
||||||
|
|
||||||
@ -50,7 +52,28 @@
|
|||||||
};
|
};
|
||||||
|
|
||||||
const uploadDoc = async (file) => {
|
const uploadDoc = async (file) => {
|
||||||
const res = await uploadDocToVectorDB(localStorage.token, '', file).catch((error) => {
|
console.log(file);
|
||||||
|
// Check if the file is an audio file and transcribe/convert it to text file
|
||||||
|
if (['audio/mpeg', 'audio/wav'].includes(file['type'])) {
|
||||||
|
const transcribeRes = await transcribeAudio(localStorage.token, file).catch((error) => {
|
||||||
|
toast.error(error);
|
||||||
|
return null;
|
||||||
|
});
|
||||||
|
|
||||||
|
if (transcribeRes) {
|
||||||
|
console.log(transcribeRes);
|
||||||
|
const blob = new Blob([transcribeRes.text], { type: 'text/plain' });
|
||||||
|
file = blobToFile(blob, `${file.name}.txt`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Upload the file to the server
|
||||||
|
const uploadedFile = await uploadFile(localStorage.token, file).catch((error) => {
|
||||||
|
toast.error(error);
|
||||||
|
return null;
|
||||||
|
});
|
||||||
|
|
||||||
|
const res = await processDocToVectorDB(localStorage.token, uploadedFile.id).catch((error) => {
|
||||||
toast.error(error);
|
toast.error(error);
|
||||||
return null;
|
return null;
|
||||||
});
|
});
|
||||||
|
Loading…
Reference in New Issue
Block a user