diff --git a/backend/open_webui/apps/retrieval/main.py b/backend/open_webui/apps/retrieval/main.py index 4d280193f..dee70fbaa 100644 --- a/backend/open_webui/apps/retrieval/main.py +++ b/backend/open_webui/apps/retrieval/main.py @@ -1,3 +1,5 @@ +# TODO: Merge this with the webui_app and make it a single app + import json import logging import mimetypes @@ -728,11 +730,13 @@ def process_file( docs = loader.load(file.filename, file.meta.get("content_type"), file_path) text_content = " ".join([doc.page_content for doc in docs]) log.debug(f"text_content: {text_content}") + hash = calculate_sha256_string(text_content) - Files.update_files_data_by_id( + Files.update_file_data_by_id( form_data.file_id, {"content": text_content}, ) + Files.update_file_hash_by_id(form_data.file_id, hash) try: result = save_docs_to_vector_db( diff --git a/backend/open_webui/apps/webui/models/files.py b/backend/open_webui/apps/webui/models/files.py index ec79a2d9f..8e07a969c 100644 --- a/backend/open_webui/apps/webui/models/files.py +++ b/backend/open_webui/apps/webui/models/files.py @@ -20,7 +20,7 @@ class File(Base): id = Column(String, primary_key=True) user_id = Column(String) - hash = Column(String) + hash = Column(Text, nullable=True) filename = Column(Text) data = Column(JSON) @@ -35,7 +35,7 @@ class FileModel(BaseModel): id: str user_id: str - hash: str + hash: Optional[str] = None filename: str data: dict @@ -53,7 +53,7 @@ class FileModel(BaseModel): class FileModelResponse(BaseModel): id: str user_id: str - hash: str + hash: Optional[str] = None filename: str data: dict @@ -65,6 +65,7 @@ class FileModelResponse(BaseModel): class FileForm(BaseModel): id: str + hash: Optional[str] = None filename: str meta: dict = {} @@ -120,7 +121,18 @@ class FilesTable: for file in db.query(File).filter_by(user_id=user_id).all() ] - def update_files_data_by_id(self, id: str, data: dict) -> Optional[FileModel]: + def update_file_hash_by_id(self, id: str, hash: str) -> Optional[FileModel]: + with get_db() as db: + try: + file = db.query(File).filter_by(id=id).first() + file.hash = hash + db.commit() + + return FileModel.model_validate(file) + except Exception: + return None + + def update_file_data_by_id(self, id: str, data: dict) -> Optional[FileModel]: with get_db() as db: try: file = db.query(File).filter_by(id=id).first() @@ -131,7 +143,7 @@ class FilesTable: except Exception: return None - def update_files_metadata_by_id(self, id: str, meta: dict) -> Optional[FileModel]: + def update_file_metadata_by_id(self, id: str, meta: dict) -> Optional[FileModel]: with get_db() as db: try: file = db.query(File).filter_by(id=id).first() diff --git a/backend/open_webui/migrations/versions/c0fbf31ca0db_update_file_table.py b/backend/open_webui/migrations/versions/c0fbf31ca0db_update_file_table.py index 6a1f17042..5f7f2abf7 100644 --- a/backend/open_webui/migrations/versions/c0fbf31ca0db_update_file_table.py +++ b/backend/open_webui/migrations/versions/c0fbf31ca0db_update_file_table.py @@ -20,7 +20,7 @@ depends_on: Union[str, Sequence[str], None] = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.add_column("file", sa.Column("hash", sa.String(), nullable=True)) + op.add_column("file", sa.Column("hash", sa.Text(), nullable=True)) op.add_column("file", sa.Column("data", sa.JSON(), nullable=True)) op.add_column("file", sa.Column("updated_at", sa.BigInteger(), nullable=True)) diff --git a/src/lib/apis/retrieval/index.ts b/src/lib/apis/retrieval/index.ts index cf86e951c..2c4c24cb6 100644 --- a/src/lib/apis/retrieval/index.ts +++ b/src/lib/apis/retrieval/index.ts @@ -306,7 +306,11 @@ export interface SearchDocument { filenames: string[]; } -export const processFile = async (token: string, file_id: string) => { +export const processFile = async ( + token: string, + file_id: string, + collection_name: string | null = null +) => { let error = null; const res = await fetch(`${RAG_API_BASE_URL}/process/file`, { @@ -317,7 +321,8 @@ export const processFile = async (token: string, file_id: string) => { authorization: `Bearer ${token}` }, body: JSON.stringify({ - file_id: file_id + file_id: file_id, + collection_name: collection_name ? collection_name : undefined }) }) .then(async (res) => { diff --git a/src/lib/components/workspace/Knowledge/AddContentModal.svelte b/src/lib/components/workspace/Knowledge/AddContentModal.svelte new file mode 100644 index 000000000..b5f09be5c --- /dev/null +++ b/src/lib/components/workspace/Knowledge/AddContentModal.svelte @@ -0,0 +1,128 @@ + + + +
+
+
{$i18n.t('Add Content')}
+ +
+
+
+
{ + submitHandler(); + }} + > +
+ + + +
+ +
+ +
+
+
+
+
+
+ + diff --git a/src/lib/components/workspace/Knowledge/Item.svelte b/src/lib/components/workspace/Knowledge/Item.svelte index ef1d2abb5..dda621fd8 100644 --- a/src/lib/components/workspace/Knowledge/Item.svelte +++ b/src/lib/components/workspace/Knowledge/Item.svelte @@ -16,6 +16,10 @@ import Badge from '$lib/components/common/Badge.svelte'; import Files from './Files.svelte'; import AddFilesPlaceholder from '$lib/components/AddFilesPlaceholder.svelte'; + import AddContentModal from './AddContentModal.svelte'; + import { transcribeAudio } from '$lib/apis/audio'; + import { blobToFile } from '$lib/utils'; + import { processFile } from '$lib/apis/retrieval'; let largeScreen = true; @@ -60,6 +64,59 @@ }, 1000); }; + const uploadFileHandler = async (file) => { + console.log(file); + + // Check if the file is an audio file and transcribe/convert it to text file + if (['audio/mpeg', 'audio/wav', 'audio/ogg', 'audio/x-m4a'].includes(file['type'])) { + const res = await transcribeAudio(localStorage.token, file).catch((error) => { + toast.error(error); + return null; + }); + + if (res) { + console.log(res); + const blob = new Blob([res.text], { type: 'text/plain' }); + file = blobToFile(blob, `${file.name}.txt`); + } + } + + try { + const uploadedFile = await uploadFile(localStorage.token, file).catch((e) => { + toast.error(e); + }); + + if (uploadedFile) { + const processedFile = await processFile(localStorage.token, uploadedFile.id, id).catch( + (e) => { + toast.error(e); + } + ); + + if (processedFile.status) { + knowledge.data.file_ids = [...(knowledge.data.file_ids ?? []), uploadedFile.id]; + + const updatedKnowledge = await updateKnowledgeById(localStorage.token, id, { + data: knowledge.data + }).catch((e) => { + toast.error(e); + }); + + if (updatedKnowledge) { + knowledge = updatedKnowledge; + toast.success($i18n.t('File added successfully.')); + } + } else { + toast.error($i18n.t('Failed to process file.')); + } + } else { + toast.error($i18n.t('Failed to upload file.')); + } + } catch (e) { + toast.error(e); + } + }; + onMount(async () => { // listen to resize 1024px const mediaQuery = window.matchMedia('(min-width: 1024px)'); @@ -78,7 +135,8 @@ id = $page.params.id; const res = await getKnowledgeById(localStorage.token, id).catch((e) => { - console.error(e); + toast.error(e); + return null; }); if (res) { @@ -102,19 +160,11 @@ e.preventDefault(); if (e.dataTransfer?.files) { - let reader = new FileReader(); const inputFiles = e.dataTransfer?.files; if (inputFiles && inputFiles.length > 0) { for (const file of inputFiles) { - console.log(file, file.name.split('.').at(-1)); - const uploadedFile = await uploadFile(localStorage.token, file).catch((e) => { - toast.error(e); - }); - - if (uploadedFile) { - knowledge.data.file_ids = [...(knowledge.data.file_ids ?? []), uploadedFile.id]; - } + await uploadFileHandler(file); } } else { toast.error($i18n.t(`File not found.`)); @@ -161,6 +211,13 @@ {/if} + { + console.log(e); + }} +/> +