This commit is contained in:
Timothy J. Baek 2024-10-02 21:14:58 -07:00
parent a2eadb30f5
commit 351b1dbf31
6 changed files with 225 additions and 19 deletions

View File

@ -1,3 +1,5 @@
# TODO: Merge this with the webui_app and make it a single app
import json import json
import logging import logging
import mimetypes import mimetypes
@ -728,11 +730,13 @@ def process_file(
docs = loader.load(file.filename, file.meta.get("content_type"), file_path) docs = loader.load(file.filename, file.meta.get("content_type"), file_path)
text_content = " ".join([doc.page_content for doc in docs]) text_content = " ".join([doc.page_content for doc in docs])
log.debug(f"text_content: {text_content}") log.debug(f"text_content: {text_content}")
hash = calculate_sha256_string(text_content)
Files.update_files_data_by_id( Files.update_file_data_by_id(
form_data.file_id, form_data.file_id,
{"content": text_content}, {"content": text_content},
) )
Files.update_file_hash_by_id(form_data.file_id, hash)
try: try:
result = save_docs_to_vector_db( result = save_docs_to_vector_db(

View File

@ -20,7 +20,7 @@ class File(Base):
id = Column(String, primary_key=True) id = Column(String, primary_key=True)
user_id = Column(String) user_id = Column(String)
hash = Column(String) hash = Column(Text, nullable=True)
filename = Column(Text) filename = Column(Text)
data = Column(JSON) data = Column(JSON)
@ -35,7 +35,7 @@ class FileModel(BaseModel):
id: str id: str
user_id: str user_id: str
hash: str hash: Optional[str] = None
filename: str filename: str
data: dict data: dict
@ -53,7 +53,7 @@ class FileModel(BaseModel):
class FileModelResponse(BaseModel): class FileModelResponse(BaseModel):
id: str id: str
user_id: str user_id: str
hash: str hash: Optional[str] = None
filename: str filename: str
data: dict data: dict
@ -65,6 +65,7 @@ class FileModelResponse(BaseModel):
class FileForm(BaseModel): class FileForm(BaseModel):
id: str id: str
hash: Optional[str] = None
filename: str filename: str
meta: dict = {} meta: dict = {}
@ -120,7 +121,18 @@ class FilesTable:
for file in db.query(File).filter_by(user_id=user_id).all() for file in db.query(File).filter_by(user_id=user_id).all()
] ]
def update_files_data_by_id(self, id: str, data: dict) -> Optional[FileModel]: def update_file_hash_by_id(self, id: str, hash: str) -> Optional[FileModel]:
with get_db() as db:
try:
file = db.query(File).filter_by(id=id).first()
file.hash = hash
db.commit()
return FileModel.model_validate(file)
except Exception:
return None
def update_file_data_by_id(self, id: str, data: dict) -> Optional[FileModel]:
with get_db() as db: with get_db() as db:
try: try:
file = db.query(File).filter_by(id=id).first() file = db.query(File).filter_by(id=id).first()
@ -131,7 +143,7 @@ class FilesTable:
except Exception: except Exception:
return None return None
def update_files_metadata_by_id(self, id: str, meta: dict) -> Optional[FileModel]: def update_file_metadata_by_id(self, id: str, meta: dict) -> Optional[FileModel]:
with get_db() as db: with get_db() as db:
try: try:
file = db.query(File).filter_by(id=id).first() file = db.query(File).filter_by(id=id).first()

View File

@ -20,7 +20,7 @@ depends_on: Union[str, Sequence[str], None] = None
def upgrade(): def upgrade():
# ### commands auto generated by Alembic - please adjust! ### # ### commands auto generated by Alembic - please adjust! ###
op.add_column("file", sa.Column("hash", sa.String(), nullable=True)) op.add_column("file", sa.Column("hash", sa.Text(), nullable=True))
op.add_column("file", sa.Column("data", sa.JSON(), nullable=True)) op.add_column("file", sa.Column("data", sa.JSON(), nullable=True))
op.add_column("file", sa.Column("updated_at", sa.BigInteger(), nullable=True)) op.add_column("file", sa.Column("updated_at", sa.BigInteger(), nullable=True))

View File

@ -306,7 +306,11 @@ export interface SearchDocument {
filenames: string[]; filenames: string[];
} }
export const processFile = async (token: string, file_id: string) => { export const processFile = async (
token: string,
file_id: string,
collection_name: string | null = null
) => {
let error = null; let error = null;
const res = await fetch(`${RAG_API_BASE_URL}/process/file`, { const res = await fetch(`${RAG_API_BASE_URL}/process/file`, {
@ -317,7 +321,8 @@ export const processFile = async (token: string, file_id: string) => {
authorization: `Bearer ${token}` authorization: `Bearer ${token}`
}, },
body: JSON.stringify({ body: JSON.stringify({
file_id: file_id file_id: file_id,
collection_name: collection_name ? collection_name : undefined
}) })
}) })
.then(async (res) => { .then(async (res) => {

View File

@ -0,0 +1,128 @@
<script lang="ts">
import { toast } from 'svelte-sonner';
import dayjs from 'dayjs';
import { onMount, getContext, createEventDispatcher } from 'svelte';
const i18n = getContext('i18n');
const dispatch = createEventDispatcher();
import { knowledge } from '$lib/stores';
import Modal from '$lib/components/common/Modal.svelte';
import { uploadFile } from '$lib/apis/files';
export let show = false;
let fileInputElement: HTMLInputElement;
let inputFiles;
/**
 * Form submit handler: uploads every file currently selected in the hidden
 * file input and dispatches an `add` event for each successful upload.
 *
 * Shows a "File not found." toast when nothing is selected. Once all uploads
 * have settled, the selection is cleared and the modal is closed.
 */
const submitHandler = async () => {
	if (!inputFiles || inputFiles.length === 0) {
		toast.error($i18n.t(`File not found.`));
		return;
	}

	for (const file of inputFiles) {
		console.log(file, file.name.split('.').at(-1));

		// uploadFile returns a Promise. Without `await` the truthiness check
		// below always passes and listeners receive a pending Promise instead
		// of the uploaded file; a rejection would also go unhandled.
		const uploadedFile = await uploadFile(localStorage.token, file).catch((e) => {
			toast.error(e);
			return null;
		});

		if (uploadedFile) {
			dispatch('add', uploadedFile);
		}
	}

	// Reset the picker so the same file can be selected again next time.
	inputFiles = null;
	fileInputElement.value = '';
	show = false;
};
</script>
<!-- "Add Content" dialog: a header with a close button, and a form holding a
     hidden multi-file input, a button that opens the file picker, and Save. -->
<Modal size="sm" bind:show>
	<div>
		<div class=" flex justify-between dark:text-gray-300 px-5 pt-4">
			<div class=" text-lg font-medium self-center">{$i18n.t('Add Content')}</div>
			<button
				class="self-center"
				on:click={() => {
					show = false;
				}}
			>
				<svg
					xmlns="http://www.w3.org/2000/svg"
					viewBox="0 0 20 20"
					fill="currentColor"
					class="w-5 h-5"
				>
					<path
						d="M6.28 5.22a.75.75 0 00-1.06 1.06L8.94 10l-3.72 3.72a.75.75 0 101.06 1.06L10 11.06l3.72 3.72a.75.75 0 101.06-1.06L11.06 10l3.72-3.72a.75.75 0 00-1.06-1.06L10 8.94 6.28 5.22z"
					/>
				</svg>
			</button>
		</div>

		<div class="flex flex-col md:flex-row w-full px-5 py-4 md:space-x-4 dark:text-gray-200">
			<div class=" flex flex-col w-full sm:flex-row sm:justify-center sm:space-x-6">
				<form
					class="flex flex-col w-full"
					on:submit|preventDefault={() => {
						submitHandler();
					}}
				>
					<div class="mb-3 w-full">
						<!-- The native input stays hidden; the button below forwards clicks to it. -->
						<input
							id="upload-doc-input"
							bind:this={fileInputElement}
							bind:files={inputFiles}
							type="file"
							multiple
							hidden
						/>

						<button
							class="w-full text-sm font-medium py-3 bg-gray-100 hover:bg-gray-200 dark:bg-gray-850 dark:hover:bg-gray-800 text-center rounded-xl"
							type="button"
							on:click={() => {
								fileInputElement.click();
							}}
						>
							<!-- An empty FileList is truthy, so check the length explicitly;
							     otherwise the label reads " document(s) selected." with no count. -->
							{#if inputFiles && inputFiles.length > 0}
								{inputFiles.length} document(s) selected.
							{:else}
								{$i18n.t('Click here to select files.')}
							{/if}
						</button>
					</div>

					<div class="flex justify-end text-sm font-medium">
						<button
							class=" px-4 py-2 bg-emerald-700 hover:bg-emerald-800 text-gray-100 transition rounded-lg"
							type="submit"
						>
							{$i18n.t('Save')}
						</button>
					</div>
				</form>
			</div>
		</div>
	</div>
</Modal>
<style>
/* NOTE(review): this component's markup contains a single <input type="file">
   and no element with class "tabs", so the `.tabs` and `input[type='number']`
   rules below match nothing here. They look carried over from another
   component; confirm before removing. */

/* Hide the WebKit spin buttons on inputs. */
input::-webkit-outer-spin-button,
input::-webkit-inner-spin-button {
/* display: none; <- Crashes Chrome on hover */
-webkit-appearance: none;
margin: 0; /* <-- Apparently some margin are still there even though it's hidden */
}
/* Hide the scrollbar of `.tabs` containers. */
.tabs::-webkit-scrollbar {
display: none; /* for Chrome, Safari and Opera */
}
.tabs {
-ms-overflow-style: none; /* IE and Edge */
scrollbar-width: none; /* Firefox */
}
/* Render number inputs as plain text fields in Firefox. */
input[type='number'] {
-moz-appearance: textfield; /* Firefox */
}
</style>

View File

@ -16,6 +16,10 @@
import Badge from '$lib/components/common/Badge.svelte'; import Badge from '$lib/components/common/Badge.svelte';
import Files from './Files.svelte'; import Files from './Files.svelte';
import AddFilesPlaceholder from '$lib/components/AddFilesPlaceholder.svelte'; import AddFilesPlaceholder from '$lib/components/AddFilesPlaceholder.svelte';
import AddContentModal from './AddContentModal.svelte';
import { transcribeAudio } from '$lib/apis/audio';
import { blobToFile } from '$lib/utils';
import { processFile } from '$lib/apis/retrieval';
let largeScreen = true; let largeScreen = true;
@ -60,6 +64,59 @@
}, 1000); }, 1000);
}; };
/**
 * Uploads one file, asks the retrieval API to process it (passing `id` as the
 * collection name), appends its id to `knowledge.data.file_ids`, and saves the
 * knowledge entry.
 *
 * Audio files (mpeg/wav/ogg/x-m4a) are transcribed first and uploaded as a
 * `<name>.txt` text file. If transcription fails, the original file is
 * uploaded unchanged.
 *
 * Every failure is reported with a toast; the function itself never rejects.
 *
 * @param file - The file to add (e.g. an entry from a drop event's file list).
 */
const uploadFileHandler = async (file) => {
	console.log(file);

	// Check if the file is an audio file and transcribe/convert it to text file
	if (['audio/mpeg', 'audio/wav', 'audio/ogg', 'audio/x-m4a'].includes(file['type'])) {
		const res = await transcribeAudio(localStorage.token, file).catch((error) => {
			toast.error(error);
			return null;
		});

		if (res) {
			console.log(res);
			const blob = new Blob([res.text], { type: 'text/plain' });
			file = blobToFile(blob, `${file.name}.txt`);
		}
	}

	try {
		const uploadedFile = await uploadFile(localStorage.token, file).catch((e) => {
			toast.error(e);
			return null;
		});

		if (!uploadedFile) {
			toast.error($i18n.t('Failed to upload file.'));
			return;
		}

		const processedFile = await processFile(localStorage.token, uploadedFile.id, id).catch(
			(e) => {
				toast.error(e);
				return null;
			}
		);

		// processedFile is null when the request was rejected. Optional chaining
		// keeps that case on the "failed to process" path rather than throwing
		// a TypeError into the outer catch.
		if (!processedFile?.status) {
			toast.error($i18n.t('Failed to process file.'));
			return;
		}

		knowledge.data.file_ids = [...(knowledge.data.file_ids ?? []), uploadedFile.id];

		const updatedKnowledge = await updateKnowledgeById(localStorage.token, id, {
			data: knowledge.data
		}).catch((e) => {
			toast.error(e);
			return null;
		});

		if (updatedKnowledge) {
			knowledge = updatedKnowledge;
			toast.success($i18n.t('File added successfully.'));
		}
	} catch (e) {
		toast.error(e);
	}
};
onMount(async () => { onMount(async () => {
// listen to resize 1024px // listen to resize 1024px
const mediaQuery = window.matchMedia('(min-width: 1024px)'); const mediaQuery = window.matchMedia('(min-width: 1024px)');
@ -78,7 +135,8 @@
id = $page.params.id; id = $page.params.id;
const res = await getKnowledgeById(localStorage.token, id).catch((e) => { const res = await getKnowledgeById(localStorage.token, id).catch((e) => {
console.error(e); toast.error(e);
return null;
}); });
if (res) { if (res) {
@ -102,19 +160,11 @@
e.preventDefault(); e.preventDefault();
if (e.dataTransfer?.files) { if (e.dataTransfer?.files) {
let reader = new FileReader();
const inputFiles = e.dataTransfer?.files; const inputFiles = e.dataTransfer?.files;
if (inputFiles && inputFiles.length > 0) { if (inputFiles && inputFiles.length > 0) {
for (const file of inputFiles) { for (const file of inputFiles) {
console.log(file, file.name.split('.').at(-1)); await uploadFileHandler(file);
const uploadedFile = await uploadFile(localStorage.token, file).catch((e) => {
toast.error(e);
});
if (uploadedFile) {
knowledge.data.file_ids = [...(knowledge.data.file_ids ?? []), uploadedFile.id];
}
} }
} else { } else {
toast.error($i18n.t(`File not found.`)); toast.error($i18n.t(`File not found.`));
@ -161,6 +211,13 @@
</div> </div>
{/if} {/if}
<AddContentModal
bind:show={showAddContentModal}
on:add={(e) => {
console.log(e);
}}
/>
<div class="flex flex-col w-full max-h-[100dvh] h-full"> <div class="flex flex-col w-full max-h-[100dvh] h-full">
<button <button
class="flex space-x-1" class="flex space-x-1"