This commit is contained in:
Timothy J. Baek 2024-10-02 21:14:58 -07:00
parent a2eadb30f5
commit 351b1dbf31
6 changed files with 225 additions and 19 deletions

View File

@ -1,3 +1,5 @@
# TODO: Merge this with the webui_app and make it a single app
import json
import logging
import mimetypes
@ -728,11 +730,13 @@ def process_file(
docs = loader.load(file.filename, file.meta.get("content_type"), file_path)
text_content = " ".join([doc.page_content for doc in docs])
log.debug(f"text_content: {text_content}")
hash = calculate_sha256_string(text_content)
Files.update_files_data_by_id(
Files.update_file_data_by_id(
form_data.file_id,
{"content": text_content},
)
Files.update_file_hash_by_id(form_data.file_id, hash)
try:
result = save_docs_to_vector_db(

View File

@ -20,7 +20,7 @@ class File(Base):
id = Column(String, primary_key=True)
user_id = Column(String)
hash = Column(String)
hash = Column(Text, nullable=True)
filename = Column(Text)
data = Column(JSON)
@ -35,7 +35,7 @@ class FileModel(BaseModel):
id: str
user_id: str
hash: str
hash: Optional[str] = None
filename: str
data: dict
@ -53,7 +53,7 @@ class FileModel(BaseModel):
class FileModelResponse(BaseModel):
id: str
user_id: str
hash: str
hash: Optional[str] = None
filename: str
data: dict
@ -65,6 +65,7 @@ class FileModelResponse(BaseModel):
class FileForm(BaseModel):
id: str
hash: Optional[str] = None
filename: str
meta: dict = {}
@ -120,7 +121,18 @@ class FilesTable:
for file in db.query(File).filter_by(user_id=user_id).all()
]
def update_files_data_by_id(self, id: str, data: dict) -> Optional[FileModel]:
def update_file_hash_by_id(self, id: str, hash: str) -> Optional[FileModel]:
    """Set the ``hash`` column of the file row identified by ``id``.

    Returns the updated row validated as a ``FileModel``. Any exception raised
    while looking up, mutating, committing or validating the row is swallowed
    and reported to the caller as ``None``.
    """
    with get_db() as session:
        try:
            record = session.query(File).filter_by(id=id).first()
            # NOTE(review): presumably `first()` yields None when no row matches,
            # in which case the assignment below raises and we return None.
            record.hash = hash
            session.commit()
            return FileModel.model_validate(record)
        except Exception:
            return None
def update_file_data_by_id(self, id: str, data: dict) -> Optional[FileModel]:
with get_db() as db:
try:
file = db.query(File).filter_by(id=id).first()
@ -131,7 +143,7 @@ class FilesTable:
except Exception:
return None
def update_files_metadata_by_id(self, id: str, meta: dict) -> Optional[FileModel]:
def update_file_metadata_by_id(self, id: str, meta: dict) -> Optional[FileModel]:
with get_db() as db:
try:
file = db.query(File).filter_by(id=id).first()

View File

@ -20,7 +20,7 @@ depends_on: Union[str, Sequence[str], None] = None
def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.add_column("file", sa.Column("hash", sa.String(), nullable=True))
op.add_column("file", sa.Column("hash", sa.Text(), nullable=True))
op.add_column("file", sa.Column("data", sa.JSON(), nullable=True))
op.add_column("file", sa.Column("updated_at", sa.BigInteger(), nullable=True))

View File

@ -306,7 +306,11 @@ export interface SearchDocument {
filenames: string[];
}
export const processFile = async (token: string, file_id: string) => {
export const processFile = async (
token: string,
file_id: string,
collection_name: string | null = null
) => {
let error = null;
const res = await fetch(`${RAG_API_BASE_URL}/process/file`, {
@ -317,7 +321,8 @@ export const processFile = async (token: string, file_id: string) => {
authorization: `Bearer ${token}`
},
body: JSON.stringify({
file_id: file_id
file_id: file_id,
collection_name: collection_name ? collection_name : undefined
})
})
.then(async (res) => {

View File

@ -0,0 +1,128 @@
<script lang="ts">
import { toast } from 'svelte-sonner';
import dayjs from 'dayjs';
import { onMount, getContext, createEventDispatcher } from 'svelte';
const i18n = getContext('i18n');
const dispatch = createEventDispatcher();
import { knowledge } from '$lib/stores';
import Modal from '$lib/components/common/Modal.svelte';
import { uploadFile } from '$lib/apis/files';
export let show = false;
let fileInputElement: HTMLInputElement;
let inputFiles;
/**
 * Uploads every file currently selected in the hidden file input and emits an
 * `add` event carrying the upload result for each file that uploaded successfully.
 *
 * When no file is selected an error toast is shown and nothing else happens.
 * After the loop the selection is cleared and the modal is closed.
 */
const submitHandler = async () => {
	if (!inputFiles || inputFiles.length === 0) {
		toast.error($i18n.t(`File not found.`));
		return;
	}

	for (const file of inputFiles) {
		console.log(file, file.name.split('.').at(-1));

		// `uploadFile` returns a promise. Without `await`, the check below would test the
		// Promise object itself (always truthy) and the `add` event would carry a pending
		// Promise instead of the uploaded file. A rejection is reported and mapped to null
		// so one failed upload does not abort the remaining files.
		const uploadedFile = await uploadFile(localStorage.token, file).catch((e) => {
			toast.error(e);
			return null;
		});

		if (uploadedFile) {
			dispatch('add', uploadedFile);
		}
	}

	// Reset the picker so the same files can be selected again next time.
	inputFiles = null;
	fileInputElement.value = '';
	show = false;
};
</script>
<Modal size="sm" bind:show>
<div>
<div class=" flex justify-between dark:text-gray-300 px-5 pt-4">
<div class=" text-lg font-medium self-center">{$i18n.t('Add Content')}</div>
<button
class="self-center"
on:click={() => {
show = false;
}}
>
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 20 20"
fill="currentColor"
class="w-5 h-5"
>
<path
d="M6.28 5.22a.75.75 0 00-1.06 1.06L8.94 10l-3.72 3.72a.75.75 0 101.06 1.06L10 11.06l3.72 3.72a.75.75 0 101.06-1.06L11.06 10l3.72-3.72a.75.75 0 00-1.06-1.06L10 8.94 6.28 5.22z"
/>
</svg>
</button>
</div>
<div class="flex flex-col md:flex-row w-full px-5 py-4 md:space-x-4 dark:text-gray-200">
<div class=" flex flex-col w-full sm:flex-row sm:justify-center sm:space-x-6">
<form
class="flex flex-col w-full"
on:submit|preventDefault={() => {
submitHandler();
}}
>
<div class="mb-3 w-full">
<input
id="upload-doc-input"
bind:this={fileInputElement}
bind:files={inputFiles}
type="file"
multiple
hidden
/>
<button
class="w-full text-sm font-medium py-3 bg-gray-100 hover:bg-gray-200 dark:bg-gray-850 dark:hover:bg-gray-800 text-center rounded-xl"
type="button"
on:click={() => {
fileInputElement.click();
}}
>
{#if inputFiles}
{inputFiles.length > 0 ? `${inputFiles.length}` : ''} document(s) selected.
{:else}
{$i18n.t('Click here to select files.')}
{/if}
</button>
</div>
<div class="flex justify-end text-sm font-medium">
<button
class=" px-4 py-2 bg-emerald-700 hover:bg-emerald-800 text-gray-100 transition rounded-lg"
type="submit"
>
{$i18n.t('Save')}
</button>
</div>
</form>
</div>
</div>
</div>
</Modal>
<style>
input::-webkit-outer-spin-button,
input::-webkit-inner-spin-button {
/* display: none; <- Crashes Chrome on hover */
-webkit-appearance: none;
margin: 0; /* <-- Apparently some margin are still there even though it's hidden */
}
.tabs::-webkit-scrollbar {
display: none; /* for Chrome, Safari and Opera */
}
.tabs {
-ms-overflow-style: none; /* IE and Edge */
scrollbar-width: none; /* Firefox */
}
input[type='number'] {
-moz-appearance: textfield; /* Firefox */
}
</style>

View File

@ -16,6 +16,10 @@
import Badge from '$lib/components/common/Badge.svelte';
import Files from './Files.svelte';
import AddFilesPlaceholder from '$lib/components/AddFilesPlaceholder.svelte';
import AddContentModal from './AddContentModal.svelte';
import { transcribeAudio } from '$lib/apis/audio';
import { blobToFile } from '$lib/utils';
import { processFile } from '$lib/apis/retrieval';
let largeScreen = true;
@ -60,6 +64,59 @@
}, 1000);
};
/**
 * Uploads a single file, has it processed into this knowledge entry's collection
 * (`id`), and then records the new file id on the knowledge entry.
 *
 * Audio files (mpeg / wav / ogg / x-m4a) are first sent to `transcribeAudio`; when a
 * transcript comes back, a `<name>.txt` text file built from it is uploaded in place
 * of the audio. If transcription fails, the original file is uploaded as-is.
 *
 * Every failure path surfaces exactly one toast and stops; nothing is thrown to the caller.
 */
const uploadFileHandler = async (file) => {
	console.log(file);

	// Check if the file is an audio file and transcribe/convert it to text file
	if (['audio/mpeg', 'audio/wav', 'audio/ogg', 'audio/x-m4a'].includes(file['type'])) {
		const res = await transcribeAudio(localStorage.token, file).catch((error) => {
			toast.error(error);
			return null;
		});

		if (res) {
			console.log(res);
			const blob = new Blob([res.text], { type: 'text/plain' });
			file = blobToFile(blob, `${file.name}.txt`);
		}
	}

	try {
		const uploadedFile = await uploadFile(localStorage.token, file).catch((e) => {
			toast.error(e);
			return null;
		});

		if (!uploadedFile) {
			toast.error($i18n.t('Failed to upload file.'));
			return;
		}

		const processedFile = await processFile(localStorage.token, uploadedFile.id, id).catch(
			(e) => {
				toast.error(e);
				return null;
			}
		);

		// `processedFile` is null when the request above rejected. Reading `.status`
		// directly would throw a TypeError, show it as a second toast from the outer
		// catch, and skip the intended "Failed to process file." message.
		if (!processedFile?.status) {
			toast.error($i18n.t('Failed to process file.'));
			return;
		}

		knowledge.data.file_ids = [...(knowledge.data.file_ids ?? []), uploadedFile.id];

		const updatedKnowledge = await updateKnowledgeById(localStorage.token, id, {
			data: knowledge.data
		}).catch((e) => {
			toast.error(e);
			return null;
		});

		if (updatedKnowledge) {
			knowledge = updatedKnowledge;
			toast.success($i18n.t('File added successfully.'));
		}
	} catch (e) {
		toast.error(e);
	}
};
onMount(async () => {
// listen to resize 1024px
const mediaQuery = window.matchMedia('(min-width: 1024px)');
@ -78,7 +135,8 @@
id = $page.params.id;
const res = await getKnowledgeById(localStorage.token, id).catch((e) => {
console.error(e);
toast.error(e);
return null;
});
if (res) {
@ -102,19 +160,11 @@
e.preventDefault();
if (e.dataTransfer?.files) {
let reader = new FileReader();
const inputFiles = e.dataTransfer?.files;
if (inputFiles && inputFiles.length > 0) {
for (const file of inputFiles) {
console.log(file, file.name.split('.').at(-1));
const uploadedFile = await uploadFile(localStorage.token, file).catch((e) => {
toast.error(e);
});
if (uploadedFile) {
knowledge.data.file_ids = [...(knowledge.data.file_ids ?? []), uploadedFile.id];
}
await uploadFileHandler(file);
}
} else {
toast.error($i18n.t(`File not found.`));
@ -161,6 +211,13 @@
</div>
{/if}
<AddContentModal
bind:show={showAddContentModal}
on:add={(e) => {
console.log(e);
}}
/>
<div class="flex flex-col w-full max-h-[100dvh] h-full">
<button
class="flex space-x-1"