This commit is contained in:
Timothy J. Baek 2024-10-03 22:22:22 -07:00
parent 9dd76b72b4
commit b291271df3
12 changed files with 152 additions and 79 deletions

View File

@ -731,7 +731,7 @@ def process_file(
collection_name = form_data.collection_name
if collection_name is None:
collection_name = file.id
collection_name = f"file-{file.id}"
loader = Loader(
engine=app.state.config.CONTENT_EXTRACTION_ENGINE,
@ -758,12 +758,11 @@ def process_file(
log.debug(f"text_content: {text_content}")
hash = calculate_sha256_string(text_content)
res = Files.update_file_data_by_id(
Files.update_file_data_by_id(
file.id,
{"content": text_content},
)
print(res)
Files.update_file_hash_by_id(form_data.file_id, hash)
Files.update_file_hash_by_id(file.id, hash)
try:
result = save_docs_to_vector_db(
@ -778,6 +777,13 @@ def process_file(
)
if result:
Files.update_file_metadata_by_id(
file.id,
{
"collection_name": collection_name,
},
)
return {
"status": True,
"collection_name": collection_name,

View File

@ -319,7 +319,7 @@ def get_rag_context(
for file in files:
if file.get("context") == "full":
context = {
"documents": [[file.get("file").get("content")]],
"documents": [[file.get("file").get("data", {}).get("content")]],
"metadatas": [[{"file_id": file.get("id"), "name": file.get("name")}]],
}
else:

View File

@ -6,7 +6,8 @@ from pathlib import Path
from typing import Optional
from open_webui.apps.webui.models.files import FileForm, FileModel, Files
from open_webui.apps.webui.models.knowledge import Knowledges
from open_webui.apps.retrieval.main import process_file, ProcessFileForm
from open_webui.config import UPLOAD_DIR
from open_webui.constants import ERROR_MESSAGES
from open_webui.env import SRC_LOG_LEVELS
@ -61,6 +62,13 @@ def upload_file(file: UploadFile = File(...), user=Depends(get_verified_user)):
),
)
try:
process_file(ProcessFileForm(file_id=id))
file = Files.get_file_by_id(id=id)
except Exception as e:
log.exception(e)
log.error(f"Error processing file: {file.id}")
if file:
return file
else:

View File

@ -17,7 +17,6 @@ from open_webui.utils.utils import get_admin_user, get_verified_user
from open_webui.apps.retrieval.vector.connector import VECTOR_DB_CLIENT
router = APIRouter()
############################
@ -132,7 +131,7 @@ class KnowledgeFileIdForm(BaseModel):
@router.post("/{id}/file/add", response_model=Optional[KnowledgeFilesResponse])
async def add_file_to_knowledge_by_id(
def add_file_to_knowledge_by_id(
id: str,
form_data: KnowledgeFileIdForm,
user=Depends(get_admin_user),
@ -144,6 +143,11 @@ async def add_file_to_knowledge_by_id(
status_code=status.HTTP_400_BAD_REQUEST,
detail=ERROR_MESSAGES.NOT_FOUND,
)
if not file.data:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=ERROR_MESSAGES.FILE_NOT_PROCESSED,
)
if knowledge:
data = knowledge.data or {}
@ -191,7 +195,7 @@ class KnowledgeFileIdForm(BaseModel):
@router.post("/{id}/file/remove", response_model=Optional[KnowledgeFilesResponse])
async def remove_file_from_knowledge_by_id(
def remove_file_from_knowledge_by_id(
id: str,
form_data: KnowledgeFileIdForm,
user=Depends(get_admin_user),

View File

@ -95,6 +95,7 @@ class ERROR_MESSAGES(str, Enum):
)
DUPLICATE_CONTENT = "The content provided is a duplicate. Please ensure that the content is unique before proceeding."
FILE_NOT_PROCESSED = "Extracted content is not available for this file. Please ensure that the file is processed before proceeding."
class TASKS(str, Enum):

View File

@ -138,6 +138,76 @@ export const updateKnowledgeById = async (token: string, id: string, form: Knowl
return res;
};
/**
 * Adds a file to a knowledge entry.
 *
 * Sends `POST {WEBUI_API_BASE_URL}/knowledge/{id}/file/add` with a JSON body of
 * `{ file_id }` and a bearer `authorization` header.
 *
 * @param token - Bearer token placed in the `authorization` header.
 * @param id - Identifier of the knowledge entry, interpolated into the URL.
 * @param fileId - Identifier of the file, sent as `file_id`.
 * @returns The parsed JSON body of a successful (2xx) response.
 * @throws The `detail` field of the error body when the response is not ok and
 *   such a field is present; otherwise a string describing the failure
 *   (network error, unparsable body, ...).
 */
export const addFileToKnowledgeById = async (token: string, id: string, fileId: string) => {
	try {
		const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/file/add`, {
			method: 'POST',
			headers: {
				Accept: 'application/json',
				'Content-Type': 'application/json',
				authorization: `Bearer ${token}`
			},
			body: JSON.stringify({
				file_id: fileId
			})
		});

		const payload = await res.json();
		if (!res.ok) throw payload;
		return payload;
	} catch (err) {
		console.log(err);

		// Callers receive the raw `detail` value (not an Error), so keep throwing
		// it unchanged when it exists.
		const detail = (err as { detail?: unknown } | null)?.detail;
		if (detail) throw detail;

		// Failures without a `detail` (e.g. a network TypeError or a JSON parse
		// error) must still reject instead of silently resolving to null.
		throw err instanceof Error ? err.message : 'Failed to add file to knowledge.';
	}
};
/**
 * Removes a file from a knowledge entry.
 *
 * Sends `POST {WEBUI_API_BASE_URL}/knowledge/{id}/file/remove` with a JSON body
 * of `{ file_id }` and a bearer `authorization` header.
 *
 * @param token - Bearer token placed in the `authorization` header.
 * @param id - Identifier of the knowledge entry, interpolated into the URL.
 * @param fileId - Identifier of the file, sent as `file_id`.
 * @returns The parsed JSON body of a successful (2xx) response.
 * @throws The `detail` field of the error body when the response is not ok and
 *   such a field is present; otherwise a string describing the failure
 *   (network error, unparsable body, ...).
 */
export const removeFileFromKnowledgeById = async (token: string, id: string, fileId: string) => {
	try {
		const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/file/remove`, {
			method: 'POST',
			headers: {
				Accept: 'application/json',
				'Content-Type': 'application/json',
				authorization: `Bearer ${token}`
			},
			body: JSON.stringify({
				file_id: fileId
			})
		});

		const payload = await res.json();
		if (!res.ok) throw payload;
		return payload;
	} catch (err) {
		console.log(err);

		// Callers receive the raw `detail` value (not an Error), so keep throwing
		// it unchanged when it exists.
		const detail = (err as { detail?: unknown } | null)?.detail;
		if (detail) throw detail;

		// Failures without a `detail` (e.g. a network TypeError or a JSON parse
		// error) must still reject instead of silently resolving to null.
		throw err instanceof Error ? err.message : 'Failed to remove file from knowledge.';
	}
};
export const deleteKnowledgeById = async (token: string, id: string) => {
let error = null;

View File

@ -35,7 +35,7 @@
{#each chatFiles as file, fileIdx}
<FileItem
className="w-full"
{file}
item={file}
edit={true}
url={`${file?.url}`}
name={file.name}

View File

@ -125,16 +125,17 @@
}
try {
// During the file upload, file content is automatically extracted.
const uploadedFile = await uploadFile(localStorage.token, file);
if (uploadedFile) {
fileItem.status = 'uploaded';
fileItem.status = 'processed';
fileItem.file = uploadedFile;
fileItem.id = uploadedFile.id;
fileItem.collection_name = uploadedFile?.meta?.collection_name;
fileItem.url = `${WEBUI_API_BASE_URL}/files/${uploadedFile.id}`;
// Try to extract content of the file for retrieval, even non-supported file types
processFileItem(fileItem);
files = files;
} else {
files = files.filter((item) => item.status !== null);
}
@ -143,27 +144,6 @@
files = files.filter((item) => item.status !== null);
}
};
const processFileItem = async (fileItem) => {
try {
const res = await processFile(localStorage.token, fileItem.id);
if (res) {
fileItem.status = 'processed';
fileItem.collection_name = res.collection_name;
fileItem.file = {
...fileItem.file,
content: res.content
};
files = files;
}
} catch (e) {
// We keep the file in the files list even if it fails to process
fileItem.status = 'processed';
files = files;
}
};
const inputFilesHandler = async (inputFiles) => {
inputFiles.forEach((file) => {
console.log(file, file.name.split('.').at(-1));
@ -456,7 +436,7 @@
</div>
{:else}
<FileItem
{file}
item={file}
name={file.name}
type={file.type}
size={file?.size}

View File

@ -127,7 +127,7 @@
<img src={file.url} alt="input" class=" max-h-96 rounded-lg" draggable="false" />
{:else}
<FileItem
{file}
item={file}
url={file.url}
name={file.name}
type={file.type}

View File

@ -15,7 +15,7 @@
export let dismissible = false;
export let status = 'processed';
export let file = null;
export let item = null;
export let edit = false;
export let name: string;
@ -25,15 +25,15 @@
let showModal = false;
</script>
{#if file}
<FileItemModal bind:show={showModal} bind:file {edit} />
{#if item}
<FileItemModal bind:show={showModal} bind:item {edit} />
{/if}
<button
class="relative group p-1.5 {className} flex items-center {colorClassName} rounded-2xl text-left"
type="button"
on:click={async () => {
if (file?.file?.content) {
if (item?.file?.data?.content) {
showModal = !showModal;
} else {
if (url) {

View File

@ -10,7 +10,7 @@
import Switch from './Switch.svelte';
import Tooltip from './Tooltip.svelte';
export let file;
export let item;
export let show = false;
export let edit = false;
@ -18,9 +18,9 @@
let enableFullContent = false;
onMount(() => {
console.log(file);
console.log(item);
if (file?.context === 'full') {
if (item?.context === 'full') {
enableFullContent = true;
}
});
@ -33,11 +33,11 @@
<div>
<div class=" font-medium text-lg dark:text-gray-100">
<a
href={file.url ? (file.type === 'file' ? `${file.url}/content` : `${file.url}`) : '#'}
href={item.url ? (item.type === 'file' ? `${item.url}/content` : `${item.url}`) : '#'}
target="_blank"
class="hover:underline line-clamp-1"
>
{file?.name ?? 'File'}
{item?.name ?? 'File'}
</a>
</div>
</div>
@ -56,14 +56,14 @@
<div>
<div class="flex flex-col items-center md:flex-row gap-1 justify-between w-full">
<div class=" flex flex-wrap text-sm gap-1 text-gray-500">
{#if file.size}
<div class="capitalize shrink-0">{formatFileSize(file.size)}</div>
{#if item.size}
<div class="capitalize shrink-0">{formatFileSize(item.size)}</div>
{/if}
{#if file?.file?.content}
{#if item?.file?.data?.content}
<div class="capitalize shrink-0">
{getLineCount(file?.file?.content ?? '')} extracted lines
{getLineCount(item?.file?.data?.content ?? '')} extracted lines
</div>
<div class="flex items-center gap-1 shrink-0">
@ -90,7 +90,7 @@
<Switch
bind:state={enableFullContent}
on:change={(e) => {
file.context = e.detail ? 'full' : undefined;
item.context = e.detail ? 'full' : undefined;
}}
/>
</div>
@ -102,7 +102,7 @@
</div>
<div class="max-h-96 overflow-scroll scrollbar-hidden text-xs whitespace-pre-wrap">
{file?.file?.content ?? 'No content'}
{item?.file?.data?.content ?? 'No content'}
</div>
</div>
</Modal>

View File

@ -9,7 +9,12 @@
import { mobile, showSidebar } from '$lib/stores';
import { uploadFile } from '$lib/apis/files';
import { getKnowledgeById, updateKnowledgeById } from '$lib/apis/knowledge';
import {
addFileToKnowledgeById,
getKnowledgeById,
removeFileFromKnowledgeById,
updateKnowledgeById
} from '$lib/apis/knowledge';
import Spinner from '$lib/components/common/Spinner.svelte';
import Tooltip from '$lib/components/common/Tooltip.svelte';
@ -77,7 +82,7 @@
if (uploadedFile) {
console.log(uploadedFile);
processFileHandler(uploadedFile);
addFileHandler(uploadedFile.id);
} else {
toast.error($i18n.t('Failed to upload file.'));
}
@ -86,34 +91,31 @@
}
};
const processFileHandler = async (uploadedFile) => {
const processedFile = await processFile(localStorage.token, uploadedFile.id, id).catch((e) => {
toast.error(e);
/**
 * Adds the file with the given id to the knowledge entry identified by `id`
 * and, on success, replaces the local `knowledge` value with the response.
 *
 * A rejection from `addFileToKnowledgeById` is logged and shown to the user as
 * an error toast; in that case `knowledge` is left unchanged.
 */
const addFileHandler = async (fileId) => {
	const updatedKnowledge = await addFileToKnowledgeById(localStorage.token, id, fileId).catch(
		(e) => {
			console.error(e);
			// Logging alone leaves the user without feedback, so surface the
			// rejection reason in the UI as well.
			toast.error(`${e}`);
			return null;
		}
	);

	if (updatedKnowledge) {
		knowledge = updatedKnowledge;
		toast.success($i18n.t('File added successfully.'));
	}
};
const deleteFileHandler = async (fileId) => {
const updatedKnowledge = await removeFileFromKnowledgeById(
localStorage.token,
id,
fileId
).catch((e) => {
console.error(e);
});
if (processedFile.status) {
console.log(processedFile);
if (!knowledge.data) {
knowledge.data = {};
}
knowledge.data.file_ids = [...(knowledge?.data?.file_ids ?? []), uploadedFile.id];
console.log(knowledge);
const updatedKnowledge = await updateKnowledgeById(localStorage.token, id, {
data: knowledge?.data ?? {}
}).catch((e) => {
console.error(e);
});
if (updatedKnowledge) {
knowledge = updatedKnowledge;
toast.success($i18n.t('File added successfully.'));
}
} else {
toast.error($i18n.t('Failed to process file.'));
if (updatedKnowledge) {
knowledge = updatedKnowledge;
toast.success($i18n.t('File removed successfully.'));
}
};
@ -338,6 +340,8 @@
}}
on:delete={(e) => {
console.log(e.detail);
deleteFileHandler(e.detail);
}}
/>
</div>