enh: show extracted file content

This commit is contained in:
Timothy J. Baek 2024-09-28 10:53:25 +02:00
parent 9636913de0
commit 90ec458c4c
6 changed files with 123 additions and 16 deletions

View File

@ -725,13 +725,15 @@ def process_file(
PDF_EXTRACT_IMAGES=app.state.config.PDF_EXTRACT_IMAGES,
)
docs = loader.load(file.filename, file.meta.get("content_type"), file_path)
raw_text_content = " ".join([doc.page_content for doc in docs])
text_content = " ".join([doc.page_content for doc in docs])
log.debug(f"text_content: {text_content}")
Files.update_files_metadata_by_id(
form_data.file_id,
{
"content": {
"text": raw_text_content,
"text": text_content,
}
},
)
@ -751,6 +753,7 @@ def process_file(
"status": True,
"collection_name": collection_name,
"filename": file.meta.get("name", file.filename),
"content": text_content,
}
except Exception as e:
raise HTTPException(

View File

@ -163,6 +163,8 @@
if (res) {
fileItem.status = 'processed';
fileItem.collection_name = res.collection_name;
fileItem.content = res.content;
files = files;
}
} catch (e) {
@ -464,6 +466,7 @@
</div>
{:else}
<FileItem
{file}
name={file.name}
type={file.type}
size={file?.size}
@ -473,6 +476,9 @@
files.splice(fileIdx, 1);
files = files;
}}
on:click={() => {
console.log(file);
}}
/>
{/if}
{/each}

View File

@ -1,5 +1,8 @@
<script lang="ts">
import { createEventDispatcher, getContext } from 'svelte';
import { formatFileSize } from '$lib/utils';
import FileItemModal from './FileItemModal.svelte';
const i18n = getContext('i18n');
const dispatch = createEventDispatcher();
@ -11,30 +14,26 @@
export let dismissible = false;
export let status = 'processed';
export let file = null;
export let enableModal = true;
export let name: string;
export let type: string;
export let size: number;
/**
 * Format a byte count as a short human-readable string such as `1.5 KB`.
 *
 * Steps through B, KB, MB, GB and TB in factors of 1024 and prints one decimal
 * place; exactly zero is rendered as `0 B`.
 *
 * @param size - Size in bytes; `null`/`undefined` means the size is not known.
 * @returns The formatted size, `'Unknown size'` for `null`/`undefined`, or
 *   `'Invalid size'` for non-numbers, negative numbers, `NaN` and `±Infinity`.
 */
const formatSize = (size: unknown): string => {
	if (size == null) return 'Unknown size';
	// Number.isFinite rejects NaN/Infinity, which would otherwise print "NaN B" / "Infinity TB".
	if (typeof size !== 'number' || !Number.isFinite(size) || size < 0) return 'Invalid size';
	if (size === 0) return '0 B';

	const units = ['B', 'KB', 'MB', 'GB', 'TB'];
	const lastUnit = units.length - 1;
	// Work on a copy so the parameter is not mutated.
	let value = size;
	let unitIndex = 0;
	while (value >= 1024 && unitIndex < lastUnit) {
		value /= 1024;
		unitIndex++;
	}
	// Rounding to one decimal can turn e.g. 1023.999 into "1024.0"; move up one unit
	// so 1048575 bytes reads "1.0 MB" rather than "1024.0 KB".
	if (unitIndex < lastUnit && Number(value.toFixed(1)) >= 1024) {
		value /= 1024;
		unitIndex++;
	}
	return `${value.toFixed(1)} ${units[unitIndex]}`;
};
let showModal = false;
</script>
{#if file}
<FileItemModal bind:show={showModal} {file} />
{/if}
<div class="relative group">
<button
class="h-14 {className} flex items-center space-x-3 {colorClassName} rounded-xl border border-gray-100 dark:border-gray-800 text-left"
type="button"
on:click={async () => {
showModal = !showModal;
dispatch('click');
}}
>
@ -111,7 +110,7 @@
<span class=" capitalize">{type}</span>
{/if}
{#if size}
<span class="capitalize">{formatSize(size)}</span>
<span class="capitalize">{formatFileSize(size)}</span>
{/if}
</div>
</div>

View File

@ -0,0 +1,62 @@
<script lang="ts">
	import { getContext, onMount } from 'svelte';
	import { formatFileSize, getLineCount } from '$lib/utils';

	const i18n = getContext('i18n');

	import Modal from './Modal.svelte';
	import XMark from '../icons/XMark.svelte';
	import Info from '../icons/Info.svelte';

	// File object to preview; the template reads its `name`, `size` and `content` fields.
	export let file;
	// Visibility flag, bound two-way to <Modal>; the close button sets it to false.
	export let show = false;

	onMount(() => {
		console.log(file);
	});
</script>

<!--
	Modal that shows a file's name, formatted size, extracted line count and extracted text.
	Every access to `file` uses optional chaining so a null/undefined `file` renders the
	fallbacks ('File', 'No content') instead of throwing during render.
-->
<Modal bind:show size="md">
	<div class="font-primary px-6 py-6 w-full flex flex-col justify-center dark:text-gray-400">
		<div class="flex items-start justify-between pb-2">
			<div>
				<div class=" font-medium text-lg line-clamp-1 dark:text-gray-100">
					{file?.name ?? 'File'}
				</div>
				<div>
					<div class=" flex text-sm gap-1 text-gray-500">
						{#if file?.size}
							<div class="capitalize">{formatFileSize(file.size)}</div>
						{/if}
						<!-- The line count and the formatting notice only appear when text was extracted. -->
						{#if file?.content}
							<div class="capitalize">{getLineCount(file.content)} extracted lines</div>
							<div class="flex items-center gap-1">
								<Info />
								Formatting may be inconsistent from source.
							</div>
						{/if}
					</div>
				</div>
			</div>
			<div>
				<button
					on:click={() => {
						show = false;
					}}
				>
					<XMark />
				</button>
			</div>
		</div>
		<div class="max-h-96 overflow-scroll scrollbar-hidden text-xs whitespace-pre-wrap">
			{file?.content ?? 'No content'}
		</div>
	</div>
</Modal>

View File

@ -0,0 +1,19 @@
<script lang="ts">
// Stroke-only SVG icon component (presumably the `Info` icon imported by FileItemModal — confirm against the file name).
// CSS classes applied to the <svg> element; defaults to 'size-4'.
export let className = 'size-4';
// Value forwarded to the <svg> `stroke-width` attribute; defaults to '1.5'.
export let strokeWidth = '1.5';
</script>
<!-- Drawn with stroke="currentColor" and fill="none", so the outline takes the surrounding text color. -->
<svg
xmlns="http://www.w3.org/2000/svg"
fill="none"
viewBox="0 0 24 24"
stroke-width={strokeWidth}
stroke="currentColor"
class={className}
>
<path
stroke-linecap="round"
stroke-linejoin="round"
d="m11.25 11.25.041-.02a.75.75 0 0 1 1.063.852l-.708 2.836a.75.75 0 0 0 1.063.853l.041-.021M21 12a9 9 0 1 1-18 0 9 9 0 0 1 18 0Zm-9-3.75h.008v.008H12V8.25Z"
/>
</svg>

View File

@ -873,3 +873,21 @@ export const createMessagesList = (history, messageId) => {
return [message];
}
};
/**
 * Format a byte count as a short human-readable string such as `1.5 KB`.
 *
 * Steps through B, KB, MB, GB and TB in factors of 1024 and prints one decimal
 * place; exactly zero is rendered as `0 B`. Values beyond the TB range stay in TB.
 *
 * @param size - Size in bytes; `null`/`undefined` means the size is not known.
 * @returns The formatted size, `'Unknown size'` for `null`/`undefined`, or
 *   `'Invalid size'` for non-numbers, negative numbers, `NaN` and `±Infinity`.
 */
export const formatFileSize = (size: unknown): string => {
	if (size == null) return 'Unknown size';
	// Number.isFinite rejects NaN/Infinity, which would otherwise print "NaN B" / "Infinity TB".
	if (typeof size !== 'number' || !Number.isFinite(size) || size < 0) return 'Invalid size';
	if (size === 0) return '0 B';

	const units = ['B', 'KB', 'MB', 'GB', 'TB'];
	const lastUnit = units.length - 1;
	// Work on a copy so the parameter is not mutated.
	let value = size;
	let unitIndex = 0;
	while (value >= 1024 && unitIndex < lastUnit) {
		value /= 1024;
		unitIndex++;
	}
	// Rounding to one decimal can turn e.g. 1023.999 into "1024.0"; move up one unit
	// so 1048575 bytes reads "1.0 MB" rather than "1024.0 KB".
	if (unitIndex < lastUnit && Number(value.toFixed(1)) >= 1024) {
		value /= 1024;
		unitIndex++;
	}
	return `${value.toFixed(1)} ${units[unitIndex]}`;
};
/**
 * Count the lines in a block of text, splitting on `\n`.
 *
 * A trailing newline counts as starting one more (empty) line, so `'a\n'` yields 2.
 *
 * @param text - Text to measure; may be `null`/`undefined` when nothing was extracted.
 * @returns The number of `\n`-separated lines, or 0 for an empty string or a
 *   missing/non-string value (instead of throwing on `.split`).
 */
export const getLineCount = (text: string | null | undefined): number => {
	// An empty or missing text has no lines; without this guard '' reports 1 and null throws.
	if (typeof text !== 'string' || text === '') return 0;
	return text.split('\n').length;
};