diff --git a/backend/open_webui/apps/retrieval/main.py b/backend/open_webui/apps/retrieval/main.py index a3e828978..a0964e5a0 100644 --- a/backend/open_webui/apps/retrieval/main.py +++ b/backend/open_webui/apps/retrieval/main.py @@ -725,13 +725,15 @@ def process_file( PDF_EXTRACT_IMAGES=app.state.config.PDF_EXTRACT_IMAGES, ) docs = loader.load(file.filename, file.meta.get("content_type"), file_path) - raw_text_content = " ".join([doc.page_content for doc in docs]) + text_content = " ".join([doc.page_content for doc in docs]) + + log.debug(f"text_content: {text_content}") Files.update_files_metadata_by_id( form_data.file_id, { "content": { - "text": raw_text_content, + "text": text_content, } }, ) @@ -751,6 +753,7 @@ def process_file( "status": True, "collection_name": collection_name, "filename": file.meta.get("name", file.filename), + "content": text_content, } except Exception as e: raise HTTPException( diff --git a/src/lib/components/chat/MessageInput.svelte b/src/lib/components/chat/MessageInput.svelte index 2ca0c8d20..b93468470 100644 --- a/src/lib/components/chat/MessageInput.svelte +++ b/src/lib/components/chat/MessageInput.svelte @@ -163,6 +163,8 @@ if (res) { fileItem.status = 'processed'; fileItem.collection_name = res.collection_name; + fileItem.content = res.content; + files = files; } } catch (e) { @@ -464,6 +466,7 @@ {:else} { + console.log(file); + }} /> {/if} {/each} diff --git a/src/lib/components/common/FileItem.svelte b/src/lib/components/common/FileItem.svelte index 7e8592ab9..46eac31b8 100644 --- a/src/lib/components/common/FileItem.svelte +++ b/src/lib/components/common/FileItem.svelte @@ -1,5 +1,8 @@ +{#if file} + +{/if} +
diff --git a/src/lib/components/common/FileItemModal.svelte b/src/lib/components/common/FileItemModal.svelte new file mode 100644 index 000000000..de70c6826 --- /dev/null +++ b/src/lib/components/common/FileItemModal.svelte @@ -0,0 +1,62 @@ + + + +
+
+
+
+ {file?.name ?? 'File'} +
+ +
+
+ {#if file.size} +
{formatFileSize(file.size)}
+ • + {/if} + + {#if file.content} +
{getLineCount(file.content)} extracted lines
+ +
+ + + Formatting may be inconsistent from source. +
+ {/if} +
+
+
+ +
+ +
+
+ +
+ {file?.content ?? 'No content'} +
+
+
diff --git a/src/lib/components/icons/Info.svelte b/src/lib/components/icons/Info.svelte new file mode 100644 index 000000000..2849ac532 --- /dev/null +++ b/src/lib/components/icons/Info.svelte @@ -0,0 +1,19 @@ + + + + + diff --git a/src/lib/utils/index.ts b/src/lib/utils/index.ts index 8478e885b..fef9aaffe 100644 --- a/src/lib/utils/index.ts +++ b/src/lib/utils/index.ts @@ -873,3 +873,21 @@ export const createMessagesList = (history, messageId) => { return [message]; } }; + +export const formatFileSize = (size) => { + if (size == null) return 'Unknown size'; + if (typeof size !== 'number' || size < 0) return 'Invalid size'; + if (size === 0) return '0 B'; + const units = ['B', 'KB', 'MB', 'GB', 'TB']; + let unitIndex = 0; + + while (size >= 1024 && unitIndex < units.length - 1) { + size /= 1024; + unitIndex++; + } + return `${size.toFixed(1)} ${units[unitIndex]}`; +}; + +export const getLineCount = (text) => { + return text.split('\n').length; +};