enh/refac: temp chat file upload behaviour
client-side content extraction
This commit is contained in:
@@ -1597,7 +1597,7 @@
|
||||
let files = JSON.parse(JSON.stringify(chatFiles));
|
||||
files.push(
|
||||
...(userMessage?.files ?? []).filter((item) =>
|
||||
['doc', 'file', 'note', 'collection'].includes(item.type)
|
||||
['doc', 'text', 'file', 'note', 'collection'].includes(item.type)
|
||||
)
|
||||
);
|
||||
// Remove duplicates
|
||||
|
||||
@@ -1,4 +1,8 @@
|
||||
<script lang="ts">
|
||||
import * as pdfjs from 'pdfjs-dist';
|
||||
import * as pdfWorker from 'pdfjs-dist/build/pdf.worker.mjs';
|
||||
pdfjs.GlobalWorkerOptions.workerSrc = import.meta.url + 'pdfjs-dist/build/pdf.worker.mjs';
|
||||
|
||||
import DOMPurify from 'dompurify';
|
||||
import { marked } from 'marked';
|
||||
import heic2any from 'heic2any';
|
||||
@@ -23,13 +27,15 @@
|
||||
tools,
|
||||
user as _user,
|
||||
showControls,
|
||||
TTSWorker
|
||||
TTSWorker,
|
||||
temporaryChatEnabled
|
||||
} from '$lib/stores';
|
||||
|
||||
import {
|
||||
blobToFile,
|
||||
compressImage,
|
||||
createMessagesList,
|
||||
extractContentFromFile,
|
||||
extractCurlyBraceWords,
|
||||
extractInputVariables,
|
||||
getCurrentDateTime,
|
||||
@@ -529,47 +535,77 @@
|
||||
|
||||
files = [...files, fileItem];
|
||||
|
||||
try {
|
||||
// If the file is an audio file, provide the language for STT.
|
||||
let metadata = null;
|
||||
if (
|
||||
(file.type.startsWith('audio/') || file.type.startsWith('video/')) &&
|
||||
$settings?.audio?.stt?.language
|
||||
) {
|
||||
metadata = {
|
||||
language: $settings?.audio?.stt?.language
|
||||
};
|
||||
}
|
||||
|
||||
// During the file upload, file content is automatically extracted.
|
||||
const uploadedFile = await uploadFile(localStorage.token, file, metadata);
|
||||
|
||||
if (uploadedFile) {
|
||||
console.log('File upload completed:', {
|
||||
id: uploadedFile.id,
|
||||
name: fileItem.name,
|
||||
collection: uploadedFile?.meta?.collection_name
|
||||
});
|
||||
|
||||
if (uploadedFile.error) {
|
||||
console.warn('File upload warning:', uploadedFile.error);
|
||||
toast.warning(uploadedFile.error);
|
||||
if (!$temporaryChatEnabled) {
|
||||
try {
|
||||
// If the file is an audio file, provide the language for STT.
|
||||
let metadata = null;
|
||||
if (
|
||||
(file.type.startsWith('audio/') || file.type.startsWith('video/')) &&
|
||||
$settings?.audio?.stt?.language
|
||||
) {
|
||||
metadata = {
|
||||
language: $settings?.audio?.stt?.language
|
||||
};
|
||||
}
|
||||
|
||||
fileItem.status = 'uploaded';
|
||||
fileItem.file = uploadedFile;
|
||||
fileItem.id = uploadedFile.id;
|
||||
fileItem.collection_name =
|
||||
uploadedFile?.meta?.collection_name || uploadedFile?.collection_name;
|
||||
fileItem.url = `${WEBUI_API_BASE_URL}/files/${uploadedFile.id}`;
|
||||
// During the file upload, file content is automatically extracted.
|
||||
const uploadedFile = await uploadFile(localStorage.token, file, metadata);
|
||||
|
||||
files = files;
|
||||
} else {
|
||||
if (uploadedFile) {
|
||||
console.log('File upload completed:', {
|
||||
id: uploadedFile.id,
|
||||
name: fileItem.name,
|
||||
collection: uploadedFile?.meta?.collection_name
|
||||
});
|
||||
|
||||
if (uploadedFile.error) {
|
||||
console.warn('File upload warning:', uploadedFile.error);
|
||||
toast.warning(uploadedFile.error);
|
||||
}
|
||||
|
||||
fileItem.status = 'uploaded';
|
||||
fileItem.file = uploadedFile;
|
||||
fileItem.id = uploadedFile.id;
|
||||
fileItem.collection_name =
|
||||
uploadedFile?.meta?.collection_name || uploadedFile?.collection_name;
|
||||
fileItem.url = `${WEBUI_API_BASE_URL}/files/${uploadedFile.id}`;
|
||||
|
||||
files = files;
|
||||
} else {
|
||||
files = files.filter((item) => item?.itemId !== tempItemId);
|
||||
}
|
||||
} catch (e) {
|
||||
toast.error(`${e}`);
|
||||
files = files.filter((item) => item?.itemId !== tempItemId);
|
||||
}
|
||||
} catch (e) {
|
||||
toast.error(`${e}`);
|
||||
files = files.filter((item) => item?.itemId !== tempItemId);
|
||||
} else {
|
||||
// If temporary chat is enabled, we just add the file to the list without uploading it.
|
||||
|
||||
const content = await extractContentFromFile(file, pdfjsLib).catch((error) => {
|
||||
toast.error(
|
||||
$i18n.t('Failed to extract content from the file: {{error}}', { error: error })
|
||||
);
|
||||
return null;
|
||||
});
|
||||
|
||||
if (content === null) {
|
||||
toast.error($i18n.t('Failed to extract content from the file.'));
|
||||
files = files.filter((item) => item?.itemId !== tempItemId);
|
||||
return null;
|
||||
} else {
|
||||
console.log('Extracted content from file:', {
|
||||
name: file.name,
|
||||
size: file.size,
|
||||
content: content
|
||||
});
|
||||
|
||||
fileItem.status = 'uploaded';
|
||||
fileItem.type = 'text';
|
||||
fileItem.content = content;
|
||||
fileItem.id = uuidv4(); // Temporary ID for the file
|
||||
|
||||
files = files;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -1507,3 +1507,74 @@ export const parseJsonValue = (value: string): any => {
|
||||
|
||||
return value;
|
||||
};
|
||||
|
||||
/**
 * Extracts plain-text content from a file entirely on the client.
 *
 * Dispatch order:
 *  1. PDFs (MIME type `application/pdf` or a `.pdf` extension) are parsed
 *     page by page with the supplied pdf.js module.
 *  2. Files with a `text/*` MIME type or a known text extension are read
 *     as text with `FileReader`.
 *  3. Anything else is accepted only if its bytes are valid UTF-8;
 *     otherwise the call rejects instead of returning mojibake.
 *
 * @param file - The file to read. Its `name`, `type` and `arrayBuffer()`
 *   members are used; a missing `name` is treated as "no extension".
 * @param pdfjsLib - Object exposing pdf.js's `getDocument`. Only needed
 *   for PDF input.
 * @returns The extracted text.
 * @throws Error if a PDF is given without `pdfjsLib`, or if a file of
 *   unknown type cannot be decoded as UTF-8 text.
 */
export const extractContentFromFile = async (file, pdfjsLib = null) => {
	// Known text file extensions, used when the MIME type is missing or generic.
	const textExtensions = [
		'.txt',
		'.md',
		'.csv',
		'.json',
		'.js',
		'.ts',
		'.css',
		'.html',
		'.xml',
		'.yaml',
		'.yml',
		'.rtf'
	];

	// Returns the lower-cased extension including the dot, or '' if there is none.
	function getExtension(filename) {
		const dot = filename.lastIndexOf('.');
		return dot === -1 ? '' : filename.slice(dot).toLowerCase();
	}

	// Uses pdf.js to extract text from a PDF: items of a page are joined
	// with spaces, and each page is terminated with a newline.
	async function extractPdfText(file) {
		if (!pdfjsLib) {
			throw new Error('pdfjsLib is required for PDF extraction');
		}

		const arrayBuffer = await file.arrayBuffer();
		const pdf = await pdfjsLib.getDocument({ data: arrayBuffer }).promise;
		let allText = '';
		for (let pageNum = 1; pageNum <= pdf.numPages; pageNum++) {
			const page = await pdf.getPage(pageNum);
			const content = await page.getTextContent();
			const strings = content.items.map((item) => item.str);
			allText += strings.join(' ') + '\n';
		}
		return allText;
	}

	// Reads the file as text using FileReader.
	function readAsText(file) {
		return new Promise((resolve, reject) => {
			const reader = new FileReader();
			reader.onload = () => resolve(reader.result);
			reader.onerror = reject;
			reader.readAsText(file);
		});
	}

	const type = file.type || '';
	// The name may be absent (the error message below already allows for
	// that), so never call string methods on it unguarded.
	const ext = getExtension(file.name ?? '');

	// PDF check
	if (type === 'application/pdf' || ext === '.pdf') {
		return await extractPdfText(file);
	}

	// Text check (plain or common text-based)
	if (type.startsWith('text/') || textExtensions.includes(ext)) {
		return await readAsText(file);
	}

	// Fallback: accept the file only if it really is decodable text.
	// FileReader.readAsText does not fail on binary data (it substitutes
	// U+FFFD), so a strict UTF-8 decode is used to detect non-text content.
	try {
		const bytes = await file.arrayBuffer();
		return new TextDecoder('utf-8', { fatal: true }).decode(bytes);
	} catch (err) {
		throw new Error('Unsupported or non-text file type: ' + (file.name || type));
	}
};
|
||||
|
||||
Reference in New Issue
Block a user