mirror of
https://github.com/open-webui/open-webui
synced 2025-05-31 11:00:49 +00:00
refac
This commit is contained in:
parent
042c37ea34
commit
cb4299eb98
@ -1863,43 +1863,44 @@ DATALAB_MARKER_LANGS = PersistentConfig(
|
||||
DATALAB_MARKER_USE_LLM = PersistentConfig(
|
||||
"DATALAB_MARKER_USE_LLM",
|
||||
"rag.DATALAB_MARKER_USE_LLM",
|
||||
os.environ.get("DATALAB_MARKER_USE_LLM", "false") == "true",
|
||||
os.environ.get("DATALAB_MARKER_USE_LLM", "false").lower() == "true",
|
||||
)
|
||||
|
||||
DATALAB_MARKER_SKIP_CACHE = PersistentConfig(
|
||||
"DATALAB_MARKER_SKIP_CACHE",
|
||||
"rag.datalab_marker_skip_cache",
|
||||
os.environ.get("DATALAB_MARKER_SKIP_CACHE", "false") == "true",
|
||||
os.environ.get("DATALAB_MARKER_SKIP_CACHE", "false").lower() == "true",
|
||||
)
|
||||
|
||||
DATALAB_MARKER_FORCE_OCR = PersistentConfig(
|
||||
"DATALAB_MARKER_FORCE_OCR",
|
||||
"rag.datalab_marker_force_ocr",
|
||||
os.environ.get("DATALAB_MARKER_FORCE_OCR", "false") == "true",
|
||||
os.environ.get("DATALAB_MARKER_FORCE_OCR", "false").lower() == "true",
|
||||
)
|
||||
|
||||
DATALAB_MARKER_PAGINATE = PersistentConfig(
|
||||
"DATALAB_MARKER_PAGINATE",
|
||||
"rag.datalab_marker_paginate",
|
||||
os.environ.get("DATALAB_MARKER_PAGINATE", "false") == "true",
|
||||
os.environ.get("DATALAB_MARKER_PAGINATE", "false").lower() == "true",
|
||||
)
|
||||
|
||||
DATALAB_MARKER_STRIP_EXISTING_OCR = PersistentConfig(
|
||||
"DATALAB_MARKER_STRIP_EXISTING_OCR",
|
||||
"rag.datalab_marker_strip_existing_ocr",
|
||||
os.environ.get("DATALAB_MARKER_STRIP_EXISTING_OCR", "false") == "true",
|
||||
os.environ.get("DATALAB_MARKER_STRIP_EXISTING_OCR", "false").lower() == "true",
|
||||
)
|
||||
|
||||
DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION = PersistentConfig(
|
||||
"DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION",
|
||||
"rag.datalab_marker_disable_image_extraction",
|
||||
os.environ.get("DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION", "false") == "true",
|
||||
os.environ.get("DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION", "false").lower()
|
||||
== "true",
|
||||
)
|
||||
|
||||
DATALAB_MARKER_OUTPUT_FORMAT = PersistentConfig(
|
||||
"DATALAB_MARKER_OUTPUT_FORMAT",
|
||||
"rag.datalab_marker_output_format",
|
||||
os.environ.get("DATALAB_MARKER_OUTPUT_FORMAT", ""),
|
||||
os.environ.get("DATALAB_MARKER_OUTPUT_FORMAT", "markdown"),
|
||||
)
|
||||
|
||||
EXTERNAL_DOCUMENT_LOADER_URL = PersistentConfig(
|
||||
|
@ -58,27 +58,6 @@
|
||||
};
|
||||
|
||||
let RAGConfig = null;
|
||||
let selectedLanguages: string[] = ['en'];
|
||||
let langsHydrated = false;
|
||||
|
||||
const SUPPORTED_LANGUAGES = {
|
||||
"af": "Afrikaans", "am": "Amharic", "ar": "Arabic", "as": "Assamese", "az": "Azerbaijani", "be": "Belarusian",
|
||||
"bg": "Bulgarian", "bn": "Bengali", "br": "Breton", "bs": "Bosnian", "ca": "Catalan", "cs": "Czech",
|
||||
"cy": "Welsh", "da": "Danish", "de": "German", "el": "Greek", "en": "English", "eo": "Esperanto",
|
||||
"es": "Spanish", "et": "Estonian", "eu": "Basque", "fa": "Persian", "fi": "Finnish", "fr": "French",
|
||||
"fy": "Western Frisian", "ga": "Irish", "gd": "Scottish Gaelic", "gl": "Galician", "gu": "Gujarati",
|
||||
"ha": "Hausa", "he": "Hebrew", "hi": "Hindi", "hr": "Croatian", "hu": "Hungarian", "hy": "Armenian",
|
||||
"id": "Indonesian", "is": "Icelandic", "it": "Italian", "ja": "Japanese", "jv": "Javanese", "ka": "Georgian",
|
||||
"kk": "Kazakh", "km": "Khmer", "kn": "Kannada", "ko": "Korean", "ku": "Kurdish", "ky": "Kyrgyz",
|
||||
"la": "Latin", "lo": "Lao", "lt": "Lithuanian", "lv": "Latvian", "mg": "Malagasy", "mk": "Macedonian",
|
||||
"ml": "Malayalam", "mn": "Mongolian", "mr": "Marathi", "ms": "Malay", "my": "Burmese", "ne": "Nepali",
|
||||
"nl": "Dutch", "no": "Norwegian", "om": "Oromo", "or": "Oriya", "pa": "Punjabi", "pl": "Polish",
|
||||
"ps": "Pashto", "pt": "Portuguese", "ro": "Romanian", "ru": "Russian", "sa": "Sanskrit", "sd": "Sindhi",
|
||||
"si": "Sinhala", "sk": "Slovak", "sl": "Slovenian", "so": "Somali", "sq": "Albanian", "sr": "Serbian",
|
||||
"su": "Sundanese", "sv": "Swedish", "sw": "Swahili", "ta": "Tamil", "te": "Telugu", "th": "Thai",
|
||||
"tl": "Tagalog", "tr": "Turkish", "ug": "Uyghur", "uk": "Ukrainian", "ur": "Urdu", "uz": "Uzbek",
|
||||
"vi": "Vietnamese", "xh": "Xhosa", "yi": "Yiddish", "zh": "Chinese", "_math": "Math"
|
||||
};
|
||||
|
||||
const embeddingModelUpdateHandler = async () => {
|
||||
if (embeddingEngine === '' && embeddingModel.split('/').length - 1 > 1) {
|
||||
@ -145,10 +124,6 @@
|
||||
};
|
||||
|
||||
const submitHandler = async () => {
|
||||
if (RAGConfig.CONTENT_EXTRACTION_ENGINE === 'datalab_marker' && !RAGConfig.DATALAB_MARKER_API_KEY) {
|
||||
toast.error($i18n.t('Datalab Marker API Key required.'));
|
||||
return;
|
||||
}
|
||||
if (
|
||||
RAGConfig.CONTENT_EXTRACTION_ENGINE === 'external' &&
|
||||
RAGConfig.EXTERNAL_DOCUMENT_LOADER_URL === ''
|
||||
@ -175,6 +150,14 @@
|
||||
return;
|
||||
}
|
||||
|
||||
if (
|
||||
RAGConfig.CONTENT_EXTRACTION_ENGINE === 'datalab_marker' &&
|
||||
!RAGConfig.DATALAB_MARKER_API_KEY
|
||||
) {
|
||||
toast.error($i18n.t('Datalab Marker API Key required.'));
|
||||
return;
|
||||
}
|
||||
|
||||
if (
|
||||
RAGConfig.CONTENT_EXTRACTION_ENGINE === 'document_intelligence' &&
|
||||
(RAGConfig.DOCUMENT_INTELLIGENCE_ENDPOINT === '' ||
|
||||
@ -200,6 +183,11 @@
|
||||
.map((ext) => ext.trim())
|
||||
.filter((ext) => ext !== '');
|
||||
|
||||
RAGConfig.DATALAB_MARKER_LANGS = RAGConfig.DATALAB_MARKER_LANGS.split(',')
|
||||
.map((code) => code.trim())
|
||||
.filter((code) => code !== '')
|
||||
.join(', ');
|
||||
|
||||
const res = await updateRAGConfig(localStorage.token, RAGConfig);
|
||||
dispatch('save');
|
||||
};
|
||||
@ -224,27 +212,8 @@
|
||||
|
||||
const config = await getRAGConfig(localStorage.token);
|
||||
config.ALLOWED_FILE_EXTENSIONS = (config?.ALLOWED_FILE_EXTENSIONS ?? []).join(', ');
|
||||
|
||||
if (!config.DATALAB_MARKER_OUTPUT_FORMAT) {
|
||||
config.DATALAB_MARKER_OUTPUT_FORMAT = 'markdown';
|
||||
}
|
||||
|
||||
if (config.DATALAB_MARKER_LANGS) {
|
||||
selectedLanguages = config.DATALAB_MARKER_LANGS
|
||||
.split(',')
|
||||
.map(code => code.trim())
|
||||
.filter(Boolean);
|
||||
}
|
||||
|
||||
RAGConfig = config;
|
||||
langsHydrated = true;
|
||||
});
|
||||
|
||||
$: if (langsHydrated && RAGConfig) {
|
||||
RAGConfig.DATALAB_MARKER_LANGS = selectedLanguages.length
|
||||
? selectedLanguages.join(',')
|
||||
: 'en';
|
||||
}
|
||||
</script>
|
||||
|
||||
<ResetUploadDirConfirmDialog
|
||||
@ -314,10 +283,10 @@
|
||||
bind:value={RAGConfig.CONTENT_EXTRACTION_ENGINE}
|
||||
>
|
||||
<option value="">{$i18n.t('Default')}</option>
|
||||
<option value="datalab_marker">{ $i18n.t('Datalab Marker API') }</option>
|
||||
<option value="external">{$i18n.t('External')}</option>
|
||||
<option value="tika">{$i18n.t('Tika')}</option>
|
||||
<option value="docling">{$i18n.t('Docling')}</option>
|
||||
<option value="datalab_marker">{$i18n.t('Datalab Marker API')}</option>
|
||||
<option value="document_intelligence">{$i18n.t('Document Intelligence')}</option>
|
||||
<option value="mistral_ocr">{$i18n.t('Mistral OCR')}</option>
|
||||
</select>
|
||||
@ -336,106 +305,136 @@
|
||||
</div>
|
||||
</div>
|
||||
{:else if RAGConfig.CONTENT_EXTRACTION_ENGINE === 'datalab_marker'}
|
||||
<div class="my-0.5 flex gap-2 pr-2">
|
||||
<SensitiveInput
|
||||
placeholder={$i18n.t('Enter Datalab Marker API Key')}
|
||||
required={false}
|
||||
bind:value={RAGConfig.DATALAB_MARKER_API_KEY}
|
||||
/>
|
||||
</div>
|
||||
<div class="my-0.5 flex gap-2 pr-2 w-full">
|
||||
<div class="flex flex-col w-full">
|
||||
<label class="text-xs font-medium mb-1">
|
||||
{$i18n.t("OCR language(s). Hold Ctrl (Windows) or Cmd (Mac) to select multiple. If no selection defaults to English")}
|
||||
</label>
|
||||
<select
|
||||
class="w-full text-sm bg-transparent border border-gray-300 dark:border-gray-700 rounded-sm p-1 outline-hidden"
|
||||
multiple
|
||||
size="6"
|
||||
bind:value={selectedLanguages}
|
||||
>
|
||||
{#each Object.entries(SUPPORTED_LANGUAGES) as [code, label]}
|
||||
<option value={code}>{label}</option>
|
||||
{/each}
|
||||
</select>
|
||||
<div class="my-0.5 flex gap-2 pr-2">
|
||||
<SensitiveInput
|
||||
placeholder={$i18n.t('Enter Datalab Marker API Key')}
|
||||
required={false}
|
||||
bind:value={RAGConfig.DATALAB_MARKER_API_KEY}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
<div class="mb-1 flex w-full justify-between">
|
||||
<div class="self-center text-xs font-medium">
|
||||
<Tooltip content={$i18n.t('Significantly improves accuracy by using an LLM to enhance tables, forms, inline math, and layout detection. Will increase latency. Defaults to True.')} placement="top-start">
|
||||
{$i18n.t('Use LLM')}
|
||||
</Tooltip>
|
||||
|
||||
<div class="flex justify-between w-full mt-2">
|
||||
<div class="text-xs font-medium">
|
||||
{$i18n.t('Languages')}
|
||||
</div>
|
||||
|
||||
<input
|
||||
class="text-sm bg-transparent outline-hidden"
|
||||
type="text"
|
||||
bind:value={RAGConfig.DATALAB_MARKER_LANGS}
|
||||
placeholder={$i18n.t('e.g.) en,fr,de')}
|
||||
/>
|
||||
</div>
|
||||
<div class="flex items-center">
|
||||
<Switch bind:state={RAGConfig.DATALAB_MARKER_USE_LLM} />
|
||||
|
||||
<div class="flex justify-between w-full mt-2">
|
||||
<div class="self-center text-xs font-medium">
|
||||
<Tooltip
|
||||
content={$i18n.t(
|
||||
'Significantly improves accuracy by using an LLM to enhance tables, forms, inline math, and layout detection. Will increase latency. Defaults to True.'
|
||||
)}
|
||||
placement="top-start"
|
||||
>
|
||||
{$i18n.t('Use LLM')}
|
||||
</Tooltip>
|
||||
</div>
|
||||
<div class="flex items-center">
|
||||
<Switch bind:state={RAGConfig.DATALAB_MARKER_USE_LLM} />
|
||||
</div>
|
||||
</div>
|
||||
<div class="flex justify-between w-full mt-2">
|
||||
<div class="self-center text-xs font-medium">
|
||||
<Tooltip
|
||||
content={$i18n.t('Skip the cache and re-run the inference. Defaults to False.')}
|
||||
placement="top-start"
|
||||
>
|
||||
{$i18n.t('Skip Cache')}
|
||||
</Tooltip>
|
||||
</div>
|
||||
<div class="flex items-center">
|
||||
<Switch bind:state={RAGConfig.DATALAB_MARKER_SKIP_CACHE} />
|
||||
</div>
|
||||
</div>
|
||||
<div class="mb-1 flex w-full justify-between">
|
||||
<div class="self-center text-xs font-medium">
|
||||
<Tooltip content={$i18n.t('Skip the cache and re-run the inference. Defaults to False.')} placement="top-start">
|
||||
{$i18n.t('Skip Cache')}
|
||||
</Tooltip>
|
||||
<div class="flex justify-between w-full mt-2">
|
||||
<div class="self-center text-xs font-medium">
|
||||
<Tooltip
|
||||
content={$i18n.t(
|
||||
'Force OCR on all pages of the PDF. This can lead to worse results if you have good text in your PDFs. Defaults to False.'
|
||||
)}
|
||||
placement="top-start"
|
||||
>
|
||||
{$i18n.t('Force OCR')}
|
||||
</Tooltip>
|
||||
</div>
|
||||
<div class="flex items-center">
|
||||
<Switch bind:state={RAGConfig.DATALAB_MARKER_FORCE_OCR} />
|
||||
</div>
|
||||
</div>
|
||||
<div class="flex items-center">
|
||||
<Switch bind:state={RAGConfig.DATALAB_MARKER_SKIP_CACHE} />
|
||||
<div class="flex justify-between w-full mt-2">
|
||||
<div class="self-center text-xs font-medium">
|
||||
<Tooltip
|
||||
content={$i18n.t(
|
||||
'Whether to paginate the output. Each page will be separated by a horizontal rule and page number. Defaults to False.'
|
||||
)}
|
||||
placement="top-start"
|
||||
>
|
||||
{$i18n.t('Paginate')}
|
||||
</Tooltip>
|
||||
</div>
|
||||
<div class="flex items-center">
|
||||
<Switch bind:state={RAGConfig.DATALAB_MARKER_PAGINATE} />
|
||||
</div>
|
||||
</div>
|
||||
<div class="flex justify-between w-full mt-2">
|
||||
<div class="self-center text-xs font-medium">
|
||||
<Tooltip
|
||||
content={$i18n.t(
|
||||
'Strip existing OCR text from the PDF and re-run OCR. Ignored if Force OCR is enabled. Defaults to False.'
|
||||
)}
|
||||
placement="top-start"
|
||||
>
|
||||
{$i18n.t('Strip Existing OCR')}
|
||||
</Tooltip>
|
||||
</div>
|
||||
<div class="flex items-center">
|
||||
<Switch bind:state={RAGConfig.DATALAB_MARKER_STRIP_EXISTING_OCR} />
|
||||
</div>
|
||||
</div>
|
||||
<div class="mb-1 flex w-full justify-between">
|
||||
<div class="self-center text-xs font-medium">
|
||||
<Tooltip content={$i18n.t('Force OCR on all pages of the PDF. This can lead to worse results if you have good text in your PDFs. Defaults to False.')} placement="top-start">
|
||||
{$i18n.t('Force OCR')}
|
||||
</Tooltip>
|
||||
</div>
|
||||
<div class="flex items-center">
|
||||
<Switch bind:state={RAGConfig.DATALAB_MARKER_FORCE_OCR} />
|
||||
</div>
|
||||
</div>
|
||||
<div class="mb-1 flex w-full justify-between">
|
||||
<div class="self-center text-xs font-medium">
|
||||
<Tooltip content={$i18n.t('Whether to paginate the output. Each page will be separated by a horizontal rule and page number. Defaults to False.')} placement="top-start">
|
||||
{$i18n.t('Paginate')}
|
||||
</Tooltip>
|
||||
</div>
|
||||
<div class="flex items-center">
|
||||
<Switch bind:state={RAGConfig.DATALAB_MARKER_PAGINATE} />
|
||||
</div>
|
||||
</div>
|
||||
<div class="mb-1 flex w-full justify-between">
|
||||
<div class="self-center text-xs font-medium">
|
||||
<Tooltip content={$i18n.t('Strip existing OCR text from the PDF and re-run OCR. Ignored if Force OCR is enabled. Defaults to False.')} placement="top-start">
|
||||
{$i18n.t('Strip Existing OCR')}
|
||||
</Tooltip>
|
||||
</div>
|
||||
<div class="flex items-center">
|
||||
<Switch bind:state={RAGConfig.DATALAB_MARKER_STRIP_EXISTING_OCR} />
|
||||
</div>
|
||||
</div>
|
||||
<div class="mb-1 flex w-full justify-between">
|
||||
<div class="self-center text-xs font-medium">
|
||||
<Tooltip content={$i18n.t('Disable image extraction from the PDF. If Use LLM is enabled, images will be automatically captioned. Defaults to False.')} placement="top-start">
|
||||
{$i18n.t('Disable Image Extraction')}
|
||||
</Tooltip>
|
||||
</div>
|
||||
<div class="flex items-center">
|
||||
<Switch bind:state={RAGConfig.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION} />
|
||||
</div>
|
||||
</div>
|
||||
<div class="mb-1 flex w-full justify-between">
|
||||
<div class="self-center text-xs font-medium">
|
||||
<Tooltip content={$i18n.t("The output format for the text. Can be 'json', 'markdown', or 'html'. Defaults to 'markdown'.")} placement="top-start">
|
||||
{$i18n.t('Output Format')}
|
||||
</Tooltip>
|
||||
</div>
|
||||
<div class="">
|
||||
<select
|
||||
class="dark:bg-gray-900 w-fit pr-8 rounded-sm px-2 text-xs bg-transparent outline-hidden text-right"
|
||||
bind:value={RAGConfig.DATALAB_MARKER_OUTPUT_FORMAT}
|
||||
>
|
||||
<option value="markdown">{$i18n.t('Markdown')}</option>
|
||||
<option value="json">{$i18n.t('JSON')}</option>
|
||||
<option value="html">{$i18n.t('HTML')}</option>
|
||||
</select>
|
||||
<div class="flex justify-between w-full mt-2">
|
||||
<div class="self-center text-xs font-medium">
|
||||
<Tooltip
|
||||
content={$i18n.t(
|
||||
'Disable image extraction from the PDF. If Use LLM is enabled, images will be automatically captioned. Defaults to False.'
|
||||
)}
|
||||
placement="top-start"
|
||||
>
|
||||
{$i18n.t('Disable Image Extraction')}
|
||||
</Tooltip>
|
||||
</div>
|
||||
<div class="flex items-center">
|
||||
<Switch bind:state={RAGConfig.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION} />
|
||||
</div>
|
||||
</div>
|
||||
<div class="flex justify-between w-full mt-2">
|
||||
<div class="self-center text-xs font-medium">
|
||||
<Tooltip
|
||||
content={$i18n.t(
|
||||
"The output format for the text. Can be 'json', 'markdown', or 'html'. Defaults to 'markdown'."
|
||||
)}
|
||||
placement="top-start"
|
||||
>
|
||||
{$i18n.t('Output Format')}
|
||||
</Tooltip>
|
||||
</div>
|
||||
<div class="">
|
||||
<select
|
||||
class="dark:bg-gray-900 w-fit pr-8 rounded-sm px-2 text-xs bg-transparent outline-hidden text-right"
|
||||
bind:value={RAGConfig.DATALAB_MARKER_OUTPUT_FORMAT}
|
||||
>
|
||||
<option value="markdown">{$i18n.t('Markdown')}</option>
|
||||
<option value="json">{$i18n.t('JSON')}</option>
|
||||
<option value="html">{$i18n.t('HTML')}</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
{:else if RAGConfig.CONTENT_EXTRACTION_ENGINE === 'external'}
|
||||
<div class="my-0.5 flex gap-2 pr-2">
|
||||
|
Loading…
Reference in New Issue
Block a user