Extend the Docling configuration options to include:

* do_ocr
* force_ocr
* pdf_backend
* table_mode
* pipeline

as documented in https://github.com/docling-project/docling-serve/blob/main/docs/usage.md

See https://github.com/open-webui/open-webui/issues/17148
This commit is contained in:
Antonio Pisano
2025-09-08 18:51:33 +02:00
parent 2407d9b905
commit daa2a036f8
5 changed files with 198 additions and 17 deletions

View File

@@ -152,7 +152,8 @@
return;
}
if (
RAGConfig.CONTENT_EXTRACTION_ENGINE === 'docling' &&
RAGConfig.CONTENT_EXTRACTION_ENGINE === 'docling' &&
RAGConfig.DOCLING_DO_OCR &&
((RAGConfig.DOCLING_OCR_ENGINE === '' && RAGConfig.DOCLING_OCR_LANG !== '') ||
(RAGConfig.DOCLING_OCR_ENGINE !== '' && RAGConfig.DOCLING_OCR_LANG === ''))
) {
@@ -161,6 +162,16 @@
);
return;
}
if (
RAGConfig.CONTENT_EXTRACTION_ENGINE === 'docling' &&
RAGConfig.DOCLING_DO_OCR === false &&
RAGConfig.DOCLING_FORCE_OCR === true
) {
toast.error(
$i18n.t('In order to force OCR, performing OCR must be enabled.')
);
return;
}
if (
RAGConfig.CONTENT_EXTRACTION_ENGINE === 'datalab_marker' &&
@@ -544,21 +555,93 @@
placeholder={$i18n.t('Enter Docling Server URL')}
bind:value={RAGConfig.DOCLING_SERVER_URL}
/>
</div>
<div class="flex w-full mt-2">
<input
class="flex-1 w-full text-sm bg-transparent outline-hidden"
placeholder={$i18n.t('Enter Docling OCR Engine')}
bind:value={RAGConfig.DOCLING_OCR_ENGINE}
/>
<input
class="flex-1 w-full text-sm bg-transparent outline-hidden"
placeholder={$i18n.t('Enter Docling OCR Language(s)')}
bind:value={RAGConfig.DOCLING_OCR_LANG}
/>
</div>
</div>
<div class="flex w-full mt-2">
<div class="flex-1 flex justify-between">
<div class=" self-center text-xs font-medium">
{$i18n.t('Perform OCR')}
</div>
<div class="flex items-center relative">
<Switch bind:state={RAGConfig.DOCLING_DO_OCR} />
</div>
</div>
</div>
{#if RAGConfig.DOCLING_DO_OCR}
<div class="flex w-full mt-2">
<input
class="flex-1 w-full text-sm bg-transparent outline-hidden"
placeholder={$i18n.t('Enter Docling OCR Engine')}
bind:value={RAGConfig.DOCLING_OCR_ENGINE}
/>
<input
class="flex-1 w-full text-sm bg-transparent outline-hidden"
placeholder={$i18n.t('Enter Docling OCR Language(s)')}
bind:value={RAGConfig.DOCLING_OCR_LANG}
/>
</div>
{/if}
<div class="flex w-full mt-2">
<div class="flex-1 flex justify-between">
<div class=" self-center text-xs font-medium">
{$i18n.t('Force OCR')}
</div>
<div class="flex items-center relative">
<Switch bind:state={RAGConfig.DOCLING_FORCE_OCR} />
</div>
</div>
</div>
<div class="flex justify-between w-full mt-2">
<div class="self-center text-xs font-medium">
<Tooltip content={''} placement="top-start">
{$i18n.t('PDF Backend')}
</Tooltip>
</div>
<div class="">
<select
class="dark:bg-gray-900 w-fit pr-8 rounded-sm px-2 text-xs bg-transparent outline-hidden text-right"
bind:value={RAGConfig.DOCLING_PDF_BACKEND}
>
<option value="pypdfium2">{$i18n.t('pypdfium2')}</option>
<option value="dlparse_v1">{$i18n.t('dlparse_v1')}</option>
<option value="dlparse_v2">{$i18n.t('dlparse_v2')}</option>
<option value="dlparse_v4">{$i18n.t('dlparse_v4')}</option>
</select>
</div>
</div>
<div class="flex justify-between w-full mt-2">
<div class="self-center text-xs font-medium">
<Tooltip content={''} placement="top-start">
{$i18n.t('Table Mode')}
</Tooltip>
</div>
<div class="">
<select
class="dark:bg-gray-900 w-fit pr-8 rounded-sm px-2 text-xs bg-transparent outline-hidden text-right"
bind:value={RAGConfig.DOCLING_TABLE_MODE}
>
<option value="fast">{$i18n.t('fast')}</option>
<option value="accurate">{$i18n.t('accurate')}</option>
</select>
</div>
</div>
<div class="flex justify-between w-full mt-2">
<div class="self-center text-xs font-medium">
<Tooltip content={''} placement="top-start">
{$i18n.t('Pipeline')}
</Tooltip>
</div>
<div class="">
<select
class="dark:bg-gray-900 w-fit pr-8 rounded-sm px-2 text-xs bg-transparent outline-hidden text-right"
bind:value={RAGConfig.DOCLING_PIPELINE}
>
<option value="standard">{$i18n.t('standard')}</option>
<option value="vlm">{$i18n.t('vlm')}</option>
</select>
</div>
</div>
<div class="flex w-full mt-2">
<div class="flex-1 flex justify-between">
<div class=" self-center text-xs font-medium">
{$i18n.t('Describe Pictures in Documents')}