feat(ocr): add support for Docling OCR engine and language configuration

This commit adds support for configuring the OCR engine and language(s) for Docling.
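Configuration can be set via the environment variables `DOCLING_OCR_ENGINE` and `DOCLING_OCR_LANG`, or through the UI. When Docling is the selected content extraction engine, the UI rejects the settings if either value is left empty.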

Fixes #13133
This commit is contained in:
Athanasios Oikonomou
2025-05-03 00:31:00 +03:00
committed by Athanasios Oikonomou
parent 7d184c3a14
commit 657162e96d
5 changed files with 67 additions and 2 deletions

View File

@@ -161,6 +161,13 @@
toast.error($i18n.t('Docling Server URL required.'));
return;
}
if (
RAGConfig.CONTENT_EXTRACTION_ENGINE === 'docling' &&
(RAGConfig.DOCLING_OCR_ENGINE === '' || RAGConfig.DOCLING_OCR_LANG === '')
) {
toast.error($i18n.t('Docling OCR Engine and Language(s) required.'));
return;
}
if (
RAGConfig.CONTENT_EXTRACTION_ENGINE === 'document_intelligence' &&
@@ -326,6 +333,18 @@
bind:value={RAGConfig.DOCLING_SERVER_URL}
/>
</div>
<!--
	Docling OCR settings: two text inputs in a single flex row, two-way bound to
	RAGConfig.DOCLING_OCR_ENGINE and RAGConfig.DOCLING_OCR_LANG.
	NOTE(review): presumably rendered only in the 'docling' branch of the
	content-extraction-engine conditional; confirm against the enclosing {#if}.
-->
<div class="flex w-full mt-2">
<input
class="flex-1 w-full rounded-lg text-sm bg-transparent outline-hidden"
placeholder={$i18n.t('Enter Docling OCR Engine')}
bind:value={RAGConfig.DOCLING_OCR_ENGINE}
/>
<input
class="flex-1 w-full rounded-lg text-sm bg-transparent outline-hidden"
placeholder={$i18n.t('Enter Docling OCR Language(s)')}
bind:value={RAGConfig.DOCLING_OCR_LANG}
/>
</div>
{:else if RAGConfig.CONTENT_EXTRACTION_ENGINE === 'document_intelligence'}
<div class="my-0.5 flex gap-2 pr-2">
<input