From a7e614ca4c915db6a2bbfe4bb7998dfb0a41d841 Mon Sep 17 00:00:00 2001 From: Henne <65833107+HennieLP@users.noreply.github.com> Date: Tue, 2 Dec 2025 20:41:09 +0100 Subject: [PATCH] feat: Adds document intelligence model configuration (#19692) * Adds document intelligence model configuration Enables the configuration of the Document Intelligence model to be used by the RAG pipeline. This allows users to specify the model they want to use for document processing, providing flexibility and control over the extraction process. * Added Title to Document Intelligence Model Config Added Title to Document Intelligence Model Config --- backend/open_webui/config.py | 6 ++++++ backend/open_webui/main.py | 2 ++ backend/open_webui/retrieval/loaders/main.py | 2 ++ backend/open_webui/routers/retrieval.py | 9 +++++++++ src/lib/apis/retrieval/index.ts | 1 + src/lib/components/admin/Settings/Documents.svelte | 12 ++++++++++++ src/lib/i18n/locales/en-GB/translation.json | 2 ++ src/lib/i18n/locales/en-US/translation.json | 2 ++ 8 files changed, 36 insertions(+) diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index a3a9050f7..41e88df5d 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -2590,6 +2590,12 @@ DOCUMENT_INTELLIGENCE_KEY = PersistentConfig( os.getenv("DOCUMENT_INTELLIGENCE_KEY", ""), ) +DOCUMENT_INTELLIGENCE_MODEL = PersistentConfig( + "DOCUMENT_INTELLIGENCE_MODEL", + "rag.document_intelligence_model", + os.getenv("DOCUMENT_INTELLIGENCE_MODEL", "prebuilt-layout"), +) + MISTRAL_OCR_API_BASE_URL = PersistentConfig( "MISTRAL_OCR_API_BASE_URL", "rag.MISTRAL_OCR_API_BASE_URL", diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 087dc5fb0..e1f3b39a3 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -273,6 +273,7 @@ from open_webui.config import ( DOCLING_PARAMS, DOCUMENT_INTELLIGENCE_ENDPOINT, DOCUMENT_INTELLIGENCE_KEY, + DOCUMENT_INTELLIGENCE_MODEL, MISTRAL_OCR_API_BASE_URL, 
MISTRAL_OCR_API_KEY, RAG_TEXT_SPLITTER, @@ -871,6 +872,7 @@ app.state.config.DOCLING_API_KEY = DOCLING_API_KEY app.state.config.DOCLING_PARAMS = DOCLING_PARAMS app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT = DOCUMENT_INTELLIGENCE_ENDPOINT app.state.config.DOCUMENT_INTELLIGENCE_KEY = DOCUMENT_INTELLIGENCE_KEY +app.state.config.DOCUMENT_INTELLIGENCE_MODEL = DOCUMENT_INTELLIGENCE_MODEL app.state.config.MISTRAL_OCR_API_BASE_URL = MISTRAL_OCR_API_BASE_URL app.state.config.MISTRAL_OCR_API_KEY = MISTRAL_OCR_API_KEY app.state.config.MINERU_API_MODE = MINERU_API_MODE diff --git a/backend/open_webui/retrieval/loaders/main.py b/backend/open_webui/retrieval/loaders/main.py index fcc507e08..1346cd065 100644 --- a/backend/open_webui/retrieval/loaders/main.py +++ b/backend/open_webui/retrieval/loaders/main.py @@ -322,12 +322,14 @@ class Loader: file_path=file_path, api_endpoint=self.kwargs.get("DOCUMENT_INTELLIGENCE_ENDPOINT"), api_key=self.kwargs.get("DOCUMENT_INTELLIGENCE_KEY"), + api_model=self.kwargs.get("DOCUMENT_INTELLIGENCE_MODEL"), ) else: loader = AzureAIDocumentIntelligenceLoader( file_path=file_path, api_endpoint=self.kwargs.get("DOCUMENT_INTELLIGENCE_ENDPOINT"), azure_credential=DefaultAzureCredential(), + api_model=self.kwargs.get("DOCUMENT_INTELLIGENCE_MODEL"), ) elif self.engine == "mineru" and file_ext in [ "pdf" diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index 190f001ed..b7ed99389 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -468,6 +468,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)): "DOCLING_PARAMS": request.app.state.config.DOCLING_PARAMS, "DOCUMENT_INTELLIGENCE_ENDPOINT": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, "DOCUMENT_INTELLIGENCE_KEY": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, + "DOCUMENT_INTELLIGENCE_MODEL": request.app.state.config.DOCUMENT_INTELLIGENCE_MODEL, "MISTRAL_OCR_API_BASE_URL": 
request.app.state.config.MISTRAL_OCR_API_BASE_URL, "MISTRAL_OCR_API_KEY": request.app.state.config.MISTRAL_OCR_API_KEY, # MinerU settings @@ -647,6 +648,7 @@ class ConfigForm(BaseModel): DOCLING_PARAMS: Optional[dict] = None DOCUMENT_INTELLIGENCE_ENDPOINT: Optional[str] = None DOCUMENT_INTELLIGENCE_KEY: Optional[str] = None + DOCUMENT_INTELLIGENCE_MODEL: Optional[str] = None MISTRAL_OCR_API_BASE_URL: Optional[str] = None MISTRAL_OCR_API_KEY: Optional[str] = None @@ -842,6 +844,11 @@ async def update_rag_config( if form_data.DOCUMENT_INTELLIGENCE_KEY is not None else request.app.state.config.DOCUMENT_INTELLIGENCE_KEY ) + request.app.state.config.DOCUMENT_INTELLIGENCE_MODEL = ( + form_data.DOCUMENT_INTELLIGENCE_MODEL + if form_data.DOCUMENT_INTELLIGENCE_MODEL is not None + else request.app.state.config.DOCUMENT_INTELLIGENCE_MODEL + ) request.app.state.config.MISTRAL_OCR_API_BASE_URL = ( form_data.MISTRAL_OCR_API_BASE_URL @@ -1131,6 +1138,7 @@ async def update_rag_config( "DOCLING_PARAMS": request.app.state.config.DOCLING_PARAMS, "DOCUMENT_INTELLIGENCE_ENDPOINT": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, "DOCUMENT_INTELLIGENCE_KEY": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, + "DOCUMENT_INTELLIGENCE_MODEL": request.app.state.config.DOCUMENT_INTELLIGENCE_MODEL, "MISTRAL_OCR_API_BASE_URL": request.app.state.config.MISTRAL_OCR_API_BASE_URL, "MISTRAL_OCR_API_KEY": request.app.state.config.MISTRAL_OCR_API_KEY, # MinerU settings @@ -1543,6 +1551,7 @@ def process_file( PDF_EXTRACT_IMAGES=request.app.state.config.PDF_EXTRACT_IMAGES, DOCUMENT_INTELLIGENCE_ENDPOINT=request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, DOCUMENT_INTELLIGENCE_KEY=request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, + DOCUMENT_INTELLIGENCE_MODEL=request.app.state.config.DOCUMENT_INTELLIGENCE_MODEL, MISTRAL_OCR_API_BASE_URL=request.app.state.config.MISTRAL_OCR_API_BASE_URL, MISTRAL_OCR_API_KEY=request.app.state.config.MISTRAL_OCR_API_KEY, 
MINERU_API_MODE=request.app.state.config.MINERU_API_MODE, diff --git a/src/lib/apis/retrieval/index.ts b/src/lib/apis/retrieval/index.ts index 5cb0f60a7..75065910d 100644 --- a/src/lib/apis/retrieval/index.ts +++ b/src/lib/apis/retrieval/index.ts @@ -35,6 +35,7 @@ type ChunkConfigForm = { type DocumentIntelligenceConfigForm = { key: string; endpoint: string; + model: string; }; type ContentExtractConfigForm = { diff --git a/src/lib/components/admin/Settings/Documents.svelte b/src/lib/components/admin/Settings/Documents.svelte index 26c23028e..0b9accd4b 100644 --- a/src/lib/components/admin/Settings/Documents.svelte +++ b/src/lib/components/admin/Settings/Documents.svelte @@ -597,6 +597,18 @@ required={false} /> +
+
{$i18n.t('Document Intelligence Model')}
+
+
+ +
+
+
{:else if RAGConfig.CONTENT_EXTRACTION_ENGINE === 'mistral_ocr'}