From afaa404fe442435368defb0858cba213baa1725b Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Sat, 20 Dec 2025 17:39:33 +0400 Subject: [PATCH] enh: mineru api timeout --- backend/open_webui/config.py | 6 ++ backend/open_webui/main.py | 2 + backend/open_webui/retrieval/loaders/main.py | 1 + .../open_webui/retrieval/loaders/mineru.py | 6 +- backend/open_webui/routers/retrieval.py | 9 ++ package-lock.json | 92 +++++++++++++++++++ .../admin/Settings/Documents.svelte | 17 +++- 7 files changed, 126 insertions(+), 7 deletions(-) diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index 3c21a828f..1e17b9b1d 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -2523,6 +2523,12 @@ MINERU_API_URL = PersistentConfig( os.environ.get("MINERU_API_URL", "http://localhost:8000"), ) +MINERU_API_TIMEOUT = PersistentConfig( + "MINERU_API_TIMEOUT", + "rag.mineru_api_timeout", + os.environ.get("MINERU_API_TIMEOUT", "300"), +) + MINERU_API_KEY = PersistentConfig( "MINERU_API_KEY", "rag.mineru_api_key", diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 1ff36e0c6..ea8e20fe4 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -264,6 +264,7 @@ from open_webui.config import ( MINERU_API_MODE, MINERU_API_URL, MINERU_API_KEY, + MINERU_API_TIMEOUT, MINERU_PARAMS, DATALAB_MARKER_USE_LLM, EXTERNAL_DOCUMENT_LOADER_URL, @@ -879,6 +880,7 @@ app.state.config.MISTRAL_OCR_API_KEY = MISTRAL_OCR_API_KEY app.state.config.MINERU_API_MODE = MINERU_API_MODE app.state.config.MINERU_API_URL = MINERU_API_URL app.state.config.MINERU_API_KEY = MINERU_API_KEY +app.state.config.MINERU_API_TIMEOUT = MINERU_API_TIMEOUT app.state.config.MINERU_PARAMS = MINERU_PARAMS app.state.config.TEXT_SPLITTER = RAG_TEXT_SPLITTER diff --git a/backend/open_webui/retrieval/loaders/main.py b/backend/open_webui/retrieval/loaders/main.py index 41e872361..319ad2977 100644 --- a/backend/open_webui/retrieval/loaders/main.py +++ 
b/backend/open_webui/retrieval/loaders/main.py @@ -337,6 +337,7 @@ class Loader: api_url=self.kwargs.get("MINERU_API_URL", "http://localhost:8000"), api_key=self.kwargs.get("MINERU_API_KEY", ""), params=self.kwargs.get("MINERU_PARAMS", {}), + timeout=int(self.kwargs.get("MINERU_API_TIMEOUT", 300)), ) elif ( self.engine == "mistral_ocr" diff --git a/backend/open_webui/retrieval/loaders/mineru.py b/backend/open_webui/retrieval/loaders/mineru.py index 360af804c..2e175755d 100644 --- a/backend/open_webui/retrieval/loaders/mineru.py +++ b/backend/open_webui/retrieval/loaders/mineru.py @@ -26,11 +26,13 @@ class MinerULoader: api_url: str = "http://localhost:8000", api_key: str = "", params: dict = None, + timeout: int = 300, ): self.file_path = file_path self.api_mode = api_mode.lower() self.api_url = api_url.rstrip("/") self.api_key = api_key + self.timeout = timeout # Parse params dict with defaults self.params = params or {} @@ -101,7 +103,7 @@ class MinerULoader: f"{self.api_url}/file_parse", data=form_data, files=files, - timeout=300, # 5 minute timeout for large documents + timeout=self.timeout, # request timeout in seconds (configurable; default 300 for large documents) ) response.raise_for_status() @@ -300,7 +302,7 @@ class MinerULoader: response = requests.put( upload_url, data=f, - timeout=300, # 5 minute timeout for large files + timeout=self.timeout, # request timeout in seconds (configurable; default 300 for large files) ) response.raise_for_status() except FileNotFoundError: diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index a169ce60a..3f7b64716 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -473,6 +473,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)): "MINERU_API_MODE": request.app.state.config.MINERU_API_MODE, "MINERU_API_URL": request.app.state.config.MINERU_API_URL, "MINERU_API_KEY": request.app.state.config.MINERU_API_KEY, + "MINERU_API_TIMEOUT": request.app.state.config.MINERU_API_TIMEOUT, 
"MINERU_PARAMS": request.app.state.config.MINERU_PARAMS, # Reranking settings "RAG_RERANKING_MODEL": request.app.state.config.RAG_RERANKING_MODEL, @@ -658,6 +659,7 @@ class ConfigForm(BaseModel): MINERU_API_MODE: Optional[str] = None MINERU_API_URL: Optional[str] = None MINERU_API_KEY: Optional[str] = None + MINERU_API_TIMEOUT: Optional[str] = None MINERU_PARAMS: Optional[dict] = None # Reranking settings @@ -879,6 +881,11 @@ async def update_rag_config( if form_data.MINERU_API_KEY is not None else request.app.state.config.MINERU_API_KEY ) + request.app.state.config.MINERU_API_TIMEOUT = ( + form_data.MINERU_API_TIMEOUT + if form_data.MINERU_API_TIMEOUT is not None + else request.app.state.config.MINERU_API_TIMEOUT + ) request.app.state.config.MINERU_PARAMS = ( form_data.MINERU_PARAMS if form_data.MINERU_PARAMS is not None @@ -1150,6 +1157,7 @@ async def update_rag_config( "MINERU_API_MODE": request.app.state.config.MINERU_API_MODE, "MINERU_API_URL": request.app.state.config.MINERU_API_URL, "MINERU_API_KEY": request.app.state.config.MINERU_API_KEY, + "MINERU_API_TIMEOUT": request.app.state.config.MINERU_API_TIMEOUT, "MINERU_PARAMS": request.app.state.config.MINERU_PARAMS, # Reranking settings "RAG_RERANKING_MODEL": request.app.state.config.RAG_RERANKING_MODEL, @@ -1565,6 +1573,7 @@ def process_file( MINERU_API_MODE=request.app.state.config.MINERU_API_MODE, MINERU_API_URL=request.app.state.config.MINERU_API_URL, MINERU_API_KEY=request.app.state.config.MINERU_API_KEY, + MINERU_API_TIMEOUT=request.app.state.config.MINERU_API_TIMEOUT, MINERU_PARAMS=request.app.state.config.MINERU_PARAMS, ) docs = loader.load( diff --git a/package-lock.json b/package-lock.json index 1572d4324..7bf8f4da9 100644 --- a/package-lock.json +++ b/package-lock.json @@ -96,6 +96,7 @@ "vega": "^6.2.0", "vega-lite": "^6.4.1", "vite-plugin-static-copy": "^2.2.0", + "xlsx": "^0.18.5", "y-prosemirror": "^1.3.7", "yaml": "^2.7.1", "yjs": "^13.6.27" @@ -4661,6 +4662,15 @@ "node": ">=0.4.0" } }, + 
"node_modules/adler-32": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/adler-32/-/adler-32-1.3.1.tgz", + "integrity": "sha512-ynZ4w/nUUv5rrsR8UUGoe1VC9hZj6V5hU9Qw1HlMDJGEJw5S7TfTErWTjMys6M7vr0YWcPqs3qAr4ss0nDfP+A==", + "license": "Apache-2.0", + "engines": { + "node": ">=0.8" + } + }, "node_modules/agent-base": { "version": "7.1.4", "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz", @@ -5321,6 +5331,19 @@ "integrity": "sha512-4tYFyifaFfGacoiObjJegolkwSU4xQNGbVgUiNYVUxbQ2x2lUsFvY4hVgVzGiIe6WLOPqycWXA40l+PWsxthUw==", "dev": true }, + "node_modules/cfb": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/cfb/-/cfb-1.2.2.tgz", + "integrity": "sha512-KfdUZsSOw19/ObEWasvBP/Ac4reZvAGauZhs6S/gqNhXhI7cKwvlH7ulj+dOEYnca4bm4SGo8C1bTAQvnTjgQA==", + "license": "Apache-2.0", + "dependencies": { + "adler-32": "~1.3.0", + "crc-32": "~1.2.0" + }, + "engines": { + "node": ">=0.8" + } + }, "node_modules/chai": { "version": "4.5.0", "resolved": "https://registry.npmjs.org/chai/-/chai-4.5.0.tgz", @@ -5784,6 +5807,15 @@ "@lezer/lr": "^1.0.0" } }, + "node_modules/codepage": { + "version": "1.15.0", + "resolved": "https://registry.npmjs.org/codepage/-/codepage-1.15.0.tgz", + "integrity": "sha512-3g6NUTPd/YtuuGrhMnOMRjFc+LJw/bnMp3+0r/Wcz3IXUuCosKRJvMphm5+Q+bvTVGcJJuRvVLuYba+WojaFaA==", + "license": "Apache-2.0", + "engines": { + "node": ">=0.8" + } + }, "node_modules/coincident": { "version": "1.2.3", "resolved": "https://registry.npmjs.org/coincident/-/coincident-1.2.3.tgz", @@ -7761,6 +7793,15 @@ "node": ">= 6" } }, + "node_modules/frac": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/frac/-/frac-1.1.2.tgz", + "integrity": "sha512-w/XBfkibaTl3YDqASwfDUqkna4Z2p9cFSr1aHDt0WoMTECnRfBOv2WArlZILlqgWlmdIlALXGpM2AOhEk5W3IA==", + "license": "Apache-2.0", + "engines": { + "node": ">=0.8" + } + }, "node_modules/fs-extra": { "version": "11.2.0", "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.2.0.tgz", @@ 
-12153,6 +12194,18 @@ "integrity": "sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA==", "dev": true }, + "node_modules/ssf": { + "version": "0.11.2", + "resolved": "https://registry.npmjs.org/ssf/-/ssf-0.11.2.tgz", + "integrity": "sha512-+idbmIXoYET47hH+d7dfm2epdOMUDjqcB4648sTZ+t2JwoyBFL/insLfB/racrDmsKB3diwsDA696pZMieAC5g==", + "license": "Apache-2.0", + "dependencies": { + "frac": "~1.1.2" + }, + "engines": { + "node": ">=0.8" + } + }, "node_modules/sshpk": { "version": "1.18.0", "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.18.0.tgz", @@ -14466,6 +14519,24 @@ "node": ">=8" } }, + "node_modules/wmf": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wmf/-/wmf-1.0.2.tgz", + "integrity": "sha512-/p9K7bEh0Dj6WbXg4JG0xvLQmIadrner1bi45VMJTfnbVHsc7yIajZyoSoK60/dtVBs12Fm6WkUI5/3WAVsNMw==", + "license": "Apache-2.0", + "engines": { + "node": ">=0.8" + } + }, + "node_modules/word": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/word/-/word-0.3.0.tgz", + "integrity": "sha512-OELeY0Q61OXpdUfTp+oweA/vtLVg5VDOXh+3he3PNzLGG/y0oylSOC1xRVj0+l4vQ3tj/bB1HVHv1ocXkQceFA==", + "license": "Apache-2.0", + "engines": { + "node": ">=0.8" + } + }, "node_modules/wrap-ansi": { "version": "8.1.0", "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz", @@ -14578,6 +14649,27 @@ } } }, + "node_modules/xlsx": { + "version": "0.18.5", + "resolved": "https://registry.npmjs.org/xlsx/-/xlsx-0.18.5.tgz", + "integrity": "sha512-dmg3LCjBPHZnQp5/F/+nnTa+miPJxUXB6vtk42YjBBKayDNagxGEeIdWApkYPOf3Z3pm3k62Knjzp7lMeTEtFQ==", + "license": "Apache-2.0", + "dependencies": { + "adler-32": "~1.3.0", + "cfb": "~1.2.1", + "codepage": "~1.15.0", + "crc-32": "~1.2.1", + "ssf": "~0.11.2", + "wmf": "~1.0.1", + "word": "~0.3.0" + }, + "bin": { + "xlsx": "bin/xlsx.njs" + }, + "engines": { + "node": ">=0.8" + } + }, "node_modules/xml-name-validator": { "version": "5.0.0", "resolved": 
"https://registry.npmjs.org/xml-name-validator/-/xml-name-validator-5.0.0.tgz", diff --git a/src/lib/components/admin/Settings/Documents.svelte b/src/lib/components/admin/Settings/Documents.svelte index 57a4f7b5f..fa68783af 100644 --- a/src/lib/components/admin/Settings/Documents.svelte +++ b/src/lib/components/admin/Settings/Documents.svelte @@ -667,11 +667,18 @@
- +
+
+ {$i18n.t('API Timeout')} +
+ +