diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index c25e0e046..1162fde22 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -2081,10 +2081,10 @@ PLAYWRIGHT_WS_URI = PersistentConfig( os.environ.get("PLAYWRIGHT_WS_URI", None), ) -PLAYWRIGHT_GOTO_TIMEOUT = PersistentConfig( - "PLAYWRIGHT_GOTO_TIMEOUT", - "rag.web.loader.engine.playwright.goto.timeout", - int(os.environ.get("PLAYWRIGHT_GOTO_TIMEOUT", "10")), +PLAYWRIGHT_TIMEOUT = PersistentConfig( + "PLAYWRIGHT_TIMEOUT", + "rag.web.loader.engine.playwright.timeout", + int(os.environ.get("PLAYWRIGHT_TIMEOUT", "10")), ) FIRECRAWL_API_KEY = PersistentConfig( diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 228c92e64..674926055 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -155,7 +155,7 @@ from open_webui.config import ( AUDIO_TTS_AZURE_SPEECH_REGION, AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT, PLAYWRIGHT_WS_URI, - PLAYWRIGHT_GOTO_TIMEOUT, + PLAYWRIGHT_TIMEOUT, FIRECRAWL_API_BASE_URL, FIRECRAWL_API_KEY, RAG_WEB_LOADER_ENGINE, @@ -630,7 +630,7 @@ app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = RAG_WEB_SEARCH_CONCURRENT_ app.state.config.RAG_WEB_LOADER_ENGINE = RAG_WEB_LOADER_ENGINE app.state.config.RAG_WEB_SEARCH_TRUST_ENV = RAG_WEB_SEARCH_TRUST_ENV app.state.config.PLAYWRIGHT_WS_URI = PLAYWRIGHT_WS_URI -app.state.config.PLAYWRIGHT_GOTO_TIMEOUT = PLAYWRIGHT_GOTO_TIMEOUT +app.state.config.PLAYWRIGHT_TIMEOUT = PLAYWRIGHT_TIMEOUT app.state.config.FIRECRAWL_API_BASE_URL = FIRECRAWL_API_BASE_URL app.state.config.FIRECRAWL_API_KEY = FIRECRAWL_API_KEY app.state.config.TAVILY_EXTRACT_DEPTH = TAVILY_EXTRACT_DEPTH diff --git a/backend/open_webui/retrieval/web/utils.py b/backend/open_webui/retrieval/web/utils.py index 0eee00879..942cb8483 100644 --- a/backend/open_webui/retrieval/web/utils.py +++ b/backend/open_webui/retrieval/web/utils.py @@ -29,7 +29,7 @@ from open_webui.constants import ERROR_MESSAGES from open_webui.config import ( ENABLE_RAG_LOCAL_WEB_FETCH, PLAYWRIGHT_WS_URI, - PLAYWRIGHT_GOTO_TIMEOUT, + PLAYWRIGHT_TIMEOUT, RAG_WEB_LOADER_ENGINE, FIRECRAWL_API_BASE_URL, FIRECRAWL_API_KEY, @@ -377,7 +377,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader, RateLimitMixin, URLProcessing headless (bool): If True, the browser will run in headless mode. proxy (dict): Proxy override settings for the Playwright session. playwright_ws_url (Optional[str]): WebSocket endpoint URI for remote browser connection. - playwright_goto_timeout (Optional[int]): Maximum operation time in milliseconds. + playwright_timeout (Optional[int]): Maximum operation time in milliseconds. """ def __init__( @@ -391,7 +391,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader, RateLimitMixin, URLProcessing remove_selectors: Optional[List[str]] = None, proxy: Optional[Dict[str, str]] = None, playwright_ws_url: Optional[str] = None, - playwright_goto_timeout: Optional[int] = 10000, + playwright_timeout: Optional[int] = 10000, ): """Initialize with additional safety parameters and remote browser support.""" @@ -418,7 +418,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader, RateLimitMixin, URLProcessing self.last_request_time = None self.playwright_ws_url = playwright_ws_url self.trust_env = trust_env - self.playwright_goto_timeout = playwright_goto_timeout + self.playwright_timeout = playwright_timeout def lazy_load(self) -> Iterator[Document]: """Safely load URLs synchronously with support for remote browser.""" @@ -435,7 +435,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader, RateLimitMixin, URLProcessing try: self._safe_process_url_sync(url) page = browser.new_page() - response = page.goto(url, timeout=self.playwright_goto_timeout) + response = page.goto(url, timeout=self.playwright_timeout) if response is None: raise ValueError(f"page.goto() returned None for url {url}") @@ -466,9 +466,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader, RateLimitMixin, URLProcessing try: await self._safe_process_url(url) page = await browser.new_page() - response = await page.goto( - url, timeout=self.playwright_goto_timeout - ) + response = await page.goto(url, timeout=self.playwright_timeout) if response is None: raise ValueError(f"page.goto() returned None for url {url}") @@ -611,9 +609,7 @@ def get_web_loader( } if RAG_WEB_LOADER_ENGINE.value == "playwright": - web_loader_args["playwright_goto_timeout"] = ( - PLAYWRIGHT_GOTO_TIMEOUT.value * 1000 - ) + web_loader_args["playwright_timeout"] = PLAYWRIGHT_TIMEOUT.value * 1000 if PLAYWRIGHT_WS_URI.value: web_loader_args["playwright_ws_url"] = PLAYWRIGHT_WS_URI.value