mirror of
https://github.com/open-webui/open-webui
synced 2025-06-04 03:37:35 +00:00
Merge pull request #11899 from genjuro214/set-playwright-timeout
perf: set shorter timeout for playwright and make it configurable
This commit is contained in:
commit
f066eea92e
@ -2081,6 +2081,12 @@ PLAYWRIGHT_WS_URI = PersistentConfig(
|
|||||||
os.environ.get("PLAYWRIGHT_WS_URI", None),
|
os.environ.get("PLAYWRIGHT_WS_URI", None),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
PLAYWRIGHT_GOTO_TIMEOUT = PersistentConfig(
|
||||||
|
"PLAYWRIGHT_GOTO_TIMEOUT",
|
||||||
|
"rag.web.loader.engine.playwright.goto.timeout",
|
||||||
|
int(os.environ.get("PLAYWRIGHT_GOTO_TIMEOUT", "10")),
|
||||||
|
)
|
||||||
|
|
||||||
FIRECRAWL_API_KEY = PersistentConfig(
|
FIRECRAWL_API_KEY = PersistentConfig(
|
||||||
"FIRECRAWL_API_KEY",
|
"FIRECRAWL_API_KEY",
|
||||||
"firecrawl.api_key",
|
"firecrawl.api_key",
|
||||||
|
@ -155,6 +155,7 @@ from open_webui.config import (
|
|||||||
AUDIO_TTS_AZURE_SPEECH_REGION,
|
AUDIO_TTS_AZURE_SPEECH_REGION,
|
||||||
AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT,
|
AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT,
|
||||||
PLAYWRIGHT_WS_URI,
|
PLAYWRIGHT_WS_URI,
|
||||||
|
PLAYWRIGHT_GOTO_TIMEOUT,
|
||||||
FIRECRAWL_API_BASE_URL,
|
FIRECRAWL_API_BASE_URL,
|
||||||
FIRECRAWL_API_KEY,
|
FIRECRAWL_API_KEY,
|
||||||
RAG_WEB_LOADER_ENGINE,
|
RAG_WEB_LOADER_ENGINE,
|
||||||
@ -629,6 +630,7 @@ app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = RAG_WEB_SEARCH_CONCURRENT_
|
|||||||
app.state.config.RAG_WEB_LOADER_ENGINE = RAG_WEB_LOADER_ENGINE
|
app.state.config.RAG_WEB_LOADER_ENGINE = RAG_WEB_LOADER_ENGINE
|
||||||
app.state.config.RAG_WEB_SEARCH_TRUST_ENV = RAG_WEB_SEARCH_TRUST_ENV
|
app.state.config.RAG_WEB_SEARCH_TRUST_ENV = RAG_WEB_SEARCH_TRUST_ENV
|
||||||
app.state.config.PLAYWRIGHT_WS_URI = PLAYWRIGHT_WS_URI
|
app.state.config.PLAYWRIGHT_WS_URI = PLAYWRIGHT_WS_URI
|
||||||
|
app.state.config.PLAYWRIGHT_GOTO_TIMEOUT = PLAYWRIGHT_GOTO_TIMEOUT
|
||||||
app.state.config.FIRECRAWL_API_BASE_URL = FIRECRAWL_API_BASE_URL
|
app.state.config.FIRECRAWL_API_BASE_URL = FIRECRAWL_API_BASE_URL
|
||||||
app.state.config.FIRECRAWL_API_KEY = FIRECRAWL_API_KEY
|
app.state.config.FIRECRAWL_API_KEY = FIRECRAWL_API_KEY
|
||||||
app.state.config.TAVILY_EXTRACT_DEPTH = TAVILY_EXTRACT_DEPTH
|
app.state.config.TAVILY_EXTRACT_DEPTH = TAVILY_EXTRACT_DEPTH
|
||||||
|
@ -29,6 +29,7 @@ from open_webui.constants import ERROR_MESSAGES
|
|||||||
from open_webui.config import (
|
from open_webui.config import (
|
||||||
ENABLE_RAG_LOCAL_WEB_FETCH,
|
ENABLE_RAG_LOCAL_WEB_FETCH,
|
||||||
PLAYWRIGHT_WS_URI,
|
PLAYWRIGHT_WS_URI,
|
||||||
|
PLAYWRIGHT_GOTO_TIMEOUT,
|
||||||
RAG_WEB_LOADER_ENGINE,
|
RAG_WEB_LOADER_ENGINE,
|
||||||
FIRECRAWL_API_BASE_URL,
|
FIRECRAWL_API_BASE_URL,
|
||||||
FIRECRAWL_API_KEY,
|
FIRECRAWL_API_KEY,
|
||||||
@ -376,6 +377,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader, RateLimitMixin, URLProcessing
|
|||||||
headless (bool): If True, the browser will run in headless mode.
|
headless (bool): If True, the browser will run in headless mode.
|
||||||
proxy (dict): Proxy override settings for the Playwright session.
|
proxy (dict): Proxy override settings for the Playwright session.
|
||||||
playwright_ws_url (Optional[str]): WebSocket endpoint URI for remote browser connection.
|
playwright_ws_url (Optional[str]): WebSocket endpoint URI for remote browser connection.
|
||||||
|
playwright_goto_timeout (Optional[int]): Maximum time in milliseconds to wait for each page.goto() navigation.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
@ -389,6 +391,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader, RateLimitMixin, URLProcessing
|
|||||||
remove_selectors: Optional[List[str]] = None,
|
remove_selectors: Optional[List[str]] = None,
|
||||||
proxy: Optional[Dict[str, str]] = None,
|
proxy: Optional[Dict[str, str]] = None,
|
||||||
playwright_ws_url: Optional[str] = None,
|
playwright_ws_url: Optional[str] = None,
|
||||||
|
playwright_goto_timeout: Optional[int] = 10000,
|
||||||
):
|
):
|
||||||
"""Initialize with additional safety parameters and remote browser support."""
|
"""Initialize with additional safety parameters and remote browser support."""
|
||||||
|
|
||||||
@ -415,6 +418,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader, RateLimitMixin, URLProcessing
|
|||||||
self.last_request_time = None
|
self.last_request_time = None
|
||||||
self.playwright_ws_url = playwright_ws_url
|
self.playwright_ws_url = playwright_ws_url
|
||||||
self.trust_env = trust_env
|
self.trust_env = trust_env
|
||||||
|
self.playwright_goto_timeout = playwright_goto_timeout
|
||||||
|
|
||||||
def lazy_load(self) -> Iterator[Document]:
|
def lazy_load(self) -> Iterator[Document]:
|
||||||
"""Safely load URLs synchronously with support for remote browser."""
|
"""Safely load URLs synchronously with support for remote browser."""
|
||||||
@ -431,7 +435,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader, RateLimitMixin, URLProcessing
|
|||||||
try:
|
try:
|
||||||
self._safe_process_url_sync(url)
|
self._safe_process_url_sync(url)
|
||||||
page = browser.new_page()
|
page = browser.new_page()
|
||||||
response = page.goto(url)
|
response = page.goto(url, timeout=self.playwright_goto_timeout)
|
||||||
if response is None:
|
if response is None:
|
||||||
raise ValueError(f"page.goto() returned None for url {url}")
|
raise ValueError(f"page.goto() returned None for url {url}")
|
||||||
|
|
||||||
@ -462,7 +466,9 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader, RateLimitMixin, URLProcessing
|
|||||||
try:
|
try:
|
||||||
await self._safe_process_url(url)
|
await self._safe_process_url(url)
|
||||||
page = await browser.new_page()
|
page = await browser.new_page()
|
||||||
response = await page.goto(url)
|
response = await page.goto(
|
||||||
|
url, timeout=self.playwright_goto_timeout
|
||||||
|
)
|
||||||
if response is None:
|
if response is None:
|
||||||
raise ValueError(f"page.goto() returned None for url {url}")
|
raise ValueError(f"page.goto() returned None for url {url}")
|
||||||
|
|
||||||
@ -604,6 +610,10 @@ def get_web_loader(
|
|||||||
"trust_env": trust_env,
|
"trust_env": trust_env,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if RAG_WEB_LOADER_ENGINE.value == "playwright":
|
||||||
|
web_loader_args["playwright_goto_timeout"] = (
|
||||||
|
PLAYWRIGHT_GOTO_TIMEOUT.value * 1000
|
||||||
|
)
|
||||||
if PLAYWRIGHT_WS_URI.value:
|
if PLAYWRIGHT_WS_URI.value:
|
||||||
web_loader_args["playwright_ws_url"] = PLAYWRIGHT_WS_URI.value
|
web_loader_args["playwright_ws_url"] = PLAYWRIGHT_WS_URI.value
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user