mirror of
				https://github.com/open-webui/open-webui
				synced 2025-06-26 18:26:48 +00:00 
			
		
		
		
	feat: bypass web loader in web search
Co-Authored-By: Perry Li <peiyaoli@mail.nankai.edu.cn> Co-Authored-By: WilliamGates <3852641+williamgateszhao@users.noreply.github.com>
This commit is contained in:
		
							parent
							
								
									0b7f927983
								
							
						
					
					
						commit
						2eca6f6414
					
				@ -2177,6 +2177,12 @@ BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = PersistentConfig(
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
BYPASS_WEB_SEARCH_WEB_LOADER = PersistentConfig(
 | 
			
		||||
    "BYPASS_WEB_SEARCH_WEB_LOADER",
 | 
			
		||||
    "rag.web.search.bypass_web_loader",
 | 
			
		||||
    os.getenv("BYPASS_WEB_SEARCH_WEB_LOADER", "False").lower() == "true",
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
WEB_SEARCH_RESULT_COUNT = PersistentConfig(
 | 
			
		||||
    "WEB_SEARCH_RESULT_COUNT",
 | 
			
		||||
    "rag.web.search.result_count",
 | 
			
		||||
@ -2202,6 +2208,7 @@ WEB_SEARCH_CONCURRENT_REQUESTS = PersistentConfig(
 | 
			
		||||
    int(os.getenv("WEB_SEARCH_CONCURRENT_REQUESTS", "10")),
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
WEB_LOADER_ENGINE = PersistentConfig(
 | 
			
		||||
    "WEB_LOADER_ENGINE",
 | 
			
		||||
    "rag.web.loader.engine",
 | 
			
		||||
 | 
			
		||||
@ -228,6 +228,7 @@ from open_webui.config import (
 | 
			
		||||
    ENABLE_WEB_SEARCH,
 | 
			
		||||
    WEB_SEARCH_ENGINE,
 | 
			
		||||
    BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL,
 | 
			
		||||
    BYPASS_WEB_SEARCH_WEB_LOADER,
 | 
			
		||||
    WEB_SEARCH_RESULT_COUNT,
 | 
			
		||||
    WEB_SEARCH_CONCURRENT_REQUESTS,
 | 
			
		||||
    WEB_SEARCH_TRUST_ENV,
 | 
			
		||||
@ -707,6 +708,7 @@ app.state.config.WEB_SEARCH_TRUST_ENV = WEB_SEARCH_TRUST_ENV
 | 
			
		||||
app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = (
 | 
			
		||||
    BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL
 | 
			
		||||
)
 | 
			
		||||
app.state.config.BYPASS_WEB_SEARCH_WEB_LOADER = BYPASS_WEB_SEARCH_WEB_LOADER
 | 
			
		||||
 | 
			
		||||
app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION = ENABLE_GOOGLE_DRIVE_INTEGRATION
 | 
			
		||||
app.state.config.ENABLE_ONEDRIVE_INTEGRATION = ENABLE_ONEDRIVE_INTEGRATION
 | 
			
		||||
 | 
			
		||||
@ -387,6 +387,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
 | 
			
		||||
            "WEB_SEARCH_CONCURRENT_REQUESTS": request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS,
 | 
			
		||||
            "WEB_SEARCH_DOMAIN_FILTER_LIST": request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST,
 | 
			
		||||
            "BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL,
 | 
			
		||||
            "BYPASS_WEB_SEARCH_WEB_LOADER": request.app.state.config.BYPASS_WEB_SEARCH_WEB_LOADER,
 | 
			
		||||
            "SEARXNG_QUERY_URL": request.app.state.config.SEARXNG_QUERY_URL,
 | 
			
		||||
            "YACY_QUERY_URL": request.app.state.config.YACY_QUERY_URL,
 | 
			
		||||
            "YACY_USERNAME": request.app.state.config.YACY_USERNAME,
 | 
			
		||||
@ -439,6 +440,7 @@ class WebConfig(BaseModel):
 | 
			
		||||
    WEB_SEARCH_CONCURRENT_REQUESTS: Optional[int] = None
 | 
			
		||||
    WEB_SEARCH_DOMAIN_FILTER_LIST: Optional[List[str]] = []
 | 
			
		||||
    BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL: Optional[bool] = None
 | 
			
		||||
    BYPASS_WEB_SEARCH_WEB_LOADER: Optional[bool] = None
 | 
			
		||||
    SEARXNG_QUERY_URL: Optional[str] = None
 | 
			
		||||
    YACY_QUERY_URL: Optional[str] = None
 | 
			
		||||
    YACY_USERNAME: Optional[str] = None
 | 
			
		||||
@ -751,6 +753,9 @@ async def update_rag_config(
 | 
			
		||||
        request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = (
 | 
			
		||||
            form_data.web.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL
 | 
			
		||||
        )
 | 
			
		||||
        request.app.state.config.BYPASS_WEB_SEARCH_WEB_LOADER = (
 | 
			
		||||
            form_data.web.BYPASS_WEB_SEARCH_WEB_LOADER
 | 
			
		||||
        )
 | 
			
		||||
        request.app.state.config.SEARXNG_QUERY_URL = form_data.web.SEARXNG_QUERY_URL
 | 
			
		||||
        request.app.state.config.YACY_QUERY_URL = form_data.web.YACY_QUERY_URL
 | 
			
		||||
        request.app.state.config.YACY_USERNAME = form_data.web.YACY_USERNAME
 | 
			
		||||
@ -875,6 +880,7 @@ async def update_rag_config(
 | 
			
		||||
            "WEB_SEARCH_CONCURRENT_REQUESTS": request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS,
 | 
			
		||||
            "WEB_SEARCH_DOMAIN_FILTER_LIST": request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST,
 | 
			
		||||
            "BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL,
 | 
			
		||||
            "BYPASS_WEB_SEARCH_WEB_LOADER": request.app.state.config.BYPASS_WEB_SEARCH_WEB_LOADER,
 | 
			
		||||
            "SEARXNG_QUERY_URL": request.app.state.config.SEARXNG_QUERY_URL,
 | 
			
		||||
            "YACY_QUERY_URL": request.app.state.config.YACY_QUERY_URL,
 | 
			
		||||
            "YACY_USERNAME": request.app.state.config.YACY_USERNAME,
 | 
			
		||||
@ -1678,13 +1684,29 @@ async def process_web_search(
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    try:
 | 
			
		||||
        loader = get_web_loader(
 | 
			
		||||
            urls,
 | 
			
		||||
            verify_ssl=request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION,
 | 
			
		||||
            requests_per_second=request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS,
 | 
			
		||||
            trust_env=request.app.state.config.WEB_SEARCH_TRUST_ENV,
 | 
			
		||||
        )
 | 
			
		||||
        docs = await loader.aload()
 | 
			
		||||
        if request.app.state.config.BYPASS_WEB_SEARCH_WEB_LOADER:
 | 
			
		||||
            docs = [
 | 
			
		||||
                Document(
 | 
			
		||||
                    page_content=result.snippet,
 | 
			
		||||
                    metadata={
 | 
			
		||||
                        "source": result.link,
 | 
			
		||||
                        "title": result.title,
 | 
			
		||||
                        "snippet": result.snippet,
 | 
			
		||||
                        "link": result.link,
 | 
			
		||||
                    },
 | 
			
		||||
                )
 | 
			
		||||
                for result in search_results
 | 
			
		||||
                if hasattr(result, "snippet")
 | 
			
		||||
            ]
 | 
			
		||||
        else:
 | 
			
		||||
            loader = get_web_loader(
 | 
			
		||||
                urls,
 | 
			
		||||
                verify_ssl=request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION,
 | 
			
		||||
                requests_per_second=request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS,
 | 
			
		||||
                trust_env=request.app.state.config.WEB_SEARCH_TRUST_ENV,
 | 
			
		||||
            )
 | 
			
		||||
            docs = await loader.aload()
 | 
			
		||||
 | 
			
		||||
        urls = [
 | 
			
		||||
            doc.metadata.get("source") for doc in docs if doc.metadata.get("source")
 | 
			
		||||
        ]  # only keep the urls returned by the loader
 | 
			
		||||
 | 
			
		||||
@ -613,6 +613,19 @@
 | 
			
		||||
						</div>
 | 
			
		||||
					</div>
 | 
			
		||||
 | 
			
		||||
					<div class="  mb-2.5 flex w-full justify-between">
 | 
			
		||||
						<div class=" self-center text-xs font-medium">
 | 
			
		||||
							<Tooltip content={$i18n.t('Bypass Web Loader')} placement="top-start">
 | 
			
		||||
								{$i18n.t('Bypass Web Loader')}
 | 
			
		||||
							</Tooltip>
 | 
			
		||||
						</div>
 | 
			
		||||
						<div class="flex items-center relative">
 | 
			
		||||
							<Tooltip content={''}>
 | 
			
		||||
								<Switch bind:state={webConfig.BYPASS_WEB_SEARCH_WEB_LOADER} />
 | 
			
		||||
							</Tooltip>
 | 
			
		||||
						</div>
 | 
			
		||||
					</div>
 | 
			
		||||
 | 
			
		||||
					<div class="  mb-2.5 flex w-full justify-between">
 | 
			
		||||
						<div class=" self-center text-xs font-medium">
 | 
			
		||||
							{$i18n.t('Trust Proxy Environment')}
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user