add option: BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE

This commit is contained in:
WilliamGates 2025-03-14 02:00:49 +00:00
parent b03fc97e28
commit 30104c615f
4 changed files with 55 additions and 7 deletions

View File

@ -1865,6 +1865,12 @@ BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = PersistentConfig(
os.getenv("BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL", "False").lower() == "true", os.getenv("BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL", "False").lower() == "true",
) )
# Option controlling whether web search skips loading each result link and
# uses the snippets returned by the search engine as document content.
# The environment-derived value passed here is True only when the variable
# is set to "true" (case-insensitive); it is False when unset.
BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE = PersistentConfig(
    "BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE",
    "rag.web.search.bypass_result_link_scrape",
    os.environ.get("BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE", "False").lower() == "true",
)
# You can provide a list of your own websites to filter after performing a web search. # You can provide a list of your own websites to filter after performing a web search.
# This ensures the highest level of safety and reliability of the information sources. # This ensures the highest level of safety and reliability of the information sources.
RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = PersistentConfig( RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = PersistentConfig(

View File

@ -197,6 +197,7 @@ from open_webui.config import (
# Retrieval (Web Search) # Retrieval (Web Search)
RAG_WEB_SEARCH_ENGINE, RAG_WEB_SEARCH_ENGINE,
BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL, BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL,
BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE,
RAG_WEB_SEARCH_RESULT_COUNT, RAG_WEB_SEARCH_RESULT_COUNT,
RAG_WEB_SEARCH_CONCURRENT_REQUESTS, RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
RAG_WEB_SEARCH_TRUST_ENV, RAG_WEB_SEARCH_TRUST_ENV,
@ -581,6 +582,9 @@ app.state.config.RAG_WEB_SEARCH_ENGINE = RAG_WEB_SEARCH_ENGINE
app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = ( app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = (
BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL
) )
# Expose the link-scrape bypass option on the application's config state.
app.state.config.BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE = BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE
app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = RAG_WEB_SEARCH_DOMAIN_FILTER_LIST app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = RAG_WEB_SEARCH_DOMAIN_FILTER_LIST
app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION = ENABLE_GOOGLE_DRIVE_INTEGRATION app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION = ENABLE_GOOGLE_DRIVE_INTEGRATION

View File

@ -380,6 +380,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
"web": { "web": {
"ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION, "ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
"BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL, "BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL,
"BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE": request.app.state.config.BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE,
"search": { "search": {
"enabled": request.app.state.config.ENABLE_RAG_WEB_SEARCH, "enabled": request.app.state.config.ENABLE_RAG_WEB_SEARCH,
"drive": request.app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION, "drive": request.app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION,
@ -477,6 +478,7 @@ class WebConfig(BaseModel):
search: WebSearchConfig search: WebSearchConfig
ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION: Optional[bool] = None ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION: Optional[bool] = None
BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL: Optional[bool] = None BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL: Optional[bool] = None
BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE: Optional[bool] = None
class ConfigUpdateForm(BaseModel): class ConfigUpdateForm(BaseModel):
@ -571,6 +573,10 @@ async def update_rag_config(
form_data.web.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL form_data.web.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL
) )
request.app.state.config.BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE = (
form_data.web.BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE
)
request.app.state.config.SEARXNG_QUERY_URL = ( request.app.state.config.SEARXNG_QUERY_URL = (
form_data.web.search.searxng_query_url form_data.web.search.searxng_query_url
) )
@ -1438,13 +1444,28 @@ async def process_web_search(
] ]
urls = [result.link for result in web_results] urls = [result.link for result in web_results]
loader = get_web_loader( if request.app.state.config.BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE:
urls, docs: List[Document] = [
verify_ssl=request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION, Document(
requests_per_second=request.app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, page_content=result.snippet,
trust_env=request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV, metadata={
) "source": result.link,
docs = await loader.aload() "title": (
result.title if result.title is not None else result.link
),
},
)
for result in web_results
if result.snippet is not None and result.snippet != ""
]
else:
loader = get_web_loader(
urls,
verify_ssl=request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
requests_per_second=request.app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
trust_env=request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV,
)
docs = await loader.aload()
if request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL: if request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL:
return { return {

View File

@ -470,6 +470,23 @@
</div> </div>
</div> </div>
<!-- Settings row: switch bound to webConfig.BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE
     (skip scraping of the links in web search results). -->
<div class=" mb-2.5 flex w-full justify-between">
	<div class=" self-center text-xs font-medium">
		<Tooltip content={$i18n.t('Search Web Without Scraping Links')} placement="top-start">
			{$i18n.t('Bypass Scrape Links of Web Search Result')}
		</Tooltip>
	</div>
	<div class="flex items-center relative">
		<!-- Tooltip text depends on the switch state; both variants go through
		     $i18n.t like the other labels here so they can be translated. -->
		<Tooltip
			content={webConfig.BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE
				? $i18n.t(
						'Skip scraping of links in search results, useful when search engines already provide detailed content.'
					)
				: $i18n.t(
						'By default, perform a web search and scrape each link in the search results.'
					)}
		>
			<Switch bind:state={webConfig.BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE} />
		</Tooltip>
	</div>
</div>
<div class=" mb-2.5 flex w-full justify-between"> <div class=" mb-2.5 flex w-full justify-between">
<div class=" self-center text-xs font-medium"> <div class=" self-center text-xs font-medium">
{$i18n.t('Trust Proxy Environment')} {$i18n.t('Trust Proxy Environment')}