mirror of
https://github.com/open-webui/open-webui
synced 2025-04-03 20:41:29 +00:00
add option: BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE
This commit is contained in:
parent
b03fc97e28
commit
30104c615f
@ -1865,6 +1865,12 @@ BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = PersistentConfig(
|
||||
os.getenv("BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL", "False").lower() == "true",
|
||||
)
|
||||
|
||||
BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE = PersistentConfig(
|
||||
"BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE",
|
||||
"rag.web.search.bypass_result_link_scrape",
|
||||
os.getenv("BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE", "False").lower() == "true",
|
||||
)
|
||||
|
||||
# You can provide a list of your own website domains; web search results are filtered against this list after the search is performed.
|
||||
# This helps ensure the safety and reliability of the information sources.
|
||||
RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = PersistentConfig(
|
||||
|
@ -197,6 +197,7 @@ from open_webui.config import (
|
||||
# Retrieval (Web Search)
|
||||
RAG_WEB_SEARCH_ENGINE,
|
||||
BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL,
|
||||
BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE,
|
||||
RAG_WEB_SEARCH_RESULT_COUNT,
|
||||
RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
||||
RAG_WEB_SEARCH_TRUST_ENV,
|
||||
@ -581,6 +582,9 @@ app.state.config.RAG_WEB_SEARCH_ENGINE = RAG_WEB_SEARCH_ENGINE
|
||||
app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = (
|
||||
BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL
|
||||
)
|
||||
app.state.config.BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE = (
|
||||
BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE
|
||||
)
|
||||
app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = RAG_WEB_SEARCH_DOMAIN_FILTER_LIST
|
||||
|
||||
app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION = ENABLE_GOOGLE_DRIVE_INTEGRATION
|
||||
|
@ -380,6 +380,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
|
||||
"web": {
|
||||
"ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
|
||||
"BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL,
|
||||
"BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE": request.app.state.config.BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE,
|
||||
"search": {
|
||||
"enabled": request.app.state.config.ENABLE_RAG_WEB_SEARCH,
|
||||
"drive": request.app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION,
|
||||
@ -477,6 +478,7 @@ class WebConfig(BaseModel):
|
||||
search: WebSearchConfig
|
||||
ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION: Optional[bool] = None
|
||||
BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL: Optional[bool] = None
|
||||
BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE: Optional[bool] = None
|
||||
|
||||
|
||||
class ConfigUpdateForm(BaseModel):
|
||||
@ -571,6 +573,10 @@ async def update_rag_config(
|
||||
form_data.web.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL
|
||||
)
|
||||
|
||||
request.app.state.config.BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE = (
|
||||
form_data.web.BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE
|
||||
)
|
||||
|
||||
request.app.state.config.SEARXNG_QUERY_URL = (
|
||||
form_data.web.search.searxng_query_url
|
||||
)
|
||||
@ -1438,13 +1444,28 @@ async def process_web_search(
|
||||
]
|
||||
|
||||
urls = [result.link for result in web_results]
|
||||
loader = get_web_loader(
|
||||
urls,
|
||||
verify_ssl=request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
|
||||
requests_per_second=request.app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
||||
trust_env=request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV,
|
||||
)
|
||||
docs = await loader.aload()
|
||||
if request.app.state.config.BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE:
|
||||
docs: List[Document] = [
|
||||
Document(
|
||||
page_content=result.snippet,
|
||||
metadata={
|
||||
"source": result.link,
|
||||
"title": (
|
||||
result.title if result.title is not None else result.link
|
||||
),
|
||||
},
|
||||
)
|
||||
for result in web_results
|
||||
if result.snippet is not None and result.snippet != ""
|
||||
]
|
||||
else:
|
||||
loader = get_web_loader(
|
||||
urls,
|
||||
verify_ssl=request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
|
||||
requests_per_second=request.app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
||||
trust_env=request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV,
|
||||
)
|
||||
docs = await loader.aload()
|
||||
|
||||
if request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL:
|
||||
return {
|
||||
|
@ -470,6 +470,23 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class=" mb-2.5 flex w-full justify-between">
|
||||
<div class=" self-center text-xs font-medium">
|
||||
<Tooltip content={$i18n.t('Search Web Without Scraping Links')} placement="top-start">
|
||||
{$i18n.t('Bypass Scrape Links of Web Search Result')}
|
||||
</Tooltip>
|
||||
</div>
|
||||
<div class="flex items-center relative">
|
||||
<Tooltip
|
||||
content={webConfig.BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE
|
||||
? 'Skip scraping of links in search results, useful when search engines already provide detailed content.'
|
||||
: 'By default, perform a web search and scrape each link in the search results.'}
|
||||
>
|
||||
<Switch bind:state={webConfig.BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE} />
|
||||
</Tooltip>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class=" mb-2.5 flex w-full justify-between">
|
||||
<div class=" self-center text-xs font-medium">
|
||||
{$i18n.t('Trust Proxy Environment')}
|
||||
|
Loading…
Reference in New Issue
Block a user