mirror of
https://github.com/open-webui/open-webui
synced 2025-04-03 12:31:32 +00:00
Merge 7bbe43ba2d
into e0ec2cdeb0
This commit is contained in:
commit
18dfc2ef49
@ -1970,6 +1970,12 @@ BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = PersistentConfig(
|
|||||||
os.getenv("BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL", "False").lower() == "true",
|
os.getenv("BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL", "False").lower() == "true",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE = PersistentConfig(
|
||||||
|
"BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE",
|
||||||
|
"rag.web.search.bypass_result_link_scrape",
|
||||||
|
os.getenv("BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE", "False").lower() == "true",
|
||||||
|
)
|
||||||
|
|
||||||
# You can provide a list of your own websites to filter after performing a web search.
|
# You can provide a list of your own websites to filter after performing a web search.
|
||||||
# This ensures the highest level of safety and reliability of the information sources.
|
# This ensures the highest level of safety and reliability of the information sources.
|
||||||
RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = PersistentConfig(
|
RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = PersistentConfig(
|
||||||
|
@ -201,6 +201,7 @@ from open_webui.config import (
|
|||||||
# Retrieval (Web Search)
|
# Retrieval (Web Search)
|
||||||
RAG_WEB_SEARCH_ENGINE,
|
RAG_WEB_SEARCH_ENGINE,
|
||||||
BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL,
|
BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL,
|
||||||
|
BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE,
|
||||||
RAG_WEB_SEARCH_RESULT_COUNT,
|
RAG_WEB_SEARCH_RESULT_COUNT,
|
||||||
RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
||||||
RAG_WEB_SEARCH_TRUST_ENV,
|
RAG_WEB_SEARCH_TRUST_ENV,
|
||||||
@ -612,6 +613,9 @@ app.state.config.RAG_WEB_SEARCH_ENGINE = RAG_WEB_SEARCH_ENGINE
|
|||||||
app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = (
|
app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = (
|
||||||
BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL
|
BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL
|
||||||
)
|
)
|
||||||
|
app.state.config.BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE = (
|
||||||
|
BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE
|
||||||
|
)
|
||||||
app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = RAG_WEB_SEARCH_DOMAIN_FILTER_LIST
|
app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = RAG_WEB_SEARCH_DOMAIN_FILTER_LIST
|
||||||
|
|
||||||
app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION = ENABLE_GOOGLE_DRIVE_INTEGRATION
|
app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION = ENABLE_GOOGLE_DRIVE_INTEGRATION
|
||||||
|
@ -382,6 +382,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
|
|||||||
"web": {
|
"web": {
|
||||||
"ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
|
"ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
|
||||||
"BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL,
|
"BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL,
|
||||||
|
"BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE": request.app.state.config.BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE,
|
||||||
"search": {
|
"search": {
|
||||||
"enabled": request.app.state.config.ENABLE_RAG_WEB_SEARCH,
|
"enabled": request.app.state.config.ENABLE_RAG_WEB_SEARCH,
|
||||||
"drive": request.app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION,
|
"drive": request.app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION,
|
||||||
@ -480,6 +481,7 @@ class WebConfig(BaseModel):
|
|||||||
search: WebSearchConfig
|
search: WebSearchConfig
|
||||||
ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION: Optional[bool] = None
|
ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION: Optional[bool] = None
|
||||||
BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL: Optional[bool] = None
|
BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL: Optional[bool] = None
|
||||||
|
BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE: Optional[bool] = None
|
||||||
|
|
||||||
|
|
||||||
class ConfigUpdateForm(BaseModel):
|
class ConfigUpdateForm(BaseModel):
|
||||||
@ -577,6 +579,10 @@ async def update_rag_config(
|
|||||||
form_data.web.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL
|
form_data.web.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL
|
||||||
)
|
)
|
||||||
|
|
||||||
|
request.app.state.config.BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE = (
|
||||||
|
form_data.web.BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE
|
||||||
|
)
|
||||||
|
|
||||||
request.app.state.config.SEARXNG_QUERY_URL = (
|
request.app.state.config.SEARXNG_QUERY_URL = (
|
||||||
form_data.web.search.searxng_query_url
|
form_data.web.search.searxng_query_url
|
||||||
)
|
)
|
||||||
@ -1452,13 +1458,28 @@ async def process_web_search(
|
|||||||
]
|
]
|
||||||
|
|
||||||
urls = [result.link for result in web_results]
|
urls = [result.link for result in web_results]
|
||||||
loader = get_web_loader(
|
if request.app.state.config.BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE:
|
||||||
urls,
|
docs: List[Document] = [
|
||||||
verify_ssl=request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
|
Document(
|
||||||
requests_per_second=request.app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
page_content=result.snippet,
|
||||||
trust_env=request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV,
|
metadata={
|
||||||
)
|
"source": result.link,
|
||||||
docs = await loader.aload()
|
"title": (
|
||||||
|
result.title if result.title is not None else result.link
|
||||||
|
),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
for result in web_results
|
||||||
|
if result.snippet is not None and result.snippet != ""
|
||||||
|
]
|
||||||
|
else:
|
||||||
|
loader = get_web_loader(
|
||||||
|
urls,
|
||||||
|
verify_ssl=request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
|
||||||
|
requests_per_second=request.app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
||||||
|
trust_env=request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV,
|
||||||
|
)
|
||||||
|
docs = await loader.aload()
|
||||||
|
|
||||||
if request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL:
|
if request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL:
|
||||||
return {
|
return {
|
||||||
|
115
backend/open_webui/test/apps/webui/routers/test_retrieval.py
Normal file
115
backend/open_webui/test/apps/webui/routers/test_retrieval.py
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
from test.util.abstract_integration_test import AbstractPostgresTest
|
||||||
|
from test.util.mock_user import mock_webui_user
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
|
||||||
|
from langchain_core.documents import Document
|
||||||
|
|
||||||
|
|
||||||
|
class TestRetrieval(AbstractPostgresTest):
|
||||||
|
BASE_PATH = "/api/v1/retrieval"
|
||||||
|
|
||||||
|
def setup_class(cls):
|
||||||
|
super().setup_class()
|
||||||
|
from open_webui.retrieval.web.main import SearchResult
|
||||||
|
|
||||||
|
cls.searchresult = SearchResult
|
||||||
|
|
||||||
|
@patch("open_webui.routers.retrieval.search_web")
|
||||||
|
@patch("open_webui.routers.retrieval.get_web_loader")
|
||||||
|
@patch("open_webui.routers.retrieval.get_config")
|
||||||
|
@patch("open_webui.routers.retrieval.run_in_threadpool")
|
||||||
|
def test_process_web_search_bypass_scrape(
|
||||||
|
self,
|
||||||
|
mock_run_in_threadpool,
|
||||||
|
mock_get_config,
|
||||||
|
mock_get_web_loader,
|
||||||
|
mock_search_web,
|
||||||
|
):
|
||||||
|
# Setup mocks
|
||||||
|
mock_search_results = [
|
||||||
|
self.searchresult(
|
||||||
|
link="https://example.com/1",
|
||||||
|
title="Example 1",
|
||||||
|
snippet="Example snippet 1",
|
||||||
|
),
|
||||||
|
self.searchresult(
|
||||||
|
link="https://example.com/2", title=None, snippet="Example snippet 2"
|
||||||
|
),
|
||||||
|
]
|
||||||
|
mock_search_web.return_value = mock_search_results
|
||||||
|
|
||||||
|
mock_config = MagicMock()
|
||||||
|
mock_config.BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE = True
|
||||||
|
mock_get_config.return_value = mock_config
|
||||||
|
|
||||||
|
mock_run_in_threadpool.return_value = True
|
||||||
|
|
||||||
|
# Execute function
|
||||||
|
with mock_webui_user(id="2"):
|
||||||
|
response = self.fast_api_client.post(
|
||||||
|
self.create_url("/process/web/search"),
|
||||||
|
json={
|
||||||
|
"query": "test query",
|
||||||
|
"collection_name": "test_collection",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Assertions
|
||||||
|
assert response.status_code == 200
|
||||||
|
result = response.json()
|
||||||
|
assert not mock_get_web_loader.called
|
||||||
|
assert result["status"] is True
|
||||||
|
assert result["collection_name"] == "test_collection"
|
||||||
|
assert result["loaded_count"] == 2
|
||||||
|
assert result["docs"][0].page_content == "Example snippet 1"
|
||||||
|
assert result["docs"][1].metadata["title"] == "https://example.com/2"
|
||||||
|
|
||||||
|
@patch("open_webui.routers.retrieval.search_web")
|
||||||
|
@patch("open_webui.routers.retrieval.get_web_loader")
|
||||||
|
@patch("open_webui.routers.retrieval.get_config")
|
||||||
|
@patch("open_webui.routers.retrieval.run_in_threadpool")
|
||||||
|
def test_process_web_search_with_scrape(
|
||||||
|
self,
|
||||||
|
mock_run_in_threadpool,
|
||||||
|
mock_get_config,
|
||||||
|
mock_get_web_loader,
|
||||||
|
mock_search_web,
|
||||||
|
):
|
||||||
|
# Setup mocks
|
||||||
|
mock_search_results = [
|
||||||
|
self.searchresult(
|
||||||
|
link="https://example.com/1",
|
||||||
|
title="Example 1",
|
||||||
|
snippet="Example snippet 1",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
mock_search_web.return_value = mock_search_results
|
||||||
|
|
||||||
|
mock_config = MagicMock()
|
||||||
|
mock_config.BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE = False
|
||||||
|
mock_get_config.return_value = mock_config
|
||||||
|
|
||||||
|
mock_loader = MagicMock()
|
||||||
|
mock_loader.load.return_value = [Document(page_content="Web page content")]
|
||||||
|
mock_get_web_loader.return_value = mock_loader
|
||||||
|
|
||||||
|
mock_run_in_threadpool.return_value = True
|
||||||
|
|
||||||
|
# Execute function
|
||||||
|
with mock_webui_user(id="2"):
|
||||||
|
response = self.fast_api_client.post(
|
||||||
|
self.create_url("/process/web/search"),
|
||||||
|
json={
|
||||||
|
"query": "test query",
|
||||||
|
"collection_name": "test_collection",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Assertions
|
||||||
|
assert response.status_code == 200
|
||||||
|
result = response.json()
|
||||||
|
assert mock_get_web_loader.called
|
||||||
|
assert result["status"] is True
|
||||||
|
assert result["collection_name"] == "test_collection"
|
||||||
|
assert result["loaded_count"] == 1
|
||||||
|
assert result["docs"][0].page_content == "Web page content"
|
@ -474,6 +474,23 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div class=" mb-2.5 flex w-full justify-between">
|
||||||
|
<div class=" self-center text-xs font-medium">
|
||||||
|
<Tooltip content={$i18n.t('Search Web Without Scraping Links')} placement="top-start">
|
||||||
|
{$i18n.t('Bypass Scrape Links of Web Search Result')}
|
||||||
|
</Tooltip>
|
||||||
|
</div>
|
||||||
|
<div class="flex items-center relative">
|
||||||
|
<Tooltip
|
||||||
|
content={webConfig.BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE
|
||||||
|
? 'Skip scraping of links in search results, useful when search engines already provide detailed content.'
|
||||||
|
: 'By default, perform a web search and scrape each link in the search results.'}
|
||||||
|
>
|
||||||
|
<Switch bind:state={webConfig.BYPASS_WEB_SEARCH_RESULT_LINK_SCRAPE} />
|
||||||
|
</Tooltip>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div class=" mb-2.5 flex w-full justify-between">
|
<div class=" mb-2.5 flex w-full justify-between">
|
||||||
<div class=" self-center text-xs font-medium">
|
<div class=" self-center text-xs font-medium">
|
||||||
{$i18n.t('Trust Proxy Environment')}
|
{$i18n.t('Trust Proxy Environment')}
|
||||||
|
Loading…
Reference in New Issue
Block a user