Mirror of https://github.com/open-webui/open-webui (synced 2025-06-26 18:26:48 +00:00)
feat: prototype frontend web search integration
This commit is contained in:
@@ -93,6 +93,7 @@ from config import (
|
||||
CHUNK_OVERLAP,
|
||||
RAG_TEMPLATE,
|
||||
ENABLE_RAG_LOCAL_WEB_FETCH,
|
||||
RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
||||
)
|
||||
|
||||
from constants import ERROR_MESSAGES
|
||||
@@ -538,18 +539,23 @@ def store_web(form_data: UrlForm, user=Depends(get_current_user)):
|
||||
detail=ERROR_MESSAGES.DEFAULT(e),
|
||||
)
|
||||
|
||||
|
||||
def get_web_loader(url: Union[str, Sequence[str]], verify_ssl: bool = True):
|
||||
# Check if the URL is valid
|
||||
if not validate_url(url):
|
||||
raise ValueError(ERROR_MESSAGES.INVALID_URL)
|
||||
return WebBaseLoader(url, verify_ssl=verify_ssl)
|
||||
return WebBaseLoader(
|
||||
url,
|
||||
verify_ssl=verify_ssl,
|
||||
requests_per_second=RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
||||
)
|
||||
|
||||
|
||||
def validate_url(url: Union[str, Sequence[str]]):
|
||||
if isinstance(url, str):
|
||||
if isinstance(validators.url(url), validators.ValidationError):
|
||||
raise ValueError(ERROR_MESSAGES.INVALID_URL)
|
||||
if not ENABLE_LOCAL_WEB_FETCH:
|
||||
if not ENABLE_RAG_LOCAL_WEB_FETCH:
|
||||
# Local web fetch is disabled, filter out any URLs that resolve to private IP addresses
|
||||
parsed_url = urllib.parse.urlparse(url)
|
||||
# Get IPv4 and IPv6 addresses
|
||||
@@ -593,7 +599,7 @@ def store_websearch(form_data: SearchForm, user=Depends(get_current_user)):
|
||||
)
|
||||
urls = [result.link for result in web_results]
|
||||
loader = get_web_loader(urls)
|
||||
data = loader.load()
|
||||
data = loader.aload()
|
||||
|
||||
collection_name = form_data.collection_name
|
||||
if collection_name == "":
|
||||
|
||||
@@ -3,7 +3,7 @@ import logging
|
||||
import requests
|
||||
|
||||
from apps.rag.search.main import SearchResult
|
||||
from config import SRC_LOG_LEVELS, WEB_SEARCH_RESULT_COUNT
|
||||
from config import SRC_LOG_LEVELS, RAG_WEB_SEARCH_RESULT_COUNT
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
log.setLevel(SRC_LOG_LEVELS["RAG"])
|
||||
@@ -22,7 +22,7 @@ def search_brave(api_key: str, query: str) -> list[SearchResult]:
|
||||
"Accept-Encoding": "gzip",
|
||||
"X-Subscription-Token": api_key,
|
||||
}
|
||||
params = {"q": query, "count": WEB_SEARCH_RESULT_COUNT}
|
||||
params = {"q": query, "count": RAG_WEB_SEARCH_RESULT_COUNT}
|
||||
|
||||
response = requests.get(url, headers=headers, params=params)
|
||||
response.raise_for_status()
|
||||
@@ -33,5 +33,5 @@ def search_brave(api_key: str, query: str) -> list[SearchResult]:
|
||||
SearchResult(
|
||||
link=result["url"], title=result.get("title"), snippet=result.get("snippet")
|
||||
)
|
||||
for result in results[:WEB_SEARCH_RESULT_COUNT]
|
||||
for result in results[:RAG_WEB_SEARCH_RESULT_COUNT]
|
||||
]
|
||||
|
||||
@@ -4,7 +4,7 @@ import logging
|
||||
import requests
|
||||
|
||||
from apps.rag.search.main import SearchResult
|
||||
from config import SRC_LOG_LEVELS, WEB_SEARCH_RESULT_COUNT
|
||||
from config import SRC_LOG_LEVELS, RAG_WEB_SEARCH_RESULT_COUNT
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
log.setLevel(SRC_LOG_LEVELS["RAG"])
|
||||
@@ -27,7 +27,7 @@ def search_google_pse(
|
||||
"cx": search_engine_id,
|
||||
"q": query,
|
||||
"key": api_key,
|
||||
"num": WEB_SEARCH_RESULT_COUNT,
|
||||
"num": RAG_WEB_SEARCH_RESULT_COUNT,
|
||||
}
|
||||
|
||||
response = requests.request("GET", url, headers=headers, params=params)
|
||||
|
||||
@@ -3,7 +3,7 @@ import logging
|
||||
import requests
|
||||
|
||||
from apps.rag.search.main import SearchResult
|
||||
from config import SRC_LOG_LEVELS, WEB_SEARCH_RESULT_COUNT
|
||||
from config import SRC_LOG_LEVELS, RAG_WEB_SEARCH_RESULT_COUNT
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
log.setLevel(SRC_LOG_LEVELS["RAG"])
|
||||
@@ -40,5 +40,5 @@ def search_searxng(query_url: str, query: str) -> list[SearchResult]:
|
||||
SearchResult(
|
||||
link=result["url"], title=result.get("title"), snippet=result.get("content")
|
||||
)
|
||||
for result in sorted_results[:WEB_SEARCH_RESULT_COUNT]
|
||||
for result in sorted_results[:RAG_WEB_SEARCH_RESULT_COUNT]
|
||||
]
|
||||
|
||||
@@ -4,7 +4,7 @@ import logging
|
||||
import requests
|
||||
|
||||
from apps.rag.search.main import SearchResult
|
||||
from config import SRC_LOG_LEVELS, WEB_SEARCH_RESULT_COUNT
|
||||
from config import SRC_LOG_LEVELS, RAG_WEB_SEARCH_RESULT_COUNT
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
log.setLevel(SRC_LOG_LEVELS["RAG"])
|
||||
@@ -35,5 +35,5 @@ def search_serper(api_key: str, query: str) -> list[SearchResult]:
|
||||
title=result.get("title"),
|
||||
snippet=result.get("description"),
|
||||
)
|
||||
for result in results[:WEB_SEARCH_RESULT_COUNT]
|
||||
for result in results[:RAG_WEB_SEARCH_RESULT_COUNT]
|
||||
]
|
||||
|
||||
@@ -4,7 +4,7 @@ import logging
|
||||
import requests
|
||||
|
||||
from apps.rag.search.main import SearchResult
|
||||
from config import SRC_LOG_LEVELS, WEB_SEARCH_RESULT_COUNT
|
||||
from config import SRC_LOG_LEVELS, RAG_WEB_SEARCH_RESULT_COUNT
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
log.setLevel(SRC_LOG_LEVELS["RAG"])
|
||||
@@ -39,5 +39,5 @@ def search_serpstack(
|
||||
SearchResult(
|
||||
link=result["url"], title=result.get("title"), snippet=result.get("snippet")
|
||||
)
|
||||
for result in results[:WEB_SEARCH_RESULT_COUNT]
|
||||
for result in results[:RAG_WEB_SEARCH_RESULT_COUNT]
|
||||
]
|
||||
|
||||
@@ -549,7 +549,10 @@ BRAVE_SEARCH_API_KEY = os.getenv("BRAVE_SEARCH_API_KEY", "")
|
||||
SERPSTACK_API_KEY = os.getenv("SERPSTACK_API_KEY", "")
|
||||
SERPSTACK_HTTPS = os.getenv("SERPSTACK_HTTPS", "True").lower() == "true"
|
||||
SERPER_API_KEY = os.getenv("SERPER_API_KEY", "")
|
||||
WEB_SEARCH_RESULT_COUNT = int(os.getenv("WEB_SEARCH_RESULT_COUNT", "10"))
|
||||
RAG_WEB_SEARCH_RESULT_COUNT = int(os.getenv("RAG_WEB_SEARCH_RESULT_COUNT", "10"))
|
||||
RAG_WEB_SEARCH_CONCURRENT_REQUESTS = int(
|
||||
os.getenv("RAG_WEB_SEARCH_CONCURRENT_REQUESTS", "10")
|
||||
)
|
||||
|
||||
####################################
|
||||
# Transcribe
|
||||
|
||||
Reference in New Issue
Block a user