enh: full context web search

This commit is contained in:
Timothy Jaeryang Baek 2025-02-17 18:14:26 -08:00
parent 16ce8ab16c
commit ca0b7217d2
6 changed files with 93 additions and 29 deletions

View File

@ -1780,6 +1780,12 @@ RAG_WEB_SEARCH_ENGINE = PersistentConfig(
os.getenv("RAG_WEB_SEARCH_ENGINE", ""),
)
RAG_WEB_SEARCH_FULL_CONTEXT = PersistentConfig(
"RAG_WEB_SEARCH_FULL_CONTEXT",
"rag.web.search.full_context",
os.getenv("RAG_WEB_SEARCH_FULL_CONTEXT", "False").lower() == "true",
)
# You can provide a list of your own websites to filter after performing a web search.
# This ensures the highest level of safety and reliability of the information sources.
RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = PersistentConfig(

View File

@ -179,6 +179,7 @@ from open_webui.config import (
YOUTUBE_LOADER_PROXY_URL,
# Retrieval (Web Search)
RAG_WEB_SEARCH_ENGINE,
RAG_WEB_SEARCH_FULL_CONTEXT,
RAG_WEB_SEARCH_RESULT_COUNT,
RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
RAG_WEB_SEARCH_TRUST_ENV,
@ -548,6 +549,7 @@ app.state.config.YOUTUBE_LOADER_PROXY_URL = YOUTUBE_LOADER_PROXY_URL
app.state.config.ENABLE_RAG_WEB_SEARCH = ENABLE_RAG_WEB_SEARCH
app.state.config.RAG_WEB_SEARCH_ENGINE = RAG_WEB_SEARCH_ENGINE
app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT = RAG_WEB_SEARCH_FULL_CONTEXT
app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = RAG_WEB_SEARCH_DOMAIN_FILTER_LIST
app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION = ENABLE_GOOGLE_DRIVE_INTEGRATION

View File

@ -304,7 +304,14 @@ def get_sources_from_files(
relevant_contexts = []
for file in files:
if file.get("context") == "full":
if file.get("docs"):
print("file.get('docs')", file.get("docs"))
context = {
"documents": [[doc.get("content") for doc in file.get("docs")]],
"metadatas": [[doc.get("metadata") for doc in file.get("docs")]],
}
elif file.get("context") == "full":
context = {
"documents": [[file.get("file").get("data", {}).get("content")]],
"metadatas": [[{"file_id": file.get("id"), "name": file.get("name")}]],

View File

@ -371,7 +371,8 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
"proxy_url": request.app.state.config.YOUTUBE_LOADER_PROXY_URL,
},
"web": {
"web_loader_ssl_verification": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
"ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
"RAG_WEB_SEARCH_FULL_CONTEXT": request.app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT,
"search": {
"enabled": request.app.state.config.ENABLE_RAG_WEB_SEARCH,
"drive": request.app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION,
@ -457,7 +458,8 @@ class WebSearchConfig(BaseModel):
class WebConfig(BaseModel):
search: WebSearchConfig
web_loader_ssl_verification: Optional[bool] = None
ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION: Optional[bool] = None
RAG_WEB_SEARCH_FULL_CONTEXT: Optional[bool] = None
class ConfigUpdateForm(BaseModel):
@ -512,11 +514,16 @@ async def update_rag_config(
if form_data.web is not None:
request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = (
# Note: When UI "Bypass SSL verification for Websites"=True then ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION=False
form_data.web.web_loader_ssl_verification
form_data.web.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION
)
request.app.state.config.ENABLE_RAG_WEB_SEARCH = form_data.web.search.enabled
request.app.state.config.RAG_WEB_SEARCH_ENGINE = form_data.web.search.engine
request.app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT = (
form_data.web.RAG_WEB_SEARCH_FULL_CONTEXT
)
request.app.state.config.SEARXNG_QUERY_URL = (
form_data.web.search.searxng_query_url
)
@ -600,7 +607,8 @@ async def update_rag_config(
"translation": request.app.state.YOUTUBE_LOADER_TRANSLATION,
},
"web": {
"web_loader_ssl_verification": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
"ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
"RAG_WEB_SEARCH_FULL_CONTEXT": request.app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT,
"search": {
"enabled": request.app.state.config.ENABLE_RAG_WEB_SEARCH,
"engine": request.app.state.config.RAG_WEB_SEARCH_ENGINE,
@ -1349,21 +1357,36 @@ async def process_web_search(
trust_env=request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV,
)
docs = await loader.aload()
await run_in_threadpool(
save_docs_to_vector_db,
request,
docs,
collection_name,
overwrite=True,
user=user,
)
return {
"status": True,
"collection_name": collection_name,
"filenames": urls,
"loaded_count": len(docs),
}
if request.app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT:
return {
"status": True,
"docs": [
{
"content": doc.page_content,
"metadata": doc.metadata,
}
for doc in docs
],
"filenames": urls,
"loaded_count": len(docs),
}
else:
await run_in_threadpool(
save_docs_to_vector_db,
request,
docs,
collection_name,
overwrite=True,
user=user,
)
return {
"status": True,
"collection_name": collection_name,
"filenames": urls,
"loaded_count": len(docs),
}
except Exception as e:
log.exception(e)
raise HTTPException(

View File

@ -362,14 +362,25 @@ async def chat_web_search_handler(
)
files = form_data.get("files", [])
files.append(
{
"collection_name": results["collection_name"],
"name": searchQuery,
"type": "web_search_results",
"urls": results["filenames"],
}
)
if request.app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT:
files.append(
{
"docs": results.get("docs", []),
"name": searchQuery,
"type": "web_search_docs",
"urls": results["filenames"],
}
)
else:
files.append(
{
"collection_name": results["collection_name"],
"name": searchQuery,
"type": "web_search_results",
"urls": results["filenames"],
}
)
form_data["files"] = files
else:
await event_emitter(

View File

@ -6,6 +6,7 @@
import { onMount, getContext } from 'svelte';
import { toast } from 'svelte-sonner';
import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
import Tooltip from '$lib/components/common/Tooltip.svelte';
const i18n = getContext('i18n');
@ -116,6 +117,19 @@
</div>
</div>
<div class=" py-0.5 flex w-full justify-between">
<div class=" self-center text-xs font-medium">{$i18n.t('Full Context Mode')}</div>
<div class="flex items-center relative">
<Tooltip
content={webConfig.RAG_WEB_SEARCH_FULL_CONTEXT
? 'Inject the entire web results as context for comprehensive processing, this is recommended for complex queries.'
: 'Default to segmented retrieval for focused and relevant content extraction, this is recommended for most cases.'}
>
<Switch bind:state={webConfig.RAG_WEB_SEARCH_FULL_CONTEXT} />
</Tooltip>
</div>
</div>
{#if webConfig.search.engine !== ''}
<div class="mt-1.5">
{#if webConfig.search.engine === 'searxng'}
@ -424,12 +438,13 @@
<button
class="p-1 px-3 text-xs flex rounded-sm transition"
on:click={() => {
webConfig.web_loader_ssl_verification = !webConfig.web_loader_ssl_verification;
webConfig.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION =
!webConfig.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION;
submitHandler();
}}
type="button"
>
{#if webConfig.web_loader_ssl_verification === false}
{#if webConfig.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION === false}
<span class="ml-2 self-center">{$i18n.t('On')}</span>
{:else}
<span class="ml-2 self-center">{$i18n.t('Off')}</span>