mirror of
https://github.com/open-webui/open-webui
synced 2025-06-25 09:47:41 +00:00
enh: full context web search
This commit is contained in:
parent
16ce8ab16c
commit
ca0b7217d2
@ -1780,6 +1780,12 @@ RAG_WEB_SEARCH_ENGINE = PersistentConfig(
|
|||||||
os.getenv("RAG_WEB_SEARCH_ENGINE", ""),
|
os.getenv("RAG_WEB_SEARCH_ENGINE", ""),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
RAG_WEB_SEARCH_FULL_CONTEXT = PersistentConfig(
|
||||||
|
"RAG_WEB_SEARCH_FULL_CONTEXT",
|
||||||
|
"rag.web.search.full_context",
|
||||||
|
os.getenv("RAG_WEB_SEARCH_FULL_CONTEXT", "False").lower() == "true",
|
||||||
|
)
|
||||||
|
|
||||||
# You can provide a list of your own websites to filter after performing a web search.
|
# You can provide a list of your own websites to filter after performing a web search.
|
||||||
# This ensures the highest level of safety and reliability of the information sources.
|
# This ensures the highest level of safety and reliability of the information sources.
|
||||||
RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = PersistentConfig(
|
RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = PersistentConfig(
|
||||||
|
@ -179,6 +179,7 @@ from open_webui.config import (
|
|||||||
YOUTUBE_LOADER_PROXY_URL,
|
YOUTUBE_LOADER_PROXY_URL,
|
||||||
# Retrieval (Web Search)
|
# Retrieval (Web Search)
|
||||||
RAG_WEB_SEARCH_ENGINE,
|
RAG_WEB_SEARCH_ENGINE,
|
||||||
|
RAG_WEB_SEARCH_FULL_CONTEXT,
|
||||||
RAG_WEB_SEARCH_RESULT_COUNT,
|
RAG_WEB_SEARCH_RESULT_COUNT,
|
||||||
RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
||||||
RAG_WEB_SEARCH_TRUST_ENV,
|
RAG_WEB_SEARCH_TRUST_ENV,
|
||||||
@ -548,6 +549,7 @@ app.state.config.YOUTUBE_LOADER_PROXY_URL = YOUTUBE_LOADER_PROXY_URL
|
|||||||
|
|
||||||
app.state.config.ENABLE_RAG_WEB_SEARCH = ENABLE_RAG_WEB_SEARCH
|
app.state.config.ENABLE_RAG_WEB_SEARCH = ENABLE_RAG_WEB_SEARCH
|
||||||
app.state.config.RAG_WEB_SEARCH_ENGINE = RAG_WEB_SEARCH_ENGINE
|
app.state.config.RAG_WEB_SEARCH_ENGINE = RAG_WEB_SEARCH_ENGINE
|
||||||
|
app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT = RAG_WEB_SEARCH_FULL_CONTEXT
|
||||||
app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = RAG_WEB_SEARCH_DOMAIN_FILTER_LIST
|
app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = RAG_WEB_SEARCH_DOMAIN_FILTER_LIST
|
||||||
|
|
||||||
app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION = ENABLE_GOOGLE_DRIVE_INTEGRATION
|
app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION = ENABLE_GOOGLE_DRIVE_INTEGRATION
|
||||||
|
@ -304,7 +304,14 @@ def get_sources_from_files(
|
|||||||
relevant_contexts = []
|
relevant_contexts = []
|
||||||
|
|
||||||
for file in files:
|
for file in files:
|
||||||
if file.get("context") == "full":
|
if file.get("docs"):
|
||||||
|
|
||||||
|
print("file.get('docs')", file.get("docs"))
|
||||||
|
context = {
|
||||||
|
"documents": [[doc.get("content") for doc in file.get("docs")]],
|
||||||
|
"metadatas": [[doc.get("metadata") for doc in file.get("docs")]],
|
||||||
|
}
|
||||||
|
elif file.get("context") == "full":
|
||||||
context = {
|
context = {
|
||||||
"documents": [[file.get("file").get("data", {}).get("content")]],
|
"documents": [[file.get("file").get("data", {}).get("content")]],
|
||||||
"metadatas": [[{"file_id": file.get("id"), "name": file.get("name")}]],
|
"metadatas": [[{"file_id": file.get("id"), "name": file.get("name")}]],
|
||||||
|
@ -371,7 +371,8 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
|
|||||||
"proxy_url": request.app.state.config.YOUTUBE_LOADER_PROXY_URL,
|
"proxy_url": request.app.state.config.YOUTUBE_LOADER_PROXY_URL,
|
||||||
},
|
},
|
||||||
"web": {
|
"web": {
|
||||||
"web_loader_ssl_verification": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
|
"ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
|
||||||
|
"RAG_WEB_SEARCH_FULL_CONTEXT": request.app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT,
|
||||||
"search": {
|
"search": {
|
||||||
"enabled": request.app.state.config.ENABLE_RAG_WEB_SEARCH,
|
"enabled": request.app.state.config.ENABLE_RAG_WEB_SEARCH,
|
||||||
"drive": request.app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION,
|
"drive": request.app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION,
|
||||||
@ -457,7 +458,8 @@ class WebSearchConfig(BaseModel):
|
|||||||
|
|
||||||
class WebConfig(BaseModel):
|
class WebConfig(BaseModel):
|
||||||
search: WebSearchConfig
|
search: WebSearchConfig
|
||||||
web_loader_ssl_verification: Optional[bool] = None
|
ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION: Optional[bool] = None
|
||||||
|
RAG_WEB_SEARCH_FULL_CONTEXT: Optional[bool] = None
|
||||||
|
|
||||||
|
|
||||||
class ConfigUpdateForm(BaseModel):
|
class ConfigUpdateForm(BaseModel):
|
||||||
@ -512,11 +514,16 @@ async def update_rag_config(
|
|||||||
if form_data.web is not None:
|
if form_data.web is not None:
|
||||||
request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = (
|
request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = (
|
||||||
# Note: When UI "Bypass SSL verification for Websites"=True then ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION=False
|
# Note: When UI "Bypass SSL verification for Websites"=True then ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION=False
|
||||||
form_data.web.web_loader_ssl_verification
|
form_data.web.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION
|
||||||
)
|
)
|
||||||
|
|
||||||
request.app.state.config.ENABLE_RAG_WEB_SEARCH = form_data.web.search.enabled
|
request.app.state.config.ENABLE_RAG_WEB_SEARCH = form_data.web.search.enabled
|
||||||
request.app.state.config.RAG_WEB_SEARCH_ENGINE = form_data.web.search.engine
|
request.app.state.config.RAG_WEB_SEARCH_ENGINE = form_data.web.search.engine
|
||||||
|
|
||||||
|
request.app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT = (
|
||||||
|
form_data.web.RAG_WEB_SEARCH_FULL_CONTEXT
|
||||||
|
)
|
||||||
|
|
||||||
request.app.state.config.SEARXNG_QUERY_URL = (
|
request.app.state.config.SEARXNG_QUERY_URL = (
|
||||||
form_data.web.search.searxng_query_url
|
form_data.web.search.searxng_query_url
|
||||||
)
|
)
|
||||||
@ -600,7 +607,8 @@ async def update_rag_config(
|
|||||||
"translation": request.app.state.YOUTUBE_LOADER_TRANSLATION,
|
"translation": request.app.state.YOUTUBE_LOADER_TRANSLATION,
|
||||||
},
|
},
|
||||||
"web": {
|
"web": {
|
||||||
"web_loader_ssl_verification": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
|
"ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
|
||||||
|
"RAG_WEB_SEARCH_FULL_CONTEXT": request.app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT,
|
||||||
"search": {
|
"search": {
|
||||||
"enabled": request.app.state.config.ENABLE_RAG_WEB_SEARCH,
|
"enabled": request.app.state.config.ENABLE_RAG_WEB_SEARCH,
|
||||||
"engine": request.app.state.config.RAG_WEB_SEARCH_ENGINE,
|
"engine": request.app.state.config.RAG_WEB_SEARCH_ENGINE,
|
||||||
@ -1349,21 +1357,36 @@ async def process_web_search(
|
|||||||
trust_env=request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV,
|
trust_env=request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV,
|
||||||
)
|
)
|
||||||
docs = await loader.aload()
|
docs = await loader.aload()
|
||||||
await run_in_threadpool(
|
|
||||||
save_docs_to_vector_db,
|
|
||||||
request,
|
|
||||||
docs,
|
|
||||||
collection_name,
|
|
||||||
overwrite=True,
|
|
||||||
user=user,
|
|
||||||
)
|
|
||||||
|
|
||||||
return {
|
if request.app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT:
|
||||||
"status": True,
|
return {
|
||||||
"collection_name": collection_name,
|
"status": True,
|
||||||
"filenames": urls,
|
"docs": [
|
||||||
"loaded_count": len(docs),
|
{
|
||||||
}
|
"content": doc.page_content,
|
||||||
|
"metadata": doc.metadata,
|
||||||
|
}
|
||||||
|
for doc in docs
|
||||||
|
],
|
||||||
|
"filenames": urls,
|
||||||
|
"loaded_count": len(docs),
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
await run_in_threadpool(
|
||||||
|
save_docs_to_vector_db,
|
||||||
|
request,
|
||||||
|
docs,
|
||||||
|
collection_name,
|
||||||
|
overwrite=True,
|
||||||
|
user=user,
|
||||||
|
)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"status": True,
|
||||||
|
"collection_name": collection_name,
|
||||||
|
"filenames": urls,
|
||||||
|
"loaded_count": len(docs),
|
||||||
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.exception(e)
|
log.exception(e)
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
|
@ -362,14 +362,25 @@ async def chat_web_search_handler(
|
|||||||
)
|
)
|
||||||
|
|
||||||
files = form_data.get("files", [])
|
files = form_data.get("files", [])
|
||||||
files.append(
|
|
||||||
{
|
if request.app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT:
|
||||||
"collection_name": results["collection_name"],
|
files.append(
|
||||||
"name": searchQuery,
|
{
|
||||||
"type": "web_search_results",
|
"docs": results.get("docs", []),
|
||||||
"urls": results["filenames"],
|
"name": searchQuery,
|
||||||
}
|
"type": "web_search_docs",
|
||||||
)
|
"urls": results["filenames"],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
files.append(
|
||||||
|
{
|
||||||
|
"collection_name": results["collection_name"],
|
||||||
|
"name": searchQuery,
|
||||||
|
"type": "web_search_results",
|
||||||
|
"urls": results["filenames"],
|
||||||
|
}
|
||||||
|
)
|
||||||
form_data["files"] = files
|
form_data["files"] = files
|
||||||
else:
|
else:
|
||||||
await event_emitter(
|
await event_emitter(
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
import { onMount, getContext } from 'svelte';
|
import { onMount, getContext } from 'svelte';
|
||||||
import { toast } from 'svelte-sonner';
|
import { toast } from 'svelte-sonner';
|
||||||
import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
|
import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
|
||||||
|
import Tooltip from '$lib/components/common/Tooltip.svelte';
|
||||||
|
|
||||||
const i18n = getContext('i18n');
|
const i18n = getContext('i18n');
|
||||||
|
|
||||||
@ -116,6 +117,19 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div class=" py-0.5 flex w-full justify-between">
|
||||||
|
<div class=" self-center text-xs font-medium">{$i18n.t('Full Context Mode')}</div>
|
||||||
|
<div class="flex items-center relative">
|
||||||
|
<Tooltip
|
||||||
|
content={webConfig.RAG_WEB_SEARCH_FULL_CONTEXT
|
||||||
|
? 'Inject the entire web results as context for comprehensive processing, this is recommended for complex queries.'
|
||||||
|
: 'Default to segmented retrieval for focused and relevant content extraction, this is recommended for most cases.'}
|
||||||
|
>
|
||||||
|
<Switch bind:state={webConfig.RAG_WEB_SEARCH_FULL_CONTEXT} />
|
||||||
|
</Tooltip>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
{#if webConfig.search.engine !== ''}
|
{#if webConfig.search.engine !== ''}
|
||||||
<div class="mt-1.5">
|
<div class="mt-1.5">
|
||||||
{#if webConfig.search.engine === 'searxng'}
|
{#if webConfig.search.engine === 'searxng'}
|
||||||
@ -424,12 +438,13 @@
|
|||||||
<button
|
<button
|
||||||
class="p-1 px-3 text-xs flex rounded-sm transition"
|
class="p-1 px-3 text-xs flex rounded-sm transition"
|
||||||
on:click={() => {
|
on:click={() => {
|
||||||
webConfig.web_loader_ssl_verification = !webConfig.web_loader_ssl_verification;
|
webConfig.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION =
|
||||||
|
!webConfig.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION;
|
||||||
submitHandler();
|
submitHandler();
|
||||||
}}
|
}}
|
||||||
type="button"
|
type="button"
|
||||||
>
|
>
|
||||||
{#if webConfig.web_loader_ssl_verification === false}
|
{#if webConfig.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION === false}
|
||||||
<span class="ml-2 self-center">{$i18n.t('On')}</span>
|
<span class="ml-2 self-center">{$i18n.t('On')}</span>
|
||||||
{:else}
|
{:else}
|
||||||
<span class="ml-2 self-center">{$i18n.t('Off')}</span>
|
<span class="ml-2 self-center">{$i18n.t('Off')}</span>
|
||||||
|
Loading…
Reference in New Issue
Block a user