mirror of
https://github.com/open-webui/open-webui
synced 2025-06-26 18:26:48 +00:00
refac: web/rag config
This commit is contained in:
@@ -201,7 +201,10 @@ def save_config(config):
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
ENABLE_PERSISTENT_CONFIG = os.environ.get("ENABLE_PERSISTENT_CONFIG", "True").lower() == "true"
|
||||
ENABLE_PERSISTENT_CONFIG = (
|
||||
os.environ.get("ENABLE_PERSISTENT_CONFIG", "True").lower() == "true"
|
||||
)
|
||||
|
||||
|
||||
class PersistentConfig(Generic[T]):
|
||||
def __init__(self, env_name: str, config_path: str, env_value: T):
|
||||
@@ -612,10 +615,16 @@ def load_oauth_providers():
|
||||
"scope": OAUTH_SCOPES.value,
|
||||
}
|
||||
|
||||
if OAUTH_CODE_CHALLENGE_METHOD.value and OAUTH_CODE_CHALLENGE_METHOD.value == "S256":
|
||||
if (
|
||||
OAUTH_CODE_CHALLENGE_METHOD.value
|
||||
and OAUTH_CODE_CHALLENGE_METHOD.value == "S256"
|
||||
):
|
||||
client_kwargs["code_challenge_method"] = "S256"
|
||||
elif OAUTH_CODE_CHALLENGE_METHOD.value:
|
||||
raise Exception('Code challenge methods other than "%s" not supported. Given: "%s"' % ("S256", OAUTH_CODE_CHALLENGE_METHOD.value))
|
||||
raise Exception(
|
||||
'Code challenge methods other than "%s" not supported. Given: "%s"'
|
||||
% ("S256", OAUTH_CODE_CHALLENGE_METHOD.value)
|
||||
)
|
||||
|
||||
client.register(
|
||||
name="oidc",
|
||||
@@ -1820,12 +1829,6 @@ RAG_FILE_MAX_SIZE = PersistentConfig(
|
||||
),
|
||||
)
|
||||
|
||||
ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = PersistentConfig(
|
||||
"ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION",
|
||||
"rag.enable_web_loader_ssl_verification",
|
||||
os.environ.get("ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION", "True").lower() == "true",
|
||||
)
|
||||
|
||||
RAG_EMBEDDING_ENGINE = PersistentConfig(
|
||||
"RAG_EMBEDDING_ENGINE",
|
||||
"rag.embedding_engine",
|
||||
@@ -1990,16 +1993,20 @@ YOUTUBE_LOADER_PROXY_URL = PersistentConfig(
|
||||
)
|
||||
|
||||
|
||||
ENABLE_RAG_WEB_SEARCH = PersistentConfig(
|
||||
"ENABLE_RAG_WEB_SEARCH",
|
||||
####################################
|
||||
# Web Search (RAG)
|
||||
####################################
|
||||
|
||||
ENABLE_WEB_SEARCH = PersistentConfig(
|
||||
"ENABLE_WEB_SEARCH",
|
||||
"rag.web.search.enable",
|
||||
os.getenv("ENABLE_RAG_WEB_SEARCH", "False").lower() == "true",
|
||||
os.getenv("ENABLE_WEB_SEARCH", "False").lower() == "true",
|
||||
)
|
||||
|
||||
RAG_WEB_SEARCH_ENGINE = PersistentConfig(
|
||||
"RAG_WEB_SEARCH_ENGINE",
|
||||
WEB_SEARCH_ENGINE = PersistentConfig(
|
||||
"WEB_SEARCH_ENGINE",
|
||||
"rag.web.search.engine",
|
||||
os.getenv("RAG_WEB_SEARCH_ENGINE", ""),
|
||||
os.getenv("WEB_SEARCH_ENGINE", ""),
|
||||
)
|
||||
|
||||
BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = PersistentConfig(
|
||||
@@ -2008,10 +2015,18 @@ BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = PersistentConfig(
|
||||
os.getenv("BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL", "False").lower() == "true",
|
||||
)
|
||||
|
||||
|
||||
WEB_SEARCH_RESULT_COUNT = PersistentConfig(
|
||||
"WEB_SEARCH_RESULT_COUNT",
|
||||
"rag.web.search.result_count",
|
||||
int(os.getenv("WEB_SEARCH_RESULT_COUNT", "3")),
|
||||
)
|
||||
|
||||
|
||||
# You can provide a list of your own websites to filter after performing a web search.
|
||||
# This ensures the highest level of safety and reliability of the information sources.
|
||||
RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = PersistentConfig(
|
||||
"RAG_WEB_SEARCH_DOMAIN_FILTER_LIST",
|
||||
WEB_SEARCH_DOMAIN_FILTER_LIST = PersistentConfig(
|
||||
"WEB_SEARCH_DOMAIN_FILTER_LIST",
|
||||
"rag.web.search.domain.filter_list",
|
||||
[
|
||||
# "wikipedia.com",
|
||||
@@ -2020,6 +2035,30 @@ RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = PersistentConfig(
|
||||
],
|
||||
)
|
||||
|
||||
WEB_SEARCH_CONCURRENT_REQUESTS = PersistentConfig(
|
||||
"WEB_SEARCH_CONCURRENT_REQUESTS",
|
||||
"rag.web.search.concurrent_requests",
|
||||
int(os.getenv("WEB_SEARCH_CONCURRENT_REQUESTS", "10")),
|
||||
)
|
||||
|
||||
WEB_LOADER_ENGINE = PersistentConfig(
|
||||
"WEB_LOADER_ENGINE",
|
||||
"rag.web.loader.engine",
|
||||
os.environ.get("WEB_LOADER_ENGINE", ""),
|
||||
)
|
||||
|
||||
ENABLE_WEB_LOADER_SSL_VERIFICATION = PersistentConfig(
|
||||
"ENABLE_WEB_LOADER_SSL_VERIFICATION",
|
||||
"rag.web.loader.ssl_verification",
|
||||
os.environ.get("ENABLE_WEB_LOADER_SSL_VERIFICATION", "True").lower() == "true",
|
||||
)
|
||||
|
||||
WEB_SEARCH_TRUST_ENV = PersistentConfig(
|
||||
"WEB_SEARCH_TRUST_ENV",
|
||||
"rag.web.search.trust_env",
|
||||
os.getenv("WEB_SEARCH_TRUST_ENV", "False").lower() == "true",
|
||||
)
|
||||
|
||||
|
||||
SEARXNG_QUERY_URL = PersistentConfig(
|
||||
"SEARXNG_QUERY_URL",
|
||||
@@ -2155,34 +2194,22 @@ SOUGOU_API_SK = PersistentConfig(
|
||||
os.getenv("SOUGOU_API_SK", ""),
|
||||
)
|
||||
|
||||
RAG_WEB_SEARCH_RESULT_COUNT = PersistentConfig(
|
||||
"RAG_WEB_SEARCH_RESULT_COUNT",
|
||||
"rag.web.search.result_count",
|
||||
int(os.getenv("RAG_WEB_SEARCH_RESULT_COUNT", "3")),
|
||||
TAVILY_API_KEY = PersistentConfig(
|
||||
"TAVILY_API_KEY",
|
||||
"rag.web.search.tavily_api_key",
|
||||
os.getenv("TAVILY_API_KEY", ""),
|
||||
)
|
||||
|
||||
RAG_WEB_SEARCH_CONCURRENT_REQUESTS = PersistentConfig(
|
||||
"RAG_WEB_SEARCH_CONCURRENT_REQUESTS",
|
||||
"rag.web.search.concurrent_requests",
|
||||
int(os.getenv("RAG_WEB_SEARCH_CONCURRENT_REQUESTS", "10")),
|
||||
TAVILY_EXTRACT_DEPTH = PersistentConfig(
|
||||
"TAVILY_EXTRACT_DEPTH",
|
||||
"rag.web.search.tavily_extract_depth",
|
||||
os.getenv("TAVILY_EXTRACT_DEPTH", "basic"),
|
||||
)
|
||||
|
||||
RAG_WEB_LOADER_ENGINE = PersistentConfig(
|
||||
"RAG_WEB_LOADER_ENGINE",
|
||||
"rag.web.loader.engine",
|
||||
os.environ.get("RAG_WEB_LOADER_ENGINE", "safe_web"),
|
||||
)
|
||||
|
||||
RAG_WEB_SEARCH_TRUST_ENV = PersistentConfig(
|
||||
"RAG_WEB_SEARCH_TRUST_ENV",
|
||||
"rag.web.search.trust_env",
|
||||
os.getenv("RAG_WEB_SEARCH_TRUST_ENV", "False").lower() == "true",
|
||||
)
|
||||
|
||||
PLAYWRIGHT_WS_URI = PersistentConfig(
|
||||
"PLAYWRIGHT_WS_URI",
|
||||
"rag.web.loader.playwright_ws_uri",
|
||||
os.environ.get("PLAYWRIGHT_WS_URI", ""),
|
||||
PLAYWRIGHT_WS_URL = PersistentConfig(
|
||||
"PLAYWRIGHT_WS_URL",
|
||||
"rag.web.loader.PLAYWRIGHT_WS_URL",
|
||||
os.environ.get("PLAYWRIGHT_WS_URL", ""),
|
||||
)
|
||||
|
||||
PLAYWRIGHT_TIMEOUT = PersistentConfig(
|
||||
@@ -2203,17 +2230,6 @@ FIRECRAWL_API_BASE_URL = PersistentConfig(
|
||||
os.environ.get("FIRECRAWL_API_BASE_URL", "https://api.firecrawl.dev"),
|
||||
)
|
||||
|
||||
TAVILY_API_KEY = PersistentConfig(
|
||||
"TAVILY_API_KEY",
|
||||
"rag.web.loader.tavily_api_key",
|
||||
os.getenv("TAVILY_API_KEY", ""),
|
||||
)
|
||||
|
||||
TAVILY_EXTRACT_DEPTH = PersistentConfig(
|
||||
"TAVILY_EXTRACT_DEPTH",
|
||||
"rag.web.loader.tavily_extract_depth",
|
||||
os.getenv("TAVILY_EXTRACT_DEPTH", "basic"),
|
||||
)
|
||||
|
||||
####################################
|
||||
# Images
|
||||
|
||||
@@ -160,11 +160,11 @@ from open_webui.config import (
|
||||
AUDIO_TTS_VOICE,
|
||||
AUDIO_TTS_AZURE_SPEECH_REGION,
|
||||
AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT,
|
||||
PLAYWRIGHT_WS_URI,
|
||||
PLAYWRIGHT_WS_URL,
|
||||
PLAYWRIGHT_TIMEOUT,
|
||||
FIRECRAWL_API_BASE_URL,
|
||||
FIRECRAWL_API_KEY,
|
||||
RAG_WEB_LOADER_ENGINE,
|
||||
WEB_LOADER_ENGINE,
|
||||
WHISPER_MODEL,
|
||||
DEEPGRAM_API_KEY,
|
||||
WHISPER_MODEL_AUTO_UPDATE,
|
||||
@@ -205,12 +205,13 @@ from open_webui.config import (
|
||||
YOUTUBE_LOADER_LANGUAGE,
|
||||
YOUTUBE_LOADER_PROXY_URL,
|
||||
# Retrieval (Web Search)
|
||||
RAG_WEB_SEARCH_ENGINE,
|
||||
ENABLE_WEB_SEARCH,
|
||||
WEB_SEARCH_ENGINE,
|
||||
BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL,
|
||||
RAG_WEB_SEARCH_RESULT_COUNT,
|
||||
RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
||||
RAG_WEB_SEARCH_TRUST_ENV,
|
||||
RAG_WEB_SEARCH_DOMAIN_FILTER_LIST,
|
||||
WEB_SEARCH_RESULT_COUNT,
|
||||
WEB_SEARCH_CONCURRENT_REQUESTS,
|
||||
WEB_SEARCH_TRUST_ENV,
|
||||
WEB_SEARCH_DOMAIN_FILTER_LIST,
|
||||
JINA_API_KEY,
|
||||
SEARCHAPI_API_KEY,
|
||||
SEARCHAPI_ENGINE,
|
||||
@@ -240,8 +241,7 @@ from open_webui.config import (
|
||||
ONEDRIVE_CLIENT_ID,
|
||||
ENABLE_RAG_HYBRID_SEARCH,
|
||||
ENABLE_RAG_LOCAL_WEB_FETCH,
|
||||
ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
|
||||
ENABLE_RAG_WEB_SEARCH,
|
||||
ENABLE_WEB_LOADER_SSL_VERIFICATION,
|
||||
ENABLE_GOOGLE_DRIVE_INTEGRATION,
|
||||
ENABLE_ONEDRIVE_INTEGRATION,
|
||||
UPLOAD_DIR,
|
||||
@@ -594,9 +594,7 @@ app.state.config.FILE_MAX_COUNT = RAG_FILE_MAX_COUNT
|
||||
app.state.config.RAG_FULL_CONTEXT = RAG_FULL_CONTEXT
|
||||
app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL = BYPASS_EMBEDDING_AND_RETRIEVAL
|
||||
app.state.config.ENABLE_RAG_HYBRID_SEARCH = ENABLE_RAG_HYBRID_SEARCH
|
||||
app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = (
|
||||
ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION
|
||||
)
|
||||
app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION = ENABLE_WEB_LOADER_SSL_VERIFICATION
|
||||
|
||||
app.state.config.CONTENT_EXTRACTION_ENGINE = CONTENT_EXTRACTION_ENGINE
|
||||
app.state.config.TIKA_SERVER_URL = TIKA_SERVER_URL
|
||||
@@ -629,12 +627,16 @@ app.state.config.YOUTUBE_LOADER_LANGUAGE = YOUTUBE_LOADER_LANGUAGE
|
||||
app.state.config.YOUTUBE_LOADER_PROXY_URL = YOUTUBE_LOADER_PROXY_URL
|
||||
|
||||
|
||||
app.state.config.ENABLE_RAG_WEB_SEARCH = ENABLE_RAG_WEB_SEARCH
|
||||
app.state.config.RAG_WEB_SEARCH_ENGINE = RAG_WEB_SEARCH_ENGINE
|
||||
app.state.config.ENABLE_WEB_SEARCH = ENABLE_WEB_SEARCH
|
||||
app.state.config.WEB_SEARCH_ENGINE = WEB_SEARCH_ENGINE
|
||||
app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST = WEB_SEARCH_DOMAIN_FILTER_LIST
|
||||
app.state.config.WEB_SEARCH_RESULT_COUNT = WEB_SEARCH_RESULT_COUNT
|
||||
app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS = WEB_SEARCH_CONCURRENT_REQUESTS
|
||||
app.state.config.WEB_LOADER_ENGINE = WEB_LOADER_ENGINE
|
||||
app.state.config.WEB_SEARCH_TRUST_ENV = WEB_SEARCH_TRUST_ENV
|
||||
app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = (
|
||||
BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL
|
||||
)
|
||||
app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = RAG_WEB_SEARCH_DOMAIN_FILTER_LIST
|
||||
|
||||
app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION = ENABLE_GOOGLE_DRIVE_INTEGRATION
|
||||
app.state.config.ENABLE_ONEDRIVE_INTEGRATION = ENABLE_ONEDRIVE_INTEGRATION
|
||||
@@ -662,11 +664,8 @@ app.state.config.PERPLEXITY_API_KEY = PERPLEXITY_API_KEY
|
||||
app.state.config.SOUGOU_API_SID = SOUGOU_API_SID
|
||||
app.state.config.SOUGOU_API_SK = SOUGOU_API_SK
|
||||
|
||||
app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = RAG_WEB_SEARCH_RESULT_COUNT
|
||||
app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = RAG_WEB_SEARCH_CONCURRENT_REQUESTS
|
||||
app.state.config.RAG_WEB_LOADER_ENGINE = RAG_WEB_LOADER_ENGINE
|
||||
app.state.config.RAG_WEB_SEARCH_TRUST_ENV = RAG_WEB_SEARCH_TRUST_ENV
|
||||
app.state.config.PLAYWRIGHT_WS_URI = PLAYWRIGHT_WS_URI
|
||||
|
||||
app.state.config.PLAYWRIGHT_WS_URL = PLAYWRIGHT_WS_URL
|
||||
app.state.config.PLAYWRIGHT_TIMEOUT = PLAYWRIGHT_TIMEOUT
|
||||
app.state.config.FIRECRAWL_API_BASE_URL = FIRECRAWL_API_BASE_URL
|
||||
app.state.config.FIRECRAWL_API_KEY = FIRECRAWL_API_KEY
|
||||
@@ -1261,7 +1260,7 @@ async def get_app_config(request: Request):
|
||||
{
|
||||
"enable_direct_connections": app.state.config.ENABLE_DIRECT_CONNECTIONS,
|
||||
"enable_channels": app.state.config.ENABLE_CHANNELS,
|
||||
"enable_web_search": app.state.config.ENABLE_RAG_WEB_SEARCH,
|
||||
"enable_web_search": app.state.config.ENABLE_WEB_SEARCH,
|
||||
"enable_code_execution": app.state.config.ENABLE_CODE_EXECUTION,
|
||||
"enable_code_interpreter": app.state.config.ENABLE_CODE_INTERPRETER,
|
||||
"enable_image_generation": app.state.config.ENABLE_IMAGE_GENERATION,
|
||||
|
||||
@@ -28,9 +28,9 @@ from open_webui.retrieval.loaders.tavily import TavilyLoader
|
||||
from open_webui.constants import ERROR_MESSAGES
|
||||
from open_webui.config import (
|
||||
ENABLE_RAG_LOCAL_WEB_FETCH,
|
||||
PLAYWRIGHT_WS_URI,
|
||||
PLAYWRIGHT_WS_URL,
|
||||
PLAYWRIGHT_TIMEOUT,
|
||||
RAG_WEB_LOADER_ENGINE,
|
||||
WEB_LOADER_ENGINE,
|
||||
FIRECRAWL_API_BASE_URL,
|
||||
FIRECRAWL_API_KEY,
|
||||
TAVILY_API_KEY,
|
||||
@@ -584,13 +584,6 @@ class SafeWebBaseLoader(WebBaseLoader):
|
||||
return [document async for document in self.alazy_load()]
|
||||
|
||||
|
||||
RAG_WEB_LOADER_ENGINES = defaultdict(lambda: SafeWebBaseLoader)
|
||||
RAG_WEB_LOADER_ENGINES["playwright"] = SafePlaywrightURLLoader
|
||||
RAG_WEB_LOADER_ENGINES["safe_web"] = SafeWebBaseLoader
|
||||
RAG_WEB_LOADER_ENGINES["firecrawl"] = SafeFireCrawlLoader
|
||||
RAG_WEB_LOADER_ENGINES["tavily"] = SafeTavilyLoader
|
||||
|
||||
|
||||
def get_web_loader(
|
||||
urls: Union[str, Sequence[str]],
|
||||
verify_ssl: bool = True,
|
||||
@@ -608,27 +601,36 @@ def get_web_loader(
|
||||
"trust_env": trust_env,
|
||||
}
|
||||
|
||||
if RAG_WEB_LOADER_ENGINE.value == "playwright":
|
||||
if WEB_LOADER_ENGINE.value == "" or WEB_LOADER_ENGINE.value == "safe_web":
|
||||
WebLoaderClass = SafeWebBaseLoader
|
||||
if WEB_LOADER_ENGINE.value == "playwright":
|
||||
WebLoaderClass = SafePlaywrightURLLoader
|
||||
web_loader_args["playwright_timeout"] = PLAYWRIGHT_TIMEOUT.value * 1000
|
||||
if PLAYWRIGHT_WS_URI.value:
|
||||
web_loader_args["playwright_ws_url"] = PLAYWRIGHT_WS_URI.value
|
||||
if PLAYWRIGHT_WS_URL.value:
|
||||
web_loader_args["playwright_ws_url"] = PLAYWRIGHT_WS_URL.value
|
||||
|
||||
if RAG_WEB_LOADER_ENGINE.value == "firecrawl":
|
||||
if WEB_LOADER_ENGINE.value == "firecrawl":
|
||||
WebLoaderClass = SafeFireCrawlLoader
|
||||
web_loader_args["api_key"] = FIRECRAWL_API_KEY.value
|
||||
web_loader_args["api_url"] = FIRECRAWL_API_BASE_URL.value
|
||||
|
||||
if RAG_WEB_LOADER_ENGINE.value == "tavily":
|
||||
if WEB_LOADER_ENGINE.value == "tavily":
|
||||
WebLoaderClass = SafeTavilyLoader
|
||||
web_loader_args["api_key"] = TAVILY_API_KEY.value
|
||||
web_loader_args["extract_depth"] = TAVILY_EXTRACT_DEPTH.value
|
||||
|
||||
# Create the appropriate WebLoader based on the configuration
|
||||
WebLoaderClass = RAG_WEB_LOADER_ENGINES[RAG_WEB_LOADER_ENGINE.value]
|
||||
web_loader = WebLoaderClass(**web_loader_args)
|
||||
if WebLoaderClass:
|
||||
web_loader = WebLoaderClass(**web_loader_args)
|
||||
|
||||
log.debug(
|
||||
"Using RAG_WEB_LOADER_ENGINE %s for %s URLs",
|
||||
web_loader.__class__.__name__,
|
||||
len(safe_urls),
|
||||
)
|
||||
log.debug(
|
||||
"Using WEB_LOADER_ENGINE %s for %s URLs",
|
||||
web_loader.__class__.__name__,
|
||||
len(safe_urls),
|
||||
)
|
||||
|
||||
return web_loader
|
||||
return web_loader
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Invalid WEB_LOADER_ENGINE: {WEB_LOADER_ENGINE.value}. "
|
||||
"Please set it to 'safe_web', 'playwright', 'firecrawl', or 'tavily'."
|
||||
)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user