From 3e2a6df1fb016a7084cc69fc0adea69671243da2 Mon Sep 17 00:00:00 2001 From: Youggls Date: Thu, 10 Apr 2025 14:51:44 +0800 Subject: [PATCH] feat: Add sougou web search API for backend, add config panel in for frontend. --- backend/open_webui/config.py | 12 +++++ backend/open_webui/main.py | 4 ++ backend/open_webui/retrieval/web/sougou.py | 48 +++++++++++++++++++ backend/open_webui/routers/retrieval.py | 25 ++++++++++ backend/requirements.txt | 3 ++ pyproject.toml | 2 + .../admin/Settings/WebSearch.svelte | 28 ++++++++++- src/lib/i18n/locales/en-US/translation.json | 4 ++ src/lib/i18n/locales/zh-CN/translation.json | 4 ++ 9 files changed, 129 insertions(+), 1 deletion(-) create mode 100644 backend/open_webui/retrieval/web/sougou.py diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index bdd6ec874..77e4a763f 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -2142,6 +2142,18 @@ PERPLEXITY_API_KEY = PersistentConfig( os.getenv("PERPLEXITY_API_KEY", ""), ) +SOUGOU_API_SID = PersistentConfig( + "SOUGOU_API_SID", + "rag.web.search.sougou_api_sid", + os.getenv("SOUGOU_API_SID", ""), +) + +SOUGOU_API_SK = PersistentConfig( + "SOUGOU_API_SK", + "rag.web.search.sougou_api_sk", + os.getenv("SOUGOU_API_SK", ""), +) + RAG_WEB_SEARCH_RESULT_COUNT = PersistentConfig( "RAG_WEB_SEARCH_RESULT_COUNT", "rag.web.search.result_count", diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index cc58d5b02..7e36ff173 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -225,6 +225,8 @@ from open_webui.config import ( BRAVE_SEARCH_API_KEY, EXA_API_KEY, PERPLEXITY_API_KEY, + SOUGOU_API_SID, + SOUGOU_API_SK, KAGI_SEARCH_API_KEY, MOJEEK_SEARCH_API_KEY, BOCHA_SEARCH_API_KEY, @@ -652,6 +654,8 @@ app.state.config.BING_SEARCH_V7_ENDPOINT = BING_SEARCH_V7_ENDPOINT app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY = BING_SEARCH_V7_SUBSCRIPTION_KEY app.state.config.EXA_API_KEY = EXA_API_KEY 
app.state.config.PERPLEXITY_API_KEY = PERPLEXITY_API_KEY +app.state.config.SOUGOU_API_SID = SOUGOU_API_SID +app.state.config.SOUGOU_API_SK = SOUGOU_API_SK app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = RAG_WEB_SEARCH_RESULT_COUNT app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = RAG_WEB_SEARCH_CONCURRENT_REQUESTS
diff --git a/backend/open_webui/retrieval/web/sougou.py b/backend/open_webui/retrieval/web/sougou.py
new file mode 100644
index 000000000..a0726372c
--- /dev/null
+++ b/backend/open_webui/retrieval/web/sougou.py
@@ -0,0 +1,64 @@
+import logging
+import json
+from typing import Optional, List
+
+from tencentcloud.common.common_client import CommonClient
+from tencentcloud.common import credential
+from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException
+from tencentcloud.common.profile.client_profile import ClientProfile
+from tencentcloud.common.profile.http_profile import HttpProfile
+
+from open_webui.retrieval.web.main import SearchResult, get_filtered_results
+from open_webui.env import SRC_LOG_LEVELS
+
+log = logging.getLogger(__name__)
+log.setLevel(SRC_LOG_LEVELS["RAG"])
+
+
+def search_sougou(
+    sougou_api_sid: str,
+    sougou_api_sk: str,
+    query: str,
+    count: int,
+    filter_list: Optional[List[str]] = None,
+) -> List[SearchResult]:
+    """Search the web through Tencent Cloud's `SearchPro` action (Sougou).
+
+    Args:
+        sougou_api_sid: Tencent Cloud secret ID used to build the credential.
+        sougou_api_sk: Tencent Cloud secret key used to build the credential.
+        query: Search query text.
+        count: Maximum number of results to return.
+        filter_list: Optional filter passed to `get_filtered_results`.
+
+    Returns:
+        Up to `count` results, highest `score` first. An empty list is
+        returned when the SDK raises `TencentCloudSDKException`.
+    """
+    try:
+        cred = credential.Credential(sougou_api_sid, sougou_api_sk)
+        http_profile = HttpProfile()
+        http_profile.endpoint = "tms.tencentcloudapi.com"
+        client_profile = ClientProfile()
+        client_profile.http_profile = http_profile
+        common_client = CommonClient("tms", "2020-12-29", cred, "", profile=client_profile)
+        # `call_json` takes the request parameters as a plain dict.
+        response = common_client.call_json("SearchPro", {"Query": query, "Cnt": 20})
+        # Each entry of `Pages` is itself a JSON-encoded string; a response
+        # without `Pages` is treated as "no results".
+        pages = response["Response"].get("Pages") or []
+        results = [json.loads(page) for page in pages]
+        # Highest score first; entries without a `score` sort as 0.0.
+        results.sort(key=lambda page: page.get("score", 0.0), reverse=True)
+        if filter_list:
+            results = get_filtered_results(results, filter_list)
+
+        return [
+            SearchResult(
+                link=result.get("url"), title=result.get("title"), snippet=result.get("passage")
+            )
+            for result in results[:count]
+        ]
+    except TencentCloudSDKException as err:
+        log.error(f"Error in Sougou search: {err}")
+        return []
diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index f31abd9ff..8e1708c65 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -60,6 +60,7 @@ from open_webui.retrieval.web.tavily import search_tavily from open_webui.retrieval.web.bing import search_bing from open_webui.retrieval.web.exa import search_exa from open_webui.retrieval.web.perplexity import search_perplexity +from open_webui.retrieval.web.sougou import search_sougou from open_webui.retrieval.utils import ( get_embedding_function, @@ -411,6 +412,8 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)): "bing_search_v7_subscription_key": request.app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY, "exa_api_key": request.app.state.config.EXA_API_KEY, "perplexity_api_key": request.app.state.config.PERPLEXITY_API_KEY, + "sougou_api_sid": request.app.state.config.SOUGOU_API_SID, + "sougou_api_sk": request.app.state.config.SOUGOU_API_SK, "result_count": request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, "trust_env": request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV, "concurrent_requests": request.app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, @@ -478,6 +481,8 @@ class WebSearchConfig(BaseModel): bing_search_v7_subscription_key: Optional[str] = None exa_api_key: Optional[str] = None perplexity_api_key: Optional[str] = None + sougou_api_sid: Optional[str] = None + sougou_api_sk: Optional[str] = None result_count: Optional[int] = None concurrent_requests: Optional[int] = None trust_env: Optional[bool] = None @@ -640,6 +645,12 @@ async def update_rag_config( request.app.state.config.PERPLEXITY_API_KEY = ( form_data.web.search.perplexity_api_key ) +
request.app.state.config.SOUGOU_API_SID = ( + form_data.web.search.sougou_api_sid + ) + request.app.state.config.SOUGOU_API_SK = ( + form_data.web.search.sougou_api_sk + ) request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = ( form_data.web.search.result_count @@ -712,6 +723,8 @@ async def update_rag_config( "bing_search_v7_subscription_key": request.app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY, "exa_api_key": request.app.state.config.EXA_API_KEY, "perplexity_api_key": request.app.state.config.PERPLEXITY_API_KEY, + "sougou_api_sid": request.app.state.config.SOUGOU_API_SID, + "sougou_api_sk": request.app.state.config.SOUGOU_API_SK, "result_count": request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, "concurrent_requests": request.app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, "trust_env": request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV, @@ -1267,6 +1280,7 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: - TAVILY_API_KEY - EXA_API_KEY - PERPLEXITY_API_KEY + - SOUGOU_API_SID + SOUGOU_API_SK - SEARCHAPI_API_KEY + SEARCHAPI_ENGINE (by default `google`) - SERPAPI_API_KEY + SERPAPI_ENGINE (by default `google`) Args: @@ -1438,6 +1452,17 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, ) + elif engine == 'sougou': + if request.app.state.config.SOUGOU_API_SID and request.app.state.config.SOUGOU_API_SK: + return search_sougou( + request.app.state.config.SOUGOU_API_SID, + request.app.state.config.SOUGOU_API_SK, + query, + request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, + request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + ) + else: + raise Exception("No SOUGOU_API_SID or SOUGOU_API_SK found in environment variables") else: raise Exception("No search engine API key found in environment variables") diff --git a/backend/requirements.txt b/backend/requirements.txt index 
ad490d00a..670c363ee 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -123,6 +123,9 @@ ldap3==2.9.1 ## Firecrawl firecrawl-py==1.12.0 +## Sougou API SDK (Tencent Cloud SDK) +tencentcloud-sdk-python==3.0.1336 + ## Trace opentelemetry-api==1.31.1 opentelemetry-sdk==1.31.1 diff --git a/pyproject.toml b/pyproject.toml index 18e833290..98c072df5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -125,6 +125,8 @@ dependencies = [ "firecrawl-py==1.12.0", + "tencentcloud-sdk-python==3.0.1336", + "gcp-storage-emulator>=2024.8.3", ] readme = "README.md" diff --git a/src/lib/components/admin/Settings/WebSearch.svelte b/src/lib/components/admin/Settings/WebSearch.svelte index 5e27274e0..82e61bdc3 100644 --- a/src/lib/components/admin/Settings/WebSearch.svelte +++ b/src/lib/components/admin/Settings/WebSearch.svelte @@ -30,7 +30,8 @@ 'jina', 'bing', 'exa', - 'perplexity' + 'perplexity', + 'sougou' ]; let youtubeLanguage = 'en'; @@ -404,6 +405,31 @@ /> + {:else if webConfig.search.engine === 'sougou'} +
+
+
+ {$i18n.t('Sougou Search API sID')} +
+ + +
+
+
+
+
+ {$i18n.t('Sougou Search API SK')} +
+ + +
+
{/if} {/if} diff --git a/src/lib/i18n/locales/en-US/translation.json b/src/lib/i18n/locales/en-US/translation.json index 827e09ca2..eb90f905a 100644 --- a/src/lib/i18n/locales/en-US/translation.json +++ b/src/lib/i18n/locales/en-US/translation.json @@ -424,6 +424,8 @@ "Enter Mojeek Search API Key": "", "Enter Number of Steps (e.g. 50)": "", "Enter Perplexity API Key": "", + "Enter Sougou Search API sID": "", + "Enter Sougou Search API SK": "", "Enter proxy URL (e.g. https://user:password@host:port)": "", "Enter reasoning effort": "", "Enter Sampler (e.g. Euler a)": "", @@ -822,6 +824,8 @@ "Permission denied when accessing microphone: {{error}}": "", "Permissions": "", "Perplexity API Key": "", + "Sougou Search API sID": "", + "Sougou Search API SK": "", "Personalization": "", "Pin": "", "Pinned": "", diff --git a/src/lib/i18n/locales/zh-CN/translation.json b/src/lib/i18n/locales/zh-CN/translation.json index 5c6337a54..433ffeb16 100644 --- a/src/lib/i18n/locales/zh-CN/translation.json +++ b/src/lib/i18n/locales/zh-CN/translation.json @@ -424,6 +424,8 @@ "Enter Mojeek Search API Key": "输入 Mojeek Search API 密钥", "Enter Number of Steps (e.g. 50)": "输入步骤数 (Steps) (例如:50)", "Enter Perplexity API Key": "输入 Perplexity API 密钥", + "Enter Sougou Search API sID": "输入搜狗搜索 API 的 Secret ID", + "Enter Sougou Search API SK": "输入搜狗搜索 API 的 Secret Key", "Enter proxy URL (e.g. https://user:password@host:port)": "输入代理 URL (例如:https://用户名:密码@主机名:端口)", "Enter reasoning effort": "设置推理努力", "Enter Sampler (e.g. Euler a)": "输入 Sampler (例如:Euler a)", @@ -822,6 +824,8 @@ "Permission denied when accessing microphone: {{error}}": "申请麦克风权限被拒绝:{{error}}", "Permissions": "权限", "Perplexity API Key": "Perplexity API 密钥", + "Sougou Search API sID": "搜狗搜索 API 的 Secret ID", + "Sougou Search API SK": "搜狗搜索 API 的 Secret Key", "Personalization": "个性化", "Pin": "置顶", "Pinned": "已置顶",