From efb4a710c80635cc0066fc060e5021de79732d1f Mon Sep 17 00:00:00 2001 From: teampen <136991215+teampen@users.noreply.github.com> Date: Sun, 9 Jun 2024 20:44:34 -0400 Subject: [PATCH 1/5] adding Serply as an alternative web search --- README.md | 2 +- backend/apps/rag/main.py | 19 +- backend/apps/rag/search/serply.py | 68 ++++++ backend/apps/rag/search/testdata/serply.json | 206 ++++++++++++++++++ .../documents/Settings/WebParams.svelte | 20 +- 5 files changed, 312 insertions(+), 3 deletions(-) create mode 100644 backend/apps/rag/search/serply.py create mode 100644 backend/apps/rag/search/testdata/serply.json diff --git a/README.md b/README.md index a8d79bd5c..af10c67bc 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ Open WebUI is an [extensible](https://github.com/open-webui/pipelines), feature- - 📚 **Local RAG Integration**: Dive into the future of chat interactions with groundbreaking Retrieval Augmented Generation (RAG) support. This feature seamlessly integrates document interactions into your chat experience. You can load documents directly into the chat or add files to your document library, effortlessly accessing them using the `#` command before a query. -- 🔍 **Web Search for RAG**: Perform web searches using providers like `SearXNG`, `Google PSE`, `Brave Search`, `serpstack`, and `serper`, and inject the results directly into your chat experience. +- 🔍 **Web Search for RAG**: Perform web searches using providers like `SearXNG`, `Google PSE`, `Brave Search`, `serpstack`, `serper`, and [`Serply`](https://serply.io) and inject the results directly into your chat experience. - 🌐 **Web Browsing Capability**: Seamlessly integrate websites into your chat experience using the `#` command followed by a URL. This feature allows you to incorporate web content directly into your conversations, enhancing the richness and depth of your interactions. 
diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py index d405ef0b4..be55f0f95 100644 --- a/backend/apps/rag/main.py +++ b/backend/apps/rag/main.py @@ -67,7 +67,7 @@ from apps.rag.search.main import SearchResult from apps.rag.search.searxng import search_searxng from apps.rag.search.serper import search_serper from apps.rag.search.serpstack import search_serpstack - +from apps.rag.search.serply import search_serply from utils.misc import ( calculate_sha256, @@ -113,6 +113,7 @@ from config import ( SERPSTACK_API_KEY, SERPSTACK_HTTPS, SERPER_API_KEY, + SERPLY_API_KEY, RAG_WEB_SEARCH_RESULT_COUNT, RAG_WEB_SEARCH_CONCURRENT_REQUESTS, RAG_EMBEDDING_OPENAI_BATCH_SIZE, @@ -165,6 +166,7 @@ app.state.config.BRAVE_SEARCH_API_KEY = BRAVE_SEARCH_API_KEY app.state.config.SERPSTACK_API_KEY = SERPSTACK_API_KEY app.state.config.SERPSTACK_HTTPS = SERPSTACK_HTTPS app.state.config.SERPER_API_KEY = SERPER_API_KEY +app.state.config.SERPLY_API_KEY = SERPLY_API_KEY app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = RAG_WEB_SEARCH_RESULT_COUNT app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = RAG_WEB_SEARCH_CONCURRENT_REQUESTS @@ -392,6 +394,7 @@ async def get_rag_config(user=Depends(get_admin_user)): "serpstack_api_key": app.state.config.SERPSTACK_API_KEY, "serpstack_https": app.state.config.SERPSTACK_HTTPS, "serper_api_key": app.state.config.SERPER_API_KEY, + "serply_api_key": app.state.config.SERPLY_API_KEY, "result_count": app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, "concurrent_requests": app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, }, @@ -419,6 +422,7 @@ class WebSearchConfig(BaseModel): serpstack_api_key: Optional[str] = None serpstack_https: Optional[bool] = None serper_api_key: Optional[str] = None + serply_api_key: Optional[str] = None result_count: Optional[int] = None concurrent_requests: Optional[int] = None @@ -469,6 +473,7 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_ app.state.config.SERPSTACK_API_KEY = 
form_data.web.search.serpstack_api_key app.state.config.SERPSTACK_HTTPS = form_data.web.search.serpstack_https app.state.config.SERPER_API_KEY = form_data.web.search.serper_api_key + app.state.config.SERPLY_API_KEY = form_data.web.search.serply_api_key app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = form_data.web.search.result_count app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = ( form_data.web.search.concurrent_requests @@ -497,6 +502,7 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_ "serpstack_api_key": app.state.config.SERPSTACK_API_KEY, "serpstack_https": app.state.config.SERPSTACK_HTTPS, "serper_api_key": app.state.config.SERPER_API_KEY, + "serply_api_key": app.state.config.SERPLY_API_KEY, "result_count": app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, "concurrent_requests": app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, }, @@ -744,6 +750,7 @@ def search_web(engine: str, query: str) -> list[SearchResult]: - BRAVE_SEARCH_API_KEY - SERPSTACK_API_KEY - SERPER_API_KEY + - SERPLY_API_KEY Args: query (str): The query to search for @@ -802,6 +809,15 @@ def search_web(engine: str, query: str) -> list[SearchResult]: ) else: raise Exception("No SERPER_API_KEY found in environment variables") + elif engine == "serply": + if app.state.config.SERPLY_API_KEY: + return search_serply( + app.state.config.SERPLY_API_KEY, + query, + app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, + ) + else: + raise Exception("No SERPLY_API_KEY found in environment variables") else: raise Exception("No search engine API key found in environment variables") @@ -809,6 +825,7 @@ def search_web(engine: str, query: str) -> list[SearchResult]: @app.post("/web/search") def store_web_search(form_data: SearchForm, user=Depends(get_current_user)): try: + logging.info(f"trying to web search with {app.state.config.RAG_WEB_SEARCH_ENGINE, form_data.query}") web_results = search_web( app.state.config.RAG_WEB_SEARCH_ENGINE, form_data.query ) diff --git 
a/backend/apps/rag/search/serply.py b/backend/apps/rag/search/serply.py new file mode 100644 index 000000000..12d5e51d2 --- /dev/null +++ b/backend/apps/rag/search/serply.py @@ -0,0 +1,68 @@ +import json +import logging + +import requests +from urllib.parse import urlencode + +from apps.rag.search.main import SearchResult +from config import SRC_LOG_LEVELS + +log = logging.getLogger(__name__) +log.setLevel(SRC_LOG_LEVELS["RAG"]) + + +def search_serply( + api_key: str, + query: str, + count: int, + hl: str = "us", + limit: int = 10, + device_type: str = "desktop", + proxy_location: str = "US" + ) -> list[SearchResult]: + """Search using the Serply (serply.io) API and return the results as a list of SearchResult objects. + + Args: + api_key (str): A serply.io API key + query (str): The query to search for + hl (str): Host Language code to display results in (reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages) + limit (int): The maximum number of results to request from the API, sent as `num` [10-100, defaults to 10]; the returned list is additionally cut to the first `count` results ordered by `realPosition` + """ + log.info("Searching with Serply") + + url = "https://api.serply.io/v1/search/" + + query_payload = { + "q": query, + "language": "en", + "num": limit, + "gl": proxy_location.upper(), + "hl": hl.lower() + } + + url = f"{url}{urlencode(query_payload)}" + headers = { + "X-API-KEY": api_key, + "X-User-Agent": device_type, + "User-Agent": "open-webui", + "X-Proxy-Location": proxy_location + } + + response = requests.request("GET", url, headers=headers) + response.raise_for_status() + + json_response = response.json() + log.info(f"results from serply search: {json_response}") + + results = sorted( + json_response.get("results", []), key=lambda x: x.get("realPosition", 0) + ) + + return [ + SearchResult( + link=result["link"], + title=result.get("title"), + snippet=result.get("description"), + ) + for result in results[:count] + ] diff --git a/backend/apps/rag/search/testdata/serply.json b/backend/apps/rag/search/testdata/serply.json new file
mode 100644 index 000000000..7d575de40 --- /dev/null +++ b/backend/apps/rag/search/testdata/serply.json @@ -0,0 +1,206 @@ +{ + "ads": [], + "ads_count": 0, + "answers": [], + "results": [ + { + "title": "Apple", + "link": "https://www.apple.com/", + "description": "Discover the innovative world of Apple and shop everything iPhone, iPad, Apple Watch, Mac, and Apple TV, plus explore accessories, entertainment, ...", + "additional_links": [ + { + "text": "AppleApplehttps://www.apple.com", + "href": "https://www.apple.com/" + } + ], + "cite": {}, + "subdomains": [ + { + "title": "Support", + "link": "https://support.apple.com/", + "description": "SupportContact - iPhone Support - Billing and Subscriptions - Apple Repair" + }, + { + "title": "Store", + "link": "https://www.apple.com/store", + "description": "StoreShop iPhone - Shop iPad - App Store - Shop Mac - ..." + }, + { + "title": "Mac", + "link": "https://www.apple.com/mac/", + "description": "MacMacBook Air - MacBook Pro - iMac - Compare Mac models - Mac mini" + }, + { + "title": "iPad", + "link": "https://www.apple.com/ipad/", + "description": "iPadShop iPad - iPad Pro - iPad Air - Compare iPad models - ..." + }, + { + "title": "Watch", + "link": "https://www.apple.com/watch/", + "description": "WatchShop Apple Watch - Series 9 - SE - Ultra 2 - Nike - HermĂšs - ..." + } + ], + "realPosition": 1 + }, + { + "title": "Apple", + "link": "https://www.apple.com/", + "description": "Discover the innovative world of Apple and shop everything iPhone, iPad, Apple Watch, Mac, and Apple TV, plus explore accessories, entertainment, ...", + "additional_links": [ + { + "text": "AppleApplehttps://www.apple.com", + "href": "https://www.apple.com/" + } + ], + "cite": {}, + "realPosition": 2 + }, + { + "title": "Apple Inc.", + "link": "https://en.wikipedia.org/wiki/Apple_Inc.", + "description": "Apple Inc. (formerly Apple Computer, Inc.) 
is an American multinational corporation and technology company headquartered in Cupertino, California, ...", + "additional_links": [ + { + "text": "Apple Inc.Wikipediahttps://en.wikipedia.org â€ș wiki â€ș Apple_Inc", + "href": "https://en.wikipedia.org/wiki/Apple_Inc." + }, + { + "text": "", + "href": "https://en.wikipedia.org/wiki/Apple_Inc." + }, + { + "text": "History", + "href": "https://en.wikipedia.org/wiki/History_of_Apple_Inc." + }, + { + "text": "List of Apple products", + "href": "https://en.wikipedia.org/wiki/List_of_Apple_products" + }, + { + "text": "Litigation involving Apple Inc.", + "href": "https://en.wikipedia.org/wiki/Litigation_involving_Apple_Inc." + }, + { + "text": "Apple Park", + "href": "https://en.wikipedia.org/wiki/Apple_Park" + } + ], + "cite": { + "domain": "https://en.wikipedia.org â€ș wiki â€ș Apple_Inc", + "span": " â€ș wiki â€ș Apple_Inc" + }, + "realPosition": 3 + }, + { + "title": "Apple Inc. (AAPL) Company Profile & Facts", + "link": "https://finance.yahoo.com/quote/AAPL/profile/", + "description": "Apple Inc. designs, manufactures, and markets smartphones, personal computers, tablets, wearables, and accessories worldwide. The company offers iPhone, a line ...", + "additional_links": [ + { + "text": "Apple Inc. (AAPL) Company Profile & FactsYahoo Financehttps://finance.yahoo.com â€ș quote â€ș AAPL â€ș profile", + "href": "https://finance.yahoo.com/quote/AAPL/profile/" + } + ], + "cite": { + "domain": "https://finance.yahoo.com â€ș quote â€ș AAPL â€ș profile", + "span": " â€ș quote â€ș AAPL â€ș profile" + }, + "realPosition": 4 + }, + { + "title": "Apple Inc - Company Profile and News", + "link": "https://www.bloomberg.com/profile/company/AAPL:US", + "description": "Apple Inc. Apple Inc. 
designs, manufactures, and markets smartphones, personal computers, tablets, wearables and accessories, and sells a variety of related ...", + "additional_links": [ + { + "text": "Apple Inc - Company Profile and NewsBloomberghttps://www.bloomberg.com â€ș company â€ș AAPL:US", + "href": "https://www.bloomberg.com/profile/company/AAPL:US" + }, + { + "text": "", + "href": "https://www.bloomberg.com/profile/company/AAPL:US" + } + ], + "cite": { + "domain": "https://www.bloomberg.com â€ș company â€ș AAPL:US", + "span": " â€ș company â€ș AAPL:US" + }, + "realPosition": 5 + }, + { + "title": "Apple Inc. | History, Products, Headquarters, & Facts", + "link": "https://www.britannica.com/money/Apple-Inc", + "description": "May 22, 2024 — Apple Inc. is an American multinational technology company that revolutionized the technology sector through its innovation of computer ...", + "additional_links": [ + { + "text": "Apple Inc. | History, Products, Headquarters, & FactsBritannicahttps://www.britannica.com â€ș money â€ș Apple-Inc", + "href": "https://www.britannica.com/money/Apple-Inc" + }, + { + "text": "", + "href": "https://www.britannica.com/money/Apple-Inc" + } + ], + "cite": { + "domain": "https://www.britannica.com â€ș money â€ș Apple-Inc", + "span": " â€ș money â€ș Apple-Inc" + }, + "realPosition": 6 + } + ], + "shopping_ads": [], + "places": [ + { + "title": "Apple Inc." 
+ }, + { + "title": "Apple Inc" + }, + { + "title": "Apple Inc" + } + ], + "related_searches": { + "images": [], + "text": [ + { + "title": "apple inc full form", + "link": "https://www.google.com/search?sca_esv=6b6df170a5c9891b&sca_upv=1&q=Apple+Inc+full+form&sa=X&ved=2ahUKEwjLxuSJwM-GAxUHODQIHYuJBhgQ1QJ6BAhPEAE" + }, + { + "title": "apple company history", + "link": "https://www.google.com/search?sca_esv=6b6df170a5c9891b&sca_upv=1&q=Apple+company+history&sa=X&ved=2ahUKEwjLxuSJwM-GAxUHODQIHYuJBhgQ1QJ6BAhOEAE" + }, + { + "title": "apple store", + "link": "https://www.google.com/search?sca_esv=6b6df170a5c9891b&sca_upv=1&q=Apple+Store&sa=X&ved=2ahUKEwjLxuSJwM-GAxUHODQIHYuJBhgQ1QJ6BAhQEAE" + }, + { + "title": "apple id", + "link": "https://www.google.com/search?sca_esv=6b6df170a5c9891b&sca_upv=1&q=Apple+id&sa=X&ved=2ahUKEwjLxuSJwM-GAxUHODQIHYuJBhgQ1QJ6BAhSEAE" + }, + { + "title": "apple inc industry", + "link": "https://www.google.com/search?sca_esv=6b6df170a5c9891b&sca_upv=1&q=Apple+Inc+industry&sa=X&ved=2ahUKEwjLxuSJwM-GAxUHODQIHYuJBhgQ1QJ6BAhREAE" + }, + { + "title": "apple login", + "link": "https://www.google.com/search?sca_esv=6b6df170a5c9891b&sca_upv=1&q=Apple+login&sa=X&ved=2ahUKEwjLxuSJwM-GAxUHODQIHYuJBhgQ1QJ6BAhTEAE" + } + ] + }, + "image_results": [], + "carousel": [], + "total": 2450000000, + "knowledge_graph": "", + "related_questions": [ + "What does the Apple Inc do?", + "Why did Apple change to Apple Inc?", + "Who owns Apple Inc.?", + "What is Apple Inc best known for?" 
+ ], + "carousel_count": 0, + "ts": 2.491065263748169, + "device_type": null +} \ No newline at end of file diff --git a/src/lib/components/documents/Settings/WebParams.svelte b/src/lib/components/documents/Settings/WebParams.svelte index 89d4442b0..7b3a2e434 100644 --- a/src/lib/components/documents/Settings/WebParams.svelte +++ b/src/lib/components/documents/Settings/WebParams.svelte @@ -11,7 +11,7 @@ export let saveHandler: Function; let webConfig = null; - let webSearchEngines = ['searxng', 'google_pse', 'brave', 'serpstack', 'serper']; + let webSearchEngines = ['searxng', 'google_pse', 'brave', 'serpstack', 'serper', 'serply']; let youtubeLanguage = 'en'; let youtubeTranslation = null; @@ -188,6 +188,24 @@ + {:else if webConfig.search.engine === 'serply'} +
+
+ {$i18n.t('Serply API Key')} +
+ +
+
+ +
+
+
{/if} {/if} From 4dcec4855eb66ef3a4c4adc7b2fa31da21fd3c3c Mon Sep 17 00:00:00 2001 From: teampen <136991215+teampen@users.noreply.github.com> Date: Sun, 9 Jun 2024 21:39:46 -0400 Subject: [PATCH 2/5] adding Serply as an alternative web search --- backend/config.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/backend/config.py b/backend/config.py index dd3bc9e4b..098de3b5a 100644 --- a/backend/config.py +++ b/backend/config.py @@ -852,6 +852,12 @@ SERPER_API_KEY = PersistentConfig( os.getenv("SERPER_API_KEY", ""), ) +SERPLY_API_KEY = PersistentConfig( + "SERPLY_API_KEY", + "rag.web.search.serply_api_key", + os.getenv("SERPLY_API_KEY", ""), +) + RAG_WEB_SEARCH_RESULT_COUNT = PersistentConfig( "RAG_WEB_SEARCH_RESULT_COUNT", From 79bf8d6dd31dc17f37aea0bfb339b7150484050c Mon Sep 17 00:00:00 2001 From: teampen <136991215+teampen@users.noreply.github.com> Date: Sun, 9 Jun 2024 21:43:13 -0400 Subject: [PATCH 3/5] undo changes to deprecated web search params --- .../documents/Settings/WebParams.svelte | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/src/lib/components/documents/Settings/WebParams.svelte b/src/lib/components/documents/Settings/WebParams.svelte index 7130c9c3b..b5a4d1679 100644 --- a/src/lib/components/documents/Settings/WebParams.svelte +++ b/src/lib/components/documents/Settings/WebParams.svelte @@ -188,24 +188,6 @@ - {:else if webConfig.search.engine === 'serply'} -
-
- {$i18n.t('Serply API Key')} -
- -
-
- -
-
-
{/if} {/if} From 14eb667fc8b332d1d8027aee510a4d2a40edb93a Mon Sep 17 00:00:00 2001 From: teampen <136991215+teampen@users.noreply.github.com> Date: Sun, 9 Jun 2024 21:52:08 -0400 Subject: [PATCH 4/5] add changes to web search in admin settings --- .../admin/Settings/WebSearch.svelte | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/lib/components/admin/Settings/WebSearch.svelte b/src/lib/components/admin/Settings/WebSearch.svelte index 7013759fa..eeafdce15 100644 --- a/src/lib/components/admin/Settings/WebSearch.svelte +++ b/src/lib/components/admin/Settings/WebSearch.svelte @@ -11,7 +11,7 @@ export let saveHandler: Function; let webConfig = null; - let webSearchEngines = ['searxng', 'google_pse', 'brave', 'serpstack', 'serper']; + let webSearchEngines = ['searxng', 'google_pse', 'brave', 'serpstack', 'serper', 'serply']; let youtubeLanguage = 'en'; let youtubeTranslation = null; @@ -188,6 +188,24 @@ + {:else if webConfig.search.engine === 'serply'} +
+
+ {$i18n.t('Serply API Key')} +
+ +
+
+ +
+
+
{/if} {/if} From 3566002b0bc87cb7025be00a03e18818f62b8ace Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Mon, 10 Jun 2024 03:49:23 -0500 Subject: [PATCH 5/5] doc --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7ef4a477d..5bce3ad06 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ Open WebUI is an [extensible](https://github.com/open-webui/pipelines), feature- - 📚 **Local RAG Integration**: Dive into the future of chat interactions with groundbreaking Retrieval Augmented Generation (RAG) support. This feature seamlessly integrates document interactions into your chat experience. You can load documents directly into the chat or add files to your document library, effortlessly accessing them using the `#` command before a query. -- 🔍 **Web Search for RAG**: Perform web searches using providers like `SearXNG`, `Google PSE`, `Brave Search`, `serpstack`, `serper`, and [`Serply`](https://serply.io) and inject the results directly into your chat experience. +- 🔍 **Web Search for RAG**: Perform web searches using providers like `SearXNG`, `Google PSE`, `Brave Search`, `serpstack`, `serper`, and `Serply` and inject the results directly into your chat experience. - 🌐 **Web Browsing Capability**: Seamlessly integrate websites into your chat experience using the `#` command followed by a URL. This feature allows you to incorporate web content directly into your conversations, enhancing the richness and depth of your interactions.