From efb4a710c80635cc0066fc060e5021de79732d1f Mon Sep 17 00:00:00 2001 From: teampen <136991215+teampen@users.noreply.github.com> Date: Sun, 9 Jun 2024 20:44:34 -0400 Subject: [PATCH] adding Serply as an alternative web search --- README.md | 2 +- backend/apps/rag/main.py | 19 +- backend/apps/rag/search/serply.py | 68 ++++++ backend/apps/rag/search/testdata/serply.json | 206 ++++++++++++++++++ .../documents/Settings/WebParams.svelte | 20 +- 5 files changed, 312 insertions(+), 3 deletions(-) create mode 100644 backend/apps/rag/search/serply.py create mode 100644 backend/apps/rag/search/testdata/serply.json diff --git a/README.md b/README.md index a8d79bd5c..af10c67bc 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ Open WebUI is an [extensible](https://github.com/open-webui/pipelines), feature- - đ **Local RAG Integration**: Dive into the future of chat interactions with groundbreaking Retrieval Augmented Generation (RAG) support. This feature seamlessly integrates document interactions into your chat experience. You can load documents directly into the chat or add files to your document library, effortlessly accessing them using the `#` command before a query. -- đ **Web Search for RAG**: Perform web searches using providers like `SearXNG`, `Google PSE`, `Brave Search`, `serpstack`, and `serper`, and inject the results directly into your chat experience. +- đ **Web Search for RAG**: Perform web searches using providers like `SearXNG`, `Google PSE`, `Brave Search`, `serpstack`, `serper`, and [`Serply`](https://serply.io) and inject the results directly into your chat experience. - đ **Web Browsing Capability**: Seamlessly integrate websites into your chat experience using the `#` command followed by a URL. This feature allows you to incorporate web content directly into your conversations, enhancing the richness and depth of your interactions. diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py index d405ef0b4..be55f0f95 100644 --- a/backend/apps/rag/main.py +++ b/backend/apps/rag/main.py @@ -67,7 +67,7 @@ from apps.rag.search.main import SearchResult from apps.rag.search.searxng import search_searxng from apps.rag.search.serper import search_serper from apps.rag.search.serpstack import search_serpstack - +from apps.rag.search.serply import search_serply from utils.misc import ( calculate_sha256, @@ -113,6 +113,7 @@ from config import ( SERPSTACK_API_KEY, SERPSTACK_HTTPS, SERPER_API_KEY, + SERPLY_API_KEY, RAG_WEB_SEARCH_RESULT_COUNT, RAG_WEB_SEARCH_CONCURRENT_REQUESTS, RAG_EMBEDDING_OPENAI_BATCH_SIZE, @@ -165,6 +166,7 @@ app.state.config.BRAVE_SEARCH_API_KEY = BRAVE_SEARCH_API_KEY app.state.config.SERPSTACK_API_KEY = SERPSTACK_API_KEY app.state.config.SERPSTACK_HTTPS = SERPSTACK_HTTPS app.state.config.SERPER_API_KEY = SERPER_API_KEY +app.state.config.SERPLY_API_KEY = SERPLY_API_KEY app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = RAG_WEB_SEARCH_RESULT_COUNT app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = RAG_WEB_SEARCH_CONCURRENT_REQUESTS @@ -392,6 +394,7 @@ async def get_rag_config(user=Depends(get_admin_user)): "serpstack_api_key": app.state.config.SERPSTACK_API_KEY, "serpstack_https": app.state.config.SERPSTACK_HTTPS, "serper_api_key": app.state.config.SERPER_API_KEY, + "serply_api_key": app.state.config.SERPLY_API_KEY, "result_count": app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, "concurrent_requests": app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, }, @@ -419,6 +422,7 @@ class WebSearchConfig(BaseModel): serpstack_api_key: Optional[str] = None serpstack_https: Optional[bool] = None serper_api_key: Optional[str] = None + serply_api_key: Optional[str] = None result_count: Optional[int] = None concurrent_requests: Optional[int] = None @@ -469,6 +473,7 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_ app.state.config.SERPSTACK_API_KEY = form_data.web.search.serpstack_api_key app.state.config.SERPSTACK_HTTPS = form_data.web.search.serpstack_https app.state.config.SERPER_API_KEY = form_data.web.search.serper_api_key + app.state.config.SERPLY_API_KEY = form_data.web.search.serply_api_key app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = form_data.web.search.result_count app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = ( form_data.web.search.concurrent_requests @@ -497,6 +502,7 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_ "serpstack_api_key": app.state.config.SERPSTACK_API_KEY, "serpstack_https": app.state.config.SERPSTACK_HTTPS, "serper_api_key": app.state.config.SERPER_API_KEY, + "serply_api_key": app.state.config.SERPLY_API_KEY, "result_count": app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, "concurrent_requests": app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, }, @@ -744,6 +750,7 @@ def search_web(engine: str, query: str) -> list[SearchResult]: - BRAVE_SEARCH_API_KEY - SERPSTACK_API_KEY - SERPER_API_KEY + - SERPLY_API_KEY Args: query (str): The query to search for @@ -802,6 +809,15 @@ def search_web(engine: str, query: str) -> list[SearchResult]: ) else: raise Exception("No SERPER_API_KEY found in environment variables") + elif engine == "serply": + if app.state.config.SERPLY_API_KEY: + return search_serply( + app.state.config.SERPLY_API_KEY, + query, + app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, + ) + else: + raise Exception("No SERPLY_API_KEY found in environment variables") else: raise Exception("No search engine API key found in environment variables") @@ -809,6 +825,7 @@ def search_web(engine: str, query: str) -> list[SearchResult]: @app.post("/web/search") def store_web_search(form_data: SearchForm, user=Depends(get_current_user)): try: + logging.info(f"trying to web search with {app.state.config.RAG_WEB_SEARCH_ENGINE, form_data.query}") web_results = search_web( app.state.config.RAG_WEB_SEARCH_ENGINE, form_data.query ) diff --git a/backend/apps/rag/search/serply.py b/backend/apps/rag/search/serply.py new file mode 100644 index 000000000..12d5e51d2 --- /dev/null +++ b/backend/apps/rag/search/serply.py @@ -0,0 +1,68 @@ +import json +import logging + +import requests +from urllib.parse import urlencode + +from apps.rag.search.main import SearchResult +from config import SRC_LOG_LEVELS + +log = logging.getLogger(__name__) +log.setLevel(SRC_LOG_LEVELS["RAG"]) + + +def search_serply( + api_key: str, + query: str, + count: int, + hl: str = "us", + limit: int = 10, + device_type: str = "desktop", + proxy_location: str = "US" + ) -> list[SearchResult]: + """Search using serper.dev's API and return the results as a list of SearchResult objects. + + Args: + api_key (str): A serply.io API key + query (str): The query to search for + hl (str): Host Language code to display results in (reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages) + limit (int): The maximum number of results to return [10-100, defaults to 10] + """ + log.info("Searching with Serply") + + url = "https://api.serply.io/v1/search/" + + query_payload = { + "q": query, + "language": "en", + "num": limit, + "gl": proxy_location.upper(), + "hl": hl.lower() + } + + url = f"{url}{urlencode(query_payload)}" + headers = { + "X-API-KEY": api_key, + "X-User-Agent": device_type, + "User-Agent": "open-webui", + "X-Proxy-Location": proxy_location + } + + response = requests.request("GET", url, headers=headers) + response.raise_for_status() + + json_response = response.json() + log.info(f"results from serply search: {json_response}") + + results = sorted( + json_response.get("results", []), key=lambda x: x.get("realPosition", 0) + ) + + return [ + SearchResult( + link=result["link"], + title=result.get("title"), + snippet=result.get("description"), + ) + for result in results[:count] + ] diff --git a/backend/apps/rag/search/testdata/serply.json b/backend/apps/rag/search/testdata/serply.json new file mode 100644 index 000000000..7d575de40 --- /dev/null +++ b/backend/apps/rag/search/testdata/serply.json @@ -0,0 +1,206 @@ +{ + "ads": [], + "ads_count": 0, + "answers": [], + "results": [ + { + "title": "Apple", + "link": "https://www.apple.com/", + "description": "Discover the innovative world of Apple and shop everything iPhone, iPad, Apple Watch, Mac, and Apple TV, plus explore accessories, entertainment, ...", + "additional_links": [ + { + "text": "AppleApplehttps://www.apple.com", + "href": "https://www.apple.com/" + } + ], + "cite": {}, + "subdomains": [ + { + "title": "Support", + "link": "https://support.apple.com/", + "description": "SupportContact - iPhone Support - Billing and Subscriptions - Apple Repair" + }, + { + "title": "Store", + "link": "https://www.apple.com/store", + "description": "StoreShop iPhone - Shop iPad - App Store - Shop Mac - ..." + }, + { + "title": "Mac", + "link": "https://www.apple.com/mac/", + "description": "MacMacBook Air - MacBook Pro - iMac - Compare Mac models - Mac mini" + }, + { + "title": "iPad", + "link": "https://www.apple.com/ipad/", + "description": "iPadShop iPad - iPad Pro - iPad Air - Compare iPad models - ..." + }, + { + "title": "Watch", + "link": "https://www.apple.com/watch/", + "description": "WatchShop Apple Watch - Series 9 - SE - Ultra 2 - Nike - HermĂšs - ..." + } + ], + "realPosition": 1 + }, + { + "title": "Apple", + "link": "https://www.apple.com/", + "description": "Discover the innovative world of Apple and shop everything iPhone, iPad, Apple Watch, Mac, and Apple TV, plus explore accessories, entertainment, ...", + "additional_links": [ + { + "text": "AppleApplehttps://www.apple.com", + "href": "https://www.apple.com/" + } + ], + "cite": {}, + "realPosition": 2 + }, + { + "title": "Apple Inc.", + "link": "https://en.wikipedia.org/wiki/Apple_Inc.", + "description": "Apple Inc. (formerly Apple Computer, Inc.) is an American multinational corporation and technology company headquartered in Cupertino, California, ...", + "additional_links": [ + { + "text": "Apple Inc.Wikipediahttps://en.wikipedia.org âș wiki âș Apple_Inc", + "href": "https://en.wikipedia.org/wiki/Apple_Inc." + }, + { + "text": "", + "href": "https://en.wikipedia.org/wiki/Apple_Inc." + }, + { + "text": "History", + "href": "https://en.wikipedia.org/wiki/History_of_Apple_Inc." + }, + { + "text": "List of Apple products", + "href": "https://en.wikipedia.org/wiki/List_of_Apple_products" + }, + { + "text": "Litigation involving Apple Inc.", + "href": "https://en.wikipedia.org/wiki/Litigation_involving_Apple_Inc." + }, + { + "text": "Apple Park", + "href": "https://en.wikipedia.org/wiki/Apple_Park" + } + ], + "cite": { + "domain": "https://en.wikipedia.org âș wiki âș Apple_Inc", + "span": " âș wiki âș Apple_Inc" + }, + "realPosition": 3 + }, + { + "title": "Apple Inc. (AAPL) Company Profile & Facts", + "link": "https://finance.yahoo.com/quote/AAPL/profile/", + "description": "Apple Inc. designs, manufactures, and markets smartphones, personal computers, tablets, wearables, and accessories worldwide. The company offers iPhone, a line ...", + "additional_links": [ + { + "text": "Apple Inc. (AAPL) Company Profile & FactsYahoo Financehttps://finance.yahoo.com âș quote âș AAPL âș profile", + "href": "https://finance.yahoo.com/quote/AAPL/profile/" + } + ], + "cite": { + "domain": "https://finance.yahoo.com âș quote âș AAPL âș profile", + "span": " âș quote âș AAPL âș profile" + }, + "realPosition": 4 + }, + { + "title": "Apple Inc - Company Profile and News", + "link": "https://www.bloomberg.com/profile/company/AAPL:US", + "description": "Apple Inc. Apple Inc. designs, manufactures, and markets smartphones, personal computers, tablets, wearables and accessories, and sells a variety of related ...", + "additional_links": [ + { + "text": "Apple Inc - Company Profile and NewsBloomberghttps://www.bloomberg.com âș company âș AAPL:US", + "href": "https://www.bloomberg.com/profile/company/AAPL:US" + }, + { + "text": "", + "href": "https://www.bloomberg.com/profile/company/AAPL:US" + } + ], + "cite": { + "domain": "https://www.bloomberg.com âș company âș AAPL:US", + "span": " âș company âș AAPL:US" + }, + "realPosition": 5 + }, + { + "title": "Apple Inc. | History, Products, Headquarters, & Facts", + "link": "https://www.britannica.com/money/Apple-Inc", + "description": "May 22, 2024 â Apple Inc. is an American multinational technology company that revolutionized the technology sector through its innovation of computer ...", + "additional_links": [ + { + "text": "Apple Inc. | History, Products, Headquarters, & FactsBritannicahttps://www.britannica.com âș money âș Apple-Inc", + "href": "https://www.britannica.com/money/Apple-Inc" + }, + { + "text": "", + "href": "https://www.britannica.com/money/Apple-Inc" + } + ], + "cite": { + "domain": "https://www.britannica.com âș money âș Apple-Inc", + "span": " âș money âș Apple-Inc" + }, + "realPosition": 6 + } + ], + "shopping_ads": [], + "places": [ + { + "title": "Apple Inc." + }, + { + "title": "Apple Inc" + }, + { + "title": "Apple Inc" + } + ], + "related_searches": { + "images": [], + "text": [ + { + "title": "apple inc full form", + "link": "https://www.google.com/search?sca_esv=6b6df170a5c9891b&sca_upv=1&q=Apple+Inc+full+form&sa=X&ved=2ahUKEwjLxuSJwM-GAxUHODQIHYuJBhgQ1QJ6BAhPEAE" + }, + { + "title": "apple company history", + "link": "https://www.google.com/search?sca_esv=6b6df170a5c9891b&sca_upv=1&q=Apple+company+history&sa=X&ved=2ahUKEwjLxuSJwM-GAxUHODQIHYuJBhgQ1QJ6BAhOEAE" + }, + { + "title": "apple store", + "link": "https://www.google.com/search?sca_esv=6b6df170a5c9891b&sca_upv=1&q=Apple+Store&sa=X&ved=2ahUKEwjLxuSJwM-GAxUHODQIHYuJBhgQ1QJ6BAhQEAE" + }, + { + "title": "apple id", + "link": "https://www.google.com/search?sca_esv=6b6df170a5c9891b&sca_upv=1&q=Apple+id&sa=X&ved=2ahUKEwjLxuSJwM-GAxUHODQIHYuJBhgQ1QJ6BAhSEAE" + }, + { + "title": "apple inc industry", + "link": "https://www.google.com/search?sca_esv=6b6df170a5c9891b&sca_upv=1&q=Apple+Inc+industry&sa=X&ved=2ahUKEwjLxuSJwM-GAxUHODQIHYuJBhgQ1QJ6BAhREAE" + }, + { + "title": "apple login", + "link": "https://www.google.com/search?sca_esv=6b6df170a5c9891b&sca_upv=1&q=Apple+login&sa=X&ved=2ahUKEwjLxuSJwM-GAxUHODQIHYuJBhgQ1QJ6BAhTEAE" + } + ] + }, + "image_results": [], + "carousel": [], + "total": 2450000000, + "knowledge_graph": "", + "related_questions": [ + "What does the Apple Inc do?", + "Why did Apple change to Apple Inc?", + "Who owns Apple Inc.?", + "What is Apple Inc best known for?" + ], + "carousel_count": 0, + "ts": 2.491065263748169, + "device_type": null +} \ No newline at end of file diff --git a/src/lib/components/documents/Settings/WebParams.svelte b/src/lib/components/documents/Settings/WebParams.svelte index 89d4442b0..7b3a2e434 100644 --- a/src/lib/components/documents/Settings/WebParams.svelte +++ b/src/lib/components/documents/Settings/WebParams.svelte @@ -11,7 +11,7 @@ export let saveHandler: Function; let webConfig = null; - let webSearchEngines = ['searxng', 'google_pse', 'brave', 'serpstack', 'serper']; + let webSearchEngines = ['searxng', 'google_pse', 'brave', 'serpstack', 'serper', 'serply']; let youtubeLanguage = 'en'; let youtubeTranslation = null; @@ -188,6 +188,24 @@ + {:else if webConfig.search.engine === 'serply'} +