From f7d8a6ccbae74ff3bafba0fcee14477d85590592 Mon Sep 17 00:00:00 2001 From: Jeetesh Chellani Date: Mon, 28 Oct 2024 11:33:52 +0200 Subject: [PATCH] feat: enable bing support --- README.md | 2 +- backend/open_webui/apps/retrieval/main.py | 14 ++++- backend/open_webui/apps/retrieval/web/bing.py | 52 +++++++++++++++++ .../apps/retrieval/web/testdata/bing.json | 58 +++++++++++++++++++ backend/open_webui/env.py | 8 +++ .../admin/Settings/WebSearch.svelte | 3 +- 6 files changed, 133 insertions(+), 4 deletions(-) create mode 100644 backend/open_webui/apps/retrieval/web/bing.py create mode 100644 backend/open_webui/apps/retrieval/web/testdata/bing.json diff --git a/README.md b/README.md index c4ca343ca..7e1f8d190 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ Open WebUI is an [extensible](https://github.com/open-webui/pipelines), feature- - 📚 **Local RAG Integration**: Dive into the future of chat interactions with groundbreaking Retrieval Augmented Generation (RAG) support. This feature seamlessly integrates document interactions into your chat experience. You can load documents directly into the chat or add files to your document library, effortlessly accessing them using the `#` command before a query. -- 🔍 **Web Search for RAG**: Perform web searches using providers like `SearXNG`, `Google PSE`, `Brave Search`, `serpstack`, `serper`, `Serply`, `DuckDuckGo`, `TavilySearch` and `SearchApi` and inject the results directly into your chat experience. +- 🔍 **Web Search for RAG**: Perform web searches using providers like `SearXNG`, `Google PSE`, `Brave Search`, `serpstack`, `serper`, `Serply`, `DuckDuckGo`, `TavilySearch`, `SearchApi` and `Bing` and inject the results directly into your chat experience. - 🌐 **Web Browsing Capability**: Seamlessly integrate websites into your chat experience using the `#` command followed by a URL. 
This feature allows you to incorporate web content directly into your conversations, enhancing the richness and depth of your interactions. diff --git a/backend/open_webui/apps/retrieval/main.py b/backend/open_webui/apps/retrieval/main.py index e67d1df23..0f81d6573 100644 --- a/backend/open_webui/apps/retrieval/main.py +++ b/backend/open_webui/apps/retrieval/main.py @@ -37,6 +37,7 @@ from open_webui.apps.retrieval.web.serper import search_serper from open_webui.apps.retrieval.web.serply import search_serply from open_webui.apps.retrieval.web.serpstack import search_serpstack from open_webui.apps.retrieval.web.tavily import search_tavily +from open_webui.apps.retrieval.web.bing import search_bing from open_webui.apps.retrieval.utils import ( @@ -96,10 +97,11 @@ from open_webui.config import ( TIKA_SERVER_URL, UPLOAD_DIR, YOUTUBE_LOADER_LANGUAGE, + DEFAULT_LOCALE, AppConfig, ) from open_webui.constants import ERROR_MESSAGES -from open_webui.env import SRC_LOG_LEVELS, DEVICE_TYPE, DOCKER +from open_webui.env import SRC_LOG_LEVELS, DEVICE_TYPE, DOCKER, BING_SEARCH_V7_ENDPOINT, BING_SEARCH_V7_SUBSCRIPTION_KEY from open_webui.utils.misc import ( calculate_sha256, calculate_sha256_string, @@ -174,7 +176,6 @@ app.state.config.SEARCHAPI_ENGINE = SEARCHAPI_ENGINE app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = RAG_WEB_SEARCH_RESULT_COUNT app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = RAG_WEB_SEARCH_CONCURRENT_REQUESTS - def update_embedding_model( embedding_model: str, auto_update: bool = False, @@ -1133,6 +1134,15 @@ def search_web(engine: str, query: str) -> list[SearchResult]: raise Exception("No SEARCHAPI_API_KEY found in environment variables") elif engine == "jina": return search_jina(query, app.state.config.RAG_WEB_SEARCH_RESULT_COUNT) + elif engine == "bing": + return search_bing( + BING_SEARCH_V7_SUBSCRIPTION_KEY, + BING_SEARCH_V7_ENDPOINT, + str(DEFAULT_LOCALE), + query, + app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, + 
app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + ) else: raise Exception("No search engine API key found in environment variables")
diff --git a/backend/open_webui/apps/retrieval/web/bing.py b/backend/open_webui/apps/retrieval/web/bing.py
new file mode 100644
index 000000000..f73472d52
--- /dev/null
+++ b/backend/open_webui/apps/retrieval/web/bing.py
@@ -0,0 +1,84 @@
+"""Bing Web Search API v7 client.
+
+Documentation: https://docs.microsoft.com/en-us/bing/search-apis/bing-web-search/overview
+"""
+
+import argparse
+import logging
+import os
+from pprint import pprint
+from typing import Optional
+
+import requests
+from open_webui.apps.retrieval.web.main import SearchResult, get_filtered_results
+from open_webui.env import SRC_LOG_LEVELS
+
+log = logging.getLogger(__name__)
+log.setLevel(SRC_LOG_LEVELS["RAG"])
+
+
+def search_bing(
+    subscription_key: str, endpoint: str, locale: str, query: str, count: int, filter_list: Optional[list[str]] = None
+) -> list[SearchResult]:
+    """Search Bing and return the web page results as ``SearchResult`` objects.
+
+    Args:
+        subscription_key: Value sent in the ``Ocp-Apim-Subscription-Key`` header.
+        endpoint: URL of the Bing Web Search endpoint.
+        locale: Market code sent as the ``mkt`` query parameter (e.g. ``en-US``).
+        query: The search query.
+        count: Number of web results to request.
+        filter_list: Optional filter handed to ``get_filtered_results``.
+
+    Raises:
+        Exception: Any request or parsing failure is logged and re-raised.
+    """
+    # "count" sets how many web results are returned; "answerCount" would only
+    # limit how many answer types (webPages, news, ...) the response includes.
+    params = {"q": query, "mkt": locale, "count": count}
+    headers = {"Ocp-Apim-Subscription-Key": subscription_key}
+
+    try:
+        # Bound the request so an unresponsive endpoint cannot hang the caller.
+        response = requests.get(endpoint, headers=headers, params=params, timeout=10)
+        response.raise_for_status()
+        results = response.json().get("webPages", {}).get("value", [])
+        if filter_list:
+            results = get_filtered_results(results, filter_list)
+        return [
+            SearchResult(link=result["url"], title=result.get("name"), snippet=result.get("snippet"))
+            for result in results
+        ]
+    except Exception as ex:
+        log.error(f"Error: {ex}")
+        raise
+
+
+def main():
+    """Run a Bing search from the command line and pretty-print the results."""
+    parser = argparse.ArgumentParser(description="Search Bing from the command line.")
+    # nargs="?" makes the positional optional; without it the default is never used.
+    parser.add_argument(
+        "query", type=str, nargs="?", default="Top 10 international news today", help="The search query."
+    )
+    parser.add_argument("--count", type=int, default=10, help="Number of search results to return.")
+    parser.add_argument("--filter", nargs="*", help="List of filters to apply to the search results.")
+    parser.add_argument(
+        "--locale", type=str, default="en-US", help="The locale to use for the search, maps to market in api"
+    )
+    args = parser.parse_args()
+
+    # Credentials and endpoint use the same variables and defaults as open_webui.env.
+    results = search_bing(
+        subscription_key=os.environ.get("BING_SEARCH_V7_SUBSCRIPTION_KEY", ""),
+        endpoint=os.environ.get("BING_SEARCH_V7_ENDPOINT", "https://api.bing.microsoft.com/v7.0/search"),
+        locale=args.locale,
+        query=args.query,
+        count=args.count,
+        filter_list=args.filter,
+    )
+    pprint(results)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/backend/open_webui/apps/retrieval/web/testdata/bing.json b/backend/open_webui/apps/retrieval/web/testdata/bing.json new file mode 100644 index 000000000..80324f3b4 --- /dev/null +++ b/backend/open_webui/apps/retrieval/web/testdata/bing.json @@ -0,0 +1,58 @@ +{ + "_type": "SearchResponse", + "queryContext": { + "originalQuery": "Top 10 international results" + }, + "webPages": { + "webSearchUrl": "https://www.bing.com/search?q=Top+10+international+results", + "totalEstimatedMatches": 687, + "value": [ + { + "id": "https://api.bing.microsoft.com/api/v7/#WebPages.0", + "name": "2024 Mexican Grand Prix - F1 results and latest standings ... - PlanetF1", + "url": "https://www.planetf1.com/news/f1-results-2024-mexican-grand-prix-race-standings", + "datePublished": "2024-10-27T00:00:00.0000000", + "datePublishedFreshnessText": "1 day ago", + "isFamilyFriendly": true, + "displayUrl": "https://www.planetf1.com/news/f1-results-2024-mexican-grand-prix-race-standings", + "snippet": "Nico Hulkenberg and Pierre Gasly completed the top 10. A full report of the Mexican Grand Prix is available at the bottom of this article. 
F1 results – 2024 Mexican Grand Prix", + "dateLastCrawled": "2024-10-28T07:15:00.0000000Z", + "cachedPageUrl": "https://cc.bingj.com/cache.aspx?q=Top+10+international+results&d=916492551782&mkt=en-US&setlang=en-US&w=zBsfaAPyF2tUrHFHr_vFFdUm8sng4g34", + "language": "en", + "isNavigational": false, + "noCache": false + }, + { + "id": "https://api.bing.microsoft.com/api/v7/#WebPages.1", + "name": "F1 Results Today: HUGE Verstappen penalties cause major title change", + "url": "https://www.gpfans.com/en/f1-news/1033512/f1-results-today-mexican-grand-prix-huge-max-verstappen-penalties-cause-major-title-change/", + "datePublished": "2024-10-27T00:00:00.0000000", + "datePublishedFreshnessText": "1 day ago", + "isFamilyFriendly": true, + "displayUrl": "https://www.gpfans.com/en/f1-news/1033512/f1-results-today-mexican-grand-prix-huge-max...", + "snippet": "Elsewhere, Mercedes duo Lewis Hamilton and George Russell came home in P4 and P5 respectively. Meanwhile, the surprise package of the day were Haas, with both Kevin Magnussen and Nico Hulkenberg finishing inside the points.. READ MORE: RB star issues apology after red flag CRASH at Mexican GP Mexican Grand Prix 2024 results. 1. Carlos Sainz [Ferrari] 2. 
Lando Norris [McLaren] - +4.705", + "dateLastCrawled": "2024-10-28T06:06:00.0000000Z", + "cachedPageUrl": "https://cc.bingj.com/cache.aspx?q=Top+10+international+results&d=2840656522642&mkt=en-US&setlang=en-US&w=-Tbkwxnq52jZCvG7l3CtgcwT1vwAjIUD", + "language": "en", + "isNavigational": false, + "noCache": false + }, + { + "id": "https://api.bing.microsoft.com/api/v7/#WebPages.2", + "name": "International Power Rankings: England flying, Kangaroos cruising, Fiji rise", + "url": "https://www.loverugbyleague.com/post/international-power-rankings-england-flying-kangaroos-cruising-fiji-rise", + "datePublished": "2024-10-28T00:00:00.0000000", + "datePublishedFreshnessText": "7 hours ago", + "isFamilyFriendly": true, + "displayUrl": "https://www.loverugbyleague.com/post/international-power-rankings-england-flying...", + "snippet": "LRL RECOMMENDS: England player ratings from first Test against Samoa as omnificent George Williams scores perfect 10. 2. Australia (Men) – SAME. The Kangaroos remain 2nd in our Power Rankings after their 22-10 win against New Zealand in Christchurch on Sunday. 
As was the case in their win against Tonga last week, Mal Meninga’s side weren ...", + "dateLastCrawled": "2024-10-28T07:09:00.0000000Z", + "cachedPageUrl": "https://cc.bingj.com/cache.aspx?q=Top+10+international+results&d=1535008462672&mkt=en-US&setlang=en-US&w=82ujhH4Kp0iuhCS7wh1xLUFYUeetaVVm", + "language": "en", + "isNavigational": false, + "noCache": false + } + ], + "someResultsRemoved": true + } +} diff --git a/backend/open_webui/env.py b/backend/open_webui/env.py index 4b61e1a89..96c3e67f7 100644 --- a/backend/open_webui/env.py +++ b/backend/open_webui/env.py @@ -382,3 +382,11 @@ else: #################################### OFFLINE_MODE = os.environ.get("OFFLINE_MODE", "false").lower() == "true" + +#################################### +# WEB SEARCH +#################################### + +BING_SEARCH_V7_ENDPOINT = os.environ.get("BING_SEARCH_V7_ENDPOINT", "https://api.bing.microsoft.com/v7.0/search") + +BING_SEARCH_V7_SUBSCRIPTION_KEY = os.environ.get("BING_SEARCH_V7_SUBSCRIPTION_KEY", "") \ No newline at end of file diff --git a/src/lib/components/admin/Settings/WebSearch.svelte b/src/lib/components/admin/Settings/WebSearch.svelte index 69f1f9104..49e6309b7 100644 --- a/src/lib/components/admin/Settings/WebSearch.svelte +++ b/src/lib/components/admin/Settings/WebSearch.svelte @@ -22,7 +22,8 @@ 'searchapi', 'duckduckgo', 'tavily', - 'jina' + 'jina', + 'bing' ]; let youtubeLanguage = 'en';