From 92dbeb19390023d54faf76880506202279aab262 Mon Sep 17 00:00:00 2001
From: tth37
Date: Thu, 24 Apr 2025 14:57:28 +0800
Subject: [PATCH] feat: Add Firecrawl search engine

---
 backend/open_webui/retrieval/web/firecrawl.py | 69 +++++++++++++++++++
 backend/open_webui/routers/retrieval.py       |  9 +++
 .../admin/Settings/WebSearch.svelte           | 34 ++++++++-
 3 files changed, 111 insertions(+), 1 deletion(-)
 create mode 100644 backend/open_webui/retrieval/web/firecrawl.py

diff --git a/backend/open_webui/retrieval/web/firecrawl.py b/backend/open_webui/retrieval/web/firecrawl.py
new file mode 100644
index 000000000..a85fc51fb
--- /dev/null
+++ b/backend/open_webui/retrieval/web/firecrawl.py
@@ -0,0 +1,69 @@
+import logging
+from typing import Optional, List
+from urllib.parse import urljoin
+
+import requests
+from open_webui.retrieval.web.main import SearchResult, get_filtered_results
+from open_webui.env import SRC_LOG_LEVELS
+
+log = logging.getLogger(__name__)
+log.setLevel(SRC_LOG_LEVELS["RAG"])
+
+
+def search_firecrawl(
+    firecrawl_url: str,
+    firecrawl_api_key: str,
+    query: str,
+    count: int,
+    filter_list: Optional[List[str]] = None,
+) -> List[SearchResult]:
+    """Search the web through a Firecrawl instance's ``/v1/search`` endpoint.
+
+    Args:
+        firecrawl_url: Base URL of the Firecrawl API.
+        firecrawl_api_key: Key sent as a Bearer token.
+        query: Search query text.
+        count: Maximum number of results to request and return.
+        filter_list: Optional domain filter list; when non-empty the raw
+            results are passed through ``get_filtered_results`` first.
+
+    Returns:
+        Up to ``count`` search results, or an empty list if the request or
+        the response handling fails (the error is logged, not raised).
+    """
+    try:
+        # The leading "/" makes urljoin replace any path in firecrawl_url,
+        # so only its scheme and host are kept.
+        firecrawl_search_url = urljoin(firecrawl_url, "/v1/search")
+        response = requests.post(
+            firecrawl_search_url,
+            headers={
+                "User-Agent": "Open WebUI (https://github.com/open-webui/open-webui) RAG Bot",
+                "Authorization": f"Bearer {firecrawl_api_key}",
+            },
+            json={
+                "query": query,
+                "limit": count,
+            },
+            # requests waits forever by default; bound the call so a stalled
+            # server cannot block the caller indefinitely.
+            timeout=60,
+        )
+        response.raise_for_status()
+        # "data" may be missing or null; treat both as no results.
+        results = response.json().get("data") or []
+        if filter_list:
+            results = get_filtered_results(results, filter_list)
+        results = [
+            SearchResult(
+                link=result.get("url"),
+                title=result.get("title"),
+                snippet=result.get("description"),
+            )
+            for result in results[:count]
+        ]
+        log.info(f"Firecrawl search results: {results}")
+        return results
+    except Exception as e:
+        log.error(f"Error in Firecrawl search: {e}")
+        return []
diff --git 
a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index a582bd9ba..1351274d1 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -61,6 +61,7 @@ from open_webui.retrieval.web.bing import search_bing from open_webui.retrieval.web.exa import search_exa from open_webui.retrieval.web.perplexity import search_perplexity from open_webui.retrieval.web.sougou import search_sougou +from open_webui.retrieval.web.firecrawl import search_firecrawl from open_webui.retrieval.web.external import search_external from open_webui.retrieval.utils import ( @@ -1499,6 +1500,14 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: raise Exception( "No SOUGOU_API_SID or SOUGOU_API_SK found in environment variables" ) + elif engine == "firecrawl": + return search_firecrawl( + request.app.state.config.FIRECRAWL_API_BASE_URL, + request.app.state.config.FIRECRAWL_API_KEY, + query, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, + ) elif engine == "external": return search_external( request.app.state.config.EXTERNAL_WEB_SEARCH_URL, diff --git a/src/lib/components/admin/Settings/WebSearch.svelte b/src/lib/components/admin/Settings/WebSearch.svelte index 96b887405..19cd1d633 100644 --- a/src/lib/components/admin/Settings/WebSearch.svelte +++ b/src/lib/components/admin/Settings/WebSearch.svelte @@ -31,6 +31,7 @@ 'exa', 'perplexity', 'sougou', + 'firecrawl', 'external' ]; let webLoaderEngines = ['playwright', 'firecrawl', 'tavily', 'external']; @@ -432,6 +433,37 @@ /> + {:else if webConfig.WEB_SEARCH_ENGINE === 'firecrawl'} +
+
+
+ {$i18n.t('Firecrawl API Base URL')} +
+ +
+
+ +
+
+
+ +
+
+ {$i18n.t('Firecrawl API Key')} +
+ + +
+
{:else if webConfig.WEB_SEARCH_ENGINE === 'external'}
@@ -620,7 +652,7 @@
- {:else if webConfig.WEB_LOADER_ENGINE === 'firecrawl'} + {:else if webConfig.WEB_LOADER_ENGINE === 'firecrawl' && webConfig.WEB_SEARCH_ENGINE !== 'firecrawl'}