diff --git a/backend/open_webui/retrieval/web/firecrawl.py b/backend/open_webui/retrieval/web/firecrawl.py new file mode 100644 index 000000000..a85fc51fb --- /dev/null +++ b/backend/open_webui/retrieval/web/firecrawl.py @@ -0,0 +1,49 @@ +import logging +from typing import Optional, List +from urllib.parse import urljoin + +import requests +from open_webui.retrieval.web.main import SearchResult, get_filtered_results +from open_webui.env import SRC_LOG_LEVELS + +log = logging.getLogger(__name__) +log.setLevel(SRC_LOG_LEVELS["RAG"]) + + +def search_firecrawl( + firecrawl_url: str, + firecrawl_api_key: str, + query: str, + count: int, + filter_list: Optional[List[str]] = None, +) -> List[SearchResult]: + try: + firecrawl_search_url = urljoin(firecrawl_url, "/v1/search") + response = requests.post( + firecrawl_search_url, + headers={ + "User-Agent": "Open WebUI (https://github.com/open-webui/open-webui) RAG Bot", + "Authorization": f"Bearer {firecrawl_api_key}", + }, + json={ + "query": query, + "limit": count, + }, + ) + response.raise_for_status() + results = response.json().get("data", []) + if filter_list: + results = get_filtered_results(results, filter_list) + results = [ + SearchResult( + link=result.get("url"), + title=result.get("title"), + snippet=result.get("description"), + ) + for result in results[:count] + ] + log.info(f"External search results: {results}") + return results + except Exception as e: + log.error(f"Error in External search: {e}") + return [] diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index affaf50ad..36897cdea 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -62,6 +62,7 @@ from open_webui.retrieval.web.bing import search_bing from open_webui.retrieval.web.exa import search_exa from open_webui.retrieval.web.perplexity import search_perplexity from open_webui.retrieval.web.sougou import search_sougou +from open_webui.retrieval.web.firecrawl import search_firecrawl from open_webui.retrieval.web.external import search_external from open_webui.retrieval.utils import ( @@ -1525,6 +1526,14 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: raise Exception( "No SOUGOU_API_SID or SOUGOU_API_SK found in environment variables" ) + elif engine == "firecrawl": + return search_firecrawl( + request.app.state.config.FIRECRAWL_API_BASE_URL, + request.app.state.config.FIRECRAWL_API_KEY, + query, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, + ) elif engine == "external": return search_external( request.app.state.config.EXTERNAL_WEB_SEARCH_URL, diff --git a/src/lib/components/admin/Settings/WebSearch.svelte b/src/lib/components/admin/Settings/WebSearch.svelte index 2f43601fa..e25852cc3 100644 --- a/src/lib/components/admin/Settings/WebSearch.svelte +++ b/src/lib/components/admin/Settings/WebSearch.svelte @@ -32,6 +32,7 @@ 'exa', 'perplexity', 'sougou', + 'firecrawl', 'external' ]; let webLoaderEngines = ['playwright', 'firecrawl', 'tavily', 'external']; @@ -480,6 +481,37 @@ /> + {:else if webConfig.WEB_SEARCH_ENGINE === 'firecrawl'} +