Merge pull request #13191 from tth37/feat_firecrawl_search_engine

feat: Add Firecrawl search engine
This commit is contained in:
Tim Jaeryang Baek
2025-04-29 08:38:28 -07:00
committed by GitHub
3 changed files with 91 additions and 1 deletions

View File

@@ -0,0 +1,49 @@
import logging
from typing import Optional, List
from urllib.parse import urljoin
import requests
from open_webui.retrieval.web.main import SearchResult, get_filtered_results
from open_webui.env import SRC_LOG_LEVELS
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
def search_firecrawl(
    firecrawl_url: str,
    firecrawl_api_key: str,
    query: str,
    count: int,
    filter_list: Optional[List[str]] = None,
    timeout: float = 30.0,
) -> List[SearchResult]:
    """Run a web search through a Firecrawl server's ``/v1/search`` endpoint.

    Args:
        firecrawl_url: Base URL of the Firecrawl server.
        firecrawl_api_key: API key, sent as a ``Bearer`` token.
        query: Search query string.
        count: Sent to the server as ``limit`` and also used to cap the
            number of results returned locally.
        filter_list: Optional filter passed to ``get_filtered_results``;
            when empty or ``None`` no filtering is applied.
        timeout: Seconds to wait for the server before giving up. Without
            it ``requests`` would wait indefinitely on a stalled server.

    Returns:
        A list of ``SearchResult`` objects built from each item's ``url``,
        ``title`` and ``description`` fields. Any failure (network error,
        non-2xx status, unexpected payload) is logged and an empty list is
        returned instead of raising.
    """
    try:
        # The leading slash makes urljoin replace any path component that
        # firecrawl_url may carry; only its scheme and host are kept.
        firecrawl_search_url = urljoin(firecrawl_url, "/v1/search")
        response = requests.post(
            firecrawl_search_url,
            headers={
                "User-Agent": "Open WebUI (https://github.com/open-webui/open-webui) RAG Bot",
                "Authorization": f"Bearer {firecrawl_api_key}",
            },
            json={
                "query": query,
                "limit": count,
            },
            timeout=timeout,
        )
        response.raise_for_status()

        # `or []` also covers a payload where "data" is present but null.
        results = response.json().get("data") or []
        if filter_list:
            results = get_filtered_results(results, filter_list)

        results = [
            SearchResult(
                link=result.get("url"),
                title=result.get("title"),
                snippet=result.get("description"),
            )
            for result in results[:count]
        ]
        log.info("Firecrawl search results: %s", results)
        return results
    except Exception as e:
        # Best-effort by design: a failing search engine must not break the
        # caller, so report the problem and return no results.
        log.error("Error in Firecrawl search: %s", e)
        return []

View File

@@ -62,6 +62,7 @@ from open_webui.retrieval.web.bing import search_bing
from open_webui.retrieval.web.exa import search_exa
from open_webui.retrieval.web.perplexity import search_perplexity
from open_webui.retrieval.web.sougou import search_sougou
from open_webui.retrieval.web.firecrawl import search_firecrawl
from open_webui.retrieval.web.external import search_external
from open_webui.retrieval.utils import (
@@ -1525,6 +1526,14 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]:
raise Exception(
"No SOUGOU_API_SID or SOUGOU_API_SK found in environment variables"
)
elif engine == "firecrawl":
return search_firecrawl(
request.app.state.config.FIRECRAWL_API_BASE_URL,
request.app.state.config.FIRECRAWL_API_KEY,
query,
request.app.state.config.WEB_SEARCH_RESULT_COUNT,
request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST,
)
elif engine == "external":
return search_external(
request.app.state.config.EXTERNAL_WEB_SEARCH_URL,