Merge pull request #13191 from tth37/feat_firecrawl_search_engine

feat: Add Firecrawl search engine
This commit is contained in:
Tim Jaeryang Baek 2025-04-29 08:38:28 -07:00 committed by GitHub
commit e87f2669fa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 91 additions and 1 deletions

View File

@ -0,0 +1,49 @@
import logging
from typing import Optional, List
from urllib.parse import urljoin
import requests
from open_webui.retrieval.web.main import SearchResult, get_filtered_results
from open_webui.env import SRC_LOG_LEVELS
# Module-level logger; its level is taken from the "RAG" entry of SRC_LOG_LEVELS.
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
def search_firecrawl(
    firecrawl_url: str,
    firecrawl_api_key: str,
    query: str,
    count: int,
    filter_list: Optional[List[str]] = None,
    timeout: float = 60.0,
) -> List[SearchResult]:
    """Run a web search through a Firecrawl instance.

    Sends a POST request to the ``/v1/search`` endpoint of ``firecrawl_url``
    and converts the entries of the response's ``data`` list into
    ``SearchResult`` objects.

    Args:
        firecrawl_url: Base URL of the Firecrawl API.
        firecrawl_api_key: API key, sent as a Bearer token.
        query: Search query string.
        count: Maximum number of results to request and to return.
        filter_list: Optional filter list passed to ``get_filtered_results``;
            when empty or ``None`` no filtering is applied.
        timeout: Seconds to wait for the HTTP request before giving up, so
            an unresponsive endpoint cannot block the caller indefinitely.

    Returns:
        Up to ``count`` search results. An empty list is returned if the
        request, response parsing, or result conversion raises; the error
        is logged rather than propagated.
    """
    try:
        # The leading slash makes urljoin replace any path component that
        # is present in the base URL with "/v1/search".
        firecrawl_search_url = urljoin(firecrawl_url, "/v1/search")
        response = requests.post(
            firecrawl_search_url,
            headers={
                "User-Agent": "Open WebUI (https://github.com/open-webui/open-webui) RAG Bot",
                "Authorization": f"Bearer {firecrawl_api_key}",
            },
            json={
                "query": query,
                "limit": count,
            },
            timeout=timeout,
        )
        response.raise_for_status()

        # Treat a missing or null "data" field as "no results".
        results = response.json().get("data") or []
        if filter_list:
            results = get_filtered_results(results, filter_list)

        results = [
            SearchResult(
                link=result.get("url"),
                title=result.get("title"),
                snippet=result.get("description"),
            )
            for result in results[:count]
        ]
        log.info(f"Firecrawl search results: {results}")
        return results
    except Exception as e:
        # Deliberate best-effort: a failing search engine yields no results
        # instead of propagating the error to the caller.
        log.error(f"Error in Firecrawl search: {e}")
        return []

View File

@ -62,6 +62,7 @@ from open_webui.retrieval.web.bing import search_bing
from open_webui.retrieval.web.exa import search_exa
from open_webui.retrieval.web.perplexity import search_perplexity
from open_webui.retrieval.web.sougou import search_sougou
from open_webui.retrieval.web.firecrawl import search_firecrawl
from open_webui.retrieval.web.external import search_external
from open_webui.retrieval.utils import (
@ -1525,6 +1526,14 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]:
raise Exception(
"No SOUGOU_API_SID or SOUGOU_API_SK found in environment variables"
)
elif engine == "firecrawl":
return search_firecrawl(
request.app.state.config.FIRECRAWL_API_BASE_URL,
request.app.state.config.FIRECRAWL_API_KEY,
query,
request.app.state.config.WEB_SEARCH_RESULT_COUNT,
request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST,
)
elif engine == "external":
return search_external(
request.app.state.config.EXTERNAL_WEB_SEARCH_URL,

View File

@ -32,6 +32,7 @@
'exa',
'perplexity',
'sougou',
'firecrawl',
'external'
];
let webLoaderEngines = ['playwright', 'firecrawl', 'tavily', 'external'];
@ -480,6 +481,37 @@
/>
</div>
</div>
{:else if webConfig.WEB_SEARCH_ENGINE === 'firecrawl'}
<div class="mb-2.5 flex w-full flex-col">
<div>
<div class=" self-center text-xs font-medium mb-1">
{$i18n.t('Firecrawl API Base URL')}
</div>
<div class="flex w-full">
<div class="flex-1">
<input
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
type="text"
placeholder={$i18n.t('Enter Firecrawl API Base URL')}
bind:value={webConfig.FIRECRAWL_API_BASE_URL}
autocomplete="off"
/>
</div>
</div>
</div>
<div class="mt-2">
<div class=" self-center text-xs font-medium mb-1">
{$i18n.t('Firecrawl API Key')}
</div>
<SensitiveInput
placeholder={$i18n.t('Enter Firecrawl API Key')}
bind:value={webConfig.FIRECRAWL_API_KEY}
/>
</div>
</div>
{:else if webConfig.WEB_SEARCH_ENGINE === 'external'}
<div class="mb-2.5 flex w-full flex-col">
<div>
@ -668,7 +700,7 @@
</div>
</div>
</div>
{:else if webConfig.WEB_LOADER_ENGINE === 'firecrawl'}
{:else if webConfig.WEB_LOADER_ENGINE === 'firecrawl' && webConfig.WEB_SEARCH_ENGINE !== 'firecrawl'}
<div class="mb-2.5 flex w-full flex-col">
<div>
<div class=" self-center text-xs font-medium mb-1">