2024-06-10 00:44:34 +00:00
|
|
|
import logging
|
2024-08-14 12:46:31 +00:00
|
|
|
from typing import Optional
|
2024-06-10 00:44:34 +00:00
|
|
|
from urllib.parse import urlencode
|
|
|
|
|
2024-08-27 22:10:27 +00:00
|
|
|
import requests
|
2024-12-12 02:05:42 +00:00
|
|
|
from open_webui.retrieval.web.main import SearchResult, get_filtered_results
|
2024-09-04 14:54:48 +00:00
|
|
|
from open_webui.env import SRC_LOG_LEVELS
|
2024-06-10 00:44:34 +00:00
|
|
|
|
|
|
|
# Module-level logger; its verbosity is taken from the "RAG" entry of
# SRC_LOG_LEVELS.
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
|
|
|
|
|
|
|
|
|
|
|
|
def search_serply(
    api_key: str,
    query: str,
    count: int,
    hl: str = "us",
    limit: int = 10,
    device_type: str = "desktop",
    proxy_location: str = "US",
    filter_list: Optional[list[str]] = None,
) -> list[SearchResult]:
    """Search using serply.io's API and return the results as a list of SearchResult objects.

    Args:
        api_key (str): A serply.io API key, sent in the ``X-API-KEY`` header
        query (str): The query to search for
        count (int): The maximum number of SearchResult objects to return
        hl (str): Host Language code to display results in (reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages); lower-cased before being sent
        limit (int): The maximum number of results to request from the API [10-100, defaults to 10]
        device_type (str): Value sent in the ``X-User-Agent`` header
        proxy_location (str): Sent upper-cased as the ``gl`` parameter and verbatim in the ``X-Proxy-Location`` header
        filter_list (Optional[list[str]]): When non-empty, the results are passed through ``get_filtered_results`` with this list before being truncated to ``count``

    Returns:
        list[SearchResult]: At most ``count`` results, ordered by their
        ``realPosition`` field (entries without one sort as position 0).
        Entries that carry no ``link`` key are skipped.

    Raises:
        requests.HTTPError: If the API answers with an error status code.
        requests.Timeout: If the API does not respond within the timeout.
    """
    log.info("Searching with Serply")

    url = "https://api.serply.io/v1/search/"

    query_payload = {
        "q": query,
        "language": "en",
        "num": limit,
        "gl": proxy_location.upper(),
        "hl": hl.lower(),
    }

    # NOTE(review): the encoded parameters are appended directly after the
    # trailing slash (no "?"). This looks like Serply's path-style endpoint;
    # confirm against the API docs before "fixing" it.
    url = f"{url}{urlencode(query_payload)}"
    headers = {
        "X-API-KEY": api_key,
        "X-User-Agent": device_type,
        "User-Agent": "open-webui",
        "X-Proxy-Location": proxy_location,
    }

    # Without a timeout, requests waits forever on a stalled connection and
    # would block the caller indefinitely.
    response = requests.request("GET", url, headers=headers, timeout=30)
    response.raise_for_status()

    json_response = response.json()
    # Lazy %-formatting: the payload is only rendered if the record is emitted.
    log.info("results from serply search: %s", json_response)

    results = sorted(
        json_response.get("results", []), key=lambda x: x.get("realPosition", 0)
    )
    if filter_list:
        results = get_filtered_results(results, filter_list)

    # A result without a link cannot be turned into a SearchResult; skip it
    # instead of letting one malformed entry abort the whole search.
    results = [result for result in results if "link" in result]

    return [
        SearchResult(
            link=result["link"],
            title=result.get("title"),
            snippet=result.get("description"),
        )
        for result in results[:count]
    ]
|