2024-06-11 14:19:08 +00:00
|
|
|
import logging
|
2024-08-14 12:46:31 +00:00
|
|
|
from typing import Optional
|
2024-08-27 22:10:27 +00:00
|
|
|
|
2024-12-12 02:05:42 +00:00
|
|
|
from open_webui.retrieval.web.main import SearchResult, get_filtered_results
|
2024-06-11 14:19:08 +00:00
|
|
|
from duckduckgo_search import DDGS
|
2024-09-04 14:54:48 +00:00
|
|
|
from open_webui.env import SRC_LOG_LEVELS
|
2024-06-11 14:19:08 +00:00
|
|
|
|
|
|
|
# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__)
|
|
|
|
# Set this logger's level from the "RAG" entry of SRC_LOG_LEVELS.
log.setLevel(SRC_LOG_LEVELS["RAG"])
|
|
|
|
|
|
|
|
|
2024-06-17 21:32:23 +00:00
|
|
|
def search_duckduckgo(
    query: str, count: int, filter_list: Optional[list[str]] = None
) -> list[SearchResult]:
    """
    Search using DuckDuckGo's Search API and return the results as a list of SearchResult objects.

    Args:
        query (str): The query to search for
        count (int): The number of results to return
        filter_list (Optional[list[str]]): When non-empty, the results are passed
            through get_filtered_results together with this list before being
            returned. Defaults to None (no filtering).

    Returns:
        list[SearchResult]: A list of search results (empty when the search
            yields nothing)
    """
    # Start from an empty list so the code below is well-defined even when the
    # search yields nothing; previously this name was only bound inside the
    # `if ddgs_gen:` branch, which raised UnboundLocalError on an empty search.
    search_results = []

    # Use the DDGS context manager to create a DDGS object
    with DDGS() as ddgs:
        # Use the ddgs.text() method to perform the search
        ddgs_gen = ddgs.text(
            query, safesearch="moderate", max_results=count, backend="api"
        )
        # Convert the search results into a list before leaving the `with` block
        if ddgs_gen:
            search_results = list(ddgs_gen)

    # Build one SearchResult per raw hit. "href" is accessed with [] (a hit
    # without it raises KeyError), while "title" and "body" fall back to None.
    results = [
        SearchResult(
            link=result["href"],
            title=result.get("title"),
            snippet=result.get("body"),
        )
        for result in search_results
    ]

    # Apply the optional filter only when a non-empty filter list is given
    if filter_list:
        results = get_filtered_results(results, filter_list)

    # Return the list of search results
    return results
|