open-webui/backend/open_webui/apps/retrieval/web/duckduckgo.py

51 lines
1.6 KiB
Python
Raw Normal View History

import logging
2024-08-14 12:46:31 +00:00
from typing import Optional
2024-08-27 22:10:27 +00:00
2024-09-28 00:23:09 +00:00
from open_webui.apps.retrieval.web.main import SearchResult, get_filtered_results
from duckduckgo_search import DDGS
from open_webui.env import SRC_LOG_LEVELS
# Module-level logger; its level is taken from the "RAG" entry of SRC_LOG_LEVELS.
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
2024-06-17 21:32:23 +00:00
def search_duckduckgo(
    query: str, count: int, filter_list: Optional[list[str]] = None
) -> list[SearchResult]:
    """
    Search using DuckDuckGo's Search API and return the results as a list of SearchResult objects.

    Args:
        query (str): The query to search for
        count (int): The number of results to request (passed as max_results)
        filter_list (Optional[list[str]]): When non-empty, the results are
            passed through get_filtered_results together with this list

    Returns:
        list[SearchResult]: A list of search results; empty when the search
            yields nothing
    """
    # Start from an empty list so that a search with no hits falls through to
    # an empty result instead of failing on an unassigned local variable.
    search_results = []

    # Use the DDGS context manager to create a DDGS object
    with DDGS() as ddgs:
        # Use the ddgs.text() method to perform the search
        ddgs_gen = ddgs.text(
            query, safesearch="moderate", max_results=count, backend="api"
        )
        # Only copy the hits when the search returned something
        if ddgs_gen:
            search_results = list(ddgs_gen)

    # Convert each raw hit into a SearchResult. "href" is required (a missing
    # key raises KeyError); "title" and "body" fall back to None when absent.
    results = [
        SearchResult(
            link=result["href"],
            title=result.get("title"),
            snippet=result.get("body"),
        )
        for result in search_results
    ]

    if filter_list:
        results = get_filtered_results(results, filter_list)

    # Return the list of search results
    return results