Set filter_list as optional param in serply.py

This commit is contained in:
Que Nguyen 2024-06-17 14:37:52 +07:00 committed by GitHub
parent 6b8290fa6d
commit bcb84235b1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,10 +1,10 @@
import json import json
import logging import logging
from typing import List from typing import List, Optional
import requests import requests
from urllib.parse import urlencode from urllib.parse import urlencode
from apps.rag.search.main import SearchResult, filter_by_whitelist from apps.rag.search.main import SearchResult, get_filtered_results
from config import SRC_LOG_LEVELS from config import SRC_LOG_LEVELS
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@ -15,11 +15,11 @@ def search_serply(
api_key: str, api_key: str,
query: str, query: str,
count: int, count: int,
whitelist:List[str],
hl: str = "us", hl: str = "us",
limit: int = 10, limit: int = 10,
device_type: str = "desktop", device_type: str = "desktop",
proxy_location: str = "US", proxy_location: str = "US",
filter_list: Optional[List[str]] = None,
) -> list[SearchResult]: ) -> list[SearchResult]:
"""Search using Serply's API and return the results as a list of SearchResult objects. """Search using Serply's API and return the results as a list of SearchResult objects.
@ -58,12 +58,13 @@ def search_serply(
results = sorted( results = sorted(
json_response.get("results", []), key=lambda x: x.get("realPosition", 0) json_response.get("results", []), key=lambda x: x.get("realPosition", 0)
) )
filtered_results = filter_by_whitelist(results, whitelist) if filter_list:
results = get_filtered_results(results, filter_list)
return [ return [
SearchResult( SearchResult(
link=result["link"], link=result["link"],
title=result.get("title"), title=result.get("title"),
snippet=result.get("description"), snippet=result.get("description"),
) )
for result in filtered_results[:count] for result in results[:count]
] ]