Merge pull request #3380 from Yash-1511/main

feat: add jina_search as new websearch provider
This commit is contained in:
Timothy Jaeryang Baek 2024-06-22 15:19:38 -07:00 committed by GitHub
commit fd96c9c68d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 46 additions and 1 deletions

View File

@ -77,6 +77,7 @@ from apps.rag.search.serpstack import search_serpstack
from apps.rag.search.serply import search_serply from apps.rag.search.serply import search_serply
from apps.rag.search.duckduckgo import search_duckduckgo from apps.rag.search.duckduckgo import search_duckduckgo
from apps.rag.search.tavily import search_tavily from apps.rag.search.tavily import search_tavily
from apps.rag.search.jina_search import search_jina
from utils.misc import ( from utils.misc import (
calculate_sha256, calculate_sha256,
@ -856,6 +857,8 @@ def search_web(engine: str, query: str) -> list[SearchResult]:
) )
else: else:
raise Exception("No TAVILY_API_KEY found in environment variables") raise Exception("No TAVILY_API_KEY found in environment variables")
elif engine == "jina":
return search_jina(query, app.state.config.RAG_WEB_SEARCH_RESULT_COUNT)
else: else:
raise Exception("No search engine API key found in environment variables") raise Exception("No search engine API key found in environment variables")

View File

@ -0,0 +1,41 @@
import logging
import requests
from yarl import URL
from apps.rag.search.main import SearchResult
from config import SRC_LOG_LEVELS
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
def search_jina(query: str, count: int) -> list[SearchResult]:
"""
Search using Jina's Search API and return the results as a list of SearchResult objects.
Args:
query (str): The query to search for
count (int): The number of results to return
Returns:
List[SearchResult]: A list of search results
"""
jina_search_endpoint = "https://s.jina.ai/"
headers = {
"Accept": "application/json",
}
url = str(URL(jina_search_endpoint + query))
response = requests.get(url, headers=headers)
response.raise_for_status()
data = response.json()
results = []
for result in data["data"][:count]:
results.append(
SearchResult(
link=result["url"],
title=result.get("title"),
snippet=result.get("content"),
)
)
return results

View File

@ -19,7 +19,8 @@
'serper', 'serper',
'serply', 'serply',
'duckduckgo', 'duckduckgo',
'tavily' 'tavily',
'jina'
]; ];
let youtubeLanguage = 'en'; let youtubeLanguage = 'en';