From 7c9fb9199ebe254b147f25cbc55693875fbda88d Mon Sep 17 00:00:00 2001
From: Yash-1511
Date: Sat, 22 Jun 2024 20:06:15 +0530
Subject: [PATCH] feat: add jina_search as new websearch provider

---
Review note: jina_search.py was revised during review (query is now
percent-encoded, the request has a timeout, the file ends with a newline),
so the blob hash on its "index" line no longer matches the file content.

 backend/apps/rag/main.py                      |  3 ++
 backend/apps/rag/search/jina_search.py        | 53 +++++++++++++++++++
 .../admin/Settings/WebSearch.svelte           |  3 +-
 3 files changed, 58 insertions(+), 1 deletion(-)
 create mode 100644 backend/apps/rag/search/jina_search.py

diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py
index 4bd5da86c..bfc60ae4a 100644
--- a/backend/apps/rag/main.py
+++ b/backend/apps/rag/main.py
@@ -74,6 +74,7 @@ from apps.rag.search.serpstack import search_serpstack
 from apps.rag.search.serply import search_serply
 from apps.rag.search.duckduckgo import search_duckduckgo
 from apps.rag.search.tavily import search_tavily
+from apps.rag.search.jina_search import search_jina
 
 from utils.misc import (
     calculate_sha256,
@@ -841,6 +842,8 @@ def search_web(engine: str, query: str) -> list[SearchResult]:
             )
         else:
             raise Exception("No TAVILY_API_KEY found in environment variables")
+    elif engine == "jina":
+        return search_jina(query, app.state.config.RAG_WEB_SEARCH_RESULT_COUNT)
     else:
         raise Exception("No search engine API key found in environment variables")
 
diff --git a/backend/apps/rag/search/jina_search.py b/backend/apps/rag/search/jina_search.py
new file mode 100644
index 000000000..b37be8e16
--- /dev/null
+++ b/backend/apps/rag/search/jina_search.py
@@ -0,0 +1,53 @@
+import logging
+from urllib.parse import quote
+
+import requests
+from yarl import URL
+
+from apps.rag.search.main import SearchResult
+from config import SRC_LOG_LEVELS
+
+log = logging.getLogger(__name__)
+log.setLevel(SRC_LOG_LEVELS["RAG"])
+
+# Upper bound (seconds) on one search request, so that a stalled upstream
+# cannot block the caller forever. Chosen generously; tune as needed.
+JINA_SEARCH_TIMEOUT = 60
+
+
+def search_jina(query: str, count: int) -> list[SearchResult]:
+    """
+    Search using Jina's Search API and return the results as a list of SearchResult objects.
+
+    Args:
+        query (str): The query to search for
+        count (int): The maximum number of results to return
+
+    Returns:
+        list[SearchResult]: A list of search results
+
+    Raises:
+        requests.HTTPError: If the API responds with an error status
+        requests.Timeout: If the API does not respond in time
+    """
+    jina_search_endpoint = "https://s.jina.ai/"
+    headers = {
+        "Accept": "application/json",
+    }
+    # The query travels as a single path segment, so reserved characters
+    # ("/", "?", "#", "%", ...) must be percent-encoded; otherwise part of
+    # the query is parsed as URL structure and never reaches the API.
+    # encoded=True stops yarl from re-quoting the already escaped string.
+    url = str(URL(jina_search_endpoint + quote(query, safe=""), encoded=True))
+    response = requests.get(url, headers=headers, timeout=JINA_SEARCH_TIMEOUT)
+    response.raise_for_status()
+    data = response.json()
+
+    return [
+        SearchResult(
+            link=result["url"],
+            title=result.get("title"),
+            snippet=result.get("content"),
+        )
+        for result in data["data"][:count]
+    ]
diff --git a/src/lib/components/admin/Settings/WebSearch.svelte b/src/lib/components/admin/Settings/WebSearch.svelte
index b9f43a9ab..4c4202388 100644
--- a/src/lib/components/admin/Settings/WebSearch.svelte
+++ b/src/lib/components/admin/Settings/WebSearch.svelte
@@ -19,7 +19,8 @@
 		'serper',
 		'serply',
 		'duckduckgo',
-		'tavily'
+		'tavily',
+		'jina'
 	];
 
 	let youtubeLanguage = 'en';