refac: web search settings

This commit is contained in:
Timothy J. Baek 2024-06-01 19:40:48 -07:00
parent ea6b8984ab
commit 912a704fdc
5 changed files with 300 additions and 86 deletions

View File

@ -97,9 +97,11 @@ from config import (
ENABLE_RAG_LOCAL_WEB_FETCH, ENABLE_RAG_LOCAL_WEB_FETCH,
YOUTUBE_LOADER_LANGUAGE, YOUTUBE_LOADER_LANGUAGE,
ENABLE_RAG_WEB_SEARCH, ENABLE_RAG_WEB_SEARCH,
RAG_WEB_SEARCH_ENGINE,
SEARXNG_QUERY_URL, SEARXNG_QUERY_URL,
GOOGLE_PSE_API_KEY, GOOGLE_PSE_API_KEY,
GOOGLE_PSE_ENGINE_ID, GOOGLE_PSE_ENGINE_ID,
BRAVE_SEARCH_API_KEY,
SERPSTACK_API_KEY, SERPSTACK_API_KEY,
SERPSTACK_HTTPS, SERPSTACK_HTTPS,
SERPER_API_KEY, SERPER_API_KEY,
@ -145,9 +147,12 @@ app.state.YOUTUBE_LOADER_TRANSLATION = None
app.state.config.ENABLE_RAG_WEB_SEARCH = ENABLE_RAG_WEB_SEARCH app.state.config.ENABLE_RAG_WEB_SEARCH = ENABLE_RAG_WEB_SEARCH
app.state.config.RAG_WEB_SEARCH_ENGINE = RAG_WEB_SEARCH_ENGINE
app.state.config.SEARXNG_QUERY_URL = SEARXNG_QUERY_URL app.state.config.SEARXNG_QUERY_URL = SEARXNG_QUERY_URL
app.state.config.GOOGLE_PSE_API_KEY = GOOGLE_PSE_API_KEY app.state.config.GOOGLE_PSE_API_KEY = GOOGLE_PSE_API_KEY
app.state.config.GOOGLE_PSE_ENGINE_ID = GOOGLE_PSE_ENGINE_ID app.state.config.GOOGLE_PSE_ENGINE_ID = GOOGLE_PSE_ENGINE_ID
app.state.config.BRAVE_SEARCH_API_KEY = BRAVE_SEARCH_API_KEY
app.state.config.SERPSTACK_API_KEY = SERPSTACK_API_KEY app.state.config.SERPSTACK_API_KEY = SERPSTACK_API_KEY
app.state.config.SERPSTACK_HTTPS = SERPSTACK_HTTPS app.state.config.SERPSTACK_HTTPS = SERPSTACK_HTTPS
app.state.config.SERPER_API_KEY = SERPER_API_KEY app.state.config.SERPER_API_KEY = SERPER_API_KEY
@ -351,23 +356,25 @@ async def get_rag_config(user=Depends(get_admin_user)):
"chunk_size": app.state.config.CHUNK_SIZE, "chunk_size": app.state.config.CHUNK_SIZE,
"chunk_overlap": app.state.config.CHUNK_OVERLAP, "chunk_overlap": app.state.config.CHUNK_OVERLAP,
}, },
"web_loader_ssl_verification": app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
"youtube": { "youtube": {
"language": app.state.config.YOUTUBE_LOADER_LANGUAGE, "language": app.state.config.YOUTUBE_LOADER_LANGUAGE,
"translation": app.state.YOUTUBE_LOADER_TRANSLATION, "translation": app.state.YOUTUBE_LOADER_TRANSLATION,
}, },
"web": { "web": {
"ssl_verification": app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
"search": { "search": {
"enable": app.state.config.ENABLE_RAG_WEB_SEARCH, "enable": app.state.config.ENABLE_RAG_WEB_SEARCH,
"engine": app.state.config.RAG_WEB_SEARCH_ENGINE,
"searxng_query_url": app.state.config.SEARXNG_QUERY_URL, "searxng_query_url": app.state.config.SEARXNG_QUERY_URL,
"google_pse_api_key": app.state.config.GOOGLE_PSE_API_KEY, "google_pse_api_key": app.state.config.GOOGLE_PSE_API_KEY,
"google_pse_engine_id": app.state.config.GOOGLE_PSE_ENGINE_ID, "google_pse_engine_id": app.state.config.GOOGLE_PSE_ENGINE_ID,
"brave_search_api_key": app.state.config.BRAVE_SEARCH_API_KEY,
"serpstack_api_key": app.state.config.SERPSTACK_API_KEY, "serpstack_api_key": app.state.config.SERPSTACK_API_KEY,
"serpstack_https": app.state.config.SERPSTACK_HTTPS, "serpstack_https": app.state.config.SERPSTACK_HTTPS,
"serper_api_key": app.state.config.SERPER_API_KEY, "serper_api_key": app.state.config.SERPER_API_KEY,
"result_count": app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, "result_count": app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
"concurrent_requests": app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, "concurrent_requests": app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
} },
}, },
} }
@ -384,9 +391,11 @@ class YoutubeLoaderConfig(BaseModel):
class WebSearchConfig(BaseModel): class WebSearchConfig(BaseModel):
enable: bool enable: bool
engine: Optional[str] = None
searxng_query_url: Optional[str] = None searxng_query_url: Optional[str] = None
google_pse_api_key: Optional[str] = None google_pse_api_key: Optional[str] = None
google_pse_engine_id: Optional[str] = None google_pse_engine_id: Optional[str] = None
brave_search_api_key: Optional[str] = None
serpstack_api_key: Optional[str] = None serpstack_api_key: Optional[str] = None
serpstack_https: Optional[bool] = None serpstack_https: Optional[bool] = None
serper_api_key: Optional[str] = None serper_api_key: Optional[str] = None
@ -394,11 +403,16 @@ class WebSearchConfig(BaseModel):
concurrent_requests: Optional[int] = None concurrent_requests: Optional[int] = None
class WebConfig(BaseModel):
search: WebSearchConfig
web_loader_ssl_verification: Optional[bool] = None
class ConfigUpdateForm(BaseModel): class ConfigUpdateForm(BaseModel):
pdf_extract_images: Optional[bool] = None pdf_extract_images: Optional[bool] = None
chunk: Optional[ChunkParamUpdateForm] = None chunk: Optional[ChunkParamUpdateForm] = None
web_loader_ssl_verification: Optional[bool] = None
youtube: Optional[YoutubeLoaderConfig] = None youtube: Optional[YoutubeLoaderConfig] = None
web: Optional[WebConfig] = None
@app.post("/config/update") @app.post("/config/update")
@ -409,34 +423,35 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
else app.state.config.PDF_EXTRACT_IMAGES else app.state.config.PDF_EXTRACT_IMAGES
) )
app.state.config.CHUNK_SIZE = ( if form_data.chunk is not None:
form_data.chunk.chunk_size app.state.config.CHUNK_SIZE = form_data.chunk.chunk_size
if form_data.chunk is not None app.state.config.CHUNK_OVERLAP = form_data.chunk.chunk_overlap
else app.state.config.CHUNK_SIZE
)
app.state.config.CHUNK_OVERLAP = ( if form_data.youtube is not None:
form_data.chunk.chunk_overlap app.state.config.YOUTUBE_LOADER_LANGUAGE = form_data.youtube.language
if form_data.chunk is not None app.state.YOUTUBE_LOADER_TRANSLATION = form_data.youtube.translation
else app.state.config.CHUNK_OVERLAP
)
if form_data.web is not None:
app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = ( app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = (
form_data.web_loader_ssl_verification form_data.web.web_loader_ssl_verification
if form_data.web_loader_ssl_verification != None
else app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION
) )
app.state.config.YOUTUBE_LOADER_LANGUAGE = ( app.state.config.ENABLE_RAG_WEB_SEARCH = form_data.web.search.enable
form_data.youtube.language app.state.config.RAG_WEB_SEARCH_ENGINE = form_data.web.search.engine
if form_data.youtube is not None app.state.config.SEARXNG_QUERY_URL = form_data.web.search.searxng_query_url
else app.state.config.YOUTUBE_LOADER_LANGUAGE app.state.config.GOOGLE_PSE_API_KEY = form_data.web.search.google_pse_api_key
app.state.config.GOOGLE_PSE_ENGINE_ID = (
form_data.web.search.google_pse_engine_id
) )
app.state.config.BRAVE_SEARCH_API_KEY = (
app.state.YOUTUBE_LOADER_TRANSLATION = ( form_data.web.search.brave_search_api_key
form_data.youtube.translation )
if form_data.youtube is not None app.state.config.SERPSTACK_API_KEY = form_data.web.search.serpstack_api_key
else app.state.YOUTUBE_LOADER_TRANSLATION app.state.config.SERPSTACK_HTTPS = form_data.web.search.serpstack_https
app.state.config.SERPER_API_KEY = form_data.web.search.serper_api_key
app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = form_data.web.search.result_count
app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = (
form_data.web.search.concurrent_requests
) )
return { return {
@ -446,11 +461,26 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
"chunk_size": app.state.config.CHUNK_SIZE, "chunk_size": app.state.config.CHUNK_SIZE,
"chunk_overlap": app.state.config.CHUNK_OVERLAP, "chunk_overlap": app.state.config.CHUNK_OVERLAP,
}, },
"web_loader_ssl_verification": app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
"youtube": { "youtube": {
"language": app.state.config.YOUTUBE_LOADER_LANGUAGE, "language": app.state.config.YOUTUBE_LOADER_LANGUAGE,
"translation": app.state.YOUTUBE_LOADER_TRANSLATION, "translation": app.state.YOUTUBE_LOADER_TRANSLATION,
}, },
"web": {
"ssl_verification": app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
"search": {
"enable": app.state.config.ENABLE_RAG_WEB_SEARCH,
"engine": app.state.config.RAG_WEB_SEARCH_ENGINE,
"searxng_query_url": app.state.config.SEARXNG_QUERY_URL,
"google_pse_api_key": app.state.config.GOOGLE_PSE_API_KEY,
"google_pse_engine_id": app.state.config.GOOGLE_PSE_ENGINE_ID,
"brave_search_api_key": app.state.config.BRAVE_SEARCH_API_KEY,
"serpstack_api_key": app.state.config.SERPSTACK_API_KEY,
"serpstack_https": app.state.config.SERPSTACK_HTTPS,
"serper_api_key": app.state.config.SERPER_API_KEY,
"result_count": app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
"concurrent_requests": app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
},
},
} }
@ -690,7 +720,9 @@ def resolve_hostname(hostname):
def store_web_search(form_data: SearchForm, user=Depends(get_current_user)): def store_web_search(form_data: SearchForm, user=Depends(get_current_user)):
try: try:
try: try:
web_results = search_web(form_data.query) web_results = search_web(
app.state.config.RAG_WEB_SEARCH_ENGINE, form_data.query
)
except Exception as e: except Exception as e:
log.exception(e) log.exception(e)
raise HTTPException( raise HTTPException(

View File

@ -538,7 +538,7 @@ class RerankCompressor(BaseDocumentCompressor):
return final_results return final_results
def search_web(query: str) -> list[SearchResult]: def search_web(engine: str, query: str) -> list[SearchResult]:
"""Search the web using a search engine and return the results as a list of SearchResult objects. """Search the web using a search engine and return the results as a list of SearchResult objects.
Will look for a search engine API key in environment variables in the following order: Will look for a search engine API key in environment variables in the following order:
- SEARXNG_QUERY_URL - SEARXNG_QUERY_URL
@ -552,15 +552,34 @@ def search_web(query: str) -> list[SearchResult]:
""" """
# TODO: add playwright to search the web # TODO: add playwright to search the web
if engine == "searxng":
if SEARXNG_QUERY_URL: if SEARXNG_QUERY_URL:
return search_searxng(SEARXNG_QUERY_URL, query) return search_searxng(SEARXNG_QUERY_URL, query)
elif GOOGLE_PSE_API_KEY and GOOGLE_PSE_ENGINE_ID: else:
raise Exception("No SEARXNG_QUERY_URL found in environment variables")
elif engine == "google_pse":
if GOOGLE_PSE_API_KEY and GOOGLE_PSE_ENGINE_ID:
return search_google_pse(GOOGLE_PSE_API_KEY, GOOGLE_PSE_ENGINE_ID, query) return search_google_pse(GOOGLE_PSE_API_KEY, GOOGLE_PSE_ENGINE_ID, query)
elif BRAVE_SEARCH_API_KEY: else:
raise Exception(
"No GOOGLE_PSE_API_KEY or GOOGLE_PSE_ENGINE_ID found in environment variables"
)
elif engine == "brave":
if BRAVE_SEARCH_API_KEY:
return search_brave(BRAVE_SEARCH_API_KEY, query) return search_brave(BRAVE_SEARCH_API_KEY, query)
elif SERPSTACK_API_KEY: else:
return search_serpstack(SERPSTACK_API_KEY, query, https_enabled=SERPSTACK_HTTPS) raise Exception("No BRAVE_SEARCH_API_KEY found in environment variables")
elif SERPER_API_KEY: elif engine == "serpstack":
if SERPSTACK_API_KEY:
return search_serpstack(
SERPSTACK_API_KEY, query, https_enabled=SERPSTACK_HTTPS
)
else:
raise Exception("No SERPSTACK_API_KEY found in environment variables")
elif engine == "serper":
if SERPER_API_KEY:
return search_serper(SERPER_API_KEY, query) return search_serper(SERPER_API_KEY, query)
else:
raise Exception("No SERPER_API_KEY found in environment variables")
else: else:
raise Exception("No search engine API key found in environment variables") raise Exception("No search engine API key found in environment variables")

View File

@ -773,6 +773,11 @@ ENABLE_RAG_WEB_SEARCH = PersistentConfig(
os.getenv("ENABLE_RAG_WEB_SEARCH", "False").lower() == "true", os.getenv("ENABLE_RAG_WEB_SEARCH", "False").lower() == "true",
) )
RAG_WEB_SEARCH_ENGINE = PersistentConfig(
"RAG_WEB_SEARCH_ENGINE",
"rag.web.search.engine",
os.getenv("RAG_WEB_SEARCH_ENGINE", ""),
)
SEARXNG_QUERY_URL = PersistentConfig( SEARXNG_QUERY_URL = PersistentConfig(
"SEARXNG_QUERY_URL", "SEARXNG_QUERY_URL",

View File

@ -1,5 +1,6 @@
<script lang="ts"> <script lang="ts">
import { getRAGConfig, updateRAGConfig } from '$lib/apis/rag'; import { getRAGConfig, updateRAGConfig } from '$lib/apis/rag';
import Switch from '$lib/components/common/Switch.svelte';
import { documents, models } from '$lib/stores'; import { documents, models } from '$lib/stores';
import { onMount, getContext } from 'svelte'; import { onMount, getContext } from 'svelte';
@ -9,14 +10,15 @@
export let saveHandler: Function; export let saveHandler: Function;
let webLoaderSSLVerification = true; let webConfig = null;
let webSearchEngines = ['searxng', 'google_pse', 'brave', 'serpstack', 'serper'];
let youtubeLanguage = 'en'; let youtubeLanguage = 'en';
let youtubeTranslation = null; let youtubeTranslation = null;
const submitHandler = async () => { const submitHandler = async () => {
const res = await updateRAGConfig(localStorage.token, { const res = await updateRAGConfig(localStorage.token, {
web_loader_ssl_verification: webLoaderSSLVerification, web: webConfig,
youtube: { youtube: {
language: youtubeLanguage.split(',').map((lang) => lang.trim()), language: youtubeLanguage.split(',').map((lang) => lang.trim()),
translation: youtubeTranslation translation: youtubeTranslation
@ -28,7 +30,8 @@
const res = await getRAGConfig(localStorage.token); const res = await getRAGConfig(localStorage.token);
if (res) { if (res) {
webLoaderSSLVerification = res.web_loader_ssl_verification; webConfig = res.web;
youtubeLanguage = res.youtube.language.join(','); youtubeLanguage = res.youtube.language.join(',');
youtubeTranslation = res.youtube.translation; youtubeTranslation = res.youtube.translation;
} }
@ -37,12 +40,161 @@
<form <form
class="flex flex-col h-full justify-between space-y-3 text-sm" class="flex flex-col h-full justify-between space-y-3 text-sm"
on:submit|preventDefault={() => { on:submit|preventDefault={async () => {
submitHandler(); await submitHandler();
saveHandler(); saveHandler();
}} }}
> >
<div class=" space-y-3 pr-1.5 overflow-y-scroll h-full max-h-[22rem]"> <div class=" space-y-3 pr-1.5 overflow-y-scroll h-full max-h-[22rem]">
{#if webConfig}
<div>
<div class=" mb-1 text-sm font-medium">
{$i18n.t('Web Search')}
</div>
<div>
<div class=" py-0.5 flex w-full justify-between">
<div class=" self-center text-xs font-medium">
{$i18n.t('Enable Web Search')}
</div>
<Switch bind:state={webConfig.search.enable} />
</div>
</div>
<div class=" py-0.5 flex w-full justify-between">
<div class=" self-center text-xs font-medium">{$i18n.t('Web Search Engine')}</div>
<div class="flex items-center relative">
<select
class="dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
bind:value={webConfig.search.engine}
placeholder="Select a engine"
required
>
<option disabled selected value="">Select a engine</option>
{#each webSearchEngines as engine}
<option value={engine}>{engine}</option>
{/each}
</select>
</div>
</div>
{#if webConfig.search.engine !== ''}
<div class="mt-1.5">
{#if webConfig.search.engine === 'searxng'}
<div>
<div class=" self-center text-xs font-medium mb-1">
{$i18n.t('Searxng Query URL')}
</div>
<div class="flex w-full">
<div class="flex-1">
<input
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
type="text"
placeholder={$i18n.t('Enter Searxng Query URL')}
bind:value={webConfig.search.searxng_query_url}
autocomplete="off"
/>
</div>
</div>
</div>
{:else if webConfig.search.engine === 'google_pse'}
<div>
<div class=" self-center text-xs font-medium mb-1">
{$i18n.t('Google PSE API Key')}
</div>
<div class="flex w-full">
<div class="flex-1">
<input
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
type="text"
placeholder={$i18n.t('Enter Google PSE API Key')}
bind:value={webConfig.search.google_pse_api_key}
autocomplete="off"
/>
</div>
</div>
</div>
<div class="mt-1.5">
<div class=" self-center text-xs font-medium mb-1">
{$i18n.t('Google PSE Engine Id')}
</div>
<div class="flex w-full">
<div class="flex-1">
<input
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
type="text"
placeholder={$i18n.t('Enter Google PSE Engine Id')}
bind:value={webConfig.search.google_pse_engine_id}
autocomplete="off"
/>
</div>
</div>
</div>
{:else if webConfig.search.engine === 'brave'}
<div>
<div class=" self-center text-xs font-medium mb-1">
{$i18n.t('Brave Search API Key')}
</div>
<div class="flex w-full">
<div class="flex-1">
<input
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
type="text"
placeholder={$i18n.t('Enter Brave Search API Key')}
bind:value={webConfig.search.brave_search_api_key}
autocomplete="off"
/>
</div>
</div>
</div>
{:else if webConfig.search.engine === 'serpstack'}
<div>
<div class=" self-center text-xs font-medium mb-1">
{$i18n.t('Serpstack API Key')}
</div>
<div class="flex w-full">
<div class="flex-1">
<input
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
type="text"
placeholder={$i18n.t('Enter Serpstack API Key')}
bind:value={webConfig.search.serpstack_api_key}
autocomplete="off"
/>
</div>
</div>
</div>
{:else if webConfig.search.engine === 'serper'}
<div>
<div class=" self-center text-xs font-medium mb-1">
{$i18n.t('Serper API Key')}
</div>
<div class="flex w-full">
<div class="flex-1">
<input
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
type="text"
placeholder={$i18n.t('Enter Serper API Key')}
bind:value={webConfig.search.serper_api_key}
autocomplete="off"
/>
</div>
</div>
</div>
{/if}
</div>
{/if}
</div>
<hr class=" dark:border-gray-850 my-2" />
<div> <div>
<div class=" mb-1 text-sm font-medium"> <div class=" mb-1 text-sm font-medium">
{$i18n.t('Web Loader Settings')} {$i18n.t('Web Loader Settings')}
@ -57,12 +209,12 @@
<button <button
class="p-1 px-3 text-xs flex rounded transition" class="p-1 px-3 text-xs flex rounded transition"
on:click={() => { on:click={() => {
webLoaderSSLVerification = !webLoaderSSLVerification; webConfig.ssl_verification = !webConfig.ssl_verification;
submitHandler(); submitHandler();
}} }}
type="button" type="button"
> >
{#if webLoaderSSLVerification === true} {#if webConfig.ssl_verification === true}
<span class="ml-2 self-center">{$i18n.t('On')}</span> <span class="ml-2 self-center">{$i18n.t('On')}</span>
{:else} {:else}
<span class="ml-2 self-center">{$i18n.t('Off')}</span> <span class="ml-2 self-center">{$i18n.t('Off')}</span>
@ -90,6 +242,7 @@
</div> </div>
</div> </div>
</div> </div>
{/if}
</div> </div>
<div class="flex justify-end pt-3 text-sm font-medium"> <div class="flex justify-end pt-3 text-sm font-medium">
<button <button

View File

@ -1,11 +1,13 @@
<script> <script>
import { getContext } from 'svelte'; import { getContext, tick } from 'svelte';
import Modal from '../common/Modal.svelte'; import Modal from '../common/Modal.svelte';
import General from './Settings/General.svelte'; import General from './Settings/General.svelte';
import ChunkParams from './Settings/ChunkParams.svelte'; import ChunkParams from './Settings/ChunkParams.svelte';
import QueryParams from './Settings/QueryParams.svelte'; import QueryParams from './Settings/QueryParams.svelte';
import WebParams from './Settings/WebParams.svelte'; import WebParams from './Settings/WebParams.svelte';
import { toast } from 'svelte-sonner'; import { toast } from 'svelte-sonner';
import { config } from '$lib/stores';
import { getBackendConfig } from '$lib/apis';
const i18n = getContext('i18n'); const i18n = getContext('i18n');
@ -171,8 +173,11 @@
/> />
{:else if selectedTab === 'web'} {:else if selectedTab === 'web'}
<WebParams <WebParams
saveHandler={() => { saveHandler={async () => {
toast.success($i18n.t('Settings saved successfully!')); toast.success($i18n.t('Settings saved successfully!'));
await tick();
await config.set(await getBackendConfig());
}} }}
/> />
{/if} {/if}