mirror of
				https://github.com/open-webui/open-webui
				synced 2025-06-26 18:26:48 +00:00 
			
		
		
		
	refac: web search
This commit is contained in:
		
							parent
							
								
									912a704fdc
								
							
						
					
					
						commit
						999d2bc21b
					
				@ -59,9 +59,16 @@ from apps.rag.utils import (
 | 
			
		||||
    query_doc_with_hybrid_search,
 | 
			
		||||
    query_collection,
 | 
			
		||||
    query_collection_with_hybrid_search,
 | 
			
		||||
    search_web,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
from apps.rag.search.brave import search_brave
 | 
			
		||||
from apps.rag.search.google_pse import search_google_pse
 | 
			
		||||
from apps.rag.search.main import SearchResult
 | 
			
		||||
from apps.rag.search.searxng import search_searxng
 | 
			
		||||
from apps.rag.search.serper import search_serper
 | 
			
		||||
from apps.rag.search.serpstack import search_serpstack
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
from utils.misc import (
 | 
			
		||||
    calculate_sha256,
 | 
			
		||||
    calculate_sha256_string,
 | 
			
		||||
@ -716,19 +723,78 @@ def resolve_hostname(hostname):
 | 
			
		||||
    return ipv4_addresses, ipv6_addresses
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def search_web(engine: str, query: str) -> list[SearchResult]:
    """Run ``query`` through the web search backend selected by ``engine``.

    The settings for each backend are read from ``app.state.config``.
    Recognised engine identifiers and the setting(s) each one requires:

    - "searxng": SEARXNG_QUERY_URL
    - "google_pse": GOOGLE_PSE_API_KEY + GOOGLE_PSE_ENGINE_ID
    - "brave": BRAVE_SEARCH_API_KEY
    - "serpstack": SERPSTACK_API_KEY (SERPSTACK_HTTPS is forwarded as
      ``https_enabled``)
    - "serper": SERPER_API_KEY

    Args:
        engine (str): Identifier of the search backend to use
        query (str): The query to search for

    Returns:
        The value returned by the selected backend helper (annotated as a
        list of SearchResult objects).

    Raises:
        Exception: If a required setting for the chosen engine is empty, or
            if ``engine`` is not one of the identifiers listed above.
    """

    # TODO: add playwright to search the web
    if engine == "searxng":
        config = app.state.config
        if not config.SEARXNG_QUERY_URL:
            raise Exception("No SEARXNG_QUERY_URL found in environment variables")
        return search_searxng(config.SEARXNG_QUERY_URL, query)

    if engine == "google_pse":
        config = app.state.config
        # Both the API key and the engine id must be set for Google PSE.
        if not (config.GOOGLE_PSE_API_KEY and config.GOOGLE_PSE_ENGINE_ID):
            raise Exception(
                "No GOOGLE_PSE_API_KEY or GOOGLE_PSE_ENGINE_ID found in environment variables"
            )
        return search_google_pse(
            config.GOOGLE_PSE_API_KEY,
            config.GOOGLE_PSE_ENGINE_ID,
            query,
        )

    if engine == "brave":
        config = app.state.config
        if not config.BRAVE_SEARCH_API_KEY:
            raise Exception("No BRAVE_SEARCH_API_KEY found in environment variables")
        return search_brave(config.BRAVE_SEARCH_API_KEY, query)

    if engine == "serpstack":
        config = app.state.config
        if not config.SERPSTACK_API_KEY:
            raise Exception("No SERPSTACK_API_KEY found in environment variables")
        return search_serpstack(
            config.SERPSTACK_API_KEY,
            query,
            https_enabled=config.SERPSTACK_HTTPS,
        )

    if engine == "serper":
        config = app.state.config
        if not config.SERPER_API_KEY:
            raise Exception("No SERPER_API_KEY found in environment variables")
        return search_serper(config.SERPER_API_KEY, query)

    # None of the known engine identifiers matched.
    raise Exception("No search engine API key found in environment variables")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@app.post("/web/search")
 | 
			
		||||
def store_web_search(form_data: SearchForm, user=Depends(get_current_user)):
 | 
			
		||||
    try:
 | 
			
		||||
        try:
 | 
			
		||||
            web_results = search_web(
 | 
			
		||||
                app.state.config.RAG_WEB_SEARCH_ENGINE, form_data.query
 | 
			
		||||
            )
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            log.exception(e)
 | 
			
		||||
            raise HTTPException(
 | 
			
		||||
                status_code=status.HTTP_400_BAD_REQUEST,
 | 
			
		||||
                detail=ERROR_MESSAGES.WEB_SEARCH_ERROR,
 | 
			
		||||
            )
 | 
			
		||||
        web_results = search_web(
 | 
			
		||||
            app.state.config.RAG_WEB_SEARCH_ENGINE, form_data.query
 | 
			
		||||
        )
 | 
			
		||||
    except Exception as e:
 | 
			
		||||
        log.exception(e)
 | 
			
		||||
 | 
			
		||||
        print(e)
 | 
			
		||||
        raise HTTPException(
 | 
			
		||||
            status_code=status.HTTP_400_BAD_REQUEST,
 | 
			
		||||
            detail=ERROR_MESSAGES.WEB_SEARCH_ERROR(e),
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    try:
 | 
			
		||||
        urls = [result.link for result in web_results]
 | 
			
		||||
        loader = get_web_loader(urls)
 | 
			
		||||
        data = loader.load()
 | 
			
		||||
 | 
			
		||||
@ -20,12 +20,7 @@ from langchain.retrievers import (
 | 
			
		||||
 | 
			
		||||
from typing import Optional
 | 
			
		||||
 | 
			
		||||
from apps.rag.search.brave import search_brave
 | 
			
		||||
from apps.rag.search.google_pse import search_google_pse
 | 
			
		||||
from apps.rag.search.main import SearchResult
 | 
			
		||||
from apps.rag.search.searxng import search_searxng
 | 
			
		||||
from apps.rag.search.serper import search_serper
 | 
			
		||||
from apps.rag.search.serpstack import search_serpstack
 | 
			
		||||
 | 
			
		||||
from config import (
 | 
			
		||||
    SRC_LOG_LEVELS,
 | 
			
		||||
    CHROMA_CLIENT,
 | 
			
		||||
@ -536,50 +531,3 @@ class RerankCompressor(BaseDocumentCompressor):
 | 
			
		||||
            )
 | 
			
		||||
            final_results.append(doc)
 | 
			
		||||
        return final_results
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def search_web(engine: str, query: str) -> list[SearchResult]:
    """Dispatch ``query`` to the web search backend named by ``engine``.

    Each backend needs its own module-level setting(s) to be non-empty:

    - "searxng": SEARXNG_QUERY_URL
    - "google_pse": GOOGLE_PSE_API_KEY + GOOGLE_PSE_ENGINE_ID
    - "brave": BRAVE_SEARCH_API_KEY
    - "serpstack": SERPSTACK_API_KEY (SERPSTACK_HTTPS is passed through as
      ``https_enabled``)
    - "serper": SERPER_API_KEY

    Args:
        engine (str): Identifier of the search backend to use
        query (str): The query to search for

    Returns:
        The value returned by the selected backend helper (annotated as a
        list of SearchResult objects).

    Raises:
        Exception: If a required setting for the chosen engine is empty, or
            if ``engine`` is not one of the identifiers listed above.
    """

    # TODO: add playwright to search the web
    if engine == "searxng":
        if not SEARXNG_QUERY_URL:
            raise Exception("No SEARXNG_QUERY_URL found in environment variables")
        return search_searxng(SEARXNG_QUERY_URL, query)

    if engine == "google_pse":
        # Both the API key and the engine id must be set for Google PSE.
        if not (GOOGLE_PSE_API_KEY and GOOGLE_PSE_ENGINE_ID):
            raise Exception(
                "No GOOGLE_PSE_API_KEY or GOOGLE_PSE_ENGINE_ID found in environment variables"
            )
        return search_google_pse(GOOGLE_PSE_API_KEY, GOOGLE_PSE_ENGINE_ID, query)

    if engine == "brave":
        if not BRAVE_SEARCH_API_KEY:
            raise Exception("No BRAVE_SEARCH_API_KEY found in environment variables")
        return search_brave(BRAVE_SEARCH_API_KEY, query)

    if engine == "serpstack":
        if not SERPSTACK_API_KEY:
            raise Exception("No SERPSTACK_API_KEY found in environment variables")
        return search_serpstack(
            SERPSTACK_API_KEY, query, https_enabled=SERPSTACK_HTTPS
        )

    if engine == "serper":
        if not SERPER_API_KEY:
            raise Exception("No SERPER_API_KEY found in environment variables")
        return search_serper(SERPER_API_KEY, query)

    # None of the known engine identifiers matched.
    raise Exception("No search engine API key found in environment variables")
 | 
			
		||||
 | 
			
		||||
@ -82,5 +82,5 @@ class ERROR_MESSAGES(str, Enum):
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    WEB_SEARCH_ERROR = (
 | 
			
		||||
        "Oops! Something went wrong while searching the web. Please try again later."
 | 
			
		||||
        lambda err="": f"{err if err else 'Oops! Something went wrong while searching the web.'}"
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
@ -518,8 +518,10 @@ export const runWebSearch = async (
 | 
			
		||||
	token: string,
 | 
			
		||||
	query: string,
 | 
			
		||||
	collection_name?: string
 | 
			
		||||
): Promise<SearchDocument | undefined> => {
 | 
			
		||||
	return await fetch(`${RAG_API_BASE_URL}/web/search`, {
 | 
			
		||||
): Promise<SearchDocument | null> => {
 | 
			
		||||
	let error = null;
 | 
			
		||||
 | 
			
		||||
	const res = await fetch(`${RAG_API_BASE_URL}/web/search`, {
 | 
			
		||||
		method: 'POST',
 | 
			
		||||
		headers: {
 | 
			
		||||
			'Content-Type': 'application/json',
 | 
			
		||||
@ -536,8 +538,15 @@ export const runWebSearch = async (
 | 
			
		||||
		})
 | 
			
		||||
		.catch((err) => {
 | 
			
		||||
			console.log(err);
 | 
			
		||||
			return undefined;
 | 
			
		||||
			error = err.detail;
 | 
			
		||||
			return null;
 | 
			
		||||
		});
 | 
			
		||||
 | 
			
		||||
	if (error) {
 | 
			
		||||
		throw error;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return res;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
export interface SearchDocument {
 | 
			
		||||
 | 
			
		||||
@ -473,9 +473,34 @@
 | 
			
		||||
		};
 | 
			
		||||
		messages = messages;
 | 
			
		||||
 | 
			
		||||
		const results = await runWebSearch(localStorage.token, searchQuery);
 | 
			
		||||
		if (results === undefined) {
 | 
			
		||||
			toast.warning($i18n.t('No search results found'));
 | 
			
		||||
		const results = await runWebSearch(localStorage.token, searchQuery).catch((error) => {
 | 
			
		||||
			console.log(error);
 | 
			
		||||
			toast.error(error);
 | 
			
		||||
 | 
			
		||||
			return null;
 | 
			
		||||
		});
 | 
			
		||||
 | 
			
		||||
		if (results) {
 | 
			
		||||
			responseMessage.status = {
 | 
			
		||||
				...responseMessage.status,
 | 
			
		||||
				done: true,
 | 
			
		||||
				description: $i18n.t('Searched {{count}} sites', { count: results.filenames.length }),
 | 
			
		||||
				urls: results.filenames
 | 
			
		||||
			};
 | 
			
		||||
 | 
			
		||||
			if (responseMessage?.files ?? undefined === undefined) {
 | 
			
		||||
				responseMessage.files = [];
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			responseMessage.files.push({
 | 
			
		||||
				collection_name: results.collection_name,
 | 
			
		||||
				name: searchQuery,
 | 
			
		||||
				type: 'web_search_results',
 | 
			
		||||
				urls: results.filenames
 | 
			
		||||
			});
 | 
			
		||||
 | 
			
		||||
			messages = messages;
 | 
			
		||||
		} else {
 | 
			
		||||
			responseMessage.status = {
 | 
			
		||||
				...responseMessage.status,
 | 
			
		||||
				done: true,
 | 
			
		||||
@ -483,28 +508,7 @@
 | 
			
		||||
				description: 'No search results found'
 | 
			
		||||
			};
 | 
			
		||||
			messages = messages;
 | 
			
		||||
			return;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		responseMessage.status = {
 | 
			
		||||
			...responseMessage.status,
 | 
			
		||||
			done: true,
 | 
			
		||||
			description: $i18n.t('Searched {{count}} sites', { count: results.filenames.length }),
 | 
			
		||||
			urls: results.filenames
 | 
			
		||||
		};
 | 
			
		||||
 | 
			
		||||
		if (responseMessage?.files ?? undefined === undefined) {
 | 
			
		||||
			responseMessage.files = [];
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		responseMessage.files.push({
 | 
			
		||||
			collection_name: results.collection_name,
 | 
			
		||||
			name: searchQuery,
 | 
			
		||||
			type: 'web_search_results',
 | 
			
		||||
			urls: results.filenames
 | 
			
		||||
		});
 | 
			
		||||
 | 
			
		||||
		messages = messages;
 | 
			
		||||
	};
 | 
			
		||||
 | 
			
		||||
	const sendPromptOllama = async (model, userPrompt, responseMessageId, _chatId) => {
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user