From 121a13d4ed62a77ea6c1bc9c0b5628644c264112 Mon Sep 17 00:00:00 2001
From: Rory <16675082+roryeckel@users.noreply.github.com>
Date: Mon, 3 Feb 2025 17:37:20 -0600
Subject: [PATCH] fix: Filter to valid RAG web search URLs

---
 backend/open_webui/retrieval/web/main.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/backend/open_webui/retrieval/web/main.py b/backend/open_webui/retrieval/web/main.py
index 1af8a70aa..28a749e7d 100644
--- a/backend/open_webui/retrieval/web/main.py
+++ b/backend/open_webui/retrieval/web/main.py
@@ -1,3 +1,5 @@
+import validators
+
 from typing import Optional
 from urllib.parse import urlparse
 
@@ -10,6 +12,8 @@ def get_filtered_results(results, filter_list):
     filtered_results = []
     for result in results:
         url = result.get("url") or result.get("link", "")
+        if not validators.url(url):
+            continue
         domain = urlparse(url).netloc
         if any(domain.endswith(filtered_domain) for filtered_domain in filter_list):
             filtered_results.append(result)