fix: Filter to valid RAG web search URLs

This commit is contained in:
Rory 2025-02-03 17:37:20 -06:00
parent e3214d08b5
commit 121a13d4ed

View File

@@ -1,3 +1,5 @@
import validators
from typing import Optional
from urllib.parse import urlparse
@@ -10,6 +12,8 @@ def get_filtered_results(results, filter_list):
filtered_results = []
for result in results:
url = result.get("url") or result.get("link", "")
if not validators.url(url):
continue
domain = urlparse(url).netloc
if any(domain.endswith(filtered_domain) for filtered_domain in filter_list):
filtered_results.append(result)