mirror of
https://github.com/open-webui/open-webui
synced 2025-05-22 13:54:20 +00:00
Merge pull request #9314 from roryeckel/validate-rag-urls
Validate URLs returned by search engine
This commit is contained in:
commit
1b8dc673e7
@ -1,3 +1,5 @@
|
|||||||
|
import validators
|
||||||
|
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
@ -10,6 +12,8 @@ def get_filtered_results(results, filter_list):
|
|||||||
filtered_results = []
|
filtered_results = []
|
||||||
for result in results:
|
for result in results:
|
||||||
url = result.get("url") or result.get("link", "")
|
url = result.get("url") or result.get("link", "")
|
||||||
|
if not validators.url(url):
|
||||||
|
continue
|
||||||
domain = urlparse(url).netloc
|
domain = urlparse(url).netloc
|
||||||
if any(domain.endswith(filtered_domain) for filtered_domain in filter_list):
|
if any(domain.endswith(filtered_domain) for filtered_domain in filter_list):
|
||||||
filtered_results.append(result)
|
filtered_results.append(result)
|
||||||
|
@ -42,6 +42,15 @@ def validate_url(url: Union[str, Sequence[str]]):
|
|||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def safe_validate_urls(url: Sequence[str]) -> Sequence[str]:
    """Return the entries of ``url`` that pass ``validate_url``.

    Each entry is checked on its own. An entry is dropped when
    ``validate_url`` returns a falsy value or raises ``ValueError``; any
    other exception propagates to the caller. The relative order of the
    surviving entries is preserved.

    Args:
        url: The candidate URLs to check, one string per entry.

    Returns:
        A new list holding only the entries that validated.
    """
    accepted = []
    for candidate in url:
        try:
            is_valid = validate_url(candidate)
        except ValueError:
            # This entry was rejected; skip it and keep checking the rest
            # instead of failing the whole batch.
            continue
        if is_valid:
            accepted.append(candidate)
    return accepted
|
||||||
|
|
||||||
def resolve_hostname(hostname):
|
def resolve_hostname(hostname):
|
||||||
# Get address information
|
# Get address information
|
||||||
@ -86,11 +95,11 @@ def get_web_loader(
|
|||||||
verify_ssl: bool = True,
|
verify_ssl: bool = True,
|
||||||
requests_per_second: int = 2,
|
requests_per_second: int = 2,
|
||||||
):
|
):
|
||||||
# Check if the URL is valid
|
# Check if the URLs are valid
|
||||||
if not validate_url(urls):
|
safe_urls = safe_validate_urls([urls] if isinstance(urls, str) else urls)
|
||||||
raise ValueError(ERROR_MESSAGES.INVALID_URL)
|
|
||||||
return SafeWebBaseLoader(
|
return SafeWebBaseLoader(
|
||||||
urls,
|
safe_urls,
|
||||||
verify_ssl=verify_ssl,
|
verify_ssl=verify_ssl,
|
||||||
requests_per_second=requests_per_second,
|
requests_per_second=requests_per_second,
|
||||||
continue_on_failure=True,
|
continue_on_failure=True,
|
||||||
|
Loading…
Reference in New Issue
Block a user