diff --git a/backend/open_webui/retrieval/web/main.py b/backend/open_webui/retrieval/web/main.py index 1af8a70aa..28a749e7d 100644 --- a/backend/open_webui/retrieval/web/main.py +++ b/backend/open_webui/retrieval/web/main.py @@ -1,3 +1,5 @@ +import validators + from typing import Optional from urllib.parse import urlparse @@ -10,6 +12,8 @@ def get_filtered_results(results, filter_list): filtered_results = [] for result in results: url = result.get("url") or result.get("link", "") + if not validators.url(url): + continue domain = urlparse(url).netloc if any(domain.endswith(filtered_domain) for filtered_domain in filter_list): filtered_results.append(result) diff --git a/backend/open_webui/retrieval/web/utils.py b/backend/open_webui/retrieval/web/utils.py index ddbdc6004..3c249daa4 100644 --- a/backend/open_webui/retrieval/web/utils.py +++ b/backend/open_webui/retrieval/web/utils.py @@ -48,6 +48,15 @@ def validate_url(url: Union[str, Sequence[str]]): else: return False +def safe_validate_urls(url: Sequence[str]) -> Sequence[str]: + valid_urls = [] + for u in url: + try: + if validate_url(u): + valid_urls.append(u) + except ValueError: + continue + return valid_urls def resolve_hostname(hostname): # Get address information addr_info = socket.getaddrinfo(hostname, None) @@ -243,12 +252,12 @@ def get_web_loader( verify_ssl: bool = True, requests_per_second: int = 2, ): - # Check if the URL is valid - if not validate_url(urls): - raise ValueError(ERROR_MESSAGES.INVALID_URL) + # Check if the URLs are valid + safe_urls = safe_validate_urls([urls] if isinstance(urls, str) else urls) + web_loader_args = { - "urls": urls, + "urls": safe_urls, "verify_ssl": verify_ssl, "requests_per_second": requests_per_second, "continue_on_failure": True diff --git a/src/lib/components/channel/MessageInput.svelte b/src/lib/components/channel/MessageInput.svelte index c599de5e2..595f84cb5 100644 --- a/src/lib/components/channel/MessageInput.svelte +++ b/src/lib/components/channel/MessageInput.svelte @@ -398,7 +398,7 @@ dir={$settings?.chatDirection ?? 'LTR'} > {#if files.length > 0} -