mirror of
https://github.com/open-webui/open-webui
synced 2025-06-26 18:26:48 +00:00
[feat] Allow use of proxy for downloading Youtube transscripts
This commit is contained in:
@@ -1,7 +1,12 @@
|
||||
import logging
|
||||
|
||||
from typing import Any, Dict, Generator, List, Optional, Sequence, Union
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
from langchain_core.documents import Document
|
||||
from open_webui.env import SRC_LOG_LEVELS
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
log.setLevel(SRC_LOG_LEVELS["RAG"])
|
||||
|
||||
ALLOWED_SCHEMES = {"http", "https"}
|
||||
ALLOWED_NETLOCS = {
|
||||
@@ -51,12 +56,14 @@ class YoutubeLoader:
|
||||
self,
|
||||
video_id: str,
|
||||
language: Union[str, Sequence[str]] = "en",
|
||||
proxy_url: Optional[str] = None,
|
||||
):
|
||||
"""Initialize with YouTube video ID."""
|
||||
_video_id = _parse_video_id(video_id)
|
||||
self.video_id = _video_id if _video_id is not None else video_id
|
||||
self._metadata = {"source": video_id}
|
||||
self.language = language
|
||||
self.proxy_url = proxy_url
|
||||
if isinstance(language, str):
|
||||
self.language = [language]
|
||||
else:
|
||||
@@ -76,10 +83,20 @@ class YoutubeLoader:
|
||||
"Please install it with `pip install youtube-transcript-api`."
|
||||
)
|
||||
|
||||
if self.proxy_url:
|
||||
youtube_proxies = {
|
||||
'http': self.proxy_url,
|
||||
'https': self.proxy_url,
|
||||
}
|
||||
# Don't log complete URL because it might contain secrets
|
||||
log.debug(f"Using proxy URL: {self.proxy_url[:14]}...")
|
||||
else:
|
||||
youtube_proxies = None
|
||||
|
||||
try:
|
||||
transcript_list = YouTubeTranscriptApi.list_transcripts(self.video_id)
|
||||
transcript_list = YouTubeTranscriptApi.list_transcripts(self.video_id, proxies=youtube_proxies)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
log.exception("Loading YouTube transcript failed")
|
||||
return []
|
||||
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user