mirror of
				https://github.com/open-webui/open-webui
				synced 2025-06-26 18:26:48 +00:00 
			
		
		
		
	Merge pull request #7422 from alpha-pet/feat-youtube-transscript-proxy
feat: Optional proxy setting for downloading YouTube transcripts
This commit is contained in:
		
						commit
						9f981db0b9
					
				@ -1,7 +1,12 @@
 | 
			
		||||
import logging
 | 
			
		||||
 | 
			
		||||
from typing import Any, Dict, Generator, List, Optional, Sequence, Union
 | 
			
		||||
from urllib.parse import parse_qs, urlparse
 | 
			
		||||
from langchain_core.documents import Document
 | 
			
		||||
from open_webui.env import SRC_LOG_LEVELS
 | 
			
		||||
 | 
			
		||||
log = logging.getLogger(__name__)
 | 
			
		||||
log.setLevel(SRC_LOG_LEVELS["RAG"])
 | 
			
		||||
 | 
			
		||||
ALLOWED_SCHEMES = {"http", "https"}
 | 
			
		||||
ALLOWED_NETLOCS = {
 | 
			
		||||
@ -51,12 +56,14 @@ class YoutubeLoader:
 | 
			
		||||
        self,
 | 
			
		||||
        video_id: str,
 | 
			
		||||
        language: Union[str, Sequence[str]] = "en",
 | 
			
		||||
        proxy_url: Optional[str] = None,
 | 
			
		||||
    ):
 | 
			
		||||
        """Initialize with YouTube video ID."""
 | 
			
		||||
        _video_id = _parse_video_id(video_id)
 | 
			
		||||
        self.video_id = _video_id if _video_id is not None else video_id
 | 
			
		||||
        self._metadata = {"source": video_id}
 | 
			
		||||
        self.language = language
 | 
			
		||||
        self.proxy_url = proxy_url
 | 
			
		||||
        if isinstance(language, str):
 | 
			
		||||
            self.language = [language]
 | 
			
		||||
        else:
 | 
			
		||||
@ -76,10 +83,20 @@ class YoutubeLoader:
 | 
			
		||||
                "Please install it with `pip install youtube-transcript-api`."
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
        if self.proxy_url:
 | 
			
		||||
            youtube_proxies = {
 | 
			
		||||
                'http': self.proxy_url,
 | 
			
		||||
                'https': self.proxy_url,
 | 
			
		||||
            }
 | 
			
		||||
            # Don't log complete URL because it might contain secrets
 | 
			
		||||
            log.debug(f"Using proxy URL: {self.proxy_url[:14]}...")
 | 
			
		||||
        else:
 | 
			
		||||
            youtube_proxies = None
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            transcript_list = YouTubeTranscriptApi.list_transcripts(self.video_id)
 | 
			
		||||
            transcript_list = YouTubeTranscriptApi.list_transcripts(self.video_id, proxies=youtube_proxies)
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            print(e)
 | 
			
		||||
            log.exception("Loading YouTube transcript failed")
 | 
			
		||||
            return []
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
 | 
			
		||||
@ -105,6 +105,7 @@ from open_webui.config import (
 | 
			
		||||
    TIKA_SERVER_URL,
 | 
			
		||||
    UPLOAD_DIR,
 | 
			
		||||
    YOUTUBE_LOADER_LANGUAGE,
 | 
			
		||||
    YOUTUBE_LOADER_PROXY_URL,
 | 
			
		||||
    DEFAULT_LOCALE,
 | 
			
		||||
    AppConfig,
 | 
			
		||||
)
 | 
			
		||||
@ -171,6 +172,7 @@ app.state.config.OLLAMA_API_KEY = RAG_OLLAMA_API_KEY
 | 
			
		||||
app.state.config.PDF_EXTRACT_IMAGES = PDF_EXTRACT_IMAGES
 | 
			
		||||
 | 
			
		||||
app.state.config.YOUTUBE_LOADER_LANGUAGE = YOUTUBE_LOADER_LANGUAGE
 | 
			
		||||
app.state.config.YOUTUBE_LOADER_PROXY_URL = YOUTUBE_LOADER_PROXY_URL
 | 
			
		||||
app.state.YOUTUBE_LOADER_TRANSLATION = None
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -471,6 +473,7 @@ async def get_rag_config(user=Depends(get_admin_user)):
 | 
			
		||||
        "youtube": {
 | 
			
		||||
            "language": app.state.config.YOUTUBE_LOADER_LANGUAGE,
 | 
			
		||||
            "translation": app.state.YOUTUBE_LOADER_TRANSLATION,
 | 
			
		||||
            "proxy_url": app.state.config.YOUTUBE_LOADER_PROXY_URL,
 | 
			
		||||
        },
 | 
			
		||||
        "web": {
 | 
			
		||||
            "web_loader_ssl_verification": app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
 | 
			
		||||
@ -518,6 +521,7 @@ class ChunkParamUpdateForm(BaseModel):
 | 
			
		||||
class YoutubeLoaderConfig(BaseModel):
 | 
			
		||||
    language: list[str]
 | 
			
		||||
    translation: Optional[str] = None
 | 
			
		||||
    proxy_url: str = ""
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class WebSearchConfig(BaseModel):
 | 
			
		||||
@ -580,6 +584,7 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
 | 
			
		||||
 | 
			
		||||
    if form_data.youtube is not None:
 | 
			
		||||
        app.state.config.YOUTUBE_LOADER_LANGUAGE = form_data.youtube.language
 | 
			
		||||
        app.state.config.YOUTUBE_LOADER_PROXY_URL = form_data.youtube.proxy_url
 | 
			
		||||
        app.state.YOUTUBE_LOADER_TRANSLATION = form_data.youtube.translation
 | 
			
		||||
 | 
			
		||||
    if form_data.web is not None:
 | 
			
		||||
@ -640,6 +645,7 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
 | 
			
		||||
        },
 | 
			
		||||
        "youtube": {
 | 
			
		||||
            "language": app.state.config.YOUTUBE_LOADER_LANGUAGE,
 | 
			
		||||
            "proxy_url": app.state.config.YOUTUBE_LOADER_PROXY_URL,
 | 
			
		||||
            "translation": app.state.YOUTUBE_LOADER_TRANSLATION,
 | 
			
		||||
        },
 | 
			
		||||
        "web": {
 | 
			
		||||
@ -1081,7 +1087,9 @@ def process_youtube_video(form_data: ProcessUrlForm, user=Depends(get_verified_u
 | 
			
		||||
            collection_name = calculate_sha256_string(form_data.url)[:63]
 | 
			
		||||
 | 
			
		||||
        loader = YoutubeLoader(
 | 
			
		||||
            form_data.url, language=app.state.config.YOUTUBE_LOADER_LANGUAGE
 | 
			
		||||
            form_data.url,
 | 
			
		||||
            language=app.state.config.YOUTUBE_LOADER_LANGUAGE,
 | 
			
		||||
            proxy_url=app.state.config.YOUTUBE_LOADER_PROXY_URL,
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        docs = loader.load()
 | 
			
		||||
 | 
			
		||||
@ -1305,6 +1305,12 @@ YOUTUBE_LOADER_LANGUAGE = PersistentConfig(
 | 
			
		||||
    os.getenv("YOUTUBE_LOADER_LANGUAGE", "en").split(","),
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
YOUTUBE_LOADER_PROXY_URL = PersistentConfig(
 | 
			
		||||
    "YOUTUBE_LOADER_PROXY_URL",
 | 
			
		||||
    "rag.youtube_loader_proxy_url",
 | 
			
		||||
    os.getenv("YOUTUBE_LOADER_PROXY_URL", ""),
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
ENABLE_RAG_WEB_SEARCH = PersistentConfig(
 | 
			
		||||
    "ENABLE_RAG_WEB_SEARCH",
 | 
			
		||||
 | 
			
		||||
@ -40,6 +40,7 @@ type ContentExtractConfigForm = {
 | 
			
		||||
type YoutubeConfigForm = {
 | 
			
		||||
	language: string[];
 | 
			
		||||
	translation?: string | null;
 | 
			
		||||
	proxy_url: string;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
type RAGConfigForm = {
 | 
			
		||||
 | 
			
		||||
@ -29,13 +29,15 @@
 | 
			
		||||
 | 
			
		||||
	let youtubeLanguage = 'en';
 | 
			
		||||
	let youtubeTranslation = null;
 | 
			
		||||
	let youtubeProxyUrl = '';
 | 
			
		||||
 | 
			
		||||
	const submitHandler = async () => {
 | 
			
		||||
		const res = await updateRAGConfig(localStorage.token, {
 | 
			
		||||
			web: webConfig,
 | 
			
		||||
			youtube: {
 | 
			
		||||
				language: youtubeLanguage.split(',').map((lang) => lang.trim()),
 | 
			
		||||
				translation: youtubeTranslation
 | 
			
		||||
				translation: youtubeTranslation,
 | 
			
		||||
				proxy_url: youtubeProxyUrl
 | 
			
		||||
			}
 | 
			
		||||
		});
 | 
			
		||||
	};
 | 
			
		||||
@ -48,6 +50,7 @@
 | 
			
		||||
 | 
			
		||||
			youtubeLanguage = res.youtube.language.join(',');
 | 
			
		||||
			youtubeTranslation = res.youtube.translation;
 | 
			
		||||
			youtubeProxyUrl = res.youtube.proxy_url;
 | 
			
		||||
		}
 | 
			
		||||
	});
 | 
			
		||||
</script>
 | 
			
		||||
@ -358,6 +361,21 @@
 | 
			
		||||
						</div>
 | 
			
		||||
					</div>
 | 
			
		||||
				</div>
 | 
			
		||||
 | 
			
		||||
				<div>
 | 
			
		||||
					<div class=" py-0.5 flex w-full justify-between">
 | 
			
		||||
						<div class=" w-20 text-xs font-medium self-center">{$i18n.t('Proxy URL')}</div>
 | 
			
		||||
						<div class=" flex-1 self-center">
 | 
			
		||||
							<input
 | 
			
		||||
								class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
 | 
			
		||||
								type="text"
 | 
			
		||||
								placeholder={$i18n.t('Enter proxy URL (e.g. https://user:password@host:port)')}
 | 
			
		||||
								bind:value={youtubeProxyUrl}
 | 
			
		||||
								autocomplete="off"
 | 
			
		||||
							/>
 | 
			
		||||
						</div>
 | 
			
		||||
					</div>
 | 
			
		||||
				</div>
 | 
			
		||||
			</div>
 | 
			
		||||
		{/if}
 | 
			
		||||
	</div>
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user