mirror of
https://github.com/open-webui/open-webui
synced 2024-12-28 06:42:47 +00:00
[feat] Allow use of proxy for downloading YouTube transcripts
This commit is contained in:
parent
0a26c41c7b
commit
53296c1005
@ -1,7 +1,12 @@
|
||||
import logging
|
||||
|
||||
from typing import Any, Dict, Generator, List, Optional, Sequence, Union
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
from langchain_core.documents import Document
|
||||
from open_webui.env import SRC_LOG_LEVELS
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
log.setLevel(SRC_LOG_LEVELS["RAG"])
|
||||
|
||||
ALLOWED_SCHEMES = {"http", "https"}
|
||||
ALLOWED_NETLOCS = {
|
||||
@ -51,12 +56,14 @@ class YoutubeLoader:
|
||||
self,
|
||||
video_id: str,
|
||||
language: Union[str, Sequence[str]] = "en",
|
||||
proxy_url: Optional[str] = None,
|
||||
):
|
||||
"""Initialize with YouTube video ID."""
|
||||
_video_id = _parse_video_id(video_id)
|
||||
self.video_id = _video_id if _video_id is not None else video_id
|
||||
self._metadata = {"source": video_id}
|
||||
self.language = language
|
||||
self.proxy_url = proxy_url
|
||||
if isinstance(language, str):
|
||||
self.language = [language]
|
||||
else:
|
||||
@ -76,10 +83,20 @@ class YoutubeLoader:
|
||||
"Please install it with `pip install youtube-transcript-api`."
|
||||
)
|
||||
|
||||
if self.proxy_url:
|
||||
youtube_proxies = {
|
||||
'http': self.proxy_url,
|
||||
'https': self.proxy_url,
|
||||
}
|
||||
# Don't log complete URL because it might contain secrets
|
||||
log.debug(f"Using proxy URL: {self.proxy_url[:14]}...")
|
||||
else:
|
||||
youtube_proxies = None
|
||||
|
||||
try:
|
||||
transcript_list = YouTubeTranscriptApi.list_transcripts(self.video_id)
|
||||
transcript_list = YouTubeTranscriptApi.list_transcripts(self.video_id, proxies=youtube_proxies)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
log.exception("Loading YouTube transcript failed")
|
||||
return []
|
||||
|
||||
try:
|
||||
|
@ -105,6 +105,7 @@ from open_webui.config import (
|
||||
TIKA_SERVER_URL,
|
||||
UPLOAD_DIR,
|
||||
YOUTUBE_LOADER_LANGUAGE,
|
||||
YOUTUBE_LOADER_PROXY_URL,
|
||||
DEFAULT_LOCALE,
|
||||
AppConfig,
|
||||
)
|
||||
@ -171,6 +172,7 @@ app.state.config.OLLAMA_API_KEY = RAG_OLLAMA_API_KEY
|
||||
app.state.config.PDF_EXTRACT_IMAGES = PDF_EXTRACT_IMAGES
|
||||
|
||||
app.state.config.YOUTUBE_LOADER_LANGUAGE = YOUTUBE_LOADER_LANGUAGE
|
||||
app.state.config.YOUTUBE_LOADER_PROXY_URL = YOUTUBE_LOADER_PROXY_URL
|
||||
app.state.YOUTUBE_LOADER_TRANSLATION = None
|
||||
|
||||
|
||||
@ -471,6 +473,7 @@ async def get_rag_config(user=Depends(get_admin_user)):
|
||||
"youtube": {
|
||||
"language": app.state.config.YOUTUBE_LOADER_LANGUAGE,
|
||||
"translation": app.state.YOUTUBE_LOADER_TRANSLATION,
|
||||
"proxy_url": app.state.config.YOUTUBE_LOADER_PROXY_URL,
|
||||
},
|
||||
"web": {
|
||||
"web_loader_ssl_verification": app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
|
||||
@ -518,6 +521,7 @@ class ChunkParamUpdateForm(BaseModel):
|
||||
class YoutubeLoaderConfig(BaseModel):
|
||||
language: list[str]
|
||||
translation: Optional[str] = None
|
||||
proxy_url: str = ""
|
||||
|
||||
|
||||
class WebSearchConfig(BaseModel):
|
||||
@ -580,6 +584,7 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
|
||||
|
||||
if form_data.youtube is not None:
|
||||
app.state.config.YOUTUBE_LOADER_LANGUAGE = form_data.youtube.language
|
||||
app.state.config.YOUTUBE_LOADER_PROXY_URL = form_data.youtube.proxy_url
|
||||
app.state.YOUTUBE_LOADER_TRANSLATION = form_data.youtube.translation
|
||||
|
||||
if form_data.web is not None:
|
||||
@ -640,6 +645,7 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
|
||||
},
|
||||
"youtube": {
|
||||
"language": app.state.config.YOUTUBE_LOADER_LANGUAGE,
|
||||
"proxy_url": app.state.config.YOUTUBE_LOADER_PROXY_URL,
|
||||
"translation": app.state.YOUTUBE_LOADER_TRANSLATION,
|
||||
},
|
||||
"web": {
|
||||
@ -1081,7 +1087,9 @@ def process_youtube_video(form_data: ProcessUrlForm, user=Depends(get_verified_u
|
||||
collection_name = calculate_sha256_string(form_data.url)[:63]
|
||||
|
||||
loader = YoutubeLoader(
|
||||
form_data.url, language=app.state.config.YOUTUBE_LOADER_LANGUAGE
|
||||
form_data.url,
|
||||
language=app.state.config.YOUTUBE_LOADER_LANGUAGE,
|
||||
proxy_url=app.state.config.YOUTUBE_LOADER_PROXY_URL,
|
||||
)
|
||||
|
||||
docs = loader.load()
|
||||
|
@ -1259,6 +1259,12 @@ YOUTUBE_LOADER_LANGUAGE = PersistentConfig(
|
||||
os.getenv("YOUTUBE_LOADER_LANGUAGE", "en").split(","),
|
||||
)
|
||||
|
||||
YOUTUBE_LOADER_PROXY_URL = PersistentConfig(
|
||||
"YOUTUBE_LOADER_PROXY_URL",
|
||||
"rag.youtube_loader_proxy_url",
|
||||
os.getenv("YOUTUBE_LOADER_PROXY_URL", ""),
|
||||
)
|
||||
|
||||
|
||||
ENABLE_RAG_WEB_SEARCH = PersistentConfig(
|
||||
"ENABLE_RAG_WEB_SEARCH",
|
||||
|
@ -40,6 +40,7 @@ type ContentExtractConfigForm = {
|
||||
type YoutubeConfigForm = {
|
||||
language: string[];
|
||||
translation?: string | null;
|
||||
proxy_url: string;
|
||||
};
|
||||
|
||||
type RAGConfigForm = {
|
||||
|
@ -29,13 +29,15 @@
|
||||
|
||||
let youtubeLanguage = 'en';
|
||||
let youtubeTranslation = null;
|
||||
let youtubeProxyUrl = '';
|
||||
|
||||
const submitHandler = async () => {
|
||||
const res = await updateRAGConfig(localStorage.token, {
|
||||
web: webConfig,
|
||||
youtube: {
|
||||
language: youtubeLanguage.split(',').map((lang) => lang.trim()),
|
||||
translation: youtubeTranslation
|
||||
translation: youtubeTranslation,
|
||||
proxy_url: youtubeProxyUrl
|
||||
}
|
||||
});
|
||||
};
|
||||
@ -48,6 +50,7 @@
|
||||
|
||||
youtubeLanguage = res.youtube.language.join(',');
|
||||
youtubeTranslation = res.youtube.translation;
|
||||
youtubeProxyUrl = res.youtube.proxy_url;
|
||||
}
|
||||
});
|
||||
</script>
|
||||
@ -358,6 +361,21 @@
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<div class=" py-0.5 flex w-full justify-between">
|
||||
<div class=" w-20 text-xs font-medium self-center">{$i18n.t('Proxy URL')}</div>
|
||||
<div class=" flex-1 self-center">
|
||||
<input
|
||||
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
|
||||
type="text"
|
||||
placeholder={$i18n.t('Enter proxy URL (e.g. https://user:password@host:port)')}
|
||||
bind:value={youtubeProxyUrl}
|
||||
autocomplete="off"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
|
Loading…
Reference in New Issue
Block a user