mirror of
https://github.com/open-webui/open-webui
synced 2025-06-08 15:37:22 +00:00
Update youtube.py
This commit is contained in:
parent
f65dc715f9
commit
d7927506f1
@ -62,13 +62,17 @@ class YoutubeLoader:
|
|||||||
_video_id = _parse_video_id(video_id)
|
_video_id = _parse_video_id(video_id)
|
||||||
self.video_id = _video_id if _video_id is not None else video_id
|
self.video_id = _video_id if _video_id is not None else video_id
|
||||||
self._metadata = {"source": video_id}
|
self._metadata = {"source": video_id}
|
||||||
self.language = language
|
|
||||||
self.proxy_url = proxy_url
|
self.proxy_url = proxy_url
|
||||||
|
|
||||||
# Ensure language is a list
|
# Ensure language is a list
|
||||||
if isinstance(language, str):
|
if isinstance(language, str):
|
||||||
self.language = [language]
|
self.language = [language]
|
||||||
else:
|
else:
|
||||||
self.language = language
|
self.language = list(language) # Make a copy to avoid modifying the original
|
||||||
|
|
||||||
|
# Add English as fallback if not already in the list
|
||||||
|
if "en" not in self.language:
|
||||||
|
self.language.append("en")
|
||||||
|
|
||||||
def load(self) -> List[Document]:
|
def load(self) -> List[Document]:
|
||||||
"""Load YouTube transcripts into `Document` objects."""
|
"""Load YouTube transcripts into `Document` objects."""
|
||||||
@ -102,16 +106,8 @@ class YoutubeLoader:
|
|||||||
log.exception("Loading YouTube transcript failed")
|
log.exception("Loading YouTube transcript failed")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# Make a copy of the language list to avoid modifying the original
|
|
||||||
languages_to_try = list(self.language)
|
|
||||||
|
|
||||||
# Add English as fallback if not already in the list
|
|
||||||
if "en" not in languages_to_try:
|
|
||||||
log.debug("Adding English as fallback language")
|
|
||||||
languages_to_try.append("en")
|
|
||||||
|
|
||||||
# Try each language in order of priority
|
# Try each language in order of priority
|
||||||
for lang in languages_to_try:
|
for lang in self.language:
|
||||||
try:
|
try:
|
||||||
transcript = transcript_list.find_transcript([lang])
|
transcript = transcript_list.find_transcript([lang])
|
||||||
log.debug(f"Found transcript for language '{lang}'")
|
log.debug(f"Found transcript for language '{lang}'")
|
||||||
@ -131,6 +127,6 @@ class YoutubeLoader:
|
|||||||
raise e
|
raise e
|
||||||
|
|
||||||
# If we get here, all languages failed
|
# If we get here, all languages failed
|
||||||
languages_tried = ", ".join(languages_to_try)
|
languages_tried = ", ".join(self.language)
|
||||||
log.warning(f"No transcript found for any of the specified languages: {languages_tried}. Verify if the video has transcripts, add more languages if needed.")
|
log.warning(f"No transcript found for any of the specified languages: {languages_tried}")
|
||||||
raise NoTranscriptFound(f"No transcript found for any supported language. Verify if the video has transcripts, add more languages if needed.")
|
raise NoTranscriptFound(f"No transcript found for any supported language")
|
||||||
|
Loading…
Reference in New Issue
Block a user