Mirror of https://github.com/open-webui/open-webui (synced 2025-06-26 18:26:48 +00:00)
Update youtube.py
This commit is contained in:
parent 67a612fe24
commit 5e1cb76b93
@ -101,8 +101,16 @@ class YoutubeLoader:
|
|||||||
log.exception("Loading YouTube transcript failed")
|
log.exception("Loading YouTube transcript failed")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
# Make a copy of the language list to avoid modifying the original
|
||||||
|
languages_to_try = list(self.language)
|
||||||
|
|
||||||
|
# Add English as fallback, if not already in the list
|
||||||
|
if "en" not in languages_to_try:
|
||||||
|
log.debug("Adding English as fallback language")
|
||||||
|
languages_to_try.append("en")
|
||||||
|
|
||||||
# Try each language in order of priority
|
# Try each language in order of priority
|
||||||
for lang in self.language:
|
for lang in languages_to_try:
|
||||||
try:
|
try:
|
||||||
transcript = transcript_list.find_transcript([lang])
|
transcript = transcript_list.find_transcript([lang])
|
||||||
log.debug(f"Found transcript for language '{lang}'")
|
log.debug(f"Found transcript for language '{lang}'")
|
||||||
@ -121,29 +129,7 @@ class YoutubeLoader:
|
|||||||
log.info(f"Error finding transcript for language '{lang}'")
|
log.info(f"Error finding transcript for language '{lang}'")
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
# If all specified languages fail, fall back to English (unless English was already tried)
|
# If we get here, all languages failed including the English fallback
|
||||||
if "en" not in self.language:
|
languages_tried = ", ".join(languages_to_try)
|
||||||
try:
|
|
||||||
log.debug("Falling back to English transcript")
|
|
||||||
transcript = transcript_list.find_transcript(["en"])
|
|
||||||
transcript_pieces: List[Dict[str, Any]] = transcript.fetch()
|
|
||||||
transcript_text = " ".join(
|
|
||||||
map(
|
|
||||||
lambda transcript_piece: transcript_piece.text.strip(" "),
|
|
||||||
transcript_pieces,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
return [Document(page_content=transcript_text, metadata=self._metadata)]
|
|
||||||
except NoTranscriptFound:
|
|
||||||
log.warning("No English transcript found as fallback")
|
|
||||||
except Exception as e:
|
|
||||||
log.exception("Error finding English transcript fallback")
|
|
||||||
raise e
|
|
||||||
|
|
||||||
# All languages failed
|
|
||||||
languages_tried = ", ".join(self.language)
|
|
||||||
if "en" not in self.language:
|
|
||||||
languages_tried += ", en (fallback)"
|
|
||||||
|
|
||||||
log.warning(f"No transcript found for any of the specified languages: {languages_tried}")
|
log.warning(f"No transcript found for any of the specified languages: {languages_tried}")
|
||||||
raise NoTranscriptFound(f"No transcript found for any supported language")
|
raise NoTranscriptFound(f"No transcript found for any supported language")
|
||||||
|
Loading…
Reference in New Issue
Block a user