mirror of
https://github.com/open-webui/open-webui
synced 2025-06-14 10:20:52 +00:00
Merge pull request #14781 from lucyknada/patch-2
fix: fix #14752 and add manual transcription retrieval
This commit is contained in:
commit
50d9a2ac58
@ -1,4 +1,5 @@
|
|||||||
import logging
|
import logging
|
||||||
|
from xml.etree.ElementTree import ParseError
|
||||||
|
|
||||||
from typing import Any, Dict, Generator, List, Optional, Sequence, Union
|
from typing import Any, Dict, Generator, List, Optional, Sequence, Union
|
||||||
from urllib.parse import parse_qs, urlparse
|
from urllib.parse import parse_qs, urlparse
|
||||||
@ -93,7 +94,6 @@ class YoutubeLoader:
|
|||||||
"http": self.proxy_url,
|
"http": self.proxy_url,
|
||||||
"https": self.proxy_url,
|
"https": self.proxy_url,
|
||||||
}
|
}
|
||||||
# Don't log complete URL because it might contain secrets
|
|
||||||
log.debug(f"Using proxy URL: {self.proxy_url[:14]}...")
|
log.debug(f"Using proxy URL: {self.proxy_url[:14]}...")
|
||||||
else:
|
else:
|
||||||
youtube_proxies = None
|
youtube_proxies = None
|
||||||
@ -110,11 +110,35 @@ class YoutubeLoader:
|
|||||||
for lang in self.language:
|
for lang in self.language:
|
||||||
try:
|
try:
|
||||||
transcript = transcript_list.find_transcript([lang])
|
transcript = transcript_list.find_transcript([lang])
|
||||||
|
if transcript.is_generated:
|
||||||
|
log.debug(f"Found generated transcript for language '{lang}'")
|
||||||
|
try:
|
||||||
|
transcript = transcript_list.find_manually_created_transcript(
|
||||||
|
[lang]
|
||||||
|
)
|
||||||
|
log.debug(f"Found manual transcript for language '{lang}'")
|
||||||
|
except NoTranscriptFound:
|
||||||
|
log.debug(
|
||||||
|
f"No manual transcript found for language '{lang}', using generated"
|
||||||
|
)
|
||||||
|
pass
|
||||||
|
|
||||||
log.debug(f"Found transcript for language '{lang}'")
|
log.debug(f"Found transcript for language '{lang}'")
|
||||||
|
try:
|
||||||
transcript_pieces: List[Dict[str, Any]] = transcript.fetch()
|
transcript_pieces: List[Dict[str, Any]] = transcript.fetch()
|
||||||
|
except ParseError:
|
||||||
|
log.debug(f"Empty or invalid transcript for language '{lang}'")
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not transcript_pieces:
|
||||||
|
log.debug(f"Empty transcript for language '{lang}'")
|
||||||
|
continue
|
||||||
|
|
||||||
transcript_text = " ".join(
|
transcript_text = " ".join(
|
||||||
map(
|
map(
|
||||||
lambda transcript_piece: transcript_piece.text.strip(" "),
|
lambda transcript_piece: transcript_piece.text.strip(" ")
|
||||||
|
if hasattr(transcript_piece, "text")
|
||||||
|
else "",
|
||||||
transcript_pieces,
|
transcript_pieces,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@ -131,6 +155,4 @@ class YoutubeLoader:
|
|||||||
log.warning(
|
log.warning(
|
||||||
f"No transcript found for any of the specified languages: {languages_tried}. Verify if the video has transcripts, add more languages if needed."
|
f"No transcript found for any of the specified languages: {languages_tried}. Verify if the video has transcripts, add more languages if needed."
|
||||||
)
|
)
|
||||||
raise NoTranscriptFound(
|
raise NoTranscriptFound(self.video_id, self.language, list(transcript_list))
|
||||||
f"No transcript found for any supported language. Verify if the video has transcripts, add more languages if needed."
|
|
||||||
)
|
|
||||||
|
Loading…
Reference in New Issue
Block a user