Mirror of https://github.com/open-webui/open-webui (synced 2025-06-26 10:17:00 +00:00)
Update youtube.py
This commit is contained in:
parent
c9fde5cec7
commit
17e100661c
@@ -40,11 +40,13 @@ def _parse_video_id(url: str) -> Optional[str]:
|
|||||||
video_id = ids if isinstance(ids, str) else ids[0]
|
video_id = ids if isinstance(ids, str) else ids[0]
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
elif parsed_url.netloc == "youtu.be":
|
||||||
|
video_id = parsed_url.path.lstrip("/").split("?")[0]
|
||||||
else:
|
else:
|
||||||
path = parsed_url.path.lstrip("/")
|
path = parsed_url.path.lstrip("/")
|
||||||
video_id = path.split("/")[-1]
|
video_id = path.split("/")[-1].split("?")[0]
|
||||||
|
|
||||||
if len(video_id) != 11: # Video IDs are 11 characters long
|
if len(video_id) != 11:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return video_id
|
return video_id
|
||||||
@@ -109,19 +111,19 @@ class YoutubeLoader:
|
|||||||
# Try each language in order of priority
|
# Try each language in order of priority
|
||||||
for lang in self.language:
|
for lang in self.language:
|
||||||
try:
|
try:
|
||||||
transcript = transcript_list.find_transcript([lang])
|
|
||||||
if transcript.is_generated:
|
|
||||||
log.debug(f"Found generated transcript for language '{lang}'")
|
|
||||||
try:
|
try:
|
||||||
transcript = transcript_list.find_manually_created_transcript(
|
transcript = transcript_list.find_manually_created_transcript([lang])
|
||||||
[lang]
|
|
||||||
)
|
|
||||||
log.debug(f"Found manual transcript for language '{lang}'")
|
log.debug(f"Found manual transcript for language '{lang}'")
|
||||||
except NoTranscriptFound:
|
except NoTranscriptFound:
|
||||||
log.debug(
|
transcript = transcript_list.find_generated_transcript([lang])
|
||||||
f"No manual transcript found for language '{lang}', using generated"
|
log.debug(f"Found auto-generated transcript for language '{lang}'")
|
||||||
)
|
|
||||||
pass
|
log.debug(f"Found transcript for language '{lang}'")
|
||||||
|
try:
|
||||||
|
transcript_pieces: List[Dict[str, Any]] = transcript.fetch()
|
||||||
|
except ParseError:
|
||||||
|
log.debug(f"Empty or invalid transcript for language '{lang}'")
|
||||||
|
continue
|
||||||
|
|
||||||
log.debug(f"Found transcript for language '{lang}'")
|
log.debug(f"Found transcript for language '{lang}'")
|
||||||
try:
|
try:
|
||||||
|
Loading…
Reference in New Issue
Block a user