From 7680ac25179aed4d48815e178aa22ac8399c6381 Mon Sep 17 00:00:00 2001
From: Classic298 <27028174+Classic298@users.noreply.github.com>
Date: Mon, 5 May 2025 19:57:06 +0200
Subject: [PATCH 01/15] Update youtube.py

---
 .../open_webui/retrieval/loaders/youtube.py   | 107 +++++++++++-------
 1 file changed, 63 insertions(+), 44 deletions(-)

diff --git a/backend/open_webui/retrieval/loaders/youtube.py b/backend/open_webui/retrieval/loaders/youtube.py
index f59dd7df5..337436960 100644
--- a/backend/open_webui/retrieval/loaders/youtube.py
+++ b/backend/open_webui/retrieval/loaders/youtube.py
@@ -70,48 +70,67 @@ class YoutubeLoader:
             self.language = language
 
     def load(self) -> List[Document]:
-        """Load YouTube transcripts into `Document` objects."""
-        try:
-            from youtube_transcript_api import (
-                NoTranscriptFound,
-                TranscriptsDisabled,
-                YouTubeTranscriptApi,
-            )
-        except ImportError:
-            raise ImportError(
-                'Could not import "youtube_transcript_api" Python package. '
-                "Please install it with `pip install youtube-transcript-api`."
-            )
-
-        if self.proxy_url:
-            youtube_proxies = {
-                "http": self.proxy_url,
-                "https": self.proxy_url,
-            }
-            # Don't log complete URL because it might contain secrets
-            log.debug(f"Using proxy URL: {self.proxy_url[:14]}...")
-        else:
-            youtube_proxies = None
-
-        try:
-            transcript_list = YouTubeTranscriptApi.list_transcripts(
-                self.video_id, proxies=youtube_proxies
-            )
-        except Exception as e:
-            log.exception("Loading YouTube transcript failed")
-            return []
-
-        try:
-            transcript = transcript_list.find_transcript(self.language)
-        except NoTranscriptFound:
-            transcript = transcript_list.find_transcript(["en"])
-
-        transcript_pieces: List[Dict[str, Any]] = transcript.fetch()
-
-        transcript = " ".join(
-            map(
-                lambda transcript_piece: transcript_piece.text.strip(" "),
-                transcript_pieces,
-            )
+    """Load YouTube transcripts into `Document` objects."""
+    try:
+        from youtube_transcript_api import (
+            NoTranscriptFound,
+            TranscriptsDisabled,
+            YouTubeTranscriptApi,
         )
-        return [Document(page_content=transcript, metadata=self._metadata)]
+    except ImportError:
+        raise ImportError(
+            'Could not import "youtube_transcript_api" Python package. '
+            "Please install it with `pip install youtube-transcript-api`."
+        )
+
+    if self.proxy_url:
+        youtube_proxies = {
+            "http": self.proxy_url,
+            "https": self.proxy_url,
+        }
+        # Don't log complete URL because it might contain secrets
+        log.debug(f"Using proxy URL: {self.proxy_url[:14]}...")
+    else:
+        youtube_proxies = None
+
+    try:
+        transcript_list = YouTubeTranscriptApi.list_transcripts(
+            self.video_id, proxies=youtube_proxies
+        )
+    except Exception as e:
+        log.exception("Loading YouTube transcript failed")
+        return []
+
+    # Try each language in order of priority
+    last_exception = None
+    for lang in self.language:
+        try:
+            log.debug(f"Attempting to find transcript for language '{lang}'")
+            transcript = transcript_list.find_transcript([lang])
+            log.info(f"Found transcript for language '{lang}'")
+            
+            transcript_pieces: List[Dict[str, Any]] = transcript.fetch()
+            transcript_text = " ".join(
+                map(
+                    lambda transcript_piece: transcript_piece.text.strip(" "),
+                    transcript_pieces,
+                )
+            )
+            return [Document(page_content=transcript_text, metadata=self._metadata)]
+        except NoTranscriptFound as e:
+            log.debug(f"No transcript found for language '{lang}'")
+            last_exception = e
+            continue
+        except Exception as e:
+            # If we hit any other type of exception, log it and re-raise
+            log.exception(f"Error finding transcript for language '{lang}'")
+            raise e
+
+    # If all specified languages fail, raise the last exception
+    # This maintains compatibility with the error handling in the rest of the application
+    if last_exception:
+        log.warning(f"No transcript found for any of the specified languages: {', '.join(self.language)}")
+        raise last_exception
+    
+    # This should never happen (we'd have raised an exception above)
+    return []

From 0a845db8eca7554d6310b7fad4d7360e2db66b91 Mon Sep 17 00:00:00 2001
From: Classic298 <27028174+Classic298@users.noreply.github.com>
Date: Mon, 5 May 2025 19:57:21 +0200
Subject: [PATCH 02/15] Update youtube.py

---
 .../open_webui/retrieval/loaders/youtube.py   | 122 +++++++++---------
 1 file changed, 61 insertions(+), 61 deletions(-)

diff --git a/backend/open_webui/retrieval/loaders/youtube.py b/backend/open_webui/retrieval/loaders/youtube.py
index 337436960..c1c8669f1 100644
--- a/backend/open_webui/retrieval/loaders/youtube.py
+++ b/backend/open_webui/retrieval/loaders/youtube.py
@@ -70,67 +70,67 @@ class YoutubeLoader:
             self.language = language
 
     def load(self) -> List[Document]:
-    """Load YouTube transcripts into `Document` objects."""
-    try:
-        from youtube_transcript_api import (
-            NoTranscriptFound,
-            TranscriptsDisabled,
-            YouTubeTranscriptApi,
-        )
-    except ImportError:
-        raise ImportError(
-            'Could not import "youtube_transcript_api" Python package. '
-            "Please install it with `pip install youtube-transcript-api`."
-        )
-
-    if self.proxy_url:
-        youtube_proxies = {
-            "http": self.proxy_url,
-            "https": self.proxy_url,
-        }
-        # Don't log complete URL because it might contain secrets
-        log.debug(f"Using proxy URL: {self.proxy_url[:14]}...")
-    else:
-        youtube_proxies = None
-
-    try:
-        transcript_list = YouTubeTranscriptApi.list_transcripts(
-            self.video_id, proxies=youtube_proxies
-        )
-    except Exception as e:
-        log.exception("Loading YouTube transcript failed")
-        return []
-
-    # Try each language in order of priority
-    last_exception = None
-    for lang in self.language:
+        """Load YouTube transcripts into `Document` objects."""
         try:
-            log.debug(f"Attempting to find transcript for language '{lang}'")
-            transcript = transcript_list.find_transcript([lang])
-            log.info(f"Found transcript for language '{lang}'")
-            
-            transcript_pieces: List[Dict[str, Any]] = transcript.fetch()
-            transcript_text = " ".join(
-                map(
-                    lambda transcript_piece: transcript_piece.text.strip(" "),
-                    transcript_pieces,
-                )
+            from youtube_transcript_api import (
+                NoTranscriptFound,
+                TranscriptsDisabled,
+                YouTubeTranscriptApi,
+            )
+        except ImportError:
+            raise ImportError(
+                'Could not import "youtube_transcript_api" Python package. '
+                "Please install it with `pip install youtube-transcript-api`."
             )
-            return [Document(page_content=transcript_text, metadata=self._metadata)]
-        except NoTranscriptFound as e:
-            log.debug(f"No transcript found for language '{lang}'")
-            last_exception = e
-            continue
-        except Exception as e:
-            # If we hit any other type of exception, log it and re-raise
-            log.exception(f"Error finding transcript for language '{lang}'")
-            raise e
-
-    # If all specified languages fail, raise the last exception
-    # This maintains compatibility with the error handling in the rest of the application
-    if last_exception:
-        log.warning(f"No transcript found for any of the specified languages: {', '.join(self.language)}")
-        raise last_exception
     
-    # This should never happen (we'd have raised an exception above)
-    return []
+        if self.proxy_url:
+            youtube_proxies = {
+                "http": self.proxy_url,
+                "https": self.proxy_url,
+            }
+            # Don't log complete URL because it might contain secrets
+            log.debug(f"Using proxy URL: {self.proxy_url[:14]}...")
+        else:
+            youtube_proxies = None
+    
+        try:
+            transcript_list = YouTubeTranscriptApi.list_transcripts(
+                self.video_id, proxies=youtube_proxies
+            )
+        except Exception as e:
+            log.exception("Loading YouTube transcript failed")
+            return []
+    
+        # Try each language in order of priority
+        last_exception = None
+        for lang in self.language:
+            try:
+                log.debug(f"Attempting to find transcript for language '{lang}'")
+                transcript = transcript_list.find_transcript([lang])
+                log.info(f"Found transcript for language '{lang}'")
+                
+                transcript_pieces: List[Dict[str, Any]] = transcript.fetch()
+                transcript_text = " ".join(
+                    map(
+                        lambda transcript_piece: transcript_piece.text.strip(" "),
+                        transcript_pieces,
+                    )
+                )
+                return [Document(page_content=transcript_text, metadata=self._metadata)]
+            except NoTranscriptFound as e:
+                log.debug(f"No transcript found for language '{lang}'")
+                last_exception = e
+                continue
+            except Exception as e:
+                # If we hit any other type of exception, log it and re-raise
+                log.exception(f"Error finding transcript for language '{lang}'")
+                raise e
+    
+        # If all specified languages fail, raise the last exception
+        # This maintains compatibility with the error handling in the rest of the application
+        if last_exception:
+            log.warning(f"No transcript found for any of the specified languages: {', '.join(self.language)}")
+            raise last_exception
+        
+        # This should never happen (we'd have raised an exception above)
+        return []

From 0a3817ed860b2f1d1db190ec6a539b037d1f0701 Mon Sep 17 00:00:00 2001
From: Classic298 <27028174+Classic298@users.noreply.github.com>
Date: Mon, 5 May 2025 20:00:10 +0200
Subject: [PATCH 03/15] Update youtube.py

---
 backend/open_webui/retrieval/loaders/youtube.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/backend/open_webui/retrieval/loaders/youtube.py b/backend/open_webui/retrieval/loaders/youtube.py
index c1c8669f1..48347aa09 100644
--- a/backend/open_webui/retrieval/loaders/youtube.py
+++ b/backend/open_webui/retrieval/loaders/youtube.py
@@ -88,8 +88,7 @@ class YoutubeLoader:
                 "http": self.proxy_url,
                 "https": self.proxy_url,
             }
-            # Don't log complete URL because it might contain secrets
-            log.debug(f"Using proxy URL: {self.proxy_url[:14]}...")
+            log.debug(f"Using proxy URL: {self.proxy_url}...")
         else:
             youtube_proxies = None
     
@@ -105,9 +104,8 @@ class YoutubeLoader:
         last_exception = None
         for lang in self.language:
             try:
-                log.debug(f"Attempting to find transcript for language '{lang}'")
                 transcript = transcript_list.find_transcript([lang])
-                log.info(f"Found transcript for language '{lang}'")
+                log.debug(f"Found transcript for language '{lang}'")
                 
                 transcript_pieces: List[Dict[str, Any]] = transcript.fetch()
                 transcript_text = " ".join(
@@ -127,10 +125,8 @@ class YoutubeLoader:
                 raise e
     
         # If all specified languages fail, raise the last exception
-        # This maintains compatibility with the error handling in the rest of the application
         if last_exception:
             log.warning(f"No transcript found for any of the specified languages: {', '.join(self.language)}")
             raise last_exception
         
-        # This should never happen (we'd have raised an exception above)
         return []

From 1a30b3746ed05e9888b038e025075b6e1c17767a Mon Sep 17 00:00:00 2001
From: Classic298 <27028174+Classic298@users.noreply.github.com>
Date: Mon, 5 May 2025 20:03:00 +0200
Subject: [PATCH 04/15] Update youtube.py

---
 .../open_webui/retrieval/loaders/youtube.py   | 38 ++++++++++++++-----
 1 file changed, 28 insertions(+), 10 deletions(-)

diff --git a/backend/open_webui/retrieval/loaders/youtube.py b/backend/open_webui/retrieval/loaders/youtube.py
index 48347aa09..0bd286bca 100644
--- a/backend/open_webui/retrieval/loaders/youtube.py
+++ b/backend/open_webui/retrieval/loaders/youtube.py
@@ -88,7 +88,8 @@ class YoutubeLoader:
                 "http": self.proxy_url,
                 "https": self.proxy_url,
             }
-            log.debug(f"Using proxy URL: {self.proxy_url}...")
+            # Don't log complete URL because it might contain secrets
+            log.debug(f"Using proxy URL: {self.proxy_url[:14]}...")
         else:
             youtube_proxies = None
     
@@ -101,12 +102,10 @@ class YoutubeLoader:
             return []
     
         # Try each language in order of priority
-        last_exception = None
         for lang in self.language:
             try:
                 transcript = transcript_list.find_transcript([lang])
                 log.debug(f"Found transcript for language '{lang}'")
-                
                 transcript_pieces: List[Dict[str, Any]] = transcript.fetch()
                 transcript_text = " ".join(
                     map(
@@ -115,18 +114,37 @@ class YoutubeLoader:
                     )
                 )
                 return [Document(page_content=transcript_text, metadata=self._metadata)]
-            except NoTranscriptFound as e:
+            except NoTranscriptFound:
                 log.debug(f"No transcript found for language '{lang}'")
-                last_exception = e
                 continue
             except Exception as e:
                 # If we hit any other type of exception, log it and re-raise
                 log.exception(f"Error finding transcript for language '{lang}'")
                 raise e
     
-        # If all specified languages fail, raise the last exception
-        if last_exception:
-            log.warning(f"No transcript found for any of the specified languages: {', '.join(self.language)}")
-            raise last_exception
+        # If all specified languages fail, fall back to English (unless English was already tried)
+        if "en" not in self.language:
+            try:
+                log.debug("Falling back to English transcript")
+                transcript = transcript_list.find_transcript(["en"])
+                transcript_pieces: List[Dict[str, Any]] = transcript.fetch()
+                transcript_text = " ".join(
+                    map(
+                        lambda transcript_piece: transcript_piece.text.strip(" "),
+                        transcript_pieces,
+                    )
+                )
+                return [Document(page_content=transcript_text, metadata=self._metadata)]
+            except NoTranscriptFound:
+                log.warning("No English transcript found as fallback")
+            except Exception as e:
+                log.exception("Error finding English transcript fallback")
+                raise e
         
-        return []
+        # If we get here, all languages failed including the English fallback
+        languages_tried = ", ".join(self.language)
+        if "en" not in self.language:
+            languages_tried += ", en (fallback)"
+        
+        log.warning(f"No transcript found for any of the specified languages: {languages_tried}")
+        raise NoTranscriptFound(f"No transcript found for any supported language")

From b0d74a59f14d8f9c8fbe6aa2676039523a45ef62 Mon Sep 17 00:00:00 2001
From: Classic298 <27028174+Classic298@users.noreply.github.com>
Date: Mon, 5 May 2025 20:07:37 +0200
Subject: [PATCH 05/15] Update youtube.py

---
 backend/open_webui/retrieval/loaders/youtube.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/backend/open_webui/retrieval/loaders/youtube.py b/backend/open_webui/retrieval/loaders/youtube.py
index 0bd286bca..958dcfd61 100644
--- a/backend/open_webui/retrieval/loaders/youtube.py
+++ b/backend/open_webui/retrieval/loaders/youtube.py
@@ -118,8 +118,7 @@ class YoutubeLoader:
                 log.debug(f"No transcript found for language '{lang}'")
                 continue
             except Exception as e:
-                # If we hit any other type of exception, log it and re-raise
-                log.exception(f"Error finding transcript for language '{lang}'")
+                log.warning(f"Error finding transcript for language '{lang}'")
                 raise e
     
         # If all specified languages fail, fall back to English (unless English was already tried)
@@ -141,7 +140,7 @@ class YoutubeLoader:
                 log.exception("Error finding English transcript fallback")
                 raise e
         
-        # If we get here, all languages failed including the English fallback
+        # All languages failed
         languages_tried = ", ".join(self.language)
         if "en" not in self.language:
             languages_tried += ", en (fallback)"

From 9cf33813813f92dc97ce33c4b89e79dcdc3f3a13 Mon Sep 17 00:00:00 2001
From: Classic298 <27028174+Classic298@users.noreply.github.com>
Date: Mon, 5 May 2025 20:07:52 +0200
Subject: [PATCH 06/15] Update youtube.py

---
 backend/open_webui/retrieval/loaders/youtube.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/backend/open_webui/retrieval/loaders/youtube.py b/backend/open_webui/retrieval/loaders/youtube.py
index 958dcfd61..ea8983b31 100644
--- a/backend/open_webui/retrieval/loaders/youtube.py
+++ b/backend/open_webui/retrieval/loaders/youtube.py
@@ -88,8 +88,7 @@ class YoutubeLoader:
                 "http": self.proxy_url,
                 "https": self.proxy_url,
             }
-            # Don't log complete URL because it might contain secrets
-            log.debug(f"Using proxy URL: {self.proxy_url[:14]}...")
+            log.debug(f"Using proxy URL: {self.proxy_url}...")
         else:
             youtube_proxies = None
     

From 791dd24ace6054d1822c4ad76f272c3228337d8c Mon Sep 17 00:00:00 2001
From: Classic298 <27028174+Classic298@users.noreply.github.com>
Date: Mon, 5 May 2025 20:08:25 +0200
Subject: [PATCH 07/15] Update youtube.py

---
 backend/open_webui/retrieval/loaders/youtube.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/backend/open_webui/retrieval/loaders/youtube.py b/backend/open_webui/retrieval/loaders/youtube.py
index ea8983b31..958dcfd61 100644
--- a/backend/open_webui/retrieval/loaders/youtube.py
+++ b/backend/open_webui/retrieval/loaders/youtube.py
@@ -88,7 +88,8 @@ class YoutubeLoader:
                 "http": self.proxy_url,
                 "https": self.proxy_url,
             }
-            log.debug(f"Using proxy URL: {self.proxy_url}...")
+            # Don't log complete URL because it might contain secrets
+            log.debug(f"Using proxy URL: {self.proxy_url[:14]}...")
         else:
             youtube_proxies = None
     

From 67a612fe2404edd7819717005981070339043932 Mon Sep 17 00:00:00 2001
From: Classic298 <27028174+Classic298@users.noreply.github.com>
Date: Mon, 5 May 2025 20:40:48 +0200
Subject: [PATCH 08/15] Update youtube.py

---
 backend/open_webui/retrieval/loaders/youtube.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/open_webui/retrieval/loaders/youtube.py b/backend/open_webui/retrieval/loaders/youtube.py
index 958dcfd61..1f78131e2 100644
--- a/backend/open_webui/retrieval/loaders/youtube.py
+++ b/backend/open_webui/retrieval/loaders/youtube.py
@@ -118,7 +118,7 @@ class YoutubeLoader:
                 log.debug(f"No transcript found for language '{lang}'")
                 continue
             except Exception as e:
-                log.warning(f"Error finding transcript for language '{lang}'")
+                log.info(f"Error finding transcript for language '{lang}'")
                 raise e
     
         # If all specified languages fail, fall back to English (unless English was already tried)

From 5e1cb76b93ea3b632ca0ddf9cbe308fd8ecd1d4d Mon Sep 17 00:00:00 2001
From: Classic298 <27028174+Classic298@users.noreply.github.com>
Date: Tue, 6 May 2025 16:16:58 +0200
Subject: [PATCH 09/15] Update youtube.py

---
 .../open_webui/retrieval/loaders/youtube.py   | 36 ++++++-------------
 1 file changed, 11 insertions(+), 25 deletions(-)

diff --git a/backend/open_webui/retrieval/loaders/youtube.py b/backend/open_webui/retrieval/loaders/youtube.py
index 1f78131e2..67b3715fd 100644
--- a/backend/open_webui/retrieval/loaders/youtube.py
+++ b/backend/open_webui/retrieval/loaders/youtube.py
@@ -101,8 +101,16 @@ class YoutubeLoader:
             log.exception("Loading YouTube transcript failed")
             return []
     
+        # Make a copy of the language list to avoid modifying the original
+        languages_to_try = list(self.language)
+        
+        # Add English as fallback, if not already in the list
+        if "en" not in languages_to_try:
+            log.debug("Adding English as fallback language")
+            languages_to_try.append("en")
+        
         # Try each language in order of priority
-        for lang in self.language:
+        for lang in languages_to_try:
             try:
                 transcript = transcript_list.find_transcript([lang])
                 log.debug(f"Found transcript for language '{lang}'")
@@ -120,30 +128,8 @@ class YoutubeLoader:
             except Exception as e:
                 log.info(f"Error finding transcript for language '{lang}'")
                 raise e
-    
-        # If all specified languages fail, fall back to English (unless English was already tried)
-        if "en" not in self.language:
-            try:
-                log.debug("Falling back to English transcript")
-                transcript = transcript_list.find_transcript(["en"])
-                transcript_pieces: List[Dict[str, Any]] = transcript.fetch()
-                transcript_text = " ".join(
-                    map(
-                        lambda transcript_piece: transcript_piece.text.strip(" "),
-                        transcript_pieces,
-                    )
-                )
-                return [Document(page_content=transcript_text, metadata=self._metadata)]
-            except NoTranscriptFound:
-                log.warning("No English transcript found as fallback")
-            except Exception as e:
-                log.exception("Error finding English transcript fallback")
-                raise e
-        
-        # All languages failed
-        languages_tried = ", ".join(self.language)
-        if "en" not in self.language:
-            languages_tried += ", en (fallback)"
         
+        # If we get here, all languages failed including the English fallback
+        languages_tried = ", ".join(languages_to_try)
         log.warning(f"No transcript found for any of the specified languages: {languages_tried}")
         raise NoTranscriptFound(f"No transcript found for any supported language")

From a129e0954ec7be642d57df816c98bd8a05c99d87 Mon Sep 17 00:00:00 2001
From: Classic298 <27028174+Classic298@users.noreply.github.com>
Date: Tue, 6 May 2025 16:22:40 +0200
Subject: [PATCH 10/15] Update youtube.py

---
 backend/open_webui/retrieval/loaders/youtube.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/backend/open_webui/retrieval/loaders/youtube.py b/backend/open_webui/retrieval/loaders/youtube.py
index 67b3715fd..88938d0f2 100644
--- a/backend/open_webui/retrieval/loaders/youtube.py
+++ b/backend/open_webui/retrieval/loaders/youtube.py
@@ -64,6 +64,7 @@ class YoutubeLoader:
         self._metadata = {"source": video_id}
         self.language = language
         self.proxy_url = proxy_url
+        # If language is string, convert to list
         if isinstance(language, str):
             self.language = [language]
         else:
@@ -100,7 +101,7 @@ class YoutubeLoader:
         except Exception as e:
             log.exception("Loading YouTube transcript failed")
             return []
-    
+
         # Make a copy of the language list to avoid modifying the original
         languages_to_try = list(self.language)
         
@@ -129,7 +130,7 @@ class YoutubeLoader:
                 log.info(f"Error finding transcript for language '{lang}'")
                 raise e
         
-        # If we get here, all languages failed including the English fallback
+        # If we get here, all languages failed
         languages_tried = ", ".join(languages_to_try)
         log.warning(f"No transcript found for any of the specified languages: {languages_tried}")
-        raise NoTranscriptFound(f"No transcript found for any supported language")
+        raise NoTranscriptFound(f"No transcript found for any supported language. Add additional supported languages and verify whether the video has any transcripts.")

From c69278c13c366777806a6272d83bd0851992c340 Mon Sep 17 00:00:00 2001
From: Classic298 <27028174+Classic298@users.noreply.github.com>
Date: Tue, 6 May 2025 16:24:27 +0200
Subject: [PATCH 11/15] Update youtube.py

---
 backend/open_webui/retrieval/loaders/youtube.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/open_webui/retrieval/loaders/youtube.py b/backend/open_webui/retrieval/loaders/youtube.py
index 88938d0f2..17b1fad60 100644
--- a/backend/open_webui/retrieval/loaders/youtube.py
+++ b/backend/open_webui/retrieval/loaders/youtube.py
@@ -64,7 +64,7 @@ class YoutubeLoader:
         self._metadata = {"source": video_id}
         self.language = language
         self.proxy_url = proxy_url
-        # If language is string, convert to list
+        # Ensure language is a list
         if isinstance(language, str):
             self.language = [language]
         else:

From f65dc715f91ce94750934e457ac14dbebab084e9 Mon Sep 17 00:00:00 2001
From: Classic298 <27028174+Classic298@users.noreply.github.com>
Date: Tue, 6 May 2025 16:30:18 +0200
Subject: [PATCH 12/15] Update youtube.py

---
 backend/open_webui/retrieval/loaders/youtube.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/backend/open_webui/retrieval/loaders/youtube.py b/backend/open_webui/retrieval/loaders/youtube.py
index 17b1fad60..7fa0247da 100644
--- a/backend/open_webui/retrieval/loaders/youtube.py
+++ b/backend/open_webui/retrieval/loaders/youtube.py
@@ -83,7 +83,7 @@ class YoutubeLoader:
                 'Could not import "youtube_transcript_api" Python package. '
                 "Please install it with `pip install youtube-transcript-api`."
             )
-    
+
         if self.proxy_url:
             youtube_proxies = {
                 "http": self.proxy_url,
@@ -93,7 +93,7 @@ class YoutubeLoader:
             log.debug(f"Using proxy URL: {self.proxy_url[:14]}...")
         else:
             youtube_proxies = None
-    
+        
         try:
             transcript_list = YouTubeTranscriptApi.list_transcripts(
                 self.video_id, proxies=youtube_proxies
@@ -101,11 +101,11 @@ class YoutubeLoader:
         except Exception as e:
             log.exception("Loading YouTube transcript failed")
             return []
-
+        
         # Make a copy of the language list to avoid modifying the original
         languages_to_try = list(self.language)
         
-        # Add English as fallback, if not already in the list
+        # Add English as fallback if not already in the list
         if "en" not in languages_to_try:
             log.debug("Adding English as fallback language")
             languages_to_try.append("en")
@@ -129,8 +129,8 @@ class YoutubeLoader:
             except Exception as e:
                 log.info(f"Error finding transcript for language '{lang}'")
                 raise e
-        
+
         # If we get here, all languages failed
         languages_tried = ", ".join(languages_to_try)
-        log.warning(f"No transcript found for any of the specified languages: {languages_tried}")
-        raise NoTranscriptFound(f"No transcript found for any supported language. Add additional supported languages and verify whether the video has any transcripts.")
+        log.warning(f"No transcript found for any of the specified languages: {languages_tried}. Verify if the video has transcripts, add more languages if needed.")
+        raise NoTranscriptFound(f"No transcript found for any supported language. Verify if the video has transcripts, add more languages if needed.")

From d7927506f12be656bcc1c452281c8f8733ea7baa Mon Sep 17 00:00:00 2001
From: Classic298 <27028174+Classic298@users.noreply.github.com>
Date: Tue, 6 May 2025 17:06:21 +0200
Subject: [PATCH 13/15] Update youtube.py

---
 .../open_webui/retrieval/loaders/youtube.py   | 28 ++++++++-----------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/backend/open_webui/retrieval/loaders/youtube.py b/backend/open_webui/retrieval/loaders/youtube.py
index 7fa0247da..1fa2b635c 100644
--- a/backend/open_webui/retrieval/loaders/youtube.py
+++ b/backend/open_webui/retrieval/loaders/youtube.py
@@ -62,13 +62,17 @@ class YoutubeLoader:
         _video_id = _parse_video_id(video_id)
         self.video_id = _video_id if _video_id is not None else video_id
         self._metadata = {"source": video_id}
-        self.language = language
         self.proxy_url = proxy_url
+        
         # Ensure language is a list
         if isinstance(language, str):
             self.language = [language]
         else:
-            self.language = language
+            self.language = list(language)  # Make a copy to avoid modifying the original
+        
+        # Add English as fallback if not already in the list
+        if "en" not in self.language:
+            self.language.append("en")
 
     def load(self) -> List[Document]:
         """Load YouTube transcripts into `Document` objects."""
@@ -83,7 +87,7 @@ class YoutubeLoader:
                 'Could not import "youtube_transcript_api" Python package. '
                 "Please install it with `pip install youtube-transcript-api`."
             )
-
+    
         if self.proxy_url:
             youtube_proxies = {
                 "http": self.proxy_url,
@@ -102,16 +106,8 @@ class YoutubeLoader:
             log.exception("Loading YouTube transcript failed")
             return []
         
-        # Make a copy of the language list to avoid modifying the original
-        languages_to_try = list(self.language)
-        
-        # Add English as fallback if not already in the list
-        if "en" not in languages_to_try:
-            log.debug("Adding English as fallback language")
-            languages_to_try.append("en")
-        
         # Try each language in order of priority
-        for lang in languages_to_try:
+        for lang in self.language:
             try:
                 transcript = transcript_list.find_transcript([lang])
                 log.debug(f"Found transcript for language '{lang}'")
@@ -129,8 +125,8 @@ class YoutubeLoader:
             except Exception as e:
                 log.info(f"Error finding transcript for language '{lang}'")
                 raise e
-
+    
         # If we get here, all languages failed
-        languages_tried = ", ".join(languages_to_try)
-        log.warning(f"No transcript found for any of the specified languages: {languages_tried}. Verify if the video has transcripts, add more languages if needed.")
-        raise NoTranscriptFound(f"No transcript found for any supported language. Verify if the video has transcripts, add more languages if needed.")
+        languages_tried = ", ".join(self.language)
+        log.warning(f"No transcript found for any of the specified languages: {languages_tried}")
+        raise NoTranscriptFound(f"No transcript found for any supported language")

From 87dcbd198c3aed00e22c11dcc0e591f72126a057 Mon Sep 17 00:00:00 2001
From: Classic298 <27028174+Classic298@users.noreply.github.com>
Date: Tue, 6 May 2025 17:11:03 +0200
Subject: [PATCH 14/15] Update youtube.py

---
 backend/open_webui/retrieval/loaders/youtube.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/open_webui/retrieval/loaders/youtube.py b/backend/open_webui/retrieval/loaders/youtube.py
index 1fa2b635c..70153f8cf 100644
--- a/backend/open_webui/retrieval/loaders/youtube.py
+++ b/backend/open_webui/retrieval/loaders/youtube.py
@@ -68,7 +68,7 @@ class YoutubeLoader:
         if isinstance(language, str):
             self.language = [language]
         else:
-            self.language = list(language)  # Make a copy to avoid modifying the original
+            self.language = list(language)
         
         # Add English as fallback if not already in the list
         if "en" not in self.language:

From 1dcbec71ec054f79f570cd95da5a4031568a63fe Mon Sep 17 00:00:00 2001
From: Classic298 <27028174+Classic298@users.noreply.github.com>
Date: Tue, 6 May 2025 17:14:00 +0200
Subject: [PATCH 15/15] Update youtube.py

---
 backend/open_webui/retrieval/loaders/youtube.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/backend/open_webui/retrieval/loaders/youtube.py b/backend/open_webui/retrieval/loaders/youtube.py
index 70153f8cf..763d73094 100644
--- a/backend/open_webui/retrieval/loaders/youtube.py
+++ b/backend/open_webui/retrieval/loaders/youtube.py
@@ -128,5 +128,5 @@ class YoutubeLoader:
     
         # If we get here, all languages failed
         languages_tried = ", ".join(self.language)
-        log.warning(f"No transcript found for any of the specified languages: {languages_tried}")
-        raise NoTranscriptFound(f"No transcript found for any supported language")
+        log.warning(f"No transcript found for any of the specified languages: {languages_tried}. Verify if the video has transcripts, add more languages if needed.")
+        raise NoTranscriptFound(f"No transcript found for any supported language. Verify if the video has transcripts, add more languages if needed.")