diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index caabddd03..b88677bb2 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -199,173 +199,181 @@ def get_rf( class TextCleaner: """Modular text cleaning system for document processing and embedding preparation.""" - + @staticmethod def normalize_escape_sequences(text: str) -> str: """Normalize escape sequences from various document formats.""" if not text: return "" - + # Handle double-escaped sequences (common in PPTX) replacements = [ - ('\\\\n', '\n'), # Double-escaped newlines - ('\\\\t', ' '), # Double-escaped tabs - ('\\\\"', '"'), # Double-escaped quotes - ('\\\\r', ''), # Double-escaped carriage returns - ('\\\\/', '/'), # Double-escaped slashes - ('\\\\', '\\'), # Convert double backslashes to single + ("\\\\n", "\n"), # Double-escaped newlines + ("\\\\t", " "), # Double-escaped tabs + ('\\\\"', '"'), # Double-escaped quotes + ("\\\\r", ""), # Double-escaped carriage returns + ("\\\\/", "/"), # Double-escaped slashes + ("\\\\", "\\"), # Convert double backslashes to single ] - + for old, new in replacements: text = text.replace(old, new) - + # Handle single-escaped sequences single_replacements = [ - ('\\n', '\n'), # Single-escaped newlines - ('\\t', ' '), # Single-escaped tabs - ('\\"', '"'), # Single-escaped quotes - ('\\\'', "'"), # Single-escaped single quotes - ('\\r', ''), # Single-escaped carriage returns - ('\\/', '/'), # Single-escaped slashes + ("\\n", "\n"), # Single-escaped newlines + ("\\t", " "), # Single-escaped tabs + ('\\"', '"'), # Single-escaped quotes + ("\\'", "'"), # Single-escaped single quotes + ("\\r", ""), # Single-escaped carriage returns + ("\\/", "/"), # Single-escaped slashes ] - + for old, new in single_replacements: text = text.replace(old, new) - + # Remove any remaining backslash artifacts - text = re.sub(r'\\[a-zA-Z]', '', text) # Remove \letter patterns - text = re.sub(r'\\[0-9]', '', text) # Remove \number patterns - text = re.sub(r'\\[^a-zA-Z0-9\s]', '', text) # Remove \symbol patterns - text = re.sub(r'\\+', '', text) # Remove remaining backslashes - + text = re.sub(r"\\[a-zA-Z]", "", text) # Remove \letter patterns + text = re.sub(r"\\[0-9]", "", text) # Remove \number patterns + text = re.sub(r"\\[^a-zA-Z0-9\s]", "", text) # Remove \symbol patterns + text = re.sub(r"\\+", "", text) # Remove remaining backslashes + return text - + @staticmethod def normalize_unicode(text: str) -> str: """Convert special Unicode characters to ASCII equivalents.""" if not text: return "" - + unicode_map = { - '–': '-', # En dash - '—': '-', # Em dash - '‘': "'", # Smart single quote left - '’': "'", # Smart single quote right - '“': '"', # Smart double quote left - '”': '"', # Smart double quote right - '…': '...', # Ellipsis - '™': ' TM', # Trademark - '®': ' R', # Registered - '©': ' C', # Copyright - '°': ' deg', # Degree symbol + "–": "-", # En dash + "—": "-", # Em dash + "‘": "'", # Smart single quote left + "’": "'", # Smart single quote right + "“": '"', # Smart double quote left + "”": '"', # Smart double quote right + "…": "...", # Ellipsis + "™": " TM", # Trademark + "®": " R", # Registered + "©": " C", # Copyright + "°": " deg", # Degree symbol } - + for unicode_char, ascii_char in unicode_map.items(): text = text.replace(unicode_char, ascii_char) - + return text - + @staticmethod def normalize_quotes(text: str) -> str: """Clean up quote-related artifacts and normalize quote marks.""" if not text:
return "" - + # Remove quote artifacts quote_patterns = [ - (r'\\+"', '"'), # Multiple backslashes before quotes - (r'\\"', '"'), # Escaped double quotes - (r"\\'", "'"), # Escaped single quotes - (r'\\&', '&'), # Escaped ampersands - (r'""', '"'), # Double quotes - (r"''", "'"), # Double single quotes + (r'\\+"', '"'), # Multiple backslashes before quotes + (r'\\"', '"'), # Escaped double quotes + (r"\\'", "'"), # Escaped single quotes + (r"\\&", "&"), # Escaped ampersands + (r'""', '"'), # Double quotes + (r"''", "'"), # Double single quotes ] - + for pattern, replacement in quote_patterns: text = re.sub(pattern, replacement, text) - + return text - + @staticmethod def normalize_whitespace(text: str, preserve_paragraphs: bool = True) -> str: """Normalize whitespace while optionally preserving paragraph structure.""" if not text: return "" - + if preserve_paragraphs: # Preserve paragraph breaks (double newlines) but clean up excessive spacing - text = re.sub(r'[ \t]+', ' ', text) # Multiple spaces/tabs -> single space - text = re.sub(r'\n\s*\n\s*\n+', '\n\n', text) # Multiple empty lines -> double line break - text = re.sub(r'^\s+|\s+$', '', text, flags=re.MULTILINE) # Trim line-level whitespace + text = re.sub(r"[ \t]+", " ", text) # Multiple spaces/tabs -> single space + text = re.sub( + r"\n\s*\n\s*\n+", "\n\n", text + ) # Multiple empty lines -> double line break + text = re.sub( + r"^\s+|\s+$", "", text, flags=re.MULTILINE + ) # Trim line-level whitespace else: # Flatten all whitespace for embedding - text = re.sub(r'\n+', ' ', text) # All newlines to spaces - text = re.sub(r'\s+', ' ', text) # All whitespace to single spaces - + text = re.sub(r"\n+", " ", text) # All newlines to spaces + text = re.sub(r"\s+", " ", text) # All whitespace to single spaces + return text.strip() - + @staticmethod def remove_artifacts(text: str) -> str: """Remove document format artifacts and orphaned elements.""" if not text: return "" - + # Remove orphaned punctuation - text = re.sub(r'^\s*[)\]}]+\s*', '', text) # Orphaned closing brackets at start - text = re.sub(r'\n\s*[)\]}]+\s*\n', '\n\n', text) # Orphaned closing brackets on own lines - + text = re.sub(r"^\s*[)\]}]+\s*", "", text) # Orphaned closing brackets at start + text = re.sub( + r"\n\s*[)\]}]+\s*\n", "\n\n", text + ) # Orphaned closing brackets on own lines + # Remove excessive punctuation - text = re.sub(r'[.]{3,}', '...', text) # Multiple dots to ellipsis - text = re.sub(r'[-]{3,}', '---', text) # Multiple dashes - + text = re.sub(r"[.]{3,}", "...", text) # Multiple dots to ellipsis + text = re.sub(r"[-]{3,}", "---", text) # Multiple dashes + # Remove empty parentheses and brackets - text = re.sub(r'\(\s*\)', '', text) # Empty parentheses - text = re.sub(r'\[\s*\]', '', text) # Empty square brackets - text = re.sub(r'\{\s*\}', '', text) # Empty curly brackets - + text = re.sub(r"\(\s*\)", "", text) # Empty parentheses + text = re.sub(r"\[\s*\]", "", text) # Empty square brackets + text = re.sub(r"\{\s*\}", "", text) # Empty curly brackets + return text - + @classmethod def clean_for_chunking(cls, text: str) -> str: """Clean text for semantic chunking - preserves structure but normalizes content.""" if not text: return "" - + # Apply all cleaning steps while preserving paragraph structure text = cls.normalize_escape_sequences(text) text = cls.normalize_unicode(text) text = cls.normalize_quotes(text) text = cls.remove_artifacts(text) text = cls.normalize_whitespace(text, preserve_paragraphs=True) - + return text - + @classmethod def 
clean_for_embedding(cls, text: str) -> str: """Clean text for embedding - flattens structure and optimizes for vector similarity.""" if not text: return "" - + # Start with chunking-level cleaning text = cls.clean_for_chunking(text) - + # Flatten for embedding text = cls.normalize_whitespace(text, preserve_paragraphs=False) - + return text - + @classmethod def clean_for_storage(cls, text: str) -> str: """Clean text for storage - most aggressive cleaning for database storage.""" if not text: return "" - + # Start with embedding-level cleaning text = cls.clean_for_embedding(text) - + # Additional aggressive cleaning for storage - text = re.sub(r'\\([^a-zA-Z0-9\s])', r'\1', text) # Remove any remaining escape sequences - + text = re.sub( + r"\\([^a-zA-Z0-9\s])", r"\1", text + ) # Remove any remaining escape sequences + return text @@ -374,37 +382,43 @@ def clean_text_content(text: str) -> str: return TextCleaner.clean_for_chunking(text) -def create_semantic_chunks(text: str, max_chunk_size: int, overlap_size: int) -> TypingList[str]: +def create_semantic_chunks( + text: str, max_chunk_size: int, overlap_size: int +) -> TypingList[str]: """Create semantically aware chunks that respect document structure""" if not text or len(text) <= max_chunk_size: return [text] if text else [] - + chunks = [] - + # Split by double line breaks (paragraphs) first - paragraphs = text.split('\n\n') - + paragraphs = text.split("\n\n") + current_chunk = "" - + for paragraph in paragraphs: paragraph = paragraph.strip() if not paragraph: continue - + # If adding this paragraph would exceed chunk size if current_chunk and len(current_chunk) + len(paragraph) + 2 > max_chunk_size: # Try to split the current chunk at sentence boundaries if it's too long if len(current_chunk) > max_chunk_size: - sentence_chunks = split_by_sentences(current_chunk, max_chunk_size, overlap_size) + sentence_chunks = split_by_sentences( + current_chunk, max_chunk_size, overlap_size + ) chunks.extend(sentence_chunks) else: chunks.append(current_chunk.strip()) - + # Start new chunk with overlap from previous chunk if applicable if chunks and overlap_size > 0: prev_chunk = chunks[-1] overlap_text = get_text_overlap(prev_chunk, overlap_size) - current_chunk = overlap_text + "\n\n" + paragraph if overlap_text else paragraph + current_chunk = ( + overlap_text + "\n\n" + paragraph if overlap_text else paragraph + ) else: current_chunk = paragraph else: @@ -413,39 +427,45 @@ def create_semantic_chunks(text: str, max_chunk_size: int, overlap_size: int) -> current_chunk += "\n\n" + paragraph else: current_chunk = paragraph - + # Add the last chunk if current_chunk: if len(current_chunk) > max_chunk_size: - sentence_chunks = split_by_sentences(current_chunk, max_chunk_size, overlap_size) + sentence_chunks = split_by_sentences( + current_chunk, max_chunk_size, overlap_size + ) chunks.extend(sentence_chunks) else: chunks.append(current_chunk.strip()) - + return [chunk for chunk in chunks if chunk.strip()] -def split_by_sentences(text: str, max_chunk_size: int, overlap_size: int) -> TypingList[str]: +def split_by_sentences( + text: str, max_chunk_size: int, overlap_size: int +) -> TypingList[str]: """Split text by sentences when paragraph-level splitting isn't sufficient""" # Split by sentence endings - sentences = re.split(r'(?<=[.!?])\s+', text) - + sentences = re.split(r"(?<=[.!?])\s+", text) + chunks = [] current_chunk = "" - + for sentence in sentences: sentence = sentence.strip() if not sentence: continue - + # If adding this sentence would exceed 
chunk size if current_chunk and len(current_chunk) + len(sentence) + 1 > max_chunk_size: chunks.append(current_chunk.strip()) - + # Start new chunk with overlap if overlap_size > 0: overlap_text = get_text_overlap(current_chunk, overlap_size) - current_chunk = overlap_text + " " + sentence if overlap_text else sentence + current_chunk = ( + overlap_text + " " + sentence if overlap_text else sentence + ) else: current_chunk = sentence else: @@ -454,11 +474,11 @@ def split_by_sentences(text: str, max_chunk_size: int, overlap_size: int) -> Typ current_chunk += " " + sentence else: current_chunk = sentence - + # Add the last chunk if current_chunk: chunks.append(current_chunk.strip()) - + return [chunk for chunk in chunks if chunk.strip()] @@ -466,18 +486,18 @@ def get_text_overlap(text: str, overlap_size: int) -> str: """Get the last overlap_size characters from text, preferring word boundaries""" if not text or overlap_size <= 0: return "" - + if len(text) <= overlap_size: return text - + # Try to find a good word boundary within the overlap region overlap_text = text[-overlap_size:] - + # Find the first space to avoid cutting words - space_index = overlap_text.find(' ') + space_index = overlap_text.find(" ") if space_index > 0: return overlap_text[space_index:].strip() - + return overlap_text.strip() @@ -570,7 +590,8 @@ async def update_embedding_config( request: Request, form_data: EmbeddingModelUpdateForm, user=Depends(get_admin_user) ): log.info( - f"Updating embedding model: {request.app.state.config.RAG_EMBEDDING_MODEL} to {form_data.embedding_model}" + f"Updating embedding model: {request.app.state.config.RAG_EMBEDDING_MODEL} " + f"to {form_data.embedding_model}" ) try: request.app.state.config.RAG_EMBEDDING_ENGINE = form_data.embedding_engine @@ -1396,34 +1417,33 @@ def save_docs_to_vector_db( if split: # Apply advanced content-aware splitting and text cleaning processed_docs = [] - + for doc in docs: # Clean the text content before chunking if not doc.page_content: continue - + # Apply text cleaning before chunking using new modular system cleaned_content = TextCleaner.clean_for_chunking(doc.page_content) - + # Create semantic chunks from cleaned content chunks = create_semantic_chunks( cleaned_content, request.app.state.config.CHUNK_SIZE, - request.app.state.config.CHUNK_OVERLAP + request.app.state.config.CHUNK_OVERLAP, ) - + # Create new documents for each chunk for i, chunk in enumerate(chunks): chunk_metadata = { **doc.metadata, "chunk_index": i, - "total_chunks": len(chunks) + "total_chunks": len(chunks), } - processed_docs.append(Document( - page_content=chunk, - metadata=chunk_metadata - )) - + processed_docs.append( + Document(page_content=chunk, metadata=chunk_metadata) + ) + docs = processed_docs if len(docs) == 0: @@ -1501,7 +1521,7 @@ def save_docs_to_vector_db( # Prepare texts for embedding using the new modular cleaning system cleaned_texts = [TextCleaner.clean_for_embedding(text) for text in texts] - + embeddings = embedding_function( cleaned_texts, prefix=RAG_EMBEDDING_CONTENT_PREFIX, @@ -1513,13 +1533,15 @@ def save_docs_to_vector_db( for idx in range(len(texts)): # Apply consistent storage-level cleaning text_to_store = TextCleaner.clean_for_storage(texts[idx]) - - items.append({ - "id": str(uuid.uuid4()), - "text": text_to_store, - "vector": embeddings[idx], - "metadata": metadatas[idx], - }) + + items.append( + { + "id": str(uuid.uuid4()), + "text": text_to_store, + "vector": embeddings[idx], + "metadata": metadatas[idx], + } + ) VECTOR_DB_CLIENT.insert( 
collection_name=collection_name, @@ -1565,7 +1587,9 @@ def process_file( docs = [ Document( - page_content=TextCleaner.clean_for_chunking(form_data.content.replace("
", "\n")), + page_content=TextCleaner.clean_for_chunking( + form_data.content.replace("
", "\n") + ), metadata={ **file.meta, "name": file.filename, @@ -1588,7 +1612,9 @@ def process_file( if result is not None and len(result.ids[0]) > 0: docs = [ Document( - page_content=TextCleaner.clean_for_chunking(result.documents[0][idx]), + page_content=TextCleaner.clean_for_chunking( + result.documents[0][idx] + ), metadata=result.metadatas[0][idx], ) for idx, id in enumerate(result.ids[0]) @@ -1596,7 +1622,9 @@ def process_file( else: docs = [ Document( - page_content=TextCleaner.clean_for_chunking(file.data.get("content", "")), + page_content=TextCleaner.clean_for_chunking( + file.data.get("content", "") + ), metadata={ **file.meta, "name": file.filename, @@ -1645,22 +1673,26 @@ def process_file( cleaned_docs = [] for doc in docs: cleaned_content = TextCleaner.clean_for_chunking(doc.page_content) - - cleaned_docs.append(Document( - page_content=cleaned_content, - metadata={ - **doc.metadata, - "name": file.filename, - "created_by": file.user_id, - "file_id": file.id, - "source": file.filename, - }, - )) + + cleaned_docs.append( + Document( + page_content=cleaned_content, + metadata={ + **doc.metadata, + "name": file.filename, + "created_by": file.user_id, + "file_id": file.id, + "source": file.filename, + }, + ) + ) docs = cleaned_docs else: docs = [ Document( - page_content=TextCleaner.clean_for_chunking(file.data.get("content", "")), + page_content=TextCleaner.clean_for_chunking( + file.data.get("content", "") + ), metadata={ **file.meta, "name": file.filename, @@ -1670,7 +1702,9 @@ def process_file( }, ) ] - text_content = " ".join([doc.page_content for doc in docs if doc.page_content]) + text_content = " ".join( + [doc.page_content for doc in docs if doc.page_content] + ) # Ensure text_content is never None or empty for hash calculation if not text_content: @@ -2449,7 +2483,9 @@ def process_files_batch( docs: List[Document] = [ Document( - page_content=TextCleaner.clean_for_chunking(text_content.replace("
", "\n")), + page_content=TextCleaner.clean_for_chunking( + text_content.replace("
", "\n") + ), metadata={ **file.meta, "name": file.filename, @@ -2509,10 +2545,10 @@ def delete_file_from_vector_db(file_id: str) -> bool: Delete all vector embeddings for a specific file from the vector database. This function works with any vector database (Pinecone, ChromaDB, etc.) and handles the cleanup when a file is deleted from the chat. - + Args: file_id (str): The ID of the file to delete from vector database - + Returns: bool: True if deletion was successful, False otherwise """ @@ -2521,30 +2557,32 @@ def delete_file_from_vector_db(file_id: str) -> bool: file = Files.get_file_by_id(file_id) if not file: return False - + # Get the file hash for vector deletion file_hash = file.hash if not file_hash: return False - + # Try to get collection name from file metadata collection_name = None - if hasattr(file, 'meta') and file.meta: - collection_name = file.meta.get('collection_name') - + if hasattr(file, "meta") and file.meta: + collection_name = file.meta.get("collection_name") + # If no collection name in metadata, try common patterns used by Open WebUI if not collection_name: # Open WebUI typically uses these patterns: possible_collections = [ f"open-webui_file-{file_id}", # Most common pattern - f"file-{file_id}", # Alternative pattern - f"open-webui_{file_id}", # Another possible pattern + f"file-{file_id}", # Alternative pattern + f"open-webui_{file_id}", # Another possible pattern ] - + # Try each possible collection name for possible_collection in possible_collections: try: - if VECTOR_DB_CLIENT.has_collection(collection_name=possible_collection): + if VECTOR_DB_CLIENT.has_collection( + collection_name=possible_collection + ): result = VECTOR_DB_CLIENT.delete( collection_name=possible_collection, filter={"hash": file_hash}, @@ -2553,19 +2591,21 @@ def delete_file_from_vector_db(file_id: str) -> bool: return True except Exception as e: continue - + # If none of the standard patterns work, try searching through all collections try: deleted_count = 0 - + # Get all collections (this method varies by vector DB implementation) - if hasattr(VECTOR_DB_CLIENT, 'list_collections'): + if hasattr(VECTOR_DB_CLIENT, "list_collections"): try: collections = VECTOR_DB_CLIENT.list_collections() - + for collection in collections: try: - if VECTOR_DB_CLIENT.has_collection(collection_name=collection): + if VECTOR_DB_CLIENT.has_collection( + collection_name=collection + ): result = VECTOR_DB_CLIENT.delete( collection_name=collection, filter={"hash": file_hash}, @@ -2576,14 +2616,16 @@ def delete_file_from_vector_db(file_id: str) -> bool: continue except Exception as e: pass - + return deleted_count > 0 - + except Exception as e: return False - + # Delete from the specific collection found in metadata - if collection_name and VECTOR_DB_CLIENT.has_collection(collection_name=collection_name): + if collection_name and VECTOR_DB_CLIENT.has_collection( + collection_name=collection_name + ): try: result = VECTOR_DB_CLIENT.delete( collection_name=collection_name, @@ -2596,6 +2638,6 @@ def delete_file_from_vector_db(file_id: str) -> bool: return False else: return False - + except Exception as e: return False