diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py
index 947b7ed49..b397e5c94 100644
--- a/backend/open_webui/routers/retrieval.py
+++ b/backend/open_webui/routers/retrieval.py
@@ -1258,7 +1258,11 @@ def process_file(
                         },
                     )
                 ]
-            text_content = " ".join([doc.page_content for doc in docs])
+            text_content = " ".join([doc.page_content for doc in docs if doc.page_content])
+
+        # Normalize a missing (None) text_content to an empty string before it is stored and hashed
+        if not text_content:
+            text_content = ""
 
         log.debug(f"text_content: {text_content}")
         Files.update_file_data_by_id(
@@ -1266,7 +1270,9 @@ def process_file(
             {"content": text_content},
         )
 
-        hash = calculate_sha256_string(text_content)
+        # Ensure we always pass a valid string to calculate_sha256_string
+        hash_input = text_content if text_content else ""
+        hash = calculate_sha256_string(hash_input)
         Files.update_file_hash_by_id(file.id, hash)
 
         if not request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL:
@@ -2042,7 +2048,7 @@ def process_files_batch(
                 )
             ]
 
-            hash = calculate_sha256_string(text_content)
+            hash = calculate_sha256_string(text_content or "")
             Files.update_file_hash_by_id(file.id, hash)
             Files.update_file_data_by_id(file.id, {"content": text_content})
 
@@ -2088,8 +2094,12 @@ def process_files_batch(
 
 def clean_text_content(text: str) -> str:
     """Simple, effective text cleaning with special handling for PPTX artifacts"""
-    if not text:
-        return text
+    if not text or text is None:
+        return ""  # Always return empty string instead of None
+    
+    # Ensure we have a string
+    if not isinstance(text, str):
+        text = str(text)
     
     # Step 1: PPTX-specific cleaning - handle double-escaped sequences first
     text = text.replace('\\\\n', '\n')  # Double-escaped newlines in PPTX