From e50cde80f3e49a7f2e1f86b05d19e61eddc629eb Mon Sep 17 00:00:00 2001 From: Classic298 <27028174+Classic298@users.noreply.github.com> Date: Fri, 13 Jun 2025 13:05:33 +0200 Subject: [PATCH 1/2] Fix search unicode error (#11) --- backend/open_webui/models/chats.py | 79 +++++++++++++----------------- 1 file changed, 34 insertions(+), 45 deletions(-) diff --git a/backend/open_webui/models/chats.py b/backend/open_webui/models/chats.py index 0ac53a023..71cdd0bef 100644 --- a/backend/open_webui/models/chats.py +++ b/backend/open_webui/models/chats.py @@ -12,6 +12,7 @@ from pydantic import BaseModel, ConfigDict from sqlalchemy import BigInteger, Boolean, Column, String, Text, JSON from sqlalchemy import or_, func, select, and_, text from sqlalchemy.sql import exists +from sqlalchemy.sql.expression import bindparam #################### # Chat DB Schema @@ -232,6 +233,10 @@ class ChatTable: if chat is None: return None + # Sanitize message content for null characters before upserting + if isinstance(message.get("content"), str): + message["content"] = message["content"].replace("\x00", "") + chat = chat.chat history = chat.get("history", {}) @@ -580,7 +585,7 @@ class ChatTable: """ Filters chats based on a search query using Python, allowing pagination using skip and limit. """ - search_text = search_text.lower().strip() + search_text = search_text.replace("\u0000", "").lower().strip() if not search_text: return self.get_chat_list_by_user_id( @@ -614,24 +619,22 @@ class ChatTable: dialect_name = db.bind.dialect.name if dialect_name == "sqlite": # SQLite case: using JSON1 extension for JSON searching + sqlite_content_sql = ( + "EXISTS (" + " SELECT 1 " + " FROM json_each(Chat.chat, '$.messages') AS message " + " WHERE LOWER(message.value->>'content') LIKE '%' || :content_key || '%'" + ")" + ) + sqlite_content_clause = text(sqlite_content_sql) query = query.filter( - ( - Chat.title.ilike( - f"%{search_text}%" - ) # Case-insensitive search in title - | text( - """ - EXISTS ( - SELECT 1 - FROM json_each(Chat.chat, '$.messages') AS message - WHERE LOWER(message.value->>'content') LIKE '%' || :search_text || '%' - ) - """ - ) - ).params(search_text=search_text) + or_( + Chat.title.ilike(bindparam('title_key')), + sqlite_content_clause + ).params(title_key=f"%{search_text}%", content_key=search_text) ) - # Check if there are any tags to filter, it should have all the tags + # Tag filtering if "none" in tag_ids: query = query.filter( text( @@ -648,13 +651,7 @@ class ChatTable: and_( *[ text( - f""" - EXISTS ( - SELECT 1 - FROM json_each(Chat.meta, '$.tags') AS tag - WHERE tag.value = :tag_id_{tag_idx} - ) - """ + f"EXISTS (SELECT 1 FROM json_each(Chat.meta, '$.tags') AS tag WHERE tag.value = :tag_id_{tag_idx})" ).params(**{f"tag_id_{tag_idx}": tag_id}) for tag_idx, tag_id in enumerate(tag_ids) ] @@ -663,24 +660,22 @@ class ChatTable: elif dialect_name == "postgresql": # PostgreSQL relies on proper JSON query for search + postgres_content_sql = ( + "EXISTS (" + " SELECT 1 " + " FROM json_array_elements(Chat.chat->'messages') AS message " + " WHERE LOWER(message->>'content') LIKE '%' || :content_key || '%'" + ")" + ) + postgres_content_clause = text(postgres_content_sql) query = query.filter( - ( - Chat.title.ilike( - f"%{search_text}%" - ) # Case-insensitive search in title - | text( - """ - EXISTS ( - SELECT 1 - FROM json_array_elements(Chat.chat->'messages') AS message - WHERE LOWER(message->>'content') LIKE '%' || :search_text || '%' - ) - """ - ) - ).params(search_text=search_text) + or_( + Chat.title.ilike(bindparam('title_key')), + postgres_content_clause + ).params(title_key=f"%{search_text}%", content_key=search_text) ) - # Check if there are any tags to filter, it should have all the tags + # Tag filtering if "none" in tag_ids: query = query.filter( text( @@ -697,13 +692,7 @@ class ChatTable: and_( *[ text( - f""" - EXISTS ( - SELECT 1 - FROM json_array_elements_text(Chat.meta->'tags') AS tag - WHERE tag = :tag_id_{tag_idx} - ) - """ + f"EXISTS (SELECT 1 FROM json_array_elements_text(Chat.meta->'tags') AS tag WHERE tag = :tag_id_{tag_idx})" ).params(**{f"tag_id_{tag_idx}": tag_id}) for tag_idx, tag_id in enumerate(tag_ids) ] From 033e5c1e002626ce20eb562147b323b6fbed33c5 Mon Sep 17 00:00:00 2001 From: Classic298 <27028174+Classic298@users.noreply.github.com> Date: Fri, 13 Jun 2025 13:07:41 +0200 Subject: [PATCH 2/2] Update chats.py --- backend/open_webui/models/chats.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/backend/open_webui/models/chats.py b/backend/open_webui/models/chats.py index 71cdd0bef..92da599b3 100644 --- a/backend/open_webui/models/chats.py +++ b/backend/open_webui/models/chats.py @@ -634,7 +634,7 @@ class ChatTable: ).params(title_key=f"%{search_text}%", content_key=search_text) ) - # Tag filtering + # Check if there are any tags to filter, it should have all the tags if "none" in tag_ids: query = query.filter( text( @@ -651,7 +651,13 @@ class ChatTable: and_( *[ text( - f"EXISTS (SELECT 1 FROM json_each(Chat.meta, '$.tags') AS tag WHERE tag.value = :tag_id_{tag_idx})" + f""" + EXISTS ( + SELECT 1 + FROM json_each(Chat.meta, '$.tags') AS tag + WHERE tag.value = :tag_id_{tag_idx} + ) + """ ).params(**{f"tag_id_{tag_idx}": tag_id}) for tag_idx, tag_id in enumerate(tag_ids) ] @@ -675,7 +681,7 @@ class ChatTable: ).params(title_key=f"%{search_text}%", content_key=search_text) ) - # Tag filtering + # Check if there are any tags to filter, it should have all the tags if "none" in tag_ids: query = query.filter( text( @@ -692,7 +698,13 @@ class ChatTable: and_( *[ text( - f"EXISTS (SELECT 1 FROM json_array_elements_text(Chat.meta->'tags') AS tag WHERE tag = :tag_id_{tag_idx})" + f""" + EXISTS ( + SELECT 1 + FROM json_array_elements_text(Chat.meta->'tags') AS tag + WHERE tag = :tag_id_{tag_idx} + ) + """ ).params(**{f"tag_id_{tag_idx}": tag_id}) for tag_idx, tag_id in enumerate(tag_ids) ]