Update chats.py

clean up
This commit is contained in:
PVBLIC Foundation 2025-05-30 20:14:59 -07:00 committed by GitHub
parent 484133de4c
commit bcc2d7233d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -15,7 +15,7 @@ from sqlalchemy import or_, func, select, and_, text
from sqlalchemy.sql import exists from sqlalchemy.sql import exists
from sqlalchemy.sql.elements import TextClause from sqlalchemy.sql.elements import TextClause
# Import JSONB for PostgreSQL support # Import JSONB for PostgreSQL support - gracefully handle absence in other databases
try: try:
from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.dialects.postgresql import JSONB
except ImportError: except ImportError:
@ -25,18 +25,33 @@ except ImportError:
# Database Adapter # Database Adapter
#################### ####################
class DatabaseType(Enum): class DatabaseType(Enum):
SQLITE = "sqlite" """
POSTGRESQL_JSON = "postgresql_json" Enumeration of supported database types and their JSON capabilities.
POSTGRESQL_JSONB = "postgresql_jsonb"
UNSUPPORTED = "unsupported" Used by DatabaseAdapter to determine optimal query strategies and
features available for each database backend.
"""
SQLITE = "sqlite" # SQLite with JSON1 extension
POSTGRESQL_JSON = "postgresql_json" # PostgreSQL with standard JSON
POSTGRESQL_JSONB = "postgresql_jsonb" # PostgreSQL with binary JSONB
UNSUPPORTED = "unsupported" # Unsupported database type
class DatabaseAdapter: class DatabaseAdapter:
"""Centralized database-specific query generation with caching""" """Centralized database-specific query generation with caching"""
def __init__(self, db): def __init__(self, db):
"""
Initialize adapter with database session and setup caching.
Args:
db: SQLAlchemy database session for introspection and queries
"""
self.db = db self.db = db
self.dialect = db.bind.dialect.name self.dialect = db.bind.dialect.name
# Cache database types to avoid repeated introspection queries
self._cache: Dict[str, DatabaseType] = {} self._cache: Dict[str, DatabaseType] = {}
def get_database_type(self, column_name: str = "meta") -> DatabaseType: def get_database_type(self, column_name: str = "meta") -> DatabaseType:
@ -45,13 +60,20 @@ class DatabaseAdapter:
if cache_key in self._cache: if cache_key in self._cache:
return self._cache[cache_key] return self._cache[cache_key]
# Determine database type based on dialect and column inspection
if self.dialect == "sqlite": if self.dialect == "sqlite":
result = DatabaseType.SQLITE result = DatabaseType.SQLITE
elif self.dialect == "postgresql": elif self.dialect == "postgresql":
result = DatabaseType.POSTGRESQL_JSONB if self._is_jsonb_column(column_name) else DatabaseType.POSTGRESQL_JSON # Check if column uses JSONB or standard JSON
result = (
DatabaseType.POSTGRESQL_JSONB
if self._is_jsonb_column(column_name)
else DatabaseType.POSTGRESQL_JSON
)
else: else:
result = DatabaseType.UNSUPPORTED result = DatabaseType.UNSUPPORTED
# Cache result for future queries
self._cache[cache_key] = result self._cache[cache_key] = result
return result return result
@ -61,72 +83,99 @@ class DatabaseAdapter:
return False return False
try: try:
result = self.db.execute(text(""" # Query PostgreSQL system catalog for column data type
result = self.db.execute(
text(
"""
SELECT data_type FROM information_schema.columns SELECT data_type FROM information_schema.columns
WHERE table_name = 'chat' AND column_name = :column_name WHERE table_name = 'chat' AND column_name = :column_name
"""), {"column_name": column_name}) """
),
{"column_name": column_name},
)
row = result.fetchone() row = result.fetchone()
return row[0].lower() == 'jsonb' if row else False return row[0].lower() == "jsonb" if row else False
except Exception: except Exception:
# Gracefully handle permission or connection issues
return False return False
def _get_function_template(self, db_type: DatabaseType, function_type: str) -> Optional[str]: def _get_function_template(
self, db_type: DatabaseType, function_type: str
) -> Optional[str]:
"""Get function template for specific database type and function""" """Get function template for specific database type and function"""
templates = { templates = {
DatabaseType.SQLITE: { DatabaseType.SQLITE: {
"tag_exists": "EXISTS (SELECT 1 FROM json_each({column}, '$.tags') WHERE json_each.value = :tag_id)", # SQLite with JSON1 extension functions
"tag_exists": (
"EXISTS (SELECT 1 FROM json_each({column}, '$.tags') "
"WHERE json_each.value = :tag_id)"
),
"has_key": "json_extract({column}, '$.{path}') IS NOT NULL", "has_key": "json_extract({column}, '$.{path}') IS NOT NULL",
"array_length": "json_array_length({column}, '$.{path}')", "array_length": "json_array_length({column}, '$.{path}')",
"array_elements": "json_each({column}, '$.{path}')", "array_elements": "json_each({column}, '$.{path}')",
"content_search": """EXISTS ( "content_search": """EXISTS (
SELECT 1 FROM json_each({column}, '$.messages') AS message SELECT 1 FROM json_each({column}, '$.messages') AS message
WHERE LOWER(message.value->>'content') LIKE '%' || :search_text || '%' WHERE LOWER(message.value->>'content') LIKE '%' || :search_text || '%'
)""" )""",
}, },
DatabaseType.POSTGRESQL_JSON: { DatabaseType.POSTGRESQL_JSON: {
"tag_exists": "EXISTS (SELECT 1 FROM json_array_elements_text({column}->'tags') elem WHERE elem = :tag_id)", # PostgreSQL with standard JSON operators
"tag_exists": (
"EXISTS (SELECT 1 FROM json_array_elements_text({column}->'tags') "
"elem WHERE elem = :tag_id)"
),
"has_key": "{column} ? '{path}'", "has_key": "{column} ? '{path}'",
"array_length": "json_array_length({column}->'{path}')", "array_length": "json_array_length({column}->'{path}')",
"array_elements": "json_array_elements({column}->'{path}')", "array_elements": "json_array_elements({column}->'{path}')",
"content_search": """EXISTS ( "content_search": """EXISTS (
SELECT 1 FROM json_array_elements({column}->'messages') AS message SELECT 1 FROM json_array_elements({column}->'messages') AS message
WHERE LOWER(message->>'content') LIKE '%' || :search_text || '%' WHERE LOWER(message->>'content') LIKE '%' || :search_text || '%'
)""" )""",
}, },
DatabaseType.POSTGRESQL_JSONB: { DatabaseType.POSTGRESQL_JSONB: {
"tag_exists": "EXISTS (SELECT 1 FROM jsonb_array_elements_text({column}->'tags') elem WHERE elem = :tag_id)", # PostgreSQL with optimized JSONB binary format
"tag_exists": (
"EXISTS (SELECT 1 FROM jsonb_array_elements_text({column}->'tags') "
"elem WHERE elem = :tag_id)"
),
"has_key": "{column} ? '{path}'", "has_key": "{column} ? '{path}'",
"array_length": "jsonb_array_length({column}->'{path}')", "array_length": "jsonb_array_length({column}->'{path}')",
"array_elements": "jsonb_array_elements({column}->'{path}')", "array_elements": "jsonb_array_elements({column}->'{path}')",
"content_search": """EXISTS ( "content_search": """EXISTS (
SELECT 1 FROM jsonb_array_elements({column}->'messages') AS message SELECT 1 FROM jsonb_array_elements({column}->'messages') AS message
WHERE LOWER(message->>'content') LIKE '%' || :search_text || '%' WHERE LOWER(message->>'content') LIKE '%' || :search_text || '%'
)""" )""",
} },
} }
return templates.get(db_type, {}).get(function_type) return templates.get(db_type, {}).get(function_type)
def build_tag_filter(self, column_name: str, tag_ids: List[str], match_all: bool = True) -> Optional[Union[TextClause, and_, or_]]: def build_tag_filter(
self, column_name: str, tag_ids: List[str], match_all: bool = True
) -> Optional[Union[TextClause, and_, or_]]:
"""Build optimized tag filtering query""" """Build optimized tag filtering query"""
if not tag_ids: if not tag_ids:
return None return None
# Get database-specific template for tag existence checks
db_type = self.get_database_type(column_name) db_type = self.get_database_type(column_name)
template = self._get_function_template(db_type, "tag_exists") template = self._get_function_template(db_type, "tag_exists")
if not template: if not template:
return None return None
# Replace column placeholder with actual column reference
query_template = template.replace("{column}", f"Chat.{column_name}") query_template = template.replace("{column}", f"Chat.{column_name}")
if match_all: if match_all:
return and_(*[ # AND logic: all tags must be present
text(query_template).params(tag_id=tag_id) return and_(
for tag_id in tag_ids *[text(query_template).params(tag_id=tag_id) for tag_id in tag_ids]
]) )
else: else:
# OR logic: any tag can be present
# Use separate parameters to avoid conflicts
conditions = [] conditions = []
params = {} params = {}
for idx, tag_id in enumerate(tag_ids): for idx, tag_id in enumerate(tag_ids):
@ -145,6 +194,7 @@ class DatabaseAdapter:
if not template: if not template:
return None return None
# Replace column placeholder and bind search parameter
query = template.replace("{column}", "Chat.chat") query = template.replace("{column}", "Chat.chat")
return text(query).params(search_text=search_text) return text(query).params(search_text=search_text)
@ -152,78 +202,102 @@ class DatabaseAdapter:
"""Build filter for chats without tags""" """Build filter for chats without tags"""
db_type = self.get_database_type(column_name) db_type = self.get_database_type(column_name)
# Get templates for key existence and array length checks
has_key_template = self._get_function_template(db_type, "has_key") has_key_template = self._get_function_template(db_type, "has_key")
array_length_template = self._get_function_template(db_type, "array_length") array_length_template = self._get_function_template(db_type, "array_length")
if not has_key_template or not array_length_template: if not has_key_template or not array_length_template:
return None return None
has_key = has_key_template.replace("{column}", f"Chat.{column_name}").replace("{path}", "tags") # Build conditions for missing key and empty array
array_length = array_length_template.replace("{column}", f"Chat.{column_name}").replace("{path}", "tags") has_key = has_key_template.replace("{column}", f"Chat.{column_name}").replace(
"{path}", "tags"
return or_(
text(f"NOT ({has_key})"),
text(f"{array_length} = 0")
) )
array_length = array_length_template.replace(
"{column}", f"Chat.{column_name}"
).replace("{path}", "tags")
# Return OR condition: no tags key OR empty tags array
return or_(text(f"NOT ({has_key})"), text(f"{array_length} = 0"))
#################### ####################
# Utility Functions # Utility Functions
#################### ####################
def normalize_tag_name(tag_name: str) -> str: def normalize_tag_name(tag_name: str) -> str:
"""Normalize tag name for consistent storage and querying""" """Normalize tag name for consistent storage and querying"""
return tag_name.replace(" ", "_").lower() return tag_name.replace(" ", "_").lower()
def normalize_tag_names(tag_names: List[str]) -> List[str]: def normalize_tag_names(tag_names: List[str]) -> List[str]:
"""Normalize multiple tag names""" """Normalize multiple tag names"""
return [normalize_tag_name(tag) for tag in tag_names] return [normalize_tag_name(tag) for tag in tag_names]
#################### ####################
# Chat DB Schema # Chat DB Schema
#################### ####################
# Initialize logging with appropriate level for model operations
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["MODELS"]) log.setLevel(SRC_LOG_LEVELS["MODELS"])
class Chat(Base): class Chat(Base):
__tablename__ = "chat" __tablename__ = "chat"
# Primary identifier - UUID for global uniqueness
id = Column(String, primary_key=True) id = Column(String, primary_key=True)
# User ownership - references user system
user_id = Column(String) user_id = Column(String)
# Human-readable chat title for UI display
title = Column(Text) title = Column(Text)
chat = Column(JSON) # For JSONB support, change to: Column(JSONB) if JSONB else Column(JSON) # Full chat data including messages, history, and configuration
chat = Column(
JSON
) # For JSONB support, change to: Column(JSONB) if JSONB else Column(JSON)
created_at = Column(BigInteger) # Timestamp fields (Unix epoch for efficiency and timezone independence)
updated_at = Column(BigInteger) created_at = Column(BigInteger) # Creation timestamp
updated_at = Column(BigInteger) # Last modification timestamp
# Sharing functionality - UUID for shared public links
share_id = Column(Text, unique=True, nullable=True) share_id = Column(Text, unique=True, nullable=True)
archived = Column(Boolean, default=False) # Organization and state flags
pinned = Column(Boolean, default=False, nullable=True) archived = Column(Boolean, default=False) # Hidden from main view
pinned = Column(Boolean, default=False, nullable=True) # Pinned to top
meta = Column(JSON, server_default="{}") # For JSONB support, change to: Column(JSONB, server_default="{}") if JSONB else Column(JSON, server_default="{}") # Extensible metadata storage (tags, custom fields, etc.)
# For JSONB support, change to:
# Column(JSONB, server_default="{}") if JSONB else Column(JSON, server_default="{}")
meta = Column(JSON, server_default="{}")
# Optional folder organization
folder_id = Column(Text, nullable=True) folder_id = Column(Text, nullable=True)
class ChatModel(BaseModel): class ChatModel(BaseModel):
model_config = ConfigDict(from_attributes=True) model_config = ConfigDict(from_attributes=True)
# Core identification and ownership
id: str id: str
user_id: str user_id: str
title: str title: str
chat: dict chat: dict
# Timestamps (Unix epoch seconds)
created_at: int # timestamp in epoch created_at: int # timestamp in epoch
updated_at: int # timestamp in epoch updated_at: int # timestamp in epoch
share_id: Optional[str] = None # Optional fields with defaults
archived: bool = False share_id: Optional[str] = None # Public sharing identifier
pinned: Optional[bool] = False archived: bool = False # Archive status
pinned: Optional[bool] = False # Pin status (nullable for compatibility)
meta: dict = {} # Extensible fields
folder_id: Optional[str] = None meta: dict = {} # Metadata including tags
folder_id: Optional[str] = None # Folder organization
#################### ####################
@ -232,39 +306,90 @@ class ChatModel(BaseModel):
class ChatForm(BaseModel): class ChatForm(BaseModel):
"""
Form model for creating new chats with basic data validation.
Used for API endpoints that create new chat conversations.
Validates that the required chat data structure is present.
"""
chat: dict chat: dict
class ChatImportForm(ChatForm): class ChatImportForm(ChatForm):
meta: Optional[dict] = {} """
pinned: Optional[bool] = False Extended form model for importing chats with additional metadata.
folder_id: Optional[str] = None
Used for bulk import operations or when creating chats with
pre-existing metadata, tags, or organizational settings.
Extends ChatForm with optional metadata fields that are not
required for basic chat creation but useful for import scenarios.
"""
meta: Optional[dict] = {} # Tags and other metadata
pinned: Optional[bool] = False # Pin status
folder_id: Optional[str] = None # Folder assignment
class ChatTitleMessagesForm(BaseModel): class ChatTitleMessagesForm(BaseModel):
"""
Form model for operations that need title and messages separately.
Used by endpoints that work with chat titles and message lists
independently, such as chat generation or title updates.
"""
title: str title: str
messages: list[dict] messages: list[dict]
class ChatTitleForm(BaseModel): class ChatTitleForm(BaseModel):
"""
Simple form model for chat title updates.
Used by endpoints that only modify the chat title without
affecting the conversation content or metadata.
"""
title: str title: str
class ChatResponse(BaseModel): class ChatResponse(BaseModel):
"""
Complete response model for chat API endpoints.
Provides a comprehensive view of chat data for API responses,
including all fields that clients might need for display,
organization, and functionality.
Used by endpoints that return full chat information to ensure
consistent response structure across the API.
"""
# Core chat identification and data
id: str id: str
user_id: str user_id: str
title: str title: str
chat: dict chat: dict
# Timestamps for sorting and display
updated_at: int # timestamp in epoch updated_at: int # timestamp in epoch
created_at: int # timestamp in epoch created_at: int # timestamp in epoch
# Sharing and organization
share_id: Optional[str] = None # id of the chat to be shared share_id: Optional[str] = None # id of the chat to be shared
archived: bool archived: bool # archive status
pinned: Optional[bool] = False pinned: Optional[bool] = False # pin status
meta: dict = {} # Extensible metadata and organization
folder_id: Optional[str] = None meta: dict = {} # tags and other metadata
folder_id: Optional[str] = None # folder assignment
class ChatTitleIdResponse(BaseModel): class ChatTitleIdResponse(BaseModel):
"""
Lightweight response model for chat list operations.
Optimized for chat list views that only need basic identification
and sorting information. Reduces payload size for better performance
when displaying large numbers of chats.
Used by endpoints that return chat lists, search results, or
navigation menus where full chat content is not needed.
"""
id: str id: str
title: str title: str
updated_at: int updated_at: int
@ -273,6 +398,13 @@ class ChatTitleIdResponse(BaseModel):
class ChatTable: class ChatTable:
def __init__(self): def __init__(self):
"""
Initialize the ChatTable with default configuration.
No initialization parameters required as the class uses
dependency injection through the get_db() context manager
for database sessions.
"""
pass pass
def _get_adapter(self, db) -> DatabaseAdapter: def _get_adapter(self, db) -> DatabaseAdapter:
@ -301,10 +433,14 @@ class ChatTable:
meta_type = adapter.get_database_type("meta") meta_type = adapter.get_database_type("meta")
chat_type = adapter.get_database_type("chat") chat_type = adapter.get_database_type("chat")
# Build comprehensive compatibility report
compatibility = { compatibility = {
"database_type": dialect_name, "database_type": dialect_name,
"json_support": meta_type != DatabaseType.UNSUPPORTED, "json_support": meta_type != DatabaseType.UNSUPPORTED,
"jsonb_support": meta_type == DatabaseType.POSTGRESQL_JSONB or chat_type == DatabaseType.POSTGRESQL_JSONB, "jsonb_support": (
meta_type == DatabaseType.POSTGRESQL_JSONB
or chat_type == DatabaseType.POSTGRESQL_JSONB
),
"gin_indexes_support": dialect_name == "postgresql", "gin_indexes_support": dialect_name == "postgresql",
"tag_filtering_support": meta_type != DatabaseType.UNSUPPORTED, "tag_filtering_support": meta_type != DatabaseType.UNSUPPORTED,
"advanced_search_support": chat_type != DatabaseType.UNSUPPORTED, "advanced_search_support": chat_type != DatabaseType.UNSUPPORTED,
@ -312,17 +448,30 @@ class ChatTable:
"chat_column_type": chat_type.value, "chat_column_type": chat_type.value,
"features": [], "features": [],
"limitations": [], "limitations": [],
"recommendations": [] "recommendations": [],
} }
# Add features based on database type # Add database-specific features and limitations
if dialect_name == "sqlite": if dialect_name == "sqlite":
compatibility["features"] = ["JSON1 extension", "Basic tag filtering", "Message search"] compatibility["features"] = [
compatibility["limitations"] = ["No GIN indexes", "Limited JSON optimization"] "JSON1 extension",
"Basic tag filtering",
"Message search",
]
compatibility["limitations"] = [
"No GIN indexes",
"Limited JSON optimization",
]
elif dialect_name == "postgresql": elif dialect_name == "postgresql":
compatibility["features"] = ["Full JSON/JSONB support", "GIN indexes", "Advanced filtering"] compatibility["features"] = [
"Full JSON/JSONB support",
"GIN indexes",
"Advanced filtering",
]
if compatibility["jsonb_support"]: if compatibility["jsonb_support"]:
compatibility["features"].append("JSONB binary format optimization") compatibility["features"].append(
"JSONB binary format optimization"
)
return compatibility return compatibility
@ -330,14 +479,13 @@ class ChatTable:
log.error(f"Error checking database compatibility: {e}") log.error(f"Error checking database compatibility: {e}")
return {"error": str(e), "database_type": "unknown"} return {"error": str(e), "database_type": "unknown"}
def create_gin_indexes(self) -> bool: def create_gin_indexes(self) -> bool:
"""Create GIN indexes on JSONB columns for better query performance""" """Create GIN indexes on JSONB columns for better query performance"""
try: try:
with get_db() as db: with get_db() as db:
adapter = self._get_adapter(db) adapter = self._get_adapter(db)
# Only PostgreSQL supports GIN indexes
if db.bind.dialect.name != "postgresql": if db.bind.dialect.name != "postgresql":
return False return False
@ -347,41 +495,72 @@ class ChatTable:
has_jsonb_meta = meta_type == DatabaseType.POSTGRESQL_JSONB has_jsonb_meta = meta_type == DatabaseType.POSTGRESQL_JSONB
has_jsonb_chat = chat_type == DatabaseType.POSTGRESQL_JSONB has_jsonb_chat = chat_type == DatabaseType.POSTGRESQL_JSONB
# Need at least one JSONB column for GIN indexes
if not (has_jsonb_meta or has_jsonb_chat): if not (has_jsonb_meta or has_jsonb_chat):
return False return False
# Create GIN indexes # Create GIN indexes for meta column if it's JSONB
if has_jsonb_meta: if has_jsonb_meta:
try: try:
db.execute(text(""" # General meta column index for all JSON operations
db.execute(
text(
"""
CREATE INDEX IF NOT EXISTS idx_chat_meta_gin CREATE INDEX IF NOT EXISTS idx_chat_meta_gin
ON chat USING GIN (meta) ON chat USING GIN (meta)
""")) """
db.execute(text(""" )
)
# Specific index for tags array operations
db.execute(
text(
"""
CREATE INDEX IF NOT EXISTS idx_chat_meta_tags_gin CREATE INDEX IF NOT EXISTS idx_chat_meta_tags_gin
ON chat USING GIN ((meta->'tags')) ON chat USING GIN ((meta->'tags'))
""")) """
db.execute(text(""" )
)
# BTREE index for tag existence and count operations
db.execute(
text(
"""
CREATE INDEX IF NOT EXISTS idx_chat_has_tags CREATE INDEX IF NOT EXISTS idx_chat_has_tags
ON chat USING BTREE ((meta ? 'tags' AND jsonb_array_length(meta->'tags') > 0)) ON chat USING BTREE ((meta ? 'tags' AND
jsonb_array_length(meta->'tags') > 0))
WHERE meta ? 'tags' WHERE meta ? 'tags'
""")) """
)
)
except Exception: except Exception:
# Continue if some indexes fail - partial optimization is better than none
pass pass
# Create GIN indexes for chat column if it's JSONB
if has_jsonb_chat: if has_jsonb_chat:
try: try:
db.execute(text(""" # General chat content index for all operations
db.execute(
text(
"""
CREATE INDEX IF NOT EXISTS idx_chat_chat_gin CREATE INDEX IF NOT EXISTS idx_chat_chat_gin
ON chat USING GIN (chat) ON chat USING GIN (chat)
""")) """
db.execute(text(""" )
)
# Specific index for message search operations
db.execute(
text(
"""
CREATE INDEX IF NOT EXISTS idx_chat_messages_gin CREATE INDEX IF NOT EXISTS idx_chat_messages_gin
ON chat USING GIN ((chat->'messages')) ON chat USING GIN ((chat->'messages'))
""")) """
)
)
except Exception: except Exception:
# Continue if some indexes fail
pass pass
# Commit all index creations
db.commit() db.commit()
return True return True
@ -398,21 +577,24 @@ class ChatTable:
if db.bind.dialect.name != "postgresql": if db.bind.dialect.name != "postgresql":
return {"error": "GIN indexes are only supported on PostgreSQL"} return {"error": "GIN indexes are only supported on PostgreSQL"}
result = db.execute(text(""" # Query PostgreSQL system catalog for GIN indexes on chat table
result = db.execute(
text(
"""
SELECT indexname, indexdef SELECT indexname, indexdef
FROM pg_indexes FROM pg_indexes
WHERE tablename = 'chat' WHERE tablename = 'chat'
AND indexdef LIKE '%USING gin%' AND indexdef LIKE '%USING gin%'
""")) """
)
)
# Process existing indexes
indexes = {} indexes = {}
for row in result: for row in result:
indexes[row[0]] = { indexes[row[0]] = {"exists": True, "definition": row[1]}
"exists": True,
"definition": row[1]
}
# Check for expected indexes # Check for all expected indexes
expected_indexes = [ expected_indexes = [
"idx_chat_meta_gin", "idx_chat_meta_gin",
"idx_chat_chat_gin", "idx_chat_chat_gin",
@ -420,9 +602,10 @@ class ChatTable:
"idx_chat_has_tags", "idx_chat_has_tags",
"idx_chat_tag_count", "idx_chat_tag_count",
"idx_chat_json_tags", "idx_chat_json_tags",
"idx_chat_messages_gin" "idx_chat_messages_gin",
] ]
# Mark missing indexes
for idx_name in expected_indexes: for idx_name in expected_indexes:
if idx_name not in indexes: if idx_name not in indexes:
indexes[idx_name] = {"exists": False} indexes[idx_name] = {"exists": False}
@ -440,6 +623,7 @@ class ChatTable:
if db.bind.dialect.name != "postgresql": if db.bind.dialect.name != "postgresql":
return False return False
# List of all indexes that might have been created
indexes_to_drop = [ indexes_to_drop = [
"idx_chat_meta_gin", "idx_chat_meta_gin",
"idx_chat_chat_gin", "idx_chat_chat_gin",
@ -447,13 +631,17 @@ class ChatTable:
"idx_chat_has_tags", "idx_chat_has_tags",
"idx_chat_tag_count", "idx_chat_tag_count",
"idx_chat_json_tags", "idx_chat_json_tags",
"idx_chat_messages_gin" "idx_chat_messages_gin",
] ]
# Drop each index with CONCURRENTLY to avoid blocking
for idx_name in indexes_to_drop: for idx_name in indexes_to_drop:
try: try:
db.execute(text(f"DROP INDEX CONCURRENTLY IF EXISTS {idx_name}")) db.execute(
text(f"DROP INDEX CONCURRENTLY IF EXISTS {idx_name}")
)
except Exception: except Exception:
# Continue dropping other indexes even if one fails
pass pass
db.commit() db.commit()
@ -477,51 +665,76 @@ class ChatTable:
indexes_created = [] indexes_created = []
if has_jsonb_meta: if has_jsonb_meta:
# JSONB-specific tag indexes # JSONB-specific tag indexes for optimal performance
tag_indexes = [ tag_indexes = [
{ {
"name": "idx_chat_meta_tags_gin", "name": "idx_chat_meta_tags_gin",
"sql": "CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_meta_tags_gin ON chat USING GIN ((meta->'tags'))", "sql": (
"purpose": "Fast tag containment queries (@>, ?, etc.)" "CREATE INDEX CONCURRENTLY IF NOT EXISTS "
"idx_chat_meta_tags_gin ON chat USING GIN ((meta->'tags'))"
),
"purpose": "Fast tag containment queries (@>, ?, etc.)",
}, },
{ {
"name": "idx_chat_has_tags", "name": "idx_chat_has_tags",
"sql": "CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_has_tags ON chat USING BTREE ((meta ? 'tags' AND jsonb_array_length(meta->'tags') > 0)) WHERE meta ? 'tags'", "sql": (
"purpose": "Fast filtering for chats with/without tags" "CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_has_tags "
"ON chat USING BTREE ((meta ? 'tags' AND "
"jsonb_array_length(meta->'tags') > 0)) WHERE meta ? 'tags'"
),
"purpose": "Fast filtering for chats with/without tags",
}, },
{ {
"name": "idx_chat_tag_count", "name": "idx_chat_tag_count",
"sql": "CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_tag_count ON chat USING BTREE ((jsonb_array_length(meta->'tags'))) WHERE meta ? 'tags'", "sql": (
"purpose": "Fast filtering by number of tags" "CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_tag_count "
"ON chat USING BTREE ((jsonb_array_length(meta->'tags'))) "
"WHERE meta ? 'tags'"
),
"purpose": "Fast filtering by number of tags",
}, },
{ {
"name": "idx_chat_specific_tags", "name": "idx_chat_specific_tags",
"sql": "CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_specific_tags ON chat USING GIN ((meta->'tags')) WHERE jsonb_array_length(meta->'tags') > 0", "sql": (
"purpose": "Optimized for chats that actually have tags" "CREATE INDEX CONCURRENTLY IF NOT EXISTS "
} "idx_chat_specific_tags ON chat USING GIN ((meta->'tags')) "
"WHERE jsonb_array_length(meta->'tags') > 0"
),
"purpose": "Optimized for chats that actually have tags",
},
] ]
else: else:
# JSON-specific tag indexes (less optimal but still helpful) # JSON-specific tag indexes (less optimal but still helpful)
tag_indexes = [ tag_indexes = [
{ {
"name": "idx_chat_json_tags", "name": "idx_chat_json_tags",
"sql": "CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_json_tags ON chat USING GIN ((meta->'tags')) WHERE meta ? 'tags'", "sql": (
"purpose": "Tag queries for JSON columns" "CREATE INDEX CONCURRENTLY IF NOT EXISTS "
"idx_chat_json_tags ON chat USING GIN ((meta->'tags')) "
"WHERE meta ? 'tags'"
),
"purpose": "Tag queries for JSON columns",
} }
] ]
# Create each index with error handling
for index_info in tag_indexes: for index_info in tag_indexes:
try: try:
db.execute(text(index_info["sql"])) db.execute(text(index_info["sql"]))
indexes_created.append(f"{index_info['name']} ({index_info['purpose']})") indexes_created.append(
f"{index_info['name']} ({index_info['purpose']})"
)
log.info(f"Created tag index: {index_info['name']}") log.info(f"Created tag index: {index_info['name']}")
except Exception as e: except Exception as e:
log.warning(f"Failed to create {index_info['name']}: {e}") log.warning(f"Failed to create {index_info['name']}: {e}")
db.commit() db.commit()
# Log results
if indexes_created: if indexes_created:
log.info(f"Successfully created tag indexes: {len(indexes_created)} indexes") log.info(
f"Successfully created tag indexes: {len(indexes_created)} indexes"
)
for idx in indexes_created: for idx in indexes_created:
log.info(f"{idx}") log.info(f"{idx}")
else: else:
@ -545,54 +758,77 @@ class ChatTable:
stats = {} stats = {}
# Get basic tag statistics # Get comprehensive tag usage statistics
result = db.execute(text(""" result = db.execute(
text(
"""
SELECT SELECT
COUNT(*) as total_chats, COUNT(*) as total_chats,
COUNT(*) FILTER (WHERE meta ? 'tags') as chats_with_tags, COUNT(*) FILTER (WHERE meta ? 'tags') as chats_with_tags,
COUNT(*) FILTER (WHERE meta ? 'tags' AND jsonb_array_length(meta->'tags') > 0) as chats_with_actual_tags, COUNT(*) FILTER (WHERE meta ? 'tags' AND
AVG(CASE WHEN meta ? 'tags' THEN jsonb_array_length(meta->'tags') ELSE 0 END) as avg_tags_per_chat jsonb_array_length(meta->'tags') > 0) as chats_with_actual_tags,
AVG(CASE WHEN meta ? 'tags' THEN
jsonb_array_length(meta->'tags') ELSE 0 END) as avg_tags_per_chat
FROM chat FROM chat
""")) """
)
)
row = result.fetchone() row = result.fetchone()
if row: if row:
stats.update({ stats.update(
{
"total_chats": row[0], "total_chats": row[0],
"chats_with_tags": row[1], "chats_with_tags": row[1],
"chats_with_actual_tags": row[2], "chats_with_actual_tags": row[2],
"avg_tags_per_chat": float(row[3]) if row[3] else 0 "avg_tags_per_chat": float(row[3]) if row[3] else 0,
}) }
)
# Get most common tags # Get most frequently used tags for optimization insights
result = db.execute(text(""" result = db.execute(
text(
"""
SELECT tag_value, COUNT(*) as usage_count SELECT tag_value, COUNT(*) as usage_count
FROM chat, jsonb_array_elements_text(meta->'tags') as tag_value FROM chat, jsonb_array_elements_text(meta->'tags') as tag_value
WHERE meta ? 'tags' WHERE meta ? 'tags'
GROUP BY tag_value GROUP BY tag_value
ORDER BY usage_count DESC ORDER BY usage_count DESC
LIMIT 10 LIMIT 10
""")) """
)
)
stats["top_tags"] = [{"tag": row[0], "count": row[1]} for row in result] stats["top_tags"] = [{"tag": row[0], "count": row[1]} for row in result]
# Check index usage # Check current index status for recommendations
indexes = self.check_gin_indexes() indexes = self.check_gin_indexes()
tag_indexes = {k: v for k, v in indexes.items() if "tag" in k.lower()} tag_indexes = {k: v for k, v in indexes.items() if "tag" in k.lower()}
stats["tag_indexes"] = tag_indexes stats["tag_indexes"] = tag_indexes
# Provide recommendations # Generate intelligent recommendations based on usage patterns
recommendations = [] recommendations = []
if stats["chats_with_actual_tags"] > 1000: if stats["chats_with_actual_tags"] > 1000:
recommendations.append("Consider creating tag-specific indexes for better performance") recommendations.append(
"Consider creating tag-specific indexes for better performance"
)
if stats["avg_tags_per_chat"] > 5: if stats["avg_tags_per_chat"] > 5:
recommendations.append("High tag usage detected - GIN indexes will provide significant benefits") recommendations.append(
"High tag usage detected - GIN indexes will provide "
"significant benefits"
)
tag_coverage = stats["chats_with_actual_tags"] / stats["total_chats"] if stats["total_chats"] > 0 else 0 tag_coverage = (
stats["chats_with_actual_tags"] / stats["total_chats"]
if stats["total_chats"] > 0
else 0
)
if tag_coverage < 0.1: if tag_coverage < 0.1:
recommendations.append("Low tag usage - consider partial indexes with WHERE clauses") recommendations.append(
"Low tag usage - consider partial indexes with WHERE clauses"
)
stats["recommendations"] = recommendations stats["recommendations"] = recommendations
stats["tag_coverage_percentage"] = round(tag_coverage * 100, 2) stats["tag_coverage_percentage"] = round(tag_coverage * 100, 2)
@ -687,12 +923,15 @@ class ChatTable:
if chat is None: if chat is None:
return None return None
# Remove all existing tags from this chat
self.delete_all_tags_by_id_and_user_id(id, user.id) self.delete_all_tags_by_id_and_user_id(id, user.id)
# Clean up orphaned tags (tags no longer used by any chat)
for tag in chat.meta.get("tags", []): for tag in chat.meta.get("tags", []):
if self.count_chats_by_tag_name_and_user_id(tag, user.id) == 0: if self.count_chats_by_tag_name_and_user_id(tag, user.id) == 0:
Tags.delete_tag_by_name_and_user_id(tag, user.id) Tags.delete_tag_by_name_and_user_id(tag, user.id)
# Add new tags to the chat
for tag_name in tags: for tag_name in tags:
if tag_name.lower() == "none": if tag_name.lower() == "none":
continue continue
@ -733,6 +972,7 @@ class ChatTable:
chat = chat.chat chat = chat.chat
history = chat.get("history", {}) history = chat.get("history", {})
# Upsert the message (merge with existing or create new)
if message_id in history.get("messages", {}): if message_id in history.get("messages", {}):
history["messages"][message_id] = { history["messages"][message_id] = {
**history["messages"][message_id], **history["messages"][message_id],
@ -741,6 +981,7 @@ class ChatTable:
else: else:
history["messages"][message_id] = message history["messages"][message_id] = message
# Update current message pointer for conversation flow
history["currentId"] = message_id history["currentId"] = message_id
chat["history"] = history chat["history"] = history
@ -756,6 +997,7 @@ class ChatTable:
chat = chat.chat chat = chat.chat
history = chat.get("history", {}) history = chat.get("history", {})
# Add status to message's status history if message exists
if message_id in history.get("messages", {}): if message_id in history.get("messages", {}):
status_history = history["messages"][message_id].get("statusHistory", []) status_history = history["messages"][message_id].get("statusHistory", [])
status_history.append(status) status_history.append(status)
@ -975,7 +1217,8 @@ class ChatTable:
all_chats = query.all() all_chats = query.all()
# result has to be destructured from sqlalchemy `row` and mapped to a dict since the `ChatModel`is not the returned dataclass. # result has to be destructured from sqlalchemy `row` and mapped to a dict
# since the `ChatModel` is not the returned dataclass.
return [ return [
ChatTitleIdResponse.model_validate( ChatTitleIdResponse.model_validate(
{ {
@ -1013,7 +1256,8 @@ class ChatTable:
try: try:
with get_db() as db: with get_db() as db:
# it is possible that the shared link was deleted. hence, # it is possible that the shared link was deleted. hence,
# we check if the chat is still shared by checking if a chat with the share_id exists # we check if the chat is still shared by checking if a chat with
# the share_id exists
chat = db.query(Chat).filter_by(share_id=id).first() chat = db.query(Chat).filter_by(share_id=id).first()
if chat: if chat:
@ -1078,7 +1322,8 @@ class ChatTable:
limit: int = 60, limit: int = 60,
) -> list[ChatModel]: ) -> list[ChatModel]:
""" """
Filters chats based on a search query using Python, allowing pagination using skip and limit. Filters chats based on a search query using Python, allowing pagination
using skip and limit.
""" """
search_text = search_text.lower().strip() search_text = search_text.lower().strip()
@ -1089,7 +1334,8 @@ class ChatTable:
search_text_words = search_text.split(" ") search_text_words = search_text.split(" ")
# search_text might contain 'tag:tag_name' format so we need to extract the tag_name, split the search_text and remove the tags # search_text might contain 'tag:tag_name' format so we need to extract
# the tag_name, split the search_text and remove the tags
tag_ids = [ tag_ids = [
normalize_tag_name(word.replace("tag:", "")) normalize_tag_name(word.replace("tag:", ""))
for word in search_text_words for word in search_text_words
@ -1207,7 +1453,12 @@ class ChatTable:
return [] return []
def get_chats_by_multiple_tags( def get_chats_by_multiple_tags(
self, user_id: str, tag_names: List[str], match_all: bool = True, skip: int = 0, limit: int = 50 self,
user_id: str,
tag_names: List[str],
match_all: bool = True,
skip: int = 0,
limit: int = 50,
) -> list[ChatModel]: ) -> list[ChatModel]:
"""Get chats that match multiple tags""" """Get chats that match multiple tags"""
with get_db() as db: with get_db() as db:
@ -1231,7 +1482,9 @@ class ChatTable:
else: else:
return [] return []
def get_chats_without_tags(self, user_id: str, skip: int = 0, limit: int = 50) -> list[ChatModel]: def get_chats_without_tags(
self, user_id: str, skip: int = 0, limit: int = 50
) -> list[ChatModel]:
"""Get chats that have no tags""" """Get chats that have no tags"""
with get_db() as db: with get_db() as db:
adapter = self._get_adapter(db) adapter = self._get_adapter(db)