From 41eb4c7d4d67a1c395a00f3ff819daa6eb030b9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=A8NW=C2=A8?= <¨neroworld@mail.ru¨> Date: Sun, 5 Apr 2026 05:00:55 +0100 Subject: [PATCH] feat: add database skills for ClickHouse, PostgreSQL, and SQLite - Add ClickHouse patterns skill - Add PostgreSQL patterns skill - Add SQLite patterns skill - Update backend-developer agent to reference PostgreSQL and SQLite skills - Update go-developer agent to reference ClickHouse, PostgreSQL, and SQLite skills - Update capability-index.yaml with database integration capabilities --- .kilo/agents/backend-developer.md | 2 + .kilo/agents/go-developer.md | 3 + .kilo/capability-index.yaml | 66 +-- .kilo/skills/clickhouse-patterns/SKILL.md | 346 +++++++++++++ .kilo/skills/postgresql-patterns/SKILL.md | 370 ++++++++++++++ .kilo/skills/sqlite-patterns/SKILL.md | 566 ++++++++++++++++++++++ 6 files changed, 1324 insertions(+), 29 deletions(-) create mode 100644 .kilo/skills/clickhouse-patterns/SKILL.md create mode 100644 .kilo/skills/postgresql-patterns/SKILL.md create mode 100644 .kilo/skills/sqlite-patterns/SKILL.md diff --git a/.kilo/agents/backend-developer.md b/.kilo/agents/backend-developer.md index 6779758..53e4a09 100644 --- a/.kilo/agents/backend-developer.md +++ b/.kilo/agents/backend-developer.md @@ -268,6 +268,8 @@ This agent uses the following skills for comprehensive Node.js development: | Skill | Purpose | |-------|---------| | `nodejs-db-patterns` | SQLite, PostgreSQL, MongoDB patterns | +| `postgresql-patterns` | Advanced PostgreSQL features and optimization | +| `sqlite-patterns` | SQLite-specific patterns and best practices | ### Package Management | Skill | Purpose | diff --git a/.kilo/agents/go-developer.md b/.kilo/agents/go-developer.md index 1dbcb59..1b3ae89 100644 --- a/.kilo/agents/go-developer.md +++ b/.kilo/agents/go-developer.md @@ -448,6 +448,9 @@ This agent uses the following skills for comprehensive Go development: | Skill | Purpose | |-------|---------| | 
`go-db-patterns` | GORM, sqlx, migrations, transactions | +| `clickhouse-patterns` | ClickHouse columnar database patterns | +| `postgresql-patterns` | Advanced PostgreSQL features and optimization | +| `sqlite-patterns` | SQLite-specific patterns and best practices | ### Concurrency | Skill | Purpose | diff --git a/.kilo/capability-index.yaml b/.kilo/capability-index.yaml index e123190..cc7211c 100644 --- a/.kilo/capability-index.yaml +++ b/.kilo/capability-index.yaml @@ -47,6 +47,8 @@ agents: - database_design - server_logic - authentication + - postgresql_integration + - sqlite_integration receives: - api_specifications - database_requirements @@ -66,6 +68,9 @@ agents: - go_concurrent_programming - go_authentication - go_microservices + - postgresql_integration + - sqlite_integration + - clickhouse_integration receives: - api_specifications - database_requirements @@ -476,35 +481,38 @@ agents: model: ollama-cloud/gpt-oss:120b mode: subagent -# Capability Routing Map -capability_routing: - code_writing: lead-developer - code_review: code-skeptic - test_writing: sdet-engineer - architecture: system-analyst - security: security-auditor - performance: performance-engineer - bug_fixing: the-fixer - git_operations: release-manager - ui_implementation: frontend-developer - api_development: backend-developer - e2e_testing: browser-automation - visual_testing: visual-tester - requirement_analysis: requirement-refiner - gap_analysis: capability-analyst - issue_management: product-owner - prompt_optimization: prompt-optimizer - workflow_design: workflow-architect - scoring: evaluator - duplicate_detection: history-miner - agent_design: agent-architect - markdown_validation: markdown-validator - # Cognitive Enhancement (New) - task_decomposition: planner - self_reflection: reflector - memory_retrieval: memory-manager - chain_of_thought: planner - tree_of_thoughts: planner + # Capability Routing Map + capability_routing: + code_writing: lead-developer + code_review: 
code-skeptic + test_writing: sdet-engineer + architecture: system-analyst + security: security-auditor + performance: performance-engineer + bug_fixing: the-fixer + git_operations: release-manager + ui_implementation: frontend-developer + e2e_testing: browser-automation + visual_testing: visual-tester + requirement_analysis: requirement-refiner + gap_analysis: capability-analyst + issue_management: product-owner + prompt_optimization: prompt-optimizer + workflow_design: workflow-architect + scoring: evaluator + duplicate_detection: history-miner + agent_design: agent-architect + markdown_validation: markdown-validator + # Database integrations + postgresql_integration: backend-developer + sqlite_integration: backend-developer + clickhouse_integration: go-developer + # Cognitive Enhancement (New) + task_decomposition: planner + self_reflection: reflector + memory_retrieval: memory-manager + chain_of_thought: planner + tree_of_thoughts: planner # Go Development go_api_development: go-developer go_database_design: go-developer diff --git a/.kilo/skills/clickhouse-patterns/SKILL.md b/.kilo/skills/clickhouse-patterns/SKILL.md new file mode 100644 index 0000000..b547586 --- /dev/null +++ b/.kilo/skills/clickhouse-patterns/SKILL.md @@ -0,0 +1,346 @@ +# ClickHouse Patterns Skill + +Comprehensive guide to ClickHouse database patterns and best practices. + +## Overview + +ClickHouse is an open-source column-oriented database management system designed for online analytical processing (OLAP). This skill covers schema design, querying, performance optimization, and integration patterns. 
+ +## Connection Management + +### Basic Connection (using clickhouse-go driver) + +```go +import ( + "context" + "time" + + "github.com/ClickHouse/clickhouse-go/v2" +) + +// ✅ Good: Connection with timeout and settings +func NewClickHouseClient(addr string, username string, password string, database string) (*clickhouse.Conn, error) { + conn, err := clickhouse.Open(&clickhouse.Options{ + Addr: []string{addr}, + Auth: clickhouse.Auth{ + Database: database, + Username: username, + Password: password, + }, + DialTimeout: 5 * time.Second, + MaxOpenConns: 100, + MaxIdleConns: 20, + ConnMaxLifetime: time.Hour, + }) + if err != nil { + return nil, fmt.Errorf("open clickhouse: %w", err) + } + + if err := conn.Ping(context.Background()); err != nil { + return nil, fmt.Errorf("ping clickhouse: %w", err) + } + + return conn, nil +} +``` + +## Schema Design + +### Table Engine Selection + +```sql +-- ✅ Good: MergeTree family for most use cases +CREATE TABLE events ( + event_date Date DEFAULT toDate(event_time), + event_time DateTime DEFAULT now(), + user_id UUID, + event_type String, + properties Nested( + key String, + value String + ), + metric Float64 +) ENGINE = MergeTree() +PARTITION BY toYYYYMM(event_date) +ORDER BY (user_id, event_type, event_time) +SAMPLE BY user_id + +-- ✅ Good: ReplicatedMergeTree for clusters +CREATE TABLE events ON CLUSTER 'my_cluster' ( + -- same structure as above +) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/events', '{replica}') +PARTITION BY toYYYYMM(event_date) +ORDER BY (user_id, event_type, event_time) +``` + +### Data Types + +```sql +-- ✅ Good: Appropriate data types +CREATE TABLE users ( + user_id UUID, -- Unique identifier + age UInt8, -- Small integer for age + income Float32, -- Approximate numeric for money + is_active UInt8, -- Boolean as UInt8 (0/1) + created_at DateTime64(3), -- Millisecond precision + metadata String -- JSON or key-value store +) ENGINE = MergeTree() +ORDER BY user_id +``` + +## Query Patterns + +### 
Aggregations + +```sql +-- ✅ Good: Using aggregate functions with combinators +SELECT + event_type, + count() AS total_events, + uniqExact(user_id) AS unique_users, + avgIf(metric, event_type = 'purchase') AS avg_purchase_value, + quantileTDigest(0.95)(metric) AS p95_metric +FROM events +WHERE event_date >= today() - 7 +GROUP BY event_type +ORDER BY total_events DESC +LIMIT 10 +``` + +### Array and Nested Handling + +```sql +-- ✅ Good: Working with nested structures +SELECT + event_type, + properties.key, + properties.value, + count() +FROM events +ARRAY JOIN properties +WHERE event_date >= today() - 1 +GROUP BY event_type, properties.key, properties.value +ORDER BY count() DESC +LIMIT 20 +``` + +### Time Series Analysis + +```sql +-- ✅ Good: Time-based aggregations +SELECT + toStartOfFiveMinutes(event_time) AS time_slot, + count() AS events_per_slot, + avg(metric) AS avg_metric +FROM events +WHERE event_time >= now() - interval 1 hour +GROUP BY time_slot +ORDER BY time_slot +``` + +## Performance Optimization + +### Materialized Views + +```sql +-- ✅ Good: Pre-aggregating data +CREATE MATERIALIZED VIEW event_summary +ENGINE = SummingMergeTree() +PARTITION BY toYYYYMM(event_date) +ORDER BY (event_type, user_id) +AS SELECT + event_type, + user_id, + count() AS event_count, + sum(metric) AS total_metric +FROM events +GROUP BY event_type, user_id +``` + +### Indexing (Skip Indexes) + +```sql +-- ✅ Good: Using skip indexes for faster filtering +CREATE TABLE events_with_index ( + event_date Date DEFAULT toDate(event_time), + event_time DateTime DEFAULT now(), + user_id UUID, + event_type String, + metric Float64 +) ENGINE = MergeTree() +PARTITION BY toYYYYMM(event_date) +ORDER BY (user_id, event_time) +SETTINGS index_granularity = 8192 +``` + +### Sampling + +```sql +-- ✅ Good: Using SAMPLE clause for approximate queries +SELECT + event_type, + count() * 100 AS estimated_count -- Adjust multiplier based on sample rate +FROM events SAMPLE 0.01 -- 1% sample +WHERE event_date 
>= today() - 30 +GROUP BY event_type +ORDER BY estimated_count DESC +``` + +## Integration Patterns + +### Go Application Integration + +```go +// ✅ Good: Query execution with context +func GetEventStats(ctx context.Context, conn *clickhouse.Conn, eventType string) (int64, error) { + var count int64 + err := conn.QueryRow(ctx, ` + SELECT count() + FROM events + WHERE event_type = ? AND event_date >= today() - 7 + `, eventType).Scan(&count) + + if err != nil { + return 0, fmt.Errorf("query event stats: %w", err) + } + return count, nil +} + +// ✅ Good: Inserting data with batch +func InsertEvents(ctx context.Context, conn *clickhouse.Conn, events []Event) error { + batch, err := conn.PrepareBatch(ctx, `INSERT INTO events (event_time, user_id, event_type, metric) VALUES`) + if err != nil { + return fmt.Errorf("prepare batch: %w", err) + } + + for _, e := range events { + err := batch.Append(e.EventTime, e.UserID, e.EventType, e.Metric) + if err != nil { + return fmt.Errorf("append to batch: %w", err) + } + } + + if err := batch.Send(); err != nil { + return fmt.Errorf("send batch: %w", err) + } + return nil +} +``` + +## Testing + +### Testcontainers for ClickHouse + +```go +// ✅ Good: Using testcontrollers for integration tests +func setupClickHouse(t *testing.T) *clickhouse.Conn { + ctx := context.Background() + + req := testcontainers.ContainerRequest{ + Image: "clickhouse/clickhouse-server:latest", + ExposedPorts: []string{"9000/tcp", "8123/tcp"}, + Env: map[string]string{ + "CLICKHOUSE_DB": "test", + "CLICKHOUSE_USER": "default", + "CLICKHOUSE_PASSWORD": "", + }, + WaitingFor: wait.ForLog("Ready for connections"), + } + + container, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{ + ContainerRequest: req, + Started: true, + }) + require.NoError(t, err) + + t.Cleanup(func() { + container.Terminate(ctx) + }) + + host, err := container.Host(ctx) + require.NoError(t, err) + + port, err := container.MappedPort(ctx, "9000") + 
require.NoError(t, err) + + addr := fmt.Sprintf("%s:%s", host, port.Port()) + + conn, err := NewClickHouseClient(addr, "default", "", "test") + require.NoError(t, err) + + return conn +} +``` + +## Best Practices + +### ❌ Bad Patterns +```sql +-- ❌ Bad: Using * in production queries (inefficient) +SELECT * FROM events WHERE event_date = yesterday() + +-- ❌ Bad: Materialized view without proper engine +CREATE MATERIALIZED VIEW bad_view AS SELECT ... FROM events + +-- ❌ Bad: No partition key for time series data +CREATE TABLE bad_table (dt Date, val Float64) ENGINE = MergeTree() ORDER BY dt +``` + +### ✅ Good Patterns +```sql +-- ✅ Good: Explicit column selection +SELECT event_type, count(), sum(metric) +FROM events +WHERE event_date = yesterday() +GROUP BY event_type + +-- ✅ Good: Materialized view with proper engine +CREATE MATERIALIZED VIEW mv_event_summary +ENGINE = SummingMergeTree() +PARTITION BY toYYYYMM(event_date) +ORDER BY (event_type) +AS SELECT + event_type, + count() AS cnt, + sum(metric) AS total +FROM events +GROUP BY event_type + +-- ✅ Good: Proper partitioning +CREATE TABLE good_table ( + event_date Date DEFAULT toDate(event_time), + event_time DateTime, + user_id UUID, + metric Float64 +) ENGINE = MergeTree() +PARTITION BY toYYYYMM(event_date) +ORDER BY (user_id, event_time) +``` + +## Common Operations + +### Backup and Restore + +```bash +# ✅ Good: Backup using clickhouse-client +clickhouse-client --query "SELECT * FROM events WHERE event_date = '2026-04-01'" --format CSVWithNames > backup_2026-04-01.csv + +# Restore +clickhouse-client --query "INSERT INTO events FORMAT CSVWithNames" < backup_2026-04-01.csv +``` + +### Monitoring + +```sql +-- ✅ Good: Query system tables for monitoring +SELECT + database, + table, + formatReadableSize(sum(data_uncompressed_bytes)) as data_size, + formatReadableSize(sum(data_compressed_bytes)) as compressed_size, + sum(rows) as row_count +FROM system.parts +WHERE active +GROUP BY database, table +ORDER BY sum(data_uncompressed_bytes) DESC 
+``` \ No newline at end of file diff --git a/.kilo/skills/postgresql-patterns/SKILL.md b/.kilo/skills/postgresql-patterns/SKILL.md new file mode 100644 index 0000000..4f6b08e --- /dev/null +++ b/.kilo/skills/postgresql-patterns/SKILL.md @@ -0,0 +1,370 @@ +# PostgreSQL Patterns Skill + +Comprehensive guide to PostgreSQL database patterns and best practices. + +## Overview + +PostgreSQL is a powerful, open-source object-relational database system. This skill covers schema design, indexing, querying, transactions, and performance optimization. + +## Connection Management + +### Basic Connection (using pgx driver) + +```go +import ( + "context" + "time" + + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgxpool" +) + +// ✅ Good: Connection pool with context +func NewPostgreSQLPool(ctx context.Context, connString string) (*pgxpool.Pool, error) { + config, err := pgxpool.ParseConfig(connString) + if err != nil { + return nil, fmt.Errorf("parse config: %w", err) + } + + // Customize pool configuration + config.MinConns = 10 + config.MaxConns = 100 + config.MaxConnLifetime = time.Hour + config.HealthCheckPeriod = time.Minute + config.ConnConfig.ConnectTimeout = 5 * time.Second + + pool, err := pgxpool.NewWithConfig(ctx, config) + if err != nil { + return nil, fmt.Errorf("create pool: %w", err) + } + + // Verify connection + if err := pool.Ping(ctx); err != nil { + pool.Close() + return nil, fmt.Errorf("ping pool: %w", err) + } + + return pool, nil +} +``` + +## Schema Design + +### Data Types + +```sql +-- ✅ Good: Appropriate data types +CREATE TABLE users ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + email VARCHAR(255) UNIQUE NOT NULL, + password_hash TEXT NOT NULL, + first_name VARCHAR(100), + last_name VARCHAR(100), + age INTEGER CHECK (age >= 0 AND age <= 150), + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + is_active BOOLEAN DEFAULT TRUE, + profile JSONB, -- For flexible schema + settings HSTORE -- 
Key-value store +); + +-- Indexes for common queries +CREATE INDEX idx_users_email ON users(email); +CREATE INDEX idx_users_created_at ON users(created_at); +CREATE INDEX idx_users_profile ON users USING GIN (profile); +``` + +### Constraints + +```sql +-- ✅ Good: Using constraints for data integrity +CREATE TABLE orders ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE, + order_number VARCHAR(50) UNIQUE NOT NULL, + status VARCHAR(20) NOT NULL CHECK (status IN ('pending', 'processing', 'shipped', 'delivered', 'cancelled')), + total_amount DECIMAL(10, 2) NOT NULL CHECK (total_amount >= 0), + order_date TIMESTAMPTZ NOT NULL DEFAULT NOW(), + shipped_at TIMESTAMPTZ, + delivered_at TIMESTAMPTZ, + metadata JSONB +); + +-- Partial index for active orders +CREATE INDEX idx_orders_active ON orders(id) +WHERE status IN ('pending', 'processing', 'shipped'); + +-- Expression index for case-insensitive search +CREATE INDEX idx_users_email_lower ON users((LOWER(email))); +``` + +## Query Patterns + +### Basic CRUD + +```go +// ✅ Good: Using pgx with context and proper error handling +func GetUserByID(ctx context.Context, pool *pgxpool.Pool, id UUID) (*User, error) { + var user User + err := pool.QueryRow(ctx, ` + SELECT id, email, first_name, last_name, age, created_at, updated_at, is_active + FROM users + WHERE id = $1 + `, id).Scan( + &user.ID, &user.Email, &user.FirstName, &user.LastName, + &user.Age, &user.CreatedAt, &user.UpdatedAt, &user.IsActive, + ) + if err != nil { + if errors.Is(err, pgx.ErrNoRows) { + return nil, ErrNotFound + } + return nil, fmt.Errorf("get user: %w", err) + } + return &user, nil +} + +func CreateUser(ctx context.Context, pool *pgxpool.Pool, user *User) (*User, error) { + err := pool.QueryRow(ctx, ` + INSERT INTO users (email, password_hash, first_name, last_name, age) + VALUES ($1, $2, $3, $4, $5) + RETURNING id, created_at, updated_at + `, user.Email, user.PasswordHash, 
user.FirstName, user.LastName, user.Age). + Scan(&user.ID, &user.CreatedAt, &user.UpdatedAt) + if err != nil { + return nil, fmt.Errorf("create user: %w", err) + } + return user, nil +} +``` + +### Transactions + +```go +// ✅ Good: Using tx with context and proper rollback +func TransferFunds(ctx context.Context, pool *pgxpool.Pool, fromID, toID UUID, amount Decimal) error { + tx, err := pool.Begin(ctx) + if err != nil { + return fmt.Errorf("begin transaction: %w", err) + } + // Ensure rollback on error + defer func() { + if err != nil { + tx.Rollback(ctx) + } + }() + + // Check sender balance + var fromBalance Decimal + err = tx.QueryRow(ctx, ` + SELECT balance FROM accounts WHERE user_id = $1 FOR UPDATE + `, fromID).Scan(&fromBalance) + if err != nil { + return fmt.Errorf("get sender balance: %w", err) + } + if fromBalance < amount { + return ErrInsufficientFunds + } + + // Update sender + _, err = tx.Exec(ctx, ` + UPDATE accounts SET balance = balance - $1 WHERE user_id = $2 + `, amount, fromID) + if err != nil { + return fmt.Errorf("update sender: %w", err) + } + + // Update receiver + _, err = tx.Exec(ctx, ` + UPDATE accounts SET balance = balance + $1 WHERE user_id = $2 + `, amount, toID) + if err != nil { + return fmt.Errorf("update receiver: %w", err) + } + + // Commit transaction + if err := tx.Commit(ctx); err != nil { + return fmt.Errorf("commit transaction: %w", err) + } + + return nil +} +``` + +## Performance Optimization + +### Indexing Strategies + +```sql +-- ✅ Good: B-tree index for equality and range queries +CREATE INDEX idx_orders_date ON orders(order_date); + +-- ✅ Good: GIN index for JSONB containment queries +CREATE INDEX idx_orders_metadata ON orders USING GIN (metadata); + +-- ✅ Good: BRIN index for large tables with natural ordering +CREATE INDEX idx_large_table_time ON large_table USING BRIN (created_at); + +-- ✅ Good: Partial index for infrequent values +CREATE INDEX idx_orders_cancelled ON orders(id) +WHERE status = 'cancelled'; + +-- 
✅ Good: Covering index (include columns) +CREATE INDEX idx_orders_covering ON orders(user_id) +INCLUDE (order_number, total_amount, order_date); +``` + +### Query Optimization + +```sql +-- ✅ Good: Using EXPLAIN ANALYZE +EXPLAIN ANALYZE +SELECT o.id, o.total_amount, u.email +FROM orders o +JOIN users u ON o.user_id = u.id +WHERE o.order_date >= CURRENT_DATE - INTERVAL '30 days' + AND o.status = 'shipped' +ORDER BY o.order_date DESC +LIMIT 100; + +-- ✅ Good: Using CTE for readability and optimization +WITH recent_orders AS ( + SELECT id, user_id, total_amount, order_date + FROM orders + WHERE order_date >= CURRENT_DATE - INTERVAL '30 days' + AND status = 'shipped' +) +SELECT o.id, o.total_amount, u.email +FROM recent_orders o +JOIN users u ON o.user_id = u.id +ORDER BY o.order_date DESC +LIMIT 100; +``` + +### Connection Pooling + +```go +// ✅ Good: Using pgxpool for connection pooling +// Already demonstrated in NewPostgreSQLPool function + +// ✅ Good: Using prepared statements implicitly through pgx +// pgx automatically prepares and caches statements +``` + +## Testing + +### Testcontainers for PostgreSQL + +```go +// ✅ Good: Using testcontainers for integration tests +func setupPostgreSQL(t *testing.T) *pgxpool.Pool { + ctx := context.Background() + + req := testcontainers.ContainerRequest{ + Image: "postgres:15-alpine", + ExposedPorts: []string{"5432/tcp"}, + Env: map[string]string{ + "POSTGRES_USER": "test", + "POSTGRES_PASSWORD": "test", + "POSTGRES_DB": "test", + }, + WaitingFor: wait.ForLog("database system is ready to accept connections").WithOccurrence(2), + } + + container, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{ + ContainerRequest: req, + Started: true, + }) + require.NoError(t, err) + + t.Cleanup(func() { + container.Terminate(ctx) + }) + + host, err := container.Host(ctx) + require.NoError(t, err) + + port, err := container.MappedPort(ctx, "5432") + require.NoError(t, err) + + connString := fmt.Sprintf("host=%s port=%s 
user=test password=test dbname=test sslmode=disable", + host, port.Port()) + + pool, err := pgxpool.New(ctx, connString) + require.NoError(t, err) + + return pool +} +``` + +## Best Practices + +### ❌ Bad Patterns +```sql +-- ❌ Bad: Using SELECT * in production +SELECT * FROM orders WHERE order_date > NOW() - INTERVAL '7 days' + +-- ❌ Bad: Not using parameterized queries (SQL injection risk) +query := fmt.Sprintf("SELECT * FROM users WHERE email = '%s'", email) + +-- ❌ Bad: No connection pooling +// Creating new connection for every query +``` + +### ✅ Good Patterns +```sql +-- ✅ Good: Explicit column selection +SELECT id, order_number, total_amount, order_date +FROM orders +WHERE order_date > NOW() - INTERVAL '7 days' + +-- ✅ Good: Using parameterized queries +pool.QueryRow(ctx, "SELECT id, email FROM users WHERE email = $1", email) + +-- ✅ Good: Using connection pool +// Reuse pool across multiple requests +``` + +## Common Operations + +### Backup and Restore + +```bash +# ✅ Good: Logical backup with pg_dump +pg_dump -h localhost -U test -d test > backup.sql + +# Restore +psql -h localhost -U test -d test < backup.sql + +# ✅ Good: Custom format with compression +pg_dump -Fc -Z 9 -f backup.dump -U test test +pg_restore -U test -d test backup.dump +``` + +### Monitoring + +```sql +-- ✅ Good: Checking active queries +SELECT pid, age(clock_timestamp(), query_start), state, query +FROM pg_stat_activity +WHERE state != 'idle' +ORDER BY query_start DESC; + +-- ✅ Good: Index usage statistics +SELECT + schemaname, + relname, + indexrelname, + idx_tup_read, + idx_tup_fetch +FROM pg_stat_user_indexes +ORDER BY idx_tup_read DESC; + +-- ✅ Good: Table size +SELECT + schemaname, + tablename, + pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) as size +FROM pg_tables +WHERE schemaname = 'public' +ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC; +``` \ No newline at end of file diff --git a/.kilo/skills/sqlite-patterns/SKILL.md 
b/.kilo/skills/sqlite-patterns/SKILL.md new file mode 100644 index 0000000..81111e0 --- /dev/null +++ b/.kilo/skills/sqlite-patterns/SKILL.md @@ -0,0 +1,566 @@ +# SQLite Patterns Skill + +Comprehensive guide to SQLite database patterns and best practices. + +## Overview + +SQLite is a self-contained, serverless, zero-configuration, transactional SQL database engine. This skill covers schema design, querying, performance optimization, and integration patterns for Go applications. + +## Connection Management + +### Basic Connection (using mattn/go-sqlite3 driver) + +```go +import ( + "context" + "database/sql" + "time" + + _ "github.com/mattn/go-sqlite3" // or modernc.org/sqlite for pure Go +) + +// ✅ Good: Connection with proper configuration +func NewSQLiteDB(ds string) (*sql.DB, error) { + db, err := sql.Open("sqlite3", ds) + if err != nil { + return nil, fmt.Errorf("open sqlite: %w", err) + } + + // Important pragmas for performance and safety + _, err = db.Exec("PRAGMA journal_mode = WAL") // Write-Ahead Logging for better concurrency + if err != nil { + return nil, fmt.Errorf("set journal_mode: %w", err) + } + + _, err = db.Exec("PRAGMA synchronous = NORMAL") // Balance between safety and speed + if err != nil { + return nil, fmt.Errorf("set synchronous: %w", err) + } + + _, err = db.Exec("PRAGMA foreign_keys = ON") // Enforce foreign key constraints + if err != nil { + return nil, fmt.Errorf("set foreign_keys: %w", err) + } + + _, err = db.Exec("PRAGMA busy_timeout = 5000") // Wait 5 seconds for lock + if err != nil { + return nil, fmt.Errorf("set busy_timeout: %w", err) + } + + // Verify connection + if err := db.Ping(); err != nil { + return nil, fmt.Errorf("ping sqlite: %w", err) + } + + return db, nil +} + +// For high concurrency apps, consider connection pooling (though SQLite has limitations) +// SQLite works best with a single connection or limited concurrent writers; PRAGMAs such as foreign_keys, synchronous and busy_timeout are per-connection, so set them via DSN parameters or cap the pool with db.SetMaxOpenConns(1) +``` + +## Schema Design + +### Data Types (SQLite uses dynamic typing but affinities 
matter) + +```sql +-- ✅ Good: Table with appropriate column affinities +CREATE TABLE users ( + id INTEGER PRIMARY KEY AUTOINCREMENT, -- 64-bit signed integer + email TEXT NOT NULL UNIQUE, -- Text affinity + password_hash TEXT NOT NULL, -- Text affinity + first_name TEXT, -- Text affinity + last_name TEXT, -- Text affinity + age INTEGER CHECK (age >= 0), -- Integer affinity + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, -- Timestamp + updated_at DATETIME DEFAULT CURRENT_TIMESTAMP, + is_active BOOLEAN DEFAULT 1, -- Stored as INTEGER (0/1) + metadata TEXT -- JSON stored as TEXT +); + +-- Indexes for performance +CREATE INDEX idx_users_email ON users(email); +CREATE INDEX idx_users_created_at ON users(created_at); + +-- Trigger for automatic updated_at +CREATE TRIGGER update_users_updated_at +AFTER UPDATE ON users +FOR EACH ROW +BEGIN + UPDATE users SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id; +END; +``` + +### Common Table Patterns + +```sql +-- ✅ Good: Many-to-many relationship with junction table +CREATE TABLE posts ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE, + title TEXT NOT NULL, + content TEXT, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP +); + +CREATE TABLE tags ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL UNIQUE +); + +CREATE TABLE post_tags ( + post_id INTEGER NOT NULL REFERENCES posts(id) ON DELETE CASCADE, + tag_id INTEGER NOT NULL REFERENCES tags(id) ON DELETE CASCADE, + PRIMARY KEY (post_id, tag_id) +); +``` + +## Query Patterns + +### Basic CRUD + +```go +// ✅ Good: Using sql with context and proper error handling +func GetUserByID(ctx context.Context, db *sql.DB, id int64) (*User, error) { + var user User + err := db.QueryRowContext(ctx, ` + SELECT id, email, first_name, last_name, age, created_at, updated_at, is_active + FROM users + WHERE id = ? 
+ `, id).Scan( + &user.ID, &user.Email, &user.FirstName, &user.LastName, + &user.Age, &user.CreatedAt, &user.UpdatedAt, &user.IsActive, + ) + if err != nil { + if errors.Is(err, sql.ErrNoRows) { + return nil, ErrNotFound + } + return nil, fmt.Errorf("get user: %w", err) + } + return &user, nil +} + +func CreateUser(ctx context.Context, db *sql.DB, user *User) (*User, error) { + result, err := db.ExecContext(ctx, ` + INSERT INTO users (email, password_hash, first_name, last_name, age) + VALUES (?, ?, ?, ?, ?) + `, user.Email, user.PasswordHash, user.FirstName, user.LastName, user.Age) + if err != nil { + return nil, fmt.Errorf("create user: %w", err) + } + + id, err := result.LastInsertId() + if err != nil { + return nil, fmt.Errorf("get last insert id: %w", err) + } + user.ID = id + + // Get the created timestamp + err = db.QueryRowContext(ctx, ` + SELECT created_at, updated_at FROM users WHERE id = ? + `, id).Scan(&user.CreatedAt, &user.UpdatedAt) + if err != nil { + return nil, fmt.Errorf("get timestamps: %w", err) + } + + return user, nil +} +``` + +### Transactions + +```go +// ✅ Good: Using tx with context and proper rollback +func TransferFunds(ctx context.Context, db *sql.DB, fromID, toID int64, amount float64) error { + tx, err := db.BeginTx(ctx, nil) + if err != nil { + return fmt.Errorf("begin transaction: %w", err) + } + // Ensure rollback on error + defer func() { + if err != nil { + tx.Rollback() + } + }() + + // Check sender balance + var fromBalance float64 + err = tx.QueryRowContext(ctx, ` + SELECT balance FROM accounts WHERE user_id = ? + `, fromID).Scan(&fromBalance) + if err != nil { + return fmt.Errorf("get sender balance: %w", err) + } + if fromBalance < amount { + return ErrInsufficientFunds + } + + // Update sender + _, err = tx.ExecContext(ctx, ` + UPDATE accounts SET balance = balance - ? WHERE user_id = ? 
+ `, amount, fromID) + if err != nil { + return fmt.Errorf("update sender: %w", err) + } + + // Update receiver + _, err = tx.ExecContext(ctx, ` + UPDATE accounts SET balance = balance + ? WHERE user_id = ? + `, amount, toID) + if err != nil { + return fmt.Errorf("update receiver: %w", err) + } + + // Commit transaction + if err := tx.Commit(); err != nil { + return fmt.Errorf("commit transaction: %w", err) + } + + return nil +} +``` + +## Performance Optimization + +### Indexing Strategies + +```sql +-- ✅ Good: Index for WHERE clauses +CREATE INDEX idx_orders_user_id ON orders(user_id); + +-- ✅ Good: Composite index for filtering and sorting +CREATE INDEX idx_orders_date_status ON orders(created_at, status); + +-- ✅ Good: Covering index (include all needed columns in index) +CREATE INDEX idx_orders_covering ON orders(status, created_at) +INCLUDE (user_id, total_amount); + +-- ✅ Good: Partial index for infrequent values +CREATE INDEX idx_orders_pending ON orders(id) +WHERE status = 'pending'; +``` + +### Query Optimization + +```sql +-- ✅ Good: Using EXPLAIN QUERY PLAN +EXPLAIN QUERY PLAN +SELECT o.id, o.total_amount, u.email +FROM orders o +JOIN users u ON o.user_id = u.id +WHERE o.order_date >= date('now', '-30 days') + AND o.status = 'shipped' +ORDER BY o.order_date DESC +LIMIT 100; + +-- ✅ Good: Avoid SELECT * +SELECT id, order_number, total_amount, order_date +FROM orders +WHERE order_date > date('now', '-7 days') +``` + +### PRAGMA Settings for Performance + +```go +// ✅ Good: Applying performance pragmas +func ApplyPerformancePragmas(db *sql.DB) error { + pragmas := []string{ + "PRAGMA journal_mode = WAL", // Better concurrency + "PRAGMA synchronous = NORMAL", // Faster writes + "PRAGMA cache_size = 10000", // Increase cache (pages) + "PRAGMA temp_store = MEMORY", // Store temp tables in memory + "PRAGMA mmap_size = 268435456", // 256MB memory map + "PRAGMA page_size = 4096", // Optimal page size + "PRAGMA locking_mode = EXCLUSIVE", // For single-writer 
scenarios + } + + for _, pragma := range pragmas { + if _, err := db.Exec(pragma); err != nil { + return fmt.Errorf("exec %s: %w", pragma, err) + } + } + return nil +} +``` + +## Testing + +### Test Database Isolation + +```go +// ✅ Good: Using in-memory database for unit tests +func NewTestDB(t *testing.T) *sql.DB { + t.Helper() + + db, err := sql.Open("sqlite3", ":memory:") + if err != nil { + t.Fatalf("open sqlite: %v", err) + } + + // Apply pragmas for test performance + _, err = db.Exec("PRAGMA journal_mode = WAL") + if err != nil { + t.Fatalf("set journal_mode: %v", err) + } + + // Apply schema + if err := applySchema(db); err != nil { + t.Fatalf("apply schema: %v", err) + } + + return db +} + +// ✅ Good: Using file-based database for integration tests +func NewIntegrationTestDB(t *testing.T) (*sql.DB, func()) { + t.Helper() + + tmpDir := t.TempDir() + ds := filepath.Join(tmpDir, "test.db") + + db, err := sql.Open("sqlite3", ds) + if err != nil { + t.Fatalf("open sqlite: %v", err) + } + + // Apply pragmas + _, err = db.Exec("PRAGMA journal_mode = WAL") + if err != nil { + t.Fatalf("set journal_mode: %v", err) + } + + // Apply schema + if err := applySchema(db); err != nil { + t.Fatalf("apply schema: %v", err) + } + + cleanup := func() { + db.Close() + // Files automatically cleaned up by t.TempDir() + } + + return db, cleanup +} +``` + +## Best Practices + +### ❌ Bad Patterns +```sql +-- ❌ Bad: Using PRAGMA synchronous = OFF (can cause corruption) +PRAGMA synchronous = OFF; + +-- ❌ Bad: No foreign key enforcement (defaults to off in SQLite) +-- Remember to always set: PRAGMA foreign_keys = ON; + +-- ❌ Bad: Using TEXT PRIMARY KEY without UNIQUE (still works but inefficient) +CREATE TABLE bad (id TEXT PRIMARY KEY, data TEXT); -- Better: INTEGER PK + +-- ❌ Bad: Long transactions blocking writers +// Keep transactions short in SQLite due to database-level locking +``` + +### ✅ Good Patterns +```sql +-- ✅ Good: Always enforce foreign keys +PRAGMA foreign_keys = 
ON; + +-- ✅ Good: Use WAL mode for better concurrency +PRAGMA journal_mode = WAL; + +-- ✅ Good: Keep transactions short +BEGIN; +-- Quick operations here +COMMIT; + +-- ✅ Good: Use INTEGER PRIMARY KEY for auto-increment +CREATE TABLE good ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + data TEXT +); + +-- ✅ Good: Index foreign key columns +CREATE INDEX idx_orders_user_id ON orders(user_id); +``` + +## Common Operations + +### Backup and Restore + +```bash +# ✅ Good: Backup using .dump command +sqlite3 production.db ".dump" > backup.sql + +# Restore +sqlite3 development.db < backup.sql + +# ✅ Good: Online backup using VACUUM INTO (SQLite 3.27+) +sqlite3 production.db "VACUUM INTO 'backup.db';" + +# ✅ Good: Copy file while locked (use .backup command) +sqlite3 production.db ".backup backup.db" +``` + +### Monitoring + +```sql +-- ✅ Good: Checking database size +SELECT + page_count * page_size as size, + page_count * page_size / 1024.0 as size_kb, + page_count * page_size / 1048576.0 as size_mb +FROM pragma_page_count(), pragma_page_size(); + +-- ✅ Good: Checking schema integrity +PRAGMA integrity_check; + +-- ✅ Good: Listing tables and indexes +SELECT name, type FROM sqlite_master WHERE type IN ('table', 'index') ORDER BY type, name; + +-- ✅ Good: Table info +PRAGMA table_info(users); + +-- ✅ Good: Index info +PRAGMA index_info(idx_users_email); +``` + +## Go-Specific Patterns + +### Using sqlx with SQLite + +```go +import ( + "github.com/jmoiron/sqlx" +) + +// ✅ Good: sqlx with SQLite +func NewSQLiteDBx(ds string) (*sqlx.DB, error) { + db, err := sql.Open("sqlite3", ds) + if err != nil { + return nil, fmt.Errorf("open sqlite: %w", err) + } + + // Apply pragmas + if _, err := db.Exec("PRAGMA journal_mode = WAL"); err != nil { + return nil, fmt.Errorf("set journal_mode: %w", err) + } + if _, err := db.Exec("PRAGMA foreign_keys = ON"); err != nil { + return nil, fmt.Errorf("set foreign_keys: %w", err) + } + + return sqlx.NewDb(db, "sqlite3"), nil +} + +// ✅ Good: Named 
queries with sqlx +func GetUserByEmail(db *sqlx.DB, email string) (*User, error) { + var user User + err := db.Get(&user, ` + SELECT id, email, first_name, last_name, age, created_at, updated_at, is_active + FROM users + WHERE email = :email + `, map[string]interface{}{"email": email}) + if err != nil { + if errors.Is(err, sql.ErrNoRows) { + return nil, ErrNotFound + } + return nil, fmt.Errorf("get user by email: %w", err) + } + return &user, nil +} +``` + +### Using ORM (GORM) with SQLite + +```go +import ( + "gorm.io/driver/sqlite" + "gorm.io/gorm" +) + +// ✅ Good: GORM with SQLite +func NewGormDB(ds string) (*gorm.DB, error) { + db, err := gorm.Open(sqlite.Open(ds), &gorm.Config{}) + if err != nil { + return nil, fmt.Errorf("open gorm: %w", err) + } + + // Configure connection pool (though SQLite limitations apply) + sqlDB, err := db.DB() + if err != nil { + return nil, fmt.Errorf("get db: %w", err) + } + + // SetMaxIdleConns and SetMaxOpenConns still apply to underlying connection + sqlDB.SetMaxIdleConns(10) + sqlDB.SetMaxOpenConns(100) + sqlDB.SetConnMaxLifetime(time.Hour) + + // Apply pragmas + if err := sqlDB.ExecContext(context.Background(), "PRAGMA journal_mode = WAL").Error; err != nil { + return nil, fmt.Errorf("set journal_mode: %w", err) + } + if err := sqlDB.ExecContext(context.Background(), "PRAGMA foreign_keys = ON").Error; err != nil { + return nil, fmt.Errorf("set foreign_keys: %w", err) + } + + return db, nil +} +``` + +## Limitations and Workarounds + +### Concurrency Limitations + +SQLite has database-level locking for writes. For high-concurrency write scenarios: + +1. **Use WAL mode** - Allows multiple readers while one writer is active +2. **Queue writes** - Use a worker pool to serialize writes +3. **Consider client-side caching** - Read-heavy workloads scale well +4. 
// ✅ Good: Write queue for high concurrency
//
// WriteQueue hands write functions to background worker goroutines through a
// buffered channel. The first error returned by any queued function is kept
// and can be read with Err.
type WriteQueue struct {
	db *sql.DB
	ch chan func() error
	wg sync.WaitGroup

	mu  sync.Mutex // guards err
	err error      // first error returned by a queued function
}

// NewWriteQueue creates a queue with a buffer of 100 pending functions and
// starts workerCount worker goroutines. A workerCount below 1 is treated as 1
// so that queued functions are always executed. Use a workerCount of 1 when
// the functions must run strictly one at a time, in enqueue order.
func NewWriteQueue(db *sql.DB, workerCount int) *WriteQueue {
	if workerCount < 1 {
		// With zero workers nothing would ever drain the channel.
		workerCount = 1
	}

	wq := &WriteQueue{
		db: db,
		ch: make(chan func() error, 100),
	}

	wq.wg.Add(workerCount)
	for i := 0; i < workerCount; i++ {
		go wq.run()
	}
	return wq
}

// run executes queued functions until the channel is closed and drained.
func (wq *WriteQueue) run() {
	defer wq.wg.Done()
	for fn := range wq.ch {
		if err := fn(); err != nil {
			wq.record(err)
		}
	}
}

// record stores err if no earlier error has been stored.
func (wq *WriteQueue) record(err error) {
	wq.mu.Lock()
	defer wq.mu.Unlock()
	if wq.err == nil {
		wq.err = err
	}
}

// Enqueue schedules fn for execution by a worker. It blocks while the buffer
// is full and panics if called after Close (send on a closed channel).
func (wq *WriteQueue) Enqueue(fn func() error) {
	wq.ch <- fn
}

// Close stops accepting new work and waits until every queued function has
// finished. It must be called at most once.
func (wq *WriteQueue) Close() {
	close(wq.ch)
	wq.wg.Wait()
}

// Err returns the first error returned by a queued function so far, or nil
// if none has failed. Call it after Close for the final result.
func (wq *WriteQueue) Err() error {
	wq.mu.Lock()
	defer wq.mu.Unlock()
	return wq.err
}