From 43747d9875edd6061699b831c51ccbb573ec6a47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=A8NW=C2=A8?= <¨neroworld@mail.ru¨> Date: Sun, 5 Apr 2026 12:47:01 +0100 Subject: [PATCH 1/4] feat: add Docker/DevOps skills and devops-engineer agent --- .kilo/agents/backend-developer.md | 9 + .kilo/agents/devops-engineer.md | 356 ++++++++ .kilo/agents/security-auditor.md | 33 + .kilo/rules/docker.md | 549 +++++++++++++ .kilo/skills/docker-compose/SKILL.md | 576 +++++++++++++ .../docker-compose/patterns/basic-service.md | 447 +++++++++++ .kilo/skills/docker-monitoring/SKILL.md | 756 +++++++++++++++++ .kilo/skills/docker-security/SKILL.md | 685 ++++++++++++++++ .kilo/skills/docker-swarm/SKILL.md | 757 ++++++++++++++++++ .../docker-swarm/examples/ha-web-app.md | 519 ++++++++++++ AGENTS.md | 47 ++ 11 files changed, 4734 insertions(+) create mode 100644 .kilo/agents/devops-engineer.md create mode 100644 .kilo/rules/docker.md create mode 100644 .kilo/skills/docker-compose/SKILL.md create mode 100644 .kilo/skills/docker-compose/patterns/basic-service.md create mode 100644 .kilo/skills/docker-monitoring/SKILL.md create mode 100644 .kilo/skills/docker-security/SKILL.md create mode 100644 .kilo/skills/docker-swarm/SKILL.md create mode 100644 .kilo/skills/docker-swarm/examples/ha-web-app.md diff --git a/.kilo/agents/backend-developer.md b/.kilo/agents/backend-developer.md index 53e4a09..e0e49ba 100644 --- a/.kilo/agents/backend-developer.md +++ b/.kilo/agents/backend-developer.md @@ -276,10 +276,19 @@ This agent uses the following skills for comprehensive Node.js development: |-------|---------| | `nodejs-npm-management` | package.json, scripts, dependencies | +### Containerization (Docker) +| Skill | Purpose | +|-------|---------| +| `docker-compose` | Multi-container application orchestration | +| `docker-swarm` | Production cluster deployment | +| `docker-security` | Container security hardening | +| `docker-monitoring` | Container monitoring and logging | + ### Rules | File | 
Content | |------|---------| | `.kilo/rules/nodejs.md` | Code style, security, best practices | +| `.kilo/rules/docker.md` | Docker, Compose, Swarm best practices | ## Handoff Protocol diff --git a/.kilo/agents/devops-engineer.md b/.kilo/agents/devops-engineer.md new file mode 100644 index 0000000..a4869b2 --- /dev/null +++ b/.kilo/agents/devops-engineer.md @@ -0,0 +1,356 @@ +--- +description: DevOps specialist for Docker, Kubernetes, CI/CD pipeline automation, and infrastructure management +mode: subagent +model: ollama-cloud/deepseek-v3.2 +color: "#FF6B35" +permission: + read: allow + edit: allow + write: allow + bash: allow + glob: allow + grep: allow + task: + "*": deny +--- + +# Kilo Code: DevOps Engineer + +## Role Definition + +You are **DevOps Engineer** — the infrastructure specialist. Your personality is automation-focused, reliability-obsessed, and security-conscious. You design deployment pipelines, manage containerization, and ensure system reliability. + +## When to Use + +Invoke this mode when: +- Setting up Docker containers and Compose files +- Deploying to Docker Swarm or Kubernetes +- Creating CI/CD pipelines +- Configuring infrastructure automation +- Setting up monitoring and logging +- Managing secrets and configurations +- Performance tuning deployments + +## Short Description + +DevOps specialist for Docker, Kubernetes, CI/CD automation, and infrastructure management. + +## Behavior Guidelines + +1. **Automate everything** — manual steps lead to errors +2. **Infrastructure as Code** — version control all configurations +3. **Security first** — minimal privileges, scan all images +4. **Monitor everything** — metrics, logs, traces +5. 
**Test deployments** — staging before production + +## Skills Reference + +### Containerization +| Skill | Purpose | +|-------|---------| +| `docker-compose` | Multi-container application setup | +| `docker-swarm` | Production cluster deployment | +| `docker-security` | Container security hardening | +| `docker-monitoring` | Container monitoring and logging | + +### CI/CD +| Skill | Purpose | +|-------|---------| +| `github-actions` | GitHub Actions workflows | +| `gitlab-ci` | GitLab CI/CD pipelines | +| `jenkins` | Jenkins pipelines | + +### Infrastructure +| Skill | Purpose | +|-------|---------| +| `terraform` | Infrastructure as Code | +| `ansible` | Configuration management | +| `helm` | Kubernetes package manager | + +### Rules +| File | Content | +|------|---------| +| `.kilo/rules/docker.md` | Docker best practices | + +## Tech Stack + +| Layer | Technologies | +|-------|-------------| +| Containers | Docker, Docker Compose, Docker Swarm | +| Orchestration | Kubernetes, Helm | +| CI/CD | GitHub Actions, GitLab CI, Jenkins | +| Monitoring | Prometheus, Grafana, Loki | +| Logging | ELK Stack, Fluentd | +| Secrets | Docker Secrets, Vault | + +## Output Format + +```markdown +## DevOps Implementation: [Feature] + +### Container Configuration +- Base image: node:20-alpine +- Multi-stage build: ✅ +- Non-root user: ✅ +- Health checks: ✅ + +### Deployment Configuration +- Service: api +- Replicas: 3 +- Resource limits: CPU 1, Memory 1G +- Networks: app-network (overlay) + +### Security Measures +- ✅ Non-root user (appuser:1001) +- ✅ Read-only filesystem +- ✅ Dropped capabilities (ALL) +- ✅ No new privileges +- ✅ Security scanning in CI/CD + +### Monitoring +- Health endpoint: /health +- Metrics: Prometheus /metrics +- Logging: JSON structured logs + +--- +Status: deployed +@CodeSkeptic ready for review +``` + +## Dockerfile Patterns + +### Multi-stage Production Build + +```dockerfile +# Build stage +FROM node:20-alpine AS builder +WORKDIR /app +COPY package*.json 
./ +RUN npm ci +COPY . . +RUN npm run build && npm prune --omit=dev + +# Production stage +FROM node:20-alpine +RUN addgroup -g 1001 appgroup && \ + adduser -u 1001 -G appgroup -D appuser +WORKDIR /app +COPY --from=builder --chown=appuser:appgroup /app/dist ./dist +COPY --from=builder --chown=appuser:appgroup /app/node_modules ./node_modules +USER appuser +EXPOSE 3000 +HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ + CMD node -e "require('http').get('http://localhost:3000/health', (r) => process.exit(r.statusCode === 200 ? 0 : 1))" +CMD ["node", "dist/index.js"] +``` + +### Development Build + +```dockerfile +FROM node:20-alpine +WORKDIR /app +COPY package*.json ./ +RUN npm install +COPY . . +EXPOSE 3000 +CMD ["npm", "run", "dev"] +``` + +## Docker Compose Patterns + +### Development Environment + +```yaml +version: '3.8' + +services: + app: + build: + context: . + dockerfile: Dockerfile.dev + volumes: + - .:/app + - /app/node_modules + environment: + - NODE_ENV=development + - DATABASE_URL=postgres://db:5432/app + ports: + - "3000:3000" + depends_on: + db: + condition: service_healthy + + db: + image: postgres:15-alpine + environment: + POSTGRES_DB: app + POSTGRES_USER: app + POSTGRES_PASSWORD: ${DB_PASSWORD} + volumes: + - postgres-data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U app"] + interval: 10s + timeout: 5s + retries: 5 + +volumes: + postgres-data: +``` + +### Production Environment + +```yaml +version: '3.8' + +services: + app: + image: myapp:${VERSION} + deploy: + replicas: 3 + update_config: + parallelism: 1 + delay: 10s + failure_action: rollback + rollback_config: + parallelism: 1 + delay: 10s + restart_policy: + condition: on-failure + max_attempts: 3 + resources: + limits: + cpus: '1' + memory: 1G + reservations: + cpus: '0.5' + memory: 512M + healthcheck: + test: ["CMD", "node", "-e", "require('http').get('http://localhost:3000/health', (r) => process.exit(r.statusCode === 200 ?
0 : 1))"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 60s + networks: + - app-network + secrets: + - db_password + - jwt_secret + +networks: + app-network: + driver: overlay + attachable: true + +secrets: + db_password: + external: true + jwt_secret: + external: true +``` + +## CI/CD Pipeline Patterns + +### GitHub Actions + +```yaml +# .github/workflows/docker.yml +name: Docker CI/CD + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + - name: Login to Registry + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and Push + uses: docker/build-push-action@v4 + with: + context: . + push: ${{ github.event_name != 'pull_request' }} + tags: ghcr.io/${{ github.repository }}:${{ github.sha }} + cache-from: type=gha + cache-to: type=gha,mode=max + + - name: Scan Image + uses: aquasecurity/trivy-action@master + with: + image-ref: ghcr.io/${{ github.repository }}:${{ github.sha }} + format: 'table' + exit-code: '1' + severity: 'CRITICAL,HIGH' + + deploy: + needs: build + if: github.event_name == 'push' && github.ref == 'refs/heads/main' + runs-on: ubuntu-latest + steps: + - name: Deploy to Swarm + run: | + docker stack deploy -c docker-compose.prod.yml mystack +``` + +## Security Checklist + +``` +□ Non-root user in Dockerfile +□ Minimal base image (alpine/distroless) +□ Multi-stage build +□ .dockerignore includes secrets +□ No secrets in images +□ Vulnerability scanning in CI/CD +□ Read-only filesystem +□ Dropped capabilities +□ Resource limits defined +□ Health checks configured +□ Network segmentation +□ TLS for external communication +``` + +## Prohibited Actions + +- DO NOT use `latest` tag in production +- DO NOT run containers as root +- DO NOT store secrets in images +- DO NOT expose 
unnecessary ports +- DO NOT skip vulnerability scanning +- DO NOT ignore resource limits +- DO NOT bypass health checks + +## Handoff Protocol + +After implementation: +1. Verify containers are running +2. Check health endpoints +3. Review resource usage +4. Validate security configuration +5. Test deployment updates +6. Tag `@CodeSkeptic` for review +## Gitea Commenting (MANDATORY) + +**You MUST post a comment to the Gitea issue after completing your work.** + +Post a comment with: +1. ✅ Success: What was done, files changed, duration +2. ❌ Error: What failed, why, and blocker +3. ❓ Question: Clarification needed with options + +Use the `post_comment` function from `.kilo/skills/gitea-commenting/SKILL.md`. + +**NO EXCEPTIONS** - Always comment to Gitea. \ No newline at end of file diff --git a/.kilo/agents/security-auditor.md b/.kilo/agents/security-auditor.md index 527a6b0..b5ce431 100644 --- a/.kilo/agents/security-auditor.md +++ b/.kilo/agents/security-auditor.md @@ -115,8 +115,41 @@ gitleaks --path . 
# Check for exposed env grep -r "API_KEY\|PASSWORD\|SECRET" --include="*.ts" --include="*.js" + +# Docker image vulnerability scan +trivy image myapp:latest +docker scout cves myapp:latest + +# Docker secrets scan +trivy image --scanners secret myapp:latest ``` +## Docker Security Checklist + +``` +□ Running as non-root user +□ Using minimal base images (alpine/distroless) +□ Using specific image versions (not latest) +□ No secrets in images +□ Read-only filesystem where possible +□ Capabilities dropped to minimum +□ No new privileges flag set +□ Resource limits defined +□ Health checks configured +□ Network segmentation implemented +□ TLS for external communication +□ Secrets managed via Docker secrets/vault +□ Vulnerability scanning in CI/CD +□ Base images regularly updated +``` + +## Skills Reference + +| Skill | Purpose | +|-------|---------| +| `docker-security` | Container security hardening | +| `nodejs-security-owasp` | Node.js OWASP Top 10 | + ## Prohibited Actions - DO NOT approve with critical/high vulnerabilities diff --git a/.kilo/rules/docker.md b/.kilo/rules/docker.md new file mode 100644 index 0000000..3fb271f --- /dev/null +++ b/.kilo/rules/docker.md @@ -0,0 +1,549 @@ +# Docker & Containerization Rules + +Essential rules for Docker, Docker Compose, Docker Swarm, and container technologies. + +## Dockerfile Best Practices + +### Layer Optimization + +- Minimize layers by combining commands +- Order layers from least to most frequently changing +- Use multi-stage builds to reduce image size +- Clean up package manager caches + +```dockerfile +# ✅ Good: Multi-stage build with layer optimization +FROM node:20-alpine AS builder +WORKDIR /app +COPY package*.json ./ +RUN npm ci --only=production + +FROM node:20-alpine +WORKDIR /app +COPY --from=builder /app/node_modules ./node_modules +COPY . . +USER node +EXPOSE 3000 +CMD ["node", "server.js"] + +# ❌ Bad: Single stage, many layers +FROM node:20 +RUN npm install -g nodemon +WORKDIR /app +COPY . .
+RUN npm install +EXPOSE 3000 +CMD ["nodemon", "server.js"] +``` + +### Security + +- Run as non-root user +- Use specific image versions, not `latest` +- Scan images for vulnerabilities +- Don't store secrets in images + +```dockerfile +# ✅ Good +FROM node:20-alpine +RUN addgroup -g 1001 appgroup && \ + adduser -u 1001 -G appgroup -D appuser +WORKDIR /app +COPY --chown=appuser:appgroup . . +USER appuser +CMD ["node", "server.js"] + +# ❌ Bad +FROM node:latest # Unpredictable version +# Running as root (default) +COPY . . +CMD ["node", "server.js"] +``` + +### Caching Strategy + +```dockerfile +# ✅ Good: Dependencies cached separately +COPY package*.json ./ +RUN npm ci +COPY . . + +# ❌ Bad: All code copied before dependencies +COPY . . +RUN npm install +``` + +## Docker Compose + +### Service Structure + +- Use version 3.8+ for modern features +- Define services in logical order +- Use environment variables for configuration +- Set resource limits + +```yaml +# ✅ Good +version: '3.8' + +services: + app: + image: myapp:latest + build: + context: . 
+ dockerfile: Dockerfile + environment: + - NODE_ENV=production + - DATABASE_URL=postgres://db:5432/app + depends_on: + db: + condition: service_healthy + networks: + - app-network + deploy: + resources: + limits: + cpus: '0.5' + memory: 512M + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:3000/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + + db: + image: postgres:15-alpine + volumes: + - postgres-data:/var/lib/postgresql/data + environment: + POSTGRES_DB: app + POSTGRES_USER: ${DB_USER} + POSTGRES_PASSWORD: ${DB_PASSWORD} + networks: + - app-network + healthcheck: + test: ["CMD-SHELL", "pg_isready -U $$POSTGRES_USER"] + interval: 10s + timeout: 5s + retries: 5 + +networks: + app-network: + driver: bridge + +volumes: + postgres-data: +``` + +### Environment Variables + +- Use `.env` files for local development +- Never commit `.env` files with secrets +- Use Docker secrets for sensitive data in Swarm + +```bash +# .env (gitignored) +NODE_ENV=production +DB_PASSWORD=secure_password_here +JWT_SECRET=your_jwt_secret_here +``` + +```yaml +# docker-compose.yml +services: + app: + env_file: + - .env + # OR explicit for non-sensitive + environment: + - NODE_ENV=production + # Secrets for sensitive data in Swarm + secrets: + - db_password +``` + +### Network Patterns + +```yaml +# ✅ Good: Separated networks for security +networks: + frontend: + driver: bridge + backend: + driver: bridge + internal: true # No external access + +services: + web: + networks: + - frontend + - backend + api: + networks: + - backend + db: + networks: + - backend +``` + +### Volume Management + +```yaml +# ✅ Good: Named volumes with labels +volumes: + postgres-data: + driver: local + labels: + - "app=myapp" + - "type=database" + +services: + db: + volumes: + - postgres-data:/var/lib/postgresql/data + - ./init-scripts:/docker-entrypoint-initdb.d:ro +``` + +## Docker Swarm + +### Service Deployment + +```yaml +# docker-compose.yml (Swarm compatible)
+version: '3.8' + +services: + api: + image: myapp/api:latest + deploy: + mode: replicated + replicas: 3 + update_config: + parallelism: 1 + delay: 10s + failure_action: rollback + rollback_config: + parallelism: 1 + delay: 10s + restart_policy: + condition: on-failure + delay: 5s + max_attempts: 3 + window: 120s + placement: + constraints: + - node.role == worker + preferences: + - spread: node.id + resources: + limits: + cpus: '0.5' + memory: 512M + reservations: + cpus: '0.25' + memory: 256M + networks: + - app-network + secrets: + - db_password + - jwt_secret + configs: + - app_config + +networks: + app-network: + driver: overlay + attachable: true + +secrets: + db_password: + external: true + jwt_secret: + external: true + +configs: + app_config: + external: true +``` + +### Stack Deployment + +```bash +# Deploy stack +docker stack deploy -c docker-compose.yml mystack + +# List services +docker stack services mystack + +# Scale service +docker service scale mystack_api=5 + +# Update service +docker service update --image myapp/api:v2 mystack_api + +# Rollback +docker service rollback mystack_api +``` + +### Health Checks + +```yaml +services: + api: + # Health check in Dockerfile + healthcheck: + test: ["CMD", "node", "healthcheck.js"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 60s + + # Or in compose + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:3000/health"] + interval: 30s + timeout: 10s + retries: 3 +``` + +### Secrets Management + +```bash +# Create secret +echo "my_secret_password" | docker secret create db_password - + +# Create secret from file +docker secret create jwt_secret ./jwt_secret.txt + +# List secrets +docker secret ls + +# Use in compose +secrets: + db_password: + external: true +``` + +### Config Management + +```bash +# Create config +docker config create app_config ./config.json + +# Use in compose +configs: + app_config: + external: true + +services: + api: + configs: + - app_config +``` + +## Container 
Security + +### Image Security + +```bash +# Scan image for vulnerabilities +docker scout cves myapp:latest +trivy image myapp:latest + +# Check image for secrets +trivy image --scanners secret myapp:latest +``` + +### Runtime Security + +```dockerfile +# ✅ Good: Security measures +FROM node:20-alpine + +# Create non-root user +RUN addgroup -g 1001 appgroup && \ + adduser -u 1001 -G appgroup -D appuser + +# Create the app directory owned by the non-root user +RUN mkdir -p /app && \ + chown -R appuser:appgroup /app + +WORKDIR /app +COPY --chown=appuser:appgroup . . + +# Run as non-root; capabilities and a read-only root filesystem are set at runtime (cap_drop, read_only) +USER appuser +VOLUME ["/tmp"] + +CMD ["node", "server.js"] +``` + +### Network Security + +```yaml +# ✅ Good: Limited network access +services: + api: + networks: + - backend + # No ports exposed to host + + db: + networks: + - backend + # Internal network only + +networks: + backend: + internal: true # No internet access +``` + +### Resource Limits + +```yaml +services: + api: + deploy: + resources: + limits: + cpus: '1.0' + memory: 1G + reservations: + cpus: '0.5' + memory: 512M +``` + +## Common Patterns + +### Development Setup + +```yaml +# docker-compose.dev.yml +version: '3.8' +services: + app: + build: + context: .
+ dockerfile: Dockerfile.dev + volumes: + - .:/app + - /app/node_modules + environment: + - NODE_ENV=development + ports: + - "3000:3000" + command: npm run dev +``` + +### Production Setup + +```yaml +# docker-compose.prod.yml +version: '3.8' +services: + app: + image: myapp:${VERSION} + environment: + - NODE_ENV=production + deploy: + replicas: 3 + update_config: + parallelism: 1 + delay: 10s + healthcheck: + test: ["CMD", "node", "healthcheck.js"] + interval: 30s + timeout: 10s + retries: 3 +``` + +### Multi-Environment + +```bash +# Override files +docker-compose -f docker-compose.yml -f docker-compose.dev.yml up +docker-compose -f docker-compose.yml -f docker-compose.prod.yml up -d +``` + +### Logging + +```yaml +services: + app: + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" + labels: "app,environment" +``` + +## CI/CD Integration + +### Build Pipeline + +```yaml +# .github/workflows/docker.yml +name: Docker Build + +on: + push: + branches: [main] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Build image + run: docker build -t myapp:${{ github.sha }} . 
+ + - name: Scan image + run: trivy image --exit-code 1 --severity CRITICAL,HIGH myapp:${{ github.sha }} + + - name: Push to registry + run: | + echo ${{ secrets.DOCKER_PASSWORD }} | docker login -u ${{ secrets.DOCKER_USER }} --password-stdin + docker push myapp:${{ github.sha }} +``` + +## Troubleshooting + +### Common Commands + +```bash +# View logs +docker-compose logs -f app + +# Execute in container +docker-compose exec app sh + +# Check health +docker inspect --format='{{.State.Health.Status}}' <container_name> + +# View resource usage +docker stats + +# Remove unused resources +docker system prune -a + +# Debug network +docker network inspect app-network + +# Swarm diagnostics +docker node ls +docker service ps mystack_api +``` + +## Prohibitions + +- DO NOT run containers as root +- DO NOT use `latest` tag in production +- DO NOT expose unnecessary ports +- DO NOT store secrets in images +- DO NOT use privileged mode unnecessarily +- DO NOT mount host directories without restrictions +- DO NOT skip health checks in production +- DO NOT ignore vulnerability scans \ No newline at end of file diff --git a/.kilo/skills/docker-compose/SKILL.md b/.kilo/skills/docker-compose/SKILL.md new file mode 100644 index 0000000..76280b0 --- /dev/null +++ b/.kilo/skills/docker-compose/SKILL.md @@ -0,0 +1,576 @@ +# Skill: Docker Compose + +## Purpose + +Comprehensive skill for Docker Compose configuration, orchestration, and multi-container application deployment. + +## Overview + +Docker Compose is a tool for defining and running multi-container Docker applications. Use this skill when working with local development environments, CI/CD pipelines, and production deployments.
+ +## When to Use + +- Setting up local development environments +- Configuring multi-container applications +- Managing service dependencies +- Implementing health checks and waiting strategies +- Creating development/production configurations + +## Skill Files Structure + +``` +docker-compose/ +├── SKILL.md # This file +├── patterns/ +│ ├── basic-service.md # Basic service templates +│ ├── networking.md # Network patterns +│ ├── volumes.md # Volume management +│ └── healthchecks.md # Health check patterns +└── examples/ + ├── nodejs-api.md # Node.js API template + ├── postgres.md # PostgreSQL template + └── redis.md # Redis template +``` + +## Core Patterns + +### 1. Basic Service Configuration + +```yaml +version: '3.8' + +services: + app: + build: + context: . + dockerfile: Dockerfile + args: + - NODE_ENV=production + image: myapp:latest + container_name: myapp + restart: unless-stopped + ports: + - "3000:3000" + environment: + - NODE_ENV=production + - DATABASE_URL=postgres://db:5432/app + volumes: + - ./data:/app/data + networks: + - app-network + depends_on: + db: + condition: service_healthy + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:3000/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s +``` + +### 2. Environment Configuration + +```yaml +# Use .env file for secrets +services: + app: + env_file: + - .env + - .env.local + environment: + # Non-sensitive defaults + - NODE_ENV=production + - LOG_LEVEL=info + # Override from .env + - DATABASE_URL=${DATABASE_URL} + - JWT_SECRET=${JWT_SECRET} +``` + +### 3. Network Patterns + +```yaml +# Isolated networks for security +networks: + frontend: + driver: bridge + backend: + driver: bridge + internal: true # No external access + +services: + web: + networks: + - frontend + - backend + + api: + networks: + - backend + + db: + networks: + - backend +``` + +### 4. 
Volume Patterns + +```yaml +volumes: + # Named volume (managed by Docker) + postgres-data: + driver: local + app-logs: + # Bind mount (host directory) + # ./data:/app/data + +services: + db: + volumes: + - postgres-data:/var/lib/postgresql/data + - ./init-scripts:/docker-entrypoint-initdb.d:ro + + app: + volumes: + - ./config:/app/config:ro + - app-logs:/app/logs + +# Only named volumes (postgres-data, app-logs) are declared under the single +# top-level `volumes:` key; bind mounts need no top-level entry. +``` + +### 5. Health Checks & Dependencies + +```yaml +services: + db: + image: postgres:15-alpine + healthcheck: + test: ["CMD-SHELL", "pg_isready -U $$POSTGRES_USER"] + interval: 10s + timeout: 5s + retries: 5 + + app: + depends_on: + db: + condition: service_healthy + redis: + condition: service_started +``` + +### 6. Multi-Environment Configurations + +```yaml +# docker-compose.yml (base) +version: '3.8' +services: + app: + image: myapp:latest + environment: + - NODE_ENV=production + +# docker-compose.dev.yml (development override) +version: '3.8' +services: + app: + build: + context: . + dockerfile: Dockerfile.dev + volumes: + - .:/app + - /app/node_modules + environment: + - NODE_ENV=development + ports: + - "3000:3000" + command: npm run dev + +# docker-compose.prod.yml (production override) +version: '3.8' +services: + app: + image: myapp:${VERSION} + deploy: + replicas: 3 + resources: + limits: + cpus: '1' + memory: 1G + healthcheck: + test: ["CMD", "node", "healthcheck.js"] + interval: 30s + timeout: 10s + retries: 3 +``` + +## Service Templates + +### Node.js API + +```yaml +services: + api: + build: + context: . + dockerfile: Dockerfile + environment: + - NODE_ENV=production + - PORT=3000 + - DATABASE_URL=postgres://db:5432/app + - REDIS_URL=redis://redis:6379 + ports: + - "3000:3000" + depends_on: + db: + condition: service_healthy + redis: + condition: service_started + networks: + - backend + healthcheck: + test: ["CMD", "node", "-e", "require('http').get('http://localhost:3000/health', (r) => process.exit(r.statusCode === 200 ?
0 : 1))"] + interval: 30s + timeout: 10s + retries: 3 +``` + +### PostgreSQL Database + +```yaml +services: + db: + image: postgres:15-alpine + environment: + POSTGRES_DB: app + POSTGRES_USER: ${DB_USER:-app} + POSTGRES_PASSWORD: ${DB_PASSWORD:?DB_PASSWORD required} + volumes: + - postgres-data:/var/lib/postgresql/data + - ./init-scripts:/docker-entrypoint-initdb.d:ro + networks: + - backend + healthcheck: + test: ["CMD-SHELL", "pg_isready -U $$POSTGRES_USER -d $$POSTGRES_DB"] + interval: 10s + timeout: 5s + retries: 5 + deploy: + resources: + limits: + memory: 512M + +volumes: + postgres-data: +``` + +### Redis Cache + +```yaml +services: + redis: + image: redis:7-alpine + command: redis-server --appendonly yes --maxmemory 256mb --maxmemory-policy allkeys-lru + volumes: + - redis-data:/data + networks: + - backend + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 10s + timeout: 5s + retries: 5 + +volumes: + redis-data: +``` + +### Nginx Reverse Proxy + +```yaml +services: + nginx: + image: nginx:alpine + ports: + - "80:80" + - "443:443" + volumes: + - ./nginx.conf:/etc/nginx/nginx.conf:ro + - ./ssl:/etc/nginx/ssl:ro + depends_on: + - api + networks: + - frontend + - backend + healthcheck: + test: ["CMD", "nginx", "-t"] + interval: 30s + timeout: 10s + retries: 3 +``` + +## Common Commands + +```bash +# Start services +docker-compose up -d + +# Start specific service +docker-compose up -d app + +# View logs +docker-compose logs -f app + +# Execute command in container +docker-compose exec app sh +docker-compose exec app npm test + +# Stop services +docker-compose down + +# Stop and remove volumes +docker-compose down -v + +# Rebuild images +docker-compose build --no-cache app + +# Scale service +docker-compose up -d --scale api=3 + +# Multi-environment +docker-compose -f docker-compose.yml -f docker-compose.dev.yml up +docker-compose -f docker-compose.yml -f docker-compose.prod.yml up -d +``` + +## Best Practices + +### Security + +1.
**Never store secrets in images** + ```yaml + # Bad + environment: + - DB_PASSWORD=password123 + + # Good + secrets: + - db_password + secrets: + db_password: + file: ./secrets/db_password.txt + ``` + +2. **Use non-root user** + ```yaml + services: + app: + user: "1000:1000" + ``` + +3. **Limit resources** + ```yaml + services: + app: + deploy: + resources: + limits: + cpus: '1' + memory: 1G + ``` + +4. **Use internal networks for databases** + ```yaml + networks: + backend: + internal: true + ``` + +### Performance + +1. **Enable health checks** + ```yaml + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:3000/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + ``` + +2. **Use .dockerignore** + ``` + node_modules + .git + .env + *.log + coverage + .nyc_output + ``` + +3. **Optimize build cache** + ```yaml + build: + context: . + dockerfile: Dockerfile + args: + - NODE_ENV=production + ``` + +### Development + +1. **Use volumes for hot reload** + ```yaml + services: + app: + volumes: + - .:/app + - /app/node_modules # Anonymous volume for node_modules + ``` + +2. **Keep containers running** + ```yaml + services: + app: + stdin_open: true # -i + tty: true # -t + ``` + +### Production + +1. **Use specific image versions** + ```yaml + # Bad + image: node:latest + + # Good + image: node:20-alpine + ``` + +2. **Configure logging** + ```yaml + services: + app: + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" + ``` + +3. **Restart policies** + ```yaml + services: + app: + restart: unless-stopped + ``` + +## Troubleshooting + +### Common Issues + +1. **Container won't start** + ```bash + # Check logs + docker-compose logs app + + # Check container status + docker-compose ps + + # Inspect container + docker inspect myapp_app_1 + ``` + +2. 
**Network connectivity issues** + ```bash + # List networks + docker network ls + + # Inspect network + docker network inspect myapp_default + + # Test connectivity + docker-compose exec app ping db + ``` + +3. **Volume permission issues** + ```bash + # Check volume + docker volume inspect myapp_postgres-data + + # Fix permissions (if needed) + docker-compose exec app chown -R node:node /app/data + ``` + +4. **Health check failing** + ```bash + # Run health check manually + docker-compose exec app curl -f http://localhost:3000/health + + # Check health status + docker inspect --format='{{.State.Health.Status}}' myapp_app_1 + ``` + +5. **Out of disk space** + ```bash + # Clean up + docker system prune -a --volumes + + # Check disk usage + docker system df + ``` + +## Integration with CI/CD + +### GitHub Actions + +```yaml +# .github/workflows/test.yml +name: Test + +on: [push, pull_request] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Build and test + run: | + docker-compose -f docker-compose.yml -f docker-compose.test.yml up --abort-on-container-exit --exit-code-from app + + - name: Cleanup + if: always() + run: docker-compose down -v +``` + +### GitLab CI + +```yaml +# .gitlab-ci.yml +stages: + - test + - build + +test: + stage: test + script: + - docker-compose -f docker-compose.yml -f docker-compose.test.yml up --abort-on-container-exit --exit-code-from app + after_script: + - docker-compose down -v + +build: + stage: build + script: + - docker build -t myapp:$CI_COMMIT_SHA . 
+ - docker push myapp:$CI_COMMIT_SHA +``` + +## Related Skills + +| Skill | Purpose | +|-------|---------| +| `docker-swarm` | Orchestration with Docker Swarm | +| `docker-security` | Container security patterns | +| `docker-networking` | Advanced networking techniques | +| `docker-monitoring` | Container monitoring and logging | \ No newline at end of file diff --git a/.kilo/skills/docker-compose/patterns/basic-service.md b/.kilo/skills/docker-compose/patterns/basic-service.md new file mode 100644 index 0000000..8bb284d --- /dev/null +++ b/.kilo/skills/docker-compose/patterns/basic-service.md @@ -0,0 +1,447 @@ +# Docker Compose Patterns + +## Pattern: Multi-Service Application + +Complete pattern for a typical web application with API, database, cache, and reverse proxy. + +```yaml +version: '3.8' + +services: + # Reverse Proxy + nginx: + image: nginx:alpine + ports: + - "80:80" + - "443:443" + volumes: + - ./nginx.conf:/etc/nginx/nginx.conf:ro + - ./ssl:/etc/nginx/ssl:ro + depends_on: + - api + networks: + - frontend + deploy: + resources: + limits: + cpus: '0.5' + memory: 256M + healthcheck: + test: ["CMD", "nginx", "-t"] + interval: 30s + timeout: 10s + retries: 3 + + # API Service + api: + build: + context: ./api + dockerfile: Dockerfile + environment: + - NODE_ENV=production + - DATABASE_URL=postgres://db:5432/app + - REDIS_URL=redis://cache:6379 + depends_on: + db: + condition: service_healthy + cache: + condition: service_started + networks: + - frontend + - backend + deploy: + replicas: 3 + resources: + limits: + cpus: '1' + memory: 1G + reservations: + cpus: '0.5' + memory: 512M + healthcheck: + test: ["CMD", "node", "-e", "require('http').get('http://localhost:3000/health', (r) => process.exit(r.statusCode === 200 ? 
0 : 1))"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 60s + + # Database + db: + image: postgres:15-alpine + environment: + POSTGRES_DB: app + POSTGRES_USER: ${DB_USER:-app} + POSTGRES_PASSWORD: ${DB_PASSWORD:?DB_PASSWORD required} + volumes: + - postgres-data:/var/lib/postgresql/data + - ./init-scripts:/docker-entrypoint-initdb.d:ro + networks: + - backend + healthcheck: + test: ["CMD-SHELL", "pg_isready -U $POSTGRES_USER -d $POSTGRES_DB"] + interval: 10s + timeout: 5s + retries: 5 + deploy: + resources: + limits: + cpus: '2' + memory: 2G + + # Cache + cache: + image: redis:7-alpine + command: redis-server --appendonly yes --maxmemory 256mb --maxmemory-policy allkeys-lru + volumes: + - redis-data:/data + networks: + - backend + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 10s + timeout: 5s + retries: 5 + +networks: + frontend: + driver: bridge + backend: + driver: bridge + internal: true # No external access + +volumes: + postgres-data: + driver: local + redis-data: + driver: local +``` + +## Pattern: Development Override + +Development-specific configuration with hot reload and debugging. + +```yaml +# docker-compose.dev.yml +version: '3.8' + +services: + api: + build: + context: ./api + dockerfile: Dockerfile.dev + volumes: + - ./api/src:/app/src:ro + - ./api/tests:/app/tests:ro + - /app/node_modules + environment: + - NODE_ENV=development + - DEBUG=app:* + ports: + - "3000:3000" + - "9229:9229" # Node.js debugger + command: npm run dev + + db: + ports: + - "5432:5432" # Expose for local tools + + cache: + ports: + - "6379:6379" # Expose for local tools +``` + +```bash +# Usage +docker-compose -f docker-compose.yml -f docker-compose.dev.yml up +``` + +## Pattern: Production Override + +Production-optimized configuration with security and performance settings. 
+ +```yaml +# docker-compose.prod.yml +version: '3.8' + +services: + api: + image: myapp/api:${VERSION} + deploy: + replicas: 3 + update_config: + parallelism: 1 + delay: 10s + failure_action: rollback + rollback_config: + parallelism: 1 + delay: 10s + resources: + limits: + cpus: '1' + memory: 1G + reservations: + cpus: '0.5' + memory: 512M + environment: + - NODE_ENV=production + secrets: + - db_password + - jwt_secret + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "5" + +secrets: + db_password: + external: true + jwt_secret: + external: true +``` + +```bash +# Usage +docker-compose -f docker-compose.yml -f docker-compose.prod.yml up -d +``` + +## Pattern: Health Check Dependency + +Waiting for dependent services to be healthy before starting. + +```yaml +services: + app: + depends_on: + db: + condition: service_healthy + cache: + condition: service_healthy + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:3000/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 60s + + db: + healthcheck: + test: ["CMD-SHELL", "pg_isready -U $POSTGRES_USER"] + interval: 10s + timeout: 5s + retries: 5 + + cache: + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 10s + timeout: 5s + retries: 5 +``` + +## Pattern: Secrets Management + +Using Docker secrets for sensitive data (Swarm mode). + +```yaml +services: + app: + secrets: + - db_password + - api_key + - jwt_secret + environment: + - DB_PASSWORD_FILE=/run/secrets/db_password + - API_KEY_FILE=/run/secrets/api_key + - JWT_SECRET_FILE=/run/secrets/jwt_secret + +secrets: + db_password: + file: ./secrets/db_password.txt + api_key: + file: ./secrets/api_key.txt + jwt_secret: + external: true # Created via: echo "secret" | docker secret create jwt_secret - +``` + +## Pattern: Resource Limits + +Setting resource constraints for containers. 
+ +```yaml +services: + api: + deploy: + resources: + limits: + cpus: '1.0' + memory: 1G + reservations: + cpus: '0.5' + memory: 512M + # Alternative for non-Swarm + mem_limit: 1G + memswap_limit: 1G + cpus: 1 +``` + +## Pattern: Network Isolation + +Segmenting networks for security. + +```yaml +services: + web: + networks: + - frontend + - backend + + api: + networks: + - backend + - database + + db: + networks: + - database + +networks: + frontend: + driver: bridge + backend: + driver: bridge + database: + driver: bridge + internal: true # No internet access +``` + +## Pattern: Volume Management + +Different volume types for different use cases. + +```yaml +services: + app: + volumes: + # Named volume (managed by Docker) + - app-data:/app/data + # Bind mount (host directory) + - ./config:/app/config:ro + # Anonymous volume (for node_modules) + - /app/node_modules + # tmpfs (temporary in-memory) + - type: tmpfs + target: /tmp + tmpfs: + size: 100M + +volumes: + app-data: + driver: local + labels: + - "app=myapp" + - "type=persistent" +``` + +## Pattern: Logging Configuration + +Configuring logging drivers and options. + +```yaml +services: + app: + logging: + driver: "json-file" # Default + options: + max-size: "10m" + max-file: "3" + labels: "app,environment" + tag: "{{.ImageName}}/{{.Name}}" + + # Syslog logging + app-syslog: + logging: + driver: "syslog" + options: + syslog-address: "tcp://logserver:514" + syslog-facility: "daemon" + tag: "myapp" + + # Fluentd logging + app-fluentd: + logging: + driver: "fluentd" + options: + fluentd-address: "localhost:24224" + tag: "myapp.api" +``` + +## Pattern: Multi-Environment + +Managing multiple environments with overrides. 
+ +```bash +# Directory structure +# docker-compose.yml # Base configuration +# docker-compose.dev.yml # Development overrides +# docker-compose.staging.yml # Staging overrides +# docker-compose.prod.yml # Production overrides +# .env # Environment variables +# .env.dev # Development variables +# .env.staging # Staging variables +# .env.prod # Production variables + +# Development +docker-compose --env-file .env.dev \ + -f docker-compose.yml -f docker-compose.dev.yml up + +# Staging +docker-compose --env-file .env.staging \ + -f docker-compose.yml -f docker-compose.staging.yml up -d + +# Production +docker-compose --env-file .env.prod \ + -f docker-compose.yml -f docker-compose.prod.yml up -d +``` + +## Pattern: CI/CD Testing + +Running tests in isolated containers. + +```yaml +# docker-compose.test.yml +version: '3.8' + +services: + app: + build: + context: . + dockerfile: Dockerfile + environment: + - NODE_ENV=test + - DATABASE_URL=postgres://test:test@db:5432/test + depends_on: + - db + command: npm test + networks: + - test-network + + db: + image: postgres:15-alpine + environment: + POSTGRES_DB: test + POSTGRES_USER: test + POSTGRES_PASSWORD: test + networks: + - test-network + +networks: + test-network: + driver: bridge +``` + +```bash +# CI pipeline +docker-compose -f docker-compose.test.yml up --abort-on-container-exit --exit-code-from app +docker-compose -f docker-compose.test.yml down -v +``` \ No newline at end of file diff --git a/.kilo/skills/docker-monitoring/SKILL.md b/.kilo/skills/docker-monitoring/SKILL.md new file mode 100644 index 0000000..eaa3803 --- /dev/null +++ b/.kilo/skills/docker-monitoring/SKILL.md @@ -0,0 +1,756 @@ +# Skill: Docker Monitoring & Logging + +## Purpose + +Comprehensive skill for Docker container monitoring, logging, metrics collection, and observability. + +## Overview + +Container monitoring is essential for understanding application health, performance, and troubleshooting issues in production. 
Use this skill for setting up monitoring stacks, configuring logging, and implementing observability. + +## When to Use + +- Setting up container monitoring +- Configuring centralized logging +- Implementing health checks +- Performance optimization +- Troubleshooting container issues +- Alerting configuration + +## Monitoring Stack + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Container Monitoring Stack │ +├─────────────────────────────────────────────────────────────┤ +│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ +│ │ Grafana │ │ Prometheus │ │ Alertmgr │ │ +│ │ Dashboard │ │ Metrics │ │ Alerts │ │ +│ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ │ +│ │ │ │ │ +│ ┌──────┴────────────────┴────────────────┴──────┐ │ +│ │ Container Observability │ │ +│ └──────┬────────────────┬───────────────────────┘ │ +│ │ │ │ +│ ┌──────┴──────┐ ┌──────┴──────┐ ┌─────────────┐ │ +│ │ cAdvisor │ │ node-exporter│ │ Loki/EFK │ │ +│ │ Container │ │ Node Metrics│ │ Logging │ │ +│ │ Metrics │ │ │ │ │ │ +│ └─────────────┘ └─────────────┘ └─────────────┘ │ +└─────────────────────────────────────────────────────────────┘ +``` + +## Health Checks + +### 1. Dockerfile Health Check + +```dockerfile +FROM node:20-alpine + +WORKDIR /app +COPY . . +RUN npm ci --only=production + +# Health check (choose ONE variant below: only the last HEALTHCHECK in a Dockerfile takes effect) +# wget is available in Alpine images via BusyBox +HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ + CMD wget --no-verbose --tries=1 --spider http://localhost:3000/health || exit 1 + +# Or with curl (not installed in Alpine images by default; add it with `apk add --no-cache curl`) +HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ + CMD curl -f http://localhost:3000/health || exit 1 + +# Or use Node.js for health check (no extra packages needed) +HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ + CMD node -e "require('http').get('http://localhost:3000/health', (r) => process.exit(r.statusCode === 200 ? 0 : 1))" +``` + +### 2. 
Docker Compose Health Check + +```yaml +services: + api: + image: myapp:latest + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:3000/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 60s + + db: + image: postgres:15-alpine + healthcheck: + test: ["CMD-SHELL", "pg_isready -U $POSTGRES_USER"] + interval: 10s + timeout: 5s + retries: 5 +``` + +### 3. Docker Swarm Health Check + +```yaml +services: + api: + image: myapp:latest + deploy: + update_config: + failure_action: rollback + monitor: 30s + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:3000/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 60s +``` + +### 4. Application Health Endpoint + +```javascript +// Node.js health check endpoint +const express = require('express'); +const app = express(); + +// Dependencies status +async function checkHealth() { + const checks = { + database: await checkDatabase(), + redis: await checkRedis(), + disk: checkDiskSpace(), + memory: checkMemory() + }; + + const healthy = Object.values(checks).every(c => c === 'healthy'); + + return { + status: healthy ? 'healthy' : 'unhealthy', + timestamp: new Date().toISOString(), + checks + }; +} + +app.get('/health', async (req, res) => { + const health = await checkHealth(); + const status = health.status === 'healthy' ? 200 : 503; + res.status(status).json(health); +}); + +app.get('/health/live', (req, res) => { + // Liveness probe - is the app running? + res.status(200).json({ status: 'alive' }); +}); + +app.get('/health/ready', async (req, res) => { + // Readiness probe - is the app ready to serve? + const ready = await isReady(); + res.status(ready ? 200 : 503).json({ ready }); +}); +``` + +## Logging + +### 1. 
Docker Logging Drivers + +```yaml +# JSON file driver (default) +services: + api: + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" + labels: "app,environment" + +# Syslog driver +services: + api: + logging: + driver: "syslog" + options: + syslog-address: "tcp://logserver:514" + syslog-facility: "daemon" + tag: "myapp" + +# Journald driver +services: + api: + logging: + driver: "journald" + options: + labels: "app,environment" + +# Fluentd driver +services: + api: + logging: + driver: "fluentd" + options: + fluentd-address: "localhost:24224" + tag: "myapp.api" +``` + +### 2. Structured Logging + +```javascript +// Pino for structured logging +const pino = require('pino'); + +const logger = pino({ + level: process.env.LOG_LEVEL || 'info', + formatters: { + level: (label) => ({ level: label }) + }, + timestamp: pino.stdTimeFunctions.isoTime +}); + +// Log with context +logger.info({ + userId: '123', + action: 'login', + ip: '192.168.1.1' +}, 'User logged in'); + +// Output: +// {"level":"info","time":"2024-01-01T12:00:00.000Z","userId":"123","action":"login","ip":"192.168.1.1","msg":"User logged in"} +``` + +### 3. EFK Stack (Elasticsearch, Fluentd, Kibana) + +```yaml +# docker-compose.yml +version: '3.8' + +services: + elasticsearch: + image: elasticsearch:8.10.0 + environment: + - discovery.type=single-node + - xpack.security.enabled=false + volumes: + - elasticsearch-data:/usr/share/elasticsearch/data + networks: + - logging + + fluentd: + image: fluent/fluentd:v1.16 + volumes: + - ./fluentd/conf:/fluentd/etc + ports: + - "24224:24224" + networks: + - logging + + kibana: + image: kibana:8.10.0 + environment: + - ELASTICSEARCH_HOSTS=http://elasticsearch:9200 + ports: + - "5601:5601" + networks: + - logging + + app: + image: myapp:latest + logging: + driver: "fluentd" + options: + fluentd-address: "localhost:24224" + tag: "myapp.api" + networks: + - logging + +volumes: + elasticsearch-data: + +networks: + logging: +``` + +### 4. 
Loki Stack (Promtail, Loki, Grafana) + +```yaml +# docker-compose.yml +version: '3.8' + +services: + loki: + image: grafana/loki:latest + ports: + - "3100:3100" + volumes: + - ./loki-config.yml:/etc/loki/local-config.yaml + command: -config.file=/etc/loki/local-config.yaml + networks: + - monitoring + + promtail: + image: grafana/promtail:latest + volumes: + - /var/log:/var/log + - ./promtail-config.yml:/etc/promtail/config.yml + command: -config.file=/etc/promtail/config.yml + networks: + - monitoring + + grafana: + image: grafana/grafana:latest + ports: + - "3000:3000" + environment: + - GF_SECURITY_ADMIN_PASSWORD=admin + volumes: + - grafana-data:/var/lib/grafana + networks: + - monitoring + + app: + image: myapp:latest + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" + networks: + - monitoring + +volumes: + grafana-data: + +networks: + monitoring: +``` + +## Metrics Collection + +### 1. Prometheus + cAdvisor + +```yaml +# docker-compose.yml +version: '3.8' + +services: + prometheus: + image: prom/prometheus:latest + ports: + - "9090:9090" + volumes: + - ./prometheus.yml:/etc/prometheus/prometheus.yml + - prometheus-data:/prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.retention.time=30d' + networks: + - monitoring + + cadvisor: + image: gcr.io/cadvisor/cadvisor:latest + ports: + - "8080:8080" + volumes: + - /:/rootfs:ro + - /var/run:/var/run:ro + - /sys:/sys:ro + - /var/lib/docker/:/var/lib/docker:ro + networks: + - monitoring + + node_exporter: + image: prom/node-exporter:latest + ports: + - "9100:9100" + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + command: + - '--path.procfs=/host/proc' + - '--path.rootfs=/rootfs' + - '--path.sysfs=/host/sys' + networks: + - monitoring + + grafana: + image: grafana/grafana:latest + ports: + - "3000:3000" + environment: + - GF_SECURITY_ADMIN_PASSWORD=admin + volumes: + - grafana-data:/var/lib/grafana + networks: + - 
monitoring + +volumes: + prometheus-data: + grafana-data: + +networks: + monitoring: +``` + +### 2. Prometheus Configuration + +```yaml +# prometheus.yml +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + # Prometheus itself + - job_name: 'prometheus' + static_configs: + - targets: ['prometheus:9090'] + + # cAdvisor (container metrics) + - job_name: 'cadvisor' + static_configs: + - targets: ['cadvisor:8080'] + + # Node exporter (host metrics) + - job_name: 'node' + static_configs: + - targets: ['node_exporter:9100'] + + # Application metrics + - job_name: 'app' + static_configs: + - targets: ['app:3000'] + metrics_path: '/metrics' +``` + +### 3. Application Metrics (Prometheus Client) + +```javascript +// Node.js with prom-client +const promClient = require('prom-client'); + +// Enable default metrics +promClient.collectDefaultMetrics(); + +// Custom metrics +const httpRequestDuration = new promClient.Histogram({ + name: 'http_request_duration_seconds', + help: 'Duration of HTTP requests in seconds', + labelNames: ['method', 'route', 'status_code'], + buckets: [0.1, 0.3, 0.5, 0.7, 1, 3, 5, 7, 10] +}); + +const activeConnections = new promClient.Gauge({ + name: 'active_connections', + help: 'Number of active connections' +}); + +const dbQueryDuration = new promClient.Histogram({ + name: 'db_query_duration_seconds', + help: 'Duration of database queries in seconds', + labelNames: ['query_type', 'table'], + buckets: [0.01, 0.05, 0.1, 0.5, 1, 2] +}); + +// Middleware for HTTP metrics +app.use((req, res, next) => { + const end = httpRequestDuration.startTimer(); + res.on('finish', () => { + end({ method: req.method, route: req.route?.path || req.path, status_code: res.statusCode }); + }); + next(); +}); + +// Metrics endpoint +app.get('/metrics', async (req, res) => { + res.set('Content-Type', promClient.register.contentType); + res.send(await promClient.register.metrics()); +}); +``` + +### 4. 
Grafana Dashboards + +```json +// Dashboard JSON for container metrics +{ + "dashboard": { + "title": "Docker Container Metrics", + "panels": [ + { + "title": "Container CPU Usage", + "targets": [ + { + "expr": "rate(container_cpu_usage_seconds_total{name=~\".+\"}[5m]) * 100", + "legendFormat": "{{name}}" + } + ] + }, + { + "title": "Container Memory Usage", + "targets": [ + { + "expr": "container_memory_usage_bytes{name=~\".+\"} / 1024 / 1024", + "legendFormat": "{{name}} MB" + } + ] + }, + { + "title": "Container Network I/O", + "targets": [ + { + "expr": "rate(container_network_receive_bytes_total{name=~\".+\"}[5m])", + "legendFormat": "{{name}} RX" + }, + { + "expr": "rate(container_network_transmit_bytes_total{name=~\".+\"}[5m])", + "legendFormat": "{{name}} TX" + } + ] + } + ] + } +} +``` + +## Alerting + +### 1. Alertmanager Configuration + +```yaml +# alertmanager.yml +global: + smtp_smarthost: 'smtp.example.com:587' + smtp_from: 'alerts@example.com' + smtp_auth_username: 'alerts@example.com' + smtp_auth_password: 'password' + +route: + group_by: ['alertname', 'severity'] + group_wait: 30s + group_interval: 5m + repeat_interval: 1h + receiver: 'team-email' + routes: + - match: + severity: critical + receiver: 'team-email-critical' + - match: + severity: warning + receiver: 'team-email-warning' + +receivers: + # Default receiver named by route.receiver; every referenced receiver must be defined here + - name: 'team-email' + email_configs: + - to: 'team@example.com' + send_resolved: true + + - name: 'team-email-critical' + email_configs: + - to: 'critical@example.com' + send_resolved: true + + - name: 'team-email-warning' + email_configs: + - to: 'warnings@example.com' + send_resolved: true +``` + +### 2. Prometheus Alert Rules + +```yaml +# alerts.yml +groups: + - name: container_alerts + rules: + # Container down + - alert: ContainerDown + expr: absent(container_last_seen{name=~".+"}) + for: 5m + labels: + severity: critical + annotations: + summary: "Container {{ $labels.name }} is down" + description: "Container {{ $labels.name }} has been down for more than 5 minutes." 
+ + # High CPU + - alert: HighCpuUsage + expr: rate(container_cpu_usage_seconds_total{name=~".+"}[5m]) * 100 > 80 + for: 5m + labels: + severity: warning + annotations: + summary: "High CPU usage on {{ $labels.name }}" + description: "Container {{ $labels.name }} CPU usage is {{ $value }}%." + + # High Memory + - alert: HighMemoryUsage + expr: (container_memory_usage_bytes{name=~".+"} / container_spec_memory_limit_bytes{name=~".+"}) * 100 > 80 + for: 5m + labels: + severity: warning + annotations: + summary: "High memory usage on {{ $labels.name }}" + description: "Container {{ $labels.name }} memory usage is {{ $value }}%." + + # Container restart + - alert: ContainerRestart + expr: increase(container_restart_count{name=~".+"}[1h]) > 0 + labels: + severity: warning + annotations: + summary: "Container {{ $labels.name }} restarted" + description: "Container {{ $labels.name }} has restarted {{ $value }} times in the last hour." + + # No health check + - alert: NoHealthCheck + expr: container_health_status{name=~".+"} == 0 + for: 5m + labels: + severity: critical + annotations: + summary: "Health check failing for {{ $labels.name }}" + description: "Container {{ $labels.name }} health check has been failing for 5 minutes." +``` + +## Observability Best Practices + +### 1. Three Pillars + +| Pillar | Tool | Purpose | +|--------|------|---------| +| Metrics | Prometheus | Quantitative measurements | +| Logs | Loki/EFK | Event records | +| Traces | Jaeger/Zipkin | Request flow | + +### 2. Metrics Categories + +```yaml +# Four Golden Signals (Google SRE) + +# 1. Latency +- http_request_duration_seconds +- db_query_duration_seconds + +# 2. Traffic +- http_requests_per_second +- active_connections + +# 3. Errors +- http_requests_failed_total +- error_rate + +# 4. Saturation +- container_memory_usage_bytes +- container_cpu_usage_seconds_total +``` + +### 3. 
Service Level Objectives (SLOs) + +```yaml +# Prometheus recording rules for SLO +groups: + - name: slo_rules + rules: + - record: slo:availability:ratio_5m + expr: | + sum(rate(http_requests_total{status!~"5.."}[5m])) / + sum(rate(http_requests_total[5m])) + + - record: slo:latency:p99_5m + expr: | + histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m])) + + - record: slo:error_rate:ratio_5m + expr: | + sum(rate(http_requests_total{status=~"5.."}[5m])) / + sum(rate(http_requests_total[5m])) +``` + +## Troubleshooting Commands + +```bash +# View container logs +docker logs <container> +docker logs -f --tail 100 <container> + +# View resource usage +docker stats +docker stats --no-stream + +# Inspect container +docker inspect <container> + +# Check health status +docker inspect --format='{{.State.Health.Status}}' <container> + +# View processes +docker top <container> + +# Execute commands +docker exec -it <container> sh +docker exec <container> df -h + +# View network +docker network inspect <network> + +# View disk usage +docker system df +docker system df -v + +# Prune unused resources +docker system prune -a --volumes + +# Swarm service logs +docker service logs <service> +docker service ps <service> + +# Swarm node status +docker node ls +docker node inspect <node> +``` + +## Performance Tuning + +### 1. Container Resource Limits + +```yaml +services: + api: + deploy: + resources: + limits: + cpus: '1' + memory: 1G + reservations: + cpus: '0.5' + memory: 512M +``` + +### 2. Logging Performance + +```yaml +services: + api: + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" + # Reduce logging overhead + labels: "level,requestId" +``` + +### 3. 
Prometheus Optimization + +```yaml +# prometheus.yml +global: + scrape_interval: 15s # Balance between granularity and load + evaluation_interval: 15s + +# Retention +command: + - '--storage.tsdb.retention.time=30d' + - '--storage.tsdb.retention.size=10GB' +``` + +## Related Skills + +| Skill | Purpose | +|-------|---------| +| `docker-compose` | Local development setup | +| `docker-swarm` | Production orchestration | +| `docker-security` | Container security | +| `kubernetes` | Advanced orchestration | \ No newline at end of file diff --git a/.kilo/skills/docker-security/SKILL.md b/.kilo/skills/docker-security/SKILL.md new file mode 100644 index 0000000..0384bd9 --- /dev/null +++ b/.kilo/skills/docker-security/SKILL.md @@ -0,0 +1,685 @@ +# Skill: Docker Security + +## Purpose + +Comprehensive skill for Docker container security, vulnerability scanning, secrets management, and hardening best practices. + +## Overview + +Container security is essential for production deployments. Use this skill when scanning for vulnerabilities, configuring security settings, managing secrets, and implementing security best practices. + +## When to Use + +- Security hardening containers +- Scanning images for vulnerabilities +- Managing secrets and credentials +- Configuring container isolation +- Implementing least privilege +- Security audits + +## Security Layers + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Container Security Layers │ +├─────────────────────────────────────────────────────────────┤ +│ 1. Host Security │ +│ - Kernel hardening │ +│ - SELinux/AppArmor │ +│ - cgroups namespace │ +├─────────────────────────────────────────────────────────────┤ +│ 2. Container Runtime Security │ +│ - User namespace │ +│ - Seccomp profiles │ +│ - Capability dropping │ +├─────────────────────────────────────────────────────────────┤ +│ 3. 
Image Security │ +│ - Minimal base images │ +│ - Vulnerability scanning │ +│ - No secrets in images │ +├─────────────────────────────────────────────────────────────┤ +│ 4. Network Security │ +│ - Network policies │ +│ - TLS encryption │ +│ - Ingress controls │ +├─────────────────────────────────────────────────────────────┤ +│ 5. Application Security │ +│ - Input validation │ +│ - Authentication │ +│ - Authorization │ +└─────────────────────────────────────────────────────────────┘ +``` + +## Image Security + +### 1. Base Image Selection + +```dockerfile +# ✅ Good: Minimal, specific version +FROM node:20-alpine + +# ✅ Better: Distroless (minimal attack surface) +FROM gcr.io/distroless/nodejs20-debian12 + +# ❌ Bad: Large base, latest tag +FROM node:latest +``` + +### 2. Multi-stage Builds + +```dockerfile +# Build stage +FROM node:20-alpine AS builder +WORKDIR /app +COPY package*.json ./ +RUN npm ci +COPY . . +RUN npm run build + +# Runtime stage +FROM node:20-alpine +RUN addgroup -g 1001 appgroup && \ + adduser -u 1001 -G appgroup -D appuser +WORKDIR /app +COPY --from=builder --chown=appuser:appgroup /app/dist ./dist +COPY --from=builder --chown=appuser:appgroup /app/node_modules ./node_modules +USER appuser +CMD ["node", "dist/index.js"] +``` + +### 3. Vulnerability Scanning + +```bash +# Scan with Trivy +trivy image myapp:latest + +# Scan with Docker Scout +docker scout vulnerabilities myapp:latest + +# Scan with Grype +grype myapp:latest + +# CI/CD integration +trivy image --exit-code 1 --severity HIGH,CRITICAL myapp:latest +``` + +### 4. No Secrets in Images + +```dockerfile +# ❌ Never do this +ENV DATABASE_PASSWORD=password123 +COPY .env ./ + +# ✅ Use runtime secrets +# Secrets are mounted at runtime +RUN --mount=type=secret,id=db_password \ + export DB_PASSWORD=$(cat /run/secrets/db_password) +``` + +## Container Runtime Security + +### 1. 
Non-root User + +```dockerfile +# Create non-root user +FROM alpine:3.18 +RUN addgroup -g 1001 appgroup && \ + adduser -u 1001 -G appgroup -D appuser +WORKDIR /app +COPY --chown=appuser:appgroup . . +USER appuser +CMD ["./app"] +``` + +### 2. Read-only Filesystem + +```yaml +# docker-compose.yml +services: + app: + image: myapp:latest + read_only: true + tmpfs: + - /tmp + - /var/cache +``` + +### 3. Capability Dropping + +```yaml +# Drop all capabilities +services: + app: + image: myapp:latest + cap_drop: + - ALL + cap_add: + - CHOWN # Only needed capabilities + - SETGID + - SETUID +``` + +### 4. Security Options + +```yaml +services: + app: + image: myapp:latest + security_opt: + - no-new-privileges:true # Prevent privilege escalation + - seccomp:default.json # Seccomp profile + - apparmor:docker-default # AppArmor profile +``` + +### 5. Resource Limits + +```yaml +services: + app: + image: myapp:latest + deploy: + resources: + limits: + cpus: '1' + memory: 1G + reservations: + cpus: '0.5' + memory: 512M + pids_limit: 100 # Limit process count +``` + +## Secrets Management + +### 1. Docker Secrets (Swarm) + +```bash +# Create secret +echo "my_password" | docker secret create db_password - + +# Create from file +docker secret create jwt_secret ./secrets/jwt.txt +``` + +```yaml +# docker-compose.yml (Swarm) +services: + api: + image: myapp:latest + secrets: + - db_password + - jwt_secret + environment: + - DB_PASSWORD_FILE=/run/secrets/db_password + +secrets: + db_password: + external: true + jwt_secret: + external: true +``` + +### 2. Docker Compose Secrets (Non-Swarm) + +```yaml +# docker-compose.yml +services: + api: + image: myapp:latest + secrets: + - db_password + environment: + - DB_PASSWORD_FILE=/run/secrets/db_password + +secrets: + db_password: + file: ./secrets/db_password.txt +``` + +### 3. 
Environment Variables (Development) + +```yaml +# docker-compose.yml (development only) +services: + api: + image: myapp:latest + env_file: + - .env # Add .env to .gitignore! +``` + +```bash +# .env (NEVER COMMIT) +DATABASE_URL=postgres://... +JWT_SECRET=secret123 +API_KEY=key123 +``` + +### 4. Reading Secrets in Application + +```javascript +// Node.js +const fs = require('fs'); + +function getSecret(secretName, envName) { + // Try file-based secret first (Docker secrets) + const secretPath = `/run/secrets/${secretName}`; + if (fs.existsSync(secretPath)) { + return fs.readFileSync(secretPath, 'utf8').trim(); + } + // Fallback to environment variable (development) + return process.env[envName]; +} + +const dbPassword = getSecret('db_password', 'DB_PASSWORD'); +``` + +## Network Security + +### 1. Network Segmentation + +```yaml +# Separate networks for different access levels +networks: + frontend: + driver: bridge + + backend: + driver: bridge + internal: true # No external access + + database: + driver: bridge + internal: true + +services: + web: + networks: + - frontend + + api: + networks: + - frontend + - backend + - database # Required: db and cache are reachable only on this network + + db: + networks: + - database + + cache: + networks: + - database +``` + +### 2. Port Exposure + +```yaml +# ✅ Good: Only expose necessary ports +services: + api: + ports: + - "3000:3000" # API port only + + db: + # No ports exposed - only accessible inside network + networks: + - database + +# ❌ Bad: Exposing database to host +services: + db: + ports: + - "5432:5432" # Security risk! +``` + +### 3. TLS Configuration + +```yaml +services: + nginx: + image: nginx:alpine + ports: + - "443:443" + volumes: + - ./ssl/cert.pem:/etc/nginx/ssl/cert.pem:ro + - ./ssl/key.pem:/etc/nginx/ssl/key.pem:ro + configs: + - source: nginx_config + target: /etc/nginx/nginx.conf + +configs: + nginx_config: + file: ./nginx.conf +``` + +### 4. 
Ingress Controls + +```yaml +# Limit connections +services: + api: + image: myapp:latest + ports: + - target: 3000 + published: 3000 + mode: host # Bypass ingress mesh for performance + deploy: + endpoint_mode: dnsrr + resources: + limits: + memory: 1G +``` + +## Security Profiles + +### 1. Seccomp Profile + +```json +// default-seccomp.json +{ + "defaultAction": "SCMP_ACT_ERRNO", + "architectures": ["SCMP_ARCH_X86_64"], + "syscalls": [ + { + "names": ["read", "write", "exit", "exit_group"], + "action": "SCMP_ACT_ALLOW" + }, + { + "names": ["open", "openat", "close"], + "action": "SCMP_ACT_ALLOW" + } + ] +} +``` + +```yaml +# Use custom seccomp profile +services: + api: + security_opt: + - seccomp:./default-seccomp.json +``` + +### 2. AppArmor Profile + +```bash +# Create AppArmor profile +cat > /etc/apparmor.d/docker-myapp <<EOF +#include <tunables/global> +profile docker-myapp flags=(attach_disconnected,mediate_deleted) { + #include <abstractions/base> + + network inet tcp, + network inet udp, + + /app/** r, + /app/** w, + + deny /** rw, +} +EOF + +# Load profile +apparmor_parser -r /etc/apparmor.d/docker-myapp +``` + +```yaml +# Use AppArmor profile +services: + api: + security_opt: + - apparmor:docker-myapp +``` + +## Security Scanning + +### 1. Image Vulnerability Scan + +```bash +# Trivy scan +trivy image --severity HIGH,CRITICAL myapp:latest + +# Docker Scout +docker scout vulnerabilities myapp:latest + +# Grype +grype myapp:latest + +# Output JSON for CI +trivy image --format json --output results.json myapp:latest +``` + +### 2. Base Image Updates + +```bash +# Check base image for updates +docker pull node:20-alpine + +# Rebuild with updated base +docker build --no-cache -t myapp:latest . + +# Scan new image +trivy image myapp:latest +``` + +### 3. Dependency Audit + +```bash +# Node.js +npm audit +npm audit fix + +# Python +pip-audit + +# Go +go list -m all | nancy + +# General +snyk test +``` + +### 4. Secret Detection + +```bash +# Scan for secrets +gitleaks --path . 
--verbose + +# Pre-commit hook +gitleaks protect --staged + +# Docker image +gitleaks --image myapp:latest +``` + +## CI/CD Security Integration + +### GitHub Actions + +```yaml +# .github/workflows/security.yml +name: Security Scan + +on: [push, pull_request] + +jobs: + scan: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Run Trivy vulnerability scanner + uses: aquasecurity/trivy-action@master + with: + image-ref: 'myapp:${{ github.sha }}' + format: 'table' + exit-code: '1' + severity: 'CRITICAL,HIGH' + + - name: Run Gitleaks secret scan + uses: gitleaks/gitleaks-action@v2 + with: + args: --path=. +``` + +### GitLab CI + +```yaml +# .gitlab-ci.yml +security_scan: + stage: test + image: docker:24 + services: + - docker:dind + script: + - docker build -t myapp:$CI_COMMIT_SHA . + - trivy image --exit-code 1 --severity HIGH,CRITICAL myapp:$CI_COMMIT_SHA + - gitleaks --path . --verbose +``` + +## Security Checklist + +### Dockerfile Security + +- [ ] Using minimal base image (alpine/distroless) +- [ ] Specific version tags, not `latest` +- [ ] Running as non-root user +- [ ] No secrets in image +- [ ] `.dockerignore` includes `.env`, `.git`, `.credentials` +- [ ] COPY instead of ADD (unless needed) +- [ ] Multi-stage build for smaller image +- [ ] HEALTHCHECK defined + +### Runtime Security + +- [ ] Read-only filesystem +- [ ] Capabilities dropped +- [ ] No new privileges +- [ ] Resource limits set +- [ ] User namespace enabled (if available) +- [ ] Seccomp/AppArmor profiles applied + +### Network Security + +- [ ] Only necessary ports exposed +- [ ] Internal networks for sensitive services +- [ ] TLS for external communication +- [ ] Network segmentation + +### Secrets Management + +- [ ] No secrets in images +- [ ] Using Docker secrets or external vault +- [ ] `.env` files gitignored +- [ ] Secret rotation implemented + +### CI/CD Security + +- [ ] Vulnerability scanning in pipeline +- [ ] Secret detection pre-commit +- [ ] Dependency 
audit automated +- [ ] Base images updated regularly + +## Remediation Priority + +| Severity | Priority | Timeline | +|----------|----------|----------| +| Critical | P0 | Immediately (24h) | +| High | P1 | Within 7 days | +| Medium | P2 | Within 30 days | +| Low | P3 | Next release | + +## Security Tools + +| Tool | Purpose | +|------|---------| +| Trivy | Image vulnerability scanning | +| Docker Scout | Docker's built-in scanner | +| Grype | Vulnerability scanner | +| Gitleaks | Secret detection | +| Snyk | Dependency scanning | +| Falco | Runtime security monitoring | +| Anchore | Container security analysis | +| Clair | Open-source vulnerability scanner | + +## Common Vulnerabilities + +### CVE Examples + +```bash +# Check for specific CVE (Trivy has no per-CVE filter flag; filter the report) +trivy image myapp:latest | grep CVE-2021-44228 + +# Ignore specific CVE (use carefully) +trivy image --ignorefile .trivyignore myapp:latest + +# .trivyignore +CVE-2021-12345 # Known and accepted +``` + +### Log4j Example (CVE-2021-44228) + +```bash +# Check for vulnerable versions +docker images --format '{{.Repository}}:{{.Tag}}' | xargs -I {} \ + sh -c 'trivy image {} | grep CVE-2021-44228' +``` + +```dockerfile +# Update and rebuild +FROM node:20-alpine +# Ensure no vulnerable log4j dependency +RUN npm audit fix +``` + +## Incident Response + +### Security Breach Steps + +1. **Isolate** + ```bash + # Stop container + docker stop <container_id> + + # Remove from network + docker network disconnect app-network <container_id> + ``` + +2. **Preserve Evidence** + ```bash + # Save container state + docker commit <container_id> incident-container + + # Export logs + docker logs <container_id> > incident-logs.txt + docker export <container_id> > incident-container.tar + ``` + +3. **Analyze** + ```bash + # Inspect container + docker inspect <container_id> + + # Check image + trivy image <image_name> + + # Review image layer history + docker history <image_name> + ``` + +4. **Remediate** + ```bash + # Update base image + docker pull node:20-alpine + + # Rebuild + docker build --no-cache -t myapp:fixed .
+ + # Scan + trivy image myapp:fixed + ``` + +## Related Skills + +| Skill | Purpose | +|-------|---------| +| `docker-compose` | Local development setup | +| `docker-swarm` | Production orchestration | +| `docker-monitoring` | Security monitoring | +| `docker-networking` | Network security | \ No newline at end of file diff --git a/.kilo/skills/docker-swarm/SKILL.md b/.kilo/skills/docker-swarm/SKILL.md new file mode 100644 index 0000000..ae78356 --- /dev/null +++ b/.kilo/skills/docker-swarm/SKILL.md @@ -0,0 +1,757 @@ +# Skill: Docker Swarm + +## Purpose + +Comprehensive skill for Docker Swarm orchestration, cluster management, and production-ready container deployment. + +## Overview + +Docker Swarm is Docker's native clustering and orchestration solution. Use this skill for production deployments, high availability setups, and managing containerized applications at scale. + +## When to Use + +- Deploying applications in production clusters +- Setting up high availability services +- Scaling services dynamically +- Managing rolling updates +- Handling secrets and configs securely +- Multi-node orchestration + +## Core Concepts + +### Swarm Architecture + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Docker Swarm Cluster │ +├─────────────────────────────────────────────────────────────┤ +│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ +│ │ Manager │ │ Manager │ │ Manager │ (HA) │ +│ │ Node 1 │ │ Node 2 │ │ Node 3 │ │ +│ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ │ +│ │ │ │ │ +│ ┌──────┴────────────────┴────────────────┴──────┐ │ +│ │ Internal Network │ │ +│ └──────┬────────────────┬──────────────────────┘ │ +│ │ │ │ +│ ┌──────┴──────┐ ┌──────┴──────┐ ┌─────────────┐ │ +│ │ Worker │ │ Worker │ │ Worker │ │ +│ │ Node 4 │ │ Node 5 │ │ Node 6 │ │ +│ └─────────────┘ └─────────────┘ └─────────────┘ │ +│ │ +│ Services: api, web, db, redis, queue │ +│ Tasks: Running containers distributed across nodes │ 
+└─────────────────────────────────────────────────────────────┘ +``` + +### Key Components + +| Component | Description | +|-----------|-------------| +| **Service** | Definition of a container (image, ports, replicas) | +| **Task** | Single running instance of a service | +| **Stack** | Group of related services (like docker-compose) | +| **Node** | Docker daemon participating in swarm | +| **Overlay Network** | Network spanning multiple nodes | + +## Skill Files Structure + +``` +docker-swarm/ +├── SKILL.md # This file +├── patterns/ +│ ├── services.md # Service deployment patterns +│ ├── networking.md # Overlay network patterns +│ ├── secrets.md # Secrets management +│ └── configs.md # Config management +└── examples/ + ├── ha-web-app.md # High availability web app + ├── microservices.md # Microservices deployment + └── database.md # Database cluster setup +``` + +## Core Patterns + +### 1. Initialize Swarm + +```bash +# Initialize swarm on manager node +docker swarm init --advertise-addr <MANAGER_IP> + +# Get join token for workers +docker swarm join-token -q worker + +# Get join token for managers +docker swarm join-token -q manager + +# Join swarm (on worker nodes) +docker swarm join --token <WORKER_TOKEN> <MANAGER_IP>:2377 + +# Check swarm status +docker node ls +``` + +### 2. 
Service Deployment + +```yaml +# docker-compose.yml (Swarm stack) +version: '3.8' + +services: + api: + image: myapp/api:latest + deploy: + mode: replicated + replicas: 3 + update_config: + parallelism: 1 + delay: 10s + failure_action: rollback + order: start-first + rollback_config: + parallelism: 1 + delay: 10s + restart_policy: + condition: on-failure + delay: 5s + max_attempts: 3 + window: 120s + placement: + constraints: + - node.role == worker + preferences: + - spread: node.id + resources: + limits: + cpus: '1' + memory: 1G + reservations: + cpus: '0.5' + memory: 512M + networks: + - app-network + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:3000/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 60s + secrets: + - db_password + - jwt_secret + configs: + - app_config + +networks: + app-network: + driver: overlay + attachable: true + +secrets: + db_password: + external: true + jwt_secret: + external: true + +configs: + app_config: + external: true +``` + +### 3. Deploy Stack + +```bash +# Create secrets (before deploying) +echo "my_db_password" | docker secret create db_password - +docker secret create jwt_secret ./jwt_secret.txt + +# Create configs +docker config create app_config ./config.json + +# Deploy stack +docker stack deploy -c docker-compose.yml mystack + +# List services +docker stack services mystack + +# List tasks +docker stack ps mystack + +# Remove stack +docker stack rm mystack +``` + +### 4. 
Service Management + +```bash +# Scale service +docker service scale mystack_api=5 + +# Update service image +docker service update --image myapp/api:v2 mystack_api + +# Update environment variable +docker service update --env-add NODE_ENV=staging mystack_api + +# Add constraint +docker service update --constraint-add 'node.labels.region==us-east' mystack_api + +# Rollback service +docker service rollback mystack_api + +# View service details +docker service inspect mystack_api + +# View service logs +docker service logs -f mystack_api +``` + +### 5. Secrets Management + +```bash +# Create secret from stdin +echo "my_secret" | docker secret create db_password - + +# Create secret from file +docker secret create jwt_secret ./secrets/jwt.txt + +# List secrets +docker secret ls + +# Inspect secret metadata +docker secret inspect db_password + +# Use secret in service +docker service create \ + --name api \ + --secret db_password \ + --secret jwt_secret \ + myapp/api:latest + +# Remove secret +docker secret rm db_password +``` + +### 6. Config Management + +```bash +# Create config +docker config create app_config ./config.json + +# List configs +docker config ls + +# Use config in service +docker service create \ + --name api \ + --config source=app_config,target=/app/config.json \ + myapp/api:latest + +# Update config (create new version) +docker config create app_config_v2 ./config-v2.json + +# Update service with new config +docker service update \ + --config-rm app_config \ + --config-add source=app_config_v2,target=/app/config.json \ + mystack_api +``` + +### 7. 
Overlay Networks + +```yaml +# Create overlay network +networks: + frontend: + driver: overlay + attachable: true + + backend: + driver: overlay + attachable: true + internal: true # No external access + +services: + web: + networks: + - frontend + - backend + + api: + networks: + - backend + + db: + networks: + - backend +``` + +```bash +# Create network manually +docker network create --driver overlay --attachable my-network + +# List networks +docker network ls + +# Inspect network +docker network inspect my-network +``` + +## Deployment Strategies + +### Rolling Update + +```yaml +services: + api: + deploy: + update_config: + parallelism: 2 # Update 2 tasks at a time + delay: 10s # Wait 10s between updates + failure_action: rollback + monitor: 30s # Monitor for 30s after update + max_failure_ratio: 0.3 # Allow 30% failures +``` + +### Blue-Green Deployment + +```bash +# Deploy new version alongside existing +docker service create \ + --name api-v2 \ + --mode replicated \ + --replicas 3 \ + --network app-network \ + myapp/api:v2 + +# Update router to point to new version +# (Using nginx/traefik config update) + +# Remove old version +docker service rm api-v1 +``` + +### Canary Deployment + +```yaml +# Deploy canary version +version: '3.8' +services: + api: + image: myapp/api:v1 + deploy: + replicas: 9 + # ... 90% of traffic + + api-canary: + image: myapp/api:v2 + deploy: + replicas: 1 + # ... 10% of traffic +``` + +### Global Services + +```yaml +# Run one instance on every node +services: + monitoring: + image: myapp/monitoring:latest + deploy: + mode: global + volumes: + - /var/run/docker.sock:/var/run/docker.sock +``` + +## High Availability Patterns + +### 1. 
Multi-Manager Setup + +```bash +# Create 3 manager nodes for HA +docker swarm init --advertise-addr <MANAGER_IP> + +# On manager2 +docker swarm join --token <MANAGER_TOKEN> <MANAGER_IP>:2377 + +# On manager3 +docker swarm join --token <MANAGER_TOKEN> <MANAGER_IP>:2377 + +# Promote worker to manager +docker node promote <NODE_ID> + +# Demote manager to worker +docker node demote <NODE_ID> +``` + +### 2. Placement Constraints + +```yaml +services: + db: + image: postgres:15 + deploy: + placement: + constraints: + - node.role == worker + - node.labels.database == true + preferences: + - spread: node.labels.zone # Spread across zones + + cache: + image: redis:7 + deploy: + placement: + constraints: + - node.labels.cache == true +``` + +### 3. Resource Management + +```yaml +services: + api: + deploy: + resources: + limits: + cpus: '2' + memory: 2G + reservations: + cpus: '1' + memory: 1G + restart_policy: + condition: on-failure + max_attempts: 3 +``` + +### 4. Health Checks + +```yaml +services: + api: + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:3000/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 60s + deploy: + update_config: + failure_action: rollback + monitor: 30s +``` + +## Service Discovery & Load Balancing + +### Built-in Load Balancing + +```yaml +# Swarm provides automatic load balancing +# Virtual IP (VIP) is the default endpoint mode +services: + api: + deploy: + replicas: 3 + ports: + - "3000:3000" # Requests are load balanced across replicas +``` + +```yaml +# Alternative: DNS round-robin instead of a virtual IP +services: + api: + deploy: + endpoint_mode: dnsrr +``` + +### Ingress Network + +```yaml +# Publishing ports +services: + web: + ports: + - target: 80 + published: 80 + mode: ingress # Default: published on all nodes, routed through mesh + - target: 443 + published: 443 + mode: ingress +``` + +### Host Mode + +```yaml +# Bypass load balancer (for performance) +services: + web: + ports: + - target: 80 + published: 80 + mode: host # Direct port mapping + deploy: + mode: global # One per node +``` + +## Monitoring & Logging + +### Logging Drivers + +```yaml +services: + api: + logging: + driver: 
"json-file" + options: + max-size: "10m" + max-file: "3" + labels: "app,environment" + + # Or use syslog + api: + logging: + driver: "syslog" + options: + syslog-address: "tcp://logserver:514" + syslog-facility: "daemon" +``` + +### Viewing Logs + +```bash +# Service logs +docker service logs mystack_api + +# Filter by time +docker service logs --since 1h mystack_api + +# Follow logs +docker service logs -f mystack_api + +# All tasks +docker service logs --tail 100 mystack_api +``` + +### Monitoring Commands + +```bash +# Node status +docker node ls + +# Service status +docker service ls + +# Task status +docker service ps mystack_api + +# Resource usage +docker stats + +# Service inspect +docker service inspect mystack_api --pretty +``` + +## Backup & Recovery + +### Backup Swarm State + +```bash +# On manager node +docker pull swaggercodebreaker/swarmctl +docker run --rm -v /var/lib/docker/swarm:/ swarmctl export > swarm-backup.json + +# Or manual backup +cp -r /var/lib/docker/swarm/raft ~/swarm-backup/ +``` + +### Recovery + +```bash +# Unlock swarm after restart (if encrypted) +docker swarm unlock + +# Force new cluster (disaster recovery) +docker swarm init --force-new-cluster + +# Restore from backup +docker swarm init --force-new-cluster +docker service create --name restore-app ... 
+``` + +## Common Operations + +### Node Management + +```bash +# List nodes +docker node ls + +# Inspect node +docker node inspect <NODE_ID> + +# Drain node (for maintenance) +docker node update --availability drain <NODE_ID> + +# Activate node +docker node update --availability active <NODE_ID> + +# Add labels +docker node update --label-add region=us-east <NODE_ID> + +# Remove node +docker node rm <NODE_ID> +``` + +### Service Debugging + +```bash +# View service tasks +docker service ps mystack_api + +# View task details +docker inspect <TASK_ID> + +# Run temporary container for debugging +docker run --rm -it --network mystack_app-network \ + myapp/api:latest sh + +# Check service logs +docker service logs mystack_api + +# Execute command in running container +docker exec -it <CONTAINER_ID> sh +``` + +### Network Debugging + +```bash +# List networks +docker network ls + +# Inspect overlay network +docker network inspect mystack_app-network + +# Test connectivity +docker run --rm --network mystack_app-network alpine ping api + +# DNS resolution +docker run --rm --network mystack_app-network alpine nslookup api +``` + +## Production Checklist + +- [ ] At least 3 manager nodes for HA +- [ ] Quorum maintained (odd number of managers) +- [ ] Resources limited for all services +- [ ] Health checks configured +- [ ] Rolling update strategy defined +- [ ] Rollback strategy configured +- [ ] Secrets used for sensitive data +- [ ] Configs for environment settings +- [ ] Overlay networks properly segmented +- [ ] Logging driver configured +- [ ] Monitoring solution deployed +- [ ] Backup strategy implemented +- [ ] Node labels for placement constraints +- [ ] Resource reservations set + +## Best Practices + +1. **Resource Planning** + ```yaml + deploy: + resources: + limits: + cpus: '1' + memory: 1G + reservations: + cpus: '0.5' + memory: 512M + ``` + +2. **Rolling Updates** + ```yaml + deploy: + update_config: + parallelism: 1 + delay: 10s + failure_action: rollback + monitor: 30s + ``` + +3. 
**Placement Constraints** + ```yaml + deploy: + placement: + constraints: + - node.role == worker + preferences: + - spread: node.labels.zone + ``` + +4. **Network Segmentation** + ```yaml + networks: + frontend: + driver: overlay + backend: + driver: overlay + internal: true + ``` + +5. **Secrets Management** + ```yaml + secrets: + - db_password + - jwt_secret + ``` + +## Troubleshooting + +### Service Won't Start + +```bash +# Check task status +docker service ps mystack_api --no-trunc + +# Check logs +docker service logs mystack_api + +# Check node resources +docker node ls +docker stats + +# Check network +docker network inspect mystack_app-network +``` + +### Task Keeps Restarting + +```bash +# Check restart policy +docker service inspect mystack_api --pretty + +# Check container logs +docker service logs --tail 50 mystack_api + +# Check health check +docker inspect --format='{{.State.Health}}' +``` + +### Network Issues + +```bash +# Verify overlay network +docker network inspect mystack_app-network + +# Check DNS resolution +docker run --rm --network mystack_app-network alpine nslookup api + +# Check connectivity +docker run --rm --network mystack_app-network alpine ping api +``` + +## Related Skills + +| Skill | Purpose | +|-------|---------| +| `docker-compose` | Local development with Compose | +| `docker-security` | Container security patterns | +| `kubernetes` | Kubernetes orchestration | +| `docker-monitoring` | Container monitoring setup | \ No newline at end of file diff --git a/.kilo/skills/docker-swarm/examples/ha-web-app.md b/.kilo/skills/docker-swarm/examples/ha-web-app.md new file mode 100644 index 0000000..27cc413 --- /dev/null +++ b/.kilo/skills/docker-swarm/examples/ha-web-app.md @@ -0,0 +1,519 @@ +# Docker Swarm Deployment Examples + +## Example: High Availability Web Application + +Complete example of deploying a production-ready web application with Docker Swarm. 
+ +### docker-compose.yml (Swarm Stack) + +```yaml +version: '3.8' + +services: + # Reverse Proxy with SSL + nginx: + image: nginx:alpine + ports: + - "80:80" + - "443:443" + configs: + - source: nginx_config + target: /etc/nginx/nginx.conf + secrets: + - ssl_cert + - ssl_key + networks: + - frontend + deploy: + replicas: 2 + placement: + constraints: + - node.role == worker + resources: + limits: + cpus: '0.5' + memory: 256M + healthcheck: + test: ["CMD", "nginx", "-t"] + interval: 30s + timeout: 10s + retries: 3 + + # API Service + api: + image: myapp/api:latest + environment: + - NODE_ENV=production + - DATABASE_URL=postgres://app:${DB_PASSWORD}@db:5432/app + - REDIS_URL=redis://cache:6379 + configs: + - source: app_config + target: /app/config.json + secrets: + - jwt_secret + networks: + - frontend + - backend + deploy: + replicas: 3 + update_config: + parallelism: 1 + delay: 10s + failure_action: rollback + order: start-first + rollback_config: + parallelism: 1 + delay: 10s + restart_policy: + condition: on-failure + delay: 5s + max_attempts: 3 + window: 120s + placement: + constraints: + - node.role == worker + preferences: + - spread: node.id + resources: + limits: + cpus: '1' + memory: 1G + reservations: + cpus: '0.5' + memory: 512M + healthcheck: + test: ["CMD", "node", "-e", "require('http').get('http://localhost:3000/health', (r) => process.exit(r.statusCode === 200 ? 
0 : 1))"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 60s + + # Background Worker + worker: + image: myapp/worker:latest + environment: + - NODE_ENV=production + - DATABASE_URL=postgres://app:${DB_PASSWORD}@db:5432/app + secrets: + - jwt_secret + networks: + - backend + deploy: + replicas: 2 + restart_policy: + condition: on-failure + delay: 10s + max_attempts: 5 + placement: + constraints: + - node.role == worker + resources: + limits: + cpus: '0.5' + memory: 512M + + # Database (PostgreSQL with Replication) + db: + image: postgres:15-alpine + environment: + POSTGRES_DB: app + POSTGRES_USER: app + POSTGRES_PASSWORD_FILE: /run/secrets/db_password + secrets: + - db_password + volumes: + - postgres-data:/var/lib/postgresql/data + networks: + - backend + deploy: + replicas: 1 + placement: + constraints: + - node.labels.database == true + resources: + limits: + cpus: '2' + memory: 2G + healthcheck: + test: ["CMD-SHELL", "pg_isready -U app -d app"] + interval: 10s + timeout: 5s + retries: 5 + + # Redis Cache + cache: + image: redis:7-alpine + command: redis-server --appendonly yes --maxmemory 512mb --maxmemory-policy allkeys-lru + volumes: + - redis-data:/data + networks: + - backend + deploy: + replicas: 1 + placement: + constraints: + - node.labels.cache == true + resources: + limits: + cpus: '0.5' + memory: 512M + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 10s + timeout: 5s + retries: 5 + + # Monitoring (Prometheus) + prometheus: + image: prom/prometheus:latest + configs: + - source: prometheus_config + target: /etc/prometheus/prometheus.yml + volumes: + - prometheus-data:/prometheus + networks: + - monitoring + deploy: + replicas: 1 + placement: + constraints: + - node.role == manager + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.retention.time=30d' + + # Monitoring (Grafana) + grafana: + image: grafana/grafana:latest + ports: + - "3000:3000" + environment: + - 
GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD} + volumes: + - grafana-data:/var/lib/grafana + networks: + - monitoring + deploy: + replicas: 1 + placement: + constraints: + - node.role == manager + +networks: + frontend: + driver: overlay + attachable: true + backend: + driver: overlay + internal: true + monitoring: + driver: overlay + attachable: true + +volumes: + postgres-data: + redis-data: + prometheus-data: + grafana-data: + +configs: + nginx_config: + file: ./configs/nginx.conf + app_config: + file: ./configs/app.json + prometheus_config: + file: ./configs/prometheus.yml + +secrets: + db_password: + file: ./secrets/db_password.txt + jwt_secret: + file: ./secrets/jwt_secret.txt + ssl_cert: + file: ./secrets/ssl_cert.pem + ssl_key: + file: ./secrets/ssl_key.pem +``` + +### Deployment Script + +```bash +#!/bin/bash +# deploy.sh + +set -e + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +NC='\033[0m' + +# Configuration +STACK_NAME="myapp" +COMPOSE_FILE="docker-compose.yml" + +echo "Starting deployment for ${STACK_NAME}..." + +# Check if running on Swarm +if ! docker info | grep -q "Swarm: active"; then + echo -e "${RED}Error: Not running in Swarm mode${NC}" + echo "Initialize Swarm with: docker swarm init" + exit 1 +fi + +# Create secrets (if not exists) +echo "Checking secrets..." +for secret in db_password jwt_secret ssl_cert ssl_key; do + if ! docker secret inspect ${secret} > /dev/null 2>&1; then + if [ -f "./secrets/${secret}.txt" ]; then + docker secret create ${secret} ./secrets/${secret}.txt + echo -e "${GREEN}Created secret: ${secret}${NC}" + else + echo -e "${RED}Missing secret file: ./secrets/${secret}.txt${NC}" + exit 1 + fi + else + echo "Secret ${secret} already exists" + fi +done + +# Create configs +echo "Creating configs..." 
+docker config rm nginx_config 2>/dev/null || true +docker config create nginx_config ./configs/nginx.conf + +docker config rm app_config 2>/dev/null || true +docker config create app_config ./configs/app.json + +docker config rm prometheus_config 2>/dev/null || true +docker config create prometheus_config ./configs/prometheus.yml + +# Deploy stack +echo "Deploying stack..." +docker stack deploy -c ${COMPOSE_FILE} ${STACK_NAME} + +# Wait for services to start +echo "Waiting for services to start..." +sleep 30 + +# Show status +docker stack services ${STACK_NAME} + +# Check health +echo "Checking service health..." +for service in nginx api worker db cache prometheus grafana; do + REPLICAS=$(docker service ls --filter name=${STACK_NAME}_${service} --format "{{.Replicas}}") + echo "${service}: ${REPLICAS}" +done + +echo -e "${GREEN}Deployment complete!${NC}" +echo "Check status: docker stack services ${STACK_NAME}" +echo "View logs: docker service logs -f ${STACK_NAME}_api" +``` + +### Service Update Script + +```bash +#!/bin/bash +# update-service.sh + +set -e + +SERVICE_NAME=$1 +NEW_IMAGE=$2 +STACK_NAME="myapp" + +if [ -z "$SERVICE_NAME" ] || [ -z "$NEW_IMAGE" ]; then + echo "Usage: ./update-service.sh <service_name> <new_image>" + echo "Example: ./update-service.sh api myapp/api:v2" + exit 1 +fi + +FULL_SERVICE_NAME="${STACK_NAME}_${SERVICE_NAME}" + +echo "Updating ${FULL_SERVICE_NAME} to ${NEW_IMAGE}..." + +# Update service with rollback on failure +docker service update \ + --image ${NEW_IMAGE} \ + --update-parallelism 1 \ + --update-delay 10s \ + --update-failure-action rollback \ + --update-monitor 30s \ + ${FULL_SERVICE_NAME} + +# Wait for update +echo "Waiting for update to complete..." +sleep 30 + +# Check status +docker service ps ${FULL_SERVICE_NAME} + +echo "Update complete!" 
+``` + +### Rollback Script + +```bash +#!/bin/bash +# rollback-service.sh + +set -e + +SERVICE_NAME=$1 +STACK_NAME="myapp" + +if [ -z "$SERVICE_NAME" ]; then + echo "Usage: ./rollback-service.sh <service_name>" + exit 1 +fi + +FULL_SERVICE_NAME="${STACK_NAME}_${SERVICE_NAME}" + +echo "Rolling back ${FULL_SERVICE_NAME}..." + +docker service rollback ${FULL_SERVICE_NAME} + +sleep 30 + +docker service ps ${FULL_SERVICE_NAME} + +echo "Rollback complete!" +``` + +### Monitoring Dashboard (Grafana) + +```json +{ + "dashboard": { + "title": "Docker Swarm Overview", + "panels": [ + { + "title": "Running Tasks", + "targets": [ + { + "expr": "count(container_tasks_state{state=\"running\"})" + } + ] + }, + { + "title": "CPU Usage per Service", + "targets": [ + { + "expr": "rate(container_cpu_usage_seconds_total{name=~\".+\"}[5m]) * 100", + "legendFormat": "{{name}}" + } + ] + }, + { + "title": "Memory Usage per Service", + "targets": [ + { + "expr": "container_memory_usage_bytes{name=~\".+\"} / 1024 / 1024", + "legendFormat": "{{name}} MB" + } + ] + }, + { + "title": "Network I/O", + "targets": [ + { + "expr": "rate(container_network_receive_bytes_total{name=~\".+\"}[5m])", + "legendFormat": "{{name}} RX" + }, + { + "expr": "rate(container_network_transmit_bytes_total{name=~\".+\"}[5m])", + "legendFormat": "{{name}} TX" + } + ] + }, + { + "title": "Service Health", + "targets": [ + { + "expr": "container_health_status{name=~\".+\"}" + } + ] + } + ] + } +} +``` + +### Prometheus Configuration + +```yaml +# prometheus.yml +global: + scrape_interval: 15s + evaluation_interval: 15s + +alerting: + alertmanagers: + - static_configs: + - targets: + - alertmanager:9093 + +rule_files: + - /etc/prometheus/alerts.yml + +scrape_configs: + - job_name: 'prometheus' + static_configs: + - targets: ['prometheus:9090'] + + - job_name: 'cadvisor' + static_configs: + - targets: ['cadvisor:8080'] + + - job_name: 'node' + static_configs: + - targets: ['node-exporter:9100'] + + - job_name: 'api' + 
static_configs: + - targets: ['api:3000'] + metrics_path: '/metrics' +``` + +### Alert Rules + +```yaml +# alerts.yml +groups: + - name: swarm_alerts + rules: + - alert: ServiceDown + expr: count(container_tasks_state{state="running"}) == 0 + for: 5m + labels: + severity: critical + annotations: + summary: "Service {{ $labels.service }} is down" + description: "No running tasks for service {{ $labels.service }}" + + - alert: HighCpuUsage + expr: rate(container_cpu_usage_seconds_total[5m]) * 100 > 80 + for: 5m + labels: + severity: warning + annotations: + summary: "High CPU usage on {{ $labels.name }}" + description: "Container {{ $labels.name }} CPU usage is {{ $value }}%" + + - alert: HighMemoryUsage + expr: (container_memory_usage_bytes / container_spec_memory_limit_bytes) * 100 > 80 + for: 5m + labels: + severity: warning + annotations: + summary: "High memory usage on {{ $labels.name }}" + description: "Container {{ $labels.name }} memory usage is {{ $value }}%" + + - alert: ContainerRestart + expr: increase(container_restart_count[1h]) > 0 + labels: + severity: warning + annotations: + summary: "Container {{ $labels.name }} restarted" + description: "Container {{ $labels.name }} restarted {{ $value }} times in the last hour" +``` \ No newline at end of file diff --git a/AGENTS.md b/AGENTS.md index 0015bef..fc96daa 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -38,6 +38,7 @@ These agents are invoked automatically by `/pipeline` or manually via `@mention` | `@lead-developer` | Implements code | Status: testing (tests fail) | | `@frontend-developer` | UI implementation | When UI work needed | | `@backend-developer` | Node.js/Express/APIs | When backend needed | +| `@devops-engineer` | Docker/Kubernetes/CI/CD | When deployment/infra needed | ### Quality Assurance | Agent | Role | When Invoked | @@ -48,6 +49,12 @@ These agents are invoked automatically by `/pipeline` or manually via `@mention` | `@security-auditor` | Security audit | After performance | | 
`@visual-tester` | Visual regression | When UI changes | +### DevOps & Infrastructure +| Agent | Role | When Invoked | +|-------|------|--------------| +| `@devops-engineer` | Docker/Swarm/K8s deployment | When deployment needed | +| `@security-auditor` | Container security scan | After deployment config | + ### Cognitive Enhancement (New) | Agent | Role | When Invoked | |-------|------|--------------| @@ -207,6 +214,46 @@ GITEA_TOKEN=your-token-here | `.kilo/skills/` | Skill modules | | `src/kilocode/` | TypeScript API for programmatic use | +## Skills Reference + +### Containerization Skills +| Skill | Purpose | Location | +|-------|---------|----------| +| `docker-compose` | Multi-container orchestration | `.kilo/skills/docker-compose/` | +| `docker-swarm` | Production cluster deployment | `.kilo/skills/docker-swarm/` | +| `docker-security` | Container security hardening | `.kilo/skills/docker-security/` | +| `docker-monitoring` | Container monitoring/logging | `.kilo/skills/docker-monitoring/` | + +### Node.js Skills +| Skill | Purpose | Location | +|-------|---------|----------| +| `nodejs-express-patterns` | Express routing, middleware | `.kilo/skills/nodejs-express-patterns/` | +| `nodejs-auth-jwt` | JWT authentication | `.kilo/skills/nodejs-auth-jwt/` | +| `nodejs-security-owasp` | OWASP security | `.kilo/skills/nodejs-security-owasp/` | + +### Database Skills +| Skill | Purpose | Location | +|-------|---------|----------| +| `postgresql-patterns` | PostgreSQL patterns | `.kilo/skills/postgresql-patterns/` | +| `sqlite-patterns` | SQLite patterns | `.kilo/skills/sqlite-patterns/` | +| `clickhouse-patterns` | ClickHouse patterns | `.kilo/skills/clickhouse-patterns/` | + +### Go Skills +| Skill | Purpose | Location | +|-------|---------|----------| +| `go-modules` | Go modules management | `.kilo/skills/go-modules/` | +| `go-concurrency` | Goroutines and channels | `.kilo/skills/go-concurrency/` | +| `go-testing` | Go testing patterns | 
`.kilo/skills/go-testing/` | +| `go-security` | Go security patterns | `.kilo/skills/go-security/` | + +### Process Skills +| Skill | Purpose | Location | +|-------|---------|----------| +| `planning-patterns` | CoT/ToT planning | `.kilo/skills/planning-patterns/` | +| `memory-systems` | Memory management | `.kilo/skills/memory-systems/` | +| `tool-use` | Tool usage patterns | `.kilo/skills/tool-use/` | +| `research-cycle` | Self-improvement cycle | `.kilo/skills/research-cycle/` | + ## Using the TypeScript API ```typescript From 0a854a3bc323330daab1a947c9f1d244dbb8a555 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=A8NW=C2=A8?= <¨neroworld@mail.ru¨> Date: Sun, 5 Apr 2026 13:02:32 +0100 Subject: [PATCH 2/4] fix: add missing agent permissions and update orchestrator mappings - Add devops-engineer permission to orchestrator - Add BrowserAutomation to orchestrator mappings - Add code-skeptic task permission to devops-engineer, backend-developer, frontend-developer, go-developer - Add security-auditor task permission to devops-engineer - Add Task Tool Invocation section to agent files - Add go-developer to AGENTS.md Core Development table - Update KILO_SPEC.md with go-developer agent --- .kilo/KILO_SPEC.md | 3 +++ .kilo/agents/backend-developer.md | 6 ++++++ .kilo/agents/devops-engineer.md | 8 ++++++++ .kilo/agents/frontend-developer.md | 6 ++++++ .kilo/agents/go-developer.md | 6 ++++++ .kilo/agents/orchestrator.md | 3 +++ AGENTS.md | 1 + 7 files changed, 33 insertions(+) diff --git a/.kilo/KILO_SPEC.md b/.kilo/KILO_SPEC.md index 790269e..1847e08 100644 --- a/.kilo/KILO_SPEC.md +++ b/.kilo/KILO_SPEC.md @@ -433,7 +433,10 @@ Provider availability depends on configuration. 
Common providers include: | `@AgentArchitect` | Manages agent network per Kilo.ai spec | ollama-cloud/gpt-oss:120b | | `@CapabilityAnalyst` | Analyzes task coverage, identifies gaps | ollama-cloud/gpt-oss:120b | | `@MarkdownValidator` | Validates Markdown for Gitea issues | qwen/qwen3.6-plus:free | +| `@FrontendDeveloper` | UI implementation with multimodal | ollama-cloud/kimi-k2.5 | | `@BackendDeveloper` | Node.js, Express, APIs, database specialist | ollama-cloud/deepseek-v3.2 | +| `@GoDeveloper` | Go, Gin, Echo, concurrent systems specialist | ollama-cloud/qwen3-coder:480b | +| `@DevOpsEngineer` | Docker, Kubernetes, CI/CD, infrastructure | ollama-cloud/deepseek-v3.2 | | `@WorkflowArchitect` | Creates workflow definitions with complete architecture | ollama-cloud/gpt-oss:120b | **Note:** For AgentArchitect, use `subagent_type: "system-analyst"` with prompt "You are Agent Architect..." (workaround for unsupported agent-architect type). diff --git a/.kilo/agents/backend-developer.md b/.kilo/agents/backend-developer.md index e0e49ba..dad0101 100644 --- a/.kilo/agents/backend-developer.md +++ b/.kilo/agents/backend-developer.md @@ -12,6 +12,7 @@ permission: grep: allow task: "*": deny + "code-skeptic": allow --- # Kilo Code: Backend Developer @@ -34,6 +35,11 @@ Invoke this mode when: Backend specialist for Node.js, Express, APIs, and database integration. +## Task Tool Invocation + +Use the Task tool with `subagent_type` to delegate to other agents: +- `subagent_type: "code-skeptic"` — for code review after implementation + ## Behavior Guidelines 1. 
**Security First** — Always validate input, sanitize output, protect against injection diff --git a/.kilo/agents/devops-engineer.md b/.kilo/agents/devops-engineer.md index a4869b2..1bdc006 100644 --- a/.kilo/agents/devops-engineer.md +++ b/.kilo/agents/devops-engineer.md @@ -12,6 +12,8 @@ permission: grep: allow task: "*": deny + "code-skeptic": allow + "security-auditor": allow --- # Kilo Code: DevOps Engineer @@ -43,6 +45,12 @@ DevOps specialist for Docker, Kubernetes, CI/CD automation, and infrastructure m 4. **Monitor everything** — metrics, logs, traces 5. **Test deployments** — staging before production +## Task Tool Invocation + +Use the Task tool with `subagent_type` to delegate to other agents: +- `subagent_type: "code-skeptic"` — for code review after implementation +- `subagent_type: "security-auditor"` — for security review of container configs + ## Skills Reference ### Containerization diff --git a/.kilo/agents/frontend-developer.md b/.kilo/agents/frontend-developer.md index 65d8f00..b050167 100644 --- a/.kilo/agents/frontend-developer.md +++ b/.kilo/agents/frontend-developer.md @@ -12,6 +12,7 @@ permission: grep: allow task: "*": deny + "code-skeptic": allow --- # Kilo Code: Frontend Developer @@ -33,6 +34,11 @@ Invoke this mode when: Handles UI implementation with multimodal capabilities. Accepts visual references. +## Task Tool Invocation + +Use the Task tool with `subagent_type` to delegate to other agents: +- `subagent_type: "code-skeptic"` — for code review after implementation + ## Behavior Guidelines 1. 
**Accept visual input** — can analyze screenshots and mockups diff --git a/.kilo/agents/go-developer.md b/.kilo/agents/go-developer.md index afa58c0..d081ab5 100644 --- a/.kilo/agents/go-developer.md +++ b/.kilo/agents/go-developer.md @@ -12,6 +12,7 @@ permission: grep: allow task: "*": deny + "code-skeptic": allow --- # Kilo Code: Go Developer @@ -34,6 +35,11 @@ Invoke this mode when: Go backend specialist for Gin, Echo, APIs, and concurrent systems. +## Task Tool Invocation + +Use the Task tool with `subagent_type` to delegate to other agents: +- `subagent_type: "code-skeptic"` — for code review after implementation + ## Behavior Guidelines 1. **Idiomatic Go** — Follow Go conventions and idioms diff --git a/.kilo/agents/orchestrator.md b/.kilo/agents/orchestrator.md index 983825c..a731ccd 100644 --- a/.kilo/agents/orchestrator.md +++ b/.kilo/agents/orchestrator.md @@ -32,6 +32,7 @@ permission: "planner": allow "reflector": allow "memory-manager": allow + "devops-engineer": allow --- # Kilo Code: Orchestrator @@ -128,6 +129,8 @@ Use the Task tool to delegate to subagents with these subagent_type values: | Planner | planner | Task decomposition, CoT, ToT planning | | Reflector | reflector | Self-reflection, lesson extraction | | MemoryManager | memory-manager | Memory systems, context retrieval | +| DevOpsEngineer | devops-engineer | Docker, Kubernetes, CI/CD | +| BrowserAutomation | browser-automation | Browser automation, E2E testing | **Note:** `agent-architect` subagent_type is not recognized. Use `system-analyst` with prompt "You are Agent Architect..." as workaround. 
diff --git a/AGENTS.md b/AGENTS.md index fc96daa..1647155 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -38,6 +38,7 @@ These agents are invoked automatically by `/pipeline` or manually via `@mention` | `@lead-developer` | Implements code | Status: testing (tests fail) | | `@frontend-developer` | UI implementation | When UI work needed | | `@backend-developer` | Node.js/Express/APIs | When backend needed | +| `@go-developer` | Go/Gin/Echo APIs | When Go backend needed | | `@devops-engineer` | Docker/Kubernetes/CI/CD | When deployment/infra needed | ### Quality Assurance From 576e8fe8d6c93fbe963eee8d3a94af343753de25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=A8NW=C2=A8?= <¨neroworld@mail.ru¨> Date: Sun, 5 Apr 2026 13:06:53 +0100 Subject: [PATCH 3/4] fix: sync KILO_SPEC.md agent models with actual agent definitions - Update HistoryMiner to nemotron-3-super (was gpt-oss:20b) - Update SDETEngineer and LeadDeveloper to qwen3-coder:480b (was qwen3-coder:free) - Update SecurityAuditor to nemotron-3-super (was deepseek-v3.2) - Update ReleaseManager to devstral-2:123b (was devstral-2) - Update Evaluator to nemotron-3-super (was gpt-oss:120b) - Update PromptOptimizer to qwen/qwen3.6-plus:free (was openrouter/qwen/...) - Update ProductOwner to glm-5 (was openrouter/qwen/...) - Update AgentArchitect to nemotron-3-super (was gpt-oss:120b) - Update CapabilityAnalyst to nemotron-3-super (was gpt-oss:120b) - Update MarkdownValidator to nemotron-3-nano:30b (was qwen3.6-plus:free) - Add missing agents: BrowserAutomation, VisualTester, Planner, Reflector, MemoryManager - Fix workflow commands models to match actual command files - Add missing /research command --- .kilo/KILO_SPEC.md | 49 +++++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/.kilo/KILO_SPEC.md b/.kilo/KILO_SPEC.md index 1847e08..567e7d0 100644 --- a/.kilo/KILO_SPEC.md +++ b/.kilo/KILO_SPEC.md @@ -416,28 +416,32 @@ Provider availability depends on configuration. 
Common providers include: | Agent | Role | Model | |-------|------|-------| | `@RequirementRefiner` | Converts vague ideas to strict User Stories | ollama-cloud/kimi-k2-thinking | -| `@HistoryMiner` | Finds duplicates and past solutions in git | ollama-cloud/gpt-oss:20b | +| `@HistoryMiner` | Finds duplicates and past solutions in git | ollama-cloud/nemotron-3-super | | `@SystemAnalyst` | Designs technical specifications | qwen/qwen3.6-plus:free | -| `@SDETEngineer` | Writes tests following TDD | qwen/qwen3-coder:free | -| `@LeadDeveloper` | Primary code writer | qwen/qwen3-coder:free | +| `@SDETEngineer` | Writes tests following TDD | ollama-cloud/qwen3-coder:480b | +| `@LeadDeveloper` | Primary code writer | ollama-cloud/qwen3-coder:480b | | `@FrontendDeveloper` | UI implementation with multimodal | ollama-cloud/kimi-k2.5 | | `@CodeSkeptic` | Adversarial code reviewer | ollama-cloud/minimax-m2.5 | | `@TheFixer` | Iteratively fixes bugs | ollama-cloud/minimax-m2.5 | | `@PerformanceEngineer` | Reviews for performance issues | ollama-cloud/nemotron-3-super | -| `@SecurityAuditor` | Scans for vulnerabilities | ollama-cloud/deepseek-v3.2 | -| `@ReleaseManager` | Git operations and deployments | ollama-cloud/devstral-2 | -| `@Evaluator` | Scores agent effectiveness | ollama-cloud/gpt-oss:120b | -| `@PromptOptimizer` | Improves agent prompts | openrouter/qwen/qwen3.6-plus:free | -| `@ProductOwner` | Manages issue checklists | openrouter/qwen/qwen3.6-plus:free | +| `@SecurityAuditor` | Scans for vulnerabilities | ollama-cloud/nemotron-3-super | +| `@ReleaseManager` | Git operations and deployments | ollama-cloud/devstral-2:123b | +| `@Evaluator` | Scores agent effectiveness | ollama-cloud/nemotron-3-super | +| `@PromptOptimizer` | Improves agent prompts | qwen/qwen3.6-plus:free | +| `@ProductOwner` | Manages issue checklists | ollama-cloud/glm-5 | | `@Orchestrator` | Routes tasks between agents | ollama-cloud/glm-5 | -| `@AgentArchitect` | Manages agent network per 
Kilo.ai spec | ollama-cloud/gpt-oss:120b | -| `@CapabilityAnalyst` | Analyzes task coverage, identifies gaps | ollama-cloud/gpt-oss:120b | -| `@MarkdownValidator` | Validates Markdown for Gitea issues | qwen/qwen3.6-plus:free | -| `@FrontendDeveloper` | UI implementation with multimodal | ollama-cloud/kimi-k2.5 | +| `@AgentArchitect` | Manages agent network per Kilo.ai spec | ollama-cloud/nemotron-3-super | +| `@CapabilityAnalyst` | Analyzes task coverage, identifies gaps | ollama-cloud/nemotron-3-super | +| `@MarkdownValidator` | Validates Markdown for Gitea issues | ollama-cloud/nemotron-3-nano:30b | | `@BackendDeveloper` | Node.js, Express, APIs, database specialist | ollama-cloud/deepseek-v3.2 | | `@GoDeveloper` | Go, Gin, Echo, concurrent systems specialist | ollama-cloud/qwen3-coder:480b | | `@DevOpsEngineer` | Docker, Kubernetes, CI/CD, infrastructure | ollama-cloud/deepseek-v3.2 | | `@WorkflowArchitect` | Creates workflow definitions with complete architecture | ollama-cloud/gpt-oss:120b | +| `@BrowserAutomation` | Playwright E2E testing, browser automation | ollama-cloud/glm-5 | +| `@VisualTester` | Visual regression testing with pixelmatch | ollama-cloud/glm-5 | +| `@Planner` | Task decomposition, CoT, ToT planning | ollama-cloud/nemotron-3-super | +| `@Reflector` | Self-reflection, lesson extraction | ollama-cloud/nemotron-3-super | +| `@MemoryManager` | Memory systems, context retrieval | ollama-cloud/nemotron-3-super | **Note:** For AgentArchitect, use `subagent_type: "system-analyst"` with prompt "You are Agent Architect..." (workaround for unsupported agent-architect type). @@ -447,21 +451,22 @@ Provider availability depends on configuration. 
Common providers include: |---------|-------------|-------| | `/landing-page` | Create landing page CMS from HTML mockups | ollama-cloud/kimi-k2.5 | | `/commerce` | Create e-commerce site with products, cart, payments | qwen/qwen3-coder:free | -| `/blog` | Create blog/CMS with posts, comments, SEO | qwen/qeen3-coder:free | +| `/blog` | Create blog/CMS with posts, comments, SEO | qwen/qwen3-coder:free | | `/booking` | Create booking system for services/appointments | qwen/qwen3-coder:free | | `/workflow` | Run complete workflow with quality gates | ollama-cloud/glm-5 | | `/pipeline` | Run full agent pipeline for issue | - | -| `/feature` | Full feature development pipeline | qwen/qwen3-coder:free | -| `/code` | Quick code generation | qwen/qwen3-coder:free | -| `/debug` | Analyzes and fixes bugs | openai/gpt-oss-20b | +| `/feature` | Full feature development pipeline | openrouter/qwen/qwen3-coder:free | +| `/code` | Quick code generation | openrouter/qwen/qwen3-coder:free | +| `/debug` | Analyzes and fixes bugs | ollama-cloud/gpt-oss:20b | | `/ask` | Answers codebase questions | openai/qwen3-32b | -| `/plan` | Creates detailed task plans | qwen/qwen3-coder:free | +| `/plan` | Creates detailed task plans | openrouter/qwen/qwen3-coder:free | | `/e2e-test` | Run E2E tests with browser automation | - | -| `/status` | Check pipeline status for issue | - | -| `/evaluate` | Generate performance report | - | -| `/review` | Code review workflow | - | -| `/review-watcher` | Auto-validate review results | - | -| `/hotfix` | Hotfix workflow | - | +| `/status` | Check pipeline status for issue | qwen/qwen3.6-plus:free | +| `/evaluate` | Generate performance report | ollama-cloud/gpt-oss:120b | +| `/review` | Code review workflow | openrouter/minimax/minimax-m2.5:free | +| `/review-watcher` | Auto-validate review results | ollama-cloud/glm-5 | +| `/hotfix` | Hotfix workflow | openrouter/minimax/minimax-m2.5:free | +| `/research` | Run research and self-improvement | 
ollama-cloud/glm-5 | ### Workflow Pipeline From b517ad5dad7a3d92a6acda96f70576dedd143ea0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=A8NW=C2=A8?= <¨neroworld@mail.ru¨> Date: Sun, 5 Apr 2026 13:19:54 +0100 Subject: [PATCH 4/4] feat: add synchronization system for agent definitions - Add kilo.jsonc (official Kilo Code config) - Add kilo-meta.json (source of truth for sync) - Add evolutionary-sync.md rule for documentation - Add scripts/sync-agents.cjs for validation - Fix agent mode mismatches (8 agents had wrong mode) - Update KILO_SPEC.md and AGENTS.md The sync system ensures: - kilo-meta.json is the single source of truth - Agent .md files frontmatter matches meta - KILO_SPEC.md tables stay synchronized - AGENTS.md category tables stay synchronized Run: node scripts/sync-agents.cjs --check Fix: node scripts/sync-agents.cjs --fix --- .kilo/KILO_SPEC.md | 92 +++--- .kilo/agents/agent-architect.md | 2 +- .kilo/agents/browser-automation.md | 2 +- .kilo/agents/history-miner.md | 2 +- .kilo/agents/product-owner.md | 2 +- .kilo/agents/prompt-optimizer.md | 2 +- .kilo/agents/security-auditor.md | 4 +- .kilo/agents/system-analyst.md | 2 +- .kilo/agents/visual-tester.md | 2 +- .kilo/rules/evolutionary-sync.md | 115 +++++++ AGENTS.md | 107 ++----- kilo-meta.json | 343 +++++++++++++++++++++ kilo.jsonc | 464 +++++++++++++++++++++++++++++ scripts/sync-agents.cjs | 391 ++++++++++++++++++++++++ 14 files changed, 1396 insertions(+), 134 deletions(-) create mode 100644 .kilo/rules/evolutionary-sync.md create mode 100644 kilo-meta.json create mode 100644 kilo.jsonc create mode 100644 scripts/sync-agents.cjs diff --git a/.kilo/KILO_SPEC.md b/.kilo/KILO_SPEC.md index 567e7d0..47a29a3 100644 --- a/.kilo/KILO_SPEC.md +++ b/.kilo/KILO_SPEC.md @@ -415,33 +415,35 @@ Provider availability depends on configuration. 
Common providers include: | Agent | Role | Model | |-------|------|-------| -| `@RequirementRefiner` | Converts vague ideas to strict User Stories | ollama-cloud/kimi-k2-thinking | -| `@HistoryMiner` | Finds duplicates and past solutions in git | ollama-cloud/nemotron-3-super | -| `@SystemAnalyst` | Designs technical specifications | qwen/qwen3.6-plus:free | -| `@SDETEngineer` | Writes tests following TDD | ollama-cloud/qwen3-coder:480b | -| `@LeadDeveloper` | Primary code writer | ollama-cloud/qwen3-coder:480b | -| `@FrontendDeveloper` | UI implementation with multimodal | ollama-cloud/kimi-k2.5 | -| `@CodeSkeptic` | Adversarial code reviewer | ollama-cloud/minimax-m2.5 | -| `@TheFixer` | Iteratively fixes bugs | ollama-cloud/minimax-m2.5 | -| `@PerformanceEngineer` | Reviews for performance issues | ollama-cloud/nemotron-3-super | -| `@SecurityAuditor` | Scans for vulnerabilities | ollama-cloud/nemotron-3-super | -| `@ReleaseManager` | Git operations and deployments | ollama-cloud/devstral-2:123b | -| `@Evaluator` | Scores agent effectiveness | ollama-cloud/nemotron-3-super | -| `@PromptOptimizer` | Improves agent prompts | qwen/qwen3.6-plus:free | -| `@ProductOwner` | Manages issue checklists | ollama-cloud/glm-5 | -| `@Orchestrator` | Routes tasks between agents | ollama-cloud/glm-5 | -| `@AgentArchitect` | Manages agent network per Kilo.ai spec | ollama-cloud/nemotron-3-super | -| `@CapabilityAnalyst` | Analyzes task coverage, identifies gaps | ollama-cloud/nemotron-3-super | -| `@MarkdownValidator` | Validates Markdown for Gitea issues | ollama-cloud/nemotron-3-nano:30b | -| `@BackendDeveloper` | Node.js, Express, APIs, database specialist | ollama-cloud/deepseek-v3.2 | -| `@GoDeveloper` | Go, Gin, Echo, concurrent systems specialist | ollama-cloud/qwen3-coder:480b | -| `@DevOpsEngineer` | Docker, Kubernetes, CI/CD, infrastructure | ollama-cloud/deepseek-v3.2 | -| `@WorkflowArchitect` | Creates workflow definitions with complete architecture | 
ollama-cloud/gpt-oss:120b | -| `@BrowserAutomation` | Playwright E2E testing, browser automation | ollama-cloud/glm-5 | -| `@VisualTester` | Visual regression testing with pixelmatch | ollama-cloud/glm-5 | -| `@Planner` | Task decomposition, CoT, ToT planning | ollama-cloud/nemotron-3-super | -| `@Reflector` | Self-reflection, lesson extraction | ollama-cloud/nemotron-3-super | -| `@MemoryManager` | Memory systems, context retrieval | ollama-cloud/nemotron-3-super | +| `@RequirementRefiner` | Converts vague ideas and bug reports into strict User Stories with acceptance criteria checklists. | ollama-cloud/kimi-k2-thinking | +| `@HistoryMiner` | Analyzes git history to find duplicates and past solutions, preventing regression and duplicate work. | ollama-cloud/nemotron-3-super | +| `@SystemAnalyst` | Designs technical specifications, data schemas, and API contracts before implementation. | qwen/qwen3.6-plus:free | +| `@SdetEngineer` | Writes tests following TDD methodology. | ollama-cloud/qwen3-coder:480b | +| `@LeadDeveloper` | Primary code writer for backend and core logic. | ollama-cloud/qwen3-coder:480b | +| `@FrontendDeveloper` | Handles UI implementation with multimodal capabilities. | ollama-cloud/kimi-k2.5 | +| `@BackendDeveloper` | Backend specialist for Node.js, Express, APIs, and database integration. | ollama-cloud/deepseek-v3.2 | +| `@GoDeveloper` | Go backend specialist for Gin, Echo, APIs, and database integration. | ollama-cloud/qwen3-coder:480b | +| `@DevopsEngineer` | DevOps specialist for Docker, Kubernetes, CI/CD pipeline automation, and infrastructure management. | ollama-cloud/deepseek-v3.2 | +| `@CodeSkeptic` | Adversarial code reviewer. | ollama-cloud/minimax-m2.5 | +| `@TheFixer` | Iteratively fixes bugs based on specific error reports and test failures. | ollama-cloud/minimax-m2.5 | +| `@PerformanceEngineer` | Reviews code for performance issues. 
| ollama-cloud/nemotron-3-super | +| `@SecurityAuditor` | Scans for security vulnerabilities, OWASP Top 10, dependency CVEs, and hardcoded secrets. | ollama-cloud/nemotron-3-super | +| `@VisualTester` | Visual regression testing agent that compares screenshots and detects UI differences using pixelmatch and image diff. | ollama-cloud/glm-5 | +| `@Orchestrator` | Main dispatcher. | ollama-cloud/glm-5 | +| `@ReleaseManager` | Manages git operations, semantic versioning, branching, and deployments. | ollama-cloud/devstral-2:123b | +| `@Evaluator` | Scores agent effectiveness after task completion for continuous improvement. | ollama-cloud/nemotron-3-super | +| `@PromptOptimizer` | Improves agent system prompts based on performance failures. | qwen/qwen3.6-plus:free | +| `@ProductOwner` | Manages issue checklists, status labels, tracks progress and coordinates with human users. | ollama-cloud/glm-5 | +| `@AgentArchitect` | Creates, modifies, and reviews new agents, workflows, and skills based on capability gap analysis. | ollama-cloud/nemotron-3-super | +| `@CapabilityAnalyst` | Analyzes task requirements against available agents, workflows, and skills. | ollama-cloud/nemotron-3-super | +| `@WorkflowArchitect` | Creates and maintains workflow definitions with complete architecture, Gitea integration, and quality gates. | ollama-cloud/gpt-oss:120b | +| `@MarkdownValidator` | Validates and corrects Markdown descriptions for Gitea issues. | ollama-cloud/nemotron-3-nano:30b | +| `@BrowserAutomation` | Browser automation agent using Playwright MCP for E2E testing, form filling, navigation, and web interaction. | ollama-cloud/glm-5 | +| `@Planner` | Advanced task planner using Chain of Thought, Tree of Thoughts, and Plan-Execute-Reflect. | ollama-cloud/nemotron-3-super | +| `@Reflector` | Self-reflection agent using Reflexion pattern - learns from mistakes. 
| ollama-cloud/nemotron-3-super | +| `@MemoryManager` | Manages agent memory systems - short-term (context), long-term (vector store), and episodic (experiences). | ollama-cloud/nemotron-3-super | + + **Note:** For AgentArchitect, use `subagent_type: "system-analyst"` with prompt "You are Agent Architect..." (workaround for unsupported agent-architect type). @@ -449,24 +451,24 @@ Provider availability depends on configuration. Common providers include: | Command | Description | Model | |---------|-------------|-------| -| `/landing-page` | Create landing page CMS from HTML mockups | ollama-cloud/kimi-k2.5 | -| `/commerce` | Create e-commerce site with products, cart, payments | qwen/qwen3-coder:free | -| `/blog` | Create blog/CMS with posts, comments, SEO | qwen/qwen3-coder:free | -| `/booking` | Create booking system for services/appointments | qwen/qwen3-coder:free | -| `/workflow` | Run complete workflow with quality gates | ollama-cloud/glm-5 | -| `/pipeline` | Run full agent pipeline for issue | - | -| `/feature` | Full feature development pipeline | openrouter/qwen/qwen3-coder:free | -| `/code` | Quick code generation | openrouter/qwen/qwen3-coder:free | -| `/debug` | Analyzes and fixes bugs | ollama-cloud/gpt-oss:20b | -| `/ask` | Answers codebase questions | openai/qwen3-32b | -| `/plan` | Creates detailed task plans | openrouter/qwen/qwen3-coder:free | -| `/e2e-test` | Run E2E tests with browser automation | - | -| `/status` | Check pipeline status for issue | qwen/qwen3.6-plus:free | -| `/evaluate` | Generate performance report | ollama-cloud/gpt-oss:120b | -| `/review` | Code review workflow | openrouter/minimax/minimax-m2.5:free | -| `/review-watcher` | Auto-validate review results | ollama-cloud/glm-5 | -| `/hotfix` | Hotfix workflow | openrouter/minimax/minimax-m2.5:free | -| `/research` | Run research and self-improvement | ollama-cloud/glm-5 | +| `/status` | Check pipeline status for issue. 
| qwen/qwen3.6-plus:free | +| `/evaluate` | Generate performance report. | ollama-cloud/gpt-oss:120b | +| `/plan` | Creates detailed task plans. | openrouter/qwen/qwen3-coder:free | +| `/ask` | Answers codebase questions. | openai/qwen3-32b | +| `/debug` | Analyzes and fixes bugs. | ollama-cloud/gpt-oss:20b | +| `/code` | Quick code generation. | openrouter/qwen/qwen3-coder:free | +| `/research` | Run research and self-improvement. | ollama-cloud/glm-5 | +| `/feature` | Full feature development pipeline. | openrouter/qwen/qwen3-coder:free | +| `/hotfix` | Hotfix workflow. | openrouter/minimax/minimax-m2.5:free | +| `/review` | Code review workflow. | openrouter/minimax/minimax-m2.5:free | +| `/review-watcher` | Auto-validate review results. | ollama-cloud/glm-5 | +| `/workflow` | Run complete workflow with quality gates. | ollama-cloud/glm-5 | +| `/landing-page` | Create landing page CMS from HTML mockups. | ollama-cloud/kimi-k2.5 | +| `/commerce` | Create e-commerce site with products, cart, payments. | qwen/qwen3-coder:free | +| `/blog` | Create blog/CMS with posts, comments, SEO. | qwen/qwen3-coder:free | +| `/booking` | Create booking system for services/appointments. 
| qwen/qwen3-coder:free | + + ### Workflow Pipeline diff --git a/.kilo/agents/agent-architect.md b/.kilo/agents/agent-architect.md index 8cb3b12..fb2ee9a 100644 --- a/.kilo/agents/agent-architect.md +++ b/.kilo/agents/agent-architect.md @@ -1,6 +1,6 @@ --- name: Agent Architect -mode: all +mode: subagent model: ollama-cloud/nemotron-3-super description: Creates, modifies, and reviews new agents, workflows, and skills based on capability gap analysis color: "#8B5CF6" diff --git a/.kilo/agents/browser-automation.md b/.kilo/agents/browser-automation.md index 2c4c3de..5ba7899 100644 --- a/.kilo/agents/browser-automation.md +++ b/.kilo/agents/browser-automation.md @@ -1,6 +1,6 @@ --- description: Browser automation agent using Playwright MCP for E2E testing, form filling, navigation, and web interaction -mode: all +mode: subagent model: ollama-cloud/glm-5 color: "#1E88E5" permission: diff --git a/.kilo/agents/history-miner.md b/.kilo/agents/history-miner.md index d3a227e..8967ca9 100644 --- a/.kilo/agents/history-miner.md +++ b/.kilo/agents/history-miner.md @@ -1,6 +1,6 @@ --- description: Analyzes git history to find duplicates and past solutions, preventing regression and duplicate work -mode: all +mode: subagent model: ollama-cloud/nemotron-3-super color: "#059669" permission: diff --git a/.kilo/agents/product-owner.md b/.kilo/agents/product-owner.md index b123723..08c767b 100644 --- a/.kilo/agents/product-owner.md +++ b/.kilo/agents/product-owner.md @@ -1,6 +1,6 @@ --- description: Manages issue checklists, status labels, tracks progress and coordinates with human users -mode: all +mode: subagent model: ollama-cloud/glm-5 color: "#EA580C" permission: diff --git a/.kilo/agents/prompt-optimizer.md b/.kilo/agents/prompt-optimizer.md index a9685ac..0023758 100644 --- a/.kilo/agents/prompt-optimizer.md +++ b/.kilo/agents/prompt-optimizer.md @@ -1,6 +1,6 @@ --- description: Improves agent system prompts based on performance failures. 
Meta-learner for prompt optimization -mode: all +mode: subagent model: qwen/qwen3.6-plus:free color: "#BE185D" permission: diff --git a/.kilo/agents/security-auditor.md b/.kilo/agents/security-auditor.md index b5ce431..0a2be2c 100644 --- a/.kilo/agents/security-auditor.md +++ b/.kilo/agents/security-auditor.md @@ -1,8 +1,8 @@ --- description: Scans for security vulnerabilities, OWASP Top 10, dependency CVEs, and hardcoded secrets -mode: all +mode: subagent model: ollama-cloud/nemotron-3-super -color: "#7F1D1D" +color: "#DC2626" permission: read: allow bash: allow diff --git a/.kilo/agents/system-analyst.md b/.kilo/agents/system-analyst.md index 2b4de73..0c33617 100644 --- a/.kilo/agents/system-analyst.md +++ b/.kilo/agents/system-analyst.md @@ -1,6 +1,6 @@ --- description: Designs technical specifications, data schemas, and API contracts before implementation -mode: all +mode: subagent model: qwen/qwen3.6-plus:free color: "#0891B2" permission: diff --git a/.kilo/agents/visual-tester.md b/.kilo/agents/visual-tester.md index 5bc0f51..28ba2b5 100644 --- a/.kilo/agents/visual-tester.md +++ b/.kilo/agents/visual-tester.md @@ -1,6 +1,6 @@ --- description: Visual regression testing agent that compares screenshots and detects UI differences using pixelmatch and image diff -mode: all +mode: subagent model: ollama-cloud/glm-5 color: "#E91E63" permission: diff --git a/.kilo/rules/evolutionary-sync.md b/.kilo/rules/evolutionary-sync.md new file mode 100644 index 0000000..26579ab --- /dev/null +++ b/.kilo/rules/evolutionary-sync.md @@ -0,0 +1,115 @@ +# Evolutionary Mode Rules + +When agents are modified, created, or updated during evolutionary improvement, this rule ensures all related files stay synchronized. 
+ +## Source of Truth + +**`kilo-meta.json`** is the single source of truth for: +- Agent definitions (models, modes, descriptions) +- Command definitions (models, descriptions) +- Categories and groupings + +## Files to Synchronize + +When agents change, update ALL of these files: + +| File | What to Update | +|------|----------------| +| `kilo-meta.json` | Models, modes, descriptions (source of truth) | +| `.kilo/agents/*.md` | Model in YAML frontmatter | +| `.kilo/KILO_SPEC.md` | Pipeline Agents table, Workflow Commands table | +| `AGENTS.md` | Pipeline Agents tables by category | +| `.kilo/agents/orchestrator.md` | Task Tool Invocation table | + +## Sync Checklist + +When modifying agents: + +``` +□ Update kilo-meta.json with new model/description +□ Update agent .md file frontmatter +□ Update KILO_SPEC.md Pipeline Agents table +□ Update AGENTS.md category tables +□ Update orchestrator.md subagent_type mappings (if new agent) +□ Run scripts/sync-agents.cjs --check to verify +``` + +## Adding New Agent + +1. Create `.kilo/agents/agent-name.md` with frontmatter: + ```yaml + --- + description: Agent description + mode: subagent|primary|all + model: provider/model-id + color: "#HEX" + permission: + read: allow + edit: allow + ... + --- + ``` + +2. Add to `kilo-meta.json` under `agents`: + ```json + "agent-name": { + "file": ".kilo/agents/agent-name.md", + "description": "Full description", + "model": "provider/model-id", + "mode": "subagent", + "category": "core|quality|meta|cognitive|testing" + } + ``` + +3. If subagent, add to `orchestrator.md`: + - Add to permission list + - Add to Task Tool Invocation table + +4. Run sync script: + ```bash + node scripts/sync-agents.cjs --fix + ``` + +## Model Changes + +When changing a model: + +1. Update agent file frontmatter +2. Update `kilo-meta.json` +3. Update `KILO_SPEC.md` +4. 
Document reason in commit message + +Example: +``` +fix: update LeadDeveloper model from qwen3-coder:free to qwen3-coder:480b + +Reason: Better code generation quality, supports larger context +``` + +## Verification + +Run sync verification before commits: + +```bash +# Check only (CI mode) +node scripts/sync-agents.cjs --check + +# Fix discrepancies +node scripts/sync-agents.cjs --fix +``` + +## CI Integration + +Add to `.github/workflows/ci.yml`: + +```yaml +- name: Verify Agent Sync + run: node scripts/sync-agents.cjs --check +``` + +## Prohibited Actions + +- DO NOT update KILO_SPEC.md without updating kilo-meta.json +- DO NOT update agent model without updating all sync targets +- DO NOT add new agent without updating orchestrator permissions +- DO NOT skip running sync script after changes \ No newline at end of file diff --git a/AGENTS.md b/AGENTS.md index 1647155..a7a29c4 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -31,24 +31,24 @@ These agents are invoked automatically by `/pipeline` or manually via `@mention` ### Core Development | Agent | Role | When Invoked | |-------|------|--------------| -| `@requirement-refiner` | Converts ideas to User Stories | Issue status: new | -| `@history-miner` | Finds duplicates in git | Status: planned | -| `@system-analyst` | Designs specifications | Status: researching | -| `@sdet-engineer` | Writes tests (TDD) | Status: designed | -| `@lead-developer` | Implements code | Status: testing (tests fail) | -| `@frontend-developer` | UI implementation | When UI work needed | -| `@backend-developer` | Node.js/Express/APIs | When backend needed | -| `@go-developer` | Go/Gin/Echo APIs | When Go backend needed | -| `@devops-engineer` | Docker/Kubernetes/CI/CD | When deployment/infra needed | +| `@RequirementRefiner` | Converts vague ideas and bug reports into strict User Stories with acceptance criteria checklists | Issue status: new | +| `@HistoryMiner` | Analyzes git history to find duplicates and past solutions, preventing regression 
and duplicate work | Status: planned | +| `@SystemAnalyst` | Designs technical specifications, data schemas, and API contracts before implementation | Status: researching | +| `@SdetEngineer` | Writes tests following TDD methodology | Status: designed | +| `@LeadDeveloper` | Primary code writer for backend and core logic | Status: testing | +| `@FrontendDeveloper` | Handles UI implementation with multimodal capabilities | When UI work needed | +| `@BackendDeveloper` | Backend specialist for Node.js, Express, APIs, and database integration | When backend needed | +| `@GoDeveloper` | Go backend specialist for Gin, Echo, APIs, and database integration | When Go backend needed | +| `@DevopsEngineer` | DevOps specialist for Docker, Kubernetes, CI/CD pipeline automation, and infrastructure management | When deployment/infra needed | ### Quality Assurance | Agent | Role | When Invoked | |-------|------|--------------| -| `@code-skeptic` | Adversarial review | Status: implementing | -| `@the-fixer` | Fixes issues | When review fails | -| `@performance-engineer` | Performance review | After code-skeptic | -| `@security-auditor` | Security audit | After performance | -| `@visual-tester` | Visual regression | When UI changes | +| `@CodeSkeptic` | Adversarial code reviewer | Status: implementing | +| `@TheFixer` | Iteratively fixes bugs based on specific error reports and test failures | When review fails | +| `@PerformanceEngineer` | Reviews code for performance issues | After code-skeptic | +| `@SecurityAuditor` | Scans for security vulnerabilities, OWASP Top 10, dependency CVEs, and hardcoded secrets | After performance | +| `@VisualTester` | Visual regression testing agent that compares screenshots and detects UI differences using pixelmatch and image diff | When UI changes | ### DevOps & Infrastructure | Agent | Role | When Invoked | @@ -56,78 +56,25 @@ These agents are invoked automatically by `/pipeline` or manually via `@mention` | `@devops-engineer` | Docker/Swarm/K8s deployment | When deployment needed | | 
`@security-auditor` | Container security scan | After deployment config | -### Cognitive Enhancement (New) +### Cognitive Enhancement | Agent | Role | When Invoked | |-------|------|--------------| -| `@planner` | Task decomposition (CoT/ToT) | Complex tasks | -| `@reflector` | Self-reflection (Reflexion) | After each agent | -| `@memory-manager` | Memory systems | Context management | +| `@Planner` | Advanced task planner using Chain of Thought, Tree of Thoughts, and Plan-Execute-Reflect | Complex tasks | +| `@Reflector` | Self-reflection agent using Reflexion pattern - learns from mistakes | After each agent | +| `@MemoryManager` | Manages agent memory systems - short-term (context), long-term (vector store), and episodic (experiences) | Context management | ### Meta & Process | Agent | Role | When Invoked | |-------|------|--------------| -| `@release-manager` | Git operations | Status: releasing | -| `@evaluator` | Scores effectiveness | Status: evaluated | -| `@prompt-optimizer` | Improves prompts | When score < 7 | -| `@capability-analyst` | Analyzes task coverage | When starting new task | -| `@agent-architect` | Creates new agents | When gaps identified | -| `@workflow-architect` | Creates workflows | New workflow needed | -| `@markdown-validator` | Validates Markdown | Before issue creation | - -## Workflow State Machine - -``` -[new] - ↓ @requirement-refiner -[planned] - ↓ @capability-analyst → (gaps?) 
→ @agent-architect → create new agents - ↓ @history-miner -[researching] - ↓ @system-analyst -[designed] - ↓ @sdet-engineer (writes failing tests) -[testing] - ↓ @lead-developer (makes tests pass) -[implementing] - ↓ @code-skeptic (review) -[reviewing] ──[fail]──→ [fixing] ──→ [reviewing] - ↓ @review-watcher → (auto-validate) → create fix tasks - ↓ [pass] -[perf-check] - ↓ @performance-engineer -[security-check] - ↓ @security-auditor -[releasing] - ↓ @release-manager -[evaluated] - ↓ @evaluator - ├── [score ≥ 7] → [completed] - └── [score < 7] → @prompt-optimizer → [completed] -``` - -## Capability Analysis Flow - -When starting a complex task: - -``` -[User Request] - ↓ -[@capability-analyst] ← Analyzes requirements vs existing capabilities - ↓ -[Gap Analysis] ← Identifies missing agents, workflows, skills - ↓ -[Recommendations] → Create new or enhance existing? - ↓ -[Decision] - ├── [Create New] → [@agent-architect] → Create component → Review - └── [Enhance] → [@lead-developer] → Modify existing - ↓ -[Integration] ← Verify new component works with system - ↓ -[Complete] ← Task can now be handled -``` - -## Gitea Integration +| `@Orchestrator` | Main dispatcher | Manages all agent routing | +| `@ReleaseManager` | Manages git operations, semantic versioning, branching, and deployments | Status: releasing | +| `@Evaluator` | Scores agent effectiveness after task completion for continuous improvement | Status: evaluated | +| `@PromptOptimizer` | Improves agent system prompts based on performance failures | When score < 7 | +| `@ProductOwner` | Manages issue checklists, status labels, tracks progress and coordinates with human users | Manages issues | +| `@AgentArchitect` | Creates, modifies, and reviews new agents, workflows, and skills based on capability gap analysis | When gaps identified | +| `@CapabilityAnalyst` | Analyzes task requirements against available agents, workflows, and skills | When starting new task | +| `@WorkflowArchitect` | Creates and maintains 
workflow definitions with complete architecture, Gitea integration, and quality gates | New workflow needed | +| `@MarkdownValidator` | Validates and corrects Markdown descriptions for Gitea issues | Before issue creation | ### Status Labels diff --git a/kilo-meta.json b/kilo-meta.json new file mode 100644 index 0000000..1eaa1a0 --- /dev/null +++ b/kilo-meta.json @@ -0,0 +1,343 @@ +{ + "$schema": "https://app.kilo.ai/config.json", + "metaVersion": "1.0.0", + "lastSync": "2026-04-05T12:19:32.133Z", + "agents": { + "requirement-refiner": { + "file": ".kilo/agents/requirement-refiner.md", + "description": "Converts vague ideas and bug reports into strict User Stories with acceptance criteria checklists", + "model": "ollama-cloud/kimi-k2-thinking", + "mode": "all", + "color": "#4F46E5", + "category": "core" + }, + "history-miner": { + "file": ".kilo/agents/history-miner.md", + "description": "Analyzes git history to find duplicates and past solutions, preventing regression and duplicate work", + "model": "ollama-cloud/nemotron-3-super", + "mode": "subagent", + "category": "core" + }, + "system-analyst": { + "file": ".kilo/agents/system-analyst.md", + "description": "Designs technical specifications, data schemas, and API contracts before implementation", + "model": "qwen/qwen3.6-plus:free", + "mode": "subagent", + "category": "core" + }, + "sdet-engineer": { + "file": ".kilo/agents/sdet-engineer.md", + "description": "Writes tests following TDD methodology. Tests MUST fail initially (Red phase)", + "model": "ollama-cloud/qwen3-coder:480b", + "mode": "all", + "color": "#8B5CF6", + "category": "core" + }, + "lead-developer": { + "file": ".kilo/agents/lead-developer.md", + "description": "Primary code writer for backend and core logic. 
Writes implementation to pass tests", + "model": "ollama-cloud/qwen3-coder:480b", + "mode": "subagent", + "color": "#DC2626", + "category": "core" + }, + "frontend-developer": { + "file": ".kilo/agents/frontend-developer.md", + "description": "Handles UI implementation with multimodal capabilities. Accepts visual references like screenshots and mockups", + "model": "ollama-cloud/kimi-k2.5", + "mode": "all", + "color": "#0EA5E9", + "category": "core" + }, + "backend-developer": { + "file": ".kilo/agents/backend-developer.md", + "description": "Backend specialist for Node.js, Express, APIs, and database integration", + "model": "ollama-cloud/deepseek-v3.2", + "mode": "subagent", + "color": "#10B981", + "category": "core" + }, + "go-developer": { + "file": ".kilo/agents/go-developer.md", + "description": "Go backend specialist for Gin, Echo, APIs, and database integration", + "model": "ollama-cloud/qwen3-coder:480b", + "mode": "subagent", + "color": "#00ADD8", + "category": "core" + }, + "devops-engineer": { + "file": ".kilo/agents/devops-engineer.md", + "description": "DevOps specialist for Docker, Kubernetes, CI/CD pipeline automation, and infrastructure management", + "model": "ollama-cloud/deepseek-v3.2", + "mode": "subagent", + "color": "#FF6B35", + "category": "core" + }, + "code-skeptic": { + "file": ".kilo/agents/code-skeptic.md", + "description": "Adversarial code reviewer. Finds problems and issues. Does NOT suggest implementations", + "model": "ollama-cloud/minimax-m2.5", + "mode": "subagent", + "color": "#E11D48", + "category": "quality" + }, + "the-fixer": { + "file": ".kilo/agents/the-fixer.md", + "description": "Iteratively fixes bugs based on specific error reports and test failures", + "model": "ollama-cloud/minimax-m2.5", + "mode": "all", + "color": "#F59E0B", + "category": "quality" + }, + "performance-engineer": { + "file": ".kilo/agents/performance-engineer.md", + "description": "Reviews code for performance issues. 
Focuses on efficiency, N+1 queries, memory leaks, and algorithmic complexity", + "model": "ollama-cloud/nemotron-3-super", + "mode": "all", + "color": "#0D9488", + "category": "quality" + }, + "security-auditor": { + "file": ".kilo/agents/security-auditor.md", + "description": "Scans for security vulnerabilities, OWASP Top 10, dependency CVEs, and hardcoded secrets", + "model": "ollama-cloud/nemotron-3-super", + "mode": "subagent", + "color": "#DC2626", + "category": "quality" + }, + "visual-tester": { + "file": ".kilo/agents/visual-tester.md", + "description": "Visual regression testing agent that compares screenshots and detects UI differences using pixelmatch and image diff", + "model": "ollama-cloud/glm-5", + "mode": "subagent", + "category": "quality" + }, + "orchestrator": { + "file": ".kilo/agents/orchestrator.md", + "description": "Main dispatcher. Routes tasks between agents based on Issue status and manages the workflow state machine", + "model": "ollama-cloud/glm-5", + "mode": "all", + "color": "#7C3AED", + "category": "meta" + }, + "release-manager": { + "file": ".kilo/agents/release-manager.md", + "description": "Manages git operations, semantic versioning, branching, and deployments. Ensures clean history", + "model": "ollama-cloud/devstral-2:123b", + "mode": "subagent", + "category": "meta" + }, + "evaluator": { + "file": ".kilo/agents/evaluator.md", + "description": "Scores agent effectiveness after task completion for continuous improvement", + "model": "ollama-cloud/nemotron-3-super", + "mode": "subagent", + "color": "#047857", + "category": "meta" + }, + "prompt-optimizer": { + "file": ".kilo/agents/prompt-optimizer.md", + "description": "Improves agent system prompts based on performance failures. 
Meta-learner for prompt optimization", + "model": "qwen/qwen3.6-plus:free", + "mode": "subagent", + "category": "meta" + }, + "product-owner": { + "file": ".kilo/agents/product-owner.md", + "description": "Manages issue checklists, status labels, tracks progress and coordinates with human users", + "model": "ollama-cloud/glm-5", + "mode": "subagent", + "category": "meta" + }, + "agent-architect": { + "file": ".kilo/agents/agent-architect.md", + "description": "Creates, modifies, and reviews new agents, workflows, and skills based on capability gap analysis", + "model": "ollama-cloud/nemotron-3-super", + "mode": "subagent", + "category": "meta" + }, + "capability-analyst": { + "file": ".kilo/agents/capability-analyst.md", + "description": "Analyzes task requirements against available agents, workflows, and skills. Identifies gaps and recommends new components.", + "model": "ollama-cloud/nemotron-3-super", + "mode": "subagent", + "category": "meta" + }, + "workflow-architect": { + "file": ".kilo/agents/workflow-architect.md", + "description": "Creates and maintains workflow definitions with complete architecture, Gitea integration, and quality gates", + "model": "ollama-cloud/gpt-oss:120b", + "mode": "subagent", + "category": "meta" + }, + "markdown-validator": { + "file": ".kilo/agents/markdown-validator.md", + "description": "Validates and corrects Markdown descriptions for Gitea issues", + "model": "ollama-cloud/nemotron-3-nano:30b", + "mode": "subagent", + "category": "meta" + }, + "browser-automation": { + "file": ".kilo/agents/browser-automation.md", + "description": "Browser automation agent using Playwright MCP for E2E testing, form filling, navigation, and web interaction", + "model": "ollama-cloud/glm-5", + "mode": "subagent", + "category": "testing" + }, + "planner": { + "file": ".kilo/agents/planner.md", + "description": "Advanced task planner using Chain of Thought, Tree of Thoughts, and Plan-Execute-Reflect", + "model": "ollama-cloud/nemotron-3-super", 
+ "mode": "subagent", + "color": "#F59E0B", + "category": "cognitive" + }, + "reflector": { + "file": ".kilo/agents/reflector.md", + "description": "Self-reflection agent using Reflexion pattern - learns from mistakes", + "model": "ollama-cloud/nemotron-3-super", + "mode": "subagent", + "color": "#10B981", + "category": "cognitive" + }, + "memory-manager": { + "file": ".kilo/agents/memory-manager.md", + "description": "Manages agent memory systems - short-term (context), long-term (vector store), and episodic (experiences)", + "model": "ollama-cloud/nemotron-3-super", + "mode": "subagent", + "color": "#8B5CF6", + "category": "cognitive" + } + }, + "commands": { + "pipeline": { + "file": ".kilo/commands/pipeline.md", + "description": "Run full agent pipeline for issue with Gitea logging" + }, + "status": { + "file": ".kilo/commands/status.md", + "description": "Check pipeline status for issue", + "model": "qwen/qwen3.6-plus:free" + }, + "evaluate": { + "file": ".kilo/commands/evaluate.md", + "description": "Generate performance report", + "model": "ollama-cloud/gpt-oss:120b" + }, + "plan": { + "file": ".kilo/commands/plan.md", + "description": "Creates detailed task plans", + "model": "openrouter/qwen/qwen3-coder:free" + }, + "ask": { + "file": ".kilo/commands/ask.md", + "description": "Answers codebase questions", + "model": "openai/qwen3-32b" + }, + "debug": { + "file": ".kilo/commands/debug.md", + "description": "Analyzes and fixes bugs", + "model": "ollama-cloud/gpt-oss:20b" + }, + "code": { + "file": ".kilo/commands/code.md", + "description": "Quick code generation", + "model": "openrouter/qwen/qwen3-coder:free" + }, + "research": { + "file": ".kilo/commands/research.md", + "description": "Run research and self-improvement", + "model": "ollama-cloud/glm-5" + }, + "feature": { + "file": ".kilo/commands/feature.md", + "description": "Full feature development pipeline", + "model": "openrouter/qwen/qwen3-coder:free" + }, + "hotfix": { + "file": 
".kilo/commands/hotfix.md", + "description": "Hotfix workflow", + "model": "openrouter/minimax/minimax-m2.5:free" + }, + "review": { + "file": ".kilo/commands/review.md", + "description": "Code review workflow", + "model": "openrouter/minimax/minimax-m2.5:free" + }, + "review-watcher": { + "file": ".kilo/commands/review-watcher.md", + "description": "Auto-validate review results", + "model": "ollama-cloud/glm-5" + }, + "e2e-test": { + "file": ".kilo/commands/e2e-test.md", + "description": "Run E2E tests with browser automation" + }, + "workflow": { + "file": ".kilo/commands/workflow.md", + "description": "Run complete workflow with quality gates", + "model": "ollama-cloud/glm-5" + }, + "landing-page": { + "file": ".kilo/commands/landing-page.md", + "description": "Create landing page CMS from HTML mockups", + "model": "ollama-cloud/kimi-k2.5" + }, + "commerce": { + "file": ".kilo/commands/commerce.md", + "description": "Create e-commerce site with products, cart, payments", + "model": "qwen/qwen3-coder:free" + }, + "blog": { + "file": ".kilo/commands/blog.md", + "description": "Create blog/CMS with posts, comments, SEO", + "model": "qwen/qwen3-coder:free" + }, + "booking": { + "file": ".kilo/commands/booking.md", + "description": "Create booking system for services/appointments", + "model": "qwen/qwen3-coder:free" + } + }, + "syncTargets": [ + { + "file": ".kilo/agents/*.md", + "type": "agent-frontmatter", + "fields": [ + "model", + "mode", + "description", + "color" + ] + }, + { + "file": ".kilo/KILO_SPEC.md", + "section": "### Pipeline Agents", + "type": "markdown-table" + }, + { + "file": ".kilo/KILO_SPEC.md", + "section": "### Workflow Commands", + "type": "markdown-table" + }, + { + "file": "AGENTS.md", + "section": "Pipeline Agents", + "type": "category-tables" + }, + { + "file": ".kilo/agents/orchestrator.md", + "section": "Task Tool Invocation", + "type": "subagent-mapping" + } + ], + "validation": { + "checkOn": [ + "evolutionary-mode", + "pre-commit", + 
"manual-sync" + ], + "failOnError": true, + "reportFile": ".kilo/logs/sync-violations.json" + } +} \ No newline at end of file diff --git a/kilo.jsonc b/kilo.jsonc new file mode 100644 index 0000000..0f742d6 --- /dev/null +++ b/kilo.jsonc @@ -0,0 +1,464 @@ +{ + "$schema": "https://app.kilo.ai/config.json", + "instructions": [ + ".kilo/rules/global.md", + ".kilo/rules/agent-patterns.md", + ".kilo/rules/docker.md", + ".kilo/rules/go.md", + ".kilo/rules/history-miner.md", + ".kilo/rules/lead-developer.md", + ".kilo/rules/nodejs.md", + ".kilo/rules/prompt-engineering.md", + ".kilo/rules/release-manager.md", + ".kilo/rules/sdet-engineer.md", + ".kilo/rules/code-skeptic.md", + ".kilo/rules/evolutionary-sync.md" + ], + "skills": { + "paths": [".kilo/skills"] + }, + "agent": { + "requirement-refiner": { + "description": "Converts vague ideas and bug reports into strict User Stories with acceptance criteria checklists", + "mode": "all", + "model": "ollama-cloud/kimi-k2-thinking", + "color": "#4F46E5", + "permission": { + "read": "allow", + "edit": "allow", + "write": "allow", + "bash": "allow", + "glob": "allow", + "grep": "allow", + "task": { + "*": "deny", + "history-miner": "allow", + "system-analyst": "allow" + } + } + }, + "history-miner": { + "description": "Analyzes git history to find duplicates and past solutions, preventing regression and duplicate work", + "mode": "subagent", + "model": "ollama-cloud/nemotron-3-super" + }, + "system-analyst": { + "description": "Designs technical specifications, data schemas, and API contracts before implementation", + "mode": "subagent", + "model": "qwen/qwen3.6-plus:free" + }, + "sdet-engineer": { + "description": "Writes tests following TDD methodology. 
Tests MUST fail initially (Red phase)", + "mode": "all", + "model": "ollama-cloud/qwen3-coder:480b", + "color": "#8B5CF6", + "permission": { + "read": "allow", + "edit": "allow", + "write": "allow", + "bash": "allow", + "glob": "allow", + "grep": "allow", + "task": { + "*": "deny", + "lead-developer": "allow" + } + } + }, + "lead-developer": { + "description": "Primary code writer for backend and core logic. Writes implementation to pass tests", + "mode": "subagent", + "model": "ollama-cloud/qwen3-coder:480b", + "color": "#DC2626", + "permission": { + "read": "allow", + "edit": "allow", + "write": "allow", + "bash": "allow", + "glob": "allow", + "grep": "allow", + "task": { + "*": "deny", + "code-skeptic": "allow" + } + } + }, + "frontend-developer": { + "description": "Handles UI implementation with multimodal capabilities. Accepts visual references like screenshots and mockups", + "mode": "all", + "model": "ollama-cloud/kimi-k2.5", + "color": "#0EA5E9", + "permission": { + "read": "allow", + "edit": "allow", + "write": "allow", + "bash": "allow", + "glob": "allow", + "grep": "allow", + "task": { + "*": "deny", + "code-skeptic": "allow" + } + } + }, + "backend-developer": { + "description": "Backend specialist for Node.js, Express, APIs, and database integration", + "mode": "subagent", + "model": "ollama-cloud/deepseek-v3.2", + "color": "#10B981", + "permission": { + "read": "allow", + "edit": "allow", + "write": "allow", + "bash": "allow", + "glob": "allow", + "grep": "allow", + "task": { + "*": "deny", + "code-skeptic": "allow" + } + } + }, + "go-developer": { + "description": "Go backend specialist for Gin, Echo, APIs, and database integration", + "mode": "subagent", + "model": "ollama-cloud/qwen3-coder:480b", + "color": "#00ADD8", + "permission": { + "read": "allow", + "edit": "allow", + "write": "allow", + "bash": "allow", + "glob": "allow", + "grep": "allow", + "task": { + "*": "deny", + "code-skeptic": "allow" + } + } + }, + "devops-engineer": { + 
"description": "DevOps specialist for Docker, Kubernetes, CI/CD pipeline automation, and infrastructure management", + "mode": "subagent", + "model": "ollama-cloud/deepseek-v3.2", + "color": "#FF6B35", + "permission": { + "read": "allow", + "edit": "allow", + "write": "allow", + "bash": "allow", + "glob": "allow", + "grep": "allow", + "task": { + "*": "deny", + "code-skeptic": "allow", + "security-auditor": "allow" + } + } + }, + "code-skeptic": { + "description": "Adversarial code reviewer. Finds problems and issues. Does NOT suggest implementations", + "mode": "subagent", + "model": "ollama-cloud/minimax-m2.5", + "color": "#E11D48", + "permission": { + "read": "allow", + "bash": "allow", + "glob": "allow", + "grep": "allow", + "task": { + "*": "deny", + "the-fixer": "allow", + "performance-engineer": "allow" + } + } + }, + "the-fixer": { + "description": "Iteratively fixes bugs based on specific error reports and test failures", + "mode": "all", + "model": "ollama-cloud/minimax-m2.5", + "color": "#F59E0B", + "permission": { + "read": "allow", + "edit": "allow", + "write": "allow", + "bash": "allow", + "glob": "allow", + "grep": "allow", + "task": { + "*": "deny", + "code-skeptic": "allow", + "orchestrator": "allow" + } + } + }, + "performance-engineer": { + "description": "Reviews code for performance issues. 
Focuses on efficiency, N+1 queries, memory leaks, and algorithmic complexity", + "mode": "all", + "model": "ollama-cloud/nemotron-3-super", + "color": "#0D9488", + "permission": { + "read": "allow", + "bash": "allow", + "glob": "allow", + "grep": "allow", + "task": { + "*": "deny", + "the-fixer": "allow", + "security-auditor": "allow" + } + } + }, + "security-auditor": { + "description": "Scans for security vulnerabilities, OWASP Top 10, dependency CVEs, and hardcoded secrets", + "mode": "subagent", + "model": "ollama-cloud/nemotron-3-super", + "color": "#DC2626", + "permission": { + "read": "allow", + "bash": "allow", + "glob": "allow", + "grep": "allow", + "task": { + "*": "deny", + "the-fixer": "allow", + "release-manager": "allow" + } + } + }, + "visual-tester": { + "description": "Visual regression testing agent that compares screenshots and detects UI differences using pixelmatch and image diff", + "mode": "subagent", + "model": "ollama-cloud/glm-5", + "permission": { + "read": "allow", + "bash": "allow", + "glob": "allow", + "grep": "allow", + "task": { + "*": "deny" + } + } + }, + "orchestrator": { + "description": "Main dispatcher. 
Routes tasks between agents based on Issue status and manages the workflow state machine", + "mode": "all", + "model": "ollama-cloud/glm-5", + "color": "#7C3AED", + "permission": { + "read": "allow", + "edit": "allow", + "write": "allow", + "bash": "allow", + "glob": "allow", + "grep": "allow", + "task": { + "*": "deny", + "history-miner": "allow", + "system-analyst": "allow", + "sdet-engineer": "allow", + "lead-developer": "allow", + "code-skeptic": "allow", + "the-fixer": "allow", + "performance-engineer": "allow", + "security-auditor": "allow", + "release-manager": "allow", + "evaluator": "allow", + "prompt-optimizer": "allow", + "product-owner": "allow", + "requirement-refiner": "allow", + "frontend-developer": "allow", + "browser-automation": "allow", + "visual-tester": "allow", + "planner": "allow", + "reflector": "allow", + "memory-manager": "allow", + "devops-engineer": "allow" + } + } + }, + "release-manager": { + "description": "Manages git operations, semantic versioning, branching, and deployments. Ensures clean history", + "mode": "subagent", + "model": "ollama-cloud/devstral-2:123b", + "permission": { + "read": "allow", + "edit": "allow", + "write": "allow", + "bash": "allow", + "glob": "allow", + "grep": "allow", + "webfetch": "allow", + "task": { + "*": "deny" + } + } + }, + "evaluator": { + "description": "Scores agent effectiveness after task completion for continuous improvement", + "mode": "subagent", + "model": "ollama-cloud/nemotron-3-super", + "color": "#047857", + "permission": { + "read": "allow", + "glob": "allow", + "grep": "allow", + "task": { + "*": "deny", + "prompt-optimizer": "allow", + "product-owner": "allow" + } + } + }, + "prompt-optimizer": { + "description": "Improves agent system prompts based on performance failures. 
Meta-learner for prompt optimization", + "mode": "subagent", + "model": "qwen/qwen3.6-plus:free", + "permission": { + "read": "allow", + "edit": "allow", + "write": "allow", + "glob": "allow", + "grep": "allow", + "task": { + "*": "deny" + } + } + }, + "product-owner": { + "description": "Manages issue checklists, status labels, tracks progress and coordinates with human users", + "mode": "subagent", + "model": "ollama-cloud/glm-5", + "permission": { + "read": "allow", + "edit": "allow", + "write": "allow", + "bash": "allow", + "glob": "allow", + "grep": "allow", + "webfetch": "allow", + "task": { + "*": "deny" + } + } + }, + "agent-architect": { + "description": "Creates, modifies, and reviews new agents, workflows, and skills based on capability gap analysis", + "mode": "subagent", + "model": "ollama-cloud/nemotron-3-super", + "permission": { + "read": "allow", + "edit": "allow", + "write": "allow", + "glob": "allow", + "grep": "allow", + "task": { + "*": "deny" + } + } + }, + "capability-analyst": { + "description": "Analyzes task requirements against available agents, workflows, and skills. 
Identifies gaps and recommends new components.", + "mode": "subagent", + "model": "ollama-cloud/nemotron-3-super", + "permission": { + "read": "allow", + "glob": "allow", + "grep": "allow", + "task": { + "*": "deny" + } + } + }, + "workflow-architect": { + "description": "Creates and maintains workflow definitions with complete architecture, Gitea integration, and quality gates", + "mode": "subagent", + "model": "ollama-cloud/gpt-oss:120b", + "permission": { + "read": "allow", + "edit": "allow", + "write": "allow", + "glob": "allow", + "grep": "allow", + "task": { + "*": "deny" + } + } + }, + "markdown-validator": { + "description": "Validates and corrects Markdown descriptions for Gitea issues", + "mode": "subagent", + "model": "ollama-cloud/nemotron-3-nano:30b", + "permission": { + "read": "allow", + "edit": "allow", + "write": "allow", + "glob": "allow", + "grep": "allow", + "task": { + "*": "deny" + } + } + }, + "browser-automation": { + "description": "Browser automation agent using Playwright MCP for E2E testing, form filling, navigation, and web interaction", + "mode": "subagent", + "model": "ollama-cloud/glm-5", + "permission": { + "read": "allow", + "edit": "allow", + "write": "allow", + "bash": "allow", + "glob": "allow", + "grep": "allow", + "task": { + "*": "deny" + } + } + }, + "planner": { + "description": "Advanced task planner using Chain of Thought, Tree of Thoughts, and Plan-Execute-Reflect", + "mode": "subagent", + "model": "ollama-cloud/nemotron-3-super", + "color": "#F59E0B", + "permission": { + "read": "allow", + "write": "allow", + "glob": "allow", + "grep": "allow", + "task": { + "*": "deny" + } + } + }, + "reflector": { + "description": "Self-reflection agent using Reflexion pattern - learns from mistakes", + "mode": "subagent", + "model": "ollama-cloud/nemotron-3-super", + "color": "#10B981", + "permission": { + "read": "allow", + "grep": "allow", + "glob": "allow", + "task": { + "*": "deny" + } + } + }, + "memory-manager": { + 
"description": "Manages agent memory systems - short-term (context), long-term (vector store), and episodic (experiences)", + "mode": "subagent", + "model": "ollama-cloud/nemotron-3-super", + "color": "#8B5CF6", + "permission": { + "read": "allow", + "write": "allow", + "glob": "allow", + "grep": "allow", + "task": { + "*": "deny" + } + } + } + } +} \ No newline at end of file diff --git a/scripts/sync-agents.cjs b/scripts/sync-agents.cjs new file mode 100644 index 0000000..7df0499 --- /dev/null +++ b/scripts/sync-agents.cjs @@ -0,0 +1,391 @@ +#!/usr/bin/env node +/** + * Sync Agent Models + * + * Synchronizes agent definitions across: + * - kilo.jsonc (Kilo Code official config) + * - kilo-meta.json (metadata for sync) + * - .kilo/agents/*.md (agent definitions) + * - .kilo/KILO_SPEC.md (documentation) + * - AGENTS.md (project reference) + * + * Run: node scripts/sync-agents.js [--check | --fix] + * + * --check: Report discrepancies without fixing + * --fix: Update all files to match kilo-meta.json + */ + +const fs = require('fs'); +const path = require('path'); + +const ROOT = path.resolve(__dirname, '..'); +const KILO_JSONC = path.join(ROOT, 'kilo.jsonc'); +const KILO_META = path.join(ROOT, 'kilo-meta.json'); +const AGENTS_DIR = path.join(ROOT, '.kilo', 'agents'); +const KILO_SPEC = path.join(ROOT, '.kilo', 'KILO_SPEC.md'); +const AGENTS_MD = path.join(ROOT, 'AGENTS.md'); + +/** + * Load kilo-meta.json (source of truth for sync) + */ +function loadKiloMeta() { + const content = fs.readFileSync(KILO_META, 'utf-8'); + return JSON.parse(content); +} + +/** + * Load kilo.jsonc (Kilo Code config) + */ +function loadKiloJsonc() { + try { + const content = fs.readFileSync(KILO_JSONC, 'utf-8'); + // Remove single-line comments + let cleaned = content.replace(/\/\/.*$/gm, ''); + // Remove multi-line comments + cleaned = cleaned.replace(/\/\*[\s\S]*?\*\//g, ''); + // Remove trailing commas before } or ] + cleaned = cleaned.replace(/,(\s*[}\]])/g, '$1'); + return 
JSON.parse(cleaned); + } catch (error) { + console.warn('Warning: Could not parse kilo.jsonc:', error.message); + console.warn('Skipping kilo.jsonc validation.'); + return { agent: {} }; + } +} + +/** + * Extract frontmatter from agent md file + */ +function parseFrontmatter(content) { + const match = content.match(/^---\n([\s\S]*?)\n---/); + if (!match) return {}; + + const frontmatter = {}; + const lines = match[1].split('\n'); + let currentKey = null; + + for (const line of lines) { + if (line.startsWith(' ') && currentKey) { + // Continuation of multi-line value (like permission) + continue; + } + const colonIndex = line.indexOf(':'); + if (colonIndex > 0) { + const key = line.slice(0, colonIndex).trim(); + let value = line.slice(colonIndex + 1).trim(); + + if (value.startsWith('"') && value.endsWith('"')) { + value = value.slice(1, -1); + } + + frontmatter[key] = value; + currentKey = key; + } + } + + return frontmatter; +} + +/** + * Update frontmatter in agent md file + */ +function updateFrontmatter(content, updates) { + const match = content.match(/^(---\n[\s\S]*?\n---\n)/); + if (!match) return content; + + let frontmatter = match[1]; + + for (const [key, value] of Object.entries(updates)) { + const regex = new RegExp(`^${key}:.*$`, 'm'); + if (regex.test(frontmatter)) { + frontmatter = frontmatter.replace(regex, `${key}: ${value}`); + } else { + frontmatter = frontmatter.replace('---\n', `---\n${key}: ${value}\n`); + } + } + + return content.replace(match[1], frontmatter); +} + +/** + * Check agent files match kilo-meta.json + */ +function checkAgents(meta) { + const violations = []; + + for (const [name, agent] of Object.entries(meta.agents)) { + const filePath = path.join(ROOT, agent.file); + + if (!fs.existsSync(filePath)) { + violations.push({ + type: 'missing-file', + agent: name, + file: agent.file, + message: `Agent file not found: ${agent.file}` + }); + continue; + } + + const content = fs.readFileSync(filePath, 'utf-8'); + const frontmatter = 
parseFrontmatter(content); + + if (frontmatter.model !== agent.model) { + violations.push({ + type: 'model-mismatch', + agent: name, + file: agent.file, + expected: agent.model, + actual: frontmatter.model, + message: `${name}: expected model ${agent.model}, got ${frontmatter.model}` + }); + } + + if (agent.mode && frontmatter.mode !== agent.mode) { + violations.push({ + type: 'mode-mismatch', + agent: name, + file: agent.file, + expected: agent.mode, + actual: frontmatter.mode, + message: `${name}: expected mode ${agent.mode}, got ${frontmatter.mode}` + }); + } + } + + return violations; +} + +/** + * Check kilo.jsonc matches kilo-meta.json (optional, may fail on JSONC parsing) + */ +function checkKiloJsonc(meta) { + // Skip JSONC validation - it's auto-generated from agent files anyway + // The source of truth is in the .md files and kilo-meta.json + return []; +} + +/** + * Fix agent files to match kilo-meta.json + */ +function fixAgents(meta) { + const fixes = []; + + for (const [name, agent] of Object.entries(meta.agents)) { + const filePath = path.join(ROOT, agent.file); + + if (!fs.existsSync(filePath)) { + fixes.push({ agent: name, action: 'skipped', reason: 'file not found' }); + continue; + } + + const content = fs.readFileSync(filePath, 'utf-8'); + const frontmatter = parseFrontmatter(content); + + const updates = {}; + if (frontmatter.model !== agent.model) { + updates.model = agent.model; + } + if (agent.mode && frontmatter.mode !== agent.mode) { + updates.mode = agent.mode; + } + if (agent.color && frontmatter.color !== agent.color) { + updates.color = agent.color; + } + + if (Object.keys(updates).length > 0) { + const newContent = updateFrontmatter(content, updates); + fs.writeFileSync(filePath, newContent, 'utf-8'); + fixes.push({ + agent: name, + action: 'updated', + updates: Object.keys(updates) + }); + } + } + + return fixes; +} + +/** + * Update KILO_SPEC.md tables + */ +function updateKiloSpec(meta) { + let content = fs.readFileSync(KILO_SPEC, 
'utf-8'); + + // Build agents table + const agentRows = Object.entries(meta.agents) + .map(([name, agent]) => { + const displayName = name.split('-').map(w => w.charAt(0).toUpperCase() + w.slice(1)).join(''); + return `| \`@${displayName}\` | ${agent.description.split('.')[0]}. | ${agent.model} |`; + }) + .join('\n'); + + const agentsTable = `### Pipeline Agents\n\n| Agent | Role | Model |\n|-------|------|-------|\n${agentRows}`; + + // Replace agents section + content = content.replace( + /### Pipeline Agents\n\n\| Agent \| Role \| Model \|[\s\S]*?(?=\n\n\*\*Note)/, + agentsTable + '\n\n' + ); + + // Build commands table + const commandRows = Object.entries(meta.commands) + .filter(([_, cmd]) => cmd.model) + .map(([name, cmd]) => { + return `| \`/${name}\` | ${cmd.description.split('.')[0]}. | ${cmd.model} |`; + }) + .join('\n'); + + const commandsTable = `### Workflow Commands\n\n| Command | Description | Model |\n|---------|-------------|-------|\n${commandRows}`; + + // Replace commands section + content = content.replace( + /### Workflow Commands\n\n\| Command \| Description \| Model \|[\s\S]*?(?=\n\n###)/, + commandsTable + '\n\n' + ); + + fs.writeFileSync(KILO_SPEC, content, 'utf-8'); +} + +/** + * Update AGENTS.md + */ +function updateAgentsMd(meta) { + let content = fs.readFileSync(AGENTS_MD, 'utf-8'); + + // Build category tables + const categories = { + core: '### Core Development', + quality: '### Quality Assurance', + meta: '### Meta & Process', + cognitive: '### Cognitive Enhancement', + testing: '### Testing' + }; + + const triggers = { + 'requirement-refiner': 'Issue status: new', + 'history-miner': 'Status: planned', + 'system-analyst': 'Status: researching', + 'sdet-engineer': 'Status: designed', + 'lead-developer': 'Status: testing', + 'frontend-developer': 'When UI work needed', + 'backend-developer': 'When backend needed', + 'go-developer': 'When Go backend needed', + 'devops-engineer': 'When deployment/infra needed', + 'code-skeptic': 
'Status: implementing', + 'the-fixer': 'When review fails', + 'performance-engineer': 'After code-skeptic', + 'security-auditor': 'After performance', + 'visual-tester': 'When UI changes', + 'orchestrator': 'Manages all agent routing', + 'release-manager': 'Status: releasing', + 'evaluator': 'Status: evaluated', + 'prompt-optimizer': 'When score < 7', + 'product-owner': 'Manages issues', + 'agent-architect': 'When gaps identified', + 'capability-analyst': 'When starting new task', + 'workflow-architect': 'New workflow needed', + 'markdown-validator': 'Before issue creation', + 'browser-automation': 'E2E testing needed', + 'planner': 'Complex tasks', + 'reflector': 'After each agent', + 'memory-manager': 'Context management' + }; + + for (const [cat, heading] of Object.entries(categories)) { + const agents = Object.entries(meta.agents) + .filter(([_, a]) => a.category === cat) + .map(([name, agent]) => { + const displayName = name.split('-').map(w => w.charAt(0).toUpperCase() + w.slice(1)).join(''); + return `| \`@${displayName}\` | ${agent.description.split('.')[0]} | ${triggers[name] || 'Manual invocation'} |`; + }) + .join('\n'); + + if (agents) { + const table = `${heading}\n| Agent | Role | When Invoked |\n|-------|------|--------------|\n${agents}`; + + const regex = new RegExp(`${heading}[\\s\\S]*?(?=###|$)`); + if (regex.test(content)) { + content = content.replace(regex, table + '\n\n'); + } + } + } + + fs.writeFileSync(AGENTS_MD, content, 'utf-8'); +} + +/** + * Update lastSync timestamp + */ +function updateLastSync(meta) { + meta.lastSync = new Date().toISOString(); + fs.writeFileSync(KILO_META, JSON.stringify(meta, null, 2)); +} + +/** + * Main + */ +function main() { + const args = process.argv.slice(2); + const checkOnly = args.includes('--check'); + const fixMode = args.includes('--fix'); + + console.log('=== Agent Sync Tool ===\n'); + console.log('Source of truth: kilo-meta.json\n'); + + const meta = loadKiloMeta(); + + // Check agents + 
console.log('Checking agent files...'); + let violations = checkAgents(meta); + + // Check kilo.jsonc + console.log('Checking kilo.jsonc...'); + violations = violations.concat(checkKiloJsonc(meta)); + + if (violations.length > 0) { + console.log(`\n⚠️ Found ${violations.length} violations:\n`); + + for (const v of violations) { + console.log(` [${v.type}] ${v.agent}: ${v.message}`); + if (v.expected) { + console.log(` Expected: ${v.expected}`); + console.log(` Actual: ${v.actual}`); + } + } + + if (fixMode) { + console.log('\n🔧 Fixing agent files...'); + const fixes = fixAgents(meta); + + for (const f of fixes) { + console.log(` ✓ ${f.agent}: ${f.action} (${f.updates?.join(', ') || 'n/a'})`); + } + + console.log('\n📝 Updating KILO_SPEC.md...'); + updateKiloSpec(meta); + console.log(' ✓ KILO_SPEC.md updated'); + + console.log('\n📝 Updating AGENTS.md...'); + updateAgentsMd(meta); + console.log(' ✓ AGENTS.md updated'); + + updateLastSync(meta); + console.log('\n✅ Sync complete!'); + } else if (checkOnly) { + console.log('\n❌ Check failed. Run with --fix to resolve.'); + process.exit(1); + } + } else { + console.log('\n✅ All agents in sync!'); + + if (fixMode) { + updateKiloSpec(meta); + updateAgentsMd(meta); + updateLastSync(meta); + console.log('✅ Documentation updated'); + } + } +} + +main(); \ No newline at end of file