Compare commits
4 Commits
main
...
agent-sync
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b517ad5dad | ||
|
|
576e8fe8d6 | ||
|
|
0a854a3bc3 | ||
|
|
43747d9875 |
@@ -415,26 +415,35 @@ Provider availability depends on configuration. Common providers include:
|
||||
|
||||
| Agent | Role | Model |
|
||||
|-------|------|-------|
|
||||
| `@RequirementRefiner` | Converts vague ideas to strict User Stories | ollama-cloud/kimi-k2-thinking |
|
||||
| `@HistoryMiner` | Finds duplicates and past solutions in git | ollama-cloud/gpt-oss:20b |
|
||||
| `@SystemAnalyst` | Designs technical specifications | qwen/qwen3.6-plus:free |
|
||||
| `@SDETEngineer` | Writes tests following TDD | qwen/qwen3-coder:free |
|
||||
| `@LeadDeveloper` | Primary code writer | qwen/qwen3-coder:free |
|
||||
| `@FrontendDeveloper` | UI implementation with multimodal | ollama-cloud/kimi-k2.5 |
|
||||
| `@CodeSkeptic` | Adversarial code reviewer | ollama-cloud/minimax-m2.5 |
|
||||
| `@TheFixer` | Iteratively fixes bugs | ollama-cloud/minimax-m2.5 |
|
||||
| `@PerformanceEngineer` | Reviews for performance issues | ollama-cloud/nemotron-3-super |
|
||||
| `@SecurityAuditor` | Scans for vulnerabilities | ollama-cloud/deepseek-v3.2 |
|
||||
| `@ReleaseManager` | Git operations and deployments | ollama-cloud/devstral-2 |
|
||||
| `@Evaluator` | Scores agent effectiveness | ollama-cloud/gpt-oss:120b |
|
||||
| `@PromptOptimizer` | Improves agent prompts | openrouter/qwen/qwen3.6-plus:free |
|
||||
| `@ProductOwner` | Manages issue checklists | openrouter/qwen/qwen3.6-plus:free |
|
||||
| `@Orchestrator` | Routes tasks between agents | ollama-cloud/glm-5 |
|
||||
| `@AgentArchitect` | Manages agent network per Kilo.ai spec | ollama-cloud/gpt-oss:120b |
|
||||
| `@CapabilityAnalyst` | Analyzes task coverage, identifies gaps | ollama-cloud/gpt-oss:120b |
|
||||
| `@MarkdownValidator` | Validates Markdown for Gitea issues | qwen/qwen3.6-plus:free |
|
||||
| `@BackendDeveloper` | Node.js, Express, APIs, database specialist | ollama-cloud/deepseek-v3.2 |
|
||||
| `@WorkflowArchitect` | Creates workflow definitions with complete architecture | ollama-cloud/gpt-oss:120b |
|
||||
| `@RequirementRefiner` | Converts vague ideas and bug reports into strict User Stories with acceptance criteria checklists. | ollama-cloud/kimi-k2-thinking |
|
||||
| `@HistoryMiner` | Analyzes git history to find duplicates and past solutions, preventing regression and duplicate work. | ollama-cloud/nemotron-3-super |
|
||||
| `@SystemAnalyst` | Designs technical specifications, data schemas, and API contracts before implementation. | qwen/qwen3.6-plus:free |
|
||||
| `@SdetEngineer` | Writes tests following TDD methodology. | ollama-cloud/qwen3-coder:480b |
|
||||
| `@LeadDeveloper` | Primary code writer for backend and core logic. | ollama-cloud/qwen3-coder:480b |
|
||||
| `@FrontendDeveloper` | Handles UI implementation with multimodal capabilities. | ollama-cloud/kimi-k2.5 |
|
||||
| `@BackendDeveloper` | Backend specialist for Node.js, Express, APIs, and database integration. | ollama-cloud/deepseek-v3.2 |
|
||||
| `@GoDeveloper` | Go backend specialist for Gin, Echo, APIs, and database integration. | ollama-cloud/qwen3-coder:480b |
|
||||
| `@DevopsEngineer` | DevOps specialist for Docker, Kubernetes, CI/CD pipeline automation, and infrastructure management. | ollama-cloud/deepseek-v3.2 |
|
||||
| `@CodeSkeptic` | Adversarial code reviewer. | ollama-cloud/minimax-m2.5 |
|
||||
| `@TheFixer` | Iteratively fixes bugs based on specific error reports and test failures. | ollama-cloud/minimax-m2.5 |
|
||||
| `@PerformanceEngineer` | Reviews code for performance issues. | ollama-cloud/nemotron-3-super |
|
||||
| `@SecurityAuditor` | Scans for security vulnerabilities, OWASP Top 10, dependency CVEs, and hardcoded secrets. | ollama-cloud/nemotron-3-super |
|
||||
| `@VisualTester` | Visual regression testing agent that compares screenshots and detects UI differences using pixelmatch and image diff. | ollama-cloud/glm-5 |
|
||||
| `@Orchestrator` | Main dispatcher. | ollama-cloud/glm-5 |
|
||||
| `@ReleaseManager` | Manages git operations, semantic versioning, branching, and deployments. | ollama-cloud/devstral-2:123b |
|
||||
| `@Evaluator` | Scores agent effectiveness after task completion for continuous improvement. | ollama-cloud/nemotron-3-super |
|
||||
| `@PromptOptimizer` | Improves agent system prompts based on performance failures. | qwen/qwen3.6-plus:free |
|
||||
| `@ProductOwner` | Manages issue checklists, status labels, tracks progress and coordinates with human users. | ollama-cloud/glm-5 |
|
||||
| `@AgentArchitect` | Creates, modifies, and reviews new agents, workflows, and skills based on capability gap analysis. | ollama-cloud/nemotron-3-super |
|
||||
| `@CapabilityAnalyst` | Analyzes task requirements against available agents, workflows, and skills. | ollama-cloud/nemotron-3-super |
|
||||
| `@WorkflowArchitect` | Creates and maintains workflow definitions with complete architecture, Gitea integration, and quality gates. | ollama-cloud/gpt-oss:120b |
|
||||
| `@MarkdownValidator` | Validates and corrects Markdown descriptions for Gitea issues. | ollama-cloud/nemotron-3-nano:30b |
|
||||
| `@BrowserAutomation` | Browser automation agent using Playwright MCP for E2E testing, form filling, navigation, and web interaction. | ollama-cloud/glm-5 |
|
||||
| `@Planner` | Advanced task planner using Chain of Thought, Tree of Thoughts, and Plan-Execute-Reflect. | ollama-cloud/nemotron-3-super |
|
||||
| `@Reflector` | Self-reflection agent using Reflexion pattern - learns from mistakes. | ollama-cloud/nemotron-3-super |
|
||||
| `@MemoryManager` | Manages agent memory systems - short-term (context), long-term (vector store), and episodic (experiences). | ollama-cloud/nemotron-3-super |
|
||||
|
||||
|
||||
|
||||
**Note:** For AgentArchitect, use `subagent_type: "system-analyst"` with prompt "You are Agent Architect..." (workaround for unsupported agent-architect type).
|
||||
|
||||
@@ -442,23 +451,24 @@ Provider availability depends on configuration. Common providers include:
|
||||
|
||||
| Command | Description | Model |
|
||||
|---------|-------------|-------|
|
||||
| `/landing-page` | Create landing page CMS from HTML mockups | ollama-cloud/kimi-k2.5 |
|
||||
| `/commerce` | Create e-commerce site with products, cart, payments | qwen/qwen3-coder:free |
|
||||
| `/blog` | Create blog/CMS with posts, comments, SEO | qwen/qeen3-coder:free |
|
||||
| `/booking` | Create booking system for services/appointments | qwen/qwen3-coder:free |
|
||||
| `/workflow` | Run complete workflow with quality gates | ollama-cloud/glm-5 |
|
||||
| `/pipeline` | Run full agent pipeline for issue | - |
|
||||
| `/feature` | Full feature development pipeline | qwen/qwen3-coder:free |
|
||||
| `/code` | Quick code generation | qwen/qwen3-coder:free |
|
||||
| `/debug` | Analyzes and fixes bugs | openai/gpt-oss-20b |
|
||||
| `/ask` | Answers codebase questions | openai/qwen3-32b |
|
||||
| `/plan` | Creates detailed task plans | qwen/qwen3-coder:free |
|
||||
| `/e2e-test` | Run E2E tests with browser automation | - |
|
||||
| `/status` | Check pipeline status for issue | - |
|
||||
| `/evaluate` | Generate performance report | - |
|
||||
| `/review` | Code review workflow | - |
|
||||
| `/review-watcher` | Auto-validate review results | - |
|
||||
| `/hotfix` | Hotfix workflow | - |
|
||||
| `/status` | Check pipeline status for issue. | qwen/qwen3.6-plus:free |
|
||||
| `/evaluate` | Generate performance report. | ollama-cloud/gpt-oss:120b |
|
||||
| `/plan` | Creates detailed task plans. | openrouter/qwen/qwen3-coder:free |
|
||||
| `/ask` | Answers codebase questions. | openai/qwen3-32b |
|
||||
| `/debug` | Analyzes and fixes bugs. | ollama-cloud/gpt-oss:20b |
|
||||
| `/code` | Quick code generation. | openrouter/qwen/qwen3-coder:free |
|
||||
| `/research` | Run research and self-improvement. | ollama-cloud/glm-5 |
|
||||
| `/feature` | Full feature development pipeline. | openrouter/qwen/qwen3-coder:free |
|
||||
| `/hotfix` | Hotfix workflow. | openrouter/minimax/minimax-m2.5:free |
|
||||
| `/review` | Code review workflow. | openrouter/minimax/minimax-m2.5:free |
|
||||
| `/review-watcher` | Auto-validate review results. | ollama-cloud/glm-5 |
|
||||
| `/workflow` | Run complete workflow with quality gates. | ollama-cloud/glm-5 |
|
||||
| `/landing-page` | Create landing page CMS from HTML mockups. | ollama-cloud/kimi-k2.5 |
|
||||
| `/commerce` | Create e-commerce site with products, cart, payments. | qwen/qwen3-coder:free |
|
||||
| `/blog` | Create blog/CMS with posts, comments, SEO. | qwen/qwen3-coder:free |
|
||||
| `/booking` | Create booking system for services/appointments. | qwen/qwen3-coder:free |
|
||||
|
||||
|
||||
|
||||
### Workflow Pipeline
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
name: Agent Architect
|
||||
mode: all
|
||||
mode: subagent
|
||||
model: ollama-cloud/nemotron-3-super
|
||||
description: Creates, modifies, and reviews new agents, workflows, and skills based on capability gap analysis
|
||||
color: "#8B5CF6"
|
||||
|
||||
@@ -12,6 +12,7 @@ permission:
|
||||
grep: allow
|
||||
task:
|
||||
"*": deny
|
||||
"code-skeptic": allow
|
||||
---
|
||||
|
||||
# Kilo Code: Backend Developer
|
||||
@@ -34,6 +35,11 @@ Invoke this mode when:
|
||||
|
||||
Backend specialist for Node.js, Express, APIs, and database integration.
|
||||
|
||||
## Task Tool Invocation
|
||||
|
||||
Use the Task tool with `subagent_type` to delegate to other agents:
|
||||
- `subagent_type: "code-skeptic"` — for code review after implementation
|
||||
|
||||
## Behavior Guidelines
|
||||
|
||||
1. **Security First** — Always validate input, sanitize output, protect against injection
|
||||
@@ -276,10 +282,19 @@ This agent uses the following skills for comprehensive Node.js development:
|
||||
|-------|---------|
|
||||
| `nodejs-npm-management` | package.json, scripts, dependencies |
|
||||
|
||||
### Containerization (Docker)
|
||||
| Skill | Purpose |
|
||||
|-------|---------|
|
||||
| `docker-compose` | Multi-container application orchestration |
|
||||
| `docker-swarm` | Production cluster deployment |
|
||||
| `docker-security` | Container security hardening |
|
||||
| `docker-monitoring` | Container monitoring and logging |
|
||||
|
||||
### Rules
|
||||
| File | Content |
|
||||
|------|---------|
|
||||
| `.kilo/rules/nodejs.md` | Code style, security, best practices |
|
||||
| `.kilo/rules/docker.md` | Docker, Compose, Swarm best practices |
|
||||
|
||||
## Handoff Protocol
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
description: Browser automation agent using Playwright MCP for E2E testing, form filling, navigation, and web interaction
|
||||
mode: all
|
||||
mode: subagent
|
||||
model: ollama-cloud/glm-5
|
||||
color: "#1E88E5"
|
||||
permission:
|
||||
|
||||
364
.kilo/agents/devops-engineer.md
Normal file
364
.kilo/agents/devops-engineer.md
Normal file
@@ -0,0 +1,364 @@
|
||||
---
|
||||
description: DevOps specialist for Docker, Kubernetes, CI/CD pipeline automation, and infrastructure management
|
||||
mode: subagent
|
||||
model: ollama-cloud/deepseek-v3.2
|
||||
color: "#FF6B35"
|
||||
permission:
|
||||
read: allow
|
||||
edit: allow
|
||||
write: allow
|
||||
bash: allow
|
||||
glob: allow
|
||||
grep: allow
|
||||
task:
|
||||
"*": deny
|
||||
"code-skeptic": allow
|
||||
"security-auditor": allow
|
||||
---
|
||||
|
||||
# Kilo Code: DevOps Engineer
|
||||
|
||||
## Role Definition
|
||||
|
||||
You are **DevOps Engineer** — the infrastructure specialist. Your personality is automation-focused, reliability-obsessed, and security-conscious. You design deployment pipelines, manage containerization, and ensure system reliability.
|
||||
|
||||
## When to Use
|
||||
|
||||
Invoke this mode when:
|
||||
- Setting up Docker containers and Compose files
|
||||
- Deploying to Docker Swarm or Kubernetes
|
||||
- Creating CI/CD pipelines
|
||||
- Configuring infrastructure automation
|
||||
- Setting up monitoring and logging
|
||||
- Managing secrets and configurations
|
||||
- Performance tuning deployments
|
||||
|
||||
## Short Description
|
||||
|
||||
DevOps specialist for Docker, Kubernetes, CI/CD automation, and infrastructure management.
|
||||
|
||||
## Behavior Guidelines
|
||||
|
||||
1. **Automate everything** — manual steps lead to errors
|
||||
2. **Infrastructure as Code** — version control all configurations
|
||||
3. **Security first** — minimal privileges, scan all images
|
||||
4. **Monitor everything** — metrics, logs, traces
|
||||
5. **Test deployments** — staging before production
|
||||
|
||||
## Task Tool Invocation
|
||||
|
||||
Use the Task tool with `subagent_type` to delegate to other agents:
|
||||
- `subagent_type: "code-skeptic"` — for code review after implementation
|
||||
- `subagent_type: "security-auditor"` — for security review of container configs
|
||||
|
||||
## Skills Reference
|
||||
|
||||
### Containerization
|
||||
| Skill | Purpose |
|
||||
|-------|---------|
|
||||
| `docker-compose` | Multi-container application setup |
|
||||
| `docker-swarm` | Production cluster deployment |
|
||||
| `docker-security` | Container security hardening |
|
||||
| `docker-monitoring` | Container monitoring and logging |
|
||||
|
||||
### CI/CD
|
||||
| Skill | Purpose |
|
||||
|-------|---------|
|
||||
| `github-actions` | GitHub Actions workflows |
|
||||
| `gitlab-ci` | GitLab CI/CD pipelines |
|
||||
| `jenkins` | Jenkins pipelines |
|
||||
|
||||
### Infrastructure
|
||||
| Skill | Purpose |
|
||||
|-------|---------|
|
||||
| `terraform` | Infrastructure as Code |
|
||||
| `ansible` | Configuration management |
|
||||
| `helm` | Kubernetes package manager |
|
||||
|
||||
### Rules
|
||||
| File | Content |
|
||||
|------|---------|
|
||||
| `.kilo/rules/docker.md` | Docker best practices |
|
||||
|
||||
## Tech Stack
|
||||
|
||||
| Layer | Technologies |
|
||||
|-------|-------------|
|
||||
| Containers | Docker, Docker Compose, Docker Swarm |
|
||||
| Orchestration | Kubernetes, Helm |
|
||||
| CI/CD | GitHub Actions, GitLab CI, Jenkins |
|
||||
| Monitoring | Prometheus, Grafana, Loki |
|
||||
| Logging | ELK Stack, Fluentd |
|
||||
| Secrets | Docker Secrets, Vault |
|
||||
|
||||
## Output Format
|
||||
|
||||
```markdown
|
||||
## DevOps Implementation: [Feature]
|
||||
|
||||
### Container Configuration
|
||||
- Base image: node:20-alpine
|
||||
- Multi-stage build: ✅
|
||||
- Non-root user: ✅
|
||||
- Health checks: ✅
|
||||
|
||||
### Deployment Configuration
|
||||
- Service: api
|
||||
- Replicas: 3
|
||||
- Resource limits: CPU 1, Memory 1G
|
||||
- Networks: app-network (overlay)
|
||||
|
||||
### Security Measures
|
||||
- ✅ Non-root user (appuser:1001)
|
||||
- ✅ Read-only filesystem
|
||||
- ✅ Dropped capabilities (ALL)
|
||||
- ✅ No new privileges
|
||||
- ✅ Security scanning in CI/CD
|
||||
|
||||
### Monitoring
|
||||
- Health endpoint: /health
|
||||
- Metrics: Prometheus /metrics
|
||||
- Logging: JSON structured logs
|
||||
|
||||
---
|
||||
Status: deployed
|
||||
@CodeSkeptic ready for review
|
||||
```
|
||||
|
||||
## Dockerfile Patterns
|
||||
|
||||
### Multi-stage Production Build
|
||||
|
||||
```dockerfile
|
||||
# Build stage
|
||||
FROM node:20-alpine AS builder
|
||||
WORKDIR /app
|
||||
COPY package*.json ./
|
||||
RUN npm ci
|
||||
COPY . .
|
||||
RUN npm run build && npm prune --omit=dev
|
||||
|
||||
# Production stage
|
||||
FROM node:20-alpine
|
||||
RUN addgroup -g 1001 appgroup && \
|
||||
adduser -u 1001 -G appgroup -D appuser
|
||||
WORKDIR /app
|
||||
COPY --from=builder --chown=appuser:appgroup /app/dist ./dist
|
||||
COPY --from=builder --chown=appuser:appgroup /app/node_modules ./node_modules
|
||||
USER appuser
|
||||
EXPOSE 3000
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
|
||||
CMD node -e "require('http').get('http://localhost:3000/health', (r) => process.exit(r.statusCode === 200 ? 0 : 1))"
|
||||
CMD ["node", "dist/index.js"]
|
||||
```
|
||||
|
||||
### Development Build
|
||||
|
||||
```dockerfile
|
||||
FROM node:20-alpine
|
||||
WORKDIR /app
|
||||
COPY package*.json ./
|
||||
RUN npm install
|
||||
COPY . .
|
||||
EXPOSE 3000
|
||||
CMD ["npm", "run", "dev"]
|
||||
```
|
||||
|
||||
## Docker Compose Patterns
|
||||
|
||||
### Development Environment
|
||||
|
||||
```yaml
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
app:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile.dev
|
||||
volumes:
|
||||
- .:/app
|
||||
- /app/node_modules
|
||||
environment:
|
||||
- NODE_ENV=development
|
||||
- DATABASE_URL=postgres://db:5432/app
|
||||
ports:
|
||||
- "3000:3000"
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
|
||||
db:
|
||||
image: postgres:15-alpine
|
||||
environment:
|
||||
POSTGRES_DB: app
|
||||
POSTGRES_USER: app
|
||||
POSTGRES_PASSWORD: ${DB_PASSWORD}
|
||||
volumes:
|
||||
- postgres-data:/var/lib/postgresql/data
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U app"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
volumes:
|
||||
postgres-data:
|
||||
```
|
||||
|
||||
### Production Environment
|
||||
|
||||
```yaml
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
app:
|
||||
image: myapp:${VERSION}
|
||||
deploy:
|
||||
replicas: 3
|
||||
update_config:
|
||||
parallelism: 1
|
||||
delay: 10s
|
||||
failure_action: rollback
|
||||
rollback_config:
|
||||
parallelism: 1
|
||||
delay: 10s
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
max_attempts: 3
|
||||
resources:
|
||||
limits:
|
||||
cpus: '1'
|
||||
memory: 1G
|
||||
reservations:
|
||||
cpus: '0.5'
|
||||
memory: 512M
|
||||
healthcheck:
|
||||
test: ["CMD", "node", "-e", "require('http').get('http://localhost:3000/health', (r) => process.exit(r.statusCode === 200 ? 0 : 1))"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 60s
|
||||
networks:
|
||||
- app-network
|
||||
secrets:
|
||||
- db_password
|
||||
- jwt_secret
|
||||
|
||||
networks:
|
||||
app-network:
|
||||
driver: overlay
|
||||
attachable: true
|
||||
|
||||
secrets:
|
||||
db_password:
|
||||
external: true
|
||||
jwt_secret:
|
||||
external: true
|
||||
```
|
||||
|
||||
## CI/CD Pipeline Patterns
|
||||
|
||||
### GitHub Actions
|
||||
|
||||
```yaml
|
||||
# .github/workflows/docker.yml
|
||||
name: Docker CI/CD
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
pull_request:
|
||||
branches: [main]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
|
||||
- name: Login to Registry
|
||||
uses: docker/login-action@v2
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Build and Push
|
||||
uses: docker/build-push-action@v4
|
||||
with:
|
||||
context: .
|
||||
push: ${{ github.event_name != 'pull_request' }}
|
||||
tags: ghcr.io/${{ github.repository }}:${{ github.sha }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
- name: Scan Image
|
||||
uses: aquasecurity/trivy-action@master
|
||||
with:
|
||||
image-ref: ghcr.io/${{ github.repository }}:${{ github.sha }}
|
||||
format: 'table'
|
||||
exit-code: '1'
|
||||
severity: 'CRITICAL,HIGH'
|
||||
|
||||
deploy:
|
||||
needs: build
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Deploy to Swarm
|
||||
run: |
|
||||
docker stack deploy -c docker-compose.prod.yml mystack
|
||||
```
|
||||
|
||||
## Security Checklist
|
||||
|
||||
```
|
||||
□ Non-root user in Dockerfile
|
||||
□ Minimal base image (alpine/distroless)
|
||||
□ Multi-stage build
|
||||
□ .dockerignore excludes secret files (.env, keys, credentials) from the build context
|
||||
□ No secrets in images
|
||||
□ Vulnerability scanning in CI/CD
|
||||
□ Read-only filesystem
|
||||
□ Dropped capabilities
|
||||
□ Resource limits defined
|
||||
□ Health checks configured
|
||||
□ Network segmentation
|
||||
□ TLS for external communication
|
||||
```
|
||||
|
||||
## Prohibited Actions
|
||||
|
||||
- DO NOT use `latest` tag in production
|
||||
- DO NOT run containers as root
|
||||
- DO NOT store secrets in images
|
||||
- DO NOT expose unnecessary ports
|
||||
- DO NOT skip vulnerability scanning
|
||||
- DO NOT ignore resource limits
|
||||
- DO NOT bypass health checks
|
||||
|
||||
## Handoff Protocol
|
||||
|
||||
After implementation:
|
||||
1. Verify containers are running
|
||||
2. Check health endpoints
|
||||
3. Review resource usage
|
||||
4. Validate security configuration
|
||||
5. Test deployment updates
|
||||
6. Tag `@CodeSkeptic` for review
|
||||
## Gitea Commenting (MANDATORY)
|
||||
|
||||
**You MUST post a comment to the Gitea issue after completing your work.**
|
||||
|
||||
Post a comment with:
|
||||
1. ✅ Success: What was done, files changed, duration
|
||||
2. ❌ Error: What failed, why, and blocker
|
||||
3. ❓ Question: Clarification needed with options
|
||||
|
||||
Use the `post_comment` function from `.kilo/skills/gitea-commenting/SKILL.md`.
|
||||
|
||||
**NO EXCEPTIONS** - Always comment to Gitea.
|
||||
@@ -12,6 +12,7 @@ permission:
|
||||
grep: allow
|
||||
task:
|
||||
"*": deny
|
||||
"code-skeptic": allow
|
||||
---
|
||||
|
||||
# Kilo Code: Frontend Developer
|
||||
@@ -33,6 +34,11 @@ Invoke this mode when:
|
||||
|
||||
Handles UI implementation with multimodal capabilities. Accepts visual references.
|
||||
|
||||
## Task Tool Invocation
|
||||
|
||||
Use the Task tool with `subagent_type` to delegate to other agents:
|
||||
- `subagent_type: "code-skeptic"` — for code review after implementation
|
||||
|
||||
## Behavior Guidelines
|
||||
|
||||
1. **Accept visual input** — can analyze screenshots and mockups
|
||||
|
||||
@@ -12,6 +12,7 @@ permission:
|
||||
grep: allow
|
||||
task:
|
||||
"*": deny
|
||||
"code-skeptic": allow
|
||||
---
|
||||
|
||||
# Kilo Code: Go Developer
|
||||
@@ -34,6 +35,11 @@ Invoke this mode when:
|
||||
|
||||
Go backend specialist for Gin, Echo, APIs, and concurrent systems.
|
||||
|
||||
## Task Tool Invocation
|
||||
|
||||
Use the Task tool with `subagent_type` to delegate to other agents:
|
||||
- `subagent_type: "code-skeptic"` — for code review after implementation
|
||||
|
||||
## Behavior Guidelines
|
||||
|
||||
1. **Idiomatic Go** — Follow Go conventions and idioms
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
description: Analyzes git history to find duplicates and past solutions, preventing regression and duplicate work
|
||||
mode: all
|
||||
mode: subagent
|
||||
model: ollama-cloud/nemotron-3-super
|
||||
color: "#059669"
|
||||
permission:
|
||||
|
||||
@@ -32,6 +32,7 @@ permission:
|
||||
"planner": allow
|
||||
"reflector": allow
|
||||
"memory-manager": allow
|
||||
"devops-engineer": allow
|
||||
---
|
||||
|
||||
# Kilo Code: Orchestrator
|
||||
@@ -128,6 +129,8 @@ Use the Task tool to delegate to subagents with these subagent_type values:
|
||||
| Planner | planner | Task decomposition, CoT, ToT planning |
|
||||
| Reflector | reflector | Self-reflection, lesson extraction |
|
||||
| MemoryManager | memory-manager | Memory systems, context retrieval |
|
||||
| DevOpsEngineer | devops-engineer | Docker, Kubernetes, CI/CD |
|
||||
| BrowserAutomation | browser-automation | Browser automation, E2E testing |
|
||||
|
||||
**Note:** `agent-architect` subagent_type is not recognized. Use `system-analyst` with prompt "You are Agent Architect..." as workaround.
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
description: Manages issue checklists, status labels, tracks progress and coordinates with human users
|
||||
mode: all
|
||||
mode: subagent
|
||||
model: ollama-cloud/glm-5
|
||||
color: "#EA580C"
|
||||
permission:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
description: Improves agent system prompts based on performance failures. Meta-learner for prompt optimization
|
||||
mode: all
|
||||
mode: subagent
|
||||
model: qwen/qwen3.6-plus:free
|
||||
color: "#BE185D"
|
||||
permission:
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
---
|
||||
description: Scans for security vulnerabilities, OWASP Top 10, dependency CVEs, and hardcoded secrets
|
||||
mode: all
|
||||
mode: subagent
|
||||
model: ollama-cloud/nemotron-3-super
|
||||
color: "#7F1D1D"
|
||||
color: "#DC2626"
|
||||
permission:
|
||||
read: allow
|
||||
bash: allow
|
||||
@@ -115,8 +115,41 @@ gitleaks --path .
|
||||
|
||||
# Check for exposed env
|
||||
grep -r "API_KEY\|PASSWORD\|SECRET" --include="*.ts" --include="*.js"
|
||||
|
||||
# Docker image vulnerability scan
|
||||
trivy image myapp:latest
|
||||
docker scout cves myapp:latest
|
||||
|
||||
# Docker secrets scan
|
||||
gitleaks --image myapp:latest
|
||||
```
|
||||
|
||||
## Docker Security Checklist
|
||||
|
||||
```
|
||||
□ Running as non-root user
|
||||
□ Using minimal base images (alpine/distroless)
|
||||
□ Using specific image versions (not latest)
|
||||
□ No secrets in images
|
||||
□ Read-only filesystem where possible
|
||||
□ Capabilities dropped to minimum
|
||||
□ No new privileges flag set
|
||||
□ Resource limits defined
|
||||
□ Health checks configured
|
||||
□ Network segmentation implemented
|
||||
□ TLS for external communication
|
||||
□ Secrets managed via Docker secrets/vault
|
||||
□ Vulnerability scanning in CI/CD
|
||||
□ Base images regularly updated
|
||||
```
|
||||
|
||||
## Skills Reference
|
||||
|
||||
| Skill | Purpose |
|
||||
|-------|---------|
|
||||
| `docker-security` | Container security hardening |
|
||||
| `nodejs-security-owasp` | Node.js OWASP Top 10 |
|
||||
|
||||
## Prohibited Actions
|
||||
|
||||
- DO NOT approve with critical/high vulnerabilities
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
description: Designs technical specifications, data schemas, and API contracts before implementation
|
||||
mode: all
|
||||
mode: subagent
|
||||
model: qwen/qwen3.6-plus:free
|
||||
color: "#0891B2"
|
||||
permission:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
description: Visual regression testing agent that compares screenshots and detects UI differences using pixelmatch and image diff
|
||||
mode: all
|
||||
mode: subagent
|
||||
model: ollama-cloud/glm-5
|
||||
color: "#E91E63"
|
||||
permission:
|
||||
|
||||
549
.kilo/rules/docker.md
Normal file
549
.kilo/rules/docker.md
Normal file
@@ -0,0 +1,549 @@
|
||||
# Docker & Containerization Rules
|
||||
|
||||
Essential rules for Docker, Docker Compose, Docker Swarm, and container technologies.
|
||||
|
||||
## Dockerfile Best Practices
|
||||
|
||||
### Layer Optimization
|
||||
|
||||
- Minimize layers by combining commands
|
||||
- Order layers from least to most frequently changing
|
||||
- Use multi-stage builds to reduce image size
|
||||
- Clean up package manager caches
|
||||
|
||||
```dockerfile
|
||||
# ✅ Good: Multi-stage build with layer optimization
|
||||
FROM node:20-alpine AS builder
|
||||
WORKDIR /app
|
||||
COPY package*.json ./
|
||||
RUN npm ci --only=production
|
||||
|
||||
FROM node:20-alpine
|
||||
WORKDIR /app
|
||||
COPY --from=builder /app/node_modules ./node_modules
|
||||
COPY . .
|
||||
USER node
|
||||
EXPOSE 3000
|
||||
CMD ["node", "server.js"]
|
||||
|
||||
# ❌ Bad: Single stage, many layers
|
||||
FROM node:20
|
||||
RUN npm install -g nodemon
|
||||
WORKDIR /app
|
||||
COPY . .
|
||||
RUN npm install
|
||||
EXPOSE 3000
|
||||
CMD ["nodemon", "server.js"]
|
||||
```
|
||||
|
||||
### Security
|
||||
|
||||
- Run as non-root user
|
||||
- Use specific image versions, not `latest`
|
||||
- Scan images for vulnerabilities
|
||||
- Don't store secrets in images
|
||||
|
||||
```dockerfile
|
||||
# ✅ Good
|
||||
FROM node:20-alpine
|
||||
RUN addgroup -g 1001 appgroup && \
|
||||
adduser -u 1001 -G appgroup -D appuser
|
||||
WORKDIR /app
|
||||
COPY --chown=appuser:appgroup . .
|
||||
USER appuser
|
||||
CMD ["node", "server.js"]
|
||||
|
||||
# ❌ Bad
|
||||
FROM node:latest # Unpredictable version
|
||||
# Running as root (default)
|
||||
COPY . .
|
||||
CMD ["node", "server.js"]
|
||||
```
|
||||
|
||||
### Caching Strategy
|
||||
|
||||
```dockerfile
|
||||
# ✅ Good: Dependencies cached separately
|
||||
COPY package*.json ./
|
||||
RUN npm ci
|
||||
COPY . .
|
||||
|
||||
# ❌ Bad: All code copied before dependencies
|
||||
COPY . .
|
||||
RUN npm install
|
||||
```
|
||||
|
||||
## Docker Compose
|
||||
|
||||
### Service Structure
|
||||
|
||||
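- Set `version: '3.8'` only for `docker stack deploy` or legacy `docker-compose` v1; Compose v2 treats the top-level `version` key as obsolete and ignores it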
|
||||
- Define services in logical order
|
||||
- Use environment variables for configuration
|
||||
- Set resource limits
|
||||
|
||||
```yaml
|
||||
# ✅ Good
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
app:
|
||||
image: myapp:1.4.2  # pin a specific version instead of latest
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
environment:
|
||||
- NODE_ENV=production
|
||||
- DATABASE_URL=postgres://db:5432/app
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- app-network
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '0.5'
|
||||
memory: 512M
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
|
||||
db:
|
||||
image: postgres:15-alpine
|
||||
volumes:
|
||||
- postgres-data:/var/lib/postgresql/data
|
||||
environment:
|
||||
POSTGRES_DB: app
|
||||
POSTGRES_USER: ${DB_USER}
|
||||
POSTGRES_PASSWORD: ${DB_PASSWORD}
|
||||
networks:
|
||||
- app-network
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U $$POSTGRES_USER"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
networks:
|
||||
app-network:
|
||||
driver: bridge
|
||||
|
||||
volumes:
|
||||
postgres-data:
|
||||
```
|
||||
|
||||
### Environment Variables
|
||||
|
||||
- Use `.env` files for local development
|
||||
- Never commit `.env` files with secrets
|
||||
- Use Docker secrets for sensitive data in Swarm
|
||||
|
||||
```bash
|
||||
# .env (gitignored)
|
||||
NODE_ENV=production
|
||||
DB_PASSWORD=secure_password_here
|
||||
JWT_SECRET=your_jwt_secret_here
|
||||
```
|
||||
|
||||
```yaml
|
||||
# docker-compose.yml
|
||||
services:
|
||||
app:
|
||||
env_file:
|
||||
- .env
|
||||
# OR explicit for non-sensitive
|
||||
environment:
|
||||
- NODE_ENV=production
|
||||
# Secrets for sensitive data in Swarm
|
||||
secrets:
|
||||
- db_password
|
||||
```
|
||||
|
||||
### Network Patterns
|
||||
|
||||
```yaml
|
||||
# ✅ Good: Separated networks for security
|
||||
networks:
|
||||
frontend:
|
||||
driver: bridge
|
||||
backend:
|
||||
driver: bridge
|
||||
internal: true # No external access
|
||||
|
||||
services:
|
||||
web:
|
||||
networks:
|
||||
- frontend
|
||||
- backend
|
||||
api:
|
||||
networks:
|
||||
- backend
|
||||
db:
|
||||
networks:
|
||||
- backend
|
||||
```
|
||||
|
||||
### Volume Management
|
||||
|
||||
```yaml
|
||||
# ✅ Good: Named volumes with labels
|
||||
volumes:
|
||||
postgres-data:
|
||||
driver: local
|
||||
labels:
|
||||
- "app=myapp"
|
||||
- "type=database"
|
||||
|
||||
services:
|
||||
db:
|
||||
volumes:
|
||||
- postgres-data:/var/lib/postgresql/data
|
||||
- ./init-scripts:/docker-entrypoint-initdb.d:ro
|
||||
```
|
||||
|
||||
## Docker Swarm
|
||||
|
||||
### Service Deployment
|
||||
|
||||
```yaml
|
||||
# docker-compose.yml (Swarm compatible)
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
api:
|
||||
image: myapp/api:v1
|
||||
deploy:
|
||||
mode: replicated
|
||||
replicas: 3
|
||||
update_config:
|
||||
parallelism: 1
|
||||
delay: 10s
|
||||
failure_action: rollback
|
||||
rollback_config:
|
||||
parallelism: 1
|
||||
delay: 10s
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
delay: 5s
|
||||
max_attempts: 3
|
||||
window: 120s
|
||||
placement:
|
||||
constraints:
|
||||
- node.role == worker
|
||||
preferences:
|
||||
- spread: node.id
|
||||
resources:
|
||||
limits:
|
||||
cpus: '0.5'
|
||||
memory: 512M
|
||||
reservations:
|
||||
cpus: '0.25'
|
||||
memory: 256M
|
||||
networks:
|
||||
- app-network
|
||||
secrets:
|
||||
- db_password
|
||||
- jwt_secret
|
||||
configs:
|
||||
- app_config
|
||||
|
||||
networks:
|
||||
app-network:
|
||||
driver: overlay
|
||||
attachable: true
|
||||
|
||||
secrets:
|
||||
db_password:
|
||||
external: true
|
||||
jwt_secret:
|
||||
external: true
|
||||
|
||||
configs:
|
||||
app_config:
|
||||
external: true
|
||||
```
|
||||
|
||||
### Stack Deployment
|
||||
|
||||
```bash
|
||||
# Deploy stack
|
||||
docker stack deploy -c docker-compose.yml mystack
|
||||
|
||||
# List services
|
||||
docker stack services mystack
|
||||
|
||||
# Scale service
|
||||
docker service scale mystack_api=5
|
||||
|
||||
# Update service
|
||||
docker service update --image myapp/api:v2 mystack_api
|
||||
|
||||
# Rollback
|
||||
docker service rollback mystack_api
|
||||
```
|
||||
|
||||
### Health Checks
|
||||
|
||||
```yaml
|
||||
services:
|
||||
api:
|
||||
# Script-based health check (healthcheck.js must be present in the image)
|
||||
healthcheck:
|
||||
test: ["CMD", "node", "healthcheck.js"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 60s
|
||||
|
||||
# Alternative: HTTP check with curl (keep only one healthcheck block per service)
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
```
|
||||
|
||||
### Secrets Management
|
||||
|
||||
```bash
|
||||
# Create secret
|
||||
echo "my_secret_password" | docker secret create db_password -
|
||||
|
||||
# Create secret from file
|
||||
docker secret create jwt_secret ./jwt_secret.txt
|
||||
|
||||
# List secrets
|
||||
docker secret ls
|
||||
|
||||
# Use in compose
|
||||
secrets:
|
||||
db_password:
|
||||
external: true
|
||||
```
|
||||
|
||||
### Config Management
|
||||
|
||||
```bash
|
||||
# Create config
|
||||
docker config create app_config ./config.json
|
||||
|
||||
# Use in compose
|
||||
configs:
|
||||
app_config:
|
||||
external: true
|
||||
|
||||
services:
|
||||
api:
|
||||
configs:
|
||||
- app_config
|
||||
```
|
||||
|
||||
## Container Security
|
||||
|
||||
### Image Security
|
||||
|
||||
```bash
|
||||
# Scan image for vulnerabilities
|
||||
docker scout vulnerabilities myapp:latest
|
||||
trivy image myapp:latest
|
||||
|
||||
# Check image for secrets
|
||||
trivy image --scanners secret myapp:latest
|
||||
```
|
||||
|
||||
### Runtime Security
|
||||
|
||||
```dockerfile
|
||||
# ✅ Good: Security measures
|
||||
FROM node:20-alpine
|
||||
|
||||
# Create non-root user
|
||||
RUN addgroup -g 1001 appgroup && \
|
||||
adduser -u 1001 -G appgroup -D appuser
|
||||
|
||||
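# Restrict ownership and permissions of the app directory (a read-only root filesystem is enforced at runtime with --read-only / read_only: true)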
|
||||
RUN mkdir -p /app && chmod -R 755 /app && \
|
||||
chown -R appuser:appgroup /app
|
||||
|
||||
WORKDIR /app
|
||||
COPY --chown=appuser:appgroup . .
|
||||
|
||||
# Run as the non-root user (capabilities are dropped at runtime with --cap-drop=ALL / cap_drop: [ALL])
|
||||
USER appuser
|
||||
VOLUME ["/tmp"]
|
||||
|
||||
CMD ["node", "server.js"]
|
||||
```
|
||||
|
||||
### Network Security
|
||||
|
||||
```yaml
|
||||
# ✅ Good: Limited network access
|
||||
services:
|
||||
api:
|
||||
networks:
|
||||
- backend
|
||||
# No ports exposed to host
|
||||
|
||||
db:
|
||||
networks:
|
||||
- backend
|
||||
# Internal network only
|
||||
|
||||
networks:
|
||||
backend:
|
||||
internal: true # No internet access
|
||||
```
|
||||
|
||||
### Resource Limits
|
||||
|
||||
```yaml
|
||||
services:
|
||||
api:
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '1.0'
|
||||
memory: 1G
|
||||
reservations:
|
||||
cpus: '0.5'
|
||||
memory: 512M
|
||||
```
|
||||
|
||||
## Common Patterns
|
||||
|
||||
### Development Setup
|
||||
|
||||
```yaml
|
||||
# docker-compose.dev.yml
|
||||
version: '3.8'
|
||||
services:
|
||||
app:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile.dev
|
||||
volumes:
|
||||
- .:/app
|
||||
- /app/node_modules
|
||||
environment:
|
||||
- NODE_ENV=development
|
||||
ports:
|
||||
- "3000:3000"
|
||||
command: npm run dev
|
||||
```
|
||||
|
||||
### Production Setup
|
||||
|
||||
```yaml
|
||||
# docker-compose.prod.yml
|
||||
version: '3.8'
|
||||
services:
|
||||
app:
|
||||
image: myapp:${VERSION}
|
||||
environment:
|
||||
- NODE_ENV=production
|
||||
deploy:
|
||||
replicas: 3
|
||||
update_config:
|
||||
parallelism: 1
|
||||
delay: 10s
|
||||
healthcheck:
|
||||
test: ["CMD", "node", "healthcheck.js"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
```
|
||||
|
||||
### Multi-Environment
|
||||
|
||||
```bash
|
||||
# Override files
|
||||
docker-compose -f docker-compose.yml -f docker-compose.dev.yml up
|
||||
docker-compose -f docker-compose.yml -f docker-compose.prod.yml up -d
|
||||
```
|
||||
|
||||
### Logging
|
||||
|
||||
```yaml
|
||||
services:
|
||||
app:
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
labels: "app,environment"
|
||||
```
|
||||
|
||||
## CI/CD Integration
|
||||
|
||||
### Build Pipeline
|
||||
|
||||
```yaml
|
||||
# .github/workflows/docker.yml
|
||||
name: Docker Build
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Build image
|
||||
run: docker build -t myapp:${{ github.sha }} .
|
||||
|
||||
- name: Scan image
|
||||
run: trivy image myapp:${{ github.sha }}
|
||||
|
||||
- name: Push to registry
|
||||
run: |
|
||||
echo "${{ secrets.DOCKER_PASSWORD }}" | docker login -u "${{ secrets.DOCKER_USER }}" --password-stdin
|
||||
docker push myapp:${{ github.sha }}
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Common Commands
|
||||
|
||||
```bash
|
||||
# View logs
|
||||
docker-compose logs -f app
|
||||
|
||||
# Execute in container
|
||||
docker-compose exec app sh
|
||||
|
||||
# Check health
|
||||
docker inspect --format='{{.State.Health.Status}}' <container>
|
||||
|
||||
# View resource usage
|
||||
docker stats
|
||||
|
||||
# Remove unused resources
|
||||
docker system prune -a
|
||||
|
||||
# Debug network
|
||||
docker network inspect app-network
|
||||
|
||||
# Swarm diagnostics
|
||||
docker node ls
|
||||
docker service ps mystack_api
|
||||
```
|
||||
|
||||
## Prohibitions
|
||||
|
||||
- DO NOT run containers as root
|
||||
- DO NOT use `latest` tag in production
|
||||
- DO NOT expose unnecessary ports
|
||||
- DO NOT store secrets in images
|
||||
- DO NOT use privileged mode unnecessarily
|
||||
- DO NOT mount host directories without restrictions
|
||||
- DO NOT skip health checks in production
|
||||
- DO NOT ignore vulnerability scans
|
||||
115
.kilo/rules/evolutionary-sync.md
Normal file
115
.kilo/rules/evolutionary-sync.md
Normal file
@@ -0,0 +1,115 @@
|
||||
# Evolutionary Mode Rules
|
||||
|
||||
When agents are modified, created, or updated during evolutionary improvement, this rule ensures all related files stay synchronized.
|
||||
|
||||
## Source of Truth
|
||||
|
||||
**`kilo.json`** is the single source of truth for:
|
||||
- Agent definitions (models, modes, descriptions)
|
||||
- Command definitions (models, descriptions)
|
||||
- Categories and groupings
|
||||
|
||||
## Files to Synchronize
|
||||
|
||||
When agents change, update ALL of these files:
|
||||
|
||||
| File | What to Update |
|
||||
|------|----------------|
|
||||
| `kilo.json` | Models, modes, descriptions (source of truth) |
|
||||
| `.kilo/agents/*.md` | Model in YAML frontmatter |
|
||||
| `.kilo/KILO_SPEC.md` | Pipeline Agents table, Workflow Commands table |
|
||||
| `AGENTS.md` | Pipeline Agents tables by category |
|
||||
| `.kilo/agents/orchestrator.md` | Task Tool Invocation table |
|
||||
|
||||
## Sync Checklist
|
||||
|
||||
When modifying agents:
|
||||
|
||||
```
|
||||
□ Update kilo.json with new model/description
|
||||
□ Update agent .md file frontmatter
|
||||
□ Update KILO_SPEC.md Pipeline Agents table
|
||||
□ Update AGENTS.md category tables
|
||||
□ Update orchestrator.md subagent_type mappings (if new agent)
|
||||
□ Run scripts/sync-agents.js --check to verify
|
||||
```
|
||||
|
||||
## Adding New Agent
|
||||
|
||||
1. Create `.kilo/agents/agent-name.md` with frontmatter:
|
||||
```yaml
|
||||
---
|
||||
description: Agent description
|
||||
mode: subagent|primary|all
|
||||
model: provider/model-id
|
||||
color: "#HEX"
|
||||
permission:
|
||||
read: allow
|
||||
edit: allow
|
||||
...
|
||||
---
|
||||
```
|
||||
|
||||
2. Add to `kilo.json` under `agents`:
|
||||
```json
|
||||
"agent-name": {
|
||||
"file": ".kilo/agents/agent-name.md",
|
||||
"description": "Full description",
|
||||
"model": "provider/model-id",
|
||||
"mode": "subagent",
|
||||
"category": "core|quality|meta|cognitive|testing"
|
||||
}
|
||||
```
|
||||
|
||||
3. If subagent, add to `orchestrator.md`:
|
||||
- Add to permission list
|
||||
- Add to Task Tool Invocation table
|
||||
|
||||
4. Run sync script:
|
||||
```bash
|
||||
node scripts/sync-agents.js --fix
|
||||
```
|
||||
|
||||
## Model Changes
|
||||
|
||||
When changing a model:
|
||||
|
||||
1. Update agent file frontmatter
|
||||
2. Update `kilo.json`
|
||||
3. Update `KILO_SPEC.md` and `AGENTS.md`
|
||||
4. Document reason in commit message
|
||||
|
||||
Example:
|
||||
```
|
||||
fix: update LeadDeveloper model from qwen3-coder:free to qwen3-coder:480b
|
||||
|
||||
Reason: Better code generation quality, supports larger context
|
||||
```
|
||||
|
||||
## Verification
|
||||
|
||||
Run sync verification before commits:
|
||||
|
||||
```bash
|
||||
# Check only (CI mode)
|
||||
node scripts/sync-agents.js --check
|
||||
|
||||
# Fix discrepancies
|
||||
node scripts/sync-agents.js --fix
|
||||
```
|
||||
|
||||
## CI Integration
|
||||
|
||||
Add to `.github/workflows/ci.yml`:
|
||||
|
||||
```yaml
|
||||
- name: Verify Agent Sync
|
||||
run: node scripts/sync-agents.js --check
|
||||
```
|
||||
|
||||
## Prohibited Actions
|
||||
|
||||
- DO NOT update KILO_SPEC.md without updating kilo.json
|
||||
- DO NOT update agent model without updating all sync targets
|
||||
- DO NOT add new agent without updating orchestrator permissions
|
||||
- DO NOT skip running sync script after changes
|
||||
576
.kilo/skills/docker-compose/SKILL.md
Normal file
576
.kilo/skills/docker-compose/SKILL.md
Normal file
@@ -0,0 +1,576 @@
|
||||
# Skill: Docker Compose
|
||||
|
||||
## Purpose
|
||||
|
||||
Comprehensive skill for Docker Compose configuration, orchestration, and multi-container application deployment.
|
||||
|
||||
## Overview
|
||||
|
||||
Docker Compose is a tool for defining and running multi-container Docker applications. Use this skill when working with local development environments, CI/CD pipelines, and production deployments.
|
||||
|
||||
## When to Use
|
||||
|
||||
- Setting up local development environments
|
||||
- Configuring multi-container applications
|
||||
- Managing service dependencies
|
||||
- Implementing health checks and waiting strategies
|
||||
- Creating development/production configurations
|
||||
|
||||
## Skill Files Structure
|
||||
|
||||
```
|
||||
docker-compose/
|
||||
├── SKILL.md # This file
|
||||
├── patterns/
|
||||
│ ├── basic-service.md # Basic service templates
|
||||
│ ├── networking.md # Network patterns
|
||||
│ ├── volumes.md # Volume management
|
||||
│ └── healthchecks.md # Health check patterns
|
||||
└── examples/
|
||||
├── nodejs-api.md # Node.js API template
|
||||
├── postgres.md # PostgreSQL template
|
||||
└── redis.md # Redis template
|
||||
```
|
||||
|
||||
## Core Patterns
|
||||
|
||||
### 1. Basic Service Configuration
|
||||
|
||||
```yaml
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
app:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
- NODE_ENV=production
|
||||
image: myapp:latest
|
||||
container_name: myapp
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "3000:3000"
|
||||
environment:
|
||||
- NODE_ENV=production
|
||||
- DATABASE_URL=postgres://db:5432/app
|
||||
volumes:
|
||||
- ./data:/app/data
|
||||
networks:
|
||||
- app-network
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
```
|
||||
|
||||
### 2. Environment Configuration
|
||||
|
||||
```yaml
|
||||
# Use .env file for secrets
|
||||
services:
|
||||
app:
|
||||
env_file:
|
||||
- .env
|
||||
- .env.local
|
||||
environment:
|
||||
# Non-sensitive defaults
|
||||
- NODE_ENV=production
|
||||
- LOG_LEVEL=info
|
||||
# Override from .env
|
||||
- DATABASE_URL=${DATABASE_URL}
|
||||
- JWT_SECRET=${JWT_SECRET}
|
||||
```
|
||||
|
||||
### 3. Network Patterns
|
||||
|
||||
```yaml
|
||||
# Isolated networks for security
|
||||
networks:
|
||||
frontend:
|
||||
driver: bridge
|
||||
backend:
|
||||
driver: bridge
|
||||
internal: true # No external access
|
||||
|
||||
services:
|
||||
web:
|
||||
networks:
|
||||
- frontend
|
||||
- backend
|
||||
|
||||
api:
|
||||
networks:
|
||||
- backend
|
||||
|
||||
db:
|
||||
networks:
|
||||
- backend
|
||||
```
|
||||
|
||||
### 4. Volume Patterns
|
||||
|
||||
```yaml
|
||||
volumes:
|
||||
# Named volume (managed by Docker)
|
||||
postgres-data:
|
||||
driver: local
|
||||
|
||||
# Bind mount (host directory)
|
||||
# ./data:/app/data
|
||||
|
||||
services:
|
||||
db:
|
||||
volumes:
|
||||
- postgres-data:/var/lib/postgresql/data
|
||||
- ./init-scripts:/docker-entrypoint-initdb.d:ro
|
||||
|
||||
app:
|
||||
volumes:
|
||||
- ./config:/app/config:ro
|
||||
- app-logs:/app/logs
|
||||
|
||||
volumes:
|
||||
app-logs:
|
||||
```
|
||||
|
||||
### 5. Health Checks & Dependencies
|
||||
|
||||
```yaml
|
||||
services:
|
||||
db:
|
||||
image: postgres:15-alpine
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U $$POSTGRES_USER"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
app:
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_started
|
||||
```
|
||||
|
||||
### 6. Multi-Environment Configurations
|
||||
|
||||
```yaml
|
||||
# docker-compose.yml (base)
|
||||
version: '3.8'
|
||||
services:
|
||||
app:
|
||||
image: myapp:latest
|
||||
environment:
|
||||
- NODE_ENV=production
|
||||
|
||||
# docker-compose.dev.yml (development override)
|
||||
version: '3.8'
|
||||
services:
|
||||
app:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile.dev
|
||||
volumes:
|
||||
- .:/app
|
||||
- /app/node_modules
|
||||
environment:
|
||||
- NODE_ENV=development
|
||||
ports:
|
||||
- "3000:3000"
|
||||
command: npm run dev
|
||||
|
||||
# docker-compose.prod.yml (production override)
|
||||
version: '3.8'
|
||||
services:
|
||||
app:
|
||||
image: myapp:${VERSION}
|
||||
deploy:
|
||||
replicas: 3
|
||||
resources:
|
||||
limits:
|
||||
cpus: '1'
|
||||
memory: 1G
|
||||
healthcheck:
|
||||
test: ["CMD", "node", "healthcheck.js"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
```
|
||||
|
||||
## Service Templates
|
||||
|
||||
### Node.js API
|
||||
|
||||
```yaml
|
||||
services:
|
||||
api:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
environment:
|
||||
- NODE_ENV=production
|
||||
- PORT=3000
|
||||
- DATABASE_URL=postgres://db:5432/app
|
||||
- REDIS_URL=redis://redis:6379
|
||||
ports:
|
||||
- "3000:3000"
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_started
|
||||
networks:
|
||||
- backend
|
||||
healthcheck:
|
||||
test: ["CMD", "node", "-e", "require('http').get('http://localhost:3000/health', (r) => process.exit(r.statusCode === 200 ? 0 : 1))"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
```
|
||||
|
||||
### PostgreSQL Database
|
||||
|
||||
```yaml
|
||||
services:
|
||||
db:
|
||||
image: postgres:15-alpine
|
||||
environment:
|
||||
POSTGRES_DB: app
|
||||
POSTGRES_USER: ${DB_USER:-app}
|
||||
POSTGRES_PASSWORD: ${DB_PASSWORD:?DB_PASSWORD required}
|
||||
volumes:
|
||||
- postgres-data:/var/lib/postgresql/data
|
||||
- ./init-scripts:/docker-entrypoint-initdb.d:ro
|
||||
networks:
|
||||
- backend
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U $$POSTGRES_USER -d $$POSTGRES_DB"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 512M
|
||||
|
||||
volumes:
|
||||
postgres-data:
|
||||
```
|
||||
|
||||
### Redis Cache
|
||||
|
||||
```yaml
|
||||
services:
|
||||
redis:
|
||||
image: redis:7-alpine
|
||||
command: redis-server --appendonly yes --maxmemory 256mb --maxmemory-policy allkeys-lru
|
||||
volumes:
|
||||
- redis-data:/data
|
||||
networks:
|
||||
- backend
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "ping"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
volumes:
|
||||
redis-data:
|
||||
```
|
||||
|
||||
### Nginx Reverse Proxy
|
||||
|
||||
```yaml
|
||||
services:
|
||||
nginx:
|
||||
image: nginx:alpine
|
||||
ports:
|
||||
- "80:80"
|
||||
- "443:443"
|
||||
volumes:
|
||||
- ./nginx.conf:/etc/nginx/nginx.conf:ro
|
||||
- ./ssl:/etc/nginx/ssl:ro
|
||||
depends_on:
|
||||
- api
|
||||
networks:
|
||||
- frontend
|
||||
- backend
|
||||
healthcheck:
|
||||
test: ["CMD", "nginx", "-t"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
```
|
||||
|
||||
## Common Commands
|
||||
|
||||
```bash
|
||||
# Start services
|
||||
docker-compose up -d
|
||||
|
||||
# Start specific service
|
||||
docker-compose up -d app
|
||||
|
||||
# View logs
|
||||
docker-compose logs -f app
|
||||
|
||||
# Execute command in container
|
||||
docker-compose exec app sh
|
||||
docker-compose exec app npm test
|
||||
|
||||
# Stop services
|
||||
docker-compose down
|
||||
|
||||
# Stop and remove volumes
|
||||
docker-compose down -v
|
||||
|
||||
# Rebuild images
|
||||
docker-compose build --no-cache app
|
||||
|
||||
# Scale service
|
||||
docker-compose up -d --scale api=3
|
||||
|
||||
# Multi-environment
|
||||
docker-compose -f docker-compose.yml -f docker-compose.dev.yml up
|
||||
docker-compose -f docker-compose.yml -f docker-compose.prod.yml up -d
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
### Security
|
||||
|
||||
1. **Never hard-code secrets in compose files or images**
|
||||
```yaml
|
||||
# Bad
|
||||
environment:
|
||||
- DB_PASSWORD=password123
|
||||
|
||||
# Good
|
||||
secrets:
|
||||
- db_password
|
||||
secrets:
|
||||
db_password:
|
||||
file: ./secrets/db_password.txt
|
||||
```
|
||||
|
||||
2. **Use non-root user**
|
||||
```yaml
|
||||
services:
|
||||
app:
|
||||
user: "1000:1000"
|
||||
```
|
||||
|
||||
3. **Limit resources**
|
||||
```yaml
|
||||
services:
|
||||
app:
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '1'
|
||||
memory: 1G
|
||||
```
|
||||
|
||||
4. **Use internal networks for databases**
|
||||
```yaml
|
||||
networks:
|
||||
backend:
|
||||
internal: true
|
||||
```
|
||||
|
||||
### Performance
|
||||
|
||||
1. **Enable health checks**
|
||||
```yaml
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
```
|
||||
|
||||
2. **Use .dockerignore**
|
||||
```
|
||||
node_modules
|
||||
.git
|
||||
.env
|
||||
*.log
|
||||
coverage
|
||||
.nyc_output
|
||||
```
|
||||
|
||||
3. **Optimize build cache**
|
||||
```yaml
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
- NODE_ENV=production
|
||||
```
|
||||
|
||||
### Development
|
||||
|
||||
1. **Use volumes for hot reload**
|
||||
```yaml
|
||||
services:
|
||||
app:
|
||||
volumes:
|
||||
- .:/app
|
||||
- /app/node_modules # Anonymous volume for node_modules
|
||||
```
|
||||
|
||||
2. **Keep containers running**
|
||||
```yaml
|
||||
services:
|
||||
app:
|
||||
stdin_open: true # -i
|
||||
tty: true # -t
|
||||
```
|
||||
|
||||
### Production
|
||||
|
||||
1. **Use specific image versions**
|
||||
```yaml
|
||||
# Bad
|
||||
image: node:latest
|
||||
|
||||
# Good
|
||||
image: node:20-alpine
|
||||
```
|
||||
|
||||
2. **Configure logging**
|
||||
```yaml
|
||||
services:
|
||||
app:
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
```
|
||||
|
||||
3. **Restart policies**
|
||||
```yaml
|
||||
services:
|
||||
app:
|
||||
restart: unless-stopped
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Common Issues
|
||||
|
||||
1. **Container won't start**
|
||||
```bash
|
||||
# Check logs
|
||||
docker-compose logs app
|
||||
|
||||
# Check container status
|
||||
docker-compose ps
|
||||
|
||||
# Inspect container
|
||||
docker inspect myapp_app_1
|
||||
```
|
||||
|
||||
2. **Network connectivity issues**
|
||||
```bash
|
||||
# List networks
|
||||
docker network ls
|
||||
|
||||
# Inspect network
|
||||
docker network inspect myapp_default
|
||||
|
||||
# Test connectivity
|
||||
docker-compose exec app ping db
|
||||
```
|
||||
|
||||
3. **Volume permission issues**
|
||||
```bash
|
||||
# Check volume
|
||||
docker volume inspect myapp_postgres-data
|
||||
|
||||
# Fix permissions (if needed)
|
||||
docker-compose exec app chown -R node:node /app/data
|
||||
```
|
||||
|
||||
4. **Health check failing**
|
||||
```bash
|
||||
# Run health check manually
|
||||
docker-compose exec app curl -f http://localhost:3000/health
|
||||
|
||||
# Check health status
|
||||
docker inspect --format='{{.State.Health.Status}}' myapp_app_1
|
||||
```
|
||||
|
||||
5. **Out of disk space**
|
||||
```bash
|
||||
# Clean up
|
||||
docker system prune -a --volumes
|
||||
|
||||
# Check disk usage
|
||||
docker system df
|
||||
```
|
||||
|
||||
## Integration with CI/CD
|
||||
|
||||
### GitHub Actions
|
||||
|
||||
```yaml
|
||||
# .github/workflows/test.yml
|
||||
name: Test
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Build and test
|
||||
run: |
|
||||
docker-compose -f docker-compose.yml -f docker-compose.test.yml up --abort-on-container-exit --exit-code-from app
|
||||
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: docker-compose down -v
|
||||
```
|
||||
|
||||
### GitLab CI
|
||||
|
||||
```yaml
|
||||
# .gitlab-ci.yml
|
||||
stages:
|
||||
- test
|
||||
- build
|
||||
|
||||
test:
|
||||
stage: test
|
||||
script:
|
||||
- docker-compose -f docker-compose.yml -f docker-compose.test.yml up --abort-on-container-exit --exit-code-from app
|
||||
after_script:
|
||||
- docker-compose down -v
|
||||
|
||||
build:
|
||||
stage: build
|
||||
script:
|
||||
- docker build -t myapp:$CI_COMMIT_SHA .
|
||||
- docker push myapp:$CI_COMMIT_SHA
|
||||
```
|
||||
|
||||
## Related Skills
|
||||
|
||||
| Skill | Purpose |
|
||||
|-------|---------|
|
||||
| `docker-swarm` | Orchestration with Docker Swarm |
|
||||
| `docker-security` | Container security patterns |
|
||||
| `docker-networking` | Advanced networking techniques |
|
||||
| `docker-monitoring` | Container monitoring and logging |
|
||||
447
.kilo/skills/docker-compose/patterns/basic-service.md
Normal file
447
.kilo/skills/docker-compose/patterns/basic-service.md
Normal file
@@ -0,0 +1,447 @@
|
||||
# Docker Compose Patterns
|
||||
|
||||
## Pattern: Multi-Service Application
|
||||
|
||||
Complete pattern for a typical web application with API, database, cache, and reverse proxy.
|
||||
|
||||
```yaml
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
# Reverse Proxy
|
||||
nginx:
|
||||
image: nginx:alpine
|
||||
ports:
|
||||
- "80:80"
|
||||
- "443:443"
|
||||
volumes:
|
||||
- ./nginx.conf:/etc/nginx/nginx.conf:ro
|
||||
- ./ssl:/etc/nginx/ssl:ro
|
||||
depends_on:
|
||||
- api
|
||||
networks:
|
||||
- frontend
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '0.5'
|
||||
memory: 256M
|
||||
healthcheck:
|
||||
test: ["CMD", "nginx", "-t"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
|
||||
# API Service
|
||||
api:
|
||||
build:
|
||||
context: ./api
|
||||
dockerfile: Dockerfile
|
||||
environment:
|
||||
- NODE_ENV=production
|
||||
- DATABASE_URL=postgres://db:5432/app
|
||||
- REDIS_URL=redis://cache:6379
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
cache:
|
||||
condition: service_started
|
||||
networks:
|
||||
- frontend
|
||||
- backend
|
||||
deploy:
|
||||
replicas: 3
|
||||
resources:
|
||||
limits:
|
||||
cpus: '1'
|
||||
memory: 1G
|
||||
reservations:
|
||||
cpus: '0.5'
|
||||
memory: 512M
|
||||
healthcheck:
|
||||
test: ["CMD", "node", "-e", "require('http').get('http://localhost:3000/health', (r) => process.exit(r.statusCode === 200 ? 0 : 1))"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 60s
|
||||
|
||||
# Database
|
||||
db:
|
||||
image: postgres:15-alpine
|
||||
environment:
|
||||
POSTGRES_DB: app
|
||||
POSTGRES_USER: ${DB_USER:-app}
|
||||
POSTGRES_PASSWORD: ${DB_PASSWORD:?DB_PASSWORD required}
|
||||
volumes:
|
||||
- postgres-data:/var/lib/postgresql/data
|
||||
- ./init-scripts:/docker-entrypoint-initdb.d:ro
|
||||
networks:
|
||||
- backend
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U $POSTGRES_USER -d $POSTGRES_DB"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '2'
|
||||
memory: 2G
|
||||
|
||||
# Cache
|
||||
cache:
|
||||
image: redis:7-alpine
|
||||
command: redis-server --appendonly yes --maxmemory 256mb --maxmemory-policy allkeys-lru
|
||||
volumes:
|
||||
- redis-data:/data
|
||||
networks:
|
||||
- backend
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "ping"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
networks:
|
||||
frontend:
|
||||
driver: bridge
|
||||
backend:
|
||||
driver: bridge
|
||||
internal: true # No external access
|
||||
|
||||
volumes:
|
||||
postgres-data:
|
||||
driver: local
|
||||
redis-data:
|
||||
driver: local
|
||||
```
|
||||
|
||||
## Pattern: Development Override
|
||||
|
||||
Development-specific configuration with hot reload and debugging.
|
||||
|
||||
```yaml
|
||||
# docker-compose.dev.yml
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
api:
|
||||
build:
|
||||
context: ./api
|
||||
dockerfile: Dockerfile.dev
|
||||
volumes:
|
||||
- ./api/src:/app/src:ro
|
||||
- ./api/tests:/app/tests:ro
|
||||
- /app/node_modules
|
||||
environment:
|
||||
- NODE_ENV=development
|
||||
- DEBUG=app:*
|
||||
ports:
|
||||
- "3000:3000"
|
||||
- "9229:9229" # Node.js debugger
|
||||
command: npm run dev
|
||||
|
||||
db:
|
||||
ports:
|
||||
- "5432:5432" # Expose for local tools
|
||||
|
||||
cache:
|
||||
ports:
|
||||
- "6379:6379" # Expose for local tools
|
||||
```
|
||||
|
||||
```bash
|
||||
# Usage
|
||||
docker-compose -f docker-compose.yml -f docker-compose.dev.yml up
|
||||
```
|
||||
|
||||
## Pattern: Production Override
|
||||
|
||||
Production-optimized configuration with security and performance settings.
|
||||
|
||||
```yaml
|
||||
# docker-compose.prod.yml
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
api:
|
||||
image: myapp/api:${VERSION}
|
||||
deploy:
|
||||
replicas: 3
|
||||
update_config:
|
||||
parallelism: 1
|
||||
delay: 10s
|
||||
failure_action: rollback
|
||||
rollback_config:
|
||||
parallelism: 1
|
||||
delay: 10s
|
||||
resources:
|
||||
limits:
|
||||
cpus: '1'
|
||||
memory: 1G
|
||||
reservations:
|
||||
cpus: '0.5'
|
||||
memory: 512M
|
||||
environment:
|
||||
- NODE_ENV=production
|
||||
secrets:
|
||||
- db_password
|
||||
- jwt_secret
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "5"
|
||||
|
||||
secrets:
|
||||
db_password:
|
||||
external: true
|
||||
jwt_secret:
|
||||
external: true
|
||||
```
|
||||
|
||||
```bash
|
||||
# Usage
|
||||
docker-compose -f docker-compose.yml -f docker-compose.prod.yml up -d
|
||||
```
|
||||
|
||||
## Pattern: Health Check Dependency
|
||||
|
||||
Waiting for dependent services to be healthy before starting.
|
||||
|
||||
```yaml
|
||||
services:
|
||||
app:
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
cache:
|
||||
condition: service_healthy
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 60s
|
||||
|
||||
db:
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U $POSTGRES_USER"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
cache:
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "ping"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
```
|
||||
|
||||
## Pattern: Secrets Management
|
||||
|
||||
Using Docker secrets for sensitive data (Swarm mode).
|
||||
|
||||
```yaml
|
||||
services:
|
||||
app:
|
||||
secrets:
|
||||
- db_password
|
||||
- api_key
|
||||
- jwt_secret
|
||||
environment:
|
||||
- DB_PASSWORD_FILE=/run/secrets/db_password
|
||||
- API_KEY_FILE=/run/secrets/api_key
|
||||
- JWT_SECRET_FILE=/run/secrets/jwt_secret
|
||||
|
||||
secrets:
|
||||
db_password:
|
||||
file: ./secrets/db_password.txt
|
||||
api_key:
|
||||
file: ./secrets/api_key.txt
|
||||
jwt_secret:
|
||||
external: true # Created via: echo "secret" | docker secret create jwt_secret -
|
||||
```
|
||||
|
||||
## Pattern: Resource Limits
|
||||
|
||||
Setting resource constraints for containers.
|
||||
|
||||
```yaml
|
||||
services:
|
||||
api:
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '1.0'
|
||||
memory: 1G
|
||||
reservations:
|
||||
cpus: '0.5'
|
||||
memory: 512M
|
||||
# Alternative for non-Swarm
|
||||
mem_limit: 1G
|
||||
memswap_limit: 1G
|
||||
cpus: 1
|
||||
```
|
||||
|
||||
## Pattern: Network Isolation
|
||||
|
||||
Segmenting networks for security.
|
||||
|
||||
```yaml
|
||||
services:
|
||||
web:
|
||||
networks:
|
||||
- frontend
|
||||
- backend
|
||||
|
||||
api:
|
||||
networks:
|
||||
- backend
|
||||
- database
|
||||
|
||||
db:
|
||||
networks:
|
||||
- database
|
||||
|
||||
networks:
|
||||
frontend:
|
||||
driver: bridge
|
||||
backend:
|
||||
driver: bridge
|
||||
database:
|
||||
driver: bridge
|
||||
internal: true # No internet access
|
||||
```
|
||||
|
||||
## Pattern: Volume Management
|
||||
|
||||
Different volume types for different use cases.
|
||||
|
||||
```yaml
|
||||
services:
|
||||
app:
|
||||
volumes:
|
||||
# Named volume (managed by Docker)
|
||||
- app-data:/app/data
|
||||
# Bind mount (host directory)
|
||||
- ./config:/app/config:ro
|
||||
# Anonymous volume (for node_modules)
|
||||
- /app/node_modules
|
||||
# tmpfs (temporary in-memory)
|
||||
- type: tmpfs
|
||||
target: /tmp
|
||||
tmpfs:
|
||||
size: 100M
|
||||
|
||||
volumes:
|
||||
app-data:
|
||||
driver: local
|
||||
labels:
|
||||
- "app=myapp"
|
||||
- "type=persistent"
|
||||
```
|
||||
|
||||
## Pattern: Logging Configuration
|
||||
|
||||
Configuring logging drivers and options.
|
||||
|
||||
```yaml
|
||||
services:
|
||||
app:
|
||||
logging:
|
||||
driver: "json-file" # Default
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
labels: "app,environment"
|
||||
tag: "{{.ImageName}}/{{.Name}}"
|
||||
|
||||
# Syslog logging
|
||||
app-syslog:
|
||||
logging:
|
||||
driver: "syslog"
|
||||
options:
|
||||
syslog-address: "tcp://logserver:514"
|
||||
syslog-facility: "daemon"
|
||||
tag: "myapp"
|
||||
|
||||
# Fluentd logging
|
||||
app-fluentd:
|
||||
logging:
|
||||
driver: "fluentd"
|
||||
options:
|
||||
fluentd-address: "localhost:24224"
|
||||
tag: "myapp.api"
|
||||
```
|
||||
|
||||
## Pattern: Multi-Environment
|
||||
|
||||
Managing multiple environments with overrides.
|
||||
|
||||
```bash
|
||||
# Directory structure
|
||||
# docker-compose.yml # Base configuration
|
||||
# docker-compose.dev.yml # Development overrides
|
||||
# docker-compose.staging.yml # Staging overrides
|
||||
# docker-compose.prod.yml # Production overrides
|
||||
# .env # Environment variables
|
||||
# .env.dev # Development variables
|
||||
# .env.staging # Staging variables
|
||||
# .env.prod # Production variables
|
||||
|
||||
# Development
|
||||
docker-compose --env-file .env.dev \
|
||||
-f docker-compose.yml -f docker-compose.dev.yml up
|
||||
|
||||
# Staging
|
||||
docker-compose --env-file .env.staging \
|
||||
-f docker-compose.yml -f docker-compose.staging.yml up -d
|
||||
|
||||
# Production
|
||||
docker-compose --env-file .env.prod \
|
||||
-f docker-compose.yml -f docker-compose.prod.yml up -d
|
||||
```
|
||||
|
||||
## Pattern: CI/CD Testing
|
||||
|
||||
Running tests in isolated containers.
|
||||
|
||||
```yaml
|
||||
# docker-compose.test.yml
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
app:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
environment:
|
||||
- NODE_ENV=test
|
||||
- DATABASE_URL=postgres://test:test@db:5432/test
|
||||
depends_on:
|
||||
- db
|
||||
command: npm test
|
||||
networks:
|
||||
- test-network
|
||||
|
||||
db:
|
||||
image: postgres:15-alpine
|
||||
environment:
|
||||
POSTGRES_DB: test
|
||||
POSTGRES_USER: test
|
||||
POSTGRES_PASSWORD: test
|
||||
networks:
|
||||
- test-network
|
||||
|
||||
networks:
|
||||
test-network:
|
||||
driver: bridge
|
||||
```
|
||||
|
||||
```bash
|
||||
# CI pipeline
|
||||
docker-compose -f docker-compose.test.yml up --abort-on-container-exit --exit-code-from app
|
||||
docker-compose -f docker-compose.test.yml down -v
|
||||
```
|
||||
756
.kilo/skills/docker-monitoring/SKILL.md
Normal file
756
.kilo/skills/docker-monitoring/SKILL.md
Normal file
@@ -0,0 +1,756 @@
|
||||
# Skill: Docker Monitoring & Logging
|
||||
|
||||
## Purpose
|
||||
|
||||
Comprehensive skill for Docker container monitoring, logging, metrics collection, and observability.
|
||||
|
||||
## Overview
|
||||
|
||||
Container monitoring is essential for understanding application health, performance, and troubleshooting issues in production. Use this skill for setting up monitoring stacks, configuring logging, and implementing observability.
|
||||
|
||||
## When to Use
|
||||
|
||||
- Setting up container monitoring
|
||||
- Configuring centralized logging
|
||||
- Implementing health checks
|
||||
- Performance optimization
|
||||
- Troubleshooting container issues
|
||||
- Alerting configuration
|
||||
|
||||
## Monitoring Stack
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ Container Monitoring Stack │
|
||||
├─────────────────────────────────────────────────────────────┤
|
||||
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
||||
│ │ Grafana │ │ Prometheus │ │ Alertmgr │ │
|
||||
│ │ Dashboard │ │ Metrics │ │ Alerts │ │
|
||||
│ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ │
|
||||
│ │ │ │ │
|
||||
│ ┌──────┴────────────────┴────────────────┴──────┐ │
|
||||
│ │ Container Observability │ │
|
||||
│ └──────┬────────────────┬───────────────────────┘ │
|
||||
│ │ │ │
|
||||
│ ┌──────┴──────┐ ┌──────┴──────┐ ┌─────────────┐ │
|
||||
│ │ cAdvisor │ │ node-exporter│ │ Loki/EFK │ │
|
||||
│ │ Container │ │ Node Metrics│ │ Logging │ │
|
||||
│ │ Metrics │ │ │ │ │ │
|
||||
│ └─────────────┘ └─────────────┘ └─────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Health Checks
|
||||
|
||||
### 1. Dockerfile Health Check
|
||||
|
||||
```dockerfile
|
||||
FROM node:20-alpine
|
||||
|
||||
WORKDIR /app
|
||||
COPY . .
|
||||
RUN npm ci --only=production
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
|
||||
CMD wget --no-verbose --tries=1 --spider http://localhost:3000/health || exit 1
|
||||
|
||||
# Or with curl (not installed on Alpine by default; add it with `apk add --no-cache curl`)
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
|
||||
CMD curl -f http://localhost:3000/health || exit 1
|
||||
|
||||
# Or use Node.js for health check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
|
||||
CMD node -e "require('http').get('http://localhost:3000/health', (r) => process.exit(r.statusCode === 200 ? 0 : 1))"
|
||||
```
|
||||
|
||||
### 2. Docker Compose Health Check
|
||||
|
||||
```yaml
|
||||
services:
|
||||
api:
|
||||
image: myapp:latest
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 60s
|
||||
|
||||
db:
|
||||
image: postgres:15-alpine
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U $POSTGRES_USER"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
```
|
||||
|
||||
### 3. Docker Swarm Health Check
|
||||
|
||||
```yaml
|
||||
services:
|
||||
api:
|
||||
image: myapp:latest
|
||||
deploy:
|
||||
update_config:
|
||||
failure_action: rollback
|
||||
monitor: 30s
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 60s
|
||||
```
|
||||
|
||||
### 4. Application Health Endpoint
|
||||
|
||||
```javascript
|
||||
// Node.js health check endpoint
|
||||
const express = require('express');
|
||||
const app = express();
|
||||
|
||||
// Dependencies status
|
||||
async function checkHealth() {
|
||||
const checks = {
|
||||
database: await checkDatabase(),
|
||||
redis: await checkRedis(),
|
||||
disk: checkDiskSpace(),
|
||||
memory: checkMemory()
|
||||
};
|
||||
|
||||
const healthy = Object.values(checks).every(c => c === 'healthy');
|
||||
|
||||
return {
|
||||
status: healthy ? 'healthy' : 'unhealthy',
|
||||
timestamp: new Date().toISOString(),
|
||||
checks
|
||||
};
|
||||
}
|
||||
|
||||
app.get('/health', async (req, res) => {
|
||||
const health = await checkHealth();
|
||||
const status = health.status === 'healthy' ? 200 : 503;
|
||||
res.status(status).json(health);
|
||||
});
|
||||
|
||||
app.get('/health/live', (req, res) => {
|
||||
// Liveness probe - is the app running?
|
||||
res.status(200).json({ status: 'alive' });
|
||||
});
|
||||
|
||||
app.get('/health/ready', async (req, res) => {
|
||||
// Readiness probe - is the app ready to serve?
|
||||
const ready = await isReady();
|
||||
res.status(ready ? 200 : 503).json({ ready });
|
||||
});
|
||||
```
|
||||
|
||||
## Logging
|
||||
|
||||
### 1. Docker Logging Drivers
|
||||
|
||||
```yaml
|
||||
# JSON file driver (default)
|
||||
services:
|
||||
api:
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
labels: "app,environment"
|
||||
|
||||
# Syslog driver
|
||||
services:
|
||||
api:
|
||||
logging:
|
||||
driver: "syslog"
|
||||
options:
|
||||
syslog-address: "tcp://logserver:514"
|
||||
syslog-facility: "daemon"
|
||||
tag: "myapp"
|
||||
|
||||
# Journald driver
|
||||
services:
|
||||
api:
|
||||
logging:
|
||||
driver: "journald"
|
||||
options:
|
||||
labels: "app,environment"
|
||||
|
||||
# Fluentd driver
|
||||
services:
|
||||
api:
|
||||
logging:
|
||||
driver: "fluentd"
|
||||
options:
|
||||
fluentd-address: "localhost:24224"
|
||||
tag: "myapp.api"
|
||||
```
|
||||
|
||||
### 2. Structured Logging
|
||||
|
||||
```javascript
|
||||
// Pino for structured logging
|
||||
const pino = require('pino');
|
||||
|
||||
const logger = pino({
|
||||
level: process.env.LOG_LEVEL || 'info',
|
||||
formatters: {
|
||||
level: (label) => ({ level: label })
|
||||
},
|
||||
timestamp: pino.stdTimeFunctions.isoTime
|
||||
});
|
||||
|
||||
// Log with context
|
||||
logger.info({
|
||||
userId: '123',
|
||||
action: 'login',
|
||||
ip: '192.168.1.1'
|
||||
}, 'User logged in');
|
||||
|
||||
// Output:
|
||||
// {"level":"info","time":"2024-01-01T12:00:00.000Z","userId":"123","action":"login","ip":"192.168.1.1","msg":"User logged in"}
|
||||
```
|
||||
|
||||
### 3. EFK Stack (Elasticsearch, Fluentd, Kibana)
|
||||
|
||||
```yaml
|
||||
# docker-compose.yml
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
elasticsearch:
|
||||
image: elasticsearch:8.10.0
|
||||
environment:
|
||||
- discovery.type=single-node
|
||||
- xpack.security.enabled=false
|
||||
volumes:
|
||||
- elasticsearch-data:/usr/share/elasticsearch/data
|
||||
networks:
|
||||
- logging
|
||||
|
||||
fluentd:
|
||||
image: fluent/fluentd:v1.16
|
||||
volumes:
|
||||
- ./fluentd/conf:/fluentd/etc
|
||||
ports:
|
||||
- "24224:24224"
|
||||
networks:
|
||||
- logging
|
||||
|
||||
kibana:
|
||||
image: kibana:8.10.0
|
||||
environment:
|
||||
- ELASTICSEARCH_HOSTS=http://elasticsearch:9200
|
||||
ports:
|
||||
- "5601:5601"
|
||||
networks:
|
||||
- logging
|
||||
|
||||
app:
|
||||
image: myapp:latest
|
||||
logging:
|
||||
driver: "fluentd"
|
||||
options:
|
||||
fluentd-address: "localhost:24224"
|
||||
tag: "myapp.api"
|
||||
networks:
|
||||
- logging
|
||||
|
||||
volumes:
|
||||
elasticsearch-data:
|
||||
|
||||
networks:
|
||||
logging:
|
||||
```
|
||||
|
||||
### 4. Loki Stack (Promtail, Loki, Grafana)
|
||||
|
||||
```yaml
|
||||
# docker-compose.yml
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
loki:
|
||||
image: grafana/loki:latest
|
||||
ports:
|
||||
- "3100:3100"
|
||||
volumes:
|
||||
- ./loki-config.yml:/etc/loki/local-config.yaml
|
||||
command: -config.file=/etc/loki/local-config.yaml
|
||||
networks:
|
||||
- monitoring
|
||||
|
||||
promtail:
|
||||
image: grafana/promtail:latest
|
||||
volumes:
|
||||
- /var/log:/var/log
|
||||
- ./promtail-config.yml:/etc/promtail/config.yml
|
||||
command: -config.file=/etc/promtail/config.yml
|
||||
networks:
|
||||
- monitoring
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:latest
|
||||
ports:
|
||||
- "3000:3000"
|
||||
environment:
|
||||
- GF_SECURITY_ADMIN_PASSWORD=admin
|
||||
volumes:
|
||||
- grafana-data:/var/lib/grafana
|
||||
networks:
|
||||
- monitoring
|
||||
|
||||
app:
|
||||
image: myapp:latest
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
networks:
|
||||
- monitoring
|
||||
|
||||
volumes:
|
||||
grafana-data:
|
||||
|
||||
networks:
|
||||
monitoring:
|
||||
```
|
||||
|
||||
## Metrics Collection
|
||||
|
||||
### 1. Prometheus + cAdvisor
|
||||
|
||||
```yaml
|
||||
# docker-compose.yml
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
prometheus:
|
||||
image: prom/prometheus:latest
|
||||
ports:
|
||||
- "9090:9090"
|
||||
volumes:
|
||||
- ./prometheus.yml:/etc/prometheus/prometheus.yml
|
||||
- prometheus-data:/prometheus
|
||||
command:
|
||||
- '--config.file=/etc/prometheus/prometheus.yml'
|
||||
- '--storage.tsdb.retention.time=30d'
|
||||
networks:
|
||||
- monitoring
|
||||
|
||||
cadvisor:
|
||||
image: gcr.io/cadvisor/cadvisor:latest
|
||||
ports:
|
||||
- "8080:8080"
|
||||
volumes:
|
||||
- /:/rootfs:ro
|
||||
- /var/run:/var/run:ro
|
||||
- /sys:/sys:ro
|
||||
- /var/lib/docker/:/var/lib/docker:ro
|
||||
networks:
|
||||
- monitoring
|
||||
|
||||
node_exporter:
|
||||
image: prom/node-exporter:latest
|
||||
ports:
|
||||
- "9100:9100"
|
||||
volumes:
|
||||
- /proc:/host/proc:ro
|
||||
- /sys:/host/sys:ro
|
||||
- /:/rootfs:ro
|
||||
command:
|
||||
- '--path.procfs=/host/proc'
|
||||
- '--path.rootfs=/rootfs'
|
||||
- '--path.sysfs=/host/sys'
|
||||
networks:
|
||||
- monitoring
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:latest
|
||||
ports:
|
||||
- "3000:3000"
|
||||
environment:
|
||||
- GF_SECURITY_ADMIN_PASSWORD=admin
|
||||
volumes:
|
||||
- grafana-data:/var/lib/grafana
|
||||
networks:
|
||||
- monitoring
|
||||
|
||||
volumes:
|
||||
prometheus-data:
|
||||
grafana-data:
|
||||
|
||||
networks:
|
||||
monitoring:
|
||||
```
|
||||
|
||||
### 2. Prometheus Configuration
|
||||
|
||||
```yaml
|
||||
# prometheus.yml
|
||||
global:
|
||||
scrape_interval: 15s
|
||||
evaluation_interval: 15s
|
||||
|
||||
scrape_configs:
|
||||
# Prometheus itself
|
||||
- job_name: 'prometheus'
|
||||
static_configs:
|
||||
- targets: ['prometheus:9090']
|
||||
|
||||
# cAdvisor (container metrics)
|
||||
- job_name: 'cadvisor'
|
||||
static_configs:
|
||||
- targets: ['cadvisor:8080']
|
||||
|
||||
# Node exporter (host metrics)
|
||||
- job_name: 'node'
|
||||
static_configs:
|
||||
- targets: ['node_exporter:9100']
|
||||
|
||||
# Application metrics
|
||||
- job_name: 'app'
|
||||
static_configs:
|
||||
- targets: ['app:3000']
|
||||
metrics_path: '/metrics'
|
||||
```
|
||||
|
||||
### 3. Application Metrics (Prometheus Client)
|
||||
|
||||
```javascript
|
||||
// Node.js with prom-client
|
||||
const promClient = require('prom-client');
|
||||
|
||||
// Enable default metrics
|
||||
promClient.collectDefaultMetrics();
|
||||
|
||||
// Custom metrics
|
||||
const httpRequestDuration = new promClient.Histogram({
|
||||
name: 'http_request_duration_seconds',
|
||||
help: 'Duration of HTTP requests in seconds',
|
||||
labelNames: ['method', 'route', 'status_code'],
|
||||
buckets: [0.1, 0.3, 0.5, 0.7, 1, 3, 5, 7, 10]
|
||||
});
|
||||
|
||||
const activeConnections = new promClient.Gauge({
|
||||
name: 'active_connections',
|
||||
help: 'Number of active connections'
|
||||
});
|
||||
|
||||
const dbQueryDuration = new promClient.Histogram({
|
||||
name: 'db_query_duration_seconds',
|
||||
help: 'Duration of database queries in seconds',
|
||||
labelNames: ['query_type', 'table'],
|
||||
buckets: [0.01, 0.05, 0.1, 0.5, 1, 2]
|
||||
});
|
||||
|
||||
// Middleware for HTTP metrics
|
||||
app.use((req, res, next) => {
|
||||
const end = httpRequestDuration.startTimer();
|
||||
res.on('finish', () => {
|
||||
end({ method: req.method, route: req.route?.path || req.path, status_code: res.statusCode });
|
||||
});
|
||||
next();
|
||||
});
|
||||
|
||||
// Metrics endpoint
|
||||
app.get('/metrics', async (req, res) => {
|
||||
res.set('Content-Type', promClient.register.contentType);
|
||||
res.send(await promClient.register.metrics());
|
||||
});
|
||||
```
|
||||
|
||||
### 4. Grafana Dashboards
|
||||
|
||||
```json
|
||||
// Dashboard JSON for container metrics
|
||||
{
|
||||
"dashboard": {
|
||||
"title": "Docker Container Metrics",
|
||||
"panels": [
|
||||
{
|
||||
"title": "Container CPU Usage",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(container_cpu_usage_seconds_total{name=~\".+\"}[5m]) * 100",
|
||||
"legendFormat": "{{name}}"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Container Memory Usage",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "container_memory_usage_bytes{name=~\".+\"} / 1024 / 1024",
|
||||
"legendFormat": "{{name}} MB"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Container Network I/O",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(container_network_receive_bytes_total{name=~\".+\"}[5m])",
|
||||
"legendFormat": "{{name}} RX"
|
||||
},
|
||||
{
|
||||
"expr": "rate(container_network_transmit_bytes_total{name=~\".+\"}[5m])",
|
||||
"legendFormat": "{{name}} TX"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Alerting
|
||||
|
||||
### 1. Alertmanager Configuration
|
||||
|
||||
```yaml
|
||||
# alertmanager.yml
|
||||
global:
|
||||
smtp_smarthost: 'smtp.example.com:587'
|
||||
smtp_from: 'alerts@example.com'
|
||||
smtp_auth_username: 'alerts@example.com'
|
||||
smtp_auth_password: 'password'
|
||||
|
||||
route:
|
||||
group_by: ['alertname', 'severity']
|
||||
group_wait: 30s
|
||||
group_interval: 5m
|
||||
repeat_interval: 1h
|
||||
receiver: 'team-email'
|
||||
routes:
|
||||
- match:
|
||||
severity: critical
|
||||
receiver: 'team-email-critical'
|
||||
- match:
|
||||
severity: warning
|
||||
receiver: 'team-email-warning'
|
||||
|
||||
receivers:
|
||||
- name: 'team-email-critical'
|
||||
email_configs:
|
||||
- to: 'critical@example.com'
|
||||
send_resolved: true
|
||||
|
||||
- name: 'team-email-warning'
|
||||
email_configs:
|
||||
- to: 'warnings@example.com'
|
||||
send_resolved: true
|
||||
```
|
||||
|
||||
### 2. Prometheus Alert Rules
|
||||
|
||||
```yaml
|
||||
# alerts.yml
|
||||
groups:
|
||||
- name: container_alerts
|
||||
rules:
|
||||
# Container down
|
||||
- alert: ContainerDown
|
||||
expr: absent(container_last_seen{name=~".+"})
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Container {{ $labels.name }} is down"
|
||||
description: "Container {{ $labels.name }} has been down for more than 5 minutes."
|
||||
|
||||
# High CPU
|
||||
- alert: HighCpuUsage
|
||||
expr: rate(container_cpu_usage_seconds_total{name=~".+"}[5m]) * 100 > 80
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "High CPU usage on {{ $labels.name }}"
|
||||
description: "Container {{ $labels.name }} CPU usage is {{ $value }}%."
|
||||
|
||||
# High Memory
|
||||
- alert: HighMemoryUsage
|
||||
expr: (container_memory_usage_bytes{name=~".+"} / container_spec_memory_limit_bytes{name=~".+"}) * 100 > 80
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "High memory usage on {{ $labels.name }}"
|
||||
description: "Container {{ $labels.name }} memory usage is {{ $value }}%."
|
||||
|
||||
# Container restart
|
||||
- alert: ContainerRestart
|
||||
expr: increase(container_restart_count{name=~".+"}[1h]) > 0
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Container {{ $labels.name }} restarted"
|
||||
description: "Container {{ $labels.name }} has restarted {{ $value }} times in the last hour."
|
||||
|
||||
# Health check failing
|
||||
- alert: NoHealthCheck
|
||||
expr: container_health_status{name=~".+"} == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Health check failing for {{ $labels.name }}"
|
||||
description: "Container {{ $labels.name }} health check has been failing for 5 minutes."
|
||||
```
|
||||
|
||||
## Observability Best Practices
|
||||
|
||||
### 1. Three Pillars
|
||||
|
||||
| Pillar | Tool | Purpose |
|
||||
|--------|------|---------|
|
||||
| Metrics | Prometheus | Quantitative measurements |
|
||||
| Logs | Loki/EFK | Event records |
|
||||
| Traces | Jaeger/Zipkin | Request flow |
|
||||
|
||||
### 2. Metrics Categories
|
||||
|
||||
```yaml
|
||||
# Four Golden Signals (Google SRE)
|
||||
|
||||
# 1. Latency
|
||||
- http_request_duration_seconds
|
||||
- db_query_duration_seconds
|
||||
|
||||
# 2. Traffic
|
||||
- http_requests_per_second
|
||||
- active_connections
|
||||
|
||||
# 3. Errors
|
||||
- http_requests_failed_total
|
||||
- error_rate
|
||||
|
||||
# 4. Saturation
|
||||
- container_memory_usage_bytes
|
||||
- container_cpu_usage_seconds_total
|
||||
```
|
||||
|
||||
### 3. Service Level Objectives (SLOs)
|
||||
|
||||
```yaml
|
||||
# Prometheus recording rules for SLO
|
||||
groups:
|
||||
- name: slo_rules
|
||||
rules:
|
||||
- record: slo:availability:ratio_5m
|
||||
expr: |
|
||||
sum(rate(http_requests_total{status!~"5.."}[5m])) /
|
||||
sum(rate(http_requests_total[5m]))
|
||||
|
||||
- record: slo:latency:p99_5m
|
||||
expr: |
|
||||
histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m]))
|
||||
|
||||
- record: slo:error_rate:ratio_5m
|
||||
expr: |
|
||||
sum(rate(http_requests_total{status=~"5.."}[5m])) /
|
||||
sum(rate(http_requests_total[5m]))
|
||||
```
|
||||
|
||||
## Troubleshooting Commands
|
||||
|
||||
```bash
|
||||
# View container logs
|
||||
docker logs <container_id>
|
||||
docker logs -f --tail 100 <container_id>
|
||||
|
||||
# View resource usage
|
||||
docker stats
|
||||
docker stats --no-stream
|
||||
|
||||
# Inspect container
|
||||
docker inspect <container_id>
|
||||
|
||||
# Check health status
|
||||
docker inspect --format='{{.State.Health.Status}}' <container_id>
|
||||
|
||||
# View processes
|
||||
docker top <container_id>
|
||||
|
||||
# Execute commands
|
||||
docker exec -it <container_id> sh
|
||||
docker exec <container_id> df -h
|
||||
|
||||
# View network
|
||||
docker network inspect <network_name>
|
||||
|
||||
# View disk usage
|
||||
docker system df
|
||||
docker system df -v
|
||||
|
||||
# Prune unused resources
|
||||
docker system prune -a --volumes
|
||||
|
||||
# Swarm service logs
|
||||
docker service logs <service_name>
|
||||
docker service ps <service_name>
|
||||
|
||||
# Swarm node status
|
||||
docker node ls
|
||||
docker node inspect <node_id>
|
||||
```
|
||||
|
||||
## Performance Tuning
|
||||
|
||||
### 1. Container Resource Limits
|
||||
|
||||
```yaml
|
||||
services:
|
||||
api:
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '1'
|
||||
memory: 1G
|
||||
reservations:
|
||||
cpus: '0.5'
|
||||
memory: 512M
|
||||
```
|
||||
|
||||
### 2. Logging Performance
|
||||
|
||||
```yaml
|
||||
services:
|
||||
api:
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
# Attach only these container labels to log entries
|
||||
labels: "level,requestId"
|
||||
```
|
||||
|
||||
### 3. Prometheus Optimization
|
||||
|
||||
```yaml
|
||||
# prometheus.yml
|
||||
global:
|
||||
scrape_interval: 15s # Balance between granularity and load
|
||||
evaluation_interval: 15s
|
||||
|
||||
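# Retention (command-line flags: set these under the Prometheus service's `command:` in docker-compose.yml, not in prometheus.yml)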
|
||||
command:
|
||||
- '--storage.tsdb.retention.time=30d'
|
||||
- '--storage.tsdb.retention.size=10GB'
|
||||
```
|
||||
|
||||
## Related Skills
|
||||
|
||||
| Skill | Purpose |
|
||||
|-------|---------|
|
||||
| `docker-compose` | Local development setup |
|
||||
| `docker-swarm` | Production orchestration |
|
||||
| `docker-security` | Container security |
|
||||
| `kubernetes` | Advanced orchestration |
|
||||
685
.kilo/skills/docker-security/SKILL.md
Normal file
685
.kilo/skills/docker-security/SKILL.md
Normal file
@@ -0,0 +1,685 @@
|
||||
# Skill: Docker Security
|
||||
|
||||
## Purpose
|
||||
|
||||
Comprehensive skill for Docker container security, vulnerability scanning, secrets management, and hardening best practices.
|
||||
|
||||
## Overview
|
||||
|
||||
Container security is essential for production deployments. Use this skill when scanning for vulnerabilities, configuring security settings, managing secrets, and implementing security best practices.
|
||||
|
||||
## When to Use
|
||||
|
||||
- Security hardening containers
|
||||
- Scanning images for vulnerabilities
|
||||
- Managing secrets and credentials
|
||||
- Configuring container isolation
|
||||
- Implementing least privilege
|
||||
- Security audits
|
||||
|
||||
## Security Layers
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ Container Security Layers │
|
||||
├─────────────────────────────────────────────────────────────┤
|
||||
│ 1. Host Security │
|
||||
│ - Kernel hardening │
|
||||
│ - SELinux/AppArmor │
|
||||
│ - cgroups namespace │
|
||||
├─────────────────────────────────────────────────────────────┤
|
||||
│ 2. Container Runtime Security │
|
||||
│ - User namespace │
|
||||
│ - Seccomp profiles │
|
||||
│ - Capability dropping │
|
||||
├─────────────────────────────────────────────────────────────┤
|
||||
│ 3. Image Security │
|
||||
│ - Minimal base images │
|
||||
│ - Vulnerability scanning │
|
||||
│ - No secrets in images │
|
||||
├─────────────────────────────────────────────────────────────┤
|
||||
│ 4. Network Security │
|
||||
│ - Network policies │
|
||||
│ - TLS encryption │
|
||||
│ - Ingress controls │
|
||||
├─────────────────────────────────────────────────────────────┤
|
||||
│ 5. Application Security │
|
||||
│ - Input validation │
|
||||
│ - Authentication │
|
||||
│ - Authorization │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Image Security
|
||||
|
||||
### 1. Base Image Selection
|
||||
|
||||
```dockerfile
|
||||
# ✅ Good: Minimal, specific version
|
||||
FROM node:20-alpine
|
||||
|
||||
# ✅ Better: Distroless (minimal attack surface)
|
||||
FROM gcr.io/distroless/nodejs20-debian12
|
||||
|
||||
# ❌ Bad: Large base, latest tag
|
||||
FROM node:latest
|
||||
```
|
||||
|
||||
### 2. Multi-stage Builds
|
||||
|
||||
```dockerfile
|
||||
# Build stage
|
||||
FROM node:20-alpine AS builder
|
||||
WORKDIR /app
|
||||
COPY package*.json ./
|
||||
RUN npm ci
|
||||
COPY . .
|
||||
RUN npm run build
|
||||
|
||||
# Runtime stage
|
||||
FROM node:20-alpine
|
||||
RUN addgroup -g 1001 appgroup && \
|
||||
adduser -u 1001 -G appgroup -D appuser
|
||||
WORKDIR /app
|
||||
COPY --from=builder --chown=appuser:appgroup /app/dist ./dist
|
||||
COPY --from=builder --chown=appuser:appgroup /app/node_modules ./node_modules
|
||||
USER appuser
|
||||
CMD ["node", "dist/index.js"]
|
||||
```
|
||||
|
||||
### 3. Vulnerability Scanning
|
||||
|
||||
```bash
|
||||
# Scan with Trivy
|
||||
trivy image myapp:latest
|
||||
|
||||
# Scan with Docker Scout
|
||||
docker scout vulnerabilities myapp:latest
|
||||
|
||||
# Scan with Grype
|
||||
grype myapp:latest
|
||||
|
||||
# CI/CD integration
|
||||
trivy image --exit-code 1 --severity HIGH,CRITICAL myapp:latest
|
||||
```
|
||||
|
||||
### 4. No Secrets in Images
|
||||
|
||||
```dockerfile
|
||||
# ❌ Never do this
|
||||
ENV DATABASE_PASSWORD=password123
|
||||
COPY .env ./
|
||||
|
||||
# ✅ Use BuildKit build secrets
|
||||
# The secret is mounted only while this RUN step executes and is never written to an image layer
|
||||
RUN --mount=type=secret,id=db_password \
|
||||
export DB_PASSWORD=$(cat /run/secrets/db_password)
|
||||
```
|
||||
|
||||
## Container Runtime Security
|
||||
|
||||
### 1. Non-root User
|
||||
|
||||
```dockerfile
|
||||
# Create non-root user
|
||||
FROM alpine:3.18
|
||||
RUN addgroup -g 1001 appgroup && \
|
||||
adduser -u 1001 -G appgroup -D appuser
|
||||
WORKDIR /app
|
||||
COPY --chown=appuser:appgroup . .
|
||||
USER appuser
|
||||
CMD ["./app"]
|
||||
```
|
||||
|
||||
### 2. Read-only Filesystem
|
||||
|
||||
```yaml
|
||||
# docker-compose.yml
|
||||
services:
|
||||
app:
|
||||
image: myapp:latest
|
||||
read_only: true
|
||||
tmpfs:
|
||||
- /tmp
|
||||
- /var/cache
|
||||
```
|
||||
|
||||
### 3. Capability Dropping
|
||||
|
||||
```yaml
|
||||
# Drop all capabilities
|
||||
services:
|
||||
app:
|
||||
image: myapp:latest
|
||||
cap_drop:
|
||||
- ALL
|
||||
cap_add:
|
||||
- CHOWN # Only needed capabilities
|
||||
- SETGID
|
||||
- SETUID
|
||||
```
|
||||
|
||||
### 4. Security Options
|
||||
|
||||
```yaml
|
||||
services:
|
||||
app:
|
||||
image: myapp:latest
|
||||
security_opt:
|
||||
- no-new-privileges:true # Prevent privilege escalation
|
||||
- seccomp:default.json # Seccomp profile
|
||||
- apparmor:docker-default # AppArmor profile
|
||||
```
|
||||
|
||||
### 5. Resource Limits
|
||||
|
||||
```yaml
|
||||
services:
|
||||
app:
|
||||
image: myapp:latest
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '1'
|
||||
memory: 1G
|
||||
reservations:
|
||||
cpus: '0.5'
|
||||
memory: 512M
|
||||
pids_limit: 100 # Limit process count
|
||||
```
|
||||
|
||||
## Secrets Management
|
||||
|
||||
### 1. Docker Secrets (Swarm)
|
||||
|
||||
```bash
|
||||
# Create secret
|
||||
echo "my_password" | docker secret create db_password -
|
||||
|
||||
# Create from file
|
||||
docker secret create jwt_secret ./secrets/jwt.txt
|
||||
```
|
||||
|
||||
```yaml
|
||||
# docker-compose.yml (Swarm)
|
||||
services:
|
||||
api:
|
||||
image: myapp:latest
|
||||
secrets:
|
||||
- db_password
|
||||
- jwt_secret
|
||||
environment:
|
||||
- DB_PASSWORD_FILE=/run/secrets/db_password
|
||||
|
||||
secrets:
|
||||
db_password:
|
||||
external: true
|
||||
jwt_secret:
|
||||
external: true
|
||||
```
|
||||
|
||||
### 2. Docker Compose Secrets (Non-Swarm)
|
||||
|
||||
```yaml
|
||||
# docker-compose.yml
|
||||
services:
|
||||
api:
|
||||
image: myapp:latest
|
||||
secrets:
|
||||
- db_password
|
||||
environment:
|
||||
- DB_PASSWORD_FILE=/run/secrets/db_password
|
||||
|
||||
secrets:
|
||||
db_password:
|
||||
file: ./secrets/db_password.txt
|
||||
```
|
||||
|
||||
### 3. Environment Variables (Development)
|
||||
|
||||
```yaml
|
||||
# docker-compose.yml (development only)
|
||||
services:
|
||||
api:
|
||||
image: myapp:latest
|
||||
env_file:
|
||||
- .env # Add .env to .gitignore!
|
||||
```
|
||||
|
||||
```bash
|
||||
# .env (NEVER COMMIT)
|
||||
DATABASE_URL=postgres://...
|
||||
JWT_SECRET=secret123
|
||||
API_KEY=key123
|
||||
```
|
||||
|
||||
### 4. Reading Secrets in Application
|
||||
|
||||
```javascript
|
||||
// Node.js
|
||||
const fs = require('fs');
|
||||
|
||||
function getSecret(secretName, envName) {
|
||||
// Try file-based secret first (Docker secrets)
|
||||
const secretPath = `/run/secrets/${secretName}`;
|
||||
if (fs.existsSync(secretPath)) {
|
||||
return fs.readFileSync(secretPath, 'utf8').trim();
|
||||
}
|
||||
// Fallback to environment variable (development)
|
||||
return process.env[envName];
|
||||
}
|
||||
|
||||
const dbPassword = getSecret('db_password', 'DB_PASSWORD');
|
||||
```
|
||||
|
||||
## Network Security
|
||||
|
||||
### 1. Network Segmentation
|
||||
|
||||
```yaml
|
||||
# Separate networks for different access levels
|
||||
networks:
|
||||
frontend:
|
||||
driver: bridge
|
||||
|
||||
backend:
|
||||
driver: bridge
|
||||
internal: true # No external access
|
||||
|
||||
database:
|
||||
driver: bridge
|
||||
internal: true
|
||||
|
||||
services:
|
||||
web:
|
||||
networks:
|
||||
- frontend
|
||||
|
||||
api:
|
||||
networks:
|
||||
- frontend
|
||||
- backend
|
||||
|
||||
db:
|
||||
networks:
|
||||
- database
|
||||
|
||||
cache:
|
||||
networks:
|
||||
- database
|
||||
```
|
||||
|
||||
### 2. Port Exposure
|
||||
|
||||
```yaml
|
||||
# ✅ Good: Only expose necessary ports
|
||||
services:
|
||||
api:
|
||||
ports:
|
||||
- "3000:3000" # API port only
|
||||
|
||||
db:
|
||||
# No ports exposed - only accessible inside network
|
||||
networks:
|
||||
- database
|
||||
|
||||
# ❌ Bad: Exposing database to host
|
||||
services:
|
||||
db:
|
||||
ports:
|
||||
- "5432:5432" # Security risk!
|
||||
```
|
||||
|
||||
### 3. TLS Configuration
|
||||
|
||||
```yaml
|
||||
services:
|
||||
nginx:
|
||||
image: nginx:alpine
|
||||
ports:
|
||||
- "443:443"
|
||||
volumes:
|
||||
- ./ssl/cert.pem:/etc/nginx/ssl/cert.pem:ro
|
||||
- ./ssl/key.pem:/etc/nginx/ssl/key.pem:ro
|
||||
configs:
|
||||
- source: nginx_config
|
||||
target: /etc/nginx/nginx.conf
|
||||
|
||||
configs:
|
||||
nginx_config:
|
||||
file: ./nginx.conf
|
||||
```
|
||||
|
||||
### 4. Ingress Controls
|
||||
|
||||
```yaml
|
||||
# Limit connections
|
||||
services:
|
||||
api:
|
||||
image: myapp:latest
|
||||
ports:
|
||||
- target: 3000
|
||||
published: 3000
|
||||
mode: host # Bypass ingress mesh for performance
|
||||
deploy:
|
||||
endpoint_mode: dnsrr
|
||||
resources:
|
||||
limits:
|
||||
memory: 1G
|
||||
```
|
||||
|
||||
## Security Profiles
|
||||
|
||||
### 1. Seccomp Profile
|
||||
|
||||
```json
|
||||
// default-seccomp.json
|
||||
{
|
||||
"defaultAction": "SCMP_ACT_ERRNO",
|
||||
"architectures": ["SCMP_ARCH_X86_64"],
|
||||
"syscalls": [
|
||||
{
|
||||
"names": ["read", "write", "exit", "exit_group"],
|
||||
"action": "SCMP_ACT_ALLOW"
|
||||
},
|
||||
{
|
||||
"names": ["open", "openat", "close"],
|
||||
"action": "SCMP_ACT_ALLOW"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
```yaml
|
||||
# Use custom seccomp profile
|
||||
services:
|
||||
api:
|
||||
security_opt:
|
||||
- seccomp:./default-seccomp.json
|
||||
```
|
||||
|
||||
### 2. AppArmor Profile
|
||||
|
||||
```bash
|
||||
# Create AppArmor profile
|
||||
cat > /etc/apparmor.d/docker-myapp <<EOF
|
||||
#include <tunables/global>
|
||||
profile docker-myapp flags=(attach_disconnected,mediate_deleted) {
|
||||
#include <abstractions/base>
|
||||
|
||||
network inet tcp,
|
||||
network inet udp,
|
||||
|
||||
/app/** r,
|
||||
/app/** w,
|
||||
|
||||
deny /** rw,
|
||||
}
|
||||
EOF
|
||||
|
||||
# Load profile
|
||||
apparmor_parser -r /etc/apparmor.d/docker-myapp
|
||||
```
|
||||
|
||||
```yaml
|
||||
# Use AppArmor profile
|
||||
services:
|
||||
api:
|
||||
security_opt:
|
||||
- apparmor:docker-myapp
|
||||
```
|
||||
|
||||
## Security Scanning
|
||||
|
||||
### 1. Image Vulnerability Scan
|
||||
|
||||
```bash
|
||||
# Trivy scan
|
||||
trivy image --severity HIGH,CRITICAL myapp:latest
|
||||
|
||||
# Docker Scout
|
||||
docker scout vulnerabilities myapp:latest
|
||||
|
||||
# Grype
|
||||
grype myapp:latest
|
||||
|
||||
# Output JSON for CI
|
||||
trivy image --format json --output results.json myapp:latest
|
||||
```
|
||||
|
||||
### 2. Base Image Updates
|
||||
|
||||
```bash
|
||||
# Check base image for updates
|
||||
docker pull node:20-alpine
|
||||
|
||||
# Rebuild with updated base
|
||||
docker build --no-cache -t myapp:latest .
|
||||
|
||||
# Scan new image
|
||||
trivy image myapp:latest
|
||||
```
|
||||
|
||||
### 3. Dependency Audit
|
||||
|
||||
```bash
|
||||
# Node.js
|
||||
npm audit
|
||||
npm audit fix
|
||||
|
||||
# Python
|
||||
pip-audit
|
||||
|
||||
# Go
|
||||
go list -m all | nancy
|
||||
|
||||
# General
|
||||
snyk test
|
||||
```
|
||||
|
||||
### 4. Secret Detection
|
||||
|
||||
```bash
|
||||
# Scan for secrets
|
||||
gitleaks detect --source . --verbose
|
||||
|
||||
# Pre-commit hook
|
||||
gitleaks protect --staged
|
||||
|
||||
# Docker image
|
||||
trivy image --scanners secret myapp:latest
|
||||
```
|
||||
|
||||
## CI/CD Security Integration
|
||||
|
||||
### GitHub Actions
|
||||
|
||||
```yaml
|
||||
# .github/workflows/security.yml
|
||||
name: Security Scan
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
jobs:
|
||||
scan:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Run Trivy vulnerability scanner
|
||||
uses: aquasecurity/trivy-action@master
|
||||
with:
|
||||
image-ref: 'myapp:${{ github.sha }}'
|
||||
format: 'table'
|
||||
exit-code: '1'
|
||||
severity: 'CRITICAL,HIGH'
|
||||
|
||||
- name: Run Gitleaks secret scan
|
||||
uses: gitleaks/gitleaks-action@v2
|
||||
with:
|
||||
args: --path=.
|
||||
```
|
||||
|
||||
### GitLab CI
|
||||
|
||||
```yaml
|
||||
# .gitlab-ci.yml
|
||||
security_scan:
|
||||
stage: test
|
||||
image: docker:24
|
||||
services:
|
||||
- docker:dind
|
||||
script:
|
||||
- docker build -t myapp:$CI_COMMIT_SHA .
|
||||
- trivy image --exit-code 1 --severity HIGH,CRITICAL myapp:$CI_COMMIT_SHA
|
||||
- gitleaks detect --source . --verbose
|
||||
```
|
||||
|
||||
## Security Checklist
|
||||
|
||||
### Dockerfile Security
|
||||
|
||||
- [ ] Using minimal base image (alpine/distroless)
|
||||
- [ ] Specific version tags, not `latest`
|
||||
- [ ] Running as non-root user
|
||||
- [ ] No secrets in image
|
||||
- [ ] `.dockerignore` includes `.env`, `.git`, `.credentials`
|
||||
- [ ] COPY instead of ADD (unless needed)
|
||||
- [ ] Multi-stage build for smaller image
|
||||
- [ ] HEALTHCHECK defined
|
||||
|
||||
### Runtime Security
|
||||
|
||||
- [ ] Read-only filesystem
|
||||
- [ ] Capabilities dropped
|
||||
- [ ] No new privileges
|
||||
- [ ] Resource limits set
|
||||
- [ ] User namespace enabled (if available)
|
||||
- [ ] Seccomp/AppArmor profiles applied
|
||||
|
||||
### Network Security
|
||||
|
||||
- [ ] Only necessary ports exposed
|
||||
- [ ] Internal networks for sensitive services
|
||||
- [ ] TLS for external communication
|
||||
- [ ] Network segmentation
|
||||
|
||||
### Secrets Management
|
||||
|
||||
- [ ] No secrets in images
|
||||
- [ ] Using Docker secrets or external vault
|
||||
- [ ] `.env` files gitignored
|
||||
- [ ] Secret rotation implemented
|
||||
|
||||
### CI/CD Security
|
||||
|
||||
- [ ] Vulnerability scanning in pipeline
|
||||
- [ ] Secret detection pre-commit
|
||||
- [ ] Dependency audit automated
|
||||
- [ ] Base images updated regularly
|
||||
|
||||
## Remediation Priority
|
||||
|
||||
| Severity | Priority | Timeline |
|
||||
|----------|----------|----------|
|
||||
| Critical | P0 | Immediately (24h) |
|
||||
| High | P1 | Within 7 days |
|
||||
| Medium | P2 | Within 30 days |
|
||||
| Low | P3 | Next release |
|
||||
|
||||
## Security Tools
|
||||
|
||||
| Tool | Purpose |
|
||||
|------|---------|
|
||||
| Trivy | Image vulnerability scanning |
|
||||
| Docker Scout | Docker's built-in scanner |
|
||||
| Grype | Vulnerability scanner |
|
||||
| Gitleaks | Secret detection |
|
||||
| Snyk | Dependency scanning |
|
||||
| Falco | Runtime security monitoring |
|
||||
| Anchore | Container security analysis |
|
||||
| Clair | Open-source vulnerability scanner |
|
||||
|
||||
## Common Vulnerabilities
|
||||
|
||||
### CVE Examples
|
||||
|
||||
```bash
|
||||
# Check for specific CVE
|
||||
trivy image myapp:latest | grep CVE-2021-44228
|
||||
|
||||
# Ignore specific CVE (use carefully)
|
||||
trivy image --ignorefile .trivyignore myapp:latest
|
||||
|
||||
# .trivyignore
|
||||
CVE-2021-12345 # Known and accepted
|
||||
```
|
||||
|
||||
### Log4j Example (CVE-2021-44228)
|
||||
|
||||
```bash
|
||||
# Check for vulnerable versions
|
||||
docker images --format '{{.Repository}}:{{.Tag}}' | xargs -I {} \
|
||||
sh -c 'trivy image {} | grep CVE-2021-44228 && echo "affected: {}"'
|
||||
|
||||
# Update and rebuild
|
||||
FROM node:20-alpine
|
||||
# Ensure no vulnerable log4j dependency
|
||||
RUN npm audit fix
|
||||
```
|
||||
|
||||
## Incident Response
|
||||
|
||||
### Security Breach Steps
|
||||
|
||||
1. **Isolate**
|
||||
```bash
|
||||
# Stop container
|
||||
docker stop <container_id>
|
||||
|
||||
# Remove from network
|
||||
docker network disconnect app-network <container_id>
|
||||
```
|
||||
|
||||
2. **Preserve Evidence**
|
||||
```bash
|
||||
# Save container state
|
||||
docker commit <container_id> incident-container
|
||||
|
||||
# Export logs and a filesystem snapshot
|
||||
docker logs <container_id> > incident-logs.txt
|
||||
docker export <container_id> > incident-container.tar
|
||||
```
|
||||
|
||||
3. **Analyze**
|
||||
```bash
|
||||
# Inspect container
|
||||
docker inspect <container_id>
|
||||
|
||||
# Check image
|
||||
trivy image <image_name>
|
||||
|
||||
# Review image layer history
|
||||
docker history <image_name>
|
||||
```
|
||||
|
||||
4. **Remediate**
|
||||
```bash
|
||||
# Update base image
|
||||
docker pull node:20-alpine
|
||||
|
||||
# Rebuild
|
||||
docker build --no-cache -t myapp:fixed .
|
||||
|
||||
# Scan
|
||||
trivy image myapp:fixed
|
||||
```
|
||||
|
||||
## Related Skills
|
||||
|
||||
| Skill | Purpose |
|
||||
|-------|---------|
|
||||
| `docker-compose` | Local development setup |
|
||||
| `docker-swarm` | Production orchestration |
|
||||
| `docker-monitoring` | Security monitoring |
|
||||
| `docker-networking` | Network security |
|
||||
757
.kilo/skills/docker-swarm/SKILL.md
Normal file
757
.kilo/skills/docker-swarm/SKILL.md
Normal file
@@ -0,0 +1,757 @@
|
||||
# Skill: Docker Swarm
|
||||
|
||||
## Purpose
|
||||
|
||||
Comprehensive skill for Docker Swarm orchestration, cluster management, and production-ready container deployment.
|
||||
|
||||
## Overview
|
||||
|
||||
Docker Swarm is Docker's native clustering and orchestration solution. Use this skill for production deployments, high availability setups, and managing containerized applications at scale.
|
||||
|
||||
## When to Use
|
||||
|
||||
- Deploying applications in production clusters
|
||||
- Setting up high availability services
|
||||
- Scaling services dynamically
|
||||
- Managing rolling updates
|
||||
- Handling secrets and configs securely
|
||||
- Multi-node orchestration
|
||||
|
||||
## Core Concepts
|
||||
|
||||
### Swarm Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ Docker Swarm Cluster │
|
||||
├─────────────────────────────────────────────────────────────┤
|
||||
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
||||
│ │ Manager │ │ Manager │ │ Manager │ (HA) │
|
||||
│ │ Node 1 │ │ Node 2 │ │ Node 3 │ │
|
||||
│ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ │
|
||||
│ │ │ │ │
|
||||
│ ┌──────┴────────────────┴────────────────┴──────┐ │
|
||||
│ │ Internal Network │ │
|
||||
│ └──────┬────────────────┬──────────────────────┘ │
|
||||
│ │ │ │
|
||||
│ ┌──────┴──────┐ ┌──────┴──────┐ ┌─────────────┐ │
|
||||
│ │ Worker │ │ Worker │ │ Worker │ │
|
||||
│ │ Node 4 │ │ Node 5 │ │ Node 6 │ │
|
||||
│ └─────────────┘ └─────────────┘ └─────────────┘ │
|
||||
│ │
|
||||
│ Services: api, web, db, redis, queue │
|
||||
│ Tasks: Running containers distributed across nodes │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Key Components
|
||||
|
||||
| Component | Description |
|
||||
|-----------|-------------|
|
||||
| **Service** | Definition of a container (image, ports, replicas) |
|
||||
| **Task** | Single running instance of a service |
|
||||
| **Stack** | Group of related services (like docker-compose) |
|
||||
| **Node** | Docker daemon participating in swarm |
|
||||
| **Overlay Network** | Network spanning multiple nodes |
|
||||
|
||||
## Skill Files Structure
|
||||
|
||||
```
|
||||
docker-swarm/
|
||||
├── SKILL.md # This file
|
||||
├── patterns/
|
||||
│ ├── services.md # Service deployment patterns
|
||||
│ ├── networking.md # Overlay network patterns
|
||||
│ ├── secrets.md # Secrets management
|
||||
│ └── configs.md # Config management
|
||||
└── examples/
|
||||
├── ha-web-app.md # High availability web app
|
||||
├── microservices.md # Microservices deployment
|
||||
└── database.md # Database cluster setup
|
||||
```
|
||||
|
||||
## Core Patterns
|
||||
|
||||
### 1. Initialize Swarm
|
||||
|
||||
```bash
|
||||
# Initialize swarm on manager node
|
||||
docker swarm init --advertise-addr <MANAGER_IP>
|
||||
|
||||
# Get join token for workers
|
||||
docker swarm join-token -q worker
|
||||
|
||||
# Get join token for managers
|
||||
docker swarm join-token -q manager
|
||||
|
||||
# Join swarm (on worker nodes)
|
||||
docker swarm join --token <TOKEN> <MANAGER_IP>:2377
|
||||
|
||||
# Check swarm status
|
||||
docker node ls
|
||||
```
|
||||
|
||||
### 2. Service Deployment
|
||||
|
||||
```yaml
|
||||
# docker-compose.yml (Swarm stack)
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
api:
|
||||
image: myapp/api:latest
|
||||
deploy:
|
||||
mode: replicated
|
||||
replicas: 3
|
||||
update_config:
|
||||
parallelism: 1
|
||||
delay: 10s
|
||||
failure_action: rollback
|
||||
order: start-first
|
||||
rollback_config:
|
||||
parallelism: 1
|
||||
delay: 10s
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
delay: 5s
|
||||
max_attempts: 3
|
||||
window: 120s
|
||||
placement:
|
||||
constraints:
|
||||
- node.role == worker
|
||||
preferences:
|
||||
- spread: node.id
|
||||
resources:
|
||||
limits:
|
||||
cpus: '1'
|
||||
memory: 1G
|
||||
reservations:
|
||||
cpus: '0.5'
|
||||
memory: 512M
|
||||
networks:
|
||||
- app-network
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 60s
|
||||
secrets:
|
||||
- db_password
|
||||
- jwt_secret
|
||||
configs:
|
||||
- app_config
|
||||
|
||||
networks:
|
||||
app-network:
|
||||
driver: overlay
|
||||
attachable: true
|
||||
|
||||
secrets:
|
||||
db_password:
|
||||
external: true
|
||||
jwt_secret:
|
||||
external: true
|
||||
|
||||
configs:
|
||||
app_config:
|
||||
external: true
|
||||
```
|
||||
|
||||
### 3. Deploy Stack
|
||||
|
||||
```bash
|
||||
# Create secrets (before deploying)
|
||||
echo "my_db_password" | docker secret create db_password -
|
||||
docker secret create jwt_secret ./jwt_secret.txt
|
||||
|
||||
# Create configs
|
||||
docker config create app_config ./config.json
|
||||
|
||||
# Deploy stack
|
||||
docker stack deploy -c docker-compose.yml mystack
|
||||
|
||||
# List services
|
||||
docker stack services mystack
|
||||
|
||||
# List tasks
|
||||
docker stack ps mystack
|
||||
|
||||
# Remove stack
|
||||
docker stack rm mystack
|
||||
```
|
||||
|
||||
### 4. Service Management
|
||||
|
||||
```bash
|
||||
# Scale service
|
||||
docker service scale mystack_api=5
|
||||
|
||||
# Update service image
|
||||
docker service update --image myapp/api:v2 mystack_api
|
||||
|
||||
# Update environment variable
|
||||
docker service update --env-add NODE_ENV=staging mystack_api
|
||||
|
||||
# Add constraint
|
||||
docker service update --constraint-add 'node.labels.region==us-east' mystack_api
|
||||
|
||||
# Rollback service
|
||||
docker service rollback mystack_api
|
||||
|
||||
# View service details
|
||||
docker service inspect mystack_api
|
||||
|
||||
# View service logs
|
||||
docker service logs -f mystack_api
|
||||
```
|
||||
|
||||
### 5. Secrets Management
|
||||
|
||||
```bash
|
||||
# Create secret from stdin
|
||||
echo "my_secret" | docker secret create db_password -
|
||||
|
||||
# Create secret from file
|
||||
docker secret create jwt_secret ./secrets/jwt.txt
|
||||
|
||||
# List secrets
|
||||
docker secret ls
|
||||
|
||||
# Inspect secret metadata
|
||||
docker secret inspect db_password
|
||||
|
||||
# Use secret in service
|
||||
docker service create \
|
||||
--name api \
|
||||
--secret db_password \
|
||||
--secret jwt_secret \
|
||||
myapp/api:latest
|
||||
|
||||
# Remove secret
|
||||
docker secret rm db_password
|
||||
```
|
||||
|
||||
### 6. Config Management
|
||||
|
||||
```bash
|
||||
# Create config
|
||||
docker config create app_config ./config.json
|
||||
|
||||
# List configs
|
||||
docker config ls
|
||||
|
||||
# Use config in service
|
||||
docker service create \
|
||||
--name api \
|
||||
--config source=app_config,target=/app/config.json \
|
||||
myapp/api:latest
|
||||
|
||||
# Update config (create new version)
|
||||
docker config create app_config_v2 ./config-v2.json
|
||||
|
||||
# Update service with new config
|
||||
docker service update \
|
||||
--config-rm app_config \
|
||||
--config-add source=app_config_v2,target=/app/config.json \
|
||||
mystack_api
|
||||
```
|
||||
|
||||
### 7. Overlay Networks
|
||||
|
||||
```yaml
|
||||
# Create overlay network
|
||||
networks:
|
||||
frontend:
|
||||
driver: overlay
|
||||
attachable: true
|
||||
|
||||
backend:
|
||||
driver: overlay
|
||||
attachable: true
|
||||
internal: true # No external access
|
||||
|
||||
services:
|
||||
web:
|
||||
networks:
|
||||
- frontend
|
||||
- backend
|
||||
|
||||
api:
|
||||
networks:
|
||||
- backend
|
||||
|
||||
db:
|
||||
networks:
|
||||
- backend
|
||||
```
|
||||
|
||||
```bash
|
||||
# Create network manually
|
||||
docker network create --driver overlay --attachable my-network
|
||||
|
||||
# List networks
|
||||
docker network ls
|
||||
|
||||
# Inspect network
|
||||
docker network inspect my-network
|
||||
```
|
||||
|
||||
## Deployment Strategies
|
||||
|
||||
### Rolling Update
|
||||
|
||||
```yaml
|
||||
services:
|
||||
api:
|
||||
deploy:
|
||||
update_config:
|
||||
parallelism: 2 # Update 2 tasks at a time
|
||||
delay: 10s # Wait 10s between updates
|
||||
failure_action: rollback
|
||||
monitor: 30s # Monitor for 30s after update
|
||||
max_failure_ratio: 0.3 # Allow 30% failures
|
||||
```
|
||||
|
||||
### Blue-Green Deployment
|
||||
|
||||
```bash
|
||||
# Deploy new version alongside existing
|
||||
docker service create \
|
||||
--name api-v2 \
|
||||
--mode replicated \
|
||||
--replicas 3 \
|
||||
--network app-network \
|
||||
myapp/api:v2
|
||||
|
||||
# Update router to point to new version
|
||||
# (Using nginx/traefik config update)
|
||||
|
||||
# Remove old version
|
||||
docker service rm api-v1
|
||||
```
|
||||
|
||||
### Canary Deployment
|
||||
|
||||
```yaml
|
||||
# Deploy canary version
|
||||
version: '3.8'
|
||||
services:
|
||||
api:
|
||||
image: myapp/api:v1
|
||||
deploy:
|
||||
replicas: 9
|
||||
# ... 90% of traffic
|
||||
|
||||
api-canary:
|
||||
image: myapp/api:v2
|
||||
deploy:
|
||||
replicas: 1
|
||||
# ... 10% of traffic
|
||||
```
|
||||
|
||||
### Global Services
|
||||
|
||||
```yaml
|
||||
# Run one instance on every node
|
||||
services:
|
||||
monitoring:
|
||||
image: myapp/monitoring:latest
|
||||
deploy:
|
||||
mode: global
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
```
|
||||
|
||||
## High Availability Patterns
|
||||
|
||||
### 1. Multi-Manager Setup
|
||||
|
||||
```bash
|
||||
# Create 3 manager nodes for HA
|
||||
docker swarm init --advertise-addr <MANAGER1_IP>
|
||||
|
||||
# On manager2
|
||||
docker swarm join --token <MANAGER_TOKEN> <MANAGER1_IP>:2377
|
||||
|
||||
# On manager3
|
||||
docker swarm join --token <MANAGER_TOKEN> <MANAGER1_IP>:2377
|
||||
|
||||
# Promote worker to manager
|
||||
docker node promote <NODE_ID>
|
||||
|
||||
# Demote manager to worker
|
||||
docker node demote <NODE_ID>
|
||||
```
|
||||
|
||||
### 2. Placement Constraints
|
||||
|
||||
```yaml
|
||||
services:
|
||||
db:
|
||||
image: postgres:15
|
||||
deploy:
|
||||
placement:
|
||||
constraints:
|
||||
- node.role == worker
|
||||
- node.labels.database == true
|
||||
preferences:
|
||||
- spread: node.labels.zone # Spread across zones
|
||||
|
||||
cache:
|
||||
image: redis:7
|
||||
deploy:
|
||||
placement:
|
||||
constraints:
|
||||
- node.labels.cache == true
|
||||
```
|
||||
|
||||
### 3. Resource Management
|
||||
|
||||
```yaml
|
||||
services:
|
||||
api:
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '2'
|
||||
memory: 2G
|
||||
reservations:
|
||||
cpus: '1'
|
||||
memory: 1G
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
max_attempts: 3
|
||||
```
|
||||
|
||||
### 4. Health Checks
|
||||
|
||||
```yaml
|
||||
services:
|
||||
api:
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 60s
|
||||
deploy:
|
||||
update_config:
|
||||
failure_action: rollback
|
||||
monitor: 30s
|
||||
```
|
||||
|
||||
## Service Discovery & Load Balancing
|
||||
|
||||
### Built-in Load Balancing
|
||||
|
||||
```yaml
|
||||
# Swarm provides automatic load balancing
|
||||
services:
|
||||
api:
|
||||
deploy:
|
||||
replicas: 3
|
||||
ports:
|
||||
- "3000:3000" # Requests are load balanced across replicas
|
||||
|
||||
# Virtual IP (VIP) - default mode
|
||||
# DNS round-robin
|
||||
services:
|
||||
api:
|
||||
deploy:
|
||||
endpoint_mode: dnsrr
|
||||
```
|
||||
|
||||
### Ingress Network
|
||||
|
||||
```yaml
|
||||
# Publishing ports
|
||||
services:
|
||||
web:
|
||||
ports:
|
||||
- "80:80" # Published on all nodes
|
||||
- "443:443"
|
||||
deploy:
|
||||
mode: replicated # Ports above use the default "ingress" publish mode (routed through the mesh)
|
||||
```
|
||||
|
||||
### Host Mode
|
||||
|
||||
```yaml
|
||||
# Bypass load balancer (for performance)
|
||||
services:
|
||||
web:
|
||||
ports:
|
||||
- target: 80
|
||||
published: 80
|
||||
mode: host # Direct port mapping
|
||||
deploy:
|
||||
mode: global # One per node
|
||||
```
|
||||
|
||||
## Monitoring & Logging
|
||||
|
||||
### Logging Drivers
|
||||
|
||||
```yaml
|
||||
services:
|
||||
api:
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
labels: "app,environment"
|
||||
|
||||
# Or use syslog
|
||||
api:
|
||||
logging:
|
||||
driver: "syslog"
|
||||
options:
|
||||
syslog-address: "tcp://logserver:514"
|
||||
syslog-facility: "daemon"
|
||||
```
|
||||
|
||||
### Viewing Logs
|
||||
|
||||
```bash
|
||||
# Service logs
|
||||
docker service logs mystack_api
|
||||
|
||||
# Filter by time
|
||||
docker service logs --since 1h mystack_api
|
||||
|
||||
# Follow logs
|
||||
docker service logs -f mystack_api
|
||||
|
||||
# Show only the most recent 100 log lines
|
||||
docker service logs --tail 100 mystack_api
|
||||
```
|
||||
|
||||
### Monitoring Commands
|
||||
|
||||
```bash
|
||||
# Node status
|
||||
docker node ls
|
||||
|
||||
# Service status
|
||||
docker service ls
|
||||
|
||||
# Task status
|
||||
docker service ps mystack_api
|
||||
|
||||
# Resource usage
|
||||
docker stats
|
||||
|
||||
# Service inspect
|
||||
docker service inspect mystack_api --pretty
|
||||
```
|
||||
|
||||
## Backup & Recovery
|
||||
|
||||
### Backup Swarm State
|
||||
|
||||
```bash
|
||||
# On manager node
|
||||
docker pull swaggercodebreaker/swarmctl
|
||||
docker run --rm -v /var/lib/docker/swarm:/ swarmctl export > swarm-backup.json
|
||||
|
||||
# Or manual backup (stop Docker on this manager first so the Raft data is consistent)
|
||||
cp -r /var/lib/docker/swarm ~/swarm-backup/
|
||||
```
|
||||
|
||||
### Recovery
|
||||
|
||||
```bash
|
||||
# Unlock swarm after restart (if encrypted)
|
||||
docker swarm unlock
|
||||
|
||||
# Force new cluster (disaster recovery)
|
||||
docker swarm init --force-new-cluster
|
||||
|
||||
# Restore from backup
|
||||
docker swarm init --force-new-cluster
|
||||
docker service create --name restore-app ...
|
||||
```
|
||||
|
||||
## Common Operations
|
||||
|
||||
### Node Management
|
||||
|
||||
```bash
|
||||
# List nodes
|
||||
docker node ls
|
||||
|
||||
# Inspect node
|
||||
docker node inspect <NODE_ID>
|
||||
|
||||
# Drain node (for maintenance)
|
||||
docker node update --availability drain <NODE_ID>
|
||||
|
||||
# Activate node
|
||||
docker node update --availability active <NODE_ID>
|
||||
|
||||
# Add labels
|
||||
docker node update --label-add region=us-east <NODE_ID>
|
||||
|
||||
# Remove node
|
||||
docker node rm <NODE_ID>
|
||||
```
|
||||
|
||||
### Service Debugging
|
||||
|
||||
```bash
|
||||
# View service tasks
|
||||
docker service ps mystack_api
|
||||
|
||||
# View task details
|
||||
docker inspect <TASK_ID>
|
||||
|
||||
# Run temporary container for debugging
|
||||
docker run --rm -it --network mystack_app-network \
|
||||
myapp/api:latest sh
|
||||
|
||||
# Check service logs
|
||||
docker service logs mystack_api
|
||||
|
||||
# Execute command in running container
|
||||
docker exec -it <CONTAINER_ID> sh
|
||||
```
|
||||
|
||||
### Network Debugging
|
||||
|
||||
```bash
|
||||
# List networks
|
||||
docker network ls
|
||||
|
||||
# Inspect overlay network
|
||||
docker network inspect mystack_app-network
|
||||
|
||||
# Test connectivity
|
||||
docker run --rm --network mystack_app-network alpine ping api
|
||||
|
||||
# DNS resolution
|
||||
docker run --rm --network mystack_app-network alpine nslookup api
|
||||
```
|
||||
|
||||
## Production Checklist
|
||||
|
||||
- [ ] At least 3 manager nodes for HA
|
||||
- [ ] Quorum maintained (odd number of managers)
|
||||
- [ ] Resources limited for all services
|
||||
- [ ] Health checks configured
|
||||
- [ ] Rolling update strategy defined
|
||||
- [ ] Rollback strategy configured
|
||||
- [ ] Secrets used for sensitive data
|
||||
- [ ] Configs for environment settings
|
||||
- [ ] Overlay networks properly segmented
|
||||
- [ ] Logging driver configured
|
||||
- [ ] Monitoring solution deployed
|
||||
- [ ] Backup strategy implemented
|
||||
- [ ] Node labels for placement constraints
|
||||
- [ ] Resource reservations set
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Resource Planning**
|
||||
```yaml
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '1'
|
||||
memory: 1G
|
||||
reservations:
|
||||
cpus: '0.5'
|
||||
memory: 512M
|
||||
```
|
||||
|
||||
2. **Rolling Updates**
|
||||
```yaml
|
||||
deploy:
|
||||
update_config:
|
||||
parallelism: 1
|
||||
delay: 10s
|
||||
failure_action: rollback
|
||||
monitor: 30s
|
||||
```
|
||||
|
||||
3. **Placement Constraints**
|
||||
```yaml
|
||||
deploy:
|
||||
placement:
|
||||
constraints:
|
||||
- node.role == worker
|
||||
preferences:
|
||||
- spread: node.labels.zone
|
||||
```
|
||||
|
||||
4. **Network Segmentation**
|
||||
```yaml
|
||||
networks:
|
||||
frontend:
|
||||
driver: overlay
|
||||
backend:
|
||||
driver: overlay
|
||||
internal: true
|
||||
```
|
||||
|
||||
5. **Secrets Management**
|
||||
```yaml
|
||||
secrets:
|
||||
- db_password
|
||||
- jwt_secret
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Service Won't Start
|
||||
|
||||
```bash
|
||||
# Check task status
|
||||
docker service ps mystack_api --no-trunc
|
||||
|
||||
# Check logs
|
||||
docker service logs mystack_api
|
||||
|
||||
# Check node resources
|
||||
docker node ls
|
||||
docker stats
|
||||
|
||||
# Check network
|
||||
docker network inspect mystack_app-network
|
||||
```
|
||||
|
||||
### Task Keeps Restarting
|
||||
|
||||
```bash
|
||||
# Check restart policy
|
||||
docker service inspect mystack_api --pretty
|
||||
|
||||
# Check container logs
|
||||
docker service logs --tail 50 mystack_api
|
||||
|
||||
# Check health check
|
||||
docker inspect <CONTAINER_ID> --format='{{.State.Health}}'
|
||||
```
|
||||
|
||||
### Network Issues
|
||||
|
||||
```bash
|
||||
# Verify overlay network
|
||||
docker network inspect mystack_app-network
|
||||
|
||||
# Check DNS resolution
|
||||
docker run --rm --network mystack_app-network alpine nslookup api
|
||||
|
||||
# Check connectivity
|
||||
docker run --rm --network mystack_app-network alpine ping api
|
||||
```
|
||||
|
||||
## Related Skills
|
||||
|
||||
| Skill | Purpose |
|
||||
|-------|---------|
|
||||
| `docker-compose` | Local development with Compose |
|
||||
| `docker-security` | Container security patterns |
|
||||
| `kubernetes` | Kubernetes orchestration |
|
||||
| `docker-monitoring` | Container monitoring setup |
|
||||
519
.kilo/skills/docker-swarm/examples/ha-web-app.md
Normal file
519
.kilo/skills/docker-swarm/examples/ha-web-app.md
Normal file
@@ -0,0 +1,519 @@
|
||||
# Docker Swarm Deployment Examples
|
||||
|
||||
## Example: High Availability Web Application
|
||||
|
||||
Complete example of deploying a production-ready web application with Docker Swarm.
|
||||
|
||||
### docker-compose.yml (Swarm Stack)
|
||||
|
||||
```yaml
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
# Reverse Proxy with SSL
|
||||
nginx:
|
||||
image: nginx:alpine
|
||||
ports:
|
||||
- "80:80"
|
||||
- "443:443"
|
||||
configs:
|
||||
- source: nginx_config
|
||||
target: /etc/nginx/nginx.conf
|
||||
secrets:
|
||||
- ssl_cert
|
||||
- ssl_key
|
||||
networks:
|
||||
- frontend
|
||||
deploy:
|
||||
replicas: 2
|
||||
placement:
|
||||
constraints:
|
||||
- node.role == worker
|
||||
resources:
|
||||
limits:
|
||||
cpus: '0.5'
|
||||
memory: 256M
|
||||
healthcheck:
|
||||
test: ["CMD", "nginx", "-t"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
|
||||
# API Service
|
||||
api:
|
||||
image: myapp/api:latest
|
||||
environment:
|
||||
- NODE_ENV=production
|
||||
- DATABASE_URL=postgres://app:${DB_PASSWORD}@db:5432/app
|
||||
- REDIS_URL=redis://cache:6379
|
||||
configs:
|
||||
- source: app_config
|
||||
target: /app/config.json
|
||||
secrets:
|
||||
- jwt_secret
|
||||
networks:
|
||||
- frontend
|
||||
- backend
|
||||
deploy:
|
||||
replicas: 3
|
||||
update_config:
|
||||
parallelism: 1
|
||||
delay: 10s
|
||||
failure_action: rollback
|
||||
order: start-first
|
||||
rollback_config:
|
||||
parallelism: 1
|
||||
delay: 10s
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
delay: 5s
|
||||
max_attempts: 3
|
||||
window: 120s
|
||||
placement:
|
||||
constraints:
|
||||
- node.role == worker
|
||||
preferences:
|
||||
- spread: node.id
|
||||
resources:
|
||||
limits:
|
||||
cpus: '1'
|
||||
memory: 1G
|
||||
reservations:
|
||||
cpus: '0.5'
|
||||
memory: 512M
|
||||
healthcheck:
|
||||
test: ["CMD", "node", "-e", "require('http').get('http://localhost:3000/health', (r) => process.exit(r.statusCode === 200 ? 0 : 1))"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 60s
|
||||
|
||||
# Background Worker
|
||||
worker:
|
||||
image: myapp/worker:latest
|
||||
environment:
|
||||
- NODE_ENV=production
|
||||
- DATABASE_URL=postgres://app:${DB_PASSWORD}@db:5432/app
|
||||
secrets:
|
||||
- jwt_secret
|
||||
networks:
|
||||
- backend
|
||||
deploy:
|
||||
replicas: 2
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
delay: 10s
|
||||
max_attempts: 5
|
||||
placement:
|
||||
constraints:
|
||||
- node.role == worker
|
||||
resources:
|
||||
limits:
|
||||
cpus: '0.5'
|
||||
memory: 512M
|
||||
|
||||
# Database (PostgreSQL, single replica pinned to a labeled node)
|
||||
db:
|
||||
image: postgres:15-alpine
|
||||
environment:
|
||||
POSTGRES_DB: app
|
||||
POSTGRES_USER: app
|
||||
POSTGRES_PASSWORD_FILE: /run/secrets/db_password
|
||||
secrets:
|
||||
- db_password
|
||||
volumes:
|
||||
- postgres-data:/var/lib/postgresql/data
|
||||
networks:
|
||||
- backend
|
||||
deploy:
|
||||
replicas: 1
|
||||
placement:
|
||||
constraints:
|
||||
- node.labels.database == true
|
||||
resources:
|
||||
limits:
|
||||
cpus: '2'
|
||||
memory: 2G
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U app -d app"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
# Redis Cache
|
||||
cache:
|
||||
image: redis:7-alpine
|
||||
command: redis-server --appendonly yes --maxmemory 512mb --maxmemory-policy allkeys-lru
|
||||
volumes:
|
||||
- redis-data:/data
|
||||
networks:
|
||||
- backend
|
||||
deploy:
|
||||
replicas: 1
|
||||
placement:
|
||||
constraints:
|
||||
- node.labels.cache == true
|
||||
resources:
|
||||
limits:
|
||||
cpus: '0.5'
|
||||
memory: 512M
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "ping"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
# Monitoring (Prometheus)
|
||||
prometheus:
|
||||
image: prom/prometheus:latest
|
||||
configs:
|
||||
- source: prometheus_config
|
||||
target: /etc/prometheus/prometheus.yml
|
||||
volumes:
|
||||
- prometheus-data:/prometheus
|
||||
networks:
|
||||
- monitoring
|
||||
deploy:
|
||||
replicas: 1
|
||||
placement:
|
||||
constraints:
|
||||
- node.role == manager
|
||||
command:
|
||||
- '--config.file=/etc/prometheus/prometheus.yml'
|
||||
- '--storage.tsdb.retention.time=30d'
|
||||
|
||||
# Monitoring (Grafana)
|
||||
grafana:
|
||||
image: grafana/grafana:latest
|
||||
ports:
|
||||
- "3000:3000"
|
||||
environment:
|
||||
- GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD}
|
||||
volumes:
|
||||
- grafana-data:/var/lib/grafana
|
||||
networks:
|
||||
- monitoring
|
||||
deploy:
|
||||
replicas: 1
|
||||
placement:
|
||||
constraints:
|
||||
- node.role == manager
|
||||
|
||||
networks:
|
||||
frontend:
|
||||
driver: overlay
|
||||
attachable: true
|
||||
backend:
|
||||
driver: overlay
|
||||
internal: true
|
||||
monitoring:
|
||||
driver: overlay
|
||||
attachable: true
|
||||
|
||||
volumes:
|
||||
postgres-data:
|
||||
redis-data:
|
||||
prometheus-data:
|
||||
grafana-data:
|
||||
|
||||
configs:
|
||||
nginx_config:
|
||||
file: ./configs/nginx.conf
|
||||
app_config:
|
||||
file: ./configs/app.json
|
||||
prometheus_config:
|
||||
file: ./configs/prometheus.yml
|
||||
|
||||
secrets:
|
||||
db_password:
|
||||
file: ./secrets/db_password.txt
|
||||
jwt_secret:
|
||||
file: ./secrets/jwt_secret.txt
|
||||
ssl_cert:
|
||||
file: ./secrets/ssl_cert.pem
|
||||
ssl_key:
|
||||
file: ./secrets/ssl_key.pem
|
||||
```
|
||||
|
||||
### Deployment Script
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# deploy.sh
|
||||
|
||||
set -e
|
||||
|
||||
# Colors
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
NC='\033[0m'
|
||||
|
||||
# Configuration
|
||||
STACK_NAME="myapp"
|
||||
COMPOSE_FILE="docker-compose.yml"
|
||||
|
||||
echo "Starting deployment for ${STACK_NAME}..."
|
||||
|
||||
# Check if running on Swarm
|
||||
if ! docker info | grep -q "Swarm: active"; then
|
||||
echo -e "${RED}Error: Not running in Swarm mode${NC}"
|
||||
echo "Initialize Swarm with: docker swarm init"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Create secrets (if not exists)
|
||||
echo "Checking secrets..."
|
||||
for secret in db_password jwt_secret ssl_cert ssl_key; do
|
||||
if ! docker secret inspect ${secret} > /dev/null 2>&1; then
|
||||
if [ -f "./secrets/${secret}.txt" ]; then
|
||||
docker secret create ${secret} ./secrets/${secret}.txt
|
||||
echo -e "${GREEN}Created secret: ${secret}${NC}"
|
||||
else
|
||||
echo -e "${RED}Missing secret file: ./secrets/${secret}.txt${NC}"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo "Secret ${secret} already exists"
|
||||
fi
|
||||
done
|
||||
|
||||
# Create configs
|
||||
echo "Creating configs..."
|
||||
docker config rm nginx_config 2>/dev/null || true
|
||||
docker config create nginx_config ./configs/nginx.conf
|
||||
|
||||
docker config rm app_config 2>/dev/null || true
|
||||
docker config create app_config ./configs/app.json
|
||||
|
||||
docker config rm prometheus_config 2>/dev/null || true
|
||||
docker config create prometheus_config ./configs/prometheus.yml
|
||||
|
||||
# Deploy stack
|
||||
echo "Deploying stack..."
|
||||
docker stack deploy -c ${COMPOSE_FILE} ${STACK_NAME}
|
||||
|
||||
# Wait for services to start
|
||||
echo "Waiting for services to start..."
|
||||
sleep 30
|
||||
|
||||
# Show status
|
||||
docker stack services ${STACK_NAME}
|
||||
|
||||
# Check health
|
||||
echo "Checking service health..."
|
||||
for service in nginx api worker db cache prometheus grafana; do
|
||||
REPLICAS=$(docker service ls --filter name=${STACK_NAME}_${service} --format "{{.Replicas}}")
|
||||
echo "${service}: ${REPLICAS}"
|
||||
done
|
||||
|
||||
echo -e "${GREEN}Deployment complete!${NC}"
|
||||
echo "Check status: docker stack services ${STACK_NAME}"
|
||||
echo "View logs: docker service logs -f ${STACK_NAME}_api"
|
||||
```
|
||||
|
||||
### Service Update Script
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# update-service.sh
|
||||
|
||||
set -e
|
||||
|
||||
SERVICE_NAME=$1
|
||||
NEW_IMAGE=$2
|
||||
|
||||
if [ -z "$SERVICE_NAME" ] || [ -z "$NEW_IMAGE" ]; then
|
||||
echo "Usage: ./update-service.sh <service-name> <new-image>"
|
||||
echo "Example: ./update-service.sh api myapp/api:v2"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
FULL_SERVICE_NAME="${STACK_NAME:-myapp}_${SERVICE_NAME}"
|
||||
|
||||
echo "Updating ${FULL_SERVICE_NAME} to ${NEW_IMAGE}..."
|
||||
|
||||
# Update service with rollback on failure
|
||||
docker service update \
|
||||
--image ${NEW_IMAGE} \
|
||||
--update-parallelism 1 \
|
||||
--update-delay 10s \
|
||||
--update-failure-action rollback \
|
||||
--update-monitor 30s \
|
||||
${FULL_SERVICE_NAME}
|
||||
|
||||
# Wait for update
|
||||
echo "Waiting for update to complete..."
|
||||
sleep 30
|
||||
|
||||
# Check status
|
||||
docker service ps ${FULL_SERVICE_NAME}
|
||||
|
||||
echo "Update complete!"
|
||||
```
|
||||
|
||||
### Rollback Script
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# rollback-service.sh
|
||||
|
||||
set -e
|
||||
|
||||
SERVICE_NAME=$1
|
||||
STACK_NAME="myapp"
|
||||
|
||||
if [ -z "$SERVICE_NAME" ]; then
|
||||
echo "Usage: ./rollback-service.sh <service-name>"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
FULL_SERVICE_NAME="${STACK_NAME}_${SERVICE_NAME}"
|
||||
|
||||
echo "Rolling back ${FULL_SERVICE_NAME}..."
|
||||
|
||||
docker service rollback ${FULL_SERVICE_NAME}
|
||||
|
||||
sleep 30
|
||||
|
||||
docker service ps ${FULL_SERVICE_NAME}
|
||||
|
||||
echo "Rollback complete!"
|
||||
```
|
||||
|
||||
### Monitoring Dashboard (Grafana)
|
||||
|
||||
```json
|
||||
{
|
||||
"dashboard": {
|
||||
"title": "Docker Swarm Overview",
|
||||
"panels": [
|
||||
{
|
||||
"title": "Running Tasks",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "count(container_tasks_state{state=\"running\"})"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "CPU Usage per Service",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(container_cpu_usage_seconds_total{name=~\".+\"}[5m]) * 100",
|
||||
"legendFormat": "{{name}}"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Memory Usage per Service",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "container_memory_usage_bytes{name=~\".+\"} / 1024 / 1024",
|
||||
"legendFormat": "{{name}} MB"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Network I/O",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(container_network_receive_bytes_total{name=~\".+\"}[5m])",
|
||||
"legendFormat": "{{name}} RX"
|
||||
},
|
||||
{
|
||||
"expr": "rate(container_network_transmit_bytes_total{name=~\".+\"}[5m])",
|
||||
"legendFormat": "{{name}} TX"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Service Health",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "container_health_status{name=~\".+\"}"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Prometheus Configuration
|
||||
|
||||
```yaml
|
||||
# prometheus.yml
|
||||
global:
|
||||
scrape_interval: 15s
|
||||
evaluation_interval: 15s
|
||||
|
||||
alerting:
|
||||
alertmanagers:
|
||||
- static_configs:
|
||||
- targets:
|
||||
- alertmanager:9093
|
||||
|
||||
rule_files:
|
||||
- /etc/prometheus/alerts.yml
|
||||
|
||||
scrape_configs:
|
||||
- job_name: 'prometheus'
|
||||
static_configs:
|
||||
- targets: ['prometheus:9090']
|
||||
|
||||
- job_name: 'cadvisor'
|
||||
static_configs:
|
||||
- targets: ['cadvisor:8080']
|
||||
|
||||
- job_name: 'node'
|
||||
static_configs:
|
||||
- targets: ['node-exporter:9100']
|
||||
|
||||
- job_name: 'api'
|
||||
static_configs:
|
||||
- targets: ['api:3000']
|
||||
metrics_path: '/metrics'
|
||||
```
|
||||
|
||||
### Alert Rules
|
||||
|
||||
```yaml
|
||||
# alerts.yml
|
||||
groups:
|
||||
- name: swarm_alerts
|
||||
rules:
|
||||
- alert: ServiceDown
|
||||
expr: count(container_tasks_state{state="running"}) == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Service {{ $labels.service }} is down"
|
||||
description: "No running tasks for service {{ $labels.service }}"
|
||||
|
||||
- alert: HighCpuUsage
|
||||
expr: rate(container_cpu_usage_seconds_total[5m]) * 100 > 80
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "High CPU usage on {{ $labels.name }}"
|
||||
description: "Container {{ $labels.name }} CPU usage is {{ $value }}%"
|
||||
|
||||
- alert: HighMemoryUsage
|
||||
expr: (container_memory_usage_bytes / container_spec_memory_limit_bytes) * 100 > 80
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "High memory usage on {{ $labels.name }}"
|
||||
description: "Container {{ $labels.name }} memory usage is {{ $value }}%"
|
||||
|
||||
- alert: ContainerRestart
|
||||
expr: increase(container_restart_count[1h]) > 0
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Container {{ $labels.name }} restarted"
|
||||
description: "Container {{ $labels.name }} restarted {{ $value }} times in the last hour"
|
||||
```
|
||||
151
AGENTS.md
151
AGENTS.md
@@ -31,95 +31,50 @@ These agents are invoked automatically by `/pipeline` or manually via `@mention`
|
||||
### Core Development
|
||||
| Agent | Role | When Invoked |
|
||||
|-------|------|--------------|
|
||||
| `@requirement-refiner` | Converts ideas to User Stories | Issue status: new |
|
||||
| `@history-miner` | Finds duplicates in git | Status: planned |
|
||||
| `@system-analyst` | Designs specifications | Status: researching |
|
||||
| `@sdet-engineer` | Writes tests (TDD) | Status: designed |
|
||||
| `@lead-developer` | Implements code | Status: testing (tests fail) |
|
||||
| `@frontend-developer` | UI implementation | When UI work needed |
|
||||
| `@backend-developer` | Node.js/Express/APIs | When backend needed |
|
||||
| `@RequirementRefiner` | Converts vague ideas and bug reports into strict User Stories with acceptance criteria checklists | Issue status: new |
|
||||
| `@HistoryMiner` | Analyzes git history to find duplicates and past solutions, preventing regression and duplicate work | Status: planned |
|
||||
| `@SystemAnalyst` | Designs technical specifications, data schemas, and API contracts before implementation | Status: researching |
|
||||
| `@SdetEngineer` | Writes tests following TDD methodology | Status: designed |
|
||||
| `@LeadDeveloper` | Primary code writer for backend and core logic | Status: testing |
|
||||
| `@FrontendDeveloper` | Handles UI implementation with multimodal capabilities | When UI work needed |
|
||||
| `@BackendDeveloper` | Backend specialist for Node.js, Express, APIs, and database integration | When backend needed |
|
||||
| `@GoDeveloper` | Go backend specialist for Gin, Echo, APIs, and database integration | When Go backend needed |
|
||||
| `@DevopsEngineer` | DevOps specialist for Docker, Kubernetes, CI/CD pipeline automation, and infrastructure management | When deployment/infra needed |
|
||||
|
||||
### Quality Assurance
|
||||
| Agent | Role | When Invoked |
|
||||
|-------|------|--------------|
|
||||
| `@code-skeptic` | Adversarial review | Status: implementing |
|
||||
| `@the-fixer` | Fixes issues | When review fails |
|
||||
| `@performance-engineer` | Performance review | After code-skeptic |
|
||||
| `@security-auditor` | Security audit | After performance |
|
||||
| `@visual-tester` | Visual regression | When UI changes |
|
||||
| `@CodeSkeptic` | Adversarial code reviewer | Status: implementing |
|
||||
| `@TheFixer` | Iteratively fixes bugs based on specific error reports and test failures | When review fails |
|
||||
| `@PerformanceEngineer` | Reviews code for performance issues | After code-skeptic |
|
||||
| `@SecurityAuditor` | Scans for security vulnerabilities, OWASP Top 10, dependency CVEs, and hardcoded secrets | After performance |
|
||||
| `@VisualTester` | Visual regression testing agent that compares screenshots and detects UI differences using pixelmatch and image diff | When UI changes |
|
||||
|
||||
### Cognitive Enhancement (New)
|
||||
### DevOps & Infrastructure
|
||||
| Agent | Role | When Invoked |
|
||||
|-------|------|--------------|
|
||||
| `@planner` | Task decomposition (CoT/ToT) | Complex tasks |
|
||||
| `@reflector` | Self-reflection (Reflexion) | After each agent |
|
||||
| `@memory-manager` | Memory systems | Context management |
|
||||
| `@devops-engineer` | Docker/Swarm/K8s deployment | When deployment needed |
|
||||
| `@security-auditor` | Container security scan | After deployment config |
|
||||
|
||||
### Cognitive Enhancement
|
||||
| Agent | Role | When Invoked |
|
||||
|-------|------|--------------|
|
||||
| `@Planner` | Advanced task planner using Chain of Thought, Tree of Thoughts, and Plan-Execute-Reflect | Complex tasks |
|
||||
| `@Reflector` | Self-reflection agent using Reflexion pattern - learns from mistakes | After each agent |
|
||||
| `@MemoryManager` | Manages agent memory systems - short-term (context), long-term (vector store), and episodic (experiences) | Context management |
|
||||
|
||||
### Meta & Process
|
||||
| Agent | Role | When Invoked |
|
||||
|-------|------|--------------|
|
||||
| `@release-manager` | Git operations | Status: releasing |
|
||||
| `@evaluator` | Scores effectiveness | Status: evaluated |
|
||||
| `@prompt-optimizer` | Improves prompts | When score < 7 |
|
||||
| `@capability-analyst` | Analyzes task coverage | When starting new task |
|
||||
| `@agent-architect` | Creates new agents | When gaps identified |
|
||||
| `@workflow-architect` | Creates workflows | New workflow needed |
|
||||
| `@markdown-validator` | Validates Markdown | Before issue creation |
|
||||
|
||||
## Workflow State Machine
|
||||
|
||||
```
|
||||
[new]
|
||||
↓ @requirement-refiner
|
||||
[planned]
|
||||
↓ @capability-analyst → (gaps?) → @agent-architect → create new agents
|
||||
↓ @history-miner
|
||||
[researching]
|
||||
↓ @system-analyst
|
||||
[designed]
|
||||
↓ @sdet-engineer (writes failing tests)
|
||||
[testing]
|
||||
↓ @lead-developer (makes tests pass)
|
||||
[implementing]
|
||||
↓ @code-skeptic (review)
|
||||
[reviewing] ──[fail]──→ [fixing] ──→ [reviewing]
|
||||
↓ @review-watcher → (auto-validate) → create fix tasks
|
||||
↓ [pass]
|
||||
[perf-check]
|
||||
↓ @performance-engineer
|
||||
[security-check]
|
||||
↓ @security-auditor
|
||||
[releasing]
|
||||
↓ @release-manager
|
||||
[evaluated]
|
||||
↓ @evaluator
|
||||
├── [score ≥ 7] → [completed]
|
||||
└── [score < 7] → @prompt-optimizer → [completed]
|
||||
```
|
||||
|
||||
## Capability Analysis Flow
|
||||
|
||||
When starting a complex task:
|
||||
|
||||
```
|
||||
[User Request]
|
||||
↓
|
||||
[@capability-analyst] ← Analyzes requirements vs existing capabilities
|
||||
↓
|
||||
[Gap Analysis] ← Identifies missing agents, workflows, skills
|
||||
↓
|
||||
[Recommendations] → Create new or enhance existing?
|
||||
↓
|
||||
[Decision]
|
||||
├── [Create New] → [@agent-architect] → Create component → Review
|
||||
└── [Enhance] → [@lead-developer] → Modify existing
|
||||
↓
|
||||
[Integration] ← Verify new component works with system
|
||||
↓
|
||||
[Complete] ← Task can now be handled
|
||||
```
|
||||
|
||||
## Gitea Integration
|
||||
| `@Orchestrator` | Main dispatcher | Manages all agent routing |
|
||||
| `@ReleaseManager` | Manages git operations, semantic versioning, branching, and deployments | Status: releasing |
|
||||
| `@Evaluator` | Scores agent effectiveness after task completion for continuous improvement | Status: evaluated |
|
||||
| `@PromptOptimizer` | Improves agent system prompts based on performance failures | When score < 7 |
|
||||
| `@ProductOwner` | Manages issue checklists, status labels, tracks progress and coordinates with human users | Manages issues |
|
||||
| `@AgentArchitect` | Creates, modifies, and reviews new agents, workflows, and skills based on capability gap analysis | When gaps identified |
|
||||
| `@CapabilityAnalyst` | Analyzes task requirements against available agents, workflows, and skills | When starting new task |
|
||||
| `@WorkflowArchitect` | Creates and maintains workflow definitions with complete architecture, Gitea integration, and quality gates | New workflow needed |
|
||||
| `@MarkdownValidator` | Validates and corrects Markdown descriptions for Gitea issues | Before issue creation |
|
||||
|
||||
### Status Labels
|
||||
|
||||
@@ -207,6 +162,46 @@ GITEA_TOKEN=your-token-here
|
||||
| `.kilo/skills/` | Skill modules |
|
||||
| `src/kilocode/` | TypeScript API for programmatic use |
|
||||
|
||||
## Skills Reference
|
||||
|
||||
### Containerization Skills
|
||||
| Skill | Purpose | Location |
|
||||
|-------|---------|----------|
|
||||
| `docker-compose` | Multi-container orchestration | `.kilo/skills/docker-compose/` |
|
||||
| `docker-swarm` | Production cluster deployment | `.kilo/skills/docker-swarm/` |
|
||||
| `docker-security` | Container security hardening | `.kilo/skills/docker-security/` |
|
||||
| `docker-monitoring` | Container monitoring/logging | `.kilo/skills/docker-monitoring/` |
|
||||
|
||||
### Node.js Skills
|
||||
| Skill | Purpose | Location |
|
||||
|-------|---------|----------|
|
||||
| `nodejs-express-patterns` | Express routing, middleware | `.kilo/skills/nodejs-express-patterns/` |
|
||||
| `nodejs-auth-jwt` | JWT authentication | `.kilo/skills/nodejs-auth-jwt/` |
|
||||
| `nodejs-security-owasp` | OWASP security | `.kilo/skills/nodejs-security-owasp/` |
|
||||
|
||||
### Database Skills
|
||||
| Skill | Purpose | Location |
|
||||
|-------|---------|----------|
|
||||
| `postgresql-patterns` | PostgreSQL patterns | `.kilo/skills/postgresql-patterns/` |
|
||||
| `sqlite-patterns` | SQLite patterns | `.kilo/skills/sqlite-patterns/` |
|
||||
| `clickhouse-patterns` | ClickHouse patterns | `.kilo/skills/clickhouse-patterns/` |
|
||||
|
||||
### Go Skills
|
||||
| Skill | Purpose | Location |
|
||||
|-------|---------|----------|
|
||||
| `go-modules` | Go modules management | `.kilo/skills/go-modules/` |
|
||||
| `go-concurrency` | Goroutines and channels | `.kilo/skills/go-concurrency/` |
|
||||
| `go-testing` | Go testing patterns | `.kilo/skills/go-testing/` |
|
||||
| `go-security` | Go security patterns | `.kilo/skills/go-security/` |
|
||||
|
||||
### Process Skills
|
||||
| Skill | Purpose | Location |
|
||||
|-------|---------|----------|
|
||||
| `planning-patterns` | CoT/ToT planning | `.kilo/skills/planning-patterns/` |
|
||||
| `memory-systems` | Memory management | `.kilo/skills/memory-systems/` |
|
||||
| `tool-use` | Tool usage patterns | `.kilo/skills/tool-use/` |
|
||||
| `research-cycle` | Self-improvement cycle | `.kilo/skills/research-cycle/` |
|
||||
|
||||
## Using the TypeScript API
|
||||
|
||||
```typescript
|
||||
|
||||
343
kilo-meta.json
Normal file
343
kilo-meta.json
Normal file
@@ -0,0 +1,343 @@
|
||||
{
|
||||
"$schema": "https://app.kilo.ai/config.json",
|
||||
"metaVersion": "1.0.0",
|
||||
"lastSync": "2026-04-05T12:19:32.133Z",
|
||||
"agents": {
|
||||
"requirement-refiner": {
|
||||
"file": ".kilo/agents/requirement-refiner.md",
|
||||
"description": "Converts vague ideas and bug reports into strict User Stories with acceptance criteria checklists",
|
||||
"model": "ollama-cloud/kimi-k2-thinking",
|
||||
"mode": "all",
|
||||
"color": "#4F46E5",
|
||||
"category": "core"
|
||||
},
|
||||
"history-miner": {
|
||||
"file": ".kilo/agents/history-miner.md",
|
||||
"description": "Analyzes git history to find duplicates and past solutions, preventing regression and duplicate work",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"mode": "subagent",
|
||||
"category": "core"
|
||||
},
|
||||
"system-analyst": {
|
||||
"file": ".kilo/agents/system-analyst.md",
|
||||
"description": "Designs technical specifications, data schemas, and API contracts before implementation",
|
||||
"model": "qwen/qwen3.6-plus:free",
|
||||
"mode": "subagent",
|
||||
"category": "core"
|
||||
},
|
||||
"sdet-engineer": {
|
||||
"file": ".kilo/agents/sdet-engineer.md",
|
||||
"description": "Writes tests following TDD methodology. Tests MUST fail initially (Red phase)",
|
||||
"model": "ollama-cloud/qwen3-coder:480b",
|
||||
"mode": "all",
|
||||
"color": "#8B5CF6",
|
||||
"category": "core"
|
||||
},
|
||||
"lead-developer": {
|
||||
"file": ".kilo/agents/lead-developer.md",
|
||||
"description": "Primary code writer for backend and core logic. Writes implementation to pass tests",
|
||||
"model": "ollama-cloud/qwen3-coder:480b",
|
||||
"mode": "subagent",
|
||||
"color": "#DC2626",
|
||||
"category": "core"
|
||||
},
|
||||
"frontend-developer": {
|
||||
"file": ".kilo/agents/frontend-developer.md",
|
||||
"description": "Handles UI implementation with multimodal capabilities. Accepts visual references like screenshots and mockups",
|
||||
"model": "ollama-cloud/kimi-k2.5",
|
||||
"mode": "all",
|
||||
"color": "#0EA5E9",
|
||||
"category": "core"
|
||||
},
|
||||
"backend-developer": {
|
||||
"file": ".kilo/agents/backend-developer.md",
|
||||
"description": "Backend specialist for Node.js, Express, APIs, and database integration",
|
||||
"model": "ollama-cloud/deepseek-v3.2",
|
||||
"mode": "subagent",
|
||||
"color": "#10B981",
|
||||
"category": "core"
|
||||
},
|
||||
"go-developer": {
|
||||
"file": ".kilo/agents/go-developer.md",
|
||||
"description": "Go backend specialist for Gin, Echo, APIs, and database integration",
|
||||
"model": "ollama-cloud/qwen3-coder:480b",
|
||||
"mode": "subagent",
|
||||
"color": "#00ADD8",
|
||||
"category": "core"
|
||||
},
|
||||
"devops-engineer": {
|
||||
"file": ".kilo/agents/devops-engineer.md",
|
||||
"description": "DevOps specialist for Docker, Kubernetes, CI/CD pipeline automation, and infrastructure management",
|
||||
"model": "ollama-cloud/deepseek-v3.2",
|
||||
"mode": "subagent",
|
||||
"color": "#FF6B35",
|
||||
"category": "core"
|
||||
},
|
||||
"code-skeptic": {
|
||||
"file": ".kilo/agents/code-skeptic.md",
|
||||
"description": "Adversarial code reviewer. Finds problems and issues. Does NOT suggest implementations",
|
||||
"model": "ollama-cloud/minimax-m2.5",
|
||||
"mode": "subagent",
|
||||
"color": "#E11D48",
|
||||
"category": "quality"
|
||||
},
|
||||
"the-fixer": {
|
||||
"file": ".kilo/agents/the-fixer.md",
|
||||
"description": "Iteratively fixes bugs based on specific error reports and test failures",
|
||||
"model": "ollama-cloud/minimax-m2.5",
|
||||
"mode": "all",
|
||||
"color": "#F59E0B",
|
||||
"category": "quality"
|
||||
},
|
||||
"performance-engineer": {
|
||||
"file": ".kilo/agents/performance-engineer.md",
|
||||
"description": "Reviews code for performance issues. Focuses on efficiency, N+1 queries, memory leaks, and algorithmic complexity",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"mode": "all",
|
||||
"color": "#0D9488",
|
||||
"category": "quality"
|
||||
},
|
||||
"security-auditor": {
|
||||
"file": ".kilo/agents/security-auditor.md",
|
||||
"description": "Scans for security vulnerabilities, OWASP Top 10, dependency CVEs, and hardcoded secrets",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"mode": "subagent",
|
||||
"color": "#DC2626",
|
||||
"category": "quality"
|
||||
},
|
||||
"visual-tester": {
|
||||
"file": ".kilo/agents/visual-tester.md",
|
||||
"description": "Visual regression testing agent that compares screenshots and detects UI differences using pixelmatch and image diff",
|
||||
"model": "ollama-cloud/glm-5",
|
||||
"mode": "subagent",
|
||||
"category": "quality"
|
||||
},
|
||||
"orchestrator": {
|
||||
"file": ".kilo/agents/orchestrator.md",
|
||||
"description": "Main dispatcher. Routes tasks between agents based on Issue status and manages the workflow state machine",
|
||||
"model": "ollama-cloud/glm-5",
|
||||
"mode": "all",
|
||||
"color": "#7C3AED",
|
||||
"category": "meta"
|
||||
},
|
||||
"release-manager": {
|
||||
"file": ".kilo/agents/release-manager.md",
|
||||
"description": "Manages git operations, semantic versioning, branching, and deployments. Ensures clean history",
|
||||
"model": "ollama-cloud/devstral-2:123b",
|
||||
"mode": "subagent",
|
||||
"category": "meta"
|
||||
},
|
||||
"evaluator": {
|
||||
"file": ".kilo/agents/evaluator.md",
|
||||
"description": "Scores agent effectiveness after task completion for continuous improvement",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"mode": "subagent",
|
||||
"color": "#047857",
|
||||
"category": "meta"
|
||||
},
|
||||
"prompt-optimizer": {
|
||||
"file": ".kilo/agents/prompt-optimizer.md",
|
||||
"description": "Improves agent system prompts based on performance failures. Meta-learner for prompt optimization",
|
||||
"model": "qwen/qwen3.6-plus:free",
|
||||
"mode": "subagent",
|
||||
"category": "meta"
|
||||
},
|
||||
"product-owner": {
|
||||
"file": ".kilo/agents/product-owner.md",
|
||||
"description": "Manages issue checklists, status labels, tracks progress and coordinates with human users",
|
||||
"model": "ollama-cloud/glm-5",
|
||||
"mode": "subagent",
|
||||
"category": "meta"
|
||||
},
|
||||
"agent-architect": {
|
||||
"file": ".kilo/agents/agent-architect.md",
|
||||
"description": "Creates, modifies, and reviews new agents, workflows, and skills based on capability gap analysis",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"mode": "subagent",
|
||||
"category": "meta"
|
||||
},
|
||||
"capability-analyst": {
|
||||
"file": ".kilo/agents/capability-analyst.md",
|
||||
"description": "Analyzes task requirements against available agents, workflows, and skills. Identifies gaps and recommends new components.",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"mode": "subagent",
|
||||
"category": "meta"
|
||||
},
|
||||
"workflow-architect": {
|
||||
"file": ".kilo/agents/workflow-architect.md",
|
||||
"description": "Creates and maintains workflow definitions with complete architecture, Gitea integration, and quality gates",
|
||||
"model": "ollama-cloud/gpt-oss:120b",
|
||||
"mode": "subagent",
|
||||
"category": "meta"
|
||||
},
|
||||
"markdown-validator": {
|
||||
"file": ".kilo/agents/markdown-validator.md",
|
||||
"description": "Validates and corrects Markdown descriptions for Gitea issues",
|
||||
"model": "ollama-cloud/nemotron-3-nano:30b",
|
||||
"mode": "subagent",
|
||||
"category": "meta"
|
||||
},
|
||||
"browser-automation": {
|
||||
"file": ".kilo/agents/browser-automation.md",
|
||||
"description": "Browser automation agent using Playwright MCP for E2E testing, form filling, navigation, and web interaction",
|
||||
"model": "ollama-cloud/glm-5",
|
||||
"mode": "subagent",
|
||||
"category": "testing"
|
||||
},
|
||||
"planner": {
|
||||
"file": ".kilo/agents/planner.md",
|
||||
"description": "Advanced task planner using Chain of Thought, Tree of Thoughts, and Plan-Execute-Reflect",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"mode": "subagent",
|
||||
"color": "#F59E0B",
|
||||
"category": "cognitive"
|
||||
},
|
||||
"reflector": {
|
||||
"file": ".kilo/agents/reflector.md",
|
||||
"description": "Self-reflection agent using Reflexion pattern - learns from mistakes",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"mode": "subagent",
|
||||
"color": "#10B981",
|
||||
"category": "cognitive"
|
||||
},
|
||||
"memory-manager": {
|
||||
"file": ".kilo/agents/memory-manager.md",
|
||||
"description": "Manages agent memory systems - short-term (context), long-term (vector store), and episodic (experiences)",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"mode": "subagent",
|
||||
"color": "#8B5CF6",
|
||||
"category": "cognitive"
|
||||
}
|
||||
},
|
||||
"commands": {
|
||||
"pipeline": {
|
||||
"file": ".kilo/commands/pipeline.md",
|
||||
"description": "Run full agent pipeline for issue with Gitea logging"
|
||||
},
|
||||
"status": {
|
||||
"file": ".kilo/commands/status.md",
|
||||
"description": "Check pipeline status for issue",
|
||||
"model": "qwen/qwen3.6-plus:free"
|
||||
},
|
||||
"evaluate": {
|
||||
"file": ".kilo/commands/evaluate.md",
|
||||
"description": "Generate performance report",
|
||||
"model": "ollama-cloud/gpt-oss:120b"
|
||||
},
|
||||
"plan": {
|
||||
"file": ".kilo/commands/plan.md",
|
||||
"description": "Creates detailed task plans",
|
||||
"model": "openrouter/qwen/qwen3-coder:free"
|
||||
},
|
||||
"ask": {
|
||||
"file": ".kilo/commands/ask.md",
|
||||
"description": "Answers codebase questions",
|
||||
"model": "openai/qwen3-32b"
|
||||
},
|
||||
"debug": {
|
||||
"file": ".kilo/commands/debug.md",
|
||||
"description": "Analyzes and fixes bugs",
|
||||
"model": "ollama-cloud/gpt-oss:20b"
|
||||
},
|
||||
"code": {
|
||||
"file": ".kilo/commands/code.md",
|
||||
"description": "Quick code generation",
|
||||
"model": "openrouter/qwen/qwen3-coder:free"
|
||||
},
|
||||
"research": {
|
||||
"file": ".kilo/commands/research.md",
|
||||
"description": "Run research and self-improvement",
|
||||
"model": "ollama-cloud/glm-5"
|
||||
},
|
||||
"feature": {
|
||||
"file": ".kilo/commands/feature.md",
|
||||
"description": "Full feature development pipeline",
|
||||
"model": "openrouter/qwen/qwen3-coder:free"
|
||||
},
|
||||
"hotfix": {
|
||||
"file": ".kilo/commands/hotfix.md",
|
||||
"description": "Hotfix workflow",
|
||||
"model": "openrouter/minimax/minimax-m2.5:free"
|
||||
},
|
||||
"review": {
|
||||
"file": ".kilo/commands/review.md",
|
||||
"description": "Code review workflow",
|
||||
"model": "openrouter/minimax/minimax-m2.5:free"
|
||||
},
|
||||
"review-watcher": {
|
||||
"file": ".kilo/commands/review-watcher.md",
|
||||
"description": "Auto-validate review results",
|
||||
"model": "ollama-cloud/glm-5"
|
||||
},
|
||||
"e2e-test": {
|
||||
"file": ".kilo/commands/e2e-test.md",
|
||||
"description": "Run E2E tests with browser automation"
|
||||
},
|
||||
"workflow": {
|
||||
"file": ".kilo/commands/workflow.md",
|
||||
"description": "Run complete workflow with quality gates",
|
||||
"model": "ollama-cloud/glm-5"
|
||||
},
|
||||
"landing-page": {
|
||||
"file": ".kilo/commands/landing-page.md",
|
||||
"description": "Create landing page CMS from HTML mockups",
|
||||
"model": "ollama-cloud/kimi-k2.5"
|
||||
},
|
||||
"commerce": {
|
||||
"file": ".kilo/commands/commerce.md",
|
||||
"description": "Create e-commerce site with products, cart, payments",
|
||||
"model": "qwen/qwen3-coder:free"
|
||||
},
|
||||
"blog": {
|
||||
"file": ".kilo/commands/blog.md",
|
||||
"description": "Create blog/CMS with posts, comments, SEO",
|
||||
"model": "qwen/qwen3-coder:free"
|
||||
},
|
||||
"booking": {
|
||||
"file": ".kilo/commands/booking.md",
|
||||
"description": "Create booking system for services/appointments",
|
||||
"model": "qwen/qwen3-coder:free"
|
||||
}
|
||||
},
|
||||
"syncTargets": [
|
||||
{
|
||||
"file": ".kilo/agents/*.md",
|
||||
"type": "agent-frontmatter",
|
||||
"fields": [
|
||||
"model",
|
||||
"mode",
|
||||
"description",
|
||||
"color"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file": ".kilo/KILO_SPEC.md",
|
||||
"section": "### Pipeline Agents",
|
||||
"type": "markdown-table"
|
||||
},
|
||||
{
|
||||
"file": ".kilo/KILO_SPEC.md",
|
||||
"section": "### Workflow Commands",
|
||||
"type": "markdown-table"
|
||||
},
|
||||
{
|
||||
"file": "AGENTS.md",
|
||||
"section": "Pipeline Agents",
|
||||
"type": "category-tables"
|
||||
},
|
||||
{
|
||||
"file": ".kilo/agents/orchestrator.md",
|
||||
"section": "Task Tool Invocation",
|
||||
"type": "subagent-mapping"
|
||||
}
|
||||
],
|
||||
"validation": {
|
||||
"checkOn": [
|
||||
"evolutionary-mode",
|
||||
"pre-commit",
|
||||
"manual-sync"
|
||||
],
|
||||
"failOnError": true,
|
||||
"reportFile": ".kilo/logs/sync-violations.json"
|
||||
}
|
||||
}
|
||||
464
kilo.jsonc
Normal file
464
kilo.jsonc
Normal file
@@ -0,0 +1,464 @@
|
||||
{
|
||||
"$schema": "https://app.kilo.ai/config.json",
|
||||
"instructions": [
|
||||
".kilo/rules/global.md",
|
||||
".kilo/rules/agent-patterns.md",
|
||||
".kilo/rules/docker.md",
|
||||
".kilo/rules/go.md",
|
||||
".kilo/rules/history-miner.md",
|
||||
".kilo/rules/lead-developer.md",
|
||||
".kilo/rules/nodejs.md",
|
||||
".kilo/rules/prompt-engineering.md",
|
||||
".kilo/rules/release-manager.md",
|
||||
".kilo/rules/sdet-engineer.md",
|
||||
".kilo/rules/code-skeptic.md",
|
||||
".kilo/rules/evolutionary-sync.md"
|
||||
],
|
||||
"skills": {
|
||||
"paths": [".kilo/skills"]
|
||||
},
|
||||
"agent": {
|
||||
"requirement-refiner": {
|
||||
"description": "Converts vague ideas and bug reports into strict User Stories with acceptance criteria checklists",
|
||||
"mode": "all",
|
||||
"model": "ollama-cloud/kimi-k2-thinking",
|
||||
"color": "#4F46E5",
|
||||
"permission": {
|
||||
"read": "allow",
|
||||
"edit": "allow",
|
||||
"write": "allow",
|
||||
"bash": "allow",
|
||||
"glob": "allow",
|
||||
"grep": "allow",
|
||||
"task": {
|
||||
"*": "deny",
|
||||
"history-miner": "allow",
|
||||
"system-analyst": "allow"
|
||||
}
|
||||
}
|
||||
},
|
||||
"history-miner": {
|
||||
"description": "Analyzes git history to find duplicates and past solutions, preventing regression and duplicate work",
|
||||
"mode": "subagent",
|
||||
"model": "ollama-cloud/nemotron-3-super"
|
||||
},
|
||||
"system-analyst": {
|
||||
"description": "Designs technical specifications, data schemas, and API contracts before implementation",
|
||||
"mode": "subagent",
|
||||
"model": "qwen/qwen3.6-plus:free"
|
||||
},
|
||||
"sdet-engineer": {
|
||||
"description": "Writes tests following TDD methodology. Tests MUST fail initially (Red phase)",
|
||||
"mode": "all",
|
||||
"model": "ollama-cloud/qwen3-coder:480b",
|
||||
"color": "#8B5CF6",
|
||||
"permission": {
|
||||
"read": "allow",
|
||||
"edit": "allow",
|
||||
"write": "allow",
|
||||
"bash": "allow",
|
||||
"glob": "allow",
|
||||
"grep": "allow",
|
||||
"task": {
|
||||
"*": "deny",
|
||||
"lead-developer": "allow"
|
||||
}
|
||||
}
|
||||
},
|
||||
"lead-developer": {
|
||||
"description": "Primary code writer for backend and core logic. Writes implementation to pass tests",
|
||||
"mode": "subagent",
|
||||
"model": "ollama-cloud/qwen3-coder:480b",
|
||||
"color": "#DC2626",
|
||||
"permission": {
|
||||
"read": "allow",
|
||||
"edit": "allow",
|
||||
"write": "allow",
|
||||
"bash": "allow",
|
||||
"glob": "allow",
|
||||
"grep": "allow",
|
||||
"task": {
|
||||
"*": "deny",
|
||||
"code-skeptic": "allow"
|
||||
}
|
||||
}
|
||||
},
|
||||
"frontend-developer": {
|
||||
"description": "Handles UI implementation with multimodal capabilities. Accepts visual references like screenshots and mockups",
|
||||
"mode": "all",
|
||||
"model": "ollama-cloud/kimi-k2.5",
|
||||
"color": "#0EA5E9",
|
||||
"permission": {
|
||||
"read": "allow",
|
||||
"edit": "allow",
|
||||
"write": "allow",
|
||||
"bash": "allow",
|
||||
"glob": "allow",
|
||||
"grep": "allow",
|
||||
"task": {
|
||||
"*": "deny",
|
||||
"code-skeptic": "allow"
|
||||
}
|
||||
}
|
||||
},
|
||||
"backend-developer": {
|
||||
"description": "Backend specialist for Node.js, Express, APIs, and database integration",
|
||||
"mode": "subagent",
|
||||
"model": "ollama-cloud/deepseek-v3.2",
|
||||
"color": "#10B981",
|
||||
"permission": {
|
||||
"read": "allow",
|
||||
"edit": "allow",
|
||||
"write": "allow",
|
||||
"bash": "allow",
|
||||
"glob": "allow",
|
||||
"grep": "allow",
|
||||
"task": {
|
||||
"*": "deny",
|
||||
"code-skeptic": "allow"
|
||||
}
|
||||
}
|
||||
},
|
||||
"go-developer": {
|
||||
"description": "Go backend specialist for Gin, Echo, APIs, and database integration",
|
||||
"mode": "subagent",
|
||||
"model": "ollama-cloud/qwen3-coder:480b",
|
||||
"color": "#00ADD8",
|
||||
"permission": {
|
||||
"read": "allow",
|
||||
"edit": "allow",
|
||||
"write": "allow",
|
||||
"bash": "allow",
|
||||
"glob": "allow",
|
||||
"grep": "allow",
|
||||
"task": {
|
||||
"*": "deny",
|
||||
"code-skeptic": "allow"
|
||||
}
|
||||
}
|
||||
},
|
||||
"devops-engineer": {
|
||||
"description": "DevOps specialist for Docker, Kubernetes, CI/CD pipeline automation, and infrastructure management",
|
||||
"mode": "subagent",
|
||||
"model": "ollama-cloud/deepseek-v3.2",
|
||||
"color": "#FF6B35",
|
||||
"permission": {
|
||||
"read": "allow",
|
||||
"edit": "allow",
|
||||
"write": "allow",
|
||||
"bash": "allow",
|
||||
"glob": "allow",
|
||||
"grep": "allow",
|
||||
"task": {
|
||||
"*": "deny",
|
||||
"code-skeptic": "allow",
|
||||
"security-auditor": "allow"
|
||||
}
|
||||
}
|
||||
},
|
||||
"code-skeptic": {
|
||||
"description": "Adversarial code reviewer. Finds problems and issues. Does NOT suggest implementations",
|
||||
"mode": "subagent",
|
||||
"model": "ollama-cloud/minimax-m2.5",
|
||||
"color": "#E11D48",
|
||||
"permission": {
|
||||
"read": "allow",
|
||||
"bash": "allow",
|
||||
"glob": "allow",
|
||||
"grep": "allow",
|
||||
"task": {
|
||||
"*": "deny",
|
||||
"the-fixer": "allow",
|
||||
"performance-engineer": "allow"
|
||||
}
|
||||
}
|
||||
},
|
||||
"the-fixer": {
|
||||
"description": "Iteratively fixes bugs based on specific error reports and test failures",
|
||||
"mode": "all",
|
||||
"model": "ollama-cloud/minimax-m2.5",
|
||||
"color": "#F59E0B",
|
||||
"permission": {
|
||||
"read": "allow",
|
||||
"edit": "allow",
|
||||
"write": "allow",
|
||||
"bash": "allow",
|
||||
"glob": "allow",
|
||||
"grep": "allow",
|
||||
"task": {
|
||||
"*": "deny",
|
||||
"code-skeptic": "allow",
|
||||
"orchestrator": "allow"
|
||||
}
|
||||
}
|
||||
},
|
||||
"performance-engineer": {
|
||||
"description": "Reviews code for performance issues. Focuses on efficiency, N+1 queries, memory leaks, and algorithmic complexity",
|
||||
"mode": "all",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"color": "#0D9488",
|
||||
"permission": {
|
||||
"read": "allow",
|
||||
"bash": "allow",
|
||||
"glob": "allow",
|
||||
"grep": "allow",
|
||||
"task": {
|
||||
"*": "deny",
|
||||
"the-fixer": "allow",
|
||||
"security-auditor": "allow"
|
||||
}
|
||||
}
|
||||
},
|
||||
"security-auditor": {
|
||||
"description": "Scans for security vulnerabilities, OWASP Top 10, dependency CVEs, and hardcoded secrets",
|
||||
"mode": "subagent",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"color": "#DC2626",
|
||||
"permission": {
|
||||
"read": "allow",
|
||||
"bash": "allow",
|
||||
"glob": "allow",
|
||||
"grep": "allow",
|
||||
"task": {
|
||||
"*": "deny",
|
||||
"the-fixer": "allow",
|
||||
"release-manager": "allow"
|
||||
}
|
||||
}
|
||||
},
|
||||
"visual-tester": {
|
||||
"description": "Visual regression testing agent that compares screenshots and detects UI differences using pixelmatch and image diff",
|
||||
"mode": "subagent",
|
||||
"model": "ollama-cloud/glm-5",
|
||||
"permission": {
|
||||
"read": "allow",
|
||||
"bash": "allow",
|
||||
"glob": "allow",
|
||||
"grep": "allow",
|
||||
"task": {
|
||||
"*": "deny"
|
||||
}
|
||||
}
|
||||
},
|
||||
"orchestrator": {
|
||||
"description": "Main dispatcher. Routes tasks between agents based on Issue status and manages the workflow state machine",
|
||||
"mode": "all",
|
||||
"model": "ollama-cloud/glm-5",
|
||||
"color": "#7C3AED",
|
||||
"permission": {
|
||||
"read": "allow",
|
||||
"edit": "allow",
|
||||
"write": "allow",
|
||||
"bash": "allow",
|
||||
"glob": "allow",
|
||||
"grep": "allow",
|
||||
"task": {
|
||||
"*": "deny",
|
||||
"history-miner": "allow",
|
||||
"system-analyst": "allow",
|
||||
"sdet-engineer": "allow",
|
||||
"lead-developer": "allow",
|
||||
"code-skeptic": "allow",
|
||||
"the-fixer": "allow",
|
||||
"performance-engineer": "allow",
|
||||
"security-auditor": "allow",
|
||||
"release-manager": "allow",
|
||||
"evaluator": "allow",
|
||||
"prompt-optimizer": "allow",
|
||||
"product-owner": "allow",
|
||||
"requirement-refiner": "allow",
|
||||
"frontend-developer": "allow",
|
||||
"browser-automation": "allow",
|
||||
"visual-tester": "allow",
|
||||
"planner": "allow",
|
||||
"reflector": "allow",
|
||||
"memory-manager": "allow",
|
||||
"devops-engineer": "allow"
|
||||
}
|
||||
}
|
||||
},
|
||||
"release-manager": {
|
||||
"description": "Manages git operations, semantic versioning, branching, and deployments. Ensures clean history",
|
||||
"mode": "subagent",
|
||||
"model": "ollama-cloud/devstral-2:123b",
|
||||
"permission": {
|
||||
"read": "allow",
|
||||
"edit": "allow",
|
||||
"write": "allow",
|
||||
"bash": "allow",
|
||||
"glob": "allow",
|
||||
"grep": "allow",
|
||||
"webfetch": "allow",
|
||||
"task": {
|
||||
"*": "deny"
|
||||
}
|
||||
}
|
||||
},
|
||||
"evaluator": {
|
||||
"description": "Scores agent effectiveness after task completion for continuous improvement",
|
||||
"mode": "subagent",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"color": "#047857",
|
||||
"permission": {
|
||||
"read": "allow",
|
||||
"glob": "allow",
|
||||
"grep": "allow",
|
||||
"task": {
|
||||
"*": "deny",
|
||||
"prompt-optimizer": "allow",
|
||||
"product-owner": "allow"
|
||||
}
|
||||
}
|
||||
},
|
||||
"prompt-optimizer": {
|
||||
"description": "Improves agent system prompts based on performance failures. Meta-learner for prompt optimization",
|
||||
"mode": "subagent",
|
||||
"model": "qwen/qwen3.6-plus:free",
|
||||
"permission": {
|
||||
"read": "allow",
|
||||
"edit": "allow",
|
||||
"write": "allow",
|
||||
"glob": "allow",
|
||||
"grep": "allow",
|
||||
"task": {
|
||||
"*": "deny"
|
||||
}
|
||||
}
|
||||
},
|
||||
"product-owner": {
|
||||
"description": "Manages issue checklists, status labels, tracks progress and coordinates with human users",
|
||||
"mode": "subagent",
|
||||
"model": "ollama-cloud/glm-5",
|
||||
"permission": {
|
||||
"read": "allow",
|
||||
"edit": "allow",
|
||||
"write": "allow",
|
||||
"bash": "allow",
|
||||
"glob": "allow",
|
||||
"grep": "allow",
|
||||
"webfetch": "allow",
|
||||
"task": {
|
||||
"*": "deny"
|
||||
}
|
||||
}
|
||||
},
|
||||
"agent-architect": {
|
||||
"description": "Creates, modifies, and reviews new agents, workflows, and skills based on capability gap analysis",
|
||||
"mode": "subagent",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"permission": {
|
||||
"read": "allow",
|
||||
"edit": "allow",
|
||||
"write": "allow",
|
||||
"glob": "allow",
|
||||
"grep": "allow",
|
||||
"task": {
|
||||
"*": "deny"
|
||||
}
|
||||
}
|
||||
},
|
||||
"capability-analyst": {
|
||||
"description": "Analyzes task requirements against available agents, workflows, and skills. Identifies gaps and recommends new components.",
|
||||
"mode": "subagent",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"permission": {
|
||||
"read": "allow",
|
||||
"glob": "allow",
|
||||
"grep": "allow",
|
||||
"task": {
|
||||
"*": "deny"
|
||||
}
|
||||
}
|
||||
},
|
||||
"workflow-architect": {
|
||||
"description": "Creates and maintains workflow definitions with complete architecture, Gitea integration, and quality gates",
|
||||
"mode": "subagent",
|
||||
"model": "ollama-cloud/gpt-oss:120b",
|
||||
"permission": {
|
||||
"read": "allow",
|
||||
"edit": "allow",
|
||||
"write": "allow",
|
||||
"glob": "allow",
|
||||
"grep": "allow",
|
||||
"task": {
|
||||
"*": "deny"
|
||||
}
|
||||
}
|
||||
},
|
||||
"markdown-validator": {
|
||||
"description": "Validates and corrects Markdown descriptions for Gitea issues",
|
||||
"mode": "subagent",
|
||||
"model": "ollama-cloud/nemotron-3-nano:30b",
|
||||
"permission": {
|
||||
"read": "allow",
|
||||
"edit": "allow",
|
||||
"write": "allow",
|
||||
"glob": "allow",
|
||||
"grep": "allow",
|
||||
"task": {
|
||||
"*": "deny"
|
||||
}
|
||||
}
|
||||
},
|
||||
"browser-automation": {
|
||||
"description": "Browser automation agent using Playwright MCP for E2E testing, form filling, navigation, and web interaction",
|
||||
"mode": "subagent",
|
||||
"model": "ollama-cloud/glm-5",
|
||||
"permission": {
|
||||
"read": "allow",
|
||||
"edit": "allow",
|
||||
"write": "allow",
|
||||
"bash": "allow",
|
||||
"glob": "allow",
|
||||
"grep": "allow",
|
||||
"task": {
|
||||
"*": "deny"
|
||||
}
|
||||
}
|
||||
},
|
||||
"planner": {
|
||||
"description": "Advanced task planner using Chain of Thought, Tree of Thoughts, and Plan-Execute-Reflect",
|
||||
"mode": "subagent",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"color": "#F59E0B",
|
||||
"permission": {
|
||||
"read": "allow",
|
||||
"write": "allow",
|
||||
"glob": "allow",
|
||||
"grep": "allow",
|
||||
"task": {
|
||||
"*": "deny"
|
||||
}
|
||||
}
|
||||
},
|
||||
"reflector": {
|
||||
"description": "Self-reflection agent using Reflexion pattern - learns from mistakes",
|
||||
"mode": "subagent",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"color": "#10B981",
|
||||
"permission": {
|
||||
"read": "allow",
|
||||
"grep": "allow",
|
||||
"glob": "allow",
|
||||
"task": {
|
||||
"*": "deny"
|
||||
}
|
||||
}
|
||||
},
|
||||
"memory-manager": {
|
||||
"description": "Manages agent memory systems - short-term (context), long-term (vector store), and episodic (experiences)",
|
||||
"mode": "subagent",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"color": "#8B5CF6",
|
||||
"permission": {
|
||||
"read": "allow",
|
||||
"write": "allow",
|
||||
"glob": "allow",
|
||||
"grep": "allow",
|
||||
"task": {
|
||||
"*": "deny"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
391
scripts/sync-agents.cjs
Normal file
391
scripts/sync-agents.cjs
Normal file
@@ -0,0 +1,391 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* Sync Agent Models
|
||||
*
|
||||
* Synchronizes agent definitions across:
|
||||
* - kilo.jsonc (Kilo Code official config)
|
||||
* - kilo-meta.json (metadata for sync)
|
||||
* - .kilo/agents/*.md (agent definitions)
|
||||
* - .kilo/KILO_SPEC.md (documentation)
|
||||
* - AGENTS.md (project reference)
|
||||
*
|
||||
* Run: node scripts/sync-agents.cjs [--check | --fix]
|
||||
*
|
||||
* --check: Report discrepancies without fixing
|
||||
* --fix: Update all files to match kilo-meta.json
|
||||
*/
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
// Parent directory of the folder that contains this script; every path below is built from it.
const ROOT = path.resolve(__dirname, '..');
|
||||
// Path of kilo.jsonc directly under ROOT (read by loadKiloJsonc).
const KILO_JSONC = path.join(ROOT, 'kilo.jsonc');
|
||||
// Path of kilo-meta.json directly under ROOT (read by loadKiloMeta, rewritten by updateLastSync).
const KILO_META = path.join(ROOT, 'kilo-meta.json');
|
||||
// Path of the .kilo/agents directory under ROOT.
// NOTE(review): not referenced by the functions visible in this file section — confirm it is still needed.
const AGENTS_DIR = path.join(ROOT, '.kilo', 'agents');
|
||||
// Path of .kilo/KILO_SPEC.md under ROOT (rewritten by updateKiloSpec).
const KILO_SPEC = path.join(ROOT, '.kilo', 'KILO_SPEC.md');
|
||||
// Path of AGENTS.md directly under ROOT (rewritten by updateAgentsMd).
const AGENTS_MD = path.join(ROOT, 'AGENTS.md');
|
||||
|
||||
/**
 * Read kilo-meta.json (the source of truth for the sync) and parse it.
 *
 * @returns {object} The parsed contents of the file at KILO_META.
 * @throws If the file cannot be read or does not contain valid JSON.
 */
function loadKiloMeta() {
  return JSON.parse(fs.readFileSync(KILO_META, 'utf-8'));
}
|
||||
|
||||
/**
 * Load kilo.jsonc (Kilo Code config) and parse it as JSON.
 *
 * JSONC extras are removed before parsing: `//` comments, block comments and
 * trailing commas before `}` or `]`. Both passes match string literals first
 * and copy them through unchanged, so text inside strings (for example the
 * `//` in the "$schema" URL) is never mistaken for a comment or a comma.
 *
 * @returns {object} The parsed config, or `{ agent: {} }` when the file cannot
 *   be read or parsed (a warning is printed in that case).
 */
function loadKiloJsonc() {
  // Alternation order matters: a string literal is consumed as one token
  // before the comment / comma alternatives get a chance to match inside it.
  const stringOrComment = /"(?:[^"\\]|\\.)*"|\/\/.*|\/\*[\s\S]*?\*\//g;
  const stringOrTrailingComma = /"(?:[^"\\]|\\.)*"|,(\s*[}\]])/g;

  try {
    const content = fs.readFileSync(KILO_JSONC, 'utf-8');
    const cleaned = content
      // Drop comments, keep strings verbatim.
      .replace(stringOrComment, (token) => (token.startsWith('"') ? token : ''))
      // Drop a comma that is only followed by whitespace and a closer.
      .replace(stringOrTrailingComma, (token, closer) => (closer === undefined ? token : closer));
    return JSON.parse(cleaned);
  } catch (error) {
    console.warn('Warning: Could not parse kilo.jsonc:', error.message);
    console.warn('Skipping kilo.jsonc validation.');
    return { agent: {} };
  }
}
|
||||
|
||||
/**
 * Extract the top-level `key: value` pairs from the frontmatter of an agent
 * markdown file.
 *
 * The frontmatter is the text between a leading `---` line and the next `---`
 * line; both LF and CRLF line endings are accepted. This is a deliberately
 * small reader, not a YAML parser:
 * - indented lines that follow a key (e.g. the entries under `permission:`)
 *   are skipped, so a nested mapping is recorded as an empty string;
 * - a value wrapped in matching double or single quotes is unquoted.
 *
 * @param {string} content - Full text of the markdown file.
 * @returns {Object<string, string>} Top-level keys mapped to their string
 *   values; an empty object when no frontmatter is present.
 */
function parseFrontmatter(content) {
  const match = content.match(/^---\r?\n([\s\S]*?)\r?\n---/);
  if (!match) return {};

  const frontmatter = {};
  let currentKey = null;

  for (const line of match[1].split(/\r?\n/)) {
    // Space- or tab-indented lines continue the previous key's nested value.
    if (currentKey && /^[ \t]/.test(line)) {
      continue;
    }

    const colonIndex = line.indexOf(':');
    if (colonIndex <= 0) {
      continue;
    }

    const key = line.slice(0, colonIndex).trim();
    let value = line.slice(colonIndex + 1).trim();

    // Strip one pair of matching quotes so `"#fff"`, `'#fff'` and `#fff` compare equal.
    const quoted = value.match(/^(["'])(.*)\1$/);
    if (quoted) {
      value = quoted[2];
    }

    frontmatter[key] = value;
    currentKey = key;
  }

  return frontmatter;
}
|
||||
|
||||
/**
 * Return `content` with the given top-level frontmatter keys set.
 *
 * An existing `key: ...` line is rewritten in place; a missing key is inserted
 * directly after the opening `---` line. Content without a leading frontmatter
 * block is returned unchanged. LF and CRLF files are both handled and keep
 * their line-ending style.
 *
 * Values are written literally: `$` sequences in a value are never treated as
 * replacement patterns. A value that YAML would read as a comment (it starts
 * with `#`, or contains whitespace followed by `#`) is wrapped in double
 * quotes, e.g. `color: "#7C3AED"`.
 *
 * @param {string} content - Full text of the markdown file.
 * @param {Object<string, *>} updates - Keys to set, with their new values.
 * @returns {string} The updated file text.
 */
function updateFrontmatter(content, updates) {
  const match = content.match(/^---\r?\n[\s\S]*?\r?\n---(?:\r?\n|$)/);
  if (!match) return content;

  const original = match[0];
  const eol = original.includes('\r\n') ? '\r\n' : '\n';
  let frontmatter = original;

  for (const [key, value] of Object.entries(updates)) {
    const text = String(value);
    const alreadyQuoted = /^(["']).*\1$/.test(text);
    const readsAsComment = /^#|\s#/.test(text);
    const rendered = readsAsComment && !alreadyQuoted
      ? `"${text.replace(/(["\\])/g, '\\$1')}"`
      : text;
    const line = `${key}: ${rendered}`;

    // Escape the key so it is matched literally inside the dynamic regex.
    const escapedKey = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const existing = new RegExp(`^${escapedKey}:.*$`, 'm');

    // Function replacers keep `$&`, `$1`, ... in the value from being expanded.
    if (existing.test(frontmatter)) {
      frontmatter = frontmatter.replace(existing, () => line);
    } else {
      frontmatter = frontmatter.replace(/^---\r?\n/, (open) => `${open}${line}${eol}`);
    }
  }

  // The match is anchored at index 0, so the body is everything after it.
  return frontmatter + content.slice(original.length);
}
|
||||
|
||||
/**
 * Compare every agent markdown file against its entry in kilo-meta.json.
 *
 * For each entry in `meta.agents` the file at `agent.file` (relative to ROOT)
 * is read and its frontmatter compared field by field:
 * - `model` is always compared;
 * - `mode` and `color` are compared only when the meta entry declares them.
 *
 * `color` is checked here because fixAgents also repairs it; without the check
 * a colour-only drift would never be reported and therefore never fixed.
 *
 * @param {object} meta - Parsed kilo-meta.json; `meta.agents` maps agent name
 *   to `{ file, model, mode?, color? }`.
 * @returns {Array<object>} One violation per problem. Each has `type`
 *   (`missing-file`, `model-mismatch`, `mode-mismatch` or `color-mismatch`),
 *   `agent`, `file` and `message`; mismatches also carry `expected`/`actual`.
 */
function checkAgents(meta) {
  const violations = [];

  for (const [name, agent] of Object.entries(meta.agents)) {
    const filePath = path.join(ROOT, agent.file);

    if (!fs.existsSync(filePath)) {
      violations.push({
        type: 'missing-file',
        agent: name,
        file: agent.file,
        message: `Agent file not found: ${agent.file}`
      });
      continue;
    }

    const frontmatter = parseFrontmatter(fs.readFileSync(filePath, 'utf-8'));

    // Same field set and conditions that fixAgents uses when repairing a file.
    const fields = ['model', 'mode', 'color'].filter(
      (field) => field === 'model' || agent[field]
    );

    for (const field of fields) {
      if (frontmatter[field] === agent[field]) {
        continue;
      }
      violations.push({
        type: `${field}-mismatch`,
        agent: name,
        file: agent.file,
        expected: agent[field],
        actual: frontmatter[field],
        message: `${name}: expected ${field} ${agent[field]}, got ${frontmatter[field]}`
      });
    }
  }

  return violations;
}
|
||||
|
||||
/**
 * Placeholder for validating kilo.jsonc against kilo-meta.json.
 *
 * No validation is performed. Per the original author's note, kilo.jsonc is
 * auto-generated from the agent files, and the source of truth lives in the
 * .md files and kilo-meta.json.
 *
 * @param {object} meta - Parsed kilo-meta.json (currently unused).
 * @returns {Array<object>} Always an empty list of violations.
 */
function checkKiloJsonc(meta) {
  const violations = [];
  return violations;
}
|
||||
|
||||
/**
 * Rewrite agent markdown files so their frontmatter matches kilo-meta.json.
 *
 * For every entry in `meta.agents` the file at `agent.file` (relative to ROOT)
 * is read. `model` is always compared; `mode` and `color` are compared only
 * when the meta entry declares them. Files with at least one differing field
 * are rewritten through updateFrontmatter; files that already match are left
 * untouched and produce no report entry.
 *
 * @param {object} meta - Parsed kilo-meta.json.
 * @returns {Array<object>} Report entries: `{ agent, action: 'skipped', reason }`
 *   for missing files, `{ agent, action: 'updated', updates }` for rewritten
 *   ones, where `updates` lists the changed field names.
 */
function fixAgents(meta) {
  const report = [];

  Object.entries(meta.agents).forEach(([name, agent]) => {
    const target = path.join(ROOT, agent.file);

    if (!fs.existsSync(target)) {
      report.push({ agent: name, action: 'skipped', reason: 'file not found' });
      return;
    }

    const source = fs.readFileSync(target, 'utf-8');
    const current = parseFrontmatter(source);

    // Field order (model, mode, color) determines the order of `updates`.
    const pending = Object.fromEntries(
      ['model', 'mode', 'color']
        .filter((field) => field === 'model' || agent[field])
        .filter((field) => current[field] !== agent[field])
        .map((field) => [field, agent[field]])
    );

    const changed = Object.keys(pending);
    if (changed.length === 0) {
      return;
    }

    fs.writeFileSync(target, updateFrontmatter(source, pending), 'utf-8');
    report.push({ agent: name, action: 'updated', updates: changed });
  });

  return report;
}
|
||||
|
||||
/**
 * Regenerate the "Pipeline Agents" and "Workflow Commands" tables in
 * KILO_SPEC.md from kilo-meta.json and write the file back.
 *
 * - Agents table: one row per entry in `meta.agents`, showing the PascalCase
 *   `@Name`, the first sentence of the description and the model. The existing
 *   table is replaced up to (not including) the blank line before `**Note`.
 * - Commands table: one row per entry in `meta.commands` that has a `model`.
 *   The existing table is replaced up to the blank line before the next `###`.
 *
 * A sentence ends at a period followed by whitespace or end of text, so names
 * such as "Node.js" are not cut short. The replacement ends at the last table
 * row; the blank line kept by the lookahead separates it from what follows,
 * so repeated runs do not add empty lines. A section whose pattern is not
 * found is left as it is.
 *
 * @param {object} meta - Parsed kilo-meta.json (`agents`, optional `commands`).
 * @returns {void}
 */
function updateKiloSpec(meta) {
  let content = fs.readFileSync(KILO_SPEC, 'utf-8');

  // First sentence only: split on ". " / ".<end>", never on a period inside a word.
  const firstSentence = (text) => String(text ?? '').split(/\.(?:\s|$)/)[0];
  const toDisplayName = (name) =>
    name.split('-').map((w) => w.charAt(0).toUpperCase() + w.slice(1)).join('');

  // Build agents table
  const agentRows = Object.entries(meta.agents)
    .map(([name, agent]) =>
      `| \`@${toDisplayName(name)}\` | ${firstSentence(agent.description)}. | ${agent.model} |`)
    .join('\n');

  const agentsTable = `### Pipeline Agents\n\n| Agent | Role | Model |\n|-------|------|-------|\n${agentRows}`;

  // Replace agents section (function replacer: table text is inserted literally)
  content = content.replace(
    /### Pipeline Agents\n\n\| Agent \| Role \| Model \|[\s\S]*?(?=\n\n\*\*Note)/,
    () => agentsTable
  );

  // Build commands table; commands without a model are not listed
  const commandRows = Object.entries(meta.commands ?? {})
    .filter(([, cmd]) => cmd.model)
    .map(([name, cmd]) => `| \`/${name}\` | ${firstSentence(cmd.description)}. | ${cmd.model} |`)
    .join('\n');

  const commandsTable = `### Workflow Commands\n\n| Command | Description | Model |\n|---------|-------------|-------|\n${commandRows}`;

  // Replace commands section
  content = content.replace(
    /### Workflow Commands\n\n\| Command \| Description \| Model \|[\s\S]*?(?=\n\n###)/,
    () => commandsTable
  );

  fs.writeFileSync(KILO_SPEC, content, 'utf-8');
}
|
||||
|
||||
/**
 * Regenerate the per-category agent tables in AGENTS.md from kilo-meta.json
 * and write the file back.
 *
 * For each known category that has at least one agent, the text from the
 * category heading up to the next `###` (or end of file) is replaced by the
 * heading followed by a table with the agent's PascalCase `@Name`, the first
 * sentence of its description and the trigger text. Agents without an entry
 * in the trigger map are listed as "Manual invocation". Headings that do not
 * occur in the file are skipped.
 *
 * A sentence ends at a period followed by whitespace or end of text, so names
 * such as "Node.js" are not cut short.
 *
 * @param {object} meta - Parsed kilo-meta.json; `meta.agents` entries carry
 *   `category` and `description`.
 * @returns {void}
 */
function updateAgentsMd(meta) {
  let content = fs.readFileSync(AGENTS_MD, 'utf-8');

  // Category key in kilo-meta.json -> heading in AGENTS.md
  const categories = {
    core: '### Core Development',
    quality: '### Quality Assurance',
    meta: '### Meta & Process',
    cognitive: '### Cognitive Enhancement',
    testing: '### Testing'
  };

  // Agent name -> text shown in the "When Invoked" column
  const triggers = {
    'requirement-refiner': 'Issue status: new',
    'history-miner': 'Status: planned',
    'system-analyst': 'Status: researching',
    'sdet-engineer': 'Status: designed',
    'lead-developer': 'Status: testing',
    'frontend-developer': 'When UI work needed',
    'backend-developer': 'When backend needed',
    'go-developer': 'When Go backend needed',
    'devops-engineer': 'When deployment/infra needed',
    'code-skeptic': 'Status: implementing',
    'the-fixer': 'When review fails',
    'performance-engineer': 'After code-skeptic',
    'security-auditor': 'After performance',
    'visual-tester': 'When UI changes',
    'orchestrator': 'Manages all agent routing',
    'release-manager': 'Status: releasing',
    'evaluator': 'Status: evaluated',
    'prompt-optimizer': 'When score < 7',
    'product-owner': 'Manages issues',
    'agent-architect': 'When gaps identified',
    'capability-analyst': 'When starting new task',
    'workflow-architect': 'New workflow needed',
    'markdown-validator': 'Before issue creation',
    'browser-automation': 'E2E testing needed',
    'planner': 'Complex tasks',
    'reflector': 'After each agent',
    'memory-manager': 'Context management'
  };

  // First sentence only: split on ". " / ".<end>", never on a period inside a word.
  const firstSentence = (text) => String(text ?? '').split(/\.(?:\s|$)/)[0];

  for (const [cat, heading] of Object.entries(categories)) {
    const agents = Object.entries(meta.agents)
      .filter(([, a]) => a.category === cat)
      .map(([name, agent]) => {
        const displayName = name.split('-').map((w) => w.charAt(0).toUpperCase() + w.slice(1)).join('');
        return `| \`@${displayName}\` | ${firstSentence(agent.description)} | ${triggers[name] || 'Manual invocation'} |`;
      })
      .join('\n');

    // Categories without agents keep whatever the file currently contains.
    if (!agents) {
      continue;
    }

    const table = `${heading}\n| Agent | Role | When Invoked |\n|-------|------|--------------|\n${agents}`;

    // Heading is matched literally; the section runs to the next "###" or end of file.
    const escapedHeading = heading.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const regex = new RegExp(`${escapedHeading}[\\s\\S]*?(?=###|$)`);
    if (regex.test(content)) {
      // Function replacer: the table text is inserted literally.
      content = content.replace(regex, () => `${table}\n\n`);
    }
  }

  fs.writeFileSync(AGENTS_MD, content, 'utf-8');
}
|
||||
|
||||
/**
 * Stamp the metadata with the current time and persist it.
 *
 * Sets `meta.lastSync` on the object passed in (the caller's object is
 * modified) to the current time as an ISO 8601 string, then overwrites
 * kilo-meta.json with the whole object serialized using 2-space indentation.
 *
 * @param {object} meta - Parsed kilo-meta.json.
 * @returns {void}
 */
function updateLastSync(meta) {
  const syncedAt = new Date();
  meta.lastSync = syncedAt.toISOString();

  const serialized = JSON.stringify(meta, null, 2);
  fs.writeFileSync(KILO_META, serialized);
}
|
||||
|
||||
/**
 * Command-line entry point.
 *
 * Loads kilo-meta.json, collects violations from checkAgents and
 * checkKiloJsonc, prints them, and then acts on the flags:
 * - `--fix`: repairs agent files (only when violations were found),
 *   regenerates KILO_SPEC.md and AGENTS.md and updates the lastSync stamp;
 * - `--check`: exits with status 1 when violations were found;
 * - no flag: only reports.
 * When both flags are given, `--fix` takes precedence.
 */
function main() {
  const cliArgs = process.argv.slice(2);
  const fixMode = cliArgs.includes('--fix');
  const checkOnly = cliArgs.includes('--check');

  console.log('=== Agent Sync Tool ===\n');
  console.log('Source of truth: kilo-meta.json\n');

  const meta = loadKiloMeta();

  // Check agents
  console.log('Checking agent files...');
  const agentViolations = checkAgents(meta);

  // Check kilo.jsonc
  console.log('Checking kilo.jsonc...');
  const violations = agentViolations.concat(checkKiloJsonc(meta));

  // Nothing to report: optionally refresh the generated documentation and stop.
  if (violations.length === 0) {
    console.log('\n✅ All agents in sync!');

    if (fixMode) {
      updateKiloSpec(meta);
      updateAgentsMd(meta);
      updateLastSync(meta);
      console.log('✅ Documentation updated');
    }
    return;
  }

  console.log(`\n⚠️ Found ${violations.length} violations:\n`);

  violations.forEach((violation) => {
    console.log(` [${violation.type}] ${violation.agent}: ${violation.message}`);
    if (violation.expected) {
      console.log(` Expected: ${violation.expected}`);
      console.log(` Actual: ${violation.actual}`);
    }
  });

  if (fixMode) {
    console.log('\n🔧 Fixing agent files...');
    fixAgents(meta).forEach((fix) => {
      console.log(` ✓ ${fix.agent}: ${fix.action} (${fix.updates?.join(', ') || 'n/a'})`);
    });

    console.log('\n📝 Updating KILO_SPEC.md...');
    updateKiloSpec(meta);
    console.log(' ✓ KILO_SPEC.md updated');

    console.log('\n📝 Updating AGENTS.md...');
    updateAgentsMd(meta);
    console.log(' ✓ AGENTS.md updated');

    updateLastSync(meta);
    console.log('\n✅ Sync complete!');
    return;
  }

  if (checkOnly) {
    console.log('\n❌ Check failed. Run with --fix to resolve.');
    process.exit(1);
  }
}
|
||||
|
||||
// Run the sync tool as soon as the script is loaded.
main();
|
||||
Reference in New Issue
Block a user