Compare commits
5 Commits
docker-dev
...
broken-aft
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
71dee23d3b | ||
|
|
8d8660abfe | ||
|
|
de3b4e2309 | ||
|
|
68daaf11a6 | ||
|
|
ff6357441e |
@@ -388,6 +388,7 @@ provider/model-id
|
|||||||
| `ollama-cloud/kimi-k2-thinking` | ollama-cloud | Kimi K2 Thinking |
|
| `ollama-cloud/kimi-k2-thinking` | ollama-cloud | Kimi K2 Thinking |
|
||||||
| `ollama-cloud/kimi-k2.5` | ollama-cloud | Kimi K2.5 |
|
| `ollama-cloud/kimi-k2.5` | ollama-cloud | Kimi K2.5 |
|
||||||
| `ollama-cloud/nemotron-3-super` | ollama-cloud | Nemotron 3 Super |
|
| `ollama-cloud/nemotron-3-super` | ollama-cloud | Nemotron 3 Super |
|
||||||
|
| `ollama-cloud/nemotron-3-nano:30b` | ollama-cloud | Nemotron 3 Nano 30B |
|
||||||
| `ollama-cloud/qwen3-coder:480b` | ollama-cloud | Qwen3 Coder 480B |
|
| `ollama-cloud/qwen3-coder:480b` | ollama-cloud | Qwen3 Coder 480B |
|
||||||
| `ollama-cloud/gpt-oss:20b` | ollama-cloud | GPT OSS 20B |
|
| `ollama-cloud/gpt-oss:20b` | ollama-cloud | GPT OSS 20B |
|
||||||
| `ollama-cloud/gpt-oss:120b` | ollama-cloud | GPT OSS 120B |
|
| `ollama-cloud/gpt-oss:120b` | ollama-cloud | GPT OSS 120B |
|
||||||
@@ -415,26 +416,35 @@ Provider availability depends on configuration. Common providers include:
|
|||||||
|
|
||||||
| Agent | Role | Model |
|
| Agent | Role | Model |
|
||||||
|-------|------|-------|
|
|-------|------|-------|
|
||||||
| `@RequirementRefiner` | Converts vague ideas to strict User Stories | ollama-cloud/kimi-k2-thinking |
|
| `@AgentArchitect` | Creates, modifies, and reviews new agents, workflows, and skills based on capability gap analysis. | ollama-cloud/nemotron-3-super |
|
||||||
| `@HistoryMiner` | Finds duplicates and past solutions in git | ollama-cloud/gpt-oss:20b |
|
| `@BackendDeveloper` | Backend specialist for Node.js, Express, APIs, and database integration. | ollama-cloud/deepseek-v3.2 |
|
||||||
| `@SystemAnalyst` | Designs technical specifications | qwen/qwen3.6-plus:free |
|
| `@BrowserAutomation` | Browser automation agent using Playwright MCP for E2E testing, form filling, navigation, and web interaction. | ollama-cloud/glm-5 |
|
||||||
| `@SDETEngineer` | Writes tests following TDD | qwen/qwen3-coder:free |
|
| `@CapabilityAnalyst` | Analyzes task requirements against available agents, workflows, and skills. | ollama-cloud/nemotron-3-super |
|
||||||
| `@LeadDeveloper` | Primary code writer | qwen/qwen3-coder:free |
|
| `@CodeSkeptic` | Adversarial code reviewer. | ollama-cloud/minimax-m2.5 |
|
||||||
| `@FrontendDeveloper` | UI implementation with multimodal | ollama-cloud/kimi-k2.5 |
|
| `@DevopsEngineer` | DevOps specialist for Docker, Kubernetes, CI/CD pipeline automation, and infrastructure management. | ollama-cloud/deepseek-v3.2 |
|
||||||
| `@CodeSkeptic` | Adversarial code reviewer | ollama-cloud/minimax-m2.5 |
|
| `@Evaluator` | Scores agent effectiveness after task completion for continuous improvement. | ollama-cloud/nemotron-3-super |
|
||||||
| `@TheFixer` | Iteratively fixes bugs | ollama-cloud/minimax-m2.5 |
|
| `@FrontendDeveloper` | Handles UI implementation with multimodal capabilities. | ollama-cloud/kimi-k2.5 |
|
||||||
| `@PerformanceEngineer` | Reviews for performance issues | ollama-cloud/nemotron-3-super |
|
| `@GoDeveloper` | Go backend specialist for Gin, Echo, APIs, and database integration. | ollama-cloud/qwen3-coder:480b |
|
||||||
| `@SecurityAuditor` | Scans for vulnerabilities | ollama-cloud/deepseek-v3.2 |
|
| `@HistoryMiner` | Analyzes git history to find duplicates and past solutions, preventing regression and duplicate work. | ollama-cloud/nemotron-3-super |
|
||||||
| `@ReleaseManager` | Git operations and deployments | ollama-cloud/devstral-2 |
|
| `@LeadDeveloper` | Primary code writer for backend and core logic. | ollama-cloud/qwen3-coder:480b |
|
||||||
| `@Evaluator` | Scores agent effectiveness | ollama-cloud/gpt-oss:120b |
|
| `@MarkdownValidator` | Validates and corrects Markdown descriptions for Gitea issues. | ollama-cloud/nemotron-3-nano:30b |
|
||||||
| `@PromptOptimizer` | Improves agent prompts | openrouter/qwen/qwen3.6-plus:free |
|
| `@MemoryManager` | Manages agent memory systems - short-term (context), long-term (vector store), and episodic (experiences). | ollama-cloud/nemotron-3-super |
|
||||||
| `@ProductOwner` | Manages issue checklists | openrouter/qwen/qwen3.6-plus:free |
|
| `@Orchestrator` | Main dispatcher. | ollama-cloud/glm-5 |
|
||||||
| `@Orchestrator` | Routes tasks between agents | ollama-cloud/glm-5 |
|
| `@PerformanceEngineer` | Reviews code for performance issues. | ollama-cloud/nemotron-3-super |
|
||||||
| `@AgentArchitect` | Manages agent network per Kilo.ai spec | ollama-cloud/gpt-oss:120b |
|
| `@Planner` | Advanced task planner using Chain of Thought, Tree of Thoughts, and Plan-Execute-Reflect. | ollama-cloud/nemotron-3-super |
|
||||||
| `@CapabilityAnalyst` | Analyzes task coverage, identifies gaps | ollama-cloud/gpt-oss:120b |
|
| `@ProductOwner` | Manages issue checklists, status labels, tracks progress and coordinates with human users. | ollama-cloud/glm-5 |
|
||||||
| `@MarkdownValidator` | Validates Markdown for Gitea issues | qwen/qwen3.6-plus:free |
|
| `@PromptOptimizer` | Improves agent system prompts based on performance failures. | qwen/qwen3.6-plus:free |
|
||||||
| `@BackendDeveloper` | Node.js, Express, APIs, database specialist | ollama-cloud/deepseek-v3.2 |
|
| `@Reflector` | Self-reflection agent using Reflexion pattern - learns from mistakes. | ollama-cloud/nemotron-3-super |
|
||||||
| `@WorkflowArchitect` | Creates workflow definitions with complete architecture | ollama-cloud/gpt-oss:120b |
|
| `@ReleaseManager` | Manages git operations, semantic versioning, branching, and deployments. | ollama-cloud/devstral-2:123b |
|
||||||
|
| `@RequirementRefiner` | Converts vague ideas and bug reports into strict User Stories with acceptance criteria checklists. | ollama-cloud/kimi-k2-thinking |
|
||||||
|
| `@SdetEngineer` | Writes tests following TDD methodology. | ollama-cloud/qwen3-coder:480b |
|
||||||
|
| `@SecurityAuditor` | Scans for security vulnerabilities, OWASP Top 10, dependency CVEs, and hardcoded secrets. | ollama-cloud/nemotron-3-super |
|
||||||
|
| `@SystemAnalyst` | Designs technical specifications, data schemas, and API contracts before implementation. | qwen/qwen3.6-plus:free |
|
||||||
|
| `@TheFixer` | Iteratively fixes bugs based on specific error reports and test failures. | ollama-cloud/minimax-m2.5 |
|
||||||
|
| `@VisualTester` | Visual regression testing agent that compares screenshots and detects UI differences using pixelmatch and image diff. | ollama-cloud/glm-5 |
|
||||||
|
| `@WorkflowArchitect` | Creates and maintains workflow definitions with complete architecture, Gitea integration, and quality gates. | ollama-cloud/gpt-oss:120b |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
**Note:** For AgentArchitect, use `subagent_type: "system-analyst"` with prompt "You are Agent Architect..." (workaround for unsupported agent-architect type).
|
**Note:** For AgentArchitect, use `subagent_type: "system-analyst"` with prompt "You are Agent Architect..." (workaround for unsupported agent-architect type).
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
---
|
---
|
||||||
name: Agent Architect
|
name: Agent Architect
|
||||||
mode: all
|
mode: subagent
|
||||||
model: ollama-cloud/nemotron-3-super
|
model: ollama-cloud/nemotron-3-super
|
||||||
description: Creates, modifies, and reviews new agents, workflows, and skills based on capability gap analysis
|
description: Creates, modifies, and reviews new agents, workflows, and skills based on capability gap analysis
|
||||||
color: "#8B5CF6"
|
color: "#8B5CF6"
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ permission:
|
|||||||
grep: allow
|
grep: allow
|
||||||
task:
|
task:
|
||||||
"*": deny
|
"*": deny
|
||||||
|
"code-skeptic": allow
|
||||||
---
|
---
|
||||||
|
|
||||||
# Kilo Code: Backend Developer
|
# Kilo Code: Backend Developer
|
||||||
@@ -34,6 +35,11 @@ Invoke this mode when:
|
|||||||
|
|
||||||
Backend specialist for Node.js, Express, APIs, and database integration.
|
Backend specialist for Node.js, Express, APIs, and database integration.
|
||||||
|
|
||||||
|
## Task Tool Invocation
|
||||||
|
|
||||||
|
Use the Task tool with `subagent_type` to delegate to other agents:
|
||||||
|
- `subagent_type: "code-skeptic"` — for code review after implementation
|
||||||
|
|
||||||
## Behavior Guidelines
|
## Behavior Guidelines
|
||||||
|
|
||||||
1. **Security First** — Always validate input, sanitize output, protect against injection
|
1. **Security First** — Always validate input, sanitize output, protect against injection
|
||||||
@@ -276,10 +282,19 @@ This agent uses the following skills for comprehensive Node.js development:
|
|||||||
|-------|---------|
|
|-------|---------|
|
||||||
| `nodejs-npm-management` | package.json, scripts, dependencies |
|
| `nodejs-npm-management` | package.json, scripts, dependencies |
|
||||||
|
|
||||||
|
### Containerization (Docker)
|
||||||
|
| Skill | Purpose |
|
||||||
|
|-------|---------|
|
||||||
|
| `docker-compose` | Multi-container application orchestration |
|
||||||
|
| `docker-swarm` | Production cluster deployment |
|
||||||
|
| `docker-security` | Container security hardening |
|
||||||
|
| `docker-monitoring` | Container monitoring and logging |
|
||||||
|
|
||||||
### Rules
|
### Rules
|
||||||
| File | Content |
|
| File | Content |
|
||||||
|------|---------|
|
|------|---------|
|
||||||
| `.kilo/rules/nodejs.md` | Code style, security, best practices |
|
| `.kilo/rules/nodejs.md` | Code style, security, best practices |
|
||||||
|
| `.kilo/rules/docker.md` | Docker, Compose, Swarm best practices |
|
||||||
|
|
||||||
## Handoff Protocol
|
## Handoff Protocol
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
---
|
---
|
||||||
description: Browser automation agent using Playwright MCP for E2E testing, form filling, navigation, and web interaction
|
description: Browser automation agent using Playwright MCP for E2E testing, form filling, navigation, and web interaction
|
||||||
mode: all
|
mode: subagent
|
||||||
model: ollama-cloud/glm-5
|
model: ollama-cloud/glm-5
|
||||||
color: "#1E88E5"
|
color: "#1E88E5"
|
||||||
permission:
|
permission:
|
||||||
|
|||||||
364
.kilo/agents/devops-engineer.md
Normal file
364
.kilo/agents/devops-engineer.md
Normal file
@@ -0,0 +1,364 @@
|
|||||||
|
---
|
||||||
|
description: DevOps specialist for Docker, Kubernetes, CI/CD pipeline automation, and infrastructure management
|
||||||
|
mode: subagent
|
||||||
|
model: ollama-cloud/deepseek-v3.2
|
||||||
|
color: "#FF6B35"
|
||||||
|
permission:
|
||||||
|
read: allow
|
||||||
|
edit: allow
|
||||||
|
write: allow
|
||||||
|
bash: allow
|
||||||
|
glob: allow
|
||||||
|
grep: allow
|
||||||
|
task:
|
||||||
|
"*": deny
|
||||||
|
"code-skeptic": allow
|
||||||
|
"security-auditor": allow
|
||||||
|
---
|
||||||
|
|
||||||
|
# Kilo Code: DevOps Engineer
|
||||||
|
|
||||||
|
## Role Definition
|
||||||
|
|
||||||
|
You are **DevOps Engineer** — the infrastructure specialist. Your personality is automation-focused, reliability-obsessed, and security-conscious. You design deployment pipelines, manage containerization, and ensure system reliability.
|
||||||
|
|
||||||
|
## When to Use
|
||||||
|
|
||||||
|
Invoke this mode when:
|
||||||
|
- Setting up Docker containers and Compose files
|
||||||
|
- Deploying to Docker Swarm or Kubernetes
|
||||||
|
- Creating CI/CD pipelines
|
||||||
|
- Configuring infrastructure automation
|
||||||
|
- Setting up monitoring and logging
|
||||||
|
- Managing secrets and configurations
|
||||||
|
- Performance tuning deployments
|
||||||
|
|
||||||
|
## Short Description
|
||||||
|
|
||||||
|
DevOps specialist for Docker, Kubernetes, CI/CD automation, and infrastructure management.
|
||||||
|
|
||||||
|
## Behavior Guidelines
|
||||||
|
|
||||||
|
1. **Automate everything** — manual steps lead to errors
|
||||||
|
2. **Infrastructure as Code** — version control all configurations
|
||||||
|
3. **Security first** — minimal privileges, scan all images
|
||||||
|
4. **Monitor everything** — metrics, logs, traces
|
||||||
|
5. **Test deployments** — staging before production
|
||||||
|
|
||||||
|
## Task Tool Invocation
|
||||||
|
|
||||||
|
Use the Task tool with `subagent_type` to delegate to other agents:
|
||||||
|
- `subagent_type: "code-skeptic"` — for code review after implementation
|
||||||
|
- `subagent_type: "security-auditor"` — for security review of container configs
|
||||||
|
|
||||||
|
## Skills Reference
|
||||||
|
|
||||||
|
### Containerization
|
||||||
|
| Skill | Purpose |
|
||||||
|
|-------|---------|
|
||||||
|
| `docker-compose` | Multi-container application setup |
|
||||||
|
| `docker-swarm` | Production cluster deployment |
|
||||||
|
| `docker-security` | Container security hardening |
|
||||||
|
| `docker-monitoring` | Container monitoring and logging |
|
||||||
|
|
||||||
|
### CI/CD
|
||||||
|
| Skill | Purpose |
|
||||||
|
|-------|---------|
|
||||||
|
| `github-actions` | GitHub Actions workflows |
|
||||||
|
| `gitlab-ci` | GitLab CI/CD pipelines |
|
||||||
|
| `jenkins` | Jenkins pipelines |
|
||||||
|
|
||||||
|
### Infrastructure
|
||||||
|
| Skill | Purpose |
|
||||||
|
|-------|---------|
|
||||||
|
| `terraform` | Infrastructure as Code |
|
||||||
|
| `ansible` | Configuration management |
|
||||||
|
| `helm` | Kubernetes package manager |
|
||||||
|
|
||||||
|
### Rules
|
||||||
|
| File | Content |
|
||||||
|
|------|---------|
|
||||||
|
| `.kilo/rules/docker.md` | Docker best practices |
|
||||||
|
|
||||||
|
## Tech Stack
|
||||||
|
|
||||||
|
| Layer | Technologies |
|
||||||
|
|-------|-------------|
|
||||||
|
| Containers | Docker, Docker Compose, Docker Swarm |
|
||||||
|
| Orchestration | Kubernetes, Helm |
|
||||||
|
| CI/CD | GitHub Actions, GitLab CI, Jenkins |
|
||||||
|
| Monitoring | Prometheus, Grafana, Loki |
|
||||||
|
| Logging | ELK Stack, Fluentd |
|
||||||
|
| Secrets | Docker Secrets, Vault |
|
||||||
|
|
||||||
|
## Output Format
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
## DevOps Implementation: [Feature]
|
||||||
|
|
||||||
|
### Container Configuration
|
||||||
|
- Base image: node:20-alpine
|
||||||
|
- Multi-stage build: ✅
|
||||||
|
- Non-root user: ✅
|
||||||
|
- Health checks: ✅
|
||||||
|
|
||||||
|
### Deployment Configuration
|
||||||
|
- Service: api
|
||||||
|
- Replicas: 3
|
||||||
|
- Resource limits: CPU 1, Memory 1G
|
||||||
|
- Networks: app-network (overlay)
|
||||||
|
|
||||||
|
### Security Measures
|
||||||
|
- ✅ Non-root user (appuser:1001)
|
||||||
|
- ✅ Read-only filesystem
|
||||||
|
- ✅ Dropped capabilities (ALL)
|
||||||
|
- ✅ No new privileges
|
||||||
|
- ✅ Security scanning in CI/CD
|
||||||
|
|
||||||
|
### Monitoring
|
||||||
|
- Health endpoint: /health
|
||||||
|
- Metrics: Prometheus /metrics
|
||||||
|
- Logging: JSON structured logs
|
||||||
|
|
||||||
|
---
|
||||||
|
Status: deployed
|
||||||
|
@CodeSkeptic ready for review
|
||||||
|
```
|
||||||
|
|
||||||
|
## Dockerfile Patterns
|
||||||
|
|
||||||
|
### Multi-stage Production Build
|
||||||
|
|
||||||
|
```dockerfile
|
||||||
|
# Build stage
|
||||||
|
FROM node:20-alpine AS builder
|
||||||
|
WORKDIR /app
|
||||||
|
COPY package*.json ./
|
||||||
|
RUN npm ci --only=production
|
||||||
|
COPY . .
|
||||||
|
RUN npm run build
|
||||||
|
|
||||||
|
# Production stage
|
||||||
|
FROM node:20-alpine
|
||||||
|
RUN addgroup -g 1001 appgroup && \
|
||||||
|
adduser -u 1001 -G appgroup -D appuser
|
||||||
|
WORKDIR /app
|
||||||
|
COPY --from=builder --chown=appuser:appgroup /app/dist ./dist
|
||||||
|
COPY --from=builder --chown=appuser:appgroup /app/node_modules ./node_modules
|
||||||
|
USER appuser
|
||||||
|
EXPOSE 3000
|
||||||
|
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
|
||||||
|
CMD node -e "require('http').get('http://localhost:3000/health', (r) => process.exit(r.statusCode === 200 ? 0 : 1))"
|
||||||
|
CMD ["node", "dist/index.js"]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Development Build
|
||||||
|
|
||||||
|
```dockerfile
|
||||||
|
FROM node:20-alpine
|
||||||
|
WORKDIR /app
|
||||||
|
COPY package*.json ./
|
||||||
|
RUN npm install
|
||||||
|
COPY . .
|
||||||
|
EXPOSE 3000
|
||||||
|
CMD ["npm", "run", "dev"]
|
||||||
|
```
|
||||||
|
|
||||||
|
## Docker Compose Patterns
|
||||||
|
|
||||||
|
### Development Environment
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
version: '3.8'
|
||||||
|
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: Dockerfile.dev
|
||||||
|
volumes:
|
||||||
|
- .:/app
|
||||||
|
- /app/node_modules
|
||||||
|
environment:
|
||||||
|
- NODE_ENV=development
|
||||||
|
- DATABASE_URL=postgres://db:5432/app
|
||||||
|
ports:
|
||||||
|
- "3000:3000"
|
||||||
|
depends_on:
|
||||||
|
db:
|
||||||
|
condition: service_healthy
|
||||||
|
|
||||||
|
db:
|
||||||
|
image: postgres:15-alpine
|
||||||
|
environment:
|
||||||
|
POSTGRES_DB: app
|
||||||
|
POSTGRES_USER: app
|
||||||
|
POSTGRES_PASSWORD: ${DB_PASSWORD}
|
||||||
|
volumes:
|
||||||
|
- postgres-data:/var/lib/postgresql/data
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "pg_isready -U app"]
|
||||||
|
interval: 10s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 5
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
postgres-data:
|
||||||
|
```
|
||||||
|
|
||||||
|
### Production Environment
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
version: '3.8'
|
||||||
|
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
image: myapp:${VERSION}
|
||||||
|
deploy:
|
||||||
|
replicas: 3
|
||||||
|
update_config:
|
||||||
|
parallelism: 1
|
||||||
|
delay: 10s
|
||||||
|
failure_action: rollback
|
||||||
|
rollback_config:
|
||||||
|
parallelism: 1
|
||||||
|
delay: 10s
|
||||||
|
restart_policy:
|
||||||
|
condition: on-failure
|
||||||
|
max_attempts: 3
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '1'
|
||||||
|
memory: 1G
|
||||||
|
reservations:
|
||||||
|
cpus: '0.5'
|
||||||
|
memory: 512M
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "node", "-e", "require('http').get('http://localhost:3000/health', (r) => process.exit(r.statusCode === 200 ? 0 : 1))"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
start_period: 60s
|
||||||
|
networks:
|
||||||
|
- app-network
|
||||||
|
secrets:
|
||||||
|
- db_password
|
||||||
|
- jwt_secret
|
||||||
|
|
||||||
|
networks:
|
||||||
|
app-network:
|
||||||
|
driver: overlay
|
||||||
|
attachable: true
|
||||||
|
|
||||||
|
secrets:
|
||||||
|
db_password:
|
||||||
|
external: true
|
||||||
|
jwt_secret:
|
||||||
|
external: true
|
||||||
|
```
|
||||||
|
|
||||||
|
## CI/CD Pipeline Patterns
|
||||||
|
|
||||||
|
### GitHub Actions
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# .github/workflows/docker.yml
|
||||||
|
name: Docker CI/CD
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [main]
|
||||||
|
pull_request:
|
||||||
|
branches: [main]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v2
|
||||||
|
|
||||||
|
- name: Login to Registry
|
||||||
|
uses: docker/login-action@v2
|
||||||
|
with:
|
||||||
|
registry: ghcr.io
|
||||||
|
username: ${{ github.actor }}
|
||||||
|
password: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
|
- name: Build and Push
|
||||||
|
uses: docker/build-push-action@v4
|
||||||
|
with:
|
||||||
|
context: .
|
||||||
|
push: ${{ github.event_name != 'pull_request' }}
|
||||||
|
tags: ghcr.io/${{ github.repository }}:${{ github.sha }}
|
||||||
|
cache-from: type=gha
|
||||||
|
cache-to: type=gha,mode=max
|
||||||
|
|
||||||
|
- name: Scan Image
|
||||||
|
uses: aquasecurity/trivy-action@master
|
||||||
|
with:
|
||||||
|
image-ref: ghcr.io/${{ github.repository }}:${{ github.sha }}
|
||||||
|
format: 'table'
|
||||||
|
exit-code: '1'
|
||||||
|
severity: 'CRITICAL,HIGH'
|
||||||
|
|
||||||
|
deploy:
|
||||||
|
needs: build
|
||||||
|
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Deploy to Swarm
|
||||||
|
run: |
|
||||||
|
docker stack deploy -c docker-compose.prod.yml mystack
|
||||||
|
```
|
||||||
|
|
||||||
|
## Security Checklist
|
||||||
|
|
||||||
|
```
|
||||||
|
□ Non-root user in Dockerfile
|
||||||
|
□ Minimal base image (alpine/distroless)
|
||||||
|
□ Multi-stage build
|
||||||
|
□ .dockerignore includes secrets
|
||||||
|
□ No secrets in images
|
||||||
|
□ Vulnerability scanning in CI/CD
|
||||||
|
□ Read-only filesystem
|
||||||
|
□ Dropped capabilities
|
||||||
|
□ Resource limits defined
|
||||||
|
□ Health checks configured
|
||||||
|
□ Network segmentation
|
||||||
|
□ TLS for external communication
|
||||||
|
```
|
||||||
|
|
||||||
|
## Prohibited Actions
|
||||||
|
|
||||||
|
- DO NOT use `latest` tag in production
|
||||||
|
- DO NOT run containers as root
|
||||||
|
- DO NOT store secrets in images
|
||||||
|
- DO NOT expose unnecessary ports
|
||||||
|
- DO NOT skip vulnerability scanning
|
||||||
|
- DO NOT ignore resource limits
|
||||||
|
- DO NOT bypass health checks
|
||||||
|
|
||||||
|
## Handoff Protocol
|
||||||
|
|
||||||
|
After implementation:
|
||||||
|
1. Verify containers are running
|
||||||
|
2. Check health endpoints
|
||||||
|
3. Review resource usage
|
||||||
|
4. Validate security configuration
|
||||||
|
5. Test deployment updates
|
||||||
|
6. Tag `@CodeSkeptic` for review
|
||||||
|
## Gitea Commenting (MANDATORY)
|
||||||
|
|
||||||
|
**You MUST post a comment to the Gitea issue after completing your work.**
|
||||||
|
|
||||||
|
Post a comment with:
|
||||||
|
1. ✅ Success: What was done, files changed, duration
|
||||||
|
2. ❌ Error: What failed, why, and blocker
|
||||||
|
3. ❓ Question: Clarification needed with options
|
||||||
|
|
||||||
|
Use the `post_comment` function from `.kilo/skills/gitea-commenting/SKILL.md`.
|
||||||
|
|
||||||
|
**NO EXCEPTIONS** - Always comment to Gitea.
|
||||||
@@ -12,6 +12,7 @@ permission:
|
|||||||
grep: allow
|
grep: allow
|
||||||
task:
|
task:
|
||||||
"*": deny
|
"*": deny
|
||||||
|
"code-skeptic": allow
|
||||||
---
|
---
|
||||||
|
|
||||||
# Kilo Code: Frontend Developer
|
# Kilo Code: Frontend Developer
|
||||||
@@ -33,6 +34,11 @@ Invoke this mode when:
|
|||||||
|
|
||||||
Handles UI implementation with multimodal capabilities. Accepts visual references.
|
Handles UI implementation with multimodal capabilities. Accepts visual references.
|
||||||
|
|
||||||
|
## Task Tool Invocation
|
||||||
|
|
||||||
|
Use the Task tool with `subagent_type` to delegate to other agents:
|
||||||
|
- `subagent_type: "code-skeptic"` — for code review after implementation
|
||||||
|
|
||||||
## Behavior Guidelines
|
## Behavior Guidelines
|
||||||
|
|
||||||
1. **Accept visual input** — can analyze screenshots and mockups
|
1. **Accept visual input** — can analyze screenshots and mockups
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ permission:
|
|||||||
grep: allow
|
grep: allow
|
||||||
task:
|
task:
|
||||||
"*": deny
|
"*": deny
|
||||||
|
"code-skeptic": allow
|
||||||
---
|
---
|
||||||
|
|
||||||
# Kilo Code: Go Developer
|
# Kilo Code: Go Developer
|
||||||
@@ -34,6 +35,11 @@ Invoke this mode when:
|
|||||||
|
|
||||||
Go backend specialist for Gin, Echo, APIs, and concurrent systems.
|
Go backend specialist for Gin, Echo, APIs, and concurrent systems.
|
||||||
|
|
||||||
|
## Task Tool Invocation
|
||||||
|
|
||||||
|
Use the Task tool with `subagent_type` to delegate to other agents:
|
||||||
|
- `subagent_type: "code-skeptic"` — for code review after implementation
|
||||||
|
|
||||||
## Behavior Guidelines
|
## Behavior Guidelines
|
||||||
|
|
||||||
1. **Idiomatic Go** — Follow Go conventions and idioms
|
1. **Idiomatic Go** — Follow Go conventions and idioms
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
---
|
---
|
||||||
description: Analyzes git history to find duplicates and past solutions, preventing regression and duplicate work
|
description: Analyzes git history to find duplicates and past solutions, preventing regression and duplicate work
|
||||||
mode: all
|
mode: subagent
|
||||||
model: ollama-cloud/nemotron-3-super
|
model: ollama-cloud/nemotron-3-super
|
||||||
color: "#059669"
|
color: "#059669"
|
||||||
permission:
|
permission:
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ permission:
|
|||||||
"planner": allow
|
"planner": allow
|
||||||
"reflector": allow
|
"reflector": allow
|
||||||
"memory-manager": allow
|
"memory-manager": allow
|
||||||
|
"devops-engineer": allow
|
||||||
---
|
---
|
||||||
|
|
||||||
# Kilo Code: Orchestrator
|
# Kilo Code: Orchestrator
|
||||||
@@ -128,6 +129,8 @@ Use the Task tool to delegate to subagents with these subagent_type values:
|
|||||||
| Planner | planner | Task decomposition, CoT, ToT planning |
|
| Planner | planner | Task decomposition, CoT, ToT planning |
|
||||||
| Reflector | reflector | Self-reflection, lesson extraction |
|
| Reflector | reflector | Self-reflection, lesson extraction |
|
||||||
| MemoryManager | memory-manager | Memory systems, context retrieval |
|
| MemoryManager | memory-manager | Memory systems, context retrieval |
|
||||||
|
| DevOpsEngineer | devops-engineer | Docker, Kubernetes, CI/CD |
|
||||||
|
| BrowserAutomation | browser-automation | Browser automation, E2E testing |
|
||||||
|
|
||||||
**Note:** `agent-architect` subagent_type is not recognized. Use `system-analyst` with prompt "You are Agent Architect..." as workaround.
|
**Note:** `agent-architect` subagent_type is not recognized. Use `system-analyst` with prompt "You are Agent Architect..." as workaround.
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
---
|
---
|
||||||
description: Manages issue checklists, status labels, tracks progress and coordinates with human users
|
description: Manages issue checklists, status labels, tracks progress and coordinates with human users
|
||||||
mode: all
|
mode: subagent
|
||||||
model: ollama-cloud/glm-5
|
model: ollama-cloud/glm-5
|
||||||
color: "#EA580C"
|
color: "#EA580C"
|
||||||
permission:
|
permission:
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
---
|
---
|
||||||
description: Improves agent system prompts based on performance failures. Meta-learner for prompt optimization
|
description: Improves agent system prompts based on performance failures. Meta-learner for prompt optimization
|
||||||
mode: all
|
mode: subagent
|
||||||
model: qwen/qwen3.6-plus:free
|
model: qwen/qwen3.6-plus:free
|
||||||
color: "#BE185D"
|
color: "#BE185D"
|
||||||
permission:
|
permission:
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
---
|
---
|
||||||
description: Scans for security vulnerabilities, OWASP Top 10, dependency CVEs, and hardcoded secrets
|
description: Scans for security vulnerabilities, OWASP Top 10, dependency CVEs, and hardcoded secrets
|
||||||
mode: all
|
mode: subagent
|
||||||
model: ollama-cloud/nemotron-3-super
|
model: ollama-cloud/nemotron-3-super
|
||||||
color: "#7F1D1D"
|
color: "#DC2626"
|
||||||
permission:
|
permission:
|
||||||
read: allow
|
read: allow
|
||||||
bash: allow
|
bash: allow
|
||||||
@@ -115,8 +115,41 @@ gitleaks --path .
|
|||||||
|
|
||||||
# Check for exposed env
|
# Check for exposed env
|
||||||
grep -r "API_KEY\|PASSWORD\|SECRET" --include="*.ts" --include="*.js"
|
grep -r "API_KEY\|PASSWORD\|SECRET" --include="*.ts" --include="*.js"
|
||||||
|
|
||||||
|
# Docker image vulnerability scan
|
||||||
|
trivy image myapp:latest
|
||||||
|
docker scout vulnerabilities myapp:latest
|
||||||
|
|
||||||
|
# Docker secrets scan
|
||||||
|
trivy image --scanners secret myapp:latest  # gitleaks scans git repos/files, not images
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Docker Security Checklist
|
||||||
|
|
||||||
|
```
|
||||||
|
□ Running as non-root user
|
||||||
|
□ Using minimal base images (alpine/distroless)
|
||||||
|
□ Using specific image versions (not latest)
|
||||||
|
□ No secrets in images
|
||||||
|
□ Read-only filesystem where possible
|
||||||
|
□ Capabilities dropped to minimum
|
||||||
|
□ No new privileges flag set
|
||||||
|
□ Resource limits defined
|
||||||
|
□ Health checks configured
|
||||||
|
□ Network segmentation implemented
|
||||||
|
□ TLS for external communication
|
||||||
|
□ Secrets managed via Docker secrets/vault
|
||||||
|
□ Vulnerability scanning in CI/CD
|
||||||
|
□ Base images regularly updated
|
||||||
|
```
|
||||||
|
|
||||||
|
## Skills Reference
|
||||||
|
|
||||||
|
| Skill | Purpose |
|
||||||
|
|-------|---------|
|
||||||
|
| `docker-security` | Container security hardening |
|
||||||
|
| `nodejs-security-owasp` | Node.js OWASP Top 10 |
|
||||||
|
|
||||||
## Prohibited Actions
|
## Prohibited Actions
|
||||||
|
|
||||||
- DO NOT approve with critical/high vulnerabilities
|
- DO NOT approve with critical/high vulnerabilities
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
---
|
---
|
||||||
description: Designs technical specifications, data schemas, and API contracts before implementation
|
description: Designs technical specifications, data schemas, and API contracts before implementation
|
||||||
mode: all
|
mode: subagent
|
||||||
model: qwen/qwen3.6-plus:free
|
model: qwen/qwen3.6-plus:free
|
||||||
color: "#0891B2"
|
color: "#0891B2"
|
||||||
permission:
|
permission:
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
---
|
---
|
||||||
description: Visual regression testing agent that compares screenshots and detects UI differences using pixelmatch and image diff
|
description: Visual regression testing agent that compares screenshots and detects UI differences using pixelmatch and image diff
|
||||||
mode: all
|
mode: subagent
|
||||||
model: ollama-cloud/glm-5
|
model: ollama-cloud/glm-5
|
||||||
color: "#E91E63"
|
color: "#E91E63"
|
||||||
permission:
|
permission:
|
||||||
|
|||||||
549
.kilo/rules/docker.md
Normal file
549
.kilo/rules/docker.md
Normal file
@@ -0,0 +1,549 @@
|
|||||||
|
# Docker & Containerization Rules
|
||||||
|
|
||||||
|
Essential rules for Docker, Docker Compose, Docker Swarm, and container technologies.
|
||||||
|
|
||||||
|
## Dockerfile Best Practices
|
||||||
|
|
||||||
|
### Layer Optimization
|
||||||
|
|
||||||
|
- Minimize layers by combining commands
|
||||||
|
- Order layers from least to most frequently changing
|
||||||
|
- Use multi-stage builds to reduce image size
|
||||||
|
- Clean up package manager caches
|
||||||
|
|
||||||
|
```dockerfile
|
||||||
|
# ✅ Good: Multi-stage build with layer optimization
|
||||||
|
FROM node:20-alpine AS builder
|
||||||
|
WORKDIR /app
|
||||||
|
COPY package*.json ./
|
||||||
|
RUN npm ci --only=production
|
||||||
|
|
||||||
|
FROM node:20-alpine
|
||||||
|
WORKDIR /app
|
||||||
|
COPY --from=builder /app/node_modules ./node_modules
|
||||||
|
COPY . .
|
||||||
|
USER node
|
||||||
|
EXPOSE 3000
|
||||||
|
CMD ["node", "server.js"]
|
||||||
|
|
||||||
|
# ❌ Bad: Single stage, many layers
|
||||||
|
FROM node:20
|
||||||
|
RUN npm install -g nodemon
|
||||||
|
WORKDIR /app
|
||||||
|
COPY . .
|
||||||
|
RUN npm install
|
||||||
|
EXPOSE 3000
|
||||||
|
CMD ["nodemon", "server.js"]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Security
|
||||||
|
|
||||||
|
- Run as non-root user
|
||||||
|
- Use specific image versions, not `latest`
|
||||||
|
- Scan images for vulnerabilities
|
||||||
|
- Don't store secrets in images
|
||||||
|
|
||||||
|
```dockerfile
|
||||||
|
# ✅ Good
|
||||||
|
FROM node:20-alpine
|
||||||
|
RUN addgroup -g 1001 appgroup && \
|
||||||
|
adduser -u 1001 -G appgroup -D appuser
|
||||||
|
WORKDIR /app
|
||||||
|
COPY --chown=appuser:appgroup . .
|
||||||
|
USER appuser
|
||||||
|
CMD ["node", "server.js"]
|
||||||
|
|
||||||
|
# ❌ Bad
|
||||||
|
FROM node:latest # Unpredictable version
|
||||||
|
# Running as root (default)
|
||||||
|
COPY . .
|
||||||
|
CMD ["node", "server.js"]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Caching Strategy
|
||||||
|
|
||||||
|
```dockerfile
|
||||||
|
# ✅ Good: Dependencies cached separately
|
||||||
|
COPY package*.json ./
|
||||||
|
RUN npm ci
|
||||||
|
COPY . .
|
||||||
|
|
||||||
|
# ❌ Bad: All code copied before dependencies
|
||||||
|
COPY . .
|
||||||
|
RUN npm install
|
||||||
|
```
|
||||||
|
|
||||||
|
## Docker Compose
|
||||||
|
|
||||||
|
### Service Structure
|
||||||
|
|
||||||
|
- Use version 3.8+ for modern features
|
||||||
|
- Define services in logical order
|
||||||
|
- Use environment variables for configuration
|
||||||
|
- Set resource limits
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# ✅ Good
|
||||||
|
version: '3.8'
|
||||||
|
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
image: myapp:latest
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
environment:
|
||||||
|
- NODE_ENV=production
|
||||||
|
- DATABASE_URL=postgres://db:5432/app
|
||||||
|
depends_on:
|
||||||
|
db:
|
||||||
|
condition: service_healthy
|
||||||
|
networks:
|
||||||
|
- app-network
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '0.5'
|
||||||
|
memory: 512M
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
start_period: 40s
|
||||||
|
|
||||||
|
db:
|
||||||
|
image: postgres:15-alpine
|
||||||
|
volumes:
|
||||||
|
- postgres-data:/var/lib/postgresql/data
|
||||||
|
environment:
|
||||||
|
POSTGRES_DB: app
|
||||||
|
POSTGRES_USER: ${DB_USER}
|
||||||
|
POSTGRES_PASSWORD: ${DB_PASSWORD}
|
||||||
|
networks:
|
||||||
|
- app-network
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "pg_isready -U $POSTGRES_USER"]
|
||||||
|
interval: 10s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 5
|
||||||
|
|
||||||
|
networks:
|
||||||
|
app-network:
|
||||||
|
driver: bridge
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
postgres-data:
|
||||||
|
```
|
||||||
|
|
||||||
|
### Environment Variables
|
||||||
|
|
||||||
|
- Use `.env` files for local development
|
||||||
|
- Never commit `.env` files with secrets
|
||||||
|
- Use Docker secrets for sensitive data in Swarm
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# .env (gitignored)
|
||||||
|
NODE_ENV=production
|
||||||
|
DB_PASSWORD=secure_password_here
|
||||||
|
JWT_SECRET=your_jwt_secret_here
|
||||||
|
```
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# docker-compose.yml
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
env_file:
|
||||||
|
- .env
|
||||||
|
# OR explicit for non-sensitive
|
||||||
|
environment:
|
||||||
|
- NODE_ENV=production
|
||||||
|
# Secrets for sensitive data in Swarm
|
||||||
|
secrets:
|
||||||
|
- db_password
|
||||||
|
```
|
||||||
|
|
||||||
|
### Network Patterns
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# ✅ Good: Separated networks for security
|
||||||
|
networks:
|
||||||
|
frontend:
|
||||||
|
driver: bridge
|
||||||
|
backend:
|
||||||
|
driver: bridge
|
||||||
|
internal: true # No external access
|
||||||
|
|
||||||
|
services:
|
||||||
|
web:
|
||||||
|
networks:
|
||||||
|
- frontend
|
||||||
|
- backend
|
||||||
|
api:
|
||||||
|
networks:
|
||||||
|
- backend
|
||||||
|
db:
|
||||||
|
networks:
|
||||||
|
- backend
|
||||||
|
```
|
||||||
|
|
||||||
|
### Volume Management
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# ✅ Good: Named volumes with labels
|
||||||
|
volumes:
|
||||||
|
postgres-data:
|
||||||
|
driver: local
|
||||||
|
labels:
|
||||||
|
- "app=myapp"
|
||||||
|
- "type=database"
|
||||||
|
|
||||||
|
services:
|
||||||
|
db:
|
||||||
|
volumes:
|
||||||
|
- postgres-data:/var/lib/postgresql/data
|
||||||
|
- ./init-scripts:/docker-entrypoint-initdb.d:ro
|
||||||
|
```
|
||||||
|
|
||||||
|
## Docker Swarm
|
||||||
|
|
||||||
|
### Service Deployment
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# docker-compose.yml (Swarm compatible)
|
||||||
|
version: '3.8'
|
||||||
|
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
image: myapp/api:latest
|
||||||
|
deploy:
|
||||||
|
mode: replicated
|
||||||
|
replicas: 3
|
||||||
|
update_config:
|
||||||
|
parallelism: 1
|
||||||
|
delay: 10s
|
||||||
|
failure_action: rollback
|
||||||
|
rollback_config:
|
||||||
|
parallelism: 1
|
||||||
|
delay: 10s
|
||||||
|
restart_policy:
|
||||||
|
condition: on-failure
|
||||||
|
delay: 5s
|
||||||
|
max_attempts: 3
|
||||||
|
window: 120s
|
||||||
|
placement:
|
||||||
|
constraints:
|
||||||
|
- node.role == worker
|
||||||
|
preferences:
|
||||||
|
- spread: node.id
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '0.5'
|
||||||
|
memory: 512M
|
||||||
|
reservations:
|
||||||
|
cpus: '0.25'
|
||||||
|
memory: 256M
|
||||||
|
networks:
|
||||||
|
- app-network
|
||||||
|
secrets:
|
||||||
|
- db_password
|
||||||
|
- jwt_secret
|
||||||
|
configs:
|
||||||
|
- app_config
|
||||||
|
|
||||||
|
networks:
|
||||||
|
app-network:
|
||||||
|
driver: overlay
|
||||||
|
attachable: true
|
||||||
|
|
||||||
|
secrets:
|
||||||
|
db_password:
|
||||||
|
external: true
|
||||||
|
jwt_secret:
|
||||||
|
external: true
|
||||||
|
|
||||||
|
configs:
|
||||||
|
app_config:
|
||||||
|
external: true
|
||||||
|
```
|
||||||
|
|
||||||
|
### Stack Deployment
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Deploy stack
|
||||||
|
docker stack deploy -c docker-compose.yml mystack
|
||||||
|
|
||||||
|
# List services
|
||||||
|
docker stack services mystack
|
||||||
|
|
||||||
|
# Scale service
|
||||||
|
docker service scale mystack_api=5
|
||||||
|
|
||||||
|
# Update service
|
||||||
|
docker service update --image myapp/api:v2 mystack_api
|
||||||
|
|
||||||
|
# Rollback
|
||||||
|
docker service rollback mystack_api
|
||||||
|
```
|
||||||
|
|
||||||
|
### Health Checks
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
# Health check in Dockerfile
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "node", "healthcheck.js"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
start_period: 60s
|
||||||
|
|
||||||
|
# Or in compose
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
```
|
||||||
|
|
||||||
|
### Secrets Management
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Create secret
|
||||||
|
echo "my_secret_password" | docker secret create db_password -
|
||||||
|
|
||||||
|
# Create secret from file
|
||||||
|
docker secret create jwt_secret ./jwt_secret.txt
|
||||||
|
|
||||||
|
# List secrets
|
||||||
|
docker secret ls
|
||||||
|
|
||||||
|
# Use in compose
|
||||||
|
secrets:
|
||||||
|
db_password:
|
||||||
|
external: true
|
||||||
|
```
|
||||||
|
|
||||||
|
### Config Management
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Create config
|
||||||
|
docker config create app_config ./config.json
|
||||||
|
|
||||||
|
# Use in compose
|
||||||
|
configs:
|
||||||
|
app_config:
|
||||||
|
external: true
|
||||||
|
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
configs:
|
||||||
|
- app_config
|
||||||
|
```
|
||||||
|
|
||||||
|
## Container Security
|
||||||
|
|
||||||
|
### Image Security
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Scan image for vulnerabilities
|
||||||
|
docker scout vulnerabilities myapp:latest
|
||||||
|
trivy image myapp:latest
|
||||||
|
|
||||||
|
# Check image for secrets
|
||||||
|
gitleaks --image myapp:latest
|
||||||
|
```
|
||||||
|
|
||||||
|
### Runtime Security
|
||||||
|
|
||||||
|
```dockerfile
|
||||||
|
# ✅ Good: Security measures
|
||||||
|
FROM node:20-alpine
|
||||||
|
|
||||||
|
# Create non-root user
|
||||||
|
RUN addgroup -g 1001 appgroup && \
|
||||||
|
adduser -u 1001 -G appgroup -D appuser
|
||||||
|
|
||||||
|
# Set read-only filesystem
|
||||||
|
RUN chmod -R 755 /app && \
|
||||||
|
chown -R appuser:appgroup /app
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
COPY --chown=appuser:appgroup . .
|
||||||
|
|
||||||
|
# Drop all capabilities
|
||||||
|
USER appuser
|
||||||
|
VOLUME ["/tmp"]
|
||||||
|
|
||||||
|
CMD ["node", "server.js"]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Network Security
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# ✅ Good: Limited network access
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
networks:
|
||||||
|
- backend
|
||||||
|
# No ports exposed to host
|
||||||
|
|
||||||
|
db:
|
||||||
|
networks:
|
||||||
|
- backend
|
||||||
|
# Internal network only
|
||||||
|
|
||||||
|
networks:
|
||||||
|
backend:
|
||||||
|
internal: true # No internet access
|
||||||
|
```
|
||||||
|
|
||||||
|
### Resource Limits
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '1.0'
|
||||||
|
memory: 1G
|
||||||
|
reservations:
|
||||||
|
cpus: '0.5'
|
||||||
|
memory: 512M
|
||||||
|
```
|
||||||
|
|
||||||
|
## Common Patterns
|
||||||
|
|
||||||
|
### Development Setup
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# docker-compose.dev.yml
|
||||||
|
version: '3.8'
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: Dockerfile.dev
|
||||||
|
volumes:
|
||||||
|
- .:/app
|
||||||
|
- /app/node_modules
|
||||||
|
environment:
|
||||||
|
- NODE_ENV=development
|
||||||
|
ports:
|
||||||
|
- "3000:3000"
|
||||||
|
command: npm run dev
|
||||||
|
```
|
||||||
|
|
||||||
|
### Production Setup
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# docker-compose.prod.yml
|
||||||
|
version: '3.8'
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
image: myapp:${VERSION}
|
||||||
|
environment:
|
||||||
|
- NODE_ENV=production
|
||||||
|
deploy:
|
||||||
|
replicas: 3
|
||||||
|
update_config:
|
||||||
|
parallelism: 1
|
||||||
|
delay: 10s
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "node", "healthcheck.js"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
```
|
||||||
|
|
||||||
|
### Multi-Environment
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Override files
|
||||||
|
docker-compose -f docker-compose.yml -f docker-compose.dev.yml up
|
||||||
|
docker-compose -f docker-compose.yml -f docker-compose.prod.yml up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
### Logging
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
logging:
|
||||||
|
driver: "json-file"
|
||||||
|
options:
|
||||||
|
max-size: "10m"
|
||||||
|
max-file: "3"
|
||||||
|
labels: "app,environment"
|
||||||
|
```
|
||||||
|
|
||||||
|
## CI/CD Integration
|
||||||
|
|
||||||
|
### Build Pipeline
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# .github/workflows/docker.yml
|
||||||
|
name: Docker Build
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [main]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
|
||||||
|
- name: Build image
|
||||||
|
run: docker build -t myapp:${{ github.sha }} .
|
||||||
|
|
||||||
|
- name: Scan image
|
||||||
|
run: trivy image myapp:${{ github.sha }}
|
||||||
|
|
||||||
|
- name: Push to registry
|
||||||
|
run: |
|
||||||
|
echo ${{ secrets.DOCKER_PASSWORD }} | docker login -u ${{ secrets.DOCKER_USER }} --password-stdin
|
||||||
|
docker push myapp:${{ github.sha }}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Common Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# View logs
|
||||||
|
docker-compose logs -f app
|
||||||
|
|
||||||
|
# Execute in container
|
||||||
|
docker-compose exec app sh
|
||||||
|
|
||||||
|
# Check health
|
||||||
|
docker inspect --format='{{.State.Health.Status}}' <container>
|
||||||
|
|
||||||
|
# View resource usage
|
||||||
|
docker stats
|
||||||
|
|
||||||
|
# Remove unused resources
|
||||||
|
docker system prune -a
|
||||||
|
|
||||||
|
# Debug network
|
||||||
|
docker network inspect app-network
|
||||||
|
|
||||||
|
# Swarm diagnostics
|
||||||
|
docker node ls
|
||||||
|
docker service ps mystack_api
|
||||||
|
```
|
||||||
|
|
||||||
|
## Prohibitions
|
||||||
|
|
||||||
|
- DO NOT run containers as root
|
||||||
|
- DO NOT use `latest` tag in production
|
||||||
|
- DO NOT expose unnecessary ports
|
||||||
|
- DO NOT store secrets in images
|
||||||
|
- DO NOT use privileged mode unnecessarily
|
||||||
|
- DO NOT mount host directories without restrictions
|
||||||
|
- DO NOT skip health checks in production
|
||||||
|
- DO NOT ignore vulnerability scans
|
||||||
125
.kilo/rules/evolutionary-sync.md
Normal file
125
.kilo/rules/evolutionary-sync.md
Normal file
@@ -0,0 +1,125 @@
|
|||||||
|
# Evolutionary Mode Rules
|
||||||
|
|
||||||
|
When agents are modified, created, or updated during evolutionary improvement, this rule ensures all related files stay synchronized.
|
||||||
|
|
||||||
|
## Source of Truth
|
||||||
|
|
||||||
|
**`.kilo/agents/*.md` frontmatter** is the single source of truth for:
|
||||||
|
- Agent definitions (models, modes, descriptions)
|
||||||
|
- Permissions and tool access
|
||||||
|
|
||||||
|
**DO NOT create config files in root directory** - this breaks Kilo Code.
|
||||||
|
|
||||||
|
## Files to Synchronize
|
||||||
|
|
||||||
|
When agents change, update ALL of these files:
|
||||||
|
|
||||||
|
| File | What to Update |
|
||||||
|
|------|----------------|
|
||||||
|
| `.kilo/agents/agent-name.md` | Model in YAML frontmatter (source of truth) |
|
||||||
|
| `.kilo/KILO_SPEC.md` | Pipeline Agents table, Workflow Commands table |
|
||||||
|
| `AGENTS.md` | Pipeline Agents tables by category |
|
||||||
|
| `.kilo/agents/orchestrator.md` | Task Tool Invocation table (if new subagent) |
|
||||||
|
|
||||||
|
## Sync Checklist
|
||||||
|
|
||||||
|
When modifying agents:
|
||||||
|
|
||||||
|
```
|
||||||
|
□ Update agent .md file frontmatter (model, description, mode)
|
||||||
|
□ Run: node scripts/sync-agents.cjs --fix
|
||||||
|
□ Verify KILO_SPEC.md and AGENTS.md updated
|
||||||
|
□ Update orchestrator.md subagent_type mappings (if new agent)
|
||||||
|
□ Commit all changes together
|
||||||
|
```
|
||||||
|
|
||||||
|
## Adding New Agent
|
||||||
|
|
||||||
|
1. Create `.kilo/agents/agent-name.md` with frontmatter:
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
description: Agent description
|
||||||
|
mode: subagent|primary|all
|
||||||
|
model: provider/model-id
|
||||||
|
color: #HEX
|
||||||
|
permission:
|
||||||
|
read: allow
|
||||||
|
edit: allow
|
||||||
|
...
|
||||||
|
---
|
||||||
|
```
|
||||||
|
|
||||||
|
2. If subagent, add to `orchestrator.md`:
|
||||||
|
- Add to permission list under `task:`
|
||||||
|
- Add to Task Tool Invocation table
|
||||||
|
|
||||||
|
3. Run sync:
|
||||||
|
```bash
|
||||||
|
node scripts/sync-agents.cjs --fix
|
||||||
|
```
|
||||||
|
|
||||||
|
4. Commit all changes:
|
||||||
|
```bash
|
||||||
|
git add .kilo/agents/new-agent.md .kilo/KILO_SPEC.md AGENTS.md .kilo/agents/orchestrator.md
|
||||||
|
git commit -m "feat: add new-agent agent"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Model Changes
|
||||||
|
|
||||||
|
When changing a model:
|
||||||
|
|
||||||
|
1. Update `.kilo/agents/agent-name.md` frontmatter `model:` field
|
||||||
|
2. Run `node scripts/sync-agents.cjs --fix`
|
||||||
|
3. Document reason in commit message
|
||||||
|
|
||||||
|
Example:
|
||||||
|
```
|
||||||
|
fix: update LeadDeveloper model to qwen3-coder:480b
|
||||||
|
|
||||||
|
Reason: Better code generation quality, supports larger context
|
||||||
|
```
|
||||||
|
|
||||||
|
## Available Models
|
||||||
|
|
||||||
|
Use exact model IDs from `.kilo/KILO_SPEC.md` Model Format section:
|
||||||
|
|
||||||
|
| Model ID | Use Case |
|
||||||
|
|----------|----------|
|
||||||
|
| `ollama-cloud/kimi-k2-thinking` | Complex reasoning, requirements |
|
||||||
|
| `ollama-cloud/qwen3-coder:480b` | Code generation |
|
||||||
|
| `ollama-cloud/deepseek-v3.2` | Backend, DevOps |
|
||||||
|
| `ollama-cloud/minimax-m2.5` | Code review, fixing |
|
||||||
|
| `ollama-cloud/nemotron-3-super` | Performance, evaluation |
|
||||||
|
| `ollama-cloud/nemotron-3-nano:30b` | Lightweight tasks |
|
||||||
|
| `ollama-cloud/glm-5` | Orchestration |
|
||||||
|
| `ollama-cloud/gpt-oss:120b` | Large context tasks |
|
||||||
|
| `qwen/qwen3.6-plus:free` | Planning (free tier) |
|
||||||
|
|
||||||
|
## Verification
|
||||||
|
|
||||||
|
Run sync verification before commits:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Check only (CI mode)
|
||||||
|
node scripts/sync-agents.cjs --check
|
||||||
|
|
||||||
|
# Fix discrepancies
|
||||||
|
node scripts/sync-agents.cjs --fix
|
||||||
|
```
|
||||||
|
|
||||||
|
## CI Integration
|
||||||
|
|
||||||
|
Add to `.github/workflows/ci.yml`:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
- name: Verify Agent Sync
|
||||||
|
run: node scripts/sync-agents.cjs --check
|
||||||
|
```
|
||||||
|
|
||||||
|
## Prohibited Actions
|
||||||
|
|
||||||
|
- DO NOT create `kilo.json` or `kilo-meta.json` in root (breaks Kilo Code)
|
||||||
|
- DO NOT update KILO_SPEC.md without updating agent files first
|
||||||
|
- DO NOT add new agent without updating orchestrator permissions
|
||||||
|
- DO NOT skip running sync script after changes
|
||||||
|
- DO NOT commit only partial files (always commit all sync targets)
|
||||||
576
.kilo/skills/docker-compose/SKILL.md
Normal file
576
.kilo/skills/docker-compose/SKILL.md
Normal file
@@ -0,0 +1,576 @@
|
|||||||
|
# Skill: Docker Compose
|
||||||
|
|
||||||
|
## Purpose
|
||||||
|
|
||||||
|
Comprehensive skill for Docker Compose configuration, orchestration, and multi-container application deployment.
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
Docker Compose is a tool for defining and running multi-container Docker applications. Use this skill when working with local development environments, CI/CD pipelines, and production deployments.
|
||||||
|
|
||||||
|
## When to Use
|
||||||
|
|
||||||
|
- Setting up local development environments
|
||||||
|
- Configuring multi-container applications
|
||||||
|
- Managing service dependencies
|
||||||
|
- Implementing health checks and waiting strategies
|
||||||
|
- Creating development/production configurations
|
||||||
|
|
||||||
|
## Skill Files Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
docker-compose/
|
||||||
|
├── SKILL.md # This file
|
||||||
|
├── patterns/
|
||||||
|
│ ├── basic-service.md # Basic service templates
|
||||||
|
│ ├── networking.md # Network patterns
|
||||||
|
│ ├── volumes.md # Volume management
|
||||||
|
│ └── healthchecks.md # Health check patterns
|
||||||
|
└── examples/
|
||||||
|
├── nodejs-api.md # Node.js API template
|
||||||
|
├── postgres.md # PostgreSQL template
|
||||||
|
└── redis.md # Redis template
|
||||||
|
```
|
||||||
|
|
||||||
|
## Core Patterns
|
||||||
|
|
||||||
|
### 1. Basic Service Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
version: '3.8'
|
||||||
|
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
args:
|
||||||
|
- NODE_ENV=production
|
||||||
|
image: myapp:latest
|
||||||
|
container_name: myapp
|
||||||
|
restart: unless-stopped
|
||||||
|
ports:
|
||||||
|
- "3000:3000"
|
||||||
|
environment:
|
||||||
|
- NODE_ENV=production
|
||||||
|
- DATABASE_URL=postgres://db:5432/app
|
||||||
|
volumes:
|
||||||
|
- ./data:/app/data
|
||||||
|
networks:
|
||||||
|
- app-network
|
||||||
|
depends_on:
|
||||||
|
db:
|
||||||
|
condition: service_healthy
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
start_period: 40s
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Environment Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# Use .env file for secrets
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
env_file:
|
||||||
|
- .env
|
||||||
|
- .env.local
|
||||||
|
environment:
|
||||||
|
# Non-sensitive defaults
|
||||||
|
- NODE_ENV=production
|
||||||
|
- LOG_LEVEL=info
|
||||||
|
# Override from .env
|
||||||
|
- DATABASE_URL=${DATABASE_URL}
|
||||||
|
- JWT_SECRET=${JWT_SECRET}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Network Patterns
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# Isolated networks for security
|
||||||
|
networks:
|
||||||
|
frontend:
|
||||||
|
driver: bridge
|
||||||
|
backend:
|
||||||
|
driver: bridge
|
||||||
|
internal: true # No external access
|
||||||
|
|
||||||
|
services:
|
||||||
|
web:
|
||||||
|
networks:
|
||||||
|
- frontend
|
||||||
|
- backend
|
||||||
|
|
||||||
|
api:
|
||||||
|
networks:
|
||||||
|
- backend
|
||||||
|
|
||||||
|
db:
|
||||||
|
networks:
|
||||||
|
- backend
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Volume Patterns
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
volumes:
|
||||||
|
# Named volume (managed by Docker)
|
||||||
|
postgres-data:
|
||||||
|
driver: local
|
||||||
|
|
||||||
|
# Bind mount (host directory)
|
||||||
|
# ./data:/app/data
|
||||||
|
|
||||||
|
services:
|
||||||
|
db:
|
||||||
|
volumes:
|
||||||
|
- postgres-data:/var/lib/postgresql/data
|
||||||
|
- ./init-scripts:/docker-entrypoint-initdb.d:ro
|
||||||
|
|
||||||
|
app:
|
||||||
|
volumes:
|
||||||
|
- ./config:/app/config:ro
|
||||||
|
- app-logs:/app/logs
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
app-logs:
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5. Health Checks & Dependencies
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
db:
|
||||||
|
image: postgres:15-alpine
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "pg_isready -U $POSTGRES_USER"]
|
||||||
|
interval: 10s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 5
|
||||||
|
|
||||||
|
app:
|
||||||
|
depends_on:
|
||||||
|
db:
|
||||||
|
condition: service_healthy
|
||||||
|
redis:
|
||||||
|
condition: service_started
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6. Multi-Environment Configurations
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# docker-compose.yml (base)
|
||||||
|
version: '3.8'
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
image: myapp:latest
|
||||||
|
environment:
|
||||||
|
- NODE_ENV=production
|
||||||
|
|
||||||
|
# docker-compose.dev.yml (development override)
|
||||||
|
version: '3.8'
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: Dockerfile.dev
|
||||||
|
volumes:
|
||||||
|
- .:/app
|
||||||
|
- /app/node_modules
|
||||||
|
environment:
|
||||||
|
- NODE_ENV=development
|
||||||
|
ports:
|
||||||
|
- "3000:3000"
|
||||||
|
command: npm run dev
|
||||||
|
|
||||||
|
# docker-compose.prod.yml (production override)
|
||||||
|
version: '3.8'
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
image: myapp:${VERSION}
|
||||||
|
deploy:
|
||||||
|
replicas: 3
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '1'
|
||||||
|
memory: 1G
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "node", "healthcheck.js"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
```
|
||||||
|
|
||||||
|
## Service Templates
|
||||||
|
|
||||||
|
### Node.js API
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
environment:
|
||||||
|
- NODE_ENV=production
|
||||||
|
- PORT=3000
|
||||||
|
- DATABASE_URL=postgres://db:5432/app
|
||||||
|
- REDIS_URL=redis://redis:6379
|
||||||
|
ports:
|
||||||
|
- "3000:3000"
|
||||||
|
depends_on:
|
||||||
|
db:
|
||||||
|
condition: service_healthy
|
||||||
|
redis:
|
||||||
|
condition: service_started
|
||||||
|
networks:
|
||||||
|
- backend
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "node", "-e", "require('http').get('http://localhost:3000/health', (r) => process.exit(r.statusCode === 200 ? 0 : 1))"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
```
|
||||||
|
|
||||||
|
### PostgreSQL Database
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
db:
|
||||||
|
image: postgres:15-alpine
|
||||||
|
environment:
|
||||||
|
POSTGRES_DB: app
|
||||||
|
POSTGRES_USER: ${DB_USER:-app}
|
||||||
|
POSTGRES_PASSWORD: ${DB_PASSWORD:?DB_PASSWORD required}
|
||||||
|
volumes:
|
||||||
|
- postgres-data:/var/lib/postgresql/data
|
||||||
|
- ./init-scripts:/docker-entrypoint-initdb.d:ro
|
||||||
|
networks:
|
||||||
|
- backend
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "pg_isready -U $POSTGRES_USER -d $POSTGRES_DB"]
|
||||||
|
interval: 10s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 5
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 512M
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
postgres-data:
|
||||||
|
```
|
||||||
|
|
||||||
|
### Redis Cache
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
redis:
|
||||||
|
image: redis:7-alpine
|
||||||
|
command: redis-server --appendonly yes --maxmemory 256mb --maxmemory-policy allkeys-lru
|
||||||
|
volumes:
|
||||||
|
- redis-data:/data
|
||||||
|
networks:
|
||||||
|
- backend
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "redis-cli", "ping"]
|
||||||
|
interval: 10s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 5
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
redis-data:
|
||||||
|
```
|
||||||
|
|
||||||
|
### Nginx Reverse Proxy
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
nginx:
|
||||||
|
image: nginx:alpine
|
||||||
|
ports:
|
||||||
|
- "80:80"
|
||||||
|
- "443:443"
|
||||||
|
volumes:
|
||||||
|
- ./nginx.conf:/etc/nginx/nginx.conf:ro
|
||||||
|
- ./ssl:/etc/nginx/ssl:ro
|
||||||
|
depends_on:
|
||||||
|
- api
|
||||||
|
networks:
|
||||||
|
- frontend
|
||||||
|
- backend
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "nginx", "-t"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
```
|
||||||
|
|
||||||
|
## Common Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Start services
|
||||||
|
docker-compose up -d
|
||||||
|
|
||||||
|
# Start specific service
|
||||||
|
docker-compose up -d app
|
||||||
|
|
||||||
|
# View logs
|
||||||
|
docker-compose logs -f app
|
||||||
|
|
||||||
|
# Execute command in container
|
||||||
|
docker-compose exec app sh
|
||||||
|
docker-compose exec app npm test
|
||||||
|
|
||||||
|
# Stop services
|
||||||
|
docker-compose down
|
||||||
|
|
||||||
|
# Stop and remove volumes
|
||||||
|
docker-compose down -v
|
||||||
|
|
||||||
|
# Rebuild images
|
||||||
|
docker-compose build --no-cache app
|
||||||
|
|
||||||
|
# Scale service
|
||||||
|
docker-compose up -d --scale api=3
|
||||||
|
|
||||||
|
# Multi-environment
|
||||||
|
docker-compose -f docker-compose.yml -f docker-compose.dev.yml up
|
||||||
|
docker-compose -f docker-compose.yml -f docker-compose.prod.yml up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
## Best Practices
|
||||||
|
|
||||||
|
### Security
|
||||||
|
|
||||||
|
1. **Never store secrets in images**
|
||||||
|
```yaml
|
||||||
|
# Bad
|
||||||
|
environment:
|
||||||
|
- DB_PASSWORD=password123
|
||||||
|
|
||||||
|
# Good
|
||||||
|
secrets:
|
||||||
|
- db_password
|
||||||
|
secrets:
|
||||||
|
db_password:
|
||||||
|
file: ./secrets/db_password.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Use non-root user**
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
user: "1000:1000"
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Limit resources**
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '1'
|
||||||
|
memory: 1G
|
||||||
|
```
|
||||||
|
|
||||||
|
4. **Use internal networks for databases**
|
||||||
|
```yaml
|
||||||
|
networks:
|
||||||
|
backend:
|
||||||
|
internal: true
|
||||||
|
```
|
||||||
|
|
||||||
|
### Performance
|
||||||
|
|
||||||
|
1. **Enable health checks**
|
||||||
|
```yaml
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
start_period: 40s
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Use .dockerignore**
|
||||||
|
```
|
||||||
|
node_modules
|
||||||
|
.git
|
||||||
|
.env
|
||||||
|
*.log
|
||||||
|
coverage
|
||||||
|
.nyc_output
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Optimize build cache**
|
||||||
|
```yaml
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
args:
|
||||||
|
- NODE_ENV=production
|
||||||
|
```
|
||||||
|
|
||||||
|
### Development
|
||||||
|
|
||||||
|
1. **Use volumes for hot reload**
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
volumes:
|
||||||
|
- .:/app
|
||||||
|
- /app/node_modules # Anonymous volume for node_modules
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Keep containers running**
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
stdin_open: true # -i
|
||||||
|
tty: true # -t
|
||||||
|
```
|
||||||
|
|
||||||
|
### Production
|
||||||
|
|
||||||
|
1. **Use specific image versions**
|
||||||
|
```yaml
|
||||||
|
# Bad
|
||||||
|
image: node:latest
|
||||||
|
|
||||||
|
# Good
|
||||||
|
image: node:20-alpine
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Configure logging**
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
logging:
|
||||||
|
driver: "json-file"
|
||||||
|
options:
|
||||||
|
max-size: "10m"
|
||||||
|
max-file: "3"
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Restart policies**
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
restart: unless-stopped
|
||||||
|
```
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Common Issues
|
||||||
|
|
||||||
|
1. **Container won't start**
|
||||||
|
```bash
|
||||||
|
# Check logs
|
||||||
|
docker-compose logs app
|
||||||
|
|
||||||
|
# Check container status
|
||||||
|
docker-compose ps
|
||||||
|
|
||||||
|
# Inspect container
|
||||||
|
docker inspect myapp_app_1
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Network connectivity issues**
|
||||||
|
```bash
|
||||||
|
# List networks
|
||||||
|
docker network ls
|
||||||
|
|
||||||
|
# Inspect network
|
||||||
|
docker network inspect myapp_default
|
||||||
|
|
||||||
|
# Test connectivity
|
||||||
|
docker-compose exec app ping db
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Volume permission issues**
|
||||||
|
```bash
|
||||||
|
# Check volume
|
||||||
|
docker volume inspect myapp_postgres-data
|
||||||
|
|
||||||
|
# Fix permissions (if needed)
|
||||||
|
docker-compose exec app chown -R node:node /app/data
|
||||||
|
```
|
||||||
|
|
||||||
|
4. **Health check failing**
|
||||||
|
```bash
|
||||||
|
# Run health check manually
|
||||||
|
docker-compose exec app curl -f http://localhost:3000/health
|
||||||
|
|
||||||
|
# Check health status
|
||||||
|
docker inspect --format='{{.State.Health.Status}}' myapp_app_1
|
||||||
|
```
|
||||||
|
|
||||||
|
5. **Out of disk space**
|
||||||
|
```bash
|
||||||
|
# Clean up
|
||||||
|
docker system prune -a --volumes
|
||||||
|
|
||||||
|
# Check disk usage
|
||||||
|
docker system df
|
||||||
|
```
|
||||||
|
|
||||||
|
## Integration with CI/CD
|
||||||
|
|
||||||
|
### GitHub Actions
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# .github/workflows/test.yml
|
||||||
|
name: Test
|
||||||
|
|
||||||
|
on: [push, pull_request]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
test:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
|
||||||
|
- name: Build and test
|
||||||
|
run: |
|
||||||
|
docker-compose -f docker-compose.yml -f docker-compose.test.yml up --abort-on-container-exit --exit-code-from app
|
||||||
|
|
||||||
|
- name: Cleanup
|
||||||
|
if: always()
|
||||||
|
run: docker-compose down -v
|
||||||
|
```
|
||||||
|
|
||||||
|
### GitLab CI
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# .gitlab-ci.yml
|
||||||
|
stages:
|
||||||
|
- test
|
||||||
|
- build
|
||||||
|
|
||||||
|
test:
|
||||||
|
stage: test
|
||||||
|
script:
|
||||||
|
- docker-compose -f docker-compose.yml -f docker-compose.test.yml up --abort-on-container-exit --exit-code-from app
|
||||||
|
after_script:
|
||||||
|
- docker-compose down -v
|
||||||
|
|
||||||
|
build:
|
||||||
|
stage: build
|
||||||
|
script:
|
||||||
|
- docker build -t myapp:$CI_COMMIT_SHA .
|
||||||
|
- docker push myapp:$CI_COMMIT_SHA
|
||||||
|
```
|
||||||
|
|
||||||
|
## Related Skills
|
||||||
|
|
||||||
|
| Skill | Purpose |
|
||||||
|
|-------|---------|
|
||||||
|
| `docker-swarm` | Orchestration with Docker Swarm |
|
||||||
|
| `docker-security` | Container security patterns |
|
||||||
|
| `docker-networking` | Advanced networking techniques |
|
||||||
|
| `docker-monitoring` | Container monitoring and logging |
|
||||||
447
.kilo/skills/docker-compose/patterns/basic-service.md
Normal file
447
.kilo/skills/docker-compose/patterns/basic-service.md
Normal file
@@ -0,0 +1,447 @@
|
|||||||
|
# Docker Compose Patterns
|
||||||
|
|
||||||
|
## Pattern: Multi-Service Application
|
||||||
|
|
||||||
|
Complete pattern for a typical web application with API, database, cache, and reverse proxy.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
version: '3.8'
|
||||||
|
|
||||||
|
services:
|
||||||
|
# Reverse Proxy
|
||||||
|
nginx:
|
||||||
|
image: nginx:alpine
|
||||||
|
ports:
|
||||||
|
- "80:80"
|
||||||
|
- "443:443"
|
||||||
|
volumes:
|
||||||
|
- ./nginx.conf:/etc/nginx/nginx.conf:ro
|
||||||
|
- ./ssl:/etc/nginx/ssl:ro
|
||||||
|
depends_on:
|
||||||
|
- api
|
||||||
|
networks:
|
||||||
|
- frontend
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '0.5'
|
||||||
|
memory: 256M
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "nginx", "-t"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
|
||||||
|
# API Service
|
||||||
|
api:
|
||||||
|
build:
|
||||||
|
context: ./api
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
environment:
|
||||||
|
- NODE_ENV=production
|
||||||
|
- DATABASE_URL=postgres://db:5432/app
|
||||||
|
- REDIS_URL=redis://cache:6379
|
||||||
|
depends_on:
|
||||||
|
db:
|
||||||
|
condition: service_healthy
|
||||||
|
cache:
|
||||||
|
condition: service_started
|
||||||
|
networks:
|
||||||
|
- frontend
|
||||||
|
- backend
|
||||||
|
deploy:
|
||||||
|
replicas: 3
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '1'
|
||||||
|
memory: 1G
|
||||||
|
reservations:
|
||||||
|
cpus: '0.5'
|
||||||
|
memory: 512M
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "node", "-e", "require('http').get('http://localhost:3000/health', (r) => process.exit(r.statusCode === 200 ? 0 : 1))"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
start_period: 60s
|
||||||
|
|
||||||
|
# Database
|
||||||
|
db:
|
||||||
|
image: postgres:15-alpine
|
||||||
|
environment:
|
||||||
|
POSTGRES_DB: app
|
||||||
|
POSTGRES_USER: ${DB_USER:-app}
|
||||||
|
POSTGRES_PASSWORD: ${DB_PASSWORD:?DB_PASSWORD required}
|
||||||
|
volumes:
|
||||||
|
- postgres-data:/var/lib/postgresql/data
|
||||||
|
- ./init-scripts:/docker-entrypoint-initdb.d:ro
|
||||||
|
networks:
|
||||||
|
- backend
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "pg_isready -U $POSTGRES_USER -d $POSTGRES_DB"]
|
||||||
|
interval: 10s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 5
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '2'
|
||||||
|
memory: 2G
|
||||||
|
|
||||||
|
# Cache
|
||||||
|
cache:
|
||||||
|
image: redis:7-alpine
|
||||||
|
command: redis-server --appendonly yes --maxmemory 256mb --maxmemory-policy allkeys-lru
|
||||||
|
volumes:
|
||||||
|
- redis-data:/data
|
||||||
|
networks:
|
||||||
|
- backend
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "redis-cli", "ping"]
|
||||||
|
interval: 10s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 5
|
||||||
|
|
||||||
|
networks:
|
||||||
|
frontend:
|
||||||
|
driver: bridge
|
||||||
|
backend:
|
||||||
|
driver: bridge
|
||||||
|
internal: true # No external access
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
postgres-data:
|
||||||
|
driver: local
|
||||||
|
redis-data:
|
||||||
|
driver: local
|
||||||
|
```
|
||||||
|
|
||||||
|
## Pattern: Development Override
|
||||||
|
|
||||||
|
Development-specific configuration with hot reload and debugging.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# docker-compose.dev.yml
|
||||||
|
version: '3.8'
|
||||||
|
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
build:
|
||||||
|
context: ./api
|
||||||
|
dockerfile: Dockerfile.dev
|
||||||
|
volumes:
|
||||||
|
- ./api/src:/app/src:ro
|
||||||
|
- ./api/tests:/app/tests:ro
|
||||||
|
- /app/node_modules
|
||||||
|
environment:
|
||||||
|
- NODE_ENV=development
|
||||||
|
- DEBUG=app:*
|
||||||
|
ports:
|
||||||
|
- "3000:3000"
|
||||||
|
- "9229:9229" # Node.js debugger
|
||||||
|
command: npm run dev
|
||||||
|
|
||||||
|
db:
|
||||||
|
ports:
|
||||||
|
- "5432:5432" # Expose for local tools
|
||||||
|
|
||||||
|
cache:
|
||||||
|
ports:
|
||||||
|
- "6379:6379" # Expose for local tools
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Usage
|
||||||
|
docker-compose -f docker-compose.yml -f docker-compose.dev.yml up
|
||||||
|
```
|
||||||
|
|
||||||
|
## Pattern: Production Override
|
||||||
|
|
||||||
|
Production-optimized configuration with security and performance settings.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# docker-compose.prod.yml
|
||||||
|
version: '3.8'
|
||||||
|
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
image: myapp/api:${VERSION}
|
||||||
|
deploy:
|
||||||
|
replicas: 3
|
||||||
|
update_config:
|
||||||
|
parallelism: 1
|
||||||
|
delay: 10s
|
||||||
|
failure_action: rollback
|
||||||
|
rollback_config:
|
||||||
|
parallelism: 1
|
||||||
|
delay: 10s
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '1'
|
||||||
|
memory: 1G
|
||||||
|
reservations:
|
||||||
|
cpus: '0.5'
|
||||||
|
memory: 512M
|
||||||
|
environment:
|
||||||
|
- NODE_ENV=production
|
||||||
|
secrets:
|
||||||
|
- db_password
|
||||||
|
- jwt_secret
|
||||||
|
logging:
|
||||||
|
driver: "json-file"
|
||||||
|
options:
|
||||||
|
max-size: "10m"
|
||||||
|
max-file: "5"
|
||||||
|
|
||||||
|
secrets:
|
||||||
|
db_password:
|
||||||
|
external: true
|
||||||
|
jwt_secret:
|
||||||
|
external: true
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Usage
|
||||||
|
docker-compose -f docker-compose.yml -f docker-compose.prod.yml up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
## Pattern: Health Check Dependency
|
||||||
|
|
||||||
|
Waiting for dependent services to be healthy before starting.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
depends_on:
|
||||||
|
db:
|
||||||
|
condition: service_healthy
|
||||||
|
cache:
|
||||||
|
condition: service_healthy
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
start_period: 60s
|
||||||
|
|
||||||
|
db:
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "pg_isready -U $POSTGRES_USER"]
|
||||||
|
interval: 10s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 5
|
||||||
|
|
||||||
|
cache:
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "redis-cli", "ping"]
|
||||||
|
interval: 10s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 5
|
||||||
|
```
|
||||||
|
|
||||||
|
## Pattern: Secrets Management
|
||||||
|
|
||||||
|
Using Docker secrets for sensitive data (Swarm mode).
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
secrets:
|
||||||
|
- db_password
|
||||||
|
- api_key
|
||||||
|
- jwt_secret
|
||||||
|
environment:
|
||||||
|
- DB_PASSWORD_FILE=/run/secrets/db_password
|
||||||
|
- API_KEY_FILE=/run/secrets/api_key
|
||||||
|
- JWT_SECRET_FILE=/run/secrets/jwt_secret
|
||||||
|
|
||||||
|
secrets:
|
||||||
|
db_password:
|
||||||
|
file: ./secrets/db_password.txt
|
||||||
|
api_key:
|
||||||
|
file: ./secrets/api_key.txt
|
||||||
|
jwt_secret:
|
||||||
|
external: true # Created via: echo "secret" | docker secret create jwt_secret -
|
||||||
|
```
|
||||||
|
|
||||||
|
## Pattern: Resource Limits
|
||||||
|
|
||||||
|
Setting resource constraints for containers.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '1.0'
|
||||||
|
memory: 1G
|
||||||
|
reservations:
|
||||||
|
cpus: '0.5'
|
||||||
|
memory: 512M
|
||||||
|
# Alternative for non-Swarm
|
||||||
|
mem_limit: 1G
|
||||||
|
memswap_limit: 1G
|
||||||
|
cpus: 1
|
||||||
|
```
|
||||||
|
|
||||||
|
## Pattern: Network Isolation
|
||||||
|
|
||||||
|
Segmenting networks for security.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
web:
|
||||||
|
networks:
|
||||||
|
- frontend
|
||||||
|
- backend
|
||||||
|
|
||||||
|
api:
|
||||||
|
networks:
|
||||||
|
- backend
|
||||||
|
- database
|
||||||
|
|
||||||
|
db:
|
||||||
|
networks:
|
||||||
|
- database
|
||||||
|
|
||||||
|
networks:
|
||||||
|
frontend:
|
||||||
|
driver: bridge
|
||||||
|
backend:
|
||||||
|
driver: bridge
|
||||||
|
database:
|
||||||
|
driver: bridge
|
||||||
|
internal: true # No internet access
|
||||||
|
```
|
||||||
|
|
||||||
|
## Pattern: Volume Management
|
||||||
|
|
||||||
|
Different volume types for different use cases.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
volumes:
|
||||||
|
# Named volume (managed by Docker)
|
||||||
|
- app-data:/app/data
|
||||||
|
# Bind mount (host directory)
|
||||||
|
- ./config:/app/config:ro
|
||||||
|
# Anonymous volume (for node_modules)
|
||||||
|
- /app/node_modules
|
||||||
|
# tmpfs (temporary in-memory)
|
||||||
|
- type: tmpfs
|
||||||
|
target: /tmp
|
||||||
|
tmpfs:
|
||||||
|
size: 100M
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
app-data:
|
||||||
|
driver: local
|
||||||
|
labels:
|
||||||
|
- "app=myapp"
|
||||||
|
- "type=persistent"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Pattern: Logging Configuration
|
||||||
|
|
||||||
|
Configuring logging drivers and options.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
logging:
|
||||||
|
driver: "json-file" # Default
|
||||||
|
options:
|
||||||
|
max-size: "10m"
|
||||||
|
max-file: "3"
|
||||||
|
labels: "app,environment"
|
||||||
|
tag: "{{.ImageName}}/{{.Name}}"
|
||||||
|
|
||||||
|
# Syslog logging
|
||||||
|
app-syslog:
|
||||||
|
logging:
|
||||||
|
driver: "syslog"
|
||||||
|
options:
|
||||||
|
syslog-address: "tcp://logserver:514"
|
||||||
|
syslog-facility: "daemon"
|
||||||
|
tag: "myapp"
|
||||||
|
|
||||||
|
# Fluentd logging
|
||||||
|
app-fluentd:
|
||||||
|
logging:
|
||||||
|
driver: "fluentd"
|
||||||
|
options:
|
||||||
|
fluentd-address: "localhost:24224"
|
||||||
|
tag: "myapp.api"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Pattern: Multi-Environment
|
||||||
|
|
||||||
|
Managing multiple environments with overrides.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Directory structure
|
||||||
|
# docker-compose.yml # Base configuration
|
||||||
|
# docker-compose.dev.yml # Development overrides
|
||||||
|
# docker-compose.staging.yml # Staging overrides
|
||||||
|
# docker-compose.prod.yml # Production overrides
|
||||||
|
# .env # Environment variables
|
||||||
|
# .env.dev # Development variables
|
||||||
|
# .env.staging # Staging variables
|
||||||
|
# .env.prod # Production variables
|
||||||
|
|
||||||
|
# Development
|
||||||
|
docker-compose --env-file .env.dev \
|
||||||
|
-f docker-compose.yml -f docker-compose.dev.yml up
|
||||||
|
|
||||||
|
# Staging
|
||||||
|
docker-compose --env-file .env.staging \
|
||||||
|
-f docker-compose.yml -f docker-compose.staging.yml up -d
|
||||||
|
|
||||||
|
# Production
|
||||||
|
docker-compose --env-file .env.prod \
|
||||||
|
-f docker-compose.yml -f docker-compose.prod.yml up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
## Pattern: CI/CD Testing
|
||||||
|
|
||||||
|
Running tests in isolated containers.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# docker-compose.test.yml
|
||||||
|
version: '3.8'
|
||||||
|
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
environment:
|
||||||
|
- NODE_ENV=test
|
||||||
|
- DATABASE_URL=postgres://test:test@db:5432/test
|
||||||
|
depends_on:
|
||||||
|
- db
|
||||||
|
command: npm test
|
||||||
|
networks:
|
||||||
|
- test-network
|
||||||
|
|
||||||
|
db:
|
||||||
|
image: postgres:15-alpine
|
||||||
|
environment:
|
||||||
|
POSTGRES_DB: test
|
||||||
|
POSTGRES_USER: test
|
||||||
|
POSTGRES_PASSWORD: test
|
||||||
|
networks:
|
||||||
|
- test-network
|
||||||
|
|
||||||
|
networks:
|
||||||
|
test-network:
|
||||||
|
driver: bridge
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# CI pipeline
|
||||||
|
docker-compose -f docker-compose.test.yml up --abort-on-container-exit --exit-code-from app
|
||||||
|
docker-compose -f docker-compose.test.yml down -v
|
||||||
|
```
|
||||||
756
.kilo/skills/docker-monitoring/SKILL.md
Normal file
756
.kilo/skills/docker-monitoring/SKILL.md
Normal file
@@ -0,0 +1,756 @@
|
|||||||
|
# Skill: Docker Monitoring & Logging
|
||||||
|
|
||||||
|
## Purpose
|
||||||
|
|
||||||
|
Comprehensive skill for Docker container monitoring, logging, metrics collection, and observability.
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
Container monitoring is essential for understanding application health, performance, and troubleshooting issues in production. Use this skill for setting up monitoring stacks, configuring logging, and implementing observability.
|
||||||
|
|
||||||
|
## When to Use
|
||||||
|
|
||||||
|
- Setting up container monitoring
|
||||||
|
- Configuring centralized logging
|
||||||
|
- Implementing health checks
|
||||||
|
- Performance optimization
|
||||||
|
- Troubleshooting container issues
|
||||||
|
- Alerting configuration
|
||||||
|
|
||||||
|
## Monitoring Stack
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────┐
|
||||||
|
│ Container Monitoring Stack │
|
||||||
|
├─────────────────────────────────────────────────────────────┤
|
||||||
|
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
||||||
|
│ │ Grafana │ │ Prometheus │ │ Alertmgr │ │
|
||||||
|
│ │ Dashboard │ │ Metrics │ │ Alerts │ │
|
||||||
|
│ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ │
|
||||||
|
│ │ │ │ │
|
||||||
|
│ ┌──────┴────────────────┴────────────────┴──────┐ │
|
||||||
|
│ │ Container Observability │ │
|
||||||
|
│ └──────┬────────────────┬───────────────────────┘ │
|
||||||
|
│ │ │ │
|
||||||
|
│ ┌──────┴──────┐ ┌──────┴──────┐ ┌─────────────┐ │
|
||||||
|
│ │ cAdvisor │ │ node-exporter│ │ Loki/EFK │ │
|
||||||
|
│ │ Container │ │ Node Metrics│ │ Logging │ │
|
||||||
|
│ │ Metrics │ │ │ │ │ │
|
||||||
|
│ └─────────────┘ └─────────────┘ └─────────────┘ │
|
||||||
|
└─────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
## Health Checks
|
||||||
|
|
||||||
|
### 1. Dockerfile Health Check
|
||||||
|
|
||||||
|
```dockerfile
|
||||||
|
FROM node:20-alpine
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
COPY . .
|
||||||
|
RUN npm ci --omit=dev
|
||||||
|
|
||||||
|
# Health check
|
||||||
|
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
|
||||||
|
CMD wget --no-verbose --tries=1 --spider http://localhost:3000/health || exit 1
|
||||||
|
|
||||||
|
# Or using curl (note: Alpine base images include busybox wget but NOT curl by default)
|
||||||
|
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
|
||||||
|
CMD curl -f http://localhost:3000/health || exit 1
|
||||||
|
|
||||||
|
# Or use Node.js for health check
|
||||||
|
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
|
||||||
|
CMD node -e "require('http').get('http://localhost:3000/health', (r) => process.exit(r.statusCode === 200 ? 0 : 1))"
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Docker Compose Health Check
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
image: myapp:latest
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
start_period: 60s
|
||||||
|
|
||||||
|
db:
|
||||||
|
image: postgres:15-alpine
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "pg_isready -U $POSTGRES_USER"]
|
||||||
|
interval: 10s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 5
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Docker Swarm Health Check
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
image: myapp:latest
|
||||||
|
deploy:
|
||||||
|
update_config:
|
||||||
|
failure_action: rollback
|
||||||
|
monitor: 30s
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
start_period: 60s
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Application Health Endpoint
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// Node.js health check endpoint
|
||||||
|
const express = require('express');
|
||||||
|
const app = express();
|
||||||
|
|
||||||
|
// Dependencies status
|
||||||
|
async function checkHealth() {
|
||||||
|
const checks = {
|
||||||
|
database: await checkDatabase(),
|
||||||
|
redis: await checkRedis(),
|
||||||
|
disk: checkDiskSpace(),
|
||||||
|
memory: checkMemory()
|
||||||
|
};
|
||||||
|
|
||||||
|
const healthy = Object.values(checks).every(c => c === 'healthy');
|
||||||
|
|
||||||
|
return {
|
||||||
|
status: healthy ? 'healthy' : 'unhealthy',
|
||||||
|
timestamp: new Date().toISOString(),
|
||||||
|
checks
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
app.get('/health', async (req, res) => {
|
||||||
|
const health = await checkHealth();
|
||||||
|
const status = health.status === 'healthy' ? 200 : 503;
|
||||||
|
res.status(status).json(health);
|
||||||
|
});
|
||||||
|
|
||||||
|
app.get('/health/live', (req, res) => {
|
||||||
|
// Liveness probe - is the app running?
|
||||||
|
res.status(200).json({ status: 'alive' });
|
||||||
|
});
|
||||||
|
|
||||||
|
app.get('/health/ready', async (req, res) => {
|
||||||
|
// Readiness probe - is the app ready to serve?
|
||||||
|
const ready = await isReady();
|
||||||
|
res.status(ready ? 200 : 503).json({ ready });
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
## Logging
|
||||||
|
|
||||||
|
### 1. Docker Logging Drivers
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# JSON file driver (default)
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
logging:
|
||||||
|
driver: "json-file"
|
||||||
|
options:
|
||||||
|
max-size: "10m"
|
||||||
|
max-file: "3"
|
||||||
|
labels: "app,environment"
|
||||||
|
|
||||||
|
# Syslog driver
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
logging:
|
||||||
|
driver: "syslog"
|
||||||
|
options:
|
||||||
|
syslog-address: "tcp://logserver:514"
|
||||||
|
syslog-facility: "daemon"
|
||||||
|
tag: "myapp"
|
||||||
|
|
||||||
|
# Journald driver
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
logging:
|
||||||
|
driver: "journald"
|
||||||
|
options:
|
||||||
|
labels: "app,environment"
|
||||||
|
|
||||||
|
# Fluentd driver
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
logging:
|
||||||
|
driver: "fluentd"
|
||||||
|
options:
|
||||||
|
fluentd-address: "localhost:24224"
|
||||||
|
tag: "myapp.api"
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Structured Logging
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// Pino for structured logging
|
||||||
|
const pino = require('pino');
|
||||||
|
|
||||||
|
const logger = pino({
|
||||||
|
level: process.env.LOG_LEVEL || 'info',
|
||||||
|
formatters: {
|
||||||
|
level: (label) => ({ level: label })
|
||||||
|
},
|
||||||
|
timestamp: pino.stdTimeFunctions.isoTime
|
||||||
|
});
|
||||||
|
|
||||||
|
// Log with context
|
||||||
|
logger.info({
|
||||||
|
userId: '123',
|
||||||
|
action: 'login',
|
||||||
|
ip: '192.168.1.1'
|
||||||
|
}, 'User logged in');
|
||||||
|
|
||||||
|
// Output:
|
||||||
|
// {"level":"info","time":"2024-01-01T12:00:00.000Z","userId":"123","action":"login","ip":"192.168.1.1","msg":"User logged in"}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. EFK Stack (Elasticsearch, Fluentd, Kibana)
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# docker-compose.yml
|
||||||
|
version: '3.8'
|
||||||
|
|
||||||
|
services:
|
||||||
|
elasticsearch:
|
||||||
|
image: elasticsearch:8.10.0
|
||||||
|
environment:
|
||||||
|
- discovery.type=single-node
|
||||||
|
- xpack.security.enabled=false
|
||||||
|
volumes:
|
||||||
|
- elasticsearch-data:/usr/share/elasticsearch/data
|
||||||
|
networks:
|
||||||
|
- logging
|
||||||
|
|
||||||
|
fluentd:
|
||||||
|
image: fluent/fluentd:v1.16
|
||||||
|
volumes:
|
||||||
|
- ./fluentd/conf:/fluentd/etc
|
||||||
|
ports:
|
||||||
|
- "24224:24224"
|
||||||
|
networks:
|
||||||
|
- logging
|
||||||
|
|
||||||
|
kibana:
|
||||||
|
image: kibana:8.10.0
|
||||||
|
environment:
|
||||||
|
- ELASTICSEARCH_HOSTS=http://elasticsearch:9200
|
||||||
|
ports:
|
||||||
|
- "5601:5601"
|
||||||
|
networks:
|
||||||
|
- logging
|
||||||
|
|
||||||
|
app:
|
||||||
|
image: myapp:latest
|
||||||
|
logging:
|
||||||
|
driver: "fluentd"
|
||||||
|
options:
|
||||||
|
fluentd-address: "localhost:24224"
|
||||||
|
tag: "myapp.api"
|
||||||
|
networks:
|
||||||
|
- logging
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
elasticsearch-data:
|
||||||
|
|
||||||
|
networks:
|
||||||
|
logging:
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Loki Stack (Promtail, Loki, Grafana)
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# docker-compose.yml
|
||||||
|
version: '3.8'
|
||||||
|
|
||||||
|
services:
|
||||||
|
loki:
|
||||||
|
image: grafana/loki:latest
|
||||||
|
ports:
|
||||||
|
- "3100:3100"
|
||||||
|
volumes:
|
||||||
|
- ./loki-config.yml:/etc/loki/local-config.yaml
|
||||||
|
command: -config.file=/etc/loki/local-config.yaml
|
||||||
|
networks:
|
||||||
|
- monitoring
|
||||||
|
|
||||||
|
promtail:
|
||||||
|
image: grafana/promtail:latest
|
||||||
|
volumes:
|
||||||
|
- /var/log:/var/log
|
||||||
|
- ./promtail-config.yml:/etc/promtail/config.yml
|
||||||
|
command: -config.file=/etc/promtail/config.yml
|
||||||
|
networks:
|
||||||
|
- monitoring
|
||||||
|
|
||||||
|
grafana:
|
||||||
|
image: grafana/grafana:latest
|
||||||
|
ports:
|
||||||
|
- "3000:3000"
|
||||||
|
environment:
|
||||||
|
- GF_SECURITY_ADMIN_PASSWORD=admin
|
||||||
|
volumes:
|
||||||
|
- grafana-data:/var/lib/grafana
|
||||||
|
networks:
|
||||||
|
- monitoring
|
||||||
|
|
||||||
|
app:
|
||||||
|
image: myapp:latest
|
||||||
|
logging:
|
||||||
|
driver: "json-file"
|
||||||
|
options:
|
||||||
|
max-size: "10m"
|
||||||
|
max-file: "3"
|
||||||
|
networks:
|
||||||
|
- monitoring
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
grafana-data:
|
||||||
|
|
||||||
|
networks:
|
||||||
|
monitoring:
|
||||||
|
```
|
||||||
|
|
||||||
|
## Metrics Collection
|
||||||
|
|
||||||
|
### 1. Prometheus + cAdvisor
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# docker-compose.yml
|
||||||
|
version: '3.8'
|
||||||
|
|
||||||
|
services:
|
||||||
|
prometheus:
|
||||||
|
image: prom/prometheus:latest
|
||||||
|
ports:
|
||||||
|
- "9090:9090"
|
||||||
|
volumes:
|
||||||
|
- ./prometheus.yml:/etc/prometheus/prometheus.yml
|
||||||
|
- prometheus-data:/prometheus
|
||||||
|
command:
|
||||||
|
- '--config.file=/etc/prometheus/prometheus.yml'
|
||||||
|
- '--storage.tsdb.retention.time=30d'
|
||||||
|
networks:
|
||||||
|
- monitoring
|
||||||
|
|
||||||
|
cadvisor:
|
||||||
|
image: gcr.io/cadvisor/cadvisor:latest
|
||||||
|
ports:
|
||||||
|
- "8080:8080"
|
||||||
|
volumes:
|
||||||
|
- /:/rootfs:ro
|
||||||
|
- /var/run:/var/run:ro
|
||||||
|
- /sys:/sys:ro
|
||||||
|
- /var/lib/docker/:/var/lib/docker:ro
|
||||||
|
networks:
|
||||||
|
- monitoring
|
||||||
|
|
||||||
|
node_exporter:
|
||||||
|
image: prom/node-exporter:latest
|
||||||
|
ports:
|
||||||
|
- "9100:9100"
|
||||||
|
volumes:
|
||||||
|
- /proc:/host/proc:ro
|
||||||
|
- /sys:/host/sys:ro
|
||||||
|
- /:/rootfs:ro
|
||||||
|
command:
|
||||||
|
- '--path.procfs=/host/proc'
|
||||||
|
- '--path.rootfs=/rootfs'
|
||||||
|
- '--path.sysfs=/host/sys'
|
||||||
|
networks:
|
||||||
|
- monitoring
|
||||||
|
|
||||||
|
grafana:
|
||||||
|
image: grafana/grafana:latest
|
||||||
|
ports:
|
||||||
|
- "3000:3000"
|
||||||
|
environment:
|
||||||
|
- GF_SECURITY_ADMIN_PASSWORD=admin
|
||||||
|
volumes:
|
||||||
|
- grafana-data:/var/lib/grafana
|
||||||
|
networks:
|
||||||
|
- monitoring
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
prometheus-data:
|
||||||
|
grafana-data:
|
||||||
|
|
||||||
|
networks:
|
||||||
|
monitoring:
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Prometheus Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# prometheus.yml
|
||||||
|
global:
|
||||||
|
scrape_interval: 15s
|
||||||
|
evaluation_interval: 15s
|
||||||
|
|
||||||
|
scrape_configs:
|
||||||
|
# Prometheus itself
|
||||||
|
- job_name: 'prometheus'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['prometheus:9090']
|
||||||
|
|
||||||
|
# cAdvisor (container metrics)
|
||||||
|
- job_name: 'cadvisor'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['cadvisor:8080']
|
||||||
|
|
||||||
|
# Node exporter (host metrics)
|
||||||
|
- job_name: 'node'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['node_exporter:9100']
|
||||||
|
|
||||||
|
# Application metrics
|
||||||
|
- job_name: 'app'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['app:3000']
|
||||||
|
metrics_path: '/metrics'
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Application Metrics (Prometheus Client)
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// Node.js with prom-client
|
||||||
|
const promClient = require('prom-client');
|
||||||
|
|
||||||
|
// Enable default metrics
|
||||||
|
promClient.collectDefaultMetrics();
|
||||||
|
|
||||||
|
// Custom metrics
|
||||||
|
const httpRequestDuration = new promClient.Histogram({
|
||||||
|
name: 'http_request_duration_seconds',
|
||||||
|
help: 'Duration of HTTP requests in seconds',
|
||||||
|
labelNames: ['method', 'route', 'status_code'],
|
||||||
|
buckets: [0.1, 0.3, 0.5, 0.7, 1, 3, 5, 7, 10]
|
||||||
|
});
|
||||||
|
|
||||||
|
const activeConnections = new promClient.Gauge({
|
||||||
|
name: 'active_connections',
|
||||||
|
help: 'Number of active connections'
|
||||||
|
});
|
||||||
|
|
||||||
|
const dbQueryDuration = new promClient.Histogram({
|
||||||
|
name: 'db_query_duration_seconds',
|
||||||
|
help: 'Duration of database queries in seconds',
|
||||||
|
labelNames: ['query_type', 'table'],
|
||||||
|
buckets: [0.01, 0.05, 0.1, 0.5, 1, 2]
|
||||||
|
});
|
||||||
|
|
||||||
|
// Middleware for HTTP metrics
|
||||||
|
app.use((req, res, next) => {
|
||||||
|
const end = httpRequestDuration.startTimer();
|
||||||
|
res.on('finish', () => {
|
||||||
|
end({ method: req.method, route: req.route?.path || req.path, status_code: res.statusCode });
|
||||||
|
});
|
||||||
|
next();
|
||||||
|
});
|
||||||
|
|
||||||
|
// Metrics endpoint
|
||||||
|
app.get('/metrics', async (req, res) => {
|
||||||
|
res.set('Content-Type', promClient.register.contentType);
|
||||||
|
res.send(await promClient.register.metrics());
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Grafana Dashboards
|
||||||
|
|
||||||
|
```json
|
||||||
|
// Dashboard JSON for container metrics
|
||||||
|
{
|
||||||
|
"dashboard": {
|
||||||
|
"title": "Docker Container Metrics",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"title": "Container CPU Usage",
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "rate(container_cpu_usage_seconds_total{name=~\".+\"}[5m]) * 100",
|
||||||
|
"legendFormat": "{{name}}"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "Container Memory Usage",
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "container_memory_usage_bytes{name=~\".+\"} / 1024 / 1024",
|
||||||
|
"legendFormat": "{{name}} MB"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "Container Network I/O",
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "rate(container_network_receive_bytes_total{name=~\".+\"}[5m])",
|
||||||
|
"legendFormat": "{{name}} RX"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "rate(container_network_transmit_bytes_total{name=~\".+\"}[5m])",
|
||||||
|
"legendFormat": "{{name}} TX"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Alerting
|
||||||
|
|
||||||
|
### 1. Alertmanager Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# alertmanager.yml
|
||||||
|
global:
|
||||||
|
smtp_smarthost: 'smtp.example.com:587'
|
||||||
|
smtp_from: 'alerts@example.com'
|
||||||
|
smtp_auth_username: 'alerts@example.com'
|
||||||
|
smtp_auth_password: 'password'
|
||||||
|
|
||||||
|
route:
|
||||||
|
group_by: ['alertname', 'severity']
|
||||||
|
group_wait: 30s
|
||||||
|
group_interval: 5m
|
||||||
|
repeat_interval: 1h
|
||||||
|
receiver: 'team-email'
|
||||||
|
routes:
|
||||||
|
- match:
|
||||||
|
severity: critical
|
||||||
|
receiver: 'team-email-critical'
|
||||||
|
- match:
|
||||||
|
severity: warning
|
||||||
|
receiver: 'team-email-warning'
|
||||||
|
|
||||||
|
receivers:
|
||||||
|
- name: 'team-email-critical'
|
||||||
|
email_configs:
|
||||||
|
- to: 'critical@example.com'
|
||||||
|
send_resolved: true
|
||||||
|
|
||||||
|
- name: 'team-email-warning'
|
||||||
|
email_configs:
|
||||||
|
- to: 'warnings@example.com'
|
||||||
|
send_resolved: true
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Prometheus Alert Rules
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# alerts.yml
|
||||||
|
groups:
|
||||||
|
- name: container_alerts
|
||||||
|
rules:
|
||||||
|
# Container down
|
||||||
|
- alert: ContainerDown
|
||||||
|
expr: absent(container_last_seen{name=~".+"})
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
annotations:
|
||||||
|
summary: "Container {{ $labels.name }} is down"
|
||||||
|
description: "Container {{ $labels.name }} has been down for more than 5 minutes."
|
||||||
|
|
||||||
|
# High CPU
|
||||||
|
- alert: HighCpuUsage
|
||||||
|
expr: rate(container_cpu_usage_seconds_total{name=~".+"}[5m]) * 100 > 80
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "High CPU usage on {{ $labels.name }}"
|
||||||
|
description: "Container {{ $labels.name }} CPU usage is {{ $value }}%."
|
||||||
|
|
||||||
|
# High Memory
|
||||||
|
- alert: HighMemoryUsage
|
||||||
|
expr: (container_memory_usage_bytes{name=~".+"} / container_spec_memory_limit_bytes{name=~".+"}) * 100 > 80
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "High memory usage on {{ $labels.name }}"
|
||||||
|
description: "Container {{ $labels.name }} memory usage is {{ $value }}%."
|
||||||
|
|
||||||
|
# Container restart
|
||||||
|
- alert: ContainerRestart
|
||||||
|
expr: increase(container_restart_count{name=~".+"}[1h]) > 0
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "Container {{ $labels.name }} restarted"
|
||||||
|
description: "Container {{ $labels.name }} has restarted {{ $value }} times in the last hour."
|
||||||
|
|
||||||
|
# Health check reporting unhealthy
|
||||||
|
- alert: NoHealthCheck
|
||||||
|
expr: container_health_status{name=~".+"} == 0
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
annotations:
|
||||||
|
summary: "Health check failing for {{ $labels.name }}"
|
||||||
|
description: "Container {{ $labels.name }} health check has been failing for 5 minutes."
|
||||||
|
```
|
||||||
|
|
||||||
|
## Observability Best Practices
|
||||||
|
|
||||||
|
### 1. Three Pillars
|
||||||
|
|
||||||
|
| Pillar | Tool | Purpose |
|
||||||
|
|--------|------|---------|
|
||||||
|
| Metrics | Prometheus | Quantitative measurements |
|
||||||
|
| Logs | Loki/EFK | Event records |
|
||||||
|
| Traces | Jaeger/Zipkin | Request flow |
|
||||||
|
|
||||||
|
### 2. Metrics Categories
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# Four Golden Signals (Google SRE)
|
||||||
|
|
||||||
|
# 1. Latency
|
||||||
|
- http_request_duration_seconds
|
||||||
|
- db_query_duration_seconds
|
||||||
|
|
||||||
|
# 2. Traffic
|
||||||
|
- http_requests_per_second
|
||||||
|
- active_connections
|
||||||
|
|
||||||
|
# 3. Errors
|
||||||
|
- http_requests_failed_total
|
||||||
|
- error_rate
|
||||||
|
|
||||||
|
# 4. Saturation
|
||||||
|
- container_memory_usage_bytes
|
||||||
|
- container_cpu_usage_seconds_total
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Service Level Objectives (SLOs)
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# Prometheus recording rules for SLO
|
||||||
|
groups:
|
||||||
|
- name: slo_rules
|
||||||
|
rules:
|
||||||
|
- record: slo:availability:ratio_5m
|
||||||
|
expr: |
|
||||||
|
sum(rate(http_requests_total{status!~"5.."}[5m])) /
|
||||||
|
sum(rate(http_requests_total[5m]))
|
||||||
|
|
||||||
|
- record: slo:latency:p99_5m
|
||||||
|
expr: |
|
||||||
|
histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m]))
|
||||||
|
|
||||||
|
- record: slo:error_rate:ratio_5m
|
||||||
|
expr: |
|
||||||
|
sum(rate(http_requests_total{status=~"5.."}[5m])) /
|
||||||
|
sum(rate(http_requests_total[5m]))
|
||||||
|
```
|
||||||
|
|
||||||
|
## Troubleshooting Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# View container logs
|
||||||
|
docker logs <container_id>
|
||||||
|
docker logs -f --tail 100 <container_id>
|
||||||
|
|
||||||
|
# View resource usage
|
||||||
|
docker stats
|
||||||
|
docker stats --no-stream
|
||||||
|
|
||||||
|
# Inspect container
|
||||||
|
docker inspect <container_id>
|
||||||
|
|
||||||
|
# Check health status
|
||||||
|
docker inspect --format='{{.State.Health.Status}}' <container_id>
|
||||||
|
|
||||||
|
# View processes
|
||||||
|
docker top <container_id>
|
||||||
|
|
||||||
|
# Execute commands
|
||||||
|
docker exec -it <container_id> sh
|
||||||
|
docker exec <container_id> df -h
|
||||||
|
|
||||||
|
# View network
|
||||||
|
docker network inspect <network_name>
|
||||||
|
|
||||||
|
# View disk usage
|
||||||
|
docker system df
|
||||||
|
docker system df -v
|
||||||
|
|
||||||
|
# Prune unused resources
|
||||||
|
docker system prune -a --volumes
|
||||||
|
|
||||||
|
# Swarm service logs
|
||||||
|
docker service logs <service_name>
|
||||||
|
docker service ps <service_name>
|
||||||
|
|
||||||
|
# Swarm node status
|
||||||
|
docker node ls
|
||||||
|
docker node inspect <node_id>
|
||||||
|
```
|
||||||
|
|
||||||
|
## Performance Tuning
|
||||||
|
|
||||||
|
### 1. Container Resource Limits
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '1'
|
||||||
|
memory: 1G
|
||||||
|
reservations:
|
||||||
|
cpus: '0.5'
|
||||||
|
memory: 512M
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Logging Performance
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
logging:
|
||||||
|
driver: "json-file"
|
||||||
|
options:
|
||||||
|
max-size: "10m"
|
||||||
|
max-file: "3"
|
||||||
|
# Reduce logging overhead
|
||||||
|
labels: "level,requestId"
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Prometheus Optimization
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# prometheus.yml
|
||||||
|
global:
|
||||||
|
scrape_interval: 15s # Balance between granularity and load
|
||||||
|
evaluation_interval: 15s
|
||||||
|
|
||||||
|
# Retention
|
||||||
|
command:
|
||||||
|
- '--storage.tsdb.retention.time=30d'
|
||||||
|
- '--storage.tsdb.retention.size=10GB'
|
||||||
|
```
|
||||||
|
|
||||||
|
## Related Skills
|
||||||
|
|
||||||
|
| Skill | Purpose |
|
||||||
|
|-------|---------|
|
||||||
|
| `docker-compose` | Local development setup |
|
||||||
|
| `docker-swarm` | Production orchestration |
|
||||||
|
| `docker-security` | Container security |
|
||||||
|
| `kubernetes` | Advanced orchestration |
|
||||||
685
.kilo/skills/docker-security/SKILL.md
Normal file
685
.kilo/skills/docker-security/SKILL.md
Normal file
@@ -0,0 +1,685 @@
|
|||||||
|
# Skill: Docker Security
|
||||||
|
|
||||||
|
## Purpose
|
||||||
|
|
||||||
|
Comprehensive skill for Docker container security, vulnerability scanning, secrets management, and hardening best practices.
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
Container security is essential for production deployments. Use this skill when scanning for vulnerabilities, configuring security settings, managing secrets, and implementing security best practices.
|
||||||
|
|
||||||
|
## When to Use
|
||||||
|
|
||||||
|
- Security hardening containers
|
||||||
|
- Scanning images for vulnerabilities
|
||||||
|
- Managing secrets and credentials
|
||||||
|
- Configuring container isolation
|
||||||
|
- Implementing least privilege
|
||||||
|
- Security audits
|
||||||
|
|
||||||
|
## Security Layers
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────┐
|
||||||
|
│ Container Security Layers │
|
||||||
|
├─────────────────────────────────────────────────────────────┤
|
||||||
|
│ 1. Host Security │
|
||||||
|
│ - Kernel hardening │
|
||||||
|
│ - SELinux/AppArmor │
|
||||||
|
│ - cgroups namespace │
|
||||||
|
├─────────────────────────────────────────────────────────────┤
|
||||||
|
│ 2. Container Runtime Security │
|
||||||
|
│ - User namespace │
|
||||||
|
│ - Seccomp profiles │
|
||||||
|
│ - Capability dropping │
|
||||||
|
├─────────────────────────────────────────────────────────────┤
|
||||||
|
│ 3. Image Security │
|
||||||
|
│ - Minimal base images │
|
||||||
|
│ - Vulnerability scanning │
|
||||||
|
│ - No secrets in images │
|
||||||
|
├─────────────────────────────────────────────────────────────┤
|
||||||
|
│ 4. Network Security │
|
||||||
|
│ - Network policies │
|
||||||
|
│ - TLS encryption │
|
||||||
|
│ - Ingress controls │
|
||||||
|
├─────────────────────────────────────────────────────────────┤
|
||||||
|
│ 5. Application Security │
|
||||||
|
│ - Input validation │
|
||||||
|
│ - Authentication │
|
||||||
|
│ - Authorization │
|
||||||
|
└─────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
## Image Security
|
||||||
|
|
||||||
|
### 1. Base Image Selection
|
||||||
|
|
||||||
|
```dockerfile
|
||||||
|
# ✅ Good: Minimal, specific version
|
||||||
|
FROM node:20-alpine
|
||||||
|
|
||||||
|
# ✅ Better: Distroless (minimal attack surface)
|
||||||
|
FROM gcr.io/distroless/nodejs20-debian12
|
||||||
|
|
||||||
|
# ❌ Bad: Large base, latest tag
|
||||||
|
FROM node:latest
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Multi-stage Builds
|
||||||
|
|
||||||
|
```dockerfile
|
||||||
|
# Build stage
|
||||||
|
FROM node:20-alpine AS builder
|
||||||
|
WORKDIR /app
|
||||||
|
COPY package*.json ./
|
||||||
|
RUN npm ci
|
||||||
|
COPY . .
|
||||||
|
RUN npm run build
|
||||||
|
|
||||||
|
# Runtime stage
|
||||||
|
FROM node:20-alpine
|
||||||
|
RUN addgroup -g 1001 appgroup && \
|
||||||
|
adduser -u 1001 -G appgroup -D appuser
|
||||||
|
WORKDIR /app
|
||||||
|
COPY --from=builder --chown=appuser:appgroup /app/dist ./dist
|
||||||
|
COPY --from=builder --chown=appuser:appgroup /app/node_modules ./node_modules
|
||||||
|
USER appuser
|
||||||
|
CMD ["node", "dist/index.js"]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Vulnerability Scanning
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Scan with Trivy
|
||||||
|
trivy image myapp:latest
|
||||||
|
|
||||||
|
# Scan with Docker Scout
|
||||||
|
docker scout cves myapp:latest
|
||||||
|
|
||||||
|
# Scan with Grype
|
||||||
|
grype myapp:latest
|
||||||
|
|
||||||
|
# CI/CD integration
|
||||||
|
trivy image --exit-code 1 --severity HIGH,CRITICAL myapp:latest
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. No Secrets in Images
|
||||||
|
|
||||||
|
```dockerfile
|
||||||
|
# ❌ Never do this
|
||||||
|
ENV DATABASE_PASSWORD=password123
|
||||||
|
COPY .env ./
|
||||||
|
|
||||||
|
# ✅ Use runtime secrets
|
||||||
|
# Secrets are mounted at runtime
|
||||||
|
RUN --mount=type=secret,id=db_password \
|
||||||
|
export DB_PASSWORD=$(cat /run/secrets/db_password)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Container Runtime Security
|
||||||
|
|
||||||
|
### 1. Non-root User
|
||||||
|
|
||||||
|
```dockerfile
|
||||||
|
# Create non-root user
|
||||||
|
FROM alpine:3.18
|
||||||
|
RUN addgroup -g 1001 appgroup && \
|
||||||
|
adduser -u 1001 -G appgroup -D appuser
|
||||||
|
WORKDIR /app
|
||||||
|
COPY --chown=appuser:appgroup . .
|
||||||
|
USER appuser
|
||||||
|
CMD ["./app"]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Read-only Filesystem
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# docker-compose.yml
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
image: myapp:latest
|
||||||
|
read_only: true
|
||||||
|
tmpfs:
|
||||||
|
- /tmp
|
||||||
|
- /var/cache
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Capability Dropping
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# Drop all capabilities
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
image: myapp:latest
|
||||||
|
cap_drop:
|
||||||
|
- ALL
|
||||||
|
cap_add:
|
||||||
|
- CHOWN # Only needed capabilities
|
||||||
|
- SETGID
|
||||||
|
- SETUID
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Security Options
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
image: myapp:latest
|
||||||
|
security_opt:
|
||||||
|
- no-new-privileges:true # Prevent privilege escalation
|
||||||
|
- seccomp:default.json # Seccomp profile
|
||||||
|
- apparmor:docker-default # AppArmor profile
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5. Resource Limits
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
image: myapp:latest
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '1'
|
||||||
|
memory: 1G
|
||||||
|
reservations:
|
||||||
|
cpus: '0.5'
|
||||||
|
memory: 512M
|
||||||
|
pids_limit: 100 # Limit process count
|
||||||
|
```
|
||||||
|
|
||||||
|
## Secrets Management
|
||||||
|
|
||||||
|
### 1. Docker Secrets (Swarm)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Create secret
|
||||||
|
echo "my_password" | docker secret create db_password -
|
||||||
|
|
||||||
|
# Create from file
|
||||||
|
docker secret create jwt_secret ./secrets/jwt.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# docker-compose.yml (Swarm)
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
image: myapp:latest
|
||||||
|
secrets:
|
||||||
|
- db_password
|
||||||
|
- jwt_secret
|
||||||
|
environment:
|
||||||
|
- DB_PASSWORD_FILE=/run/secrets/db_password
|
||||||
|
|
||||||
|
secrets:
|
||||||
|
db_password:
|
||||||
|
external: true
|
||||||
|
jwt_secret:
|
||||||
|
external: true
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Docker Compose Secrets (Non-Swarm)
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# docker-compose.yml
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
image: myapp:latest
|
||||||
|
secrets:
|
||||||
|
- db_password
|
||||||
|
environment:
|
||||||
|
- DB_PASSWORD_FILE=/run/secrets/db_password
|
||||||
|
|
||||||
|
secrets:
|
||||||
|
db_password:
|
||||||
|
file: ./secrets/db_password.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Environment Variables (Development)
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# docker-compose.yml (development only)
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
image: myapp:latest
|
||||||
|
env_file:
|
||||||
|
- .env # Add .env to .gitignore!
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# .env (NEVER COMMIT)
|
||||||
|
DATABASE_URL=postgres://...
|
||||||
|
JWT_SECRET=secret123
|
||||||
|
API_KEY=key123
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Reading Secrets in Application
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// Node.js
|
||||||
|
const fs = require('fs');
|
||||||
|
|
||||||
|
function getSecret(secretName, envName) {
|
||||||
|
// Try file-based secret first (Docker secrets)
|
||||||
|
const secretPath = `/run/secrets/${secretName}`;
|
||||||
|
if (fs.existsSync(secretPath)) {
|
||||||
|
return fs.readFileSync(secretPath, 'utf8').trim();
|
||||||
|
}
|
||||||
|
// Fallback to environment variable (development)
|
||||||
|
return process.env[envName];
|
||||||
|
}
|
||||||
|
|
||||||
|
const dbPassword = getSecret('db_password', 'DB_PASSWORD');
|
||||||
|
```
|
||||||
|
|
||||||
|
## Network Security
|
||||||
|
|
||||||
|
### 1. Network Segmentation
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# Separate networks for different access levels
|
||||||
|
networks:
|
||||||
|
frontend:
|
||||||
|
driver: bridge
|
||||||
|
|
||||||
|
backend:
|
||||||
|
driver: bridge
|
||||||
|
internal: true # No external access
|
||||||
|
|
||||||
|
database:
|
||||||
|
driver: bridge
|
||||||
|
internal: true
|
||||||
|
|
||||||
|
services:
|
||||||
|
web:
|
||||||
|
networks:
|
||||||
|
- frontend
|
||||||
|
|
||||||
|
api:
|
||||||
|
networks:
|
||||||
|
- frontend
|
||||||
|
- backend
|
||||||
|
|
||||||
|
db:
|
||||||
|
networks:
|
||||||
|
- database
|
||||||
|
|
||||||
|
cache:
|
||||||
|
networks:
|
||||||
|
- database
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Port Exposure
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# ✅ Good: Only expose necessary ports
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
ports:
|
||||||
|
- "3000:3000" # API port only
|
||||||
|
|
||||||
|
db:
|
||||||
|
# No ports exposed - only accessible inside network
|
||||||
|
networks:
|
||||||
|
- database
|
||||||
|
|
||||||
|
# ❌ Bad: Exposing database to host
|
||||||
|
services:
|
||||||
|
db:
|
||||||
|
ports:
|
||||||
|
- "5432:5432" # Security risk!
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. TLS Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
nginx:
|
||||||
|
image: nginx:alpine
|
||||||
|
ports:
|
||||||
|
- "443:443"
|
||||||
|
volumes:
|
||||||
|
- ./ssl/cert.pem:/etc/nginx/ssl/cert.pem:ro
|
||||||
|
- ./ssl/key.pem:/etc/nginx/ssl/key.pem:ro
|
||||||
|
configs:
|
||||||
|
- source: nginx_config
|
||||||
|
target: /etc/nginx/nginx.conf
|
||||||
|
|
||||||
|
configs:
|
||||||
|
nginx_config:
|
||||||
|
file: ./nginx.conf
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Ingress Controls
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# Limit connections
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
image: myapp:latest
|
||||||
|
ports:
|
||||||
|
- target: 3000
|
||||||
|
published: 3000
|
||||||
|
mode: host # Bypass ingress mesh for performance
|
||||||
|
deploy:
|
||||||
|
endpoint_mode: dnsrr
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 1G
|
||||||
|
```
|
||||||
|
|
||||||
|
## Security Profiles
|
||||||
|
|
||||||
|
### 1. Seccomp Profile
|
||||||
|
|
||||||
|
```json
|
||||||
|
// default-seccomp.json
|
||||||
|
{
|
||||||
|
"defaultAction": "SCMP_ACT_ERRNO",
|
||||||
|
"architectures": ["SCMP_ARCH_X86_64"],
|
||||||
|
"syscalls": [
|
||||||
|
{
|
||||||
|
"names": ["read", "write", "exit", "exit_group"],
|
||||||
|
"action": "SCMP_ACT_ALLOW"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"names": ["open", "openat", "close"],
|
||||||
|
"action": "SCMP_ACT_ALLOW"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# Use custom seccomp profile
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
security_opt:
|
||||||
|
- seccomp:./seccomp.json
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. AppArmor Profile
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Create AppArmor profile
|
||||||
|
cat > /etc/apparmor.d/docker-myapp <<EOF
|
||||||
|
#include <tunables/global>
|
||||||
|
profile docker-myapp flags=(attach_disconnected,mediate_deleted) {
|
||||||
|
#include <abstractions/base>
|
||||||
|
|
||||||
|
network inet tcp,
|
||||||
|
network inet udp,
|
||||||
|
|
||||||
|
/app/** r,
|
||||||
|
/app/** w,
|
||||||
|
|
||||||
|
deny /** rw,
|
||||||
|
}
|
||||||
|
EOF
|
||||||
|
|
||||||
|
# Load profile
|
||||||
|
apparmor_parser -r /etc/apparmor.d/docker-myapp
|
||||||
|
```
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# Use AppArmor profile
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
security_opt:
|
||||||
|
- apparmor:docker-myapp
|
||||||
|
```
|
||||||
|
|
||||||
|
## Security Scanning
|
||||||
|
|
||||||
|
### 1. Image Vulnerability Scan
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Trivy scan
|
||||||
|
trivy image --severity HIGH,CRITICAL myapp:latest
|
||||||
|
|
||||||
|
# Docker Scout
|
||||||
|
docker scout cves myapp:latest
|
||||||
|
|
||||||
|
# Grype
|
||||||
|
grype myapp:latest
|
||||||
|
|
||||||
|
# Output JSON for CI
|
||||||
|
trivy image --format json --output results.json myapp:latest
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Base Image Updates
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Check base image for updates
|
||||||
|
docker pull node:20-alpine
|
||||||
|
|
||||||
|
# Rebuild with updated base
|
||||||
|
docker build --no-cache -t myapp:latest .
|
||||||
|
|
||||||
|
# Scan new image
|
||||||
|
trivy image myapp:latest
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Dependency Audit
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Node.js
|
||||||
|
npm audit
|
||||||
|
npm audit fix
|
||||||
|
|
||||||
|
# Python
|
||||||
|
pip-audit
|
||||||
|
|
||||||
|
# Go
|
||||||
|
go list -m all | nancy
|
||||||
|
|
||||||
|
# General
|
||||||
|
snyk test
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Secret Detection
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Scan for secrets
|
||||||
|
gitleaks detect --source . --verbose
|
||||||
|
|
||||||
|
# Pre-commit hook
|
||||||
|
gitleaks protect --staged
|
||||||
|
|
||||||
|
# Docker image
|
||||||
|
gitleaks --image myapp:latest
|
||||||
|
```
|
||||||
|
|
||||||
|
## CI/CD Security Integration
|
||||||
|
|
||||||
|
### GitHub Actions
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# .github/workflows/security.yml
|
||||||
|
name: Security Scan
|
||||||
|
|
||||||
|
on: [push, pull_request]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
scan:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
|
||||||
|
- name: Run Trivy vulnerability scanner
|
||||||
|
uses: aquasecurity/trivy-action@master
|
||||||
|
with:
|
||||||
|
image-ref: 'myapp:${{ github.sha }}'
|
||||||
|
format: 'table'
|
||||||
|
exit-code: '1'
|
||||||
|
severity: 'CRITICAL,HIGH'
|
||||||
|
|
||||||
|
- name: Run Gitleaks secret scan
|
||||||
|
uses: gitleaks/gitleaks-action@v2
|
||||||
|
with:
|
||||||
|
args: --path=.
|
||||||
|
```
|
||||||
|
|
||||||
|
### GitLab CI
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# .gitlab-ci.yml
|
||||||
|
security_scan:
|
||||||
|
stage: test
|
||||||
|
image: docker:24
|
||||||
|
services:
|
||||||
|
- docker:dind
|
||||||
|
script:
|
||||||
|
- docker build -t myapp:$CI_COMMIT_SHA .
|
||||||
|
- trivy image --exit-code 1 --severity HIGH,CRITICAL myapp:$CI_COMMIT_SHA
|
||||||
|
- gitleaks detect --source . --verbose
|
||||||
|
```
|
||||||
|
|
||||||
|
## Security Checklist
|
||||||
|
|
||||||
|
### Dockerfile Security
|
||||||
|
|
||||||
|
- [ ] Using minimal base image (alpine/distroless)
|
||||||
|
- [ ] Specific version tags, not `latest`
|
||||||
|
- [ ] Running as non-root user
|
||||||
|
- [ ] No secrets in image
|
||||||
|
- [ ] `.dockerignore` includes `.env`, `.git`, `.credentials`
|
||||||
|
- [ ] COPY instead of ADD (unless needed)
|
||||||
|
- [ ] Multi-stage build for smaller image
|
||||||
|
- [ ] HEALTHCHECK defined
|
||||||
|
|
||||||
|
### Runtime Security
|
||||||
|
|
||||||
|
- [ ] Read-only filesystem
|
||||||
|
- [ ] Capabilities dropped
|
||||||
|
- [ ] No new privileges
|
||||||
|
- [ ] Resource limits set
|
||||||
|
- [ ] User namespace enabled (if available)
|
||||||
|
- [ ] Seccomp/AppArmor profiles applied
|
||||||
|
|
||||||
|
### Network Security
|
||||||
|
|
||||||
|
- [ ] Only necessary ports exposed
|
||||||
|
- [ ] Internal networks for sensitive services
|
||||||
|
- [ ] TLS for external communication
|
||||||
|
- [ ] Network segmentation
|
||||||
|
|
||||||
|
### Secrets Management
|
||||||
|
|
||||||
|
- [ ] No secrets in images
|
||||||
|
- [ ] Using Docker secrets or external vault
|
||||||
|
- [ ] `.env` files gitignored
|
||||||
|
- [ ] Secret rotation implemented
|
||||||
|
|
||||||
|
### CI/CD Security
|
||||||
|
|
||||||
|
- [ ] Vulnerability scanning in pipeline
|
||||||
|
- [ ] Secret detection pre-commit
|
||||||
|
- [ ] Dependency audit automated
|
||||||
|
- [ ] Base images updated regularly
|
||||||
|
|
||||||
|
## Remediation Priority
|
||||||
|
|
||||||
|
| Severity | Priority | Timeline |
|
||||||
|
|----------|----------|----------|
|
||||||
|
| Critical | P0 | Immediately (24h) |
|
||||||
|
| High | P1 | Within 7 days |
|
||||||
|
| Medium | P2 | Within 30 days |
|
||||||
|
| Low | P3 | Next release |
|
||||||
|
|
||||||
|
## Security Tools
|
||||||
|
|
||||||
|
| Tool | Purpose |
|
||||||
|
|------|---------|
|
||||||
|
| Trivy | Image vulnerability scanning |
|
||||||
|
| Docker Scout | Docker's built-in scanner |
|
||||||
|
| Grype | Vulnerability scanner |
|
||||||
|
| Gitleaks | Secret detection |
|
||||||
|
| Snyk | Dependency scanning |
|
||||||
|
| Falco | Runtime security monitoring |
|
||||||
|
| Anchore | Container security analysis |
|
||||||
|
| Clair | Open-source vulnerability scanner |
|
||||||
|
|
||||||
|
## Common Vulnerabilities
|
||||||
|
|
||||||
|
### CVE Examples
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Check for specific CVE
|
||||||
|
trivy image myapp:latest | grep CVE-2021-44228
|
||||||
|
|
||||||
|
# Ignore specific CVE (use carefully)
|
||||||
|
trivy image --ignorefile .trivyignore myapp:latest
|
||||||
|
|
||||||
|
# .trivyignore
|
||||||
|
CVE-2021-12345 # Known and accepted
|
||||||
|
```
|
||||||
|
|
||||||
|
### Log4j Example (CVE-2021-44228)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Check for vulnerable versions
|
||||||
|
docker images --format '{{.Repository}}:{{.Tag}}' | xargs -I {} \
|
||||||
|
trivy image --vulnerabilities CVE-2021-44228 {}
|
||||||
|
|
||||||
|
# Update and rebuild
|
||||||
|
FROM node:20-alpine
|
||||||
|
# Ensure no vulnerable log4j dependency
|
||||||
|
RUN npm audit fix
|
||||||
|
```
|
||||||
|
|
||||||
|
## Incident Response
|
||||||
|
|
||||||
|
### Security Breach Steps
|
||||||
|
|
||||||
|
1. **Isolate**
|
||||||
|
```bash
|
||||||
|
# Stop container
|
||||||
|
docker stop <container_id>
|
||||||
|
|
||||||
|
# Remove from network
|
||||||
|
docker network disconnect app-network <container_id>
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Preserve Evidence**
|
||||||
|
```bash
|
||||||
|
# Save container state
|
||||||
|
docker commit <container_id> incident-container
|
||||||
|
|
||||||
|
# Export logs
|
||||||
|
docker logs <container_id> > incident-logs.txt
|
||||||
|
docker export <container_id> > incident-container.tar
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Analyze**
|
||||||
|
```bash
|
||||||
|
# Inspect container
|
||||||
|
docker inspect <container_id>
|
||||||
|
|
||||||
|
# Check image
|
||||||
|
trivy image <image_name>
|
||||||
|
|
||||||
|
# Review process history
|
||||||
|
docker history <image_name>
|
||||||
|
```
|
||||||
|
|
||||||
|
4. **Remediate**
|
||||||
|
```bash
|
||||||
|
# Update base image
|
||||||
|
docker pull node:20-alpine
|
||||||
|
|
||||||
|
# Rebuild
|
||||||
|
docker build --no-cache -t myapp:fixed .
|
||||||
|
|
||||||
|
# Scan
|
||||||
|
trivy image myapp:fixed
|
||||||
|
```
|
||||||
|
|
||||||
|
## Related Skills
|
||||||
|
|
||||||
|
| Skill | Purpose |
|
||||||
|
|-------|---------|
|
||||||
|
| `docker-compose` | Local development setup |
|
||||||
|
| `docker-swarm` | Production orchestration |
|
||||||
|
| `docker-monitoring` | Security monitoring |
|
||||||
|
| `docker-networking` | Network security |
|
||||||
757
.kilo/skills/docker-swarm/SKILL.md
Normal file
757
.kilo/skills/docker-swarm/SKILL.md
Normal file
@@ -0,0 +1,757 @@
|
|||||||
|
# Skill: Docker Swarm
|
||||||
|
|
||||||
|
## Purpose
|
||||||
|
|
||||||
|
Comprehensive skill for Docker Swarm orchestration, cluster management, and production-ready container deployment.
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
Docker Swarm is Docker's native clustering and orchestration solution. Use this skill for production deployments, high availability setups, and managing containerized applications at scale.
|
||||||
|
|
||||||
|
## When to Use
|
||||||
|
|
||||||
|
- Deploying applications in production clusters
|
||||||
|
- Setting up high availability services
|
||||||
|
- Scaling services dynamically
|
||||||
|
- Managing rolling updates
|
||||||
|
- Handling secrets and configs securely
|
||||||
|
- Multi-node orchestration
|
||||||
|
|
||||||
|
## Core Concepts
|
||||||
|
|
||||||
|
### Swarm Architecture
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────┐
|
||||||
|
│ Docker Swarm Cluster │
|
||||||
|
├─────────────────────────────────────────────────────────────┤
|
||||||
|
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
||||||
|
│ │ Manager │ │ Manager │ │ Manager │ (HA) │
|
||||||
|
│ │ Node 1 │ │ Node 2 │ │ Node 3 │ │
|
||||||
|
│ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ │
|
||||||
|
│ │ │ │ │
|
||||||
|
│ ┌──────┴────────────────┴────────────────┴──────┐ │
|
||||||
|
│ │ Internal Network │ │
|
||||||
|
│ └──────┬────────────────┬──────────────────────┘ │
|
||||||
|
│ │ │ │
|
||||||
|
│ ┌──────┴──────┐ ┌──────┴──────┐ ┌─────────────┐ │
|
||||||
|
│ │ Worker │ │ Worker │ │ Worker │ │
|
||||||
|
│ │ Node 4 │ │ Node 5 │ │ Node 6 │ │
|
||||||
|
│ └─────────────┘ └─────────────┘ └─────────────┘ │
|
||||||
|
│ │
|
||||||
|
│ Services: api, web, db, redis, queue │
|
||||||
|
│ Tasks: Running containers distributed across nodes │
|
||||||
|
└─────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### Key Components
|
||||||
|
|
||||||
|
| Component | Description |
|
||||||
|
|-----------|-------------|
|
||||||
|
| **Service** | Definition of a container (image, ports, replicas) |
|
||||||
|
| **Task** | Single running instance of a service |
|
||||||
|
| **Stack** | Group of related services (like docker-compose) |
|
||||||
|
| **Node** | Docker daemon participating in swarm |
|
||||||
|
| **Overlay Network** | Network spanning multiple nodes |
|
||||||
|
|
||||||
|
## Skill Files Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
docker-swarm/
|
||||||
|
├── SKILL.md # This file
|
||||||
|
├── patterns/
|
||||||
|
│ ├── services.md # Service deployment patterns
|
||||||
|
│ ├── networking.md # Overlay network patterns
|
||||||
|
│ ├── secrets.md # Secrets management
|
||||||
|
│ └── configs.md # Config management
|
||||||
|
└── examples/
|
||||||
|
├── ha-web-app.md # High availability web app
|
||||||
|
├── microservices.md # Microservices deployment
|
||||||
|
└── database.md # Database cluster setup
|
||||||
|
```
|
||||||
|
|
||||||
|
## Core Patterns
|
||||||
|
|
||||||
|
### 1. Initialize Swarm
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Initialize swarm on manager node
|
||||||
|
docker swarm init --advertise-addr <MANAGER_IP>
|
||||||
|
|
||||||
|
# Get join token for workers
|
||||||
|
docker swarm join-token -q worker
|
||||||
|
|
||||||
|
# Get join token for managers
|
||||||
|
docker swarm join-token -q manager
|
||||||
|
|
||||||
|
# Join swarm (on worker nodes)
|
||||||
|
docker swarm join --token <TOKEN> <MANAGER_IP>:2377
|
||||||
|
|
||||||
|
# Check swarm status
|
||||||
|
docker node ls
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Service Deployment
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# docker-compose.yml (Swarm stack)
|
||||||
|
version: '3.8'
|
||||||
|
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
image: myapp/api:latest
|
||||||
|
deploy:
|
||||||
|
mode: replicated
|
||||||
|
replicas: 3
|
||||||
|
update_config:
|
||||||
|
parallelism: 1
|
||||||
|
delay: 10s
|
||||||
|
failure_action: rollback
|
||||||
|
order: start-first
|
||||||
|
rollback_config:
|
||||||
|
parallelism: 1
|
||||||
|
delay: 10s
|
||||||
|
restart_policy:
|
||||||
|
condition: on-failure
|
||||||
|
delay: 5s
|
||||||
|
max_attempts: 3
|
||||||
|
window: 120s
|
||||||
|
placement:
|
||||||
|
constraints:
|
||||||
|
- node.role == worker
|
||||||
|
preferences:
|
||||||
|
- spread: node.id
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '1'
|
||||||
|
memory: 1G
|
||||||
|
reservations:
|
||||||
|
cpus: '0.5'
|
||||||
|
memory: 512M
|
||||||
|
networks:
|
||||||
|
- app-network
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
start_period: 60s
|
||||||
|
secrets:
|
||||||
|
- db_password
|
||||||
|
- jwt_secret
|
||||||
|
configs:
|
||||||
|
- app_config
|
||||||
|
|
||||||
|
networks:
|
||||||
|
app-network:
|
||||||
|
driver: overlay
|
||||||
|
attachable: true
|
||||||
|
|
||||||
|
secrets:
|
||||||
|
db_password:
|
||||||
|
external: true
|
||||||
|
jwt_secret:
|
||||||
|
external: true
|
||||||
|
|
||||||
|
configs:
|
||||||
|
app_config:
|
||||||
|
external: true
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Deploy Stack
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Create secrets (before deploying)
|
||||||
|
echo "my_db_password" | docker secret create db_password -
|
||||||
|
docker secret create jwt_secret ./jwt_secret.txt
|
||||||
|
|
||||||
|
# Create configs
|
||||||
|
docker config create app_config ./config.json
|
||||||
|
|
||||||
|
# Deploy stack
|
||||||
|
docker stack deploy -c docker-compose.yml mystack
|
||||||
|
|
||||||
|
# List services
|
||||||
|
docker stack services mystack
|
||||||
|
|
||||||
|
# List tasks
|
||||||
|
docker stack ps mystack
|
||||||
|
|
||||||
|
# Remove stack
|
||||||
|
docker stack rm mystack
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Service Management
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Scale service
|
||||||
|
docker service scale mystack_api=5
|
||||||
|
|
||||||
|
# Update service image
|
||||||
|
docker service update --image myapp/api:v2 mystack_api
|
||||||
|
|
||||||
|
# Update environment variable
|
||||||
|
docker service update --env-add NODE_ENV=staging mystack_api
|
||||||
|
|
||||||
|
# Add constraint
|
||||||
|
docker service update --constraint-add 'node.labels.region==us-east' mystack_api
|
||||||
|
|
||||||
|
# Rollback service
|
||||||
|
docker service rollback mystack_api
|
||||||
|
|
||||||
|
# View service details
|
||||||
|
docker service inspect mystack_api
|
||||||
|
|
||||||
|
# View service logs
|
||||||
|
docker service logs -f mystack_api
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5. Secrets Management
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Create secret from stdin
|
||||||
|
echo "my_secret" | docker secret create db_password -
|
||||||
|
|
||||||
|
# Create secret from file
|
||||||
|
docker secret create jwt_secret ./secrets/jwt.txt
|
||||||
|
|
||||||
|
# List secrets
|
||||||
|
docker secret ls
|
||||||
|
|
||||||
|
# Inspect secret metadata
|
||||||
|
docker secret inspect db_password
|
||||||
|
|
||||||
|
# Use secret in service
|
||||||
|
docker service create \
|
||||||
|
--name api \
|
||||||
|
--secret db_password \
|
||||||
|
--secret jwt_secret \
|
||||||
|
myapp/api:latest
|
||||||
|
|
||||||
|
# Remove secret
|
||||||
|
docker secret rm db_password
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6. Config Management
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Create config
|
||||||
|
docker config create app_config ./config.json
|
||||||
|
|
||||||
|
# List configs
|
||||||
|
docker config ls
|
||||||
|
|
||||||
|
# Use config in service
|
||||||
|
docker service create \
|
||||||
|
--name api \
|
||||||
|
--config source=app_config,target=/app/config.json \
|
||||||
|
myapp/api:latest
|
||||||
|
|
||||||
|
# Update config (create new version)
|
||||||
|
docker config create app_config_v2 ./config-v2.json
|
||||||
|
|
||||||
|
# Update service with new config
|
||||||
|
docker service update \
|
||||||
|
--config-rm app_config \
|
||||||
|
--config-add source=app_config_v2,target=/app/config.json \
|
||||||
|
mystack_api
|
||||||
|
```
|
||||||
|
|
||||||
|
### 7. Overlay Networks
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# Create overlay network
|
||||||
|
networks:
|
||||||
|
frontend:
|
||||||
|
driver: overlay
|
||||||
|
attachable: true
|
||||||
|
|
||||||
|
backend:
|
||||||
|
driver: overlay
|
||||||
|
attachable: true
|
||||||
|
internal: true # No external access
|
||||||
|
|
||||||
|
services:
|
||||||
|
web:
|
||||||
|
networks:
|
||||||
|
- frontend
|
||||||
|
- backend
|
||||||
|
|
||||||
|
api:
|
||||||
|
networks:
|
||||||
|
- backend
|
||||||
|
|
||||||
|
db:
|
||||||
|
networks:
|
||||||
|
- backend
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Create network manually
|
||||||
|
docker network create --driver overlay --attachable my-network
|
||||||
|
|
||||||
|
# List networks
|
||||||
|
docker network ls
|
||||||
|
|
||||||
|
# Inspect network
|
||||||
|
docker network inspect my-network
|
||||||
|
```
|
||||||
|
|
||||||
|
## Deployment Strategies
|
||||||
|
|
||||||
|
### Rolling Update
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
deploy:
|
||||||
|
update_config:
|
||||||
|
parallelism: 2 # Update 2 tasks at a time
|
||||||
|
delay: 10s # Wait 10s between updates
|
||||||
|
failure_action: rollback
|
||||||
|
monitor: 30s # Monitor for 30s after update
|
||||||
|
max_failure_ratio: 0.3 # Allow 30% failures
|
||||||
|
```
|
||||||
|
|
||||||
|
### Blue-Green Deployment
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Deploy new version alongside existing
|
||||||
|
docker service create \
|
||||||
|
--name api-v2 \
|
||||||
|
--mode replicated \
|
||||||
|
--replicas 3 \
|
||||||
|
--network app-network \
|
||||||
|
myapp/api:v2
|
||||||
|
|
||||||
|
# Update router to point to new version
|
||||||
|
# (Using nginx/traefik config update)
|
||||||
|
|
||||||
|
# Remove old version
|
||||||
|
docker service rm api-v1
|
||||||
|
```
|
||||||
|
|
||||||
|
### Canary Deployment
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# Deploy canary version
|
||||||
|
version: '3.8'
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
image: myapp/api:v1
|
||||||
|
deploy:
|
||||||
|
replicas: 9
|
||||||
|
# ... 90% of traffic
|
||||||
|
|
||||||
|
api-canary:
|
||||||
|
image: myapp/api:v2
|
||||||
|
deploy:
|
||||||
|
replicas: 1
|
||||||
|
# ... 10% of traffic
|
||||||
|
```
|
||||||
|
|
||||||
|
### Global Services
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# Run one instance on every node
|
||||||
|
services:
|
||||||
|
monitoring:
|
||||||
|
image: myapp/monitoring:latest
|
||||||
|
deploy:
|
||||||
|
mode: global
|
||||||
|
volumes:
|
||||||
|
- /var/run/docker.sock:/var/run/docker.sock
|
||||||
|
```
|
||||||
|
|
||||||
|
## High Availability Patterns
|
||||||
|
|
||||||
|
### 1. Multi-Manager Setup
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Create 3 manager nodes for HA
|
||||||
|
docker swarm init --advertise-addr <MANAGER1_IP>
|
||||||
|
|
||||||
|
# On manager2
|
||||||
|
docker swarm join --token <MANAGER_TOKEN> <MANAGER1_IP>:2377
|
||||||
|
|
||||||
|
# On manager3
|
||||||
|
docker swarm join --token <MANAGER_TOKEN> <MANAGER1_IP>:2377
|
||||||
|
|
||||||
|
# Promote worker to manager
|
||||||
|
docker node promote <NODE_ID>
|
||||||
|
|
||||||
|
# Demote manager to worker
|
||||||
|
docker node demote <NODE_ID>
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Placement Constraints
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
db:
|
||||||
|
image: postgres:15
|
||||||
|
deploy:
|
||||||
|
placement:
|
||||||
|
constraints:
|
||||||
|
- node.role == worker
|
||||||
|
- node.labels.database == true
|
||||||
|
preferences:
|
||||||
|
- spread: node.labels.zone # Spread across zones
|
||||||
|
|
||||||
|
cache:
|
||||||
|
image: redis:7
|
||||||
|
deploy:
|
||||||
|
placement:
|
||||||
|
constraints:
|
||||||
|
- node.labels.cache == true
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Resource Management
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '2'
|
||||||
|
memory: 2G
|
||||||
|
reservations:
|
||||||
|
cpus: '1'
|
||||||
|
memory: 1G
|
||||||
|
restart_policy:
|
||||||
|
condition: on-failure
|
||||||
|
max_attempts: 3
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Health Checks
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
start_period: 60s
|
||||||
|
deploy:
|
||||||
|
update_config:
|
||||||
|
failure_action: rollback
|
||||||
|
monitor: 30s
|
||||||
|
```
|
||||||
|
|
||||||
|
## Service Discovery & Load Balancing
|
||||||
|
|
||||||
|
### Built-in Load Balancing
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# Swarm provides automatic load balancing
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
deploy:
|
||||||
|
replicas: 3
|
||||||
|
ports:
|
||||||
|
- "3000:3000" # Requests are load balanced across replicas
|
||||||
|
|
||||||
|
# Virtual IP (VIP) - default mode
|
||||||
|
# DNS round-robin
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
deploy:
|
||||||
|
endpoint_mode: dnsrr
|
||||||
|
```
|
||||||
|
|
||||||
|
### Ingress Network
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# Publishing ports
|
||||||
|
services:
|
||||||
|
web:
|
||||||
|
ports:
|
||||||
|
- "80:80" # Published on all nodes
|
||||||
|
- "443:443"
|
||||||
|
deploy:
|
||||||
|
mode: ingress # Default, routed through mesh
|
||||||
|
```
|
||||||
|
|
||||||
|
### Host Mode
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# Bypass load balancer (for performance)
|
||||||
|
services:
|
||||||
|
web:
|
||||||
|
ports:
|
||||||
|
- target: 80
|
||||||
|
published: 80
|
||||||
|
mode: host # Direct port mapping
|
||||||
|
deploy:
|
||||||
|
mode: global # One per node
|
||||||
|
```
|
||||||
|
|
||||||
|
## Monitoring & Logging
|
||||||
|
|
||||||
|
### Logging Drivers
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
logging:
|
||||||
|
driver: "json-file"
|
||||||
|
options:
|
||||||
|
max-size: "10m"
|
||||||
|
max-file: "3"
|
||||||
|
labels: "app,environment"
|
||||||
|
|
||||||
|
# Or use syslog
|
||||||
|
api:
|
||||||
|
logging:
|
||||||
|
driver: "syslog"
|
||||||
|
options:
|
||||||
|
syslog-address: "tcp://logserver:514"
|
||||||
|
syslog-facility: "daemon"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Viewing Logs
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Service logs
|
||||||
|
docker service logs mystack_api
|
||||||
|
|
||||||
|
# Filter by time
|
||||||
|
docker service logs --since 1h mystack_api
|
||||||
|
|
||||||
|
# Follow logs
|
||||||
|
docker service logs -f mystack_api
|
||||||
|
|
||||||
|
# All tasks
|
||||||
|
docker service logs --tail 100 mystack_api
|
||||||
|
```
|
||||||
|
|
||||||
|
### Monitoring Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Node status
|
||||||
|
docker node ls
|
||||||
|
|
||||||
|
# Service status
|
||||||
|
docker service ls
|
||||||
|
|
||||||
|
# Task status
|
||||||
|
docker service ps mystack_api
|
||||||
|
|
||||||
|
# Resource usage
|
||||||
|
docker stats
|
||||||
|
|
||||||
|
# Service inspect
|
||||||
|
docker service inspect mystack_api --pretty
|
||||||
|
```
|
||||||
|
|
||||||
|
## Backup & Recovery
|
||||||
|
|
||||||
|
### Backup Swarm State
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# On manager node
|
||||||
|
# Stop Docker first so the Swarm state is copied in a consistent state
systemctl stop docker
tar -czf swarm-backup.tar.gz -C /var/lib/docker/swarm .
systemctl start docker
|
||||||
|
|
||||||
|
# Or manual backup
|
||||||
|
cp -r /var/lib/docker/swarm/raft ~/swarm-backup/
|
||||||
|
```
|
||||||
|
|
||||||
|
### Recovery
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Unlock swarm after restart (if encrypted)
|
||||||
|
docker swarm unlock
|
||||||
|
|
||||||
|
# Force new cluster (disaster recovery)
|
||||||
|
docker swarm init --force-new-cluster
|
||||||
|
|
||||||
|
# Restore from backup: stop Docker, restore the saved raft state,
# then start Docker and re-initialize the cluster from this node
systemctl stop docker
cp -r ~/swarm-backup/raft /var/lib/docker/swarm/
systemctl start docker
docker swarm init --force-new-cluster
|
||||||
|
```
|
||||||
|
|
||||||
|
## Common Operations
|
||||||
|
|
||||||
|
### Node Management
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# List nodes
|
||||||
|
docker node ls
|
||||||
|
|
||||||
|
# Inspect node
|
||||||
|
docker node inspect <NODE_ID>
|
||||||
|
|
||||||
|
# Drain node (for maintenance)
|
||||||
|
docker node update --availability drain <NODE_ID>
|
||||||
|
|
||||||
|
# Activate node
|
||||||
|
docker node update --availability active <NODE_ID>
|
||||||
|
|
||||||
|
# Add labels
|
||||||
|
docker node update --label-add region=us-east <NODE_ID>
|
||||||
|
|
||||||
|
# Remove node
|
||||||
|
docker node rm <NODE_ID>
|
||||||
|
```
|
||||||
|
|
||||||
|
### Service Debugging
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# View service tasks
|
||||||
|
docker service ps mystack_api
|
||||||
|
|
||||||
|
# View task details
|
||||||
|
docker inspect <TASK_ID>
|
||||||
|
|
||||||
|
# Run temporary container for debugging
|
||||||
|
docker run --rm -it --network mystack_app-network \
|
||||||
|
myapp/api:latest sh
|
||||||
|
|
||||||
|
# Check service logs
|
||||||
|
docker service logs mystack_api
|
||||||
|
|
||||||
|
# Execute command in running container
|
||||||
|
docker exec -it <CONTAINER_ID> sh
|
||||||
|
```
|
||||||
|
|
||||||
|
### Network Debugging
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# List networks
|
||||||
|
docker network ls
|
||||||
|
|
||||||
|
# Inspect overlay network
|
||||||
|
docker network inspect mystack_app-network
|
||||||
|
|
||||||
|
# Test connectivity
|
||||||
|
docker run --rm --network mystack_app-network alpine ping api
|
||||||
|
|
||||||
|
# DNS resolution
|
||||||
|
docker run --rm --network mystack_app-network alpine nslookup api
|
||||||
|
```
|
||||||
|
|
||||||
|
## Production Checklist
|
||||||
|
|
||||||
|
- [ ] At least 3 manager nodes for HA
|
||||||
|
- [ ] Quorum maintained (odd number of managers)
|
||||||
|
- [ ] Resources limited for all services
|
||||||
|
- [ ] Health checks configured
|
||||||
|
- [ ] Rolling update strategy defined
|
||||||
|
- [ ] Rollback strategy configured
|
||||||
|
- [ ] Secrets used for sensitive data
|
||||||
|
- [ ] Configs for environment settings
|
||||||
|
- [ ] Overlay networks properly segmented
|
||||||
|
- [ ] Logging driver configured
|
||||||
|
- [ ] Monitoring solution deployed
|
||||||
|
- [ ] Backup strategy implemented
|
||||||
|
- [ ] Node labels for placement constraints
|
||||||
|
- [ ] Resource reservations set
|
||||||
|
|
||||||
|
## Best Practices
|
||||||
|
|
||||||
|
1. **Resource Planning**
|
||||||
|
```yaml
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '1'
|
||||||
|
memory: 1G
|
||||||
|
reservations:
|
||||||
|
cpus: '0.5'
|
||||||
|
memory: 512M
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Rolling Updates**
|
||||||
|
```yaml
|
||||||
|
deploy:
|
||||||
|
update_config:
|
||||||
|
parallelism: 1
|
||||||
|
delay: 10s
|
||||||
|
failure_action: rollback
|
||||||
|
monitor: 30s
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Placement Constraints**
|
||||||
|
```yaml
|
||||||
|
deploy:
|
||||||
|
placement:
|
||||||
|
constraints:
|
||||||
|
- node.role == worker
|
||||||
|
preferences:
|
||||||
|
- spread: node.labels.zone
|
||||||
|
```
|
||||||
|
|
||||||
|
4. **Network Segmentation**
|
||||||
|
```yaml
|
||||||
|
networks:
|
||||||
|
frontend:
|
||||||
|
driver: overlay
|
||||||
|
backend:
|
||||||
|
driver: overlay
|
||||||
|
internal: true
|
||||||
|
```
|
||||||
|
|
||||||
|
5. **Secrets Management**
|
||||||
|
```yaml
|
||||||
|
secrets:
|
||||||
|
- db_password
|
||||||
|
- jwt_secret
|
||||||
|
```
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Service Won't Start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Check task status
|
||||||
|
docker service ps mystack_api --no-trunc
|
||||||
|
|
||||||
|
# Check logs
|
||||||
|
docker service logs mystack_api
|
||||||
|
|
||||||
|
# Check node resources
|
||||||
|
docker node ls
|
||||||
|
docker stats
|
||||||
|
|
||||||
|
# Check network
|
||||||
|
docker network inspect mystack_app-network
|
||||||
|
```
|
||||||
|
|
||||||
|
### Task Keeps Restarting
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Check restart policy
|
||||||
|
docker service inspect mystack_api --pretty
|
||||||
|
|
||||||
|
# Check container logs
|
||||||
|
docker service logs --tail 50 mystack_api
|
||||||
|
|
||||||
|
# Check health check
|
||||||
|
docker inspect <CONTAINER_ID> --format='{{.State.Health}}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Network Issues
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Verify overlay network
|
||||||
|
docker network inspect mystack_app-network
|
||||||
|
|
||||||
|
# Check DNS resolution
|
||||||
|
docker run --rm --network mystack_app-network alpine nslookup api
|
||||||
|
|
||||||
|
# Check connectivity
|
||||||
|
docker run --rm --network mystack_app-network alpine ping api
|
||||||
|
```
|
||||||
|
|
||||||
|
## Related Skills
|
||||||
|
|
||||||
|
| Skill | Purpose |
|
||||||
|
|-------|---------|
|
||||||
|
| `docker-compose` | Local development with Compose |
|
||||||
|
| `docker-security` | Container security patterns |
|
||||||
|
| `kubernetes` | Kubernetes orchestration |
|
||||||
|
| `docker-monitoring` | Container monitoring setup |
|
||||||
519
.kilo/skills/docker-swarm/examples/ha-web-app.md
Normal file
519
.kilo/skills/docker-swarm/examples/ha-web-app.md
Normal file
@@ -0,0 +1,519 @@
|
|||||||
|
# Docker Swarm Deployment Examples
|
||||||
|
|
||||||
|
## Example: High Availability Web Application
|
||||||
|
|
||||||
|
Complete example of deploying a production-ready web application with Docker Swarm.
|
||||||
|
|
||||||
|
### docker-compose.yml (Swarm Stack)
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
version: '3.8'
|
||||||
|
|
||||||
|
services:
|
||||||
|
# Reverse Proxy with SSL
|
||||||
|
nginx:
|
||||||
|
image: nginx:alpine
|
||||||
|
ports:
|
||||||
|
- "80:80"
|
||||||
|
- "443:443"
|
||||||
|
configs:
|
||||||
|
- source: nginx_config
|
||||||
|
target: /etc/nginx/nginx.conf
|
||||||
|
secrets:
|
||||||
|
- ssl_cert
|
||||||
|
- ssl_key
|
||||||
|
networks:
|
||||||
|
- frontend
|
||||||
|
deploy:
|
||||||
|
replicas: 2
|
||||||
|
placement:
|
||||||
|
constraints:
|
||||||
|
- node.role == worker
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '0.5'
|
||||||
|
memory: 256M
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "nginx", "-t"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
|
||||||
|
# API Service
|
||||||
|
api:
|
||||||
|
image: myapp/api:latest
|
||||||
|
environment:
|
||||||
|
- NODE_ENV=production
|
||||||
|
- DATABASE_URL=postgres://app:${DB_PASSWORD}@db:5432/app
|
||||||
|
- REDIS_URL=redis://cache:6379
|
||||||
|
configs:
|
||||||
|
- source: app_config
|
||||||
|
target: /app/config.json
|
||||||
|
secrets:
|
||||||
|
- jwt_secret
|
||||||
|
networks:
|
||||||
|
- frontend
|
||||||
|
- backend
|
||||||
|
deploy:
|
||||||
|
replicas: 3
|
||||||
|
update_config:
|
||||||
|
parallelism: 1
|
||||||
|
delay: 10s
|
||||||
|
failure_action: rollback
|
||||||
|
order: start-first
|
||||||
|
rollback_config:
|
||||||
|
parallelism: 1
|
||||||
|
delay: 10s
|
||||||
|
restart_policy:
|
||||||
|
condition: on-failure
|
||||||
|
delay: 5s
|
||||||
|
max_attempts: 3
|
||||||
|
window: 120s
|
||||||
|
placement:
|
||||||
|
constraints:
|
||||||
|
- node.role == worker
|
||||||
|
preferences:
|
||||||
|
- spread: node.id
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '1'
|
||||||
|
memory: 1G
|
||||||
|
reservations:
|
||||||
|
cpus: '0.5'
|
||||||
|
memory: 512M
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "node", "-e", "require('http').get('http://localhost:3000/health', (r) => process.exit(r.statusCode === 200 ? 0 : 1))"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
start_period: 60s
|
||||||
|
|
||||||
|
# Background Worker
|
||||||
|
worker:
|
||||||
|
image: myapp/worker:latest
|
||||||
|
environment:
|
||||||
|
- NODE_ENV=production
|
||||||
|
- DATABASE_URL=postgres://app:${DB_PASSWORD}@db:5432/app
|
||||||
|
secrets:
|
||||||
|
- jwt_secret
|
||||||
|
networks:
|
||||||
|
- backend
|
||||||
|
deploy:
|
||||||
|
replicas: 2
|
||||||
|
restart_policy:
|
||||||
|
condition: on-failure
|
||||||
|
delay: 10s
|
||||||
|
max_attempts: 5
|
||||||
|
placement:
|
||||||
|
constraints:
|
||||||
|
- node.role == worker
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '0.5'
|
||||||
|
memory: 512M
|
||||||
|
|
||||||
|
# Database (PostgreSQL with Replication)
|
||||||
|
db:
|
||||||
|
image: postgres:15-alpine
|
||||||
|
environment:
|
||||||
|
POSTGRES_DB: app
|
||||||
|
POSTGRES_USER: app
|
||||||
|
POSTGRES_PASSWORD_FILE: /run/secrets/db_password
|
||||||
|
secrets:
|
||||||
|
- db_password
|
||||||
|
volumes:
|
||||||
|
- postgres-data:/var/lib/postgresql/data
|
||||||
|
networks:
|
||||||
|
- backend
|
||||||
|
deploy:
|
||||||
|
replicas: 1
|
||||||
|
placement:
|
||||||
|
constraints:
|
||||||
|
- node.labels.database == true
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '2'
|
||||||
|
memory: 2G
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "pg_isready -U app -d app"]
|
||||||
|
interval: 10s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 5
|
||||||
|
|
||||||
|
# Redis Cache
|
||||||
|
cache:
|
||||||
|
image: redis:7-alpine
|
||||||
|
command: redis-server --appendonly yes --maxmemory 512mb --maxmemory-policy allkeys-lru
|
||||||
|
volumes:
|
||||||
|
- redis-data:/data
|
||||||
|
networks:
|
||||||
|
- backend
|
||||||
|
deploy:
|
||||||
|
replicas: 1
|
||||||
|
placement:
|
||||||
|
constraints:
|
||||||
|
- node.labels.cache == true
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '0.5'
|
||||||
|
memory: 512M
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "redis-cli", "ping"]
|
||||||
|
interval: 10s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 5
|
||||||
|
|
||||||
|
# Monitoring (Prometheus)
|
||||||
|
prometheus:
|
||||||
|
image: prom/prometheus:latest
|
||||||
|
configs:
|
||||||
|
- source: prometheus_config
|
||||||
|
target: /etc/prometheus/prometheus.yml
|
||||||
|
volumes:
|
||||||
|
- prometheus-data:/prometheus
|
||||||
|
networks:
|
||||||
|
- monitoring
|
||||||
|
deploy:
|
||||||
|
replicas: 1
|
||||||
|
placement:
|
||||||
|
constraints:
|
||||||
|
- node.role == manager
|
||||||
|
command:
|
||||||
|
- '--config.file=/etc/prometheus/prometheus.yml'
|
||||||
|
- '--storage.tsdb.retention.time=30d'
|
||||||
|
|
||||||
|
# Monitoring (Grafana)
|
||||||
|
grafana:
|
||||||
|
image: grafana/grafana:latest
|
||||||
|
ports:
|
||||||
|
- "3000:3000"
|
||||||
|
environment:
|
||||||
|
- GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD}
|
||||||
|
volumes:
|
||||||
|
- grafana-data:/var/lib/grafana
|
||||||
|
networks:
|
||||||
|
- monitoring
|
||||||
|
deploy:
|
||||||
|
replicas: 1
|
||||||
|
placement:
|
||||||
|
constraints:
|
||||||
|
- node.role == manager
|
||||||
|
|
||||||
|
networks:
|
||||||
|
frontend:
|
||||||
|
driver: overlay
|
||||||
|
attachable: true
|
||||||
|
backend:
|
||||||
|
driver: overlay
|
||||||
|
internal: true
|
||||||
|
monitoring:
|
||||||
|
driver: overlay
|
||||||
|
attachable: true
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
postgres-data:
|
||||||
|
redis-data:
|
||||||
|
prometheus-data:
|
||||||
|
grafana-data:
|
||||||
|
|
||||||
|
configs:
|
||||||
|
nginx_config:
|
||||||
|
file: ./configs/nginx.conf
|
||||||
|
app_config:
|
||||||
|
file: ./configs/app.json
|
||||||
|
prometheus_config:
|
||||||
|
file: ./configs/prometheus.yml
|
||||||
|
|
||||||
|
secrets:
|
||||||
|
db_password:
|
||||||
|
file: ./secrets/db_password.txt
|
||||||
|
jwt_secret:
|
||||||
|
file: ./secrets/jwt_secret.txt
|
||||||
|
ssl_cert:
|
||||||
|
file: ./secrets/ssl_cert.pem
|
||||||
|
ssl_key:
|
||||||
|
file: ./secrets/ssl_key.pem
|
||||||
|
```
|
||||||
|
|
||||||
|
### Deployment Script
|
||||||
|
|
||||||
|
```bash
|
||||||
|
#!/bin/bash
|
||||||
|
# deploy.sh
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
# Colors
|
||||||
|
RED='\033[0;31m'
|
||||||
|
GREEN='\033[0;32m'
|
||||||
|
NC='\033[0m'
|
||||||
|
|
||||||
|
# Configuration
|
||||||
|
STACK_NAME="myapp"
|
||||||
|
COMPOSE_FILE="docker-compose.yml"
|
||||||
|
|
||||||
|
echo "Starting deployment for ${STACK_NAME}..."
|
||||||
|
|
||||||
|
# Check if running on Swarm
|
||||||
|
if ! docker info | grep -q "Swarm: active"; then
|
||||||
|
echo -e "${RED}Error: Not running in Swarm mode${NC}"
|
||||||
|
echo "Initialize Swarm with: docker swarm init"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Create secrets (if not exists)
|
||||||
|
echo "Checking secrets..."
|
||||||
|
for secret in db_password jwt_secret ssl_cert ssl_key; do
|
||||||
|
if ! docker secret inspect ${secret} > /dev/null 2>&1; then
|
||||||
|
if [ -f "./secrets/${secret}.txt" ]; then
|
||||||
|
docker secret create ${secret} ./secrets/${secret}.txt
|
||||||
|
echo -e "${GREEN}Created secret: ${secret}${NC}"
|
||||||
|
else
|
||||||
|
echo -e "${RED}Missing secret file: ./secrets/${secret}.txt${NC}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo "Secret ${secret} already exists"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
# Create configs
|
||||||
|
echo "Creating configs..."
|
||||||
|
docker config rm nginx_config 2>/dev/null || true
|
||||||
|
docker config create nginx_config ./configs/nginx.conf
|
||||||
|
|
||||||
|
docker config rm app_config 2>/dev/null || true
|
||||||
|
docker config create app_config ./configs/app.json
|
||||||
|
|
||||||
|
docker config rm prometheus_config 2>/dev/null || true
|
||||||
|
docker config create prometheus_config ./configs/prometheus.yml
|
||||||
|
|
||||||
|
# Deploy stack
|
||||||
|
echo "Deploying stack..."
|
||||||
|
docker stack deploy -c ${COMPOSE_FILE} ${STACK_NAME}
|
||||||
|
|
||||||
|
# Wait for services to start
|
||||||
|
echo "Waiting for services to start..."
|
||||||
|
sleep 30
|
||||||
|
|
||||||
|
# Show status
|
||||||
|
docker stack services ${STACK_NAME}
|
||||||
|
|
||||||
|
# Check health
|
||||||
|
echo "Checking service health..."
|
||||||
|
for service in nginx api worker db cache prometheus grafana; do
|
||||||
|
REPLICAS=$(docker service ls --filter name=${STACK_NAME}_${service} --format "{{.Replicas}}")
|
||||||
|
echo "${service}: ${REPLICAS}"
|
||||||
|
done
|
||||||
|
|
||||||
|
echo -e "${GREEN}Deployment complete!${NC}"
|
||||||
|
echo "Check status: docker stack services ${STACK_NAME}"
|
||||||
|
echo "View logs: docker service logs -f ${STACK_NAME}_api"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Service Update Script
|
||||||
|
|
||||||
|
```bash
|
||||||
|
#!/bin/bash
|
||||||
|
# update-service.sh
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
STACK_NAME="myapp"
SERVICE_NAME=$1
|
||||||
|
NEW_IMAGE=$2
|
||||||
|
|
||||||
|
if [ -z "$SERVICE_NAME" ] || [ -z "$NEW_IMAGE" ]; then
|
||||||
|
echo "Usage: ./update-service.sh <service-name> <new-image>"
|
||||||
|
echo "Example: ./update-service.sh myapp_api myapp/api:v2"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
FULL_SERVICE_NAME="${STACK_NAME}_${SERVICE_NAME}"
|
||||||
|
|
||||||
|
echo "Updating ${FULL_SERVICE_NAME} to ${NEW_IMAGE}..."
|
||||||
|
|
||||||
|
# Update service with rollback on failure
|
||||||
|
docker service update \
|
||||||
|
--image ${NEW_IMAGE} \
|
||||||
|
--update-parallelism 1 \
|
||||||
|
--update-delay 10s \
|
||||||
|
--update-failure-action rollback \
|
||||||
|
--update-monitor 30s \
|
||||||
|
${FULL_SERVICE_NAME}
|
||||||
|
|
||||||
|
# Wait for update
|
||||||
|
echo "Waiting for update to complete..."
|
||||||
|
sleep 30
|
||||||
|
|
||||||
|
# Check status
|
||||||
|
docker service ps ${FULL_SERVICE_NAME}
|
||||||
|
|
||||||
|
echo "Update complete!"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Rollback Script
|
||||||
|
|
||||||
|
```bash
|
||||||
|
#!/bin/bash
|
||||||
|
# rollback-service.sh
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
SERVICE_NAME=$1
|
||||||
|
STACK_NAME="myapp"
|
||||||
|
|
||||||
|
if [ -z "$SERVICE_NAME" ]; then
|
||||||
|
echo "Usage: ./rollback-service.sh <service-name>"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
FULL_SERVICE_NAME="${STACK_NAME}_${SERVICE_NAME}"
|
||||||
|
|
||||||
|
echo "Rolling back ${FULL_SERVICE_NAME}..."
|
||||||
|
|
||||||
|
docker service rollback ${FULL_SERVICE_NAME}
|
||||||
|
|
||||||
|
sleep 30
|
||||||
|
|
||||||
|
docker service ps ${FULL_SERVICE_NAME}
|
||||||
|
|
||||||
|
echo "Rollback complete!"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Monitoring Dashboard (Grafana)
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"dashboard": {
|
||||||
|
"title": "Docker Swarm Overview",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"title": "Running Tasks",
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "count(container_tasks_state{state=\"running\"})"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "CPU Usage per Service",
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "rate(container_cpu_usage_seconds_total{name=~\".+\"}[5m]) * 100",
|
||||||
|
"legendFormat": "{{name}}"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "Memory Usage per Service",
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "container_memory_usage_bytes{name=~\".+\"} / 1024 / 1024",
|
||||||
|
"legendFormat": "{{name}} MB"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "Network I/O",
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "rate(container_network_receive_bytes_total{name=~\".+\"}[5m])",
|
||||||
|
"legendFormat": "{{name}} RX"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "rate(container_network_transmit_bytes_total{name=~\".+\"}[5m])",
|
||||||
|
"legendFormat": "{{name}} TX"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "Service Health",
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "container_health_status{name=~\".+\"}"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Prometheus Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# prometheus.yml
|
||||||
|
global:
|
||||||
|
scrape_interval: 15s
|
||||||
|
  evaluation_interval: 15s
|
||||||
|
|
||||||
|
alerting:
|
||||||
|
alertmanagers:
|
||||||
|
- static_configs:
|
||||||
|
- targets:
|
||||||
|
- alertmanager:9093
|
||||||
|
|
||||||
|
rule_files:
|
||||||
|
- /etc/prometheus/alerts.yml
|
||||||
|
|
||||||
|
scrape_configs:
|
||||||
|
- job_name: 'prometheus'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['prometheus:9090']
|
||||||
|
|
||||||
|
- job_name: 'cadvisor'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['cadvisor:8080']
|
||||||
|
|
||||||
|
- job_name: 'node'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['node-exporter:9100']
|
||||||
|
|
||||||
|
- job_name: 'api'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['api:3000']
|
||||||
|
metrics_path: '/metrics'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Alert Rules
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# alerts.yml
|
||||||
|
groups:
|
||||||
|
- name: swarm_alerts
|
||||||
|
rules:
|
||||||
|
- alert: ServiceDown
|
||||||
|
expr: count(container_tasks_state{state="running"}) == 0
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
annotations:
|
||||||
|
summary: "Service {{ $labels.service }} is down"
|
||||||
|
description: "No running tasks for service {{ $labels.service }}"
|
||||||
|
|
||||||
|
- alert: HighCpuUsage
|
||||||
|
expr: rate(container_cpu_usage_seconds_total[5m]) * 100 > 80
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "High CPU usage on {{ $labels.name }}"
|
||||||
|
description: "Container {{ $labels.name }} CPU usage is {{ $value }}%"
|
||||||
|
|
||||||
|
- alert: HighMemoryUsage
|
||||||
|
expr: (container_memory_usage_bytes / container_spec_memory_limit_bytes) * 100 > 80
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "High memory usage on {{ $labels.name }}"
|
||||||
|
description: "Container {{ $labels.name }} memory usage is {{ $value }}%"
|
||||||
|
|
||||||
|
- alert: ContainerRestart
|
||||||
|
expr: increase(container_restart_count[1h]) > 0
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "Container {{ $labels.name }} restarted"
|
||||||
|
description: "Container {{ $labels.name }} restarted {{ $value }} times in the last hour"
|
||||||
|
```
|
||||||
129
scripts/sync-agents.cjs
Normal file
129
scripts/sync-agents.cjs
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
#!/usr/bin/env node
|
||||||
|
/**
|
||||||
|
* Sync Agent Models - Source of truth: .kilo/agents/*.md frontmatter
|
||||||
|
* Run: node scripts/sync-agents.cjs [--check | --fix]
|
||||||
|
*/
|
||||||
|
|
||||||
|
const fs = require('fs');
|
||||||
|
const path = require('path');
|
||||||
|
|
||||||
|
const ROOT = path.resolve(__dirname, '..');
|
||||||
|
const AGENTS_DIR = path.join(ROOT, '.kilo', 'agents');
|
||||||
|
const KILO_SPEC = path.join(ROOT, '.kilo', 'KILO_SPEC.md');
|
||||||
|
const AGENTS_MD = path.join(ROOT, 'AGENTS.md');
|
||||||
|
|
||||||
|
function parseFrontmatter(content) {
|
||||||
|
const match = content.match(/^---\n([\s\S]*?)\n---/);
|
||||||
|
if (!match) return {};
|
||||||
|
const frontmatter = {};
|
||||||
|
for (const line of match[1].split('\n')) {
|
||||||
|
const idx = line.indexOf(':');
|
||||||
|
if (idx > 0) {
|
||||||
|
const key = line.slice(0, idx).trim();
|
||||||
|
let val = line.slice(idx + 1).trim();
|
||||||
|
if (val.startsWith('"') && val.endsWith('"')) val = val.slice(1, -1);
|
||||||
|
frontmatter[key] = val;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return frontmatter;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Load every agent definition from the `.kilo/agents` directory.
 *
 * Each `*.md` file is read and its frontmatter parsed; the file name
 * (without extension) becomes the agent name. Missing frontmatter keys
 * fall back to '' (or 'all' for `mode`).
 *
 * @returns {Object<string, {description: string, model: string, mode: string, color: string}>}
 */
function getAllAgents() {
  const result = {};
  const mdFiles = fs.readdirSync(AGENTS_DIR).filter((f) => f.endsWith('.md'));
  for (const file of mdFiles) {
    const raw = fs.readFileSync(path.join(AGENTS_DIR, file), 'utf-8');
    const meta = parseFrontmatter(raw);
    result[file.replace('.md', '')] = {
      description: meta.description || '',
      model: meta.model || '',
      mode: meta.mode || 'all',
      color: meta.color || '',
    };
  }
  return result;
}
|
||||||
|
|
||||||
|
/**
 * Map an agent name (file name without extension) to its documentation
 * category. Agents not found in any list default to the 'meta' category.
 *
 * @param {string} name - Agent name, e.g. 'backend-developer'.
 * @returns {string} One of 'core', 'quality', 'meta', 'testing', 'cognitive'.
 */
function categorizeAgent(name) {
  const categories = {
    core: ['requirement-refiner', 'history-miner', 'system-analyst', 'sdet-engineer', 'lead-developer', 'frontend-developer', 'backend-developer', 'go-developer', 'devops-engineer'],
    quality: ['code-skeptic', 'the-fixer', 'performance-engineer', 'security-auditor', 'visual-tester'],
    meta: ['orchestrator', 'release-manager', 'evaluator', 'prompt-optimizer', 'product-owner', 'agent-architect', 'capability-analyst', 'workflow-architect', 'markdown-validator'],
    testing: ['browser-automation'],
    cognitive: ['planner', 'reflector', 'memory-manager'],
  };
  const hit = Object.entries(categories).find(([, names]) => names.includes(name));
  return hit ? hit[0] : 'meta';
}
|
||||||
|
|
||||||
|
/**
 * Regenerate the "### Pipeline Agents" table in .kilo/KILO_SPEC.md from
 * agent frontmatter data. Agents without a model are skipped.
 *
 * Fix: the previous version appended '\n\n' to the replacement even though
 * the (?=\n\n\*\*Note) lookahead already leaves the separating blank line
 * in place, so two extra blank lines accumulated before "**Note" on every
 * run. The replacement is now just the table itself.
 *
 * @param {Object<string, {description: string, model: string}>} agents
 */
function updateKiloSpec(agents) {
  let content = fs.readFileSync(KILO_SPEC, 'utf-8');
  const rows = Object.entries(agents)
    .filter(([, agent]) => agent.model)
    .map(([name, agent]) => {
      // 'backend-developer' -> 'BackendDeveloper' (matches existing table style)
      const displayName = name
        .split('-')
        .map((w) => w.charAt(0).toUpperCase() + w.slice(1))
        .join('');
      return `| \`@${displayName}\` | ${agent.description.split('.')[0]}. | ${agent.model} |`;
    })
    .join('\n');
  const table = `### Pipeline Agents\n\n| Agent | Role | Model |\n|-------|------|-------|\n${rows}`;
  // Lazy match up to (not including) the blank line before "**Note"; the
  // lookahead keeps that blank line, so no trailing newlines are appended.
  content = content.replace(
    /### Pipeline Agents\n\n\| Agent \| Role \| Model \|[\s\S]*?(?=\n\n\*\*Note)/,
    table
  );
  fs.writeFileSync(KILO_SPEC, content);
}
|
||||||
|
|
||||||
|
/**
 * Regenerate the per-category agent tables in AGENTS.md from agent
 * frontmatter data.
 *
 * Fixes over the previous version:
 * - The section regex was built inside a template literal containing
 *   `[\s\S]`, which JavaScript collapses to `[sS]` (a character class of
 *   the letters s and S), so the pattern effectively never matched a
 *   section body and the tables were never updated. The pattern is now
 *   assembled with properly escaped backslashes.
 * - Headings such as '### Cognitive Enhancement (New)' contain regex
 *   metacharacters; they are now escaped before being embedded in the
 *   pattern so the literal text is matched.
 *
 * @param {Object<string, {description: string}>} agents
 */
function updateAgentsMd(agents) {
  let content = fs.readFileSync(AGENTS_MD, 'utf-8');
  const catNames = { core: '### Core Development', quality: '### Quality Assurance', meta: '### Meta & Process', testing: '### Testing', cognitive: '### Cognitive Enhancement (New)' };
  const triggers = { 'requirement-refiner': 'Issue status: new', 'history-miner': 'Status: planned', 'system-analyst': 'Status: researching', 'sdet-engineer': 'Status: designed', 'lead-developer': 'Status: testing', 'frontend-developer': 'When UI work needed', 'backend-developer': 'When backend needed', 'go-developer': 'When Go backend needed', 'devops-engineer': 'When deployment/infra needed', 'code-skeptic': 'Status: implementing', 'the-fixer': 'When review fails', 'performance-engineer': 'After code-skeptic', 'security-auditor': 'After performance', 'visual-tester': 'When UI changes', 'orchestrator': 'Manages all agent routing', 'release-manager': 'Status: releasing', 'evaluator': 'Status: evaluated', 'prompt-optimizer': 'When score < 7', 'product-owner': 'Manages issues', 'agent-architect': 'When gaps identified', 'capability-analyst': 'When starting new task', 'workflow-architect': 'New workflow needed', 'markdown-validator': 'Before issue creation', 'browser-automation': 'E2E testing needed', 'planner': 'Complex tasks', 'reflector': 'After each agent', 'memory-manager': 'Context management' };

  // Escape regex metacharacters so headings like '(New)' match literally.
  const escapeRegExp = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  const byCat = {};
  for (const [name, agent] of Object.entries(agents)) {
    const cat = categorizeAgent(name);
    (byCat[cat] = byCat[cat] || []).push([name, agent]);
  }

  for (const [cat, heading] of Object.entries(catNames)) {
    const list = byCat[cat] || [];
    if (!list.length) continue;
    const rows = list.map(([name, agent]) => {
      const dn = name.split('-').map((w) => w.charAt(0).toUpperCase() + w.slice(1)).join('');
      return `| \`@${dn}\` | ${agent.description.split('.')[0]} | ${triggers[name] || 'Manual invocation'} |`;
    }).join('\n');
    const table = `${heading}\n| Agent | Role | When Invoked |\n|-------|------|--------------|\n${rows}`;
    // Match from the heading up to the next section (or end of file).
    // Note the doubled backslashes: the regex source must contain \s and \S.
    const regex = new RegExp(`${escapeRegExp(heading)}[\\s\\S]*?(?=###|$)`);
    if (regex.test(content)) content = content.replace(regex, table + '\n\n');
  }
  fs.writeFileSync(AGENTS_MD, content);
}
|
||||||
|
|
||||||
|
/**
 * CLI entry point.
 *
 * Validates that every agent defines both a model and a description, then:
 * - with --fix: regenerates KILO_SPEC.md and AGENTS.md from frontmatter;
 * - with --check (or no flags): reports status only.
 * Exits with status 1 when any agent is missing required frontmatter.
 */
function main() {
  const args = process.argv.slice(2);
  const shouldFix = args.includes('--fix');
  const checkOnly = args.includes('--check');

  console.log('=== Agent Sync Tool ===\n');
  console.log('Source of truth: .kilo/agents/*.md frontmatter\n');

  const agents = getAllAgents();
  console.log(`Found ${Object.keys(agents).length} agents\n`);

  const incomplete = Object.entries(agents).filter(
    ([, agent]) => !agent.model || !agent.description
  );
  if (incomplete.length) {
    console.log('Issues found:');
    for (const [name, agent] of incomplete) {
      console.log(`  ${name}: ${!agent.model ? 'missing model' : ''} ${!agent.description ? 'missing description' : ''}`);
    }
    process.exit(1);
  }

  if (shouldFix) {
    console.log('Updating KILO_SPEC.md...');
    updateKiloSpec(agents);
    console.log('Updating AGENTS.md...');
    updateAgentsMd(agents);
    console.log('✅ Done!');
  } else {
    console.log('✅ All agents have model and description');
    if (checkOnly) console.log('\nRun with --fix to update documentation.');
  }
}

main();
|
||||||
Reference in New Issue
Block a user