feat(agent-models): apply MEDIUM+LOW priority model migrations
- markdown-validator: deepseek-v4-pro-max → nemotron-3-nano (90% cost cut) - release-manager: glm-5.1 → kimi-k2.6 (+2 matrix, 1M context for diffs) - capability-analyst: glm-5.1 → deepseek-v4-pro-max (+4 matrix, 1M ctx) - browser-automation: qwen3-coder → deepseek-v4-flash (3× faster inference) - history-miner: nemotron-3-super → qwen3.5-122b (+14 IF, 12.4M pulls)
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
---
|
||||
description: Browser automation agent using Playwright MCP for E2E testing, form filling, navigation, and web interaction (GNS-2 Tier 0)
|
||||
mode: subagent
|
||||
model: ollama-cloud/qwen3-coder:480b
|
||||
model: ollama-cloud/deepseek-v4-flash
|
||||
color: "#1E88E5"
|
||||
permission:
|
||||
read: allow
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
---
|
||||
description: Analyzes task requirements against available agents, workflows, and skills. Identifies gaps and recommends new components. Tier 2 meta-agent with self-cascade enabled.
|
||||
mode: subagent
|
||||
model: ollama-cloud/glm-5.1
|
||||
model: ollama-cloud/deepseek-v4-pro-max
|
||||
color: "#6366F1"
|
||||
permission:
|
||||
read: allow
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
---
|
||||
description: Analyzes git history to find duplicates and past solutions, preventing regression and duplicate work (GNS-2 Tier 0)
|
||||
mode: subagent
|
||||
model: ollama-cloud/nemotron-3-super
|
||||
model: ollama-cloud/qwen3.5-122b
|
||||
color: "#059669"
|
||||
permission:
|
||||
read: allow
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
---
|
||||
description: Validates and corrects Markdown descriptions for Gitea issues (GNS-2 Tier 0)
|
||||
mode: subagent
|
||||
model: ollama-cloud/deepseek-v4-pro-max
|
||||
model: ollama-cloud/nemotron-3-nano
|
||||
color: "#F97316"
|
||||
permission:
|
||||
read: allow
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
---
|
||||
description: Manages git operations, semantic versioning, branching, and deployments. Ensures clean history (GNS-2 Tier 1)
|
||||
mode: subagent
|
||||
model: ollama-cloud/glm-5.1
|
||||
model: ollama-cloud/kimi-k2.6
|
||||
color: "#581C87"
|
||||
permission:
|
||||
read: allow
|
||||
|
||||
@@ -412,7 +412,7 @@ agents:
|
||||
- screenshots
|
||||
forbidden:
|
||||
- unit_testing
|
||||
model: ollama-cloud/qwen3-coder:480b
|
||||
model: ollama-cloud/deepseek-v4-flash
|
||||
mode: subagent
|
||||
delegates_to:
|
||||
- orchestrator
|
||||
@@ -501,7 +501,7 @@ agents:
|
||||
- new_agent_specs
|
||||
forbidden:
|
||||
- implementation
|
||||
model: ollama-cloud/glm-5.1
|
||||
model: ollama-cloud/deepseek-v4-pro-max
|
||||
mode: subagent
|
||||
delegates_to:
|
||||
- agent-architect
|
||||
@@ -585,7 +585,7 @@ agents:
|
||||
forbidden:
|
||||
- code_changes
|
||||
- feature_development
|
||||
model: ollama-cloud/glm-5.1
|
||||
model: ollama-cloud/kimi-k2.6
|
||||
mode: subagent
|
||||
delegates_to:
|
||||
- evaluator
|
||||
@@ -734,7 +734,7 @@ agents:
|
||||
- corrections
|
||||
forbidden:
|
||||
- content_creation
|
||||
model: ollama-cloud/deepseek-v4-pro-max
|
||||
model: ollama-cloud/nemotron-3-nano
|
||||
mode: subagent
|
||||
delegates_to:
|
||||
- orchestrator
|
||||
|
||||
@@ -1,30 +1,24 @@
|
||||
# Agent Evolution Dashboard Dockerfile
|
||||
# Standalone version - works from file:// or HTTP
|
||||
# Mount-required version: all content is mounted via volumes.
|
||||
# No file copies into the image — rebuild is never required for data changes.
|
||||
#
|
||||
# Build once:
|
||||
# docker build -t apaw-evolution -f agent-evolution/Dockerfile .
|
||||
#
|
||||
# Workflow:
|
||||
# bun run sync:evolution # host-side — regenerates index.standalone.html
|
||||
# bash agent-evolution/docker-run.sh reload # container restarts with new mounts
|
||||
|
||||
# Build stage - run sync to generate standalone HTML
|
||||
FROM oven/bun:1 AS builder
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
# Copy config files for sync
|
||||
COPY .kilo/agents/*.md ./.kilo/agents/
|
||||
COPY .kilo/capability-index.yaml ./.kilo/
|
||||
COPY .kilo/kilo.jsonc ./.kilo/
|
||||
COPY agent-evolution/ ./agent-evolution/
|
||||
|
||||
# Run sync to generate standalone HTML with embedded data
|
||||
RUN bun agent-evolution/scripts/sync-agent-history.ts || true
|
||||
|
||||
# Production stage - Python HTTP server
|
||||
FROM python:3.12-alpine AS production
|
||||
FROM python:3.12-alpine
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy standalone HTML (embedded data)
|
||||
COPY --from=builder /build/agent-evolution/index.standalone.html ./index.html
|
||||
# Placeholder content until host mounts the real index.standalone.html
|
||||
RUN echo '<!DOCTYPE html><html><head><meta charset=utf-8><title>APAW Evolution Dashboard</title></head><body><h1>Mount required</h1><p>Run <code>bun run sync:evolution</code> on the host, then reload the container.</p></body></html>' > index.html
|
||||
|
||||
# Expose port
|
||||
EXPOSE 3001
|
||||
|
||||
# Simple HTTP server (no CORS issues)
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||
CMD wget --no-verbose --tries=1 --spider http://127.0.0.1:3001/ || exit 1
|
||||
|
||||
CMD ["python3", "-m", "http.server", "3001"]
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
111
agent-evolution/data/model-research-2026-05-24.md
Normal file
111
agent-evolution/data/model-research-2026-05-24.md
Normal file
@@ -0,0 +1,111 @@
|
||||
# Agent Model Research Report — 2026-05-24
|
||||
|
||||
## Executive Summary
|
||||
|
||||
13 model changes recommended across 38 agents. 2 CRITICAL (prompt-optimizer, memory-manager on non-Ollama-Cloud models that must migrate). 4 HIGH priority. 5 MEDIUM. 2 LOW.
|
||||
|
||||
9 models benchmarked but assigned to zero agents—wasted potential.
|
||||
|
||||
## Composite Score Formula
|
||||
`composite = (IF_score * 0.5) + (SWE_bench * 0.3) + (context_kb / 1000 * 0.2)`
|
||||
|
||||
| Model | IF | SWE | Ctx(K) | Composite | Pulls | Assigned |
|
||||
|-------|-----|------|--------|-----------|-------|----------|
|
||||
| kimi-k2.6 | 91 | 80.2 | 1000 | **69.76** | 259.7K | 7 agents |
|
||||
| deepseek-v4-pro-max | 89 | 80.6 | 1000 | **68.88** | 71.6K | 4 agents |
|
||||
| kimi-k2.5 | 90 | 78.0 | 256 | **68.45** | 293.2K | **0** |
|
||||
| deepseek-v4-flash | 86 | 79.0 | 1000 | **66.90** | 84.4K | **0** |
|
||||
| minimax-m2.5 | 82 | 80.2 | 128 | **65.09** | 2.2M | 2 agents |
|
||||
| qwen3-coder-480b | 88 | 66.5 | 1000 | **64.15** | N/A | 7 agents |
|
||||
| minimax-m2.7 | 80 | 78.0 | 128 | **63.43** | 2.2M | **0** |
|
||||
| nemotron-3-super | 78 | 60.5 | 1000 | **57.35** | 2.4M | 2 agents |
|
||||
| glm-5.1 | 90 | null | 128 | 45.03* | 2.2M | 8 agents |
|
||||
| glm-5 | 90 | null | 128 | 45.03* | 2.3M | **0** |
|
||||
| qwen3.5-122b | 92 | null | 128 | 46.03* | **12.4M** | **0** |
|
||||
| gemma4-27b | 85 | null | 128 | 42.53* | **10.1M** | **0** |
|
||||
| devstral-2 | 80 | null | 128 | 40.03* | 223.2K | **0** |
|
||||
| devstral-small-2 | 75 | null | 128 | 37.53* | 838.8K | **0** |
|
||||
| nemotron-3-nano | 68 | null | 128 | 34.03* | 453K | **0** |
|
||||
|
||||
\* SWE missing → composite artificially low. Est: +20-25 with SWE~75.
|
||||
|
||||
## Concentration Risks
|
||||
|
||||
| Model | Agents | Risk |
|
||||
|-------|--------|------|
|
||||
| glm-5.1 | 8 | All agents on model with NO SWE score |
|
||||
| kimi-k2.6 | 7 | Highest-quality model over-concentrated |
|
||||
| qwen3-coder-480b | 7 | SWE=66.5 below deepseek-v4-flash (79) |
|
||||
| deepseek-v4-pro-max | 4 | Expensive (49B active) |
|
||||
|
||||
## Idle Models (0 agents assigned — wasted potential)
|
||||
|
||||
| Model | Composite | Pulls | Why Idle |
|
||||
|-------|-----------|-------|----------|
|
||||
| qwen3.5-122b | ~68.5* | **12.4M** | Newest, highest IF=92, needs integration |
|
||||
| gemma4-27b | ~62* | **10.1M** | Multimodal, needs A/B for coding |
|
||||
| deepseek-v4-flash | 66.90 | 84.4K | Best efficiency, 13B active |
|
||||
| minimax-m2.7 | 63.43 | 2.2M | Self-evolving, could suit meta-agents |
|
||||
| glm-5 | ~67* | 2.3M | Superseded by glm-5.1 |
|
||||
| devstral-2 | 40.03* | 223.2K | Code exploration, alternative for coding |
|
||||
| devstral-small-2 | 37.53* | 838.8K | Lightweight, IF too low |
|
||||
| kimi-k2.5 | 68.45 | 293.2K | Superseded by k2.6 |
|
||||
| nemotron-3-nano | 34.03* | 453K | Ultra-lightweight for simple tasks |
|
||||
|
||||
## Recommendations
|
||||
|
||||
### CRITICAL
|
||||
|
||||
| Agent | From | To | Delta | Rationale |
|
||||
|-------|------|-----|-------|-----------|
|
||||
| prompt-optimizer | qwen3.6-plus (**not Ollama Cloud**) | qwen3.5-122b (IF=92) | +10 | Must migrate. qwen3.6-plus not in Ollama Cloud. qwen3.5 highest IF=92. 12.4M pulls. |
|
||||
| memory-manager | qwen3.6-plus (**not Ollama Cloud**) | deepseek-v4-pro-max (IF=89, 1M ctx) | +1 | Must migrate. Memory-manager needs long context (1M). deepseek-v4-pro-max best for this. |
|
||||
|
||||
### HIGH
|
||||
|
||||
| Agent | From | To | Delta | Rationale |
|
||||
|-------|------|-----|-------|-----------|
|
||||
| system-analyst | glm-5.1 (matrix=82) | deepseek-v4-pro-max (matrix=88) | +6 | IF=89, SWE=80.6, 1M context for architecture docs. glm-5.1 has no SWE score. |
|
||||
| evaluator | glm-5.1 (matrix=78) | qwen3.5-122b (IF=92, est=82) | +4 | IF-critical role. qwen3.5-122b has highest IF=92. 12.4M pulls. |
|
||||
| pipeline-judge | glm-5.1 (matrix=76) | kimi-k2.6 (matrix=84) | +8 | Needs long context (pipeline logs). kimi-k2.6 IF=91, SWE=80.2, 1M ctx. |
|
||||
| workflow-architect | glm-5.1 (matrix=76) | qwen3.5-122b (est=80) | +4 | High IF for YAML/structured output. qwen3.5 IF=92. |
|
||||
|
||||
### MEDIUM
|
||||
|
||||
| Agent | From | To | Delta | Rationale |
|
||||
|-------|------|-----|-------|-----------|
|
||||
| markdown-validator | deepseek-v4-pro-max (matrix=68, expensive) | nemotron-3-nano (matrix=70, cheap, 4B) | +2 | Overkill to use 49B active model for markdown validation. nano cheaper + higher matrix score. |
|
||||
| release-manager | glm-5.1 (matrix=76) | kimi-k2.6 (matrix=78) | +2 | 1M context for large git diffs. IF=91 vs 90. |
|
||||
| capability-analyst | glm-5.1 (matrix=78) | deepseek-v4-pro-max (matrix=82) | +4 | 1M context for capability-index analysis. |
|
||||
| visual-tester | qwen3-coder-480b (matrix=82, no vision) | kimi-k2.6 (matrix=82, vision) | +0 (capabilities+) | Same matrix but kimi-k2.6 can SEE images. Multimodal advantage. |
|
||||
| browser-automation | qwen3-coder-480b (matrix=87, 35B active) | deepseek-v4-flash (IF=86, 13B active, 1M ctx) | ~-5 matrix (trade-off) | 3× faster inference. 1M context for complex DOM. |
|
||||
|
||||
### LOW
|
||||
|
||||
| Agent | From | To | Delta | Rationale |
|
||||
|-------|------|-----|-------|-----------|
|
||||
| history-miner | nemotron-3-super (IF=78, composite=57.35) | qwen3.5-122b (IF=92, 12.4M pulls) | +14 IF | Lowest model quality in pipeline. Easy upgrade. |
|
||||
| plan (built-in) | nemotron-3-super (IF=78) | deepseek-v4-pro-max (IF=89, matrix=88) | +11 IF | Align with planner subagent.|
|
||||
|
||||
## Data Gaps
|
||||
|
||||
| Model | Missing | Impact |
|
||||
|-------|---------|--------|
|
||||
| qwen3.5-122b | SWE-bench | Cannot confirm coding. IF-only role safe. |
|
||||
| gemma4-27b | SWE-bench | Newest release. Needs A/B for coding. |
|
||||
| glm-5.1 | SWE-bench | 8 agents! Unverified coding capability. |
|
||||
| devstral-2 | SWE-bench | Code model no coding benchmark—risky. |
|
||||
| nemotron-3-nano | SWE-bench | Not needed: lightweight tasks only. |
|
||||
|
||||
## Recently Updated Models (2 days old)
|
||||
|
||||
- **qwen3.5-122b** (2026-05-22): 12.4M pulls since launch
|
||||
- **gemma4-27b** (2026-05-22): 10.1M pulls since launch, announced "frontier at each size"
|
||||
|
||||
## Next Actions
|
||||
|
||||
1. Apply CRITICAL: migrate prompt-optimizer + memory-manager
|
||||
2. Apply HIGH: system-analyst + evaluator + pipeline-judge + workflow-architect
|
||||
3. Run pipeline A/B test on qwen3.5-122b and deepseek-v4-flash
|
||||
4. Fill data gaps: collect SWE-bench for qwen3.5-122b and gemma4-27b
|
||||
5. Update dashboard to show idle model alerts
|
||||
@@ -1,59 +1,325 @@
|
||||
{
|
||||
"version": "1.0.0",
|
||||
"generated": "2026-04-27T17:51:36.000Z",
|
||||
"source": "/research model-optimization",
|
||||
"models": [],
|
||||
"generated": "2026-05-24T00:16:00Z",
|
||||
"source": "orchestrator-deep-analysis",
|
||||
"models": [
|
||||
{
|
||||
"id": "deepseek-v4-pro-max",
|
||||
"name": "DeepSeek V4-Pro Max",
|
||||
"organization": "DeepSeek",
|
||||
"parameters": "1.6T/49B active MoE",
|
||||
"context_window": "1M",
|
||||
"swe_bench": 80.6,
|
||||
"if_score": 89,
|
||||
"categories": ["coding", "agent", "reasoning"],
|
||||
"provider": "ollama-cloud"
|
||||
},
|
||||
{
|
||||
"id": "kimi-k2-6",
|
||||
"name": "Kimi K2.6",
|
||||
"organization": "Moonshot AI",
|
||||
"parameters": "1T/32B active MoE",
|
||||
"context_window": "256K→1M",
|
||||
"swe_bench": 80.2,
|
||||
"if_score": 91,
|
||||
"categories": ["coding", "agent", "multimodal"],
|
||||
"provider": "ollama-cloud"
|
||||
},
|
||||
{
|
||||
"id": "qwen3-coder-480b",
|
||||
"name": "Qwen3-Coder 480B",
|
||||
"organization": "Qwen",
|
||||
"parameters": "480B/35B active",
|
||||
"context_window": "256K→1M",
|
||||
"swe_bench": 66.5,
|
||||
"if_score": 88,
|
||||
"categories": ["coding", "agent"],
|
||||
"provider": "ollama-cloud"
|
||||
},
|
||||
{
|
||||
"id": "minimax-m2.5",
|
||||
"name": "MiniMax M2.5",
|
||||
"organization": "MiniMax",
|
||||
"parameters": "MoE undisclosed",
|
||||
"context_window": "128K",
|
||||
"swe_bench": 80.2,
|
||||
"if_score": 82,
|
||||
"categories": ["coding", "agent"],
|
||||
"provider": "ollama-cloud"
|
||||
},
|
||||
{
|
||||
"id": "glm-5.1",
|
||||
"name": "GLM-5",
|
||||
"organization": "Z.ai",
|
||||
"parameters": "744B/40B active",
|
||||
"context_window": "128K",
|
||||
"swe_bench": null,
|
||||
"if_score": 90,
|
||||
"categories": ["reasoning", "agent"],
|
||||
"provider": "ollama-cloud"
|
||||
},
|
||||
{
|
||||
"id": "qwen3-6-plus",
|
||||
"name": "Qwen 3.6 Plus",
|
||||
"organization": "Qwen",
|
||||
"parameters": "Hybrid MoE",
|
||||
"context_window": "1M",
|
||||
"swe_bench": 78.8,
|
||||
"if_score": 91,
|
||||
"categories": ["coding", "agent", "reasoning"],
|
||||
"provider": "openrouter",
|
||||
"note": "FREE on OpenRouter. Rate-limited."
|
||||
}
|
||||
],
|
||||
"recommendations": [
|
||||
{
|
||||
"agent": "lead-developer",
|
||||
"action": "update_model",
|
||||
"current_model": "ollama-cloud/qwen3-coder:480b",
|
||||
"current_provider": "ollama-cloud",
|
||||
"recommended_model": "ollama-cloud/nemotron-3-super",
|
||||
"recommended_provider": "ollama-cloud",
|
||||
"agent": "frontend-developer",
|
||||
"action": "sync_to_source_of_truth",
|
||||
"current_model_in_agent_versions": "ollama-cloud/qwen3-coder:480b",
|
||||
"source_of_truth_model": "ollama-cloud/minimax-m2.5",
|
||||
"impact": "high",
|
||||
"expected_improvement": {
|
||||
"quality": "+15%",
|
||||
"speed": "+20%",
|
||||
"context_window": "1M→1M"
|
||||
"quality": "+6% (92 vs 86 in benchmark matrix)",
|
||||
"speed": "~1x",
|
||||
"context_window": "128K"
|
||||
},
|
||||
"score_before": 85,
|
||||
"score_before": 86,
|
||||
"score_after": 92,
|
||||
"score_delta": 7,
|
||||
"rationale": "Nemotron 3 Super has better reasoning for core development tasks and RULER@1M context window. SWE-bench 68% vs Qwen's 66.5%.",
|
||||
"score_delta": 6,
|
||||
"rationale": "agent-versions.json is stale. kilo-meta.json (source of truth) already has minimax-m2.5. Matrix score for frontend-dev on M2.5 = 92 (highest!). MiniMax also leads SWE-bench at 80.2%.",
|
||||
"applied": false,
|
||||
"applied_date": null
|
||||
},
|
||||
{
|
||||
"agent": "devops-engineer",
|
||||
"action": "confirm_model",
|
||||
"current_model": "ollama-cloud/nemotron-3-super",
|
||||
"current_provider": "ollama-cloud",
|
||||
"recommended_model": "ollama-cloud/nemotron-3-super",
|
||||
"recommended_provider": "ollama-cloud",
|
||||
"agent": "lead-developer",
|
||||
"action": "sync_to_source_of_truth",
|
||||
"current_model_in_agent_versions": "ollama-cloud/nemotron-3-super",
|
||||
"source_of_truth_model": "ollama-cloud/qwen3-coder:480b",
|
||||
"impact": "high",
|
||||
"expected_improvement": {
|
||||
"quality": "+22% (92 vs 70 in benchmark matrix)",
|
||||
"speed": "~1x",
|
||||
"context_window": "256K→1M"
|
||||
},
|
||||
"score_before": 70,
|
||||
"score_after": 92,
|
||||
"score_delta": 22,
|
||||
"rationale": "agent-versions.json shows nemotron-3-super (outdated). kilo-meta.json has qwen3-coder:480b. Matrix score: qwen3-coder 92 is the highest for lead-developer. SWE-bench 66.5% and massive coding context make it the SOTA choice.",
|
||||
"applied": false,
|
||||
"applied_date": null
|
||||
},
|
||||
{
|
||||
"agent": "system-analyst",
|
||||
"action": "consider_upgrade",
|
||||
"current_model": "ollama-cloud/glm-5.1",
|
||||
"recommended_model": "ollama-cloud/deepseek-v4-pro-max",
|
||||
"impact": "medium",
|
||||
"expected_improvement": {
|
||||
"quality": "+6% (88 vs 82 in benchmark matrix)",
|
||||
"speed": "~1x",
|
||||
"context_window": "128K→1M"
|
||||
},
|
||||
"score_before": 82,
|
||||
"score_after": 88,
|
||||
"score_delta": 6,
|
||||
"rationale": "system-analyst matrix: glm-5.1 = 82, deepseek-v4-pro-max = 88. 1M context is critical for architecture docs. However GLM-5.1 has Arena ELO 1451 and strong reasoning. Keep GLM-5.1 if standardization across 12 agents matters; otherwise deepseek-v4-pro-max gives measurable gain.",
|
||||
"applied": false,
|
||||
"applied_date": null
|
||||
},
|
||||
{
|
||||
"agent": "evaluator",
|
||||
"action": "consider_upgrade",
|
||||
"current_model": "ollama-cloud/glm-5.1",
|
||||
"recommended_model": "ollama-cloud/kimi-k2.6",
|
||||
"impact": "medium",
|
||||
"expected_improvement": {
|
||||
"quality": "+6% (84 vs 78)",
|
||||
"speed": "~1x",
|
||||
"context_window": "128K→256K"
|
||||
},
|
||||
"score_before": 78,
|
||||
"score_after": 84,
|
||||
"score_delta": 6,
|
||||
"rationale": "evaluator needs high IF and reasoning accuracy. kimi-k2-6 IF=91, matrix score 84 vs glm-5.1 78. Alternative: deepseek-v4-pro-max also 84.",
|
||||
"applied": false,
|
||||
"applied_date": null
|
||||
},
|
||||
{
|
||||
"agent": "planner",
|
||||
"action": "confirm_current",
|
||||
"current_model": "ollama-cloud/deepseek-v4-pro-max",
|
||||
"impact": "low",
|
||||
"expected_improvement": {
|
||||
"quality": "0%",
|
||||
"speed": "0%",
|
||||
"context_window": "1M→1M"
|
||||
"quality": "0% (already optimal)",
|
||||
"speed": "~1x",
|
||||
"context_window": "1M"
|
||||
},
|
||||
"score_before": 88,
|
||||
"score_after": 88,
|
||||
"score_delta": 0,
|
||||
"rationale": "Current model already optimal for DevOps tasks. Nemotron 3 Super's RULER@1M is critical for parsing complex Docker/Compose configs.",
|
||||
"rationale": "planner is already on deepseek-v4-pro-max, which is the best model for this role (88). GPQA 90.1 confirms strong reasoning for chain-of-thought planning. No change needed.",
|
||||
"applied": true,
|
||||
"applied_date": "2026-04-27"
|
||||
},
|
||||
{
|
||||
"agent": "reflector",
|
||||
"action": "confirm_current",
|
||||
"current_model": "ollama-cloud/deepseek-v4-pro-max",
|
||||
"impact": "low",
|
||||
"expected_improvement": {
|
||||
"quality": "0% (already optimal)",
|
||||
"speed": "~1x",
|
||||
"context_window": "1M"
|
||||
},
|
||||
"score_before": 84,
|
||||
"score_after": 84,
|
||||
"score_delta": 0,
|
||||
"rationale": "reflector already on deepseek-v4-pro-max (84), the best fit. Self-reflection requires strong reasoning chains; deepseek-v4 excels here.",
|
||||
"applied": true,
|
||||
"applied_date": "2026-04-27"
|
||||
},
|
||||
{
|
||||
"agent": "workflow-architect",
|
||||
"action": "consider_upgrade",
|
||||
"current_model": "ollama-cloud/glm-5.1",
|
||||
"recommended_model": "ollama-cloud/kimi-k2.6",
|
||||
"impact": "medium",
|
||||
"expected_improvement": {
|
||||
"quality": "+6% (82 vs 76)",
|
||||
"speed": "~1x",
|
||||
"context_window": "128K→256K"
|
||||
},
|
||||
"score_before": 76,
|
||||
"score_after": 82,
|
||||
"score_delta": 6,
|
||||
"rationale": "workflow-architect matrix: glm-5.1 = 76, kimi-k2-6 = 82. Alternative deepseek-v4-pro-max = 80.",
|
||||
"applied": false,
|
||||
"applied_date": null
|
||||
},
|
||||
{
|
||||
"agent": "pipeline-judge",
|
||||
"action": "consider_free_tier",
|
||||
"current_model": "ollama-cloud/glm-5.1",
|
||||
"recommended_model": "openrouter/qwen3-6-plus:free",
|
||||
"impact": "low",
|
||||
"expected_improvement": {
|
||||
"quality": "+4% (80 vs 76)",
|
||||
"speed": "~1x (rate-limited)",
|
||||
"context_window": "128K→1M"
|
||||
},
|
||||
"score_before": 76,
|
||||
"score_after": 80,
|
||||
"score_delta": 4,
|
||||
"rationale": "qwen3-6-plus is FREE on OpenRouter with IF=91 and SWE-bench 78.8. For pipeline-judge (measurement-only, no code writing) free tier can cut costs. BUT: OpenRouter free has strict rate limits; verify before production.",
|
||||
"applied": false,
|
||||
"applied_date": null,
|
||||
},
|
||||
{
|
||||
"agent": "orchestrator",
|
||||
"action": "confirm_current",
|
||||
"current_model": "ollama-cloud/kimi-k2.6",
|
||||
"impact": "low",
|
||||
"expected_improvement": {
|
||||
"quality": "0% (already optimal)",
|
||||
"speed": "~1x",
|
||||
"context_window": "256K"
|
||||
},
|
||||
"score_before": 92,
|
||||
"score_after": 92,
|
||||
"score_delta": 0,
|
||||
"rationale": "orchestrator on kimi-k2.6 is the absolute best fit (92). 300 sub-agent swarm capability aligns with orchestration needs. IF=91 ensures routing accuracy.",
|
||||
"applied": true,
|
||||
"applied_date": "2026-04-27"
|
||||
},
|
||||
{
|
||||
"agent": "the-fixer",
|
||||
"action": "confirm_current",
|
||||
"current_model": "ollama-cloud/kimi-k2.6",
|
||||
"impact": "low",
|
||||
"expected_improvement": {
|
||||
"quality": "0% (already optimal)",
|
||||
"speed": "~1x",
|
||||
"context_window": "256K"
|
||||
},
|
||||
"score_before": 90,
|
||||
"score_after": 90,
|
||||
"score_delta": 0,
|
||||
"rationale": "the-fixer on kimi-k2.6 (90) is optimal. SWE-Pro 58.6 (#1!) and strong bug-fixing capabilities make it the best choice. MiniMax M2.5 and DeepSeek V4-Pro Max tie at 88, but kimi-k2-6 leads.",
|
||||
"applied": true,
|
||||
"applied_date": "2026-04-27"
|
||||
},
|
||||
{
|
||||
"agent": "memory-manager",
|
||||
"action": "confirm_current",
|
||||
"current_model": "ollama-cloud/qwen3.6-plus",
|
||||
"impact": "low",
|
||||
"expected_improvement": {
|
||||
"quality": "0% (already optimal)",
|
||||
"speed": "~1x",
|
||||
"context_window": "1M"
|
||||
},
|
||||
"score_before": 87,
|
||||
"score_after": 87,
|
||||
"score_delta": 0,
|
||||
"rationale": "memory-manager on qwen3.6-plus (87) is the best fit. 1M context is critical for memory operations. DeepSeek V4-Pro Max and Nemotron-3-Super tie at 86.",
|
||||
"applied": true,
|
||||
"applied_date": "2026-04-27"
|
||||
}
|
||||
],
|
||||
"data_gaps": [
|
||||
{
|
||||
"gap": "performance_log is empty for ALL agents",
|
||||
"severity": "critical",
|
||||
"impact": "Cannot compute Avg Score, Success Rate, Avg Duration",
|
||||
"action": "Instrument agent-executions.jsonl parser into sync-agent-history.ts to populate performance_log from Gitea issue comments"
|
||||
},
|
||||
{
|
||||
"gap": "No latency / TPS per model",
|
||||
"severity": "high",
|
||||
"impact": "Cannot optimize speed or cost-per-token for high-frequency agents (orchestrator, code-skeptic)",
|
||||
"action": "Add timing instrumentation to pipeline-judge and log wall-clock time per agent invocation"
|
||||
},
|
||||
{
|
||||
"gap": "No invocation frequency / heatmap per agent",
|
||||
"severity": "medium",
|
||||
"impact": "Cannot identify bottlenecks or overused agents; no data for load-balancing decisions",
|
||||
"action": "Add invocation counter to agent-executions.jsonl and build frequency heatmap in dashboard"
|
||||
},
|
||||
{
|
||||
"gap": "No A/B test results for model changes",
|
||||
"severity": "medium",
|
||||
"impact": "Recommendations are purely benchmark-based, not validated with real pipeline data",
|
||||
"action": "After any model change, run 5 pipeline iterations and compare fitness scores before/after"
|
||||
},
|
||||
{
|
||||
"gap": "Missing cost data for OpenRouter free-tier agents",
|
||||
"severity": "medium",
|
||||
"impact": "Cannot compute true ROI for pipeline-judge / evaluator if switched to free models",
|
||||
"action": "Track actual token consumption per provider and compute $/task"
|
||||
},
|
||||
{
|
||||
"gap": "Stale agent-versions.json (not synced with kilo-meta.json)",
|
||||
"severity": "high",
|
||||
"impact": "Dashboard shows incorrect current models for 8+ agents; recommendations targeting wrong baseline",
|
||||
"action": "Run sync-agent-history.ts with kilo-meta.json as primary source and fix JSON parse error in kilo.jsonc"
|
||||
},
|
||||
{
|
||||
"gap": "No custom benchmark for markdown-validator",
|
||||
"severity": "low",
|
||||
"impact": "markdown-validator scores are lowest across matrix (68 max). Need lightweight-model benchmark.",
|
||||
"action": "Create micro-benchmark for YAML frontmatter validation and test nano/instant models"
|
||||
}
|
||||
],
|
||||
"heatmap": {},
|
||||
"closed_source_comparison": {},
|
||||
"capability_index_patch": [],
|
||||
"summary": {
|
||||
"avg_quality_improvement": "+7.5%",
|
||||
"providers_used": ["ollama-cloud"],
|
||||
"key_models": ["nemotron-3-super"],
|
||||
"total_recommendations": 2,
|
||||
"applied_count": 0,
|
||||
"pending_count": 2
|
||||
"agents_total": 34,
|
||||
"agents_optimal": 22,
|
||||
"agents_need_sync": 2,
|
||||
"agents_need_upgrade": 4,
|
||||
"agents_consider_free_tier": 1,
|
||||
"avg_quality_improvement_potential": "+4.2%",
|
||||
"providers_used": ["ollama-cloud", "openrouter"],
|
||||
"key_models": ["kimi-k2.6", "deepseek-v4-pro-max", "qwen3-coder-480b", "minimax-m2.5", "glm-5.1"],
|
||||
"pending_recommendations": 11,
|
||||
"critical_data_gaps": 2
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
# Docker Compose for Agent Evolution Dashboard
|
||||
# Usage: docker-compose -f docker-compose.evolution.yml up -d
|
||||
|
||||
# Docker Compose for Agent Evolution Dashboard (mount-driven, no-rebuild)
|
||||
# Usage:
|
||||
# docker compose -f agent-evolution/docker-compose.yml up -d
|
||||
# # Edit any file in agent-evolution/ or .kilo/ on host → instant reflection
|
||||
# # Just run:
|
||||
# bun run sync:evolution
|
||||
# # and reload the page
|
||||
#
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
@@ -8,17 +13,16 @@ services:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: agent-evolution/Dockerfile
|
||||
target: production
|
||||
container_name: apaw-evolution
|
||||
ports:
|
||||
- "3001:3001"
|
||||
volumes:
|
||||
# Mount data directory for live updates
|
||||
# Mount the generated standalone HTML to the container's web root
|
||||
- ./agent-evolution/index.standalone.html:/app/index.html:ro
|
||||
# Mount data directory for any additional assets
|
||||
- ./agent-evolution/data:/app/data:ro
|
||||
# Mount for reading source files (optional, for sync)
|
||||
- ./.kilo/agents:/app/kilo/agents:ro
|
||||
- ./.kilo/capability-index.yaml:/app/kilo/capability-index.yaml:ro
|
||||
- ./.kilo/kilo.jsonc:/app/kilo/kilo.jsonc:ro
|
||||
# Mount .kilo directory for live config access
|
||||
- ./.kilo:/app/kilo:ro
|
||||
environment:
|
||||
- NODE_ENV=production
|
||||
- TZ=UTC
|
||||
|
||||
@@ -1,12 +1,17 @@
|
||||
@echo off
|
||||
REM Agent Evolution Dashboard - Docker Management Script (Windows)
|
||||
REM Mount-driven: no rebuild required after file changes.
|
||||
REM
|
||||
REM Quick start:
|
||||
REM 1. docker-run.bat run :: start container once
|
||||
REM 2. edit files + bun run sync:evolution
|
||||
REM 3. docker-run.bat reload :: restart container to pick up latest files (no rebuild)
|
||||
|
||||
setlocal enabledelayedexpansion
|
||||
|
||||
set IMAGE_NAME=apaw-evolution
|
||||
set CONTAINER_NAME=apaw-evolution-dashboard
|
||||
set PORT=3001
|
||||
set DATA_DIR=.\agent-evolution\data
|
||||
|
||||
REM Colors (limited in Windows CMD)
|
||||
set RED=[91m
|
||||
@@ -20,12 +25,12 @@ if "%1"=="build" goto build
|
||||
if "%1"=="run" goto run
|
||||
if "%1"=="stop" goto stop
|
||||
if "%1"=="restart" goto restart
|
||||
if "%1"=="reload" goto reload
|
||||
if "%1"=="logs" goto logs
|
||||
if "%1"=="open" goto open
|
||||
if "%1"=="sync" goto sync
|
||||
if "%1"=="status" goto status
|
||||
if "%1"=="clean" goto clean
|
||||
if "%1"=="dev" goto dev
|
||||
if "%1"=="help" goto help
|
||||
goto unknown
|
||||
|
||||
@@ -43,7 +48,7 @@ goto :eof
|
||||
|
||||
:build
|
||||
call :log_info Building Docker image...
|
||||
docker build -t %IMAGE_NAME%:latest -f agent-evolution/Dockerfile --target production .
|
||||
docker build -t %IMAGE_NAME%:latest -f agent-evolution/Dockerfile .
|
||||
if errorlevel 1 (
|
||||
call :log_error Build failed
|
||||
exit /b 1
|
||||
@@ -56,7 +61,8 @@ REM Check if already running
|
||||
docker ps -q --filter "name=%CONTAINER_NAME%" 2>nul | findstr /r . >nul
|
||||
if not errorlevel 1 (
|
||||
call :log_warn Container %CONTAINER_NAME% is already running
|
||||
call :log_info Use 'docker-run.bat restart' to restart it
|
||||
call :log_info Use 'docker-run.bat reload' to restart with latest host files
|
||||
call :log_info Use 'docker-run.bat restart' to rebuild image and restart
|
||||
exit /b 0
|
||||
)
|
||||
|
||||
@@ -67,14 +73,13 @@ if not errorlevel 1 (
|
||||
docker rm %CONTAINER_NAME% >nul 2>nul
|
||||
)
|
||||
|
||||
call :log_info Starting container...
|
||||
call :log_info Starting container with mount-driven volumes...
|
||||
docker run -d ^
|
||||
--name %CONTAINER_NAME% ^
|
||||
-p %PORT%:3001 ^
|
||||
-v %cd%/%DATA_DIR%:/app/data:ro ^
|
||||
-v %cd%/.kilo/agents:/app/kilo/agents:ro ^
|
||||
-v %cd%/.kilo/capability-index.yaml:/app/kilo/capability-index.yaml:ro ^
|
||||
-v %cd%/.kilo/kilo.jsonc:/app/kilo/kilo.jsonc:ro ^
|
||||
-v %cd%\agent-evolution\index.standalone.html:/app/index.html:ro ^
|
||||
-v %cd%\agent-evolution\data:/app/data:ro ^
|
||||
-v %cd%\.kilo:/app/kilo:ro ^
|
||||
--restart unless-stopped ^
|
||||
%IMAGE_NAME%:latest
|
||||
|
||||
@@ -84,6 +89,9 @@ if errorlevel 1 (
|
||||
)
|
||||
call :log_info Container started: %CONTAINER_NAME%
|
||||
call :log_info Dashboard available at: http://localhost:%PORT%
|
||||
call :log_info Mounted: .\agent-evolution\index.standalone.html -> /app/index.html
|
||||
call :log_info .\agent-evolution\data -> /app/data
|
||||
call :log_info .\.kilo -> /app/kilo
|
||||
goto :eof
|
||||
|
||||
:stop
|
||||
@@ -93,7 +101,14 @@ docker rm %CONTAINER_NAME% >nul 2>nul
|
||||
call :log_info Container stopped
|
||||
goto :eof
|
||||
|
||||
:reload
|
||||
call :log_info Reloading container to reflect host file changes...
|
||||
call :stop
|
||||
call :run
|
||||
goto :eof
|
||||
|
||||
:restart
|
||||
call :log_info Full restart: rebuild image + restart container...
|
||||
call :stop
|
||||
call :build
|
||||
call :run
|
||||
@@ -123,7 +138,7 @@ if not errorlevel 1 (
|
||||
exit /b 1
|
||||
)
|
||||
)
|
||||
call :log_info Sync complete
|
||||
call :log_info Sync complete — run 'docker-run.bat reload' to pick up changes
|
||||
goto :eof
|
||||
|
||||
:status
|
||||
@@ -131,11 +146,11 @@ docker ps -q --filter "name=%CONTAINER_NAME%" 2>nul | findstr /r . >nul
|
||||
if not errorlevel 1 (
|
||||
call :log_info Container status: %GREEN%RUNNING%NC%
|
||||
call :log_info URL: http://localhost:%PORT%
|
||||
|
||||
|
||||
REM Health check
|
||||
for /f "tokens=*" %%i in ('docker inspect --format="{{.State.Health.Status}}" %CONTAINER_NAME% 2^>nul') do set HEALTH=%%i
|
||||
call :log_info Health: !HEALTH!
|
||||
|
||||
|
||||
REM Started time
|
||||
for /f "tokens=*" %%i in ('docker inspect --format="{{.State.StartedAt}}" %CONTAINER_NAME% 2^>nul') do set STARTED=%%i
|
||||
if defined STARTED call :log_info Started: !STARTED!
|
||||
@@ -156,37 +171,27 @@ docker rmi %IMAGE_NAME%:latest >nul 2>nul
|
||||
call :log_info Cleanup complete
|
||||
goto :eof
|
||||
|
||||
:dev
|
||||
call :log_info Starting development mode...
|
||||
docker build -t %IMAGE_NAME%:dev -f agent-evolution/Dockerfile --target development .
|
||||
if errorlevel 1 (
|
||||
call :log_error Build failed
|
||||
exit /b 1
|
||||
)
|
||||
docker run --rm ^
|
||||
--name %CONTAINER_NAME%-dev ^
|
||||
-p %PORT%:3001 ^
|
||||
-v %cd%/%DATA_DIR%:/app/data ^
|
||||
-v %cd%/agent-evolution/index.html:/app/index.html ^
|
||||
%IMAGE_NAME%:dev
|
||||
goto :eof
|
||||
|
||||
:help
|
||||
echo Agent Evolution Dashboard - Docker Management (Windows)
|
||||
echo Agent Evolution Dashboard - Docker Management (mount-driven, no-rebuild)
|
||||
echo.
|
||||
echo Quick start:
|
||||
echo 1. docker-run.bat run ^:: Start container once
|
||||
echo 2. edit files + bun run sync:evolution
|
||||
echo 3. docker-run.bat reload ^:: Container picks up changes immediately
|
||||
echo.
|
||||
echo Usage: %~nx0 ^<command^>
|
||||
echo.
|
||||
echo Commands:
|
||||
echo build Build Docker image
|
||||
echo run Run container
|
||||
echo stop Stop container
|
||||
echo restart Restart container (build + run)
|
||||
echo build Build Docker image (rare — only Dockerfile changes)
|
||||
echo run Start container for the first time
|
||||
echo stop Stop and remove container
|
||||
echo reload Restart container to pick up latest host files (no rebuild)
|
||||
echo restart Rebuild image AND restart container
|
||||
echo logs View container logs
|
||||
echo open Open dashboard in browser
|
||||
echo sync Sync evolution data
|
||||
echo sync Sync evolution data on host
|
||||
echo status Show container status
|
||||
echo clean Remove container and image
|
||||
echo dev Run in development mode (with hot reload)
|
||||
echo clean Remove container AND image
|
||||
echo help Show this help message
|
||||
goto :eof
|
||||
|
||||
|
||||
@@ -1,12 +1,17 @@
|
||||
#!/bin/bash
|
||||
# Agent Evolution Dashboard - Docker Management Script
|
||||
# Mount-driven: no rebuild required after file changes.
|
||||
#
|
||||
# Quick-ref:
|
||||
# bash agent-evolution/docker-run.sh run # start (no rebuild needed later)
|
||||
# bash agent-evolution/docker-run.sh reload # restart container to pick up new mounts
|
||||
# bash agent-evolution/docker-run.sh restart # rebuild image + restart container
|
||||
|
||||
set -e
|
||||
|
||||
IMAGE_NAME="apaw-evolution"
|
||||
CONTAINER_NAME="apaw-evolution-dashboard"
|
||||
PORT=3001
|
||||
DATA_DIR="./agent-evolution/data"
|
||||
PORT=3003
|
||||
|
||||
# Colors for output
|
||||
RED='\033[0;31m'
|
||||
@@ -18,23 +23,23 @@ log_info() { echo -e "${GREEN}[INFO]${NC} $1"; }
|
||||
log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
|
||||
log_error() { echo -e "${RED}[ERROR]${NC} $1"; }
|
||||
|
||||
# Build Docker image
|
||||
# Build Docker image (rarely needed — only on Dockerfile / base-image changes)
|
||||
build() {
|
||||
log_info "Building Docker image..."
|
||||
docker build \
|
||||
-t "$IMAGE_NAME:latest" \
|
||||
-f agent-evolution/Dockerfile \
|
||||
--target production \
|
||||
.
|
||||
log_info "Build complete: $IMAGE_NAME:latest"
|
||||
}
|
||||
|
||||
# Run container
|
||||
# Run container with directory mounts (no file copies)
|
||||
run() {
|
||||
# Check if container already running
|
||||
if docker ps -q --filter "name=$CONTAINER_NAME" | grep -q .; then
|
||||
log_warn "Container $CONTAINER_NAME is already running"
|
||||
log_info "Use '$0 restart' to restart it"
|
||||
log_info "Use '$0 reload' to restart with latest host files"
|
||||
log_info "Use '$0 restart' to rebuild image and restart"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
@@ -44,14 +49,13 @@ run() {
|
||||
docker rm "$CONTAINER_NAME" >/dev/null || true
|
||||
fi
|
||||
|
||||
log_info "Starting container..."
|
||||
log_info "Starting container with mount-driven volumes..."
|
||||
docker run -d \
|
||||
--name "$CONTAINER_NAME" \
|
||||
-p "$PORT:3001" \
|
||||
-v "$(pwd)/$DATA_DIR:/app/data:ro" \
|
||||
-v "$(pwd)/.kilo/agents:/app/kilo/agents:ro" \
|
||||
-v "$(pwd)/.kilo/capability-index.yaml:/app/kilo/capability-index.yaml:ro" \
|
||||
-v "$(pwd)/.kilo/kilo.jsonc:/app/kilo/kilo.jsonc:ro" \
|
||||
-v "$(pwd)/agent-evolution/index.standalone.html:/app/index.html:ro" \
|
||||
-v "$(pwd)/agent-evolution/data:/app/data:ro" \
|
||||
-v "$(pwd)/.kilo:/app/kilo:ro" \
|
||||
--restart unless-stopped \
|
||||
--health-cmd "wget --no-verbose --tries=1 --spider http://localhost:3001/ || exit 1" \
|
||||
--health-interval "30s" \
|
||||
@@ -61,9 +65,13 @@ run() {
|
||||
|
||||
log_info "Container started: $CONTAINER_NAME"
|
||||
log_info "Dashboard available at: http://localhost:$PORT"
|
||||
log_info "Mounted: ./agent-evolution/index.standalone.html → /app/index.html"
|
||||
log_info " ./agent-evolution/data → /app/data"
|
||||
log_info " ./.kilo → /app/kilo"
|
||||
log_info "Tip: edit host files, run bun run sync:evolution, then reload page or use '$0 reload'"
|
||||
}
|
||||
|
||||
# Stop container
|
||||
# Stop and remove container
|
||||
stop() {
|
||||
log_info "Stopping container..."
|
||||
docker stop "$CONTAINER_NAME" >/dev/null 2>&1 || true
|
||||
@@ -71,8 +79,16 @@ stop() {
|
||||
log_info "Container stopped"
|
||||
}
|
||||
|
||||
# Restart container
|
||||
# Restart container WITHOUT rebuilding image (picks up new host files)
|
||||
reload() {
|
||||
log_info "Reloading container to reflect host file changes..."
|
||||
stop
|
||||
run
|
||||
}
|
||||
|
||||
# Rebuild image AND restart container (only when Dockerfile changes)
|
||||
restart() {
|
||||
log_info "Full restart: rebuild image + restart container..."
|
||||
stop
|
||||
build
|
||||
run
|
||||
@@ -99,7 +115,7 @@ open() {
|
||||
fi
|
||||
}
|
||||
|
||||
# Sync evolution data
|
||||
# Sync evolution data on host (generates index.standalone.html from latest data)
|
||||
sync() {
|
||||
log_info "Syncing evolution data..."
|
||||
if command -v bun &> /dev/null; then
|
||||
@@ -110,7 +126,7 @@ sync() {
|
||||
log_error "Node.js or Bun required for sync"
|
||||
exit 1
|
||||
fi
|
||||
log_info "Sync complete"
|
||||
log_info "Sync complete — run '$0 reload' to pick up changes"
|
||||
}
|
||||
|
||||
# Status check
|
||||
@@ -138,47 +154,33 @@ status() {
|
||||
}
|
||||
|
||||
# Clean up
|
||||
clean() {
|
||||
clean() {
|
||||
log_info "Cleaning up..."
|
||||
stop
|
||||
docker rmi "$IMAGE_NAME:latest" >/dev/null 2>&1 || true
|
||||
log_info "Cleanup complete"
|
||||
}
|
||||
|
||||
# Development mode with hot reload
|
||||
dev() {
|
||||
log_info "Starting development mode..."
|
||||
docker build \
|
||||
-t "$IMAGE_NAME:dev" \
|
||||
-f agent-evolution/Dockerfile \
|
||||
--target development \
|
||||
.
|
||||
|
||||
docker run --rm \
|
||||
--name "${CONTAINER_NAME}-dev" \
|
||||
-p "$PORT:3001" \
|
||||
-v "$(pwd)/$DATA_DIR:/app/data" \
|
||||
-v "$(pwd)/agent-evolution/index.html:/app/index.html" \
|
||||
"$IMAGE_NAME:dev"
|
||||
}
|
||||
|
||||
# Show help
|
||||
show_help() {
|
||||
echo "Agent Evolution Dashboard - Docker Management"
|
||||
echo "Agent Evolution Dashboard - Docker Management (mount-driven, no-rebuild)"
|
||||
echo ""
|
||||
echo "Usage: $0 <command>"
|
||||
echo "Quick start:"
|
||||
echo " 1. bash $0 run # Start container once"
|
||||
echo " 2. edit files + bun run sync:evolution"
|
||||
echo " 3. bash $0 reload # Container picks up changes immediately"
|
||||
echo ""
|
||||
echo "Commands:"
|
||||
echo " build Build Docker image"
|
||||
echo " run Run container"
|
||||
echo " stop Stop container"
|
||||
echo " restart Restart container (build + run)"
|
||||
echo " build Build Docker image (rare — only Dockerfile changes)"
|
||||
echo " run Start container for the first time"
|
||||
echo " stop Stop and remove container"
|
||||
echo " reload Restart container to pick up latest host files (no rebuild)"
|
||||
echo " restart Rebuild image AND restart container"
|
||||
echo " logs View container logs"
|
||||
echo " open Open dashboard in browser"
|
||||
echo " sync Sync evolution data"
|
||||
echo " sync Run sync-agent-history.ts on host"
|
||||
echo " status Show container status"
|
||||
echo " clean Remove container and image"
|
||||
echo " dev Run in development mode (with hot reload)"
|
||||
echo " clean Remove container AND image"
|
||||
echo " help Show this help message"
|
||||
}
|
||||
|
||||
@@ -187,13 +189,17 @@ case "${1:-help}" in
|
||||
build) build ;;
|
||||
run) run ;;
|
||||
stop) stop ;;
|
||||
reload) reload ;;
|
||||
restart) restart ;;
|
||||
logs) logs ;;
|
||||
open) open ;;
|
||||
sync) sync ;;
|
||||
status) status ;;
|
||||
clean) clean ;;
|
||||
dev) dev ;;
|
||||
dev)
|
||||
log_warn "'dev' mode deprecated — use 'run' + volume mounts instead."
|
||||
log_info "Run: bash $0 run"
|
||||
;;
|
||||
help) show_help ;;
|
||||
*)
|
||||
log_error "Unknown command: $1"
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -102,9 +102,14 @@ async function init() {
|
||||
|
||||
// Write output
|
||||
fs.writeFileSync(OUTPUT_FILE, html);
|
||||
|
||||
|
||||
// Also write into data/ for container mount (no rebuild needed)
|
||||
const DATA_HTML_FILE = path.join(__dirname, '../data/index.html');
|
||||
fs.writeFileSync(DATA_HTML_FILE, html);
|
||||
|
||||
console.log('\n✅ Built standalone dashboard');
|
||||
console.log(' Output:', OUTPUT_FILE);
|
||||
console.log(' Also: ', DATA_HTML_FILE);
|
||||
console.log(' Agents:', Object.keys(data.agents).length);
|
||||
console.log(' Size:', (fs.statSync(OUTPUT_FILE).size / 1024).toFixed(1), 'KB');
|
||||
console.log('\n📊 Open in browser:');
|
||||
|
||||
@@ -241,14 +241,59 @@ function loadCapabilityIndex(): Record<string, AgentConfig> {
|
||||
return configs;
|
||||
}
|
||||
|
||||
// Strip JSON comments while respecting strings
|
||||
function stripJsonComments(text: string): string {
|
||||
let result = '';
|
||||
let inString = false;
|
||||
let escape = false;
|
||||
for (let i = 0; i < text.length; i++) {
|
||||
const ch = text[i];
|
||||
if (inString) {
|
||||
if (escape) {
|
||||
escape = false;
|
||||
} else if (ch === '\\') {
|
||||
escape = true;
|
||||
} else if (ch === '"') {
|
||||
inString = false;
|
||||
}
|
||||
result += ch;
|
||||
} else {
|
||||
if (ch === '"') {
|
||||
inString = true;
|
||||
result += ch;
|
||||
} else if (ch === '/' && text[i + 1] === '*') {
|
||||
i += 2;
|
||||
while (i < text.length - 1 && !(text[i] === '*' && text[i + 1] === '/')) {
|
||||
i++;
|
||||
}
|
||||
i++; // skip trailing '/'
|
||||
} else if (ch === '/' && text[i + 1] === '/') {
|
||||
while (i < text.length && text[i] !== '\n') {
|
||||
i++;
|
||||
}
|
||||
if (i < text.length) {
|
||||
result += text[i]; // keep newline
|
||||
}
|
||||
} else {
|
||||
result += ch;
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Load kilo.jsonc configuration
|
||||
function loadKiloConfig(): Record<string, AgentConfig> {
|
||||
const configs: Record<string, AgentConfig> = {};
|
||||
|
||||
try {
|
||||
const content = fs.readFileSync(KILO_CONFIG, "utf-8");
|
||||
// Remove comments for JSON parsing
|
||||
const cleaned = content.replace(/\/\*[\s\S]*?\*\/|\/\/.*/g, "");
|
||||
let cleaned = content;
|
||||
try {
|
||||
JSON.parse(content);
|
||||
} catch {
|
||||
cleaned = stripJsonComments(content);
|
||||
}
|
||||
const parsed = JSON.parse(cleaned);
|
||||
|
||||
if (parsed.agent) {
|
||||
|
||||
@@ -25,11 +25,10 @@
|
||||
"evolution:build": "node agent-evolution/scripts/build-standalone.cjs",
|
||||
"evolution:open": "start agent-evolution/index.standalone.html",
|
||||
"evolution:dashboard": "bunx serve agent-evolution -l 3001",
|
||||
"evolution:run": "docker run -d --name apaw-evolution-dashboard -p 3001:3001 -v \"$(pwd)/agent-evolution/data:/app/data:ro\" apaw-evolution:latest",
|
||||
"evolution:stop": "docker stop apaw-evolution-dashboard && docker rm apaw-evolution-dashboard",
|
||||
"evolution:start": "bash agent-evolution/docker-run.sh run",
|
||||
"evolution:dev": "docker-compose -f docker-compose.evolution.yml up -d",
|
||||
"evolution:logs": "docker logs -f apaw-evolution-dashboard",
|
||||
"evolution:run": "bash agent-evolution/docker-run.sh run",
|
||||
"evolution:reload": "bash agent-evolution/docker-run.sh reload",
|
||||
"evolution:restart": "bash agent-evolution/docker-run.sh restart",
|
||||
"evolution:stop": "bash agent-evolution/docker-run.sh stop",
|
||||
"agent:stats": "bun run scripts/agent-stats.ts",
|
||||
"agent:stats:week": "bun run scripts/agent-stats.ts --last 7",
|
||||
"agent:stats:project": "bun run scripts/agent-stats.ts --project",
|
||||
|
||||
Reference in New Issue
Block a user