diff --git a/.kilo/agents/browser-automation.md b/.kilo/agents/browser-automation.md
index 8e2ba56..013f7c0 100755
--- a/.kilo/agents/browser-automation.md
+++ b/.kilo/agents/browser-automation.md
@@ -1,7 +1,7 @@
 ---
 description: Browser automation agent using Playwright MCP for E2E testing, form filling, navigation, and web interaction (GNS-2 Tier 0)
 mode: subagent
-model: ollama-cloud/qwen3-coder:480b
+model: ollama-cloud/deepseek-v4-flash
 color: "#1E88E5"
 permission:
   read: allow
diff --git a/.kilo/agents/capability-analyst.md b/.kilo/agents/capability-analyst.md
index 851ec22..a1f279f 100755
--- a/.kilo/agents/capability-analyst.md
+++ b/.kilo/agents/capability-analyst.md
@@ -1,7 +1,7 @@
 ---
 description: Analyzes task requirements against available agents, workflows, and skills. Identifies gaps and recommends new components. Tier 2 meta-agent with self-cascade enabled.
 mode: subagent
-model: ollama-cloud/glm-5.1
+model: ollama-cloud/deepseek-v4-pro-max
 color: "#6366F1"
 permission:
   read: allow
diff --git a/.kilo/agents/history-miner.md b/.kilo/agents/history-miner.md
index d1dd2fb..7b776bd 100755
--- a/.kilo/agents/history-miner.md
+++ b/.kilo/agents/history-miner.md
@@ -1,7 +1,7 @@
 ---
 description: Analyzes git history to find duplicates and past solutions, preventing regression and duplicate work (GNS-2 Tier 0)
 mode: subagent
-model: ollama-cloud/nemotron-3-super
+model: ollama-cloud/qwen3.5-122b
 color: "#059669"
 permission:
   read: allow
diff --git a/.kilo/agents/markdown-validator.md b/.kilo/agents/markdown-validator.md
index 6463400..152d840 100755
--- a/.kilo/agents/markdown-validator.md
+++ b/.kilo/agents/markdown-validator.md
@@ -1,7 +1,7 @@
 ---
 description: Validates and corrects Markdown descriptions for Gitea issues (GNS-2 Tier 0)
 mode: subagent
-model: ollama-cloud/deepseek-v4-pro-max
+model: ollama-cloud/nemotron-3-nano
 color: "#F97316"
 permission:
   read: allow
diff --git a/.kilo/agents/release-manager.md b/.kilo/agents/release-manager.md
index e02809b..180f9e7 100755
--- a/.kilo/agents/release-manager.md
+++ b/.kilo/agents/release-manager.md
@@ -1,7 +1,7 @@
 ---
 description: Manages git operations, semantic versioning, branching, and deployments. Ensures clean history (GNS-2 Tier 1)
 mode: subagent
-model: ollama-cloud/glm-5.1
+model: ollama-cloud/kimi-k2.6
 color: "#581C87"
 permission:
   read: allow
diff --git a/.kilo/capability-index.yaml b/.kilo/capability-index.yaml
index 86de347..f39913e 100644
--- a/.kilo/capability-index.yaml
+++ b/.kilo/capability-index.yaml
@@ -412,7 +412,7 @@ agents:
     - screenshots
     forbidden:
     - unit_testing
-    model: ollama-cloud/qwen3-coder:480b
+    model: ollama-cloud/deepseek-v4-flash
     mode: subagent
     delegates_to:
     - orchestrator
@@ -501,7 +501,7 @@ agents:
     - new_agent_specs
     forbidden:
     - implementation
-    model: ollama-cloud/glm-5.1
+    model: ollama-cloud/deepseek-v4-pro-max
     mode: subagent
     delegates_to:
     - agent-architect
@@ -585,7 +585,7 @@ agents:
     forbidden:
     - code_changes
     - feature_development
-    model: ollama-cloud/glm-5.1
+    model: ollama-cloud/kimi-k2.6
     mode: subagent
     delegates_to:
     - evaluator
@@ -734,7 +734,7 @@ agents:
     - corrections
     forbidden:
     - content_creation
-    model: ollama-cloud/deepseek-v4-pro-max
+    model: ollama-cloud/nemotron-3-nano
     mode: subagent
     delegates_to:
     - orchestrator
diff --git a/agent-evolution/Dockerfile b/agent-evolution/Dockerfile
index e60fca5..c7dfe16 100644
--- a/agent-evolution/Dockerfile
+++ b/agent-evolution/Dockerfile
@@ -1,30 +1,24 @@
 # Agent Evolution Dashboard Dockerfile
-# Standalone version - works from file:// or HTTP
+# Mount-required version: the dashboard content is supplied via volume mounts at run time.
+# Nothing is copied from the repo into the image (only a placeholder index.html is baked in), so data changes never require a rebuild.
+#
+# Build once:
+#   docker build -t apaw-evolution -f agent-evolution/Dockerfile .
+#
+# Workflow:
+#   bun run sync:evolution   # host-side — regenerates index.standalone.html
+#   bash agent-evolution/docker-run.sh reload   # container restarts with new mounts
 
-# Build stage - run sync to generate standalone HTML
-FROM oven/bun:1 AS builder
-
-WORKDIR /build
-
-# Copy config files for sync
-COPY .kilo/agents/*.md ./.kilo/agents/
-COPY .kilo/capability-index.yaml ./.kilo/
-COPY .kilo/kilo.jsonc ./.kilo/
-COPY agent-evolution/ ./agent-evolution/
-
-# Run sync to generate standalone HTML with embedded data
-RUN bun agent-evolution/scripts/sync-agent-history.ts || true
-
-# Production stage - Python HTTP server
-FROM python:3.12-alpine AS production
+FROM python:3.12-alpine
 
 WORKDIR /app
 
-# Copy standalone HTML (embedded data)
-COPY --from=builder /build/agent-evolution/index.standalone.html ./index.html
+# Placeholder content until host mounts the real index.standalone.html
+RUN echo '<!DOCTYPE html><html><head><meta charset=utf-8><title>APAW Evolution Dashboard</title></head><body><h1>Mount required</h1><p>Run <code>bun run sync:evolution</code> on the host, then reload the container.</p></body></html>' > index.html
 
-# Expose port
 EXPOSE 3001
 
-# Simple HTTP server (no CORS issues)
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+  CMD wget --no-verbose --tries=1 --spider http://127.0.0.1:3001/ || exit 1
+
 CMD ["python3", "-m", "http.server", "3001"]
\ No newline at end of file
diff --git a/agent-evolution/data/agent-versions.json b/agent-evolution/data/agent-versions.json
index a3e7889..8a397de 100644
--- a/agent-evolution/data/agent-versions.json
+++ b/agent-evolution/data/agent-versions.json
@@ -1,22 +1,17 @@
 {
   "version": "1.0.0",
-  "lastUpdated": "2026-04-27T20:28:58.592Z",
+  "lastUpdated": "2026-05-25T13:37:20.281Z",
   "agents": {
     "lead-developer": {
       "current": {
-        "description": "Primary code writer for backend and core logic. Writes implementation to pass tests",
+        "description": "Primary code writer for backend and core logic. Writes implementation to pass tests (GNS-2 Tier 1)",
         "mode": "subagent",
-        "model": "ollama-cloud/nemotron-3-super",
+        "model": "ollama-cloud/qwen3-coder:480b",
         "provider": "Ollama",
         "variant": "thinking",
         "color": "\"#DC2626\"",
         "category": "General",
-        "capabilities": [
-          "code_writing",
-          "refactoring",
-          "bug_fixing",
-          "implementation"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -25,47 +20,39 @@
           "type": "model_change",
           "from": null,
           "to": "ollama-cloud/qwen3-coder:480b",
-          "reason": "Initial configuration from capability-index.yaml",
+          "reason": "Initial configuration",
           "source": "git"
         },
         {
-          "date": "2026-04-27T16:56:09.013Z",
+          "date": "2026-04-27T16:56:09Z",
           "commit": "model-research-sync",
           "type": "model_change",
           "from": "ollama-cloud/qwen3-coder:480b",
           "to": "ollama-cloud/nemotron-3-super",
-          "reason": "Nemotron 3 Super has better reasoning for core development tasks and RULER@1M context window. SWE-bench 68% vs Qwen's 66.5%.",
+          "reason": "Nemotron 3 Super has better reasoning",
           "source": "research"
         },
         {
-          "date": "2026-04-27T20:28:58.592Z",
-          "commit": "model-research-sync",
+          "date": "2026-05-24T01:00:00Z",
+          "commit": "ollama-cloud-consolidation",
           "type": "model_change",
-          "from": "ollama-cloud/qwen3-coder:480b",
-          "to": "ollama-cloud/nemotron-3-super",
-          "reason": "Nemotron 3 Super has better reasoning for core development tasks and RULER@1M context window. SWE-bench 68% vs Qwen's 66.5%.",
-          "source": "research"
+          "from": "ollama-cloud/nemotron-3-super",
+          "to": "ollama-cloud/qwen3-coder:480b",
+          "reason": "Reverted to qwen3-coder: SWE-bench 66.5% is coding-benchmark standard. Matrix score 92 vs nemotron 70.",
+          "source": "orchestrator-analysis"
         }
       ],
       "performance_log": []
     },
     "frontend-developer": {
       "current": {
-        "description": "Handles UI implementation with multimodal capabilities. Accepts visual references like screenshots and mockups",
+        "description": "Handles UI implementation with multimodal capabilities. Accepts visual references like screenshots and mockups (GNS-2 Tier 1)",
         "mode": "all",
-        "model": "ollama-cloud/qwen3-coder:480b",
+        "model": "ollama-cloud/minimax-m2.5",
         "provider": "Ollama",
         "color": "\"#0EA5E9\"",
         "category": "General",
-        "capabilities": [
-          "ui_implementation",
-          "component_creation",
-          "styling",
-          "responsive_design",
-          "nextjs_development",
-          "vue_nuxt_development",
-          "react_development"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -76,48 +63,41 @@
           "to": "ollama-cloud/qwen3-coder:480b",
           "reason": "Flutter development support added",
           "source": "git"
+        },
+        {
+          "date": "2026-04-27T17:00:00Z",
+          "commit": "model-research-sync",
+          "type": "model_change",
+          "from": "ollama-cloud/qwen3-coder:480b",
+          "to": "ollama-cloud/minimax-m2.5",
+          "reason": "Matrix score 92 for frontend on M2.5. SWE-bench 80.2%.",
+          "source": "research"
         }
       ],
       "performance_log": []
     },
     "backend-developer": {
       "current": {
-        "description": "Backend specialist for Node.js, Express, APIs, and database integration",
+        "description": "Backend specialist for Node.js, Express, APIs, and database integration (GNS-2 Tier 1)",
         "mode": "subagent",
         "model": "ollama-cloud/qwen3-coder:480b",
         "provider": "Ollama",
         "color": "\"#10B981\"",
         "category": "General",
-        "capabilities": [
-          "api_development",
-          "database_design",
-          "server_logic",
-          "authentication",
-          "postgresql_integration",
-          "sqlite_integration"
-        ]
+        "capabilities": []
       },
       "history": [],
       "performance_log": []
     },
     "go-developer": {
       "current": {
-        "description": "Go backend specialist for Gin, Echo, APIs, and database integration",
+        "description": "Go backend specialist for Gin, Echo, APIs, and database integration (GNS-2 Tier 1)",
         "mode": "subagent",
-        "model": "ollama-cloud/qwen3-coder:480b",
+        "model": "ollama-cloud/deepseek-v4-pro-max",
         "provider": "Ollama",
         "color": "\"#00ADD8\"",
         "category": "General",
-        "capabilities": [
-          "go_api_development",
-          "go_database_design",
-          "go_concurrent_programming",
-          "go_authentication",
-          "go_microservices",
-          "postgresql_integration",
-          "sqlite_integration",
-          "clickhouse_integration"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -126,64 +106,57 @@
           "type": "model_change",
           "from": "ollama-cloud/deepseek-v3.2",
           "to": "ollama-cloud/qwen3-coder:480b",
-          "reason": "Qwen3-Coder optimized for Go development",
+          "reason": "Qwen3-Coder optimized for Go",
           "source": "git"
+        },
+        {
+          "date": "2026-04-27T17:00:00Z",
+          "commit": "model-research-sync",
+          "type": "model_change",
+          "from": "ollama-cloud/qwen3-coder:480b",
+          "to": "ollama-cloud/deepseek-v4-pro-max",
+          "reason": "Matrix score 88 for go-dev on V4-Pro. DeepSeek traditionally strong in Go/Rust.",
+          "source": "research"
         }
       ],
       "performance_log": []
     },
     "sdet-engineer": {
       "current": {
-        "description": "Writes tests following TDD methodology. Tests MUST fail initially (Red phase)",
+        "description": "Writes tests following TDD methodology. Tests MUST fail initially (Red phase) (GNS-2 Tier 1)",
         "mode": "all",
         "model": "ollama-cloud/qwen3-coder:480b",
         "provider": "Ollama",
         "variant": "thinking",
         "color": "\"#8B5CF6\"",
         "category": "General",
-        "capabilities": [
-          "unit_tests",
-          "integration_tests",
-          "e2e_tests",
-          "test_planning",
-          "visual_regression"
-        ]
+        "capabilities": []
       },
       "history": [],
       "performance_log": []
     },
     "code-skeptic": {
       "current": {
-        "description": "Adversarial code reviewer. Finds problems and issues. Does NOT suggest implementations",
+        "description": "Adversarial code reviewer. Finds problems and issues. Does NOT suggest implementations (GNS-2 Tier 0)",
         "mode": "subagent",
         "model": "ollama-cloud/minimax-m2.5",
         "provider": "Ollama",
         "color": "\"#E11D48\"",
         "category": "General",
-        "capabilities": [
-          "code_review",
-          "security_review",
-          "style_check",
-          "issue_identification"
-        ]
+        "capabilities": []
       },
       "history": [],
       "performance_log": []
     },
     "security-auditor": {
       "current": {
-        "description": "Scans for security vulnerabilities, OWASP Top 10, dependency CVEs, and hardcoded secrets",
-        "mode": "all",
-        "model": "ollama-cloud/nemotron-3-super",
+        "description": "Scans for security vulnerabilities, OWASP Top 10, dependency CVEs, and hardcoded secrets (GNS-2 Tier 0)",
+        "mode": "subagent",
+        "model": "ollama-cloud/deepseek-v4-pro-max",
         "provider": "Ollama",
-        "color": "\"#7F1D1D\"",
+        "color": "\"#DC2626\"",
         "category": "General",
-        "capabilities": [
-          "vulnerability_scan",
-          "owasp_check",
-          "secret_detection",
-          "auth_review"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -192,26 +165,30 @@
           "type": "model_change",
           "from": "ollama-cloud/deepseek-v3.2",
           "to": "ollama-cloud/nemotron-3-super",
-          "reason": "Nemotron 3 Super optimized for security analysis with RULER@1M",
+          "reason": "Nemotron 3 Super optimized for security analysis",
           "source": "git"
+        },
+        {
+          "date": "2026-05-24T01:00:00Z",
+          "commit": "ollama-cloud-consolidation",
+          "type": "model_change",
+          "from": "ollama-cloud/nemotron-3-super",
+          "to": "ollama-cloud/deepseek-v4-pro-max",
+          "reason": "V4-Pro Max matrix=80 vs nemotron=76. SWE-V 80.6, 1M context.",
+          "source": "orchestrator-analysis"
         }
       ],
       "performance_log": []
     },
     "performance-engineer": {
       "current": {
-        "description": "Reviews code for performance issues. Focuses on efficiency, N+1 queries, memory leaks, and algorithmic complexity",
+        "description": "Reviews code for performance issues. Focuses on efficiency, N+1 queries, memory leaks, and algorithmic complexity (GNS-2 Tier 0)",
         "mode": "all",
-        "model": "ollama-cloud/nemotron-3-super",
+        "model": "ollama-cloud/deepseek-v4-pro-max",
         "provider": "Ollama",
         "color": "\"#0D9488\"",
         "category": "General",
-        "capabilities": [
-          "performance_analysis",
-          "n_plus_one_detection",
-          "memory_leak_check",
-          "algorithm_analysis"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -222,68 +199,54 @@
           "to": "ollama-cloud/nemotron-3-super",
           "reason": "Better reasoning for performance analysis",
           "source": "git"
+        },
+        {
+          "date": "2026-05-24T01:00:00Z",
+          "commit": "ollama-cloud-consolidation",
+          "type": "model_change",
+          "from": "ollama-cloud/nemotron-3-super",
+          "to": "ollama-cloud/deepseek-v4-pro-max",
+          "reason": "Matrix=84 for perf-engineer on V4-Pro. GPQA 90.1 for reasoning.",
+          "source": "orchestrator-analysis"
         }
       ],
       "performance_log": []
     },
     "browser-automation": {
       "current": {
-        "description": "Browser automation agent using Playwright MCP for E2E testing, form filling, navigation, and web interaction",
+        "description": "Browser automation agent using Playwright MCP for E2E testing, form filling, navigation, and web interaction (GNS-2 Tier 0)",
         "mode": "subagent",
         "model": "ollama-cloud/qwen3-coder:480b",
         "provider": "Ollama",
         "color": "\"#1E88E5\"",
         "category": "General",
-        "capabilities": [
-          "e2e_browser_tests",
-          "form_filling",
-          "navigation_testing",
-          "screenshot_capture"
-        ]
+        "capabilities": []
       },
       "history": [],
       "performance_log": []
     },
     "visual-tester": {
       "current": {
-        "description": "Visual regression testing agent that captures screenshots, extracts UI elements with bounding boxes, compares via pixelmatch, and detects console/network errors",
+        "description": "Visual regression testing agent that compares screenshots and detects UI differences using pixelmatch and image diff (GNS-2 Tier 0)",
         "mode": "subagent",
         "model": "ollama-cloud/qwen3-coder:480b",
         "provider": "Ollama",
         "color": "\"#E91E63\"",
         "category": "General",
-        "capabilities": [
-          "visual_regression",
-          "pixel_comparison",
-          "screenshot_diff",
-          "ui_validation",
-          "bbox_element_extraction",
-          "console_error_detection",
-          "network_error_detection",
-          "responsive_layout_check",
-          "button_overflow_detection",
-          "gitea_integration",
-          "docker_networking"
-        ]
+        "capabilities": []
       },
       "history": [],
       "performance_log": []
     },
     "system-analyst": {
       "current": {
-        "description": "Designs technical specifications, data schemas, and API contracts before implementation",
+        "description": "Designs technical specifications, data schemas, and API contracts before implementation (GNS-2 Tier 1)",
         "mode": "subagent",
-        "model": "ollama-cloud/nemotron-3-super",
+        "model": "ollama-cloud/deepseek-v4-pro-max",
         "provider": "Ollama",
-        "variant": "thinking",
         "color": "\"#0891B2\"",
         "category": "General",
-        "capabilities": [
-          "architecture_design",
-          "api_specification",
-          "database_modeling",
-          "technical_documentation"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -292,11 +255,11 @@
           "type": "model_change",
           "from": "ollama-cloud/gpt-oss:120b",
           "to": "ollama-cloud/glm-5",
-          "reason": "GLM-5 better for system engineering and architecture",
+          "reason": "GLM-5 better for system engineering",
           "source": "git"
         },
         {
-          "date": "2026-04-23T06:24:32.546Z",
+          "date": "2026-04-23T06:24:32Z",
           "commit": "sync",
           "type": "model_change",
           "from": "ollama-cloud/glm-5",
@@ -305,32 +268,44 @@
           "source": "git"
         },
         {
-          "date": "2026-04-27T16:59:52.825Z",
+          "date": "2026-04-27T16:59:52Z",
           "commit": "model-research-sync",
           "type": "model_change",
           "from": "ollama-cloud/glm-5.1",
           "to": "ollama-cloud/nemotron-3-super",
           "reason": "Test recommendation for model research sync script",
           "source": "research"
+        },
+        {
+          "date": "2026-05-24T01:00:00Z",
+          "commit": "ollama-cloud-consolidation",
+          "type": "model_change",
+          "from": "ollama-cloud/nemotron-3-super",
+          "to": "ollama-cloud/glm-5.1",
+          "reason": "Reverted: GLM-5.1 Arena ELO 1451, instruction following ~90. Standardization with 12 other agents.",
+          "source": "orchestrator-analysis"
+        },
+        {
+          "date": "2026-05-25T13:37:20.281Z",
+          "commit": "sync",
+          "type": "model_change",
+          "from": "ollama-cloud/glm-5.1",
+          "to": "ollama-cloud/deepseek-v4-pro-max",
+          "reason": "Model update from sync",
+          "source": "git"
         }
       ],
       "performance_log": []
     },
     "requirement-refiner": {
       "current": {
-        "description": "Converts vague ideas and bug reports into strict User Stories with acceptance criteria checklists",
+        "description": "Converts vague ideas and bug reports into strict User Stories with acceptance criteria checklists (GNS-2 Tier 1)",
         "mode": "all",
-        "model": "ollama-cloud/glm-5.1",
+        "model": "ollama-cloud/kimi-k2-thinking",
         "provider": "Ollama",
         "variant": "thinking",
         "color": "\"#4F46E5\"",
-        "category": "General",
-        "capabilities": [
-          "requirement_analysis",
-          "user_story_creation",
-          "acceptance_criteria",
-          "clarification"
-        ]
+        "category": "General"
       },
       "history": [
         {
@@ -339,39 +314,51 @@
           "type": "model_change",
           "from": "ollama-cloud/nemotron-3-super",
           "to": "ollama-cloud/glm-5",
-          "reason": "+33% quality. GLM-5 excels at requirement analysis and system engineering",
+          "reason": "+33% quality. GLM-5 excels at requirement analysis",
           "source": "research"
         },
         {
-          "date": "2026-04-23T06:24:32.546Z",
+          "date": "2026-04-23T06:24:32Z",
           "commit": "sync",
           "type": "model_change",
           "from": "ollama-cloud/glm-5",
           "to": "ollama-cloud/glm-5.1",
           "reason": "Model update from sync",
           "source": "git"
+        },
+        {
+          "date": "2026-05-24T01:00:00Z",
+          "commit": "ollama-cloud-consolidation",
+          "type": "model_change",
+          "from": "ollama-cloud/glm-5.1",
+          "to": "ollama-cloud/kimi-k2.6",
+          "reason": "kimi-k2.6 IF=91 highest, multimodal for mockup understanding. Matrix ~88-90 for req-refiner.",
+          "source": "orchestrator-analysis"
+        },
+        {
+          "date": "2026-05-23T23:35:02.184Z",
+          "commit": "sync",
+          "type": "model_change",
+          "from": "ollama-cloud/kimi-k2.6",
+          "to": "ollama-cloud/kimi-k2-thinking",
+          "reason": "Model update from sync",
+          "source": "git"
         }
       ],
       "performance_log": []
     },
     "history-miner": {
       "current": {
-        "description": "Analyzes git history to find duplicates and past solutions, preventing regression and duplicate work",
+        "description": "Analyzes git history to find duplicates and past solutions, preventing regression and duplicate work (GNS-2 Tier 0)",
         "mode": "subagent",
         "model": "ollama-cloud/nemotron-3-super",
         "provider": "Ollama",
         "color": "\"#059669\"",
-        "category": "General",
-        "capabilities": [
-          "git_search",
-          "duplicate_detection",
-          "past_solution_finder",
-          "pattern_identification"
-        ]
+        "category": "General"
       },
       "history": [
         {
-          "date": "2026-04-23T06:24:32.546Z",
+          "date": "2026-04-23T06:24:32Z",
           "commit": "sync",
           "type": "model_change",
           "from": "ollama-cloud/glm-5",
@@ -384,18 +371,13 @@
     },
     "capability-analyst": {
       "current": {
-        "description": "Analyzes task requirements against available agents, workflows, and skills. Identifies gaps and recommends new components.",
+        "description": "Analyzes task requirements against available agents, workflows, and skills. Identifies gaps and recommends new components. Tier 2 meta-agent with self-cascade enabled.",
         "mode": "subagent",
         "model": "ollama-cloud/glm-5.1",
         "provider": "Ollama",
         "color": "\"#6366F1\"",
         "category": "General",
-        "capabilities": [
-          "gap_analysis",
-          "capability_mapping",
-          "recommendation_generation",
-          "coverage_analysis"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -404,11 +386,11 @@
           "type": "model_change",
           "from": "ollama-cloud/nemotron-3-super",
           "to": "openrouter/qwen/qwen3.6-plus:free",
-          "reason": "+23% quality, IF:90 score, 1M context, FREE via OpenRouter",
+          "reason": "+23% quality, IF:90, FREE via OpenRouter",
           "source": "research"
         },
         {
-          "date": "2026-04-23T06:24:32.546Z",
+          "date": "2026-04-23T06:24:32Z",
           "commit": "sync",
           "type": "model_change",
           "from": "openrouter/qwen/qwen3.6-plus:free",
@@ -421,51 +403,50 @@
     },
     "orchestrator": {
       "current": {
-        "description": "Main dispatcher. Routes tasks between agents based on Issue status and manages the workflow state machine. IF:90 for optimal routing accuracy.",
+        "description": "Main dispatcher. Routes tasks between agents based on Issue status and manages the workflow state machine. IF:90 for optimal routing accuracy. (GNS-2 Tier 1)",
         "mode": "all",
-        "model": "ollama-cloud/glm-5.1",
+        "model": "ollama-cloud/kimi-k2.6",
         "provider": "Ollama",
         "variant": "thinking",
         "color": "\"#7C3AED\"",
         "category": "General",
-        "capabilities": [
-          "task_routing",
-          "state_management",
-          "agent_coordination",
-          "workflow_execution"
-        ]
+        "capabilities": []
       },
       "history": [
         {
-          "date": "2026-04-23T06:24:32.546Z",
+          "date": "2026-04-23T06:24:32Z",
           "commit": "sync",
           "type": "model_change",
           "from": "ollama-cloud/glm-5",
           "to": "ollama-cloud/glm-5.1",
           "reason": "Model update from sync",
           "source": "git"
+        },
+        {
+          "date": "2026-04-27T20:28:58Z",
+          "commit": "model-research-sync",
+          "type": "model_change",
+          "from": "ollama-cloud/glm-5.1",
+          "to": "ollama-cloud/kimi-k2.6",
+          "reason": "kimi-k2.6 best fit for orchestration (92). 300 sub-agent swarm.",
+          "source": "research"
         }
       ],
       "performance_log": []
     },
     "release-manager": {
       "current": {
-        "description": "Manages git operations, semantic versioning, branching, and deployments. Ensures clean history",
+        "description": "Manages git operations, semantic versioning, branching, and deployments. Ensures clean history (GNS-2 Tier 1)",
         "mode": "subagent",
         "model": "ollama-cloud/glm-5.1",
         "provider": "Ollama",
         "color": "\"#581C87\"",
         "category": "General",
-        "capabilities": [
-          "git_operations",
-          "version_management",
-          "changelog_creation",
-          "deployment"
-        ]
+        "capabilities": []
       },
       "history": [
         {
-          "date": "2026-04-23T06:24:32.546Z",
+          "date": "2026-04-23T06:24:32Z",
           "commit": "sync",
           "type": "model_change",
           "from": "ollama-cloud/devstral-2:123b",
@@ -478,19 +459,14 @@
     },
     "evaluator": {
       "current": {
-        "description": "Scores agent effectiveness after task completion for continuous improvement",
+        "description": "Scores agent effectiveness after task completion for continuous improvement. Tier 2 meta-agent with self-cascade enabled.",
         "mode": "subagent",
-        "model": "ollama-cloud/glm-5.1",
+        "model": "ollama-cloud/qwen3.5-122b",
         "provider": "Ollama",
         "variant": "thinking",
         "color": "\"#047857\"",
         "category": "General",
-        "capabilities": [
-          "performance_scoring",
-          "process_analysis",
-          "pattern_identification",
-          "improvement_recommendations"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -512,31 +488,35 @@
           "source": "research"
         },
         {
-          "date": "2026-04-23T06:24:32.546Z",
+          "date": "2026-04-23T06:24:32Z",
           "commit": "sync",
           "type": "model_change",
           "from": "openrouter/qwen/qwen3.6-plus:free",
           "to": "ollama-cloud/glm-5.1",
           "reason": "Model update from sync",
           "source": "git"
+        },
+        {
+          "date": "2026-05-25T13:37:20.281Z",
+          "commit": "sync",
+          "type": "model_change",
+          "from": "ollama-cloud/glm-5.1",
+          "to": "ollama-cloud/qwen3.5-122b",
+          "reason": "Model update from sync",
+          "source": "git"
         }
       ],
       "performance_log": []
     },
     "prompt-optimizer": {
       "current": {
-        "description": "Improves agent system prompts based on performance failures. Meta-learner for prompt optimization",
+        "description": "Improves agent system prompts based on performance failures. Meta-learner for prompt optimization (GNS-2 Tier 1)",
         "mode": "subagent",
-        "model": "ollama-cloud/glm-5.1",
+        "model": "ollama-cloud/qwen3.5-122b",
         "provider": "Ollama",
-        "variant": "instant",
         "color": "\"#BE185D\"",
         "category": "General",
-        "capabilities": [
-          "prompt_analysis",
-          "prompt_improvement",
-          "failure_pattern_detection"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -549,48 +529,66 @@
           "source": "git"
         },
         {
-          "date": "2026-04-23T06:24:32.546Z",
+          "date": "2026-04-23T06:24:32Z",
           "commit": "sync",
           "type": "model_change",
           "from": "ollama-cloud/nemotron-3-super",
           "to": "ollama-cloud/glm-5.1",
           "reason": "Model update from sync",
           "source": "git"
+        },
+        {
+          "date": "2026-05-24T01:00:00Z",
+          "commit": "ollama-cloud-consolidation",
+          "type": "model_change",
+          "from": "ollama-cloud/glm-5.1",
+          "to": "ollama-cloud/qwen3.5",
+          "reason": "MIGRATION: qwen3.6-plus was OpenRouter (not Ollama Cloud). qwen3.5 has IF=92, updated 2 days ago, 12.4M pulls.",
+          "source": "orchestrator-analysis"
+        },
+        {
+          "date": "2026-05-23T23:35:02.184Z",
+          "commit": "sync",
+          "type": "model_change",
+          "from": "ollama-cloud/qwen3.5",
+          "to": "ollama-cloud/qwen3.6-plus",
+          "reason": "Model update from sync",
+          "source": "git"
+        },
+        {
+          "date": "2026-05-25T13:37:20.281Z",
+          "commit": "sync",
+          "type": "model_change",
+          "from": "ollama-cloud/qwen3.6-plus",
+          "to": "ollama-cloud/qwen3.5-122b",
+          "reason": "Model update from sync",
+          "source": "git"
         }
       ],
       "performance_log": []
     },
     "the-fixer": {
       "current": {
-        "description": "Iteratively fixes bugs based on specific error reports and test failures",
+        "description": "Iteratively fixes bugs based on specific error reports and test failures (GNS-2 Tier 1)",
         "mode": "all",
-        "model": "ollama-cloud/minimax-m2.5",
+        "model": "ollama-cloud/kimi-k2.6",
         "provider": "Ollama",
         "color": "\"#F59E0B\"",
         "category": "General",
-        "capabilities": [
-          "bug_fixing",
-          "issue_resolution",
-          "code_correction"
-        ]
+        "capabilities": []
       },
       "history": [],
       "performance_log": []
     },
     "product-owner": {
       "current": {
-        "description": "Manages issue checklists, status labels, tracks progress and coordinates with human users",
+        "description": "Manages issue checklists, status labels, tracks progress and coordinates with human users (GNS-2 Tier 1)",
         "mode": "subagent",
         "model": "ollama-cloud/glm-5.1",
         "provider": "Ollama",
         "color": "\"#EA580C\"",
         "category": "General",
-        "capabilities": [
-          "issue_management",
-          "prioritization",
-          "backlog_management",
-          "workflow_completion"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -603,7 +601,7 @@
           "source": "git"
         },
         {
-          "date": "2026-04-23T06:24:32.546Z",
+          "date": "2026-04-23T06:24:32Z",
           "commit": "sync",
           "type": "model_change",
           "from": "ollama-cloud/glm-5",
@@ -616,45 +614,46 @@
     },
     "workflow-architect": {
       "current": {
-        "description": "Creates and maintains workflow definitions with complete architecture, Gitea integration, and quality gates",
+        "description": "Creates and maintains workflow definitions with complete architecture, Gitea integration, and quality gates (GNS-2 Tier 1)",
         "mode": "subagent",
-        "model": "ollama-cloud/glm-5.1",
+        "model": "ollama-cloud/qwen3.5-122b",
         "provider": "Ollama",
         "variant": "thinking",
         "color": "\"#EC4899\"",
         "category": "General",
-        "capabilities": [
-          "workflow_design",
-          "process_definition",
-          "automation_setup"
-        ]
+        "capabilities": []
       },
       "history": [
         {
-          "date": "2026-04-23T06:24:32.546Z",
+          "date": "2026-04-23T06:24:32Z",
           "commit": "sync",
           "type": "model_change",
           "from": "ollama-cloud/glm-5",
           "to": "ollama-cloud/glm-5.1",
           "reason": "Model update from sync",
           "source": "git"
+        },
+        {
+          "date": "2026-05-25T13:37:20.281Z",
+          "commit": "sync",
+          "type": "model_change",
+          "from": "ollama-cloud/glm-5.1",
+          "to": "ollama-cloud/qwen3.5-122b",
+          "reason": "Model update from sync",
+          "source": "git"
         }
       ],
       "performance_log": []
     },
     "markdown-validator": {
       "current": {
-        "description": "Validates and corrects Markdown descriptions for Gitea issues",
+        "description": "Validates and corrects Markdown descriptions for Gitea issues (GNS-2 Tier 0)",
         "mode": "subagent",
-        "model": "ollama-cloud/nemotron-3-nano:30b",
+        "model": "ollama-cloud/deepseek-v4-pro-max",
         "provider": "Ollama",
         "color": "\"#F97316\"",
         "category": "General",
-        "capabilities": [
-          "markdown_validation",
-          "formatting_check",
-          "link_validation"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -665,6 +664,24 @@
           "to": "ollama-cloud/nemotron-3-nano:30b",
           "reason": "Nano efficient for lightweight validation tasks",
           "source": "git"
+        },
+        {
+          "date": "2026-05-24T01:00:00Z",
+          "commit": "ollama-cloud-consolidation",
+          "type": "model_change",
+          "from": "ollama-cloud/nemotron-3-nano:30b",
+          "to": "ollama-cloud/nemotron-3-nano",
+          "reason": "Unified naming. Nano IF=68, tiny and cheap, perfect for validation.",
+          "source": "orchestrator-analysis"
+        },
+        {
+          "date": "2026-05-23T23:35:02.185Z",
+          "commit": "sync",
+          "type": "model_change",
+          "from": "ollama-cloud/nemotron-3-nano",
+          "to": "ollama-cloud/deepseek-v4-pro-max",
+          "reason": "Model update from sync",
+          "source": "git"
         }
       ],
       "performance_log": []
@@ -673,17 +690,12 @@
       "current": {
         "name": "Agent Architect",
         "mode": "subagent",
-        "model": "ollama-cloud/glm-5.1",
+        "model": "ollama-cloud/kimi-k2.6",
         "provider": "Ollama",
-        "variant": "thinking",
-        "description": "Creates, modifies, and reviews new agents, workflows, and skills based on capability gap analysis",
+        "description": "Creates, modifies, and reviews new agents, workflows, and skills based on capability gap analysis. Tier 2 meta-agent with self-cascade enabled.",
         "color": "\"#8B5CF6\"",
         "category": "General",
-        "capabilities": [
-          "agent_design",
-          "prompt_engineering",
-          "capability_definition"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -692,36 +704,39 @@
           "type": "model_change",
           "from": "ollama-cloud/nemotron-3-super",
           "to": "openrouter/qwen/qwen3.6-plus:free",
-          "reason": "+22% quality, IF:90 for YAML frontmatter generation, 1M context for all agents analysis",
+          "reason": "+22% quality, IF:90 for YAML frontmatter generation",
           "source": "research"
         },
         {
-          "date": "2026-04-23T06:24:32.546Z",
+          "date": "2026-04-23T06:24:32Z",
           "commit": "sync",
           "type": "model_change",
           "from": "openrouter/qwen/qwen3.6-plus:free",
           "to": "ollama-cloud/glm-5.1",
           "reason": "Model update from sync",
           "source": "git"
+        },
+        {
+          "date": "2026-05-24T01:00:00Z",
+          "commit": "ollama-cloud-consolidation",
+          "type": "model_change",
+          "from": "ollama-cloud/glm-5.1",
+          "to": "ollama-cloud/kimi-k2.6",
+          "reason": "kimi-k2.6 best fit for agent-architect (86). Multimodal for reviewing UI components.",
+          "source": "orchestrator-analysis"
         }
       ],
       "performance_log": []
     },
     "planner": {
       "current": {
-        "description": "Advanced task planner using Chain of Thought, Tree of Thoughts, and Plan-Execute-Reflect",
+        "description": "Advanced task planner using Chain of Thought, Tree of Thoughts, and Plan-Execute-Reflect (GNS-2 Tier 0)",
         "mode": "subagent",
-        "model": "ollama-cloud/nemotron-3-super",
+        "model": "ollama-cloud/deepseek-v4-pro-max",
         "provider": "Ollama",
         "color": "\"#F59E0B\"",
         "category": "General",
-        "capabilities": [
-          "task_decomposition",
-          "chain_of_thought",
-          "tree_of_thoughts",
-          "plan_execute_reflect",
-          "dependency_analysis"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -732,25 +747,28 @@
           "to": "ollama-cloud/nemotron-3-super",
           "reason": "Nemotron 3 Super excels at planning",
           "source": "git"
+        },
+        {
+          "date": "2026-04-27T17:00:00Z",
+          "commit": "model-research-sync",
+          "type": "model_change",
+          "from": "ollama-cloud/nemotron-3-super",
+          "to": "ollama-cloud/deepseek-v4-pro-max",
+          "reason": "Matrix score 88 for planner on V4-Pro. GPQA 90.1.",
+          "source": "research"
         }
       ],
       "performance_log": []
     },
     "reflector": {
       "current": {
-        "description": "Self-reflection agent using Reflexion pattern - learns from mistakes",
+        "description": "Self-reflection agent using Reflexion pattern - learns from mistakes (GNS-2 Tier 0)",
         "mode": "subagent",
-        "model": "ollama-cloud/nemotron-3-super",
+        "model": "ollama-cloud/deepseek-v4-pro-max",
         "provider": "Ollama",
         "color": "\"#10B981\"",
         "category": "General",
-        "capabilities": [
-          "self_reflection",
-          "mistake_analysis",
-          "lesson_extraction",
-          "trajectory_analysis",
-          "heuristic_evaluation"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -761,25 +779,28 @@
           "to": "ollama-cloud/nemotron-3-super",
           "reason": "Better for reflection tasks",
           "source": "git"
+        },
+        {
+          "date": "2026-04-27T17:00:00Z",
+          "commit": "model-research-sync",
+          "type": "model_change",
+          "from": "ollama-cloud/nemotron-3-super",
+          "to": "ollama-cloud/deepseek-v4-pro-max",
+          "reason": "Matrix score 84. Strong reasoning chains.",
+          "source": "research"
         }
       ],
       "performance_log": []
     },
     "memory-manager": {
       "current": {
-        "description": "Manages agent memory systems - short-term (context), long-term (vector store), and episodic (experiences)",
+        "description": "Manages agent memory systems - short-term (context), long-term (vector store), and episodic (experiences) (GNS-2 Tier 0)",
         "mode": "subagent",
-        "model": "ollama-cloud/nemotron-3-super",
+        "model": "ollama-cloud/deepseek-v4-pro-max",
         "provider": "Ollama",
         "color": "\"#8B5CF6\"",
         "category": "General",
-        "capabilities": [
-          "memory_retrieval",
-          "memory_storage",
-          "memory_consolidation",
-          "relevance_scoring",
-          "episodic_management"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -790,44 +811,59 @@
           "to": "ollama-cloud/nemotron-3-super",
           "reason": "RULER@1M critical for memory ctx",
           "source": "git"
+        },
+        {
+          "date": "2026-05-24T01:00:00Z",
+          "commit": "ollama-cloud-consolidation",
+          "type": "model_change",
+          "from": "ollama-cloud/nemotron-3-super",
+          "to": "ollama-cloud/deepseek-v4-pro-max",
+          "reason": "MIGRATION: qwen3.6-plus was OpenRouter. deepseek-v4-pro-max has 1M context (same as nemotron), matrix 86, SWE-V 80.6.",
+          "source": "orchestrator-analysis"
+        },
+        {
+          "date": "2026-05-23T23:35:02.184Z",
+          "commit": "sync",
+          "type": "model_change",
+          "from": "ollama-cloud/deepseek-v4-pro-max",
+          "to": "ollama-cloud/qwen3.6-plus",
+          "reason": "Model update from sync",
+          "source": "git"
+        },
+        {
+          "date": "2026-05-25T13:37:20.281Z",
+          "commit": "sync",
+          "type": "model_change",
+          "from": "ollama-cloud/qwen3.6-plus",
+          "to": "ollama-cloud/deepseek-v4-pro-max",
+          "reason": "Model update from sync",
+          "source": "git"
         }
       ],
       "performance_log": []
     },
     "devops-engineer": {
       "current": {
-        "description": "DevOps specialist for Docker, Kubernetes, CI/CD pipeline automation, and infrastructure management",
+        "description": "DevOps specialist for Docker, Kubernetes, CI/CD pipeline automation, and infrastructure management (GNS-2 Tier 1)",
         "mode": "subagent",
-        "model": "ollama-cloud/nemotron-3-super",
+        "model": "ollama-cloud/kimi-k2.6",
         "provider": "Ollama",
         "color": "\"#FF6B35\"",
         "category": "General",
-        "capabilities": [
-          "docker_configuration",
-          "kubernetes_setup",
-          "ci_cd_pipeline",
-          "infrastructure_automation",
-          "container_optimization"
-        ]
+        "capabilities": []
       },
       "history": [],
       "performance_log": []
     },
     "flutter-developer": {
       "current": {
-        "description": "Flutter mobile specialist for cross-platform apps, state management, and UI components",
+        "description": "Flutter mobile specialist for cross-platform apps, state management, and UI components (GNS-2 Tier 1)",
         "mode": "subagent",
         "model": "ollama-cloud/qwen3-coder:480b",
         "provider": "Ollama",
         "color": "\"#02569B\"",
         "category": "General",
-        "capabilities": [
-          "dart_programming",
-          "flutter_ui",
-          "mobile_app_development",
-          "widget_creation",
-          "state_management"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -844,100 +880,153 @@
     },
     "architect-indexer": {
       "current": {
-        "description": "Indexes and maps project codebase architecture into .architect/ directory. Creates and maintains structured documentation of entities, APIs, DB schema, file graphs, and conventions.",
+        "description": "Indexes and maps project codebase architecture into .architect/ directory. Creates and maintains structured documentation of entities, APIs, DB schema, file graphs, and conventions. (GNS-2 Tier 0)",
         "mode": "subagent",
         "model": "ollama-cloud/glm-5.1",
         "provider": "Ollama",
         "variant": "thinking",
         "color": "\"#10B981\"",
         "category": "General",
-        "capabilities": [
-          "codebase_indexing",
-          "project_mapping",
-          "architecture_documentation",
-          "dependency_analysis",
-          "entity_extraction",
-          "api_surface_discovery",
-          "convention_detection",
-          "staleness_detection"
-        ]
+        "capabilities": []
       },
       "history": [],
       "performance_log": []
     },
     "php-developer": {
       "current": {
-        "description": "PHP backend specialist for Laravel, Symfony, WordPress, and full-stack web applications",
+        "description": "PHP backend specialist for Laravel, Symfony, WordPress, and full-stack web applications (GNS-2 Tier 1)",
         "mode": "subagent",
         "model": "ollama-cloud/qwen3-coder:480b",
         "provider": "Ollama",
         "variant": "thinking",
         "color": "\"#8B5CF6\"",
         "category": "General",
-        "capabilities": [
-          "php_web_development",
-          "laravel_development",
-          "symfony_development",
-          "wordpress_development",
-          "php_api_development",
-          "php_database_design",
-          "php_authentication",
-          "php_modular_architecture",
-          "php_testing",
-          "php_security"
-        ]
+        "capabilities": []
       },
       "history": [],
       "performance_log": []
     },
     "pipeline-judge": {
       "current": {
-        "description": "Automated pipeline judge. Evaluates workflow execution by running tests, measuring token cost and wall-clock time. Produces objective fitness scores. Never writes code - only measures and scores.",
+        "description": "Automated pipeline judge. Evaluates workflow execution by running tests, measuring token cost and wall-clock time. Produces objective fitness scores. Never writes code - only measures and scores. (GNS-2 Tier 0)",
         "mode": "subagent",
-        "model": "ollama-cloud/glm-5.1",
+        "model": "ollama-cloud/kimi-k2.6",
         "provider": "Ollama",
         "color": "\"#DC2626\"",
         "category": "General",
-        "capabilities": [
-          "test_execution",
-          "fitness_scoring",
-          "metric_collection",
-          "bottleneck_detection"
-        ]
+        "capabilities": []
       },
       "history": [
         {
-          "date": "2026-04-06T00:23:50 +0100Z",
+          "date": "2026-04-06T00:23:50+01:00",
           "commit": "fa68141d",
           "type": "agent_created",
           "from": null,
           "to": "",
           "reason": "feat: add pipeline-judge agent and evolution workflow system",
           "source": "git"
+        },
+        {
+          "date": "2026-05-25T13:37:20.281Z",
+          "commit": "sync",
+          "type": "model_change",
+          "from": "ollama-cloud/glm-5.1",
+          "to": "ollama-cloud/kimi-k2.6",
+          "reason": "Model update from sync",
+          "source": "git"
         }
       ],
       "performance_log": []
     },
     "python-developer": {
       "current": {
-        "description": "Python backend specialist for Django, FastAPI, data science, and API development",
+        "description": "Python backend specialist for Django, FastAPI, data science, and API development (GNS-2 Tier 1)",
         "mode": "subagent",
         "model": "ollama-cloud/qwen3-coder:480b",
         "provider": "Ollama",
         "variant": "thinking",
         "color": "\"#3776AB\"",
         "category": "General",
-        "capabilities": [
-          "python_web_development",
-          "django_development",
-          "fastapi_development",
-          "python_api_development",
-          "python_database_design",
-          "python_authentication",
-          "python_async_patterns",
-          "python_testing",
-          "python_security"
-        ]
+        "capabilities": []
+      },
+      "history": [],
+      "performance_log": []
+    },
+    "incident-responder": {
+      "current": {
+        "description": "Server incident response and system hardening specialist. Handles live forensics, malware removal, persistence hunting, SSH-based server cleanup, and post-incident hardening. Works with any OS and panel.",
+        "mode": "subagent",
+        "model": "ollama-cloud/kimi-k2.6",
+        "provider": "Ollama",
+        "color": "\"#B91C1C\"",
+        "category": "General",
+        "capabilities": []
+      },
+      "history": [],
+      "performance_log": []
+    },
+    "workflow-cross-checker": {
+      "current": {
+        "description": "Workflow cross-checker and process inspector. Analyzes inter-agent interaction logic, prevents conflicting tasks between agents, validates conformance to project architecture, tracks current state, and asks uncomfortable but important questions before expensive work begins.",
+        "mode": "subagent",
+        "model": "ollama-cloud/kimi-k2.6",
+        "provider": "Ollama",
+        "variant": "thinking",
+        "color": "\"#9333EA\"",
+        "category": "General",
+        "capabilities": []
+      },
+      "history": [],
+      "performance_log": []
+    },
+    "code": {
+      "current": {
+        "model": "ollama-cloud/qwen3-coder:480b",
+        "provider": "Ollama",
+        "category": "Built-in",
+        "mode": "primary",
+        "color": "#3B82F6",
+        "description": "Primary code writer. Full tool access for development tasks.",
+        "capabilities": []
+      },
+      "history": [],
+      "performance_log": []
+    },
+    "ask": {
+      "current": {
+        "model": "ollama-cloud/glm-5.1",
+        "provider": "Ollama",
+        "category": "Built-in",
+        "mode": "primary",
+        "color": "#3B82F6",
+        "description": "Read-only Q&A agent for codebase questions.",
+        "capabilities": []
+      },
+      "history": [],
+      "performance_log": []
+    },
+    "plan": {
+      "current": {
+        "model": "ollama-cloud/nemotron-3-super",
+        "provider": "Ollama",
+        "category": "Built-in",
+        "mode": "primary",
+        "color": "#3B82F6",
+        "description": "Task planner. Creates detailed implementation plans.",
+        "capabilities": []
+      },
+      "history": [],
+      "performance_log": []
+    },
+    "debug": {
+      "current": {
+        "model": "ollama-cloud/glm-5.1",
+        "provider": "Ollama",
+        "category": "Built-in",
+        "mode": "primary",
+        "color": "#3B82F6",
+        "description": "Bug diagnostics and troubleshooting. GLM-5.1 ★88, reasoning for deep debug.",
+        "capabilities": []
       },
       "history": [],
       "performance_log": []
@@ -955,10 +1044,10 @@
     }
   },
   "evolution_metrics": {
-    "total_agents": 32,
+    "total_agents": 38,
     "agents_with_history": 22,
     "pending_recommendations": 0,
-    "last_sync": "2026-04-23T06:24:32.546Z",
+    "last_sync": "2026-05-25T13:37:20.282Z",
     "sync_sources": [
       "git",
       "capability-index.yaml",
diff --git a/agent-evolution/data/model-benchmarks.json b/agent-evolution/data/model-benchmarks.json
index c17d33f..96253bf 100644
--- a/agent-evolution/data/model-benchmarks.json
+++ b/agent-evolution/data/model-benchmarks.json
@@ -1,1718 +1,851 @@
-{
-  "version": "1.0.0",
-  "generated": "2026-04-30T07:00:00Z",
-  "source": "capability-index.yaml v3 optimal",
-  "total_agents": 30,
-  "total_models_tracked": 11,
-  "providers": [
-    "ollama",
-    "ollama-cloud",
-    "openrouter",
-    "groq"
-  ],
-  "models": [
-    {
-      "id": "qwen3-coder-480b",
-      "name": "Qwen3-Coder 480B",
-      "organization": "Qwen",
-      "parameters": "480B/35B active",
-      "context_window": "256K\u21921M",
-      "swe_bench": 66.5,
-      "if_score": 88,
-      "categories": [
-        "coding",
-        "agent"
-      ],
-      "description": "SOTA open-source \u043a\u043e\u0434\u0438\u043d\u0433. \u0421\u0440\u0430\u0432\u043d\u0438\u043c \u0441 Claude Sonnet 4.",
-      "tags": [
-        "coding",
-        "agent",
-        "tools"
-      ],
-      "openrouter": false,
-      "provider": "ollama"
-    },
-    {
-      "id": "minimax-m2.5",
-      "name": "MiniMax M2.5",
-      "organization": "MiniMax",
-      "parameters": "MoE undisclosed",
-      "context_window": "128K",
-      "swe_bench": 80.2,
-      "if_score": 82,
-      "categories": [
-        "coding",
-        "agent"
-      ],
-      "description": "\u041b\u0438\u0434\u0435\u0440 SWE-bench 80.2%. \u041f\u043e\u043b\u043d\u044b\u0439 lifecycle \u0440\u0430\u0437\u0440\u0430\u0431\u043e\u0442\u043a\u0438.",
-      "tags": [
-        "coding",
-        "agent"
-      ],
-      "openrouter": false,
-      "provider": "ollama"
-    },
-    {
-      "id": "minimax-m2.7",
-      "name": "MiniMax M2.7",
-      "organization": "MiniMax",
-      "parameters": "~10B active",
-      "context_window": "128K",
-      "swe_bench": 78,
-      "if_score": 80,
-      "categories": [
-        "coding",
-        "agent",
-        "efficient"
-      ],
-      "description": "\u0421\u0430\u043c\u043e\u043e\u0431\u0443\u0447\u0430\u0435\u043c\u0430\u044f. 56.2% SWE-Pro. 100 TPS. $0.30/M.",
-      "tags": [
-        "coding",
-        "agent",
-        "self-evolving"
-      ],
-      "openrouter": false,
-      "provider": "ollama"
-    },
-    {
-      "id": "deepseek-v4-pro-max",
-      "name": "DeepSeek V4-Pro",
-      "organization": "DeepSeek",
-      "parameters": "1.6T/49B active MoE",
-      "context_window": "1M",
-      "swe_bench": 80.6,
-      "if_score": 89,
-      "categories": [
-        "coding",
-        "agent",
-        "reasoning"
-      ],
-      "description": "SWE-V 80.6, LiveCodeBench 93.5(#1!), Terminal-Bench 67.9, Codeforces 3206, 1M ctx, 27% FLOPs vs V3.2. MIT.",
-      "tags": [
-        "coding",
-        "agent",
-        "thinking",
-        "tools"
-      ],
-      "openrouter": false,
-      "provider": "ollama-cloud"
-    },
-    {
-      "id": "deepseek-v4-flash",
-      "name": "DeepSeek V4-Pro",
-      "organization": "DeepSeek",
-      "parameters": "284B/13B active MoE",
-      "context_window": "1M",
-      "swe_bench": 79,
-      "if_score": 86,
-      "categories": [
-        "coding",
-        "efficient",
-        "agent"
-      ],
-      "description": "SWE-V ~79%, Flash Max = Pro \u0443\u0440\u043e\u0432\u0435\u043d\u044c reasoning. 13B active = \u0443\u043b\u044c\u0442\u0440\u0430\u0431\u044b\u0441\u0442\u0440\u044b\u0439. 1M ctx. FP4+FP8. MIT.",
-      "tags": [
-        "coding",
-        "efficient",
-        "agent",
-        "thinking"
-      ],
-      "openrouter": false,
-      "provider": "ollama-cloud"
-    },
-    {
-      "id": "kimi-k2-6",
-      "name": "Kimi K2.6",
-      "organization": "Moonshot AI",
-      "parameters": "1T/32B active MoE",
-      "context_window": "256K",
-      "swe_bench": 80.2,
-      "if_score": 91,
-      "categories": [
-        "coding",
-        "agent",
-        "multimodal"
-      ],
-      "description": "SWE-Pro 58.6(#1!), SWE-V 80.2, Terminal-Bench 66.7, HLE 54.0(#1!), BrowseComp 83.2. 13h autonomous. 300 sub-agent swarm. Modified MIT.",
-      "tags": [
-        "coding",
-        "agent",
-        "swarm",
-        "vision",
-        "thinking",
-        "tools"
-      ],
-      "openrouter": false,
-      "provider": "ollama-cloud"
-    },
-    {
-      "id": "nemotron-3-super",
-      "name": "Nemotron 3 Super",
-      "organization": "NVIDIA",
-      "parameters": "120B/12B active",
-      "context_window": "1M",
-      "swe_bench": 60.5,
-      "if_score": 78,
-      "categories": [
-        "agent",
-        "reasoning",
-        "efficient"
-      ],
-      "description": "SWE-bench 60.5%. RULER@1M 91.75%! \u041d\u043e IF \u043d\u0438\u0436\u0435 \u2014 Mamba-layers \u0438\u043d\u043e\u0433\u0434\u0430 \u00ab\u0442\u0435\u0440\u044f\u044e\u0442\u00bb \u0438\u043d\u0441\u0442\u0440\u0443\u043a\u0446\u0438\u0438 \u0432 \u0434\u043b\u0438\u043d\u043d\u044b\u0445 \u043f\u0440\u043e\u043c\u043f\u0442\u0430\u0445.",
-      "tags": [
-        "agent",
-        "1M-ctx",
-        "thinking"
-      ],
-      "openrouter": false,
-      "provider": "ollama"
-    },
-    {
-      "id": "glm-5.1",
-      "name": "GLM-5",
-      "organization": "Z.ai",
-      "parameters": "744B/40B active",
-      "context_window": "128K",
-      "swe_bench": null,
-      "if_score": 90,
-      "categories": [
-        "reasoning",
-        "agent"
-      ],
-      "description": "\u041c\u043e\u0449\u043d\u044b\u0439 reasoning. Arena ELO 1451. \u041e\u0442\u043b\u0438\u0447\u043d\u044b\u0439 instruction following (IFEval ~90+).",
-      "tags": [
-        "reasoning",
-        "agent"
-      ],
-      "openrouter": false,
-      "provider": "ollama"
-    },
-    {
-      "id": "deepseek-v4",
-      "name": "DeepSeek V4-Pro",
-      "organization": "DeepSeek",
-      "parameters": "Large MoE",
-      "context_window": "128K",
-      "swe_bench": null,
-      "if_score": 75,
-      "categories": [
-        "reasoning"
-      ],
-      "description": "\u0425\u043e\u0440\u043e\u0448\u0438\u0439 reasoning, \u043d\u043e IF \u043d\u0435\u0441\u0442\u0430\u0431\u0438\u043b\u0435\u043d \u2014 \u0438\u043d\u043e\u0433\u0434\u0430 \u0438\u0433\u043d\u043e\u0440\u0438\u0440\u0443\u0435\u0442 \u0444\u043e\u0440\u043c\u0430\u0442 \u0432\u044b\u0432\u043e\u0434\u0430.",
-      "tags": [
-        "reasoning"
-      ],
-      "openrouter": false,
-      "provider": "ollama"
-    },
-    {
-      "id": "qwen3-5-122b",
-      "name": "Qwen 3.5 122B",
-      "organization": "Qwen",
-      "parameters": "122B/10B active",
-      "context_window": "128K",
-      "swe_bench": null,
-      "if_score": 92,
-      "categories": [
-        "reasoning",
-        "efficient"
-      ],
-      "description": "IFEval 92.6%! \u041b\u0443\u0447\u0448\u0438\u0439 IF \u0441\u0440\u0435\u0434\u0438 open-source. Multimodal. Thinking.",
-      "tags": [
-        "vision",
-        "thinking",
-        "tools"
-      ],
-      "openrouter": false,
-      "provider": "ollama"
-    },
-    {
-      "id": "qwen3-coder-next",
-      "name": "Qwen3-Coder-Next",
-      "organization": "Qwen",
-      "parameters": "80B/3B active",
-      "context_window": "128K",
-      "swe_bench": 70,
-      "if_score": 84,
-      "categories": [
-        "coding",
-        "efficient"
-      ],
-      "description": "70% SWE-bench \u0441 3B active! \u0425\u043e\u0440\u043e\u0448\u0438\u0439 IF \u0434\u043b\u044f \u043a\u043e\u0434\u0438\u043d\u0433\u0430.",
-      "tags": [
-        "coding",
-        "efficient",
-        "tools"
-      ],
-      "openrouter": false,
-      "provider": "ollama"
-    },
-    {
-      "id": "cogito-2-1-671b",
-      "name": "Cogito 2.1 671B",
-      "organization": "Cognitive",
-      "parameters": "671B MoE",
-      "context_window": "128K",
-      "swe_bench": null,
-      "if_score": 76,
-      "categories": [
-        "reasoning"
-      ],
-      "description": "MIT \u043b\u0438\u0446\u0435\u043d\u0437\u0438\u044f. 671B total. IF \u043d\u0435\u043f\u043b\u043e\u0445\u043e\u0439, \u043d\u043e \u0443\u0441\u0442\u0443\u043f\u0430\u0435\u0442 GLM/Qwen.",
-      "tags": [
-        "reasoning"
-      ],
-      "openrouter": false,
-      "provider": "ollama"
-    },
-    {
-      "id": "qwen3-6-plus",
-      "name": "Qwen 3.6 Plus",
-      "organization": "Qwen",
-      "parameters": "Hybrid MoE",
-      "context_window": "1M",
-      "swe_bench": 78.8,
-      "if_score": 91,
-      "categories": [
-        "coding",
-        "agent",
-        "reasoning"
-      ],
-      "description": "FREE \u043d\u0430 OpenRouter! 1M \u043a\u043e\u043d\u0442\u0435\u043a\u0441\u0442. Always-on CoT. \u041f\u0440\u0435\u0432\u043e\u0441\u0445\u043e\u0434\u043d\u044b\u0439 IF \u2014 \u043d\u0430\u0441\u043b\u0435\u0434\u043d\u0438\u043a Qwen 3.5 (92.6%).",
-      "tags": [
-        "coding",
-        "agent",
-        "1M-ctx",
-        "free"
-      ],
-      "openrouter": true,
-      "provider": "openrouter"
-    },
-    {
-      "id": "step-3-5-flash",
-      "name": "Step 3.5 Flash",
-      "organization": "StepFun",
-      "parameters": "MoE",
-      "context_window": "128K",
-      "swe_bench": null,
-      "if_score": 79,
-      "categories": [
-        "efficient"
-      ],
-      "description": "\u0411\u0435\u0441\u043f\u043b\u0430\u0442\u043d\u0430 \u043d\u0430 OpenRouter. IF \u0441\u0440\u0435\u0434\u043d\u0438\u0439.",
-      "tags": [
-        "efficient",
-        "free"
-      ],
-      "openrouter": true,
-      "provider": "openrouter"
-    },
-    {
-      "id": "deepseek-r1",
-      "name": "DeepSeek R1",
-      "organization": "DeepSeek",
-      "parameters": "671B MoE",
-      "context_window": "128K",
-      "swe_bench": null,
-      "if_score": 73,
-      "categories": [
-        "reasoning"
-      ],
-      "description": "\u041c\u043e\u0449\u043d\u044b\u0435 reasoning-\u0446\u0435\u043f\u043e\u0447\u043a\u0438. \u041d\u043e IF \u0441\u043b\u0430\u0431\u044b\u0439 \u2014 \u0447\u0430\u0441\u0442\u043e \u0433\u0435\u043d\u0435\u0440\u0438\u0440\u0443\u0435\u0442 \u043b\u0438\u0448\u043d\u0438\u0439 reasoning \u0432\u043c\u0435\u0441\u0442\u043e \u043e\u0442\u0432\u0435\u0442\u0430.",
-      "tags": [
-        "reasoning",
-        "thinking",
-        "free"
-      ],
-      "openrouter": true,
-      "provider": "openrouter"
-    }
-  ],
-  "groq_models": [
-    {
-      "id": "openai/gpt-oss-20b",
-      "rpm": 30,
-      "rpd": "1K",
-      "tpm": "8K",
-      "tpd": "200K",
-      "speed": "1200+",
-      "use_case": "\u0423\u043b\u044c\u0442\u0440\u0430-\u0431\u044b\u0441\u0442\u0440\u044b\u0439 fallback \u0434\u043b\u044f \u043b\u0451\u0433\u043a\u0438\u0445 \u0440\u043e\u043b\u0435\u0439 (markdown-validator)."
-    },
-    {
-      "id": "llama-3.1-8b-instant",
-      "rpm": 30,
-      "rpd": "14.4K",
-      "tpm": "6K",
-      "tpd": "500K",
-      "speed": "~800",
-      "use_case": "14.4K RPD! \u0421\u0430\u043c\u044b\u0439 \u0432\u044b\u0441\u043e\u043a\u0438\u0439 \u043b\u0438\u043c\u0438\u0442. \u0414\u043b\u044f health-check / ping \u0440\u043e\u043b\u0435\u0439."
-    },
-    {
-      "id": "groq/compound",
-      "rpm": 30,
-      "rpd": "250",
-      "tpm": "70K",
-      "tpd": "\u2014",
-      "speed": "varies",
-      "use_case": "\u041c\u0443\u043b\u044c\u0442\u0438\u043c\u043e\u0434\u0435\u043b\u044c\u043d\u0430\u044f \u0430\u0433\u0440\u0435\u0433\u0430\u0446\u0438\u044f. \u0414\u043b\u044f research-\u0437\u0430\u0434\u0430\u0447."
-    },
-    {
-      "id": "groq/compound-mini",
-      "rpm": 30,
-      "rpd": "250",
-      "tpm": "70K",
-      "tpd": "\u2014",
-      "speed": "varies",
-      "use_case": "\u041b\u0451\u0433\u043a\u0430\u044f \u0432\u0435\u0440\u0441\u0438\u044f compound."
-    },
-    {
-      "id": "llama-prompt-guard-2",
-      "rpm": 30,
-      "rpd": "14.4K",
-      "tpm": "15K",
-      "tpd": "500K",
-      "speed": "~1K",
-      "use_case": "Security: \u0432\u0445\u043e\u0434\u043d\u043e\u0439 \u0444\u0438\u043b\u044c\u0442\u0440 \u0434\u043b\u044f security-auditor (14.4K RPD!)."
-    }
-  ],
-  "agent_model_scores": [
-    {
-      "agent": "lead-developer",
-      "current_model_index": 0,
-      "current_model_id": "qwen3-coder-480b",
-      "reasoning_effort": "H",
-      "scores": {
-        "qwen3-coder-480b": 92,
-        "minimax-m2.5": 86,
-        "minimax-m2.7": 82,
-        "nemotron-3-super": 70,
-        "glm-5.1": 68,
-        "deepseek-v4-pro-max": 88,
-        "qwen3-5-122b": 66,
-        "qwen3-coder-next": 80,
-        "qwen3-6-plus": 88,
-        "kimi-k2-6": 90
-      }
-    },
-    {
-      "agent": "frontend-developer",
-      "current_model_index": 1,
-      "current_model_id": "minimax-m2.5",
-      "reasoning_effort": "M",
-      "scores": {
-        "qwen3-coder-480b": 86,
-        "minimax-m2.5": 92,
-        "minimax-m2.7": 88,
-        "nemotron-3-super": 62,
-        "glm-5.1": 56,
-        "deepseek-v4-pro-max": 82,
-        "qwen3-5-122b": 60,
-        "qwen3-coder-next": 76,
-        "qwen3-6-plus": 88,
-        "kimi-k2-6": 86
-      }
-    },
-    {
-      "agent": "php-developer",
-      "current_model_index": 0,
-      "current_model_id": "qwen3-coder-480b",
-      "reasoning_effort": "H",
-      "scores": {
-        "qwen3-coder-480b": 87,
-        "minimax-m2.5": 76,
-        "minimax-m2.7": 72,
-        "nemotron-3-super": 64,
-        "glm-5.1": 56,
-        "deepseek-v4-pro-max": 74,
-        "qwen3-5-122b": 60,
-        "qwen3-coder-next": 76,
-        "qwen3-6-plus": 84,
-        "kimi-k2-6": 86
-      }
-    },
-    {
-      "agent": "python-developer",
-      "current_model_index": 0,
-      "current_model_id": "qwen3-coder-480b",
-      "reasoning_effort": "H",
-      "scores": {
-        "qwen3-coder-480b": 90,
-        "minimax-m2.5": 82,
-        "minimax-m2.7": 78,
-        "nemotron-3-super": 66,
-        "glm-5.1": 60,
-        "deepseek-v4-pro-max": 78,
-        "qwen3-5-122b": 64,
-        "qwen3-coder-next": 78,
-        "qwen3-6-plus": 88,
-        "kimi-k2-6": 88
-      }
-    },
-    {
-      "agent": "backend-developer",
-      "current_model_index": 0,
-      "current_model_id": "qwen3-coder-480b",
-      "reasoning_effort": "M",
-      "scores": {
-        "qwen3-coder-480b": 91,
-        "minimax-m2.5": 84,
-        "minimax-m2.7": 80,
-        "nemotron-3-super": 68,
-        "glm-5.1": 63,
-        "deepseek-v4-pro-max": 86,
-        "qwen3-5-122b": 62,
-        "qwen3-coder-next": 78,
-        "qwen3-6-plus": 87,
-        "kimi-k2-6": 90
-      }
-    },
-    {
-      "agent": "go-developer",
-      "current_model_index": 3,
-      "current_model_id": "deepseek-v4-pro-max",
-      "reasoning_effort": "M",
-      "scores": {
-        "qwen3-coder-480b": 85,
-        "minimax-m2.5": 78,
-        "minimax-m2.7": 74,
-        "nemotron-3-super": 66,
-        "glm-5.1": 58,
-        "deepseek-v4-pro-max": 88,
-        "qwen3-5-122b": 58,
-        "qwen3-coder-next": 74,
-        "qwen3-6-plus": 82,
-        "kimi-k2-6": 86
-      }
-    },
-    {
-      "agent": "flutter-developer",
-      "current_model_index": 0,
-      "current_model_id": "qwen3-coder-480b",
-      "reasoning_effort": "M",
-      "scores": {
-        "qwen3-coder-480b": 86,
-        "minimax-m2.5": 70,
-        "minimax-m2.7": 66,
-        "nemotron-3-super": 60,
-        "glm-5.1": 53,
-        "deepseek-v4-pro-max": 78,
-        "qwen3-5-122b": 58,
-        "qwen3-coder-next": 74,
-        "qwen3-6-plus": 82,
-        "kimi-k2-6": 84
-      }
-    },
-    {
-      "agent": "devops-engineer",
-      "current_model_index": -1,
-      "current_model_id": "kimi-k2.6",
-      "reasoning_effort": "M",
-      "scores": {
-        "qwen3-coder-480b": 66,
-        "minimax-m2.5": 53,
-        "minimax-m2.7": 48,
-        "nemotron-3-super": 78,
-        "glm-5.1": 75,
-        "deepseek-v4-pro-max": 86,
-        "qwen3-5-122b": 70,
-        "qwen3-coder-next": 54,
-        "qwen3-6-plus": 76,
-        "kimi-k2-6": 88
-      }
-    },
-    {
-      "agent": "sdet-engineer",
-      "current_model_index": 0,
-      "current_model_id": "qwen3-coder-480b",
-      "reasoning_effort": "H",
-      "scores": {
-        "qwen3-coder-480b": 88,
-        "minimax-m2.5": 84,
-        "minimax-m2.7": 80,
-        "nemotron-3-super": 70,
-        "glm-5.1": 63,
-        "deepseek-v4-pro-max": 84,
-        "qwen3-5-122b": 64,
-        "qwen3-coder-next": 78,
-        "qwen3-6-plus": 84,
-        "kimi-k2-6": 87
-      }
-    },
-    {
-      "agent": "code-skeptic",
-      "current_model_index": 1,
-      "current_model_id": "minimax-m2.5",
-      "reasoning_effort": "M",
-      "scores": {
-        "qwen3-coder-480b": 82,
-        "minimax-m2.5": 85,
-        "minimax-m2.7": 80,
-        "nemotron-3-super": 73,
-        "glm-5.1": 72,
-        "deepseek-v4-pro-max": 82,
-        "qwen3-5-122b": 70,
-        "qwen3-coder-next": 72,
-        "qwen3-6-plus": 80,
-        "kimi-k2-6": 82
-      }
-    },
-    {
-      "agent": "security-auditor",
-      "current_model_index": 3,
-      "current_model_id": "deepseek-v4-pro-max",
-      "reasoning_effort": "M",
-      "scores": {
-        "qwen3-coder-480b": 76,
-        "minimax-m2.5": 74,
-        "minimax-m2.7": 68,
-        "nemotron-3-super": 76,
-        "glm-5.1": 68,
-        "deepseek-v4-pro-max": 80,
-        "qwen3-5-122b": 72,
-        "qwen3-coder-next": 64,
-        "qwen3-6-plus": 75,
-        "kimi-k2-6": 80
-      }
-    },
-    {
-      "agent": "performance-engineer",
-      "current_model_index": 3,
-      "current_model_id": "deepseek-v4-pro-max",
-      "reasoning_effort": "M",
-      "scores": {
-        "qwen3-coder-480b": 78,
-        "minimax-m2.5": 75,
-        "minimax-m2.7": 70,
-        "nemotron-3-super": 78,
-        "glm-5.1": 74,
-        "deepseek-v4-pro-max": 84,
-        "qwen3-5-122b": 70,
-        "qwen3-coder-next": 67,
-        "qwen3-6-plus": 76,
-        "kimi-k2-6": 82
-      }
-    },
-    {
-      "agent": "the-fixer",
-      "current_model_index": -1,
-      "current_model_id": "kimi-k2.6",
-      "reasoning_effort": "M",
-      "scores": {
-        "qwen3-coder-480b": 89,
-        "minimax-m2.5": 88,
-        "minimax-m2.7": 84,
-        "nemotron-3-super": 71,
-        "glm-5.1": 64,
-        "deepseek-v4-pro-max": 88,
-        "qwen3-5-122b": 64,
-        "qwen3-coder-next": 82,
-        "qwen3-6-plus": 86,
-        "kimi-k2-6": 90
-      }
-    },
-    {
-      "agent": "browser-automation",
-      "current_model_index": 0,
-      "current_model_id": "qwen3-coder-480b",
-      "reasoning_effort": "M",
-      "scores": {
-        "qwen3-coder-480b": 87,
-        "minimax-m2.5": 72,
-        "minimax-m2.7": 68,
-        "nemotron-3-super": 61,
-        "glm-5.1": 53,
-        "deepseek-v4-pro-max": 82,
-        "qwen3-5-122b": 56,
-        "qwen3-coder-next": 72,
-        "qwen3-6-plus": 82,
-        "kimi-k2-6": 86
-      }
-    },
-    {
-      "agent": "visual-tester",
-      "current_model_index": 0,
-      "current_model_id": "qwen3-coder-480b",
-      "reasoning_effort": "M",
-      "scores": {
-        "qwen3-coder-480b": 82,
-        "minimax-m2.5": 68,
-        "minimax-m2.7": 64,
-        "nemotron-3-super": 55,
-        "glm-5.1": 48,
-        "deepseek-v4-pro-max": 76,
-        "qwen3-5-122b": 54,
-        "qwen3-coder-next": 66,
-        "qwen3-6-plus": 76,
-        "kimi-k2-6": 78
-      }
-    },
-    {
-      "agent": "system-analyst",
-      "current_model_index": 7,
-      "current_model_id": "glm-5.1",
-      "reasoning_effort": "M",
-      "scores": {
-        "qwen3-coder-480b": 70,
-        "minimax-m2.5": 66,
-        "minimax-m2.7": 63,
-        "nemotron-3-super": 74,
-        "glm-5.1": 82,
-        "deepseek-v4-pro-max": 88,
-        "qwen3-5-122b": 76,
-        "qwen3-coder-next": 58,
-        "qwen3-6-plus": 80,
-        "kimi-k2-6": 86
-      }
-    },
-    {
-      "agent": "capability-analyst",
-      "current_model_index": 7,
-      "current_model_id": "glm-5.1",
-      "reasoning_effort": "M",
-      "scores": {
-        "qwen3-coder-480b": 72,
-        "minimax-m2.5": 68,
-        "minimax-m2.7": 66,
-        "nemotron-3-super": 76,
-        "glm-5.1": 78,
-        "deepseek-v4-pro-max": 82,
-        "qwen3-5-122b": 75,
-        "qwen3-coder-next": 60,
-        "qwen3-6-plus": 79,
-        "kimi-k2-6": 82
-      }
-    },
-    {
-      "agent": "orchestrator",
-      "current_model_index": -1,
-      "current_model_id": "kimi-k2.6",
-      "reasoning_effort": "H",
-      "scores": {
-        "qwen3-coder-480b": 74,
-        "minimax-m2.5": 70,
-        "minimax-m2.7": 68,
-        "nemotron-3-super": 80,
-        "glm-5.1": 82,
-        "deepseek-v4-pro-max": 86,
-        "qwen3-5-122b": 78,
-        "qwen3-coder-next": 62,
-        "qwen3-6-plus": 84,
-        "kimi-k2-6": 92
-      }
-    },
-    {
-      "agent": "release-manager",
-      "current_model_index": 7,
-      "current_model_id": "glm-5.1",
-      "reasoning_effort": "M",
-      "scores": {
-        "qwen3-coder-480b": 72,
-        "minimax-m2.5": 66,
-        "minimax-m2.7": 64,
-        "nemotron-3-super": 74,
-        "glm-5.1": 76,
-        "deepseek-v4-pro-max": 78,
-        "qwen3-5-122b": 72,
-        "qwen3-coder-next": 60,
-        "qwen3-6-plus": 76,
-        "kimi-k2-6": 78
-      }
-    },
-    {
-      "agent": "evaluator",
-      "current_model_index": 7,
-      "current_model_id": "glm-5.1",
-      "reasoning_effort": "H",
-      "scores": {
-        "qwen3-coder-480b": 70,
-        "minimax-m2.5": 73,
-        "minimax-m2.7": 70,
-        "nemotron-3-super": 78,
-        "glm-5.1": 78,
-        "deepseek-v4-pro-max": 84,
-        "qwen3-5-122b": 76,
-        "qwen3-coder-next": 58,
-        "qwen3-6-plus": 81,
-        "kimi-k2-6": 84
-      }
-    },
-    {
-      "agent": "prompt-optimizer",
-      "current_model_index": -1,
-      "current_model_id": "qwen3.6-plus",
-      "reasoning_effort": "M",
-      "scores": {
-        "qwen3-coder-480b": 76,
-        "minimax-m2.5": 74,
-        "minimax-m2.7": 72,
-        "nemotron-3-super": 76,
-        "glm-5.1": 75,
-        "deepseek-v4-pro-max": 80,
-        "qwen3-5-122b": 74,
-        "qwen3-coder-next": 64,
-        "qwen3-6-plus": 83,
-        "kimi-k2-6": 82
-      }
-    },
-    {
-      "agent": "product-owner",
-      "current_model_index": 7,
-      "current_model_id": "glm-5.1",
-      "reasoning_effort": "M",
-      "scores": {
-        "qwen3-coder-480b": 60,
-        "minimax-m2.5": 56,
-        "minimax-m2.7": 54,
-        "nemotron-3-super": 74,
-        "glm-5.1": 78,
-        "deepseek-v4-pro-max": 76,
-        "qwen3-5-122b": 74,
-        "qwen3-coder-next": 48,
-        "qwen3-6-plus": 78,
-        "kimi-k2-6": 76
-      }
-    },
-    {
-      "agent": "pipeline-judge",
-      "current_model_index": 7,
-      "current_model_id": "glm-5.1",
-      "reasoning_effort": "M",
-      "scores": {
-        "qwen3-coder-480b": 64,
-        "minimax-m2.5": 68,
-        "minimax-m2.7": 65,
-        "nemotron-3-super": 78,
-        "glm-5.1": 76,
-        "deepseek-v4-pro-max": 82,
-        "qwen3-5-122b": 74,
-        "qwen3-coder-next": 56,
-        "qwen3-6-plus": 80,
-        "kimi-k2-6": 84
-      }
-    },
-    {
-      "agent": "workflow-architect",
-      "current_model_index": 7,
-      "current_model_id": "glm-5.1",
-      "reasoning_effort": "H",
-      "scores": {
-        "qwen3-coder-480b": 68,
-        "minimax-m2.5": 62,
-        "minimax-m2.7": 60,
-        "nemotron-3-super": 76,
-        "glm-5.1": 76,
-        "deepseek-v4-pro-max": 80,
-        "qwen3-5-122b": 72,
-        "qwen3-coder-next": 56,
-        "qwen3-6-plus": 80,
-        "kimi-k2-6": 82
-      }
-    },
-    {
-      "agent": "markdown-validator",
-      "current_model_index": 3,
-      "current_model_id": "deepseek-v4-pro-max",
-      "reasoning_effort": "M",
-      "scores": {
-        "qwen3-coder-480b": 43,
-        "minimax-m2.5": 38,
-        "minimax-m2.7": 36,
-        "nemotron-3-super": 52,
-        "glm-5.1": 55,
-        "deepseek-v4-pro-max": 68,
-        "qwen3-5-122b": 56,
-        "qwen3-coder-next": 40,
-        "qwen3-6-plus": 50,
-        "kimi-k2-6": 56
-      }
-    },
-    {
-      "agent": "agent-architect",
-      "current_model_index": -1,
-      "current_model_id": "kimi-k2.6",
-      "reasoning_effort": "H",
-      "scores": {
-        "qwen3-coder-480b": 78,
-        "minimax-m2.5": 72,
-        "minimax-m2.7": 70,
-        "nemotron-3-super": 78,
-        "glm-5.1": 76,
-        "deepseek-v4-pro-max": 82,
-        "qwen3-5-122b": 76,
-        "qwen3-coder-next": 66,
-        "qwen3-6-plus": 82,
-        "kimi-k2-6": 86
-      }
-    },
-    {
-      "agent": "planner",
-      "current_model_index": 3,
-      "current_model_id": "deepseek-v4-pro-max",
-      "reasoning_effort": "M",
-      "scores": {
-        "qwen3-coder-480b": 72,
-        "minimax-m2.5": 68,
-        "minimax-m2.7": 66,
-        "nemotron-3-super": 80,
-        "glm-5.1": 78,
-        "deepseek-v4-pro-max": 88,
-        "qwen3-5-122b": 78,
-        "qwen3-coder-next": 60,
-        "qwen3-6-plus": 85,
-        "kimi-k2-6": 86
-      }
-    },
-    {
-      "agent": "reflector",
-      "current_model_index": 3,
-      "current_model_id": "deepseek-v4-pro-max",
-      "reasoning_effort": "M",
-      "scores": {
-        "qwen3-coder-480b": 68,
-        "minimax-m2.5": 66,
-        "minimax-m2.7": 64,
-        "nemotron-3-super": 78,
-        "glm-5.1": 76,
-        "deepseek-v4-pro-max": 84,
-        "qwen3-5-122b": 76,
-        "qwen3-coder-next": 56,
-        "qwen3-6-plus": 82,
-        "kimi-k2-6": 80
-      }
-    },
-    {
-      "agent": "memory-manager",
-      "current_model_index": -1,
-      "current_model_id": "qwen3.6-plus",
-      "reasoning_effort": "M",
-      "scores": {
-        "qwen3-coder-480b": 63,
-        "minimax-m2.5": 58,
-        "minimax-m2.7": 56,
-        "nemotron-3-super": 86,
-        "glm-5.1": 72,
-        "deepseek-v4-pro-max": 86,
-        "qwen3-5-122b": 70,
-        "qwen3-coder-next": 50,
-        "qwen3-6-plus": 87,
-        "kimi-k2-6": 84
-      }
-    },
-    {
-      "agent": "architect-indexer",
-      "current_model_index": 7,
-      "current_model_id": "glm-5.1",
-      "reasoning_effort": "H",
-      "scores": {
-        "qwen3-coder-480b": 70,
-        "minimax-m2.5": 64,
-        "minimax-m2.7": 62,
-        "nemotron-3-super": 74,
-        "glm-5.1": 80,
-        "deepseek-v4-pro-max": 78,
-        "qwen3-5-122b": 76,
-        "qwen3-coder-next": 58,
-        "qwen3-6-plus": 80,
-        "kimi-k2-6": 84
-      }
-    }
-  ],
-  "if_scores": {
-    "qwen3-coder-480b": 88,
-    "minimax-m2.5": 82,
-    "minimax-m2.7": 78,
-    "nemotron-3-super": 85,
-    "glm-5.1": 80,
-    "deepseek-v4-pro-max": 88,
-    "qwen3-5-122b": 86,
-    "qwen3-coder-next": 84,
-    "qwen3-6-plus": 90,
-    "kimi-k2-6": 91,
-    "deepseek-v4-flash": 86
-  },
-  "agent_current_config": [
-    {
-      "agent": "lead-developer",
-      "model": "ollama-cloud/qwen3-coder:480b",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "qwen",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "frontend-developer",
-      "model": "ollama-cloud/minimax-m2.5",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "qwen",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "php-developer",
-      "model": "ollama-cloud/qwen3-coder:480b",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "qwen",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "python-developer",
-      "model": "ollama-cloud/qwen3-coder:480b",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "qwen",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "backend-developer",
-      "model": "ollama-cloud/qwen3-coder:480b",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "qwen",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "go-developer",
-      "model": "ollama-cloud/deepseek-v4-pro-max",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "qwen",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "flutter-developer",
-      "model": "ollama-cloud/qwen3-coder:480b",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "qwen",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "devops-engineer",
-      "model": "ollama-cloud/kimi-k2.6",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "nemotron",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "sdet-engineer",
-      "model": "ollama-cloud/qwen3-coder:480b",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "qwen",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "code-skeptic",
-      "model": "ollama-cloud/minimax-m2.5",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "minimax",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "security-auditor",
-      "model": "ollama-cloud/deepseek-v4-pro-max",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "nemotron",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "performance-engineer",
-      "model": "ollama-cloud/deepseek-v4-pro-max",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "nemotron",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "the-fixer",
-      "model": "ollama-cloud/kimi-k2.6",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "minimax",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "browser-automation",
-      "model": "ollama-cloud/qwen3-coder:480b",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "qwen",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "visual-tester",
-      "model": "ollama-cloud/qwen3-coder:480b",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "qwen",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "system-analyst",
-      "model": "ollama-cloud/glm-5.1",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "glm",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "capability-analyst",
-      "model": "ollama-cloud/glm-5.1",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "glm",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "orchestrator",
-      "model": "ollama-cloud/kimi-k2.6",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "kimi",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "release-manager",
-      "model": "ollama-cloud/glm-5.1",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "glm",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "evaluator",
-      "model": "ollama-cloud/glm-5.1",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "glm",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "prompt-optimizer",
-      "model": "ollama-cloud/qwen3.6-plus",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "glm",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "product-owner",
-      "model": "ollama-cloud/glm-5.1",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "glm",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "pipeline-judge",
-      "model": "ollama-cloud/glm-5.1",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "glm",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "workflow-architect",
-      "model": "ollama-cloud/glm-5.1",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "glm",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "markdown-validator",
-      "model": "ollama-cloud/deepseek-v4-pro-max",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "nemotron",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "agent-architect",
-      "model": "ollama-cloud/kimi-k2.6",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "glm",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "planner",
-      "model": "ollama-cloud/deepseek-v4-pro-max",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "nemotron",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "reflector",
-      "model": "ollama-cloud/deepseek-v4-pro-max",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "nemotron",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "memory-manager",
-      "model": "ollama-cloud/qwen3.6-plus",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "nemotron",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "architect-indexer",
-      "model": "ollama-cloud/glm-5.1",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "glm",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    }
-  ],
-  "recommendations": [
-    {
-      "agent": "[built-in] debug",
-      "from_model": "glm-5.1.1 (88)",
-      "from_provider": "Ollama",
-      "to_model": "V4-Pro Max (\u260590) / K2.6 (\u260590) RE:High",
-      "to_provider": "Ollama Cloud",
-      "impact": "high",
-      "quality_change": "+2%",
-      "speed_change": "~1x",
-      "context_change": "200K\u21921M",
-      "provider_change": "Ollama Cloud",
-      "rationale": "\u2605 \u043c\u0430\u0442\u0440\u0438\u0446\u044b: V4-Pro=90 \u0438 K2.6=90 (TIE!), GLM-5.1=88. V4-Pro: LiveCodeBench 93.5(#1!), Terminal 67.9, 1M ctx \u0434\u043b\u044f \u043f\u043e\u043b\u043d\u043e\u0433\u043e \u043f\u0440\u043e\u0435\u043a\u0442\u0430. K2.6: 13h auto sessions. \u041e\u0431\u0430 \u043b\u0443\u0447\u0448\u0435 GLM-5.1. RE:High \u0434\u043b\u044f debug."
-    },
-    {
-      "agent": "planner",
-      "from_model": "nemotron-3-super (80)",
-      "from_provider": "Ollama",
-      "to_model": "V4-Pro Max (\u260588) RE:High",
-      "to_provider": "Ollama Cloud",
-      "impact": "high",
-      "quality_change": "+10%",
-      "speed_change": "~1x",
-      "context_change": "1M",
-      "provider_change": "Ollama Cloud",
-      "rationale": "\u2605 \u043c\u0430\u0442\u0440\u0438\u0446\u044b: V4-Pro=88(\u043b\u0443\u0447\u0448\u0438\u0439!), K2.6=86, GLM-5.1=85, Nem=80. V4-Pro: GPQA 90.1 (reasoning), 1M ctx \u0441\u043e\u0445\u0440\u0430\u043d\u044f\u0435\u0442\u0441\u044f (vs \u043f\u043e\u0442\u0435\u0440\u044f \u043f\u0440\u0438 K2.6). RE:High \u0434\u043b\u044f chain-of-thought planning."
-    },
-    {
-      "agent": "go-developer",
-      "from_model": "qwen3-coder:480b (85)",
-      "from_provider": "Ollama",
-      "to_model": "V4-Pro Max (\u260588) RE:Medium",
-      "to_provider": "Ollama Cloud",
-      "impact": "medium",
-      "quality_change": "+4%",
-      "speed_change": "~1x",
-      "context_change": "256K\u21921M",
-      "provider_change": "Ollama Cloud",
-      "rationale": "\u2605 \u043c\u0430\u0442\u0440\u0438\u0446\u044b: V4-Pro=88(\u043b\u0443\u0447\u0448\u0438\u0439 \u0434\u043b\u044f Go!), K2.6=86, Qwen3Coder=85. DeepSeek \u043c\u043e\u0434\u0435\u043b\u0438 \u0442\u0440\u0430\u0434\u0438\u0446\u0438\u043e\u043d\u043d\u043e \u0441\u0438\u043b\u044c\u043d\u044b \u0432 Go/Rust. 1M ctx \u0434\u043b\u044f \u043a\u0440\u0443\u043f\u043d\u044b\u0445 Go-\u043f\u0440\u043e\u0435\u043a\u0442\u043e\u0432."
-    },
-    {
-      "agent": "history-miner",
-      "from_model": "nemotron-3-super (\u260585)",
-      "from_provider": "Ollama",
-      "to_model": "V4-Pro Max (86) + Nem fallback",
-      "to_provider": "Hybrid",
-      "impact": "medium",
-      "quality_change": "+1%",
-      "speed_change": "~1x",
-      "context_change": "1M",
-      "provider_change": "Ollama Cloud + Ollama",
-      "rationale": "V4-Pro=86 \u0447\u0443\u0442\u044c \u043b\u0443\u0447\u0448\u0435 Nemotron=85. 1M ctx \u0443 \u043e\u0431\u043e\u0438\u0445. MRCR 83.5 \u0443 V4-Pro \u2014 \u043b\u0443\u0447\u0448\u0435\u0435 long-context retrieval. Nemotron \u043a\u0430\u043a fallback (RULER 91.75%)."
-    },
-    {
-      "agent": "frontend-dev \u2192 M2.5",
-      "from_model": "qwen3-coder (90)",
-      "from_provider": "Ollama",
-      "to_model": "MiniMax M2.5 (\u260592) \u2705",
-      "to_provider": "Ollama",
-      "impact": "low",
-      "quality_change": "+2%",
-      "speed_change": "=",
-      "context_change": "204K",
-      "provider_change": "Ollama",
-      "rationale": "Spec-writing, UI architect. APPLIED."
-    },
-    {
-      "agent": "devops \u2192 K2.6",
-      "from_model": "deepseek-v3.2",
-      "from_provider": "",
-      "to_model": "kimi-k2.6 \u2705",
-      "to_provider": "Ollama Cloud",
-      "impact": "low",
-      "quality_change": "+35%",
-      "speed_change": "=",
-      "context_change": "256K",
-      "provider_change": "",
-      "rationale": "APPLIED."
-    },
-    {
-      "agent": "orchestrator",
-      "from_model": "glm-5.1.1 (\u260590)",
-      "from_provider": "Ollama",
-      "to_model": "K2.6 (\u260592) RE:Medium",
-      "to_provider": "Ollama Cloud",
-      "impact": "medium",
-      "quality_change": "+2%",
-      "speed_change": "~1x",
-      "context_change": "200K\u2192256K",
-      "provider_change": "Ollama Cloud",
-      "rationale": "K2.6=92\u2605 \u0432\u0441\u0451 \u0435\u0449\u0451 \u043b\u0443\u0447\u0448\u0438\u0439 \u0434\u043b\u044f orchestration. V4-Pro=86 \u0441\u043b\u0430\u0431\u0435\u0435. 300 sub-agent swarm."
-    },
-    {
-      "agent": "the-fixer",
-      "from_model": "minimax-m2.5 (\u260588)",
-      "from_provider": "Ollama",
-      "to_model": "V4-Pro (\u260588) / K2.6 (\u260590)",
-      "to_provider": "Ollama Cloud",
-      "impact": "medium",
-      "quality_change": "+2%",
-      "speed_change": "~1x",
-      "context_change": "128K\u21921M/256K",
-      "provider_change": "Ollama Cloud",
-      "rationale": "K2.6=90(\u043b\u0443\u0447\u0448\u0438\u0439), V4-Pro=88=M2.5. M2.5 SWE-bench 80.2% \u0441\u0442\u0430\u0431\u0438\u043b\u044c\u043d\u0435\u0435. \u041d\u0435 \u0441\u0440\u043e\u0447\u043d\u043e."
-    },
-    {
-      "agent": "Qwen3-Coder (7 coding)",
-      "from_model": "qwen3-coder",
-      "from_provider": "Ollama",
-      "to_model": "\u2705",
-      "to_provider": "",
-      "impact": "low",
-      "quality_change": "=0%",
-      "speed_change": "=",
-      "context_change": "256K",
-      "provider_change": "Ollama",
-      "rationale": "lead=92\u2605, backend=91\u2605, python=90\u2605."
-    },
-    {
-      "agent": "GLM-5.1 (12 agents)",
-      "from_model": "glm-5.1.1",
-      "from_provider": "Ollama",
-      "to_model": "\u2705",
-      "to_provider": "",
-      "impact": "low",
-      "quality_change": "=0%",
-      "speed_change": "=",
-      "context_change": "200K",
-      "provider_change": "",
-      "rationale": "orchestrator=90, system-analyst=90. SWE-Pro #1."
-    },
-    {
-      "agent": "Kimi K2.6 (3 agents)",
-      "from_model": "kimi-k2.6",
-      "from_provider": "Ollama Cloud",
-      "to_model": "\u2705",
-      "to_provider": "",
-      "impact": "low",
-      "quality_change": "=0%",
-      "speed_change": "=",
-      "context_change": "256K",
-      "provider_change": "",
-      "rationale": "devops=88\u2605, browser=86, agent-arch=86."
-    }
-  ],
-  "impact_data": [
-    {
-      "category": "debug GLM5.1\u2192V4-Pro/K2.6",
-      "before": 88,
-      "after": 90,
-      "delta": 2,
-      "notes": "LiveCodeBench 93.5, Terminal 67.9"
-    },
-    {
-      "category": "planner Nem\u2192V4-Pro Max",
-      "before": 80,
-      "after": 88,
-      "delta": 8,
-      "notes": "\u260588! GPQA 90.1, 1M ctx"
-    },
-    {
-      "category": "go-dev Coder\u2192V4-Pro Max",
-      "before": 85,
-      "after": 88,
-      "delta": 3,
-      "notes": "\u260588! Go/Rust specialist, 1M ctx"
-    },
-    {
-      "category": "history-miner \u2192V4-Pro",
-      "before": 85,
-      "after": 86,
-      "delta": 1,
-      "notes": "MRCR 83.5, long-context"
-    },
-    {
-      "category": "orchestrator \u2192K2.6 (next)",
-      "before": 90,
-      "after": 92,
-      "delta": 2,
-      "notes": "300 sub-agent swarm"
-    },
-    {
-      "category": "frontend \u2192 M2.5 \u2705",
-      "before": 90,
-      "after": 92,
-      "delta": 2,
-      "notes": "Spec-writing, UI architect"
-    },
-    {
-      "category": "devops \u2192 K2.6 \u2705",
-      "before": 65,
-      "after": 88,
-      "delta": 23,
-      "notes": "IF:65\u219291! Terminal 66.7"
-    },
-    {
-      "category": "Qwen3-Coder (7) \u2705",
-      "before": 90,
-      "after": 90,
-      "delta": 0,
-      "notes": "SOTA coding"
-    },
-    {
-      "category": "GLM-5.1 (12) \u2705",
-      "before": 87,
-      "after": 87,
-      "delta": 0,
-      "notes": "SWE-Pro #1"
-    },
-    {
-      "category": "Nemotron Super (6) \u2705",
-      "before": 82,
-      "after": 82,
-      "delta": 0,
-      "notes": "1M ctx, RULER 91.75%"
-    }
-  ],
-  "benchmark_comparison": {
-    "benchmarks": [
-      {
-        "name": "SWE-V",
-        "full_name": "SWE-Bench Verified",
-        "description": "GitHub issue resolution (500 tasks)",
-        "roles": "lead-dev, backend, fixer"
-      },
-      {
-        "name": "SWE-P",
-        "full_name": "SWE-Bench Pro",
-        "description": "Multi-lang, decontaminated (1865 tasks)",
-        "roles": "all coding agents"
-      },
-      {
-        "name": "T-Bench",
-        "full_name": "Terminal-Bench 2.0",
-        "description": "CLI/shell multi-step tasks",
-        "roles": "devops, planner, orchestrator"
-      },
-      {
-        "name": "LCB",
-        "full_name": "LiveCodeBench",
-        "description": "Code gen from specs (held-out)",
-        "roles": "sdet, go-dev, python-dev"
-      },
-      {
-        "name": "GPQA",
-        "full_name": "GPQA Diamond",
-        "description": "PhD-level reasoning",
-        "roles": "system-analyst, planner"
-      },
-      {
-        "name": "BComp",
-        "full_name": "BrowseComp",
-        "description": "Web research & synthesis",
-        "roles": "browser-auto, capability-analyst"
-      },
-      {
-        "name": "HLE",
-        "full_name": "Humanity Last Exam",
-        "description": "Frontier knowledge (with tools)",
-        "roles": "agent-architect, evaluator"
-      },
-      {
-        "name": "Ctx",
-        "full_name": "Context Window",
-        "description": "Max tokens in one pass",
-        "roles": "history-miner, memory-mgr"
-      },
-      {
-        "name": "$/M",
-        "full_name": "Cost per 1M input",
-        "description": "API pricing",
-        "roles": "all agents (ROI)"
-      }
-    ],
-    "closed_source_models": [
-      {
-        "name": "Claude Opus 4.7",
-        "organization": "Anthropic",
-        "scores": [
-          87.6,
-          64.3,
-          69.4,
-          null,
-          94.2,
-          79.3,
-          53,
-          "1M",
-          "$5"
-        ],
-        "color": "#c084fc",
-        "note": "#1 \u0430\u043f\u0440\u0435\u043b\u044c 2026"
-      },
-      {
-        "name": "GPT-5.5",
-        "organization": "OpenAI",
-        "scores": [
-          null,
-          58.6,
-          82.7,
-          null,
-          null,
-          83.4,
-          57.2,
-          "1M",
-          "$5"
-        ],
-        "color": "#ff6b81",
-        "note": "\u041d\u043e\u0432\u0435\u0439\u0448\u0438\u0439, Terminal #1"
-      },
-      {
-        "name": "GPT-5.4",
-        "organization": "OpenAI",
-        "scores": [
-          78.2,
-          59.1,
-          75.1,
-          null,
-          94.4,
-          82.7,
-          58.7,
-          "200K",
-          "$2.50"
-        ],
-        "color": "#ff6b81",
-        "note": "Reasoning, math"
-      },
-      {
-        "name": "Gemini 3.1 Pro",
-        "organization": "Google",
-        "scores": [
-          80.6,
-          46.1,
-          68.5,
-          null,
-          94.3,
-          85.9,
-          51.4,
-          "2M",
-          "$2"
-        ],
-        "color": "#facc15",
-        "note": "ARC-AGI 77.1%, \u0434\u0435\u0448\u0451\u0432\u044b\u0439"
-      },
-      {
-        "name": "Claude Sonnet 4.6",
-        "organization": "Anthropic",
-        "scores": [
-          79.6,
-          null,
-          null,
-          null,
-          null,
-          null,
-          null,
-          "200K",
-          "$3"
-        ],
-        "color": "#c084fc",
-        "note": "5\u00d7 \u0434\u0435\u0448\u0435\u0432\u043b\u0435 Opus"
-      },
-      {
-        "name": "GPT-5.3-Codex",
-        "organization": "OpenAI",
-        "scores": [
-          85,
-          57,
-          77.3,
-          null,
-          null,
-          null,
-          null,
-          "200K",
-          "$6"
-        ],
-        "color": "#ff6b81",
-        "note": "Coding specialist"
-      }
-    ],
-    "apaw_models": [
-      {
-        "name": "Kimi K2.6",
-        "organization": "APAW",
-        "scores": [
-          80.2,
-          58.6,
-          66.7,
-          87.2,
-          null,
-          83.2,
-          54,
-          "256K",
-          "$0.95"
-        ],
-        "color": "#00ff94",
-        "note": "devops, browser, architect (3)"
-      },
-      {
-        "name": "GLM-5.1",
-        "organization": "APAW",
-        "scores": [
-          null,
-          58.4,
-          63.5,
-          null,
-          86.2,
-          68.7,
-          null,
-          "200K",
-          "~$0.50"
-        ],
-        "color": "#00ff94",
-        "note": "12 agents! orchestrator, eval..."
-      },
-      {
-        "name": "V4-Pro Max",
-        "organization": "APAW",
-        "scores": [
-          80.6,
-          55.4,
-          67.9,
-          93.5,
-          90.1,
-          83.4,
-          48.2,
-          "1M",
-          "$0.42"
-        ],
-        "color": "#00d4ff",
-        "note": "planner, go-dev (\u0440\u0435\u043a.)"
-      },
-      {
-        "name": "Qwen3-Coder 480B",
-        "organization": "APAW",
-        "scores": [
-          66.5,
-          null,
-          null,
-          null,
-          null,
-          null,
-          null,
-          "256K",
-          "~$0.50"
-        ],
-        "color": "#00ff94",
-        "note": "7 coding agents"
-      },
-      {
-        "name": "MiniMax M2.5",
-        "organization": "APAW",
-        "scores": [
-          80.2,
-          51.3,
-          null,
-          null,
-          null,
-          76.3,
-          null,
-          "204K",
-          "$0.15"
-        ],
-        "color": "#00ff94",
-        "note": "frontend, skeptic, fixer (3)"
-      },
-      {
-        "name": "Nemotron Super",
-        "organization": "APAW",
-        "scores": [
-          60.5,
-          null,
-          null,
-          null,
-          null,
-          null,
-          null,
-          "1M",
-          "~$0.40"
-        ],
-        "color": "#00ff94",
-        "note": "6 agents (memory, history)"
-      }
-    ]
-  }
-}
\ No newline at end of file
+{
+  "version": "1.0.0",
+  "generated": "2026-05-24T01:00:00Z",
+  "source": "ollama-cloud-models-v2026-05-24",
+  "total_agents": 34,
+  "total_models_tracked": 13,
+  "providers": ["ollama-cloud"],
+  "models": [
+    {
+      "id": "deepseek-v4-pro-max",
+      "name": "DeepSeek V4-Pro Max",
+      "organization": "DeepSeek",
+      "parameters": "1.6T/49B active MoE",
+      "context_window": "1M",
+      "swe_bench": 80.6,
+      "if_score": 89,
+      "categories": ["coding", "agent", "reasoning"],
+      "provider": "ollama-cloud",
+      "updated": "2026-05-03",
+      "pulls": "71.6K"
+    },
+    {
+      "id": "deepseek-v4-flash",
+      "name": "DeepSeek V4-Flash",
+      "organization": "DeepSeek",
+      "parameters": "284B/13B active MoE",
+      "context_window": "1M",
+      "swe_bench": 79,
+      "if_score": 86,
+      "categories": ["coding", "efficient", "agent"],
+      "provider": "ollama-cloud",
+      "updated": "2026-05-03",
+      "pulls": "84.4K"
+    },
+    {
+      "id": "kimi-k2.6",
+      "name": "Kimi K2.6",
+      "organization": "Moonshot AI",
+      "parameters": "1T/32B active MoE",
+      "context_window": "256K→1M",
+      "swe_bench": 80.2,
+      "if_score": 91,
+      "categories": ["coding", "agent", "multimodal", "vision"],
+      "provider": "ollama-cloud",
+      "updated": "2026-04-24",
+      "pulls": "259.7K"
+    },
+    {
+      "id": "kimi-k2.5",
+      "name": "Kimi K2.5",
+      "organization": "Moonshot AI",
+      "parameters": "1T/32B active MoE",
+      "context_window": "256K",
+      "swe_bench": 78,
+      "if_score": 90,
+      "categories": ["coding", "agent", "multimodal", "vision"],
+      "provider": "ollama-cloud",
+      "updated": "2026-02-24",
+      "pulls": "293.2K"
+    },
+    {
+      "id": "qwen3-coder-480b",
+      "name": "Qwen3-Coder 480B",
+      "organization": "Qwen",
+      "parameters": "480B/35B active",
+      "context_window": "256K→1M",
+      "swe_bench": 66.5,
+      "if_score": 88,
+      "categories": ["coding", "agent"],
+      "provider": "ollama-cloud",
+      "updated": "2026-02-24",
+      "pulls": "N/A (legacy track)"
+    },
+    {
+      "id": "qwen3.5-122b",
+      "name": "Qwen 3.5 122B",
+      "organization": "Qwen",
+      "parameters": "122B/10B active",
+      "context_window": "128K",
+      "swe_bench": null,
+      "if_score": 92,
+      "categories": ["reasoning", "efficient", "vision", "tools"],
+      "provider": "ollama-cloud",
+      "updated": "2026-05-22",
+      "pulls": "12.4M"
+    },
+    {
+      "id": "gemma4-27b",
+      "name": "Gemma 4 (27B)",
+      "organization": "Google",
+      "parameters": "27B",
+      "context_window": "128K",
+      "swe_bench": null,
+      "if_score": 85,
+      "categories": ["coding", "agent", "reasoning", "vision", "audio"],
+      "provider": "ollama-cloud",
+      "updated": "2026-05-22",
+      "pulls": "10.1M",
+      "note": "Updated 2 days ago. Frontier-level performance at each size."
+    },
+    {
+      "id": "minimax-m2.5",
+      "name": "MiniMax M2.5",
+      "organization": "MiniMax",
+      "parameters": "MoE undisclosed",
+      "context_window": "128K",
+      "swe_bench": 80.2,
+      "if_score": 82,
+      "categories": ["coding", "agent"],
+      "provider": "ollama-cloud",
+      "updated": "2026-02-24",
+      "pulls": "2.2M"
+    },
+    {
+      "id": "minimax-m2.7",
+      "name": "MiniMax M2.7",
+      "organization": "MiniMax",
+      "parameters": "~10B active",
+      "context_window": "128K",
+      "swe_bench": 78,
+      "if_score": 80,
+      "categories": ["coding", "agent", "efficient"],
+      "provider": "ollama-cloud",
+      "updated": "2026-03-24",
+      "pulls": "2.2M"
+    },
+    {
+      "id": "glm-5.1",
+      "name": "GLM-5.1",
+      "organization": "Z.ai",
+      "parameters": "744B/40B active",
+      "context_window": "128K",
+      "swe_bench": null,
+      "if_score": 90,
+      "categories": ["reasoning", "agent"],
+      "provider": "ollama-cloud",
+      "updated": "2026-04-24",
+      "pulls": "2.2M",
+      "note": "Next-gen flagship. SWE-Bench Pro SOTA."
+    },
+    {
+      "id": "glm-5",
+      "name": "GLM-5",
+      "organization": "Z.ai",
+      "parameters": "744B/40B active",
+      "context_window": "128K",
+      "swe_bench": null,
+      "if_score": 90,
+      "categories": ["reasoning", "agent"],
+      "provider": "ollama-cloud",
+      "updated": "2026-02-24",
+      "pulls": "2.3M"
+    },
+    {
+      "id": "nemotron-3-super",
+      "name": "Nemotron 3 Super",
+      "organization": "NVIDIA",
+      "parameters": "120B/12B active",
+      "context_window": "1M",
+      "swe_bench": 60.5,
+      "if_score": 78,
+      "categories": ["agent", "reasoning", "efficient"],
+      "provider": "ollama-cloud",
+      "updated": "2026-03-24",
+      "pulls": "2.4M"
+    },
+    {
+      "id": "nemotron-3-nano",
+      "name": "Nemotron 3 Nano",
+      "organization": "NVIDIA",
+      "parameters": "30B/4B",
+      "context_window": "128K",
+      "swe_bench": null,
+      "if_score": 68,
+      "categories": ["agent", "efficient"],
+      "provider": "ollama-cloud",
+      "updated": "2026-03-24",
+      "pulls": "453K"
+    },
+    {
+      "id": "devstral-2",
+      "name": "Devstral 2",
+      "organization": "Mistral / Devstral",
+      "parameters": "123B",
+      "context_window": "128K",
+      "swe_bench": null,
+      "if_score": 80,
+      "categories": ["coding", "agent"],
+      "provider": "ollama-cloud",
+      "updated": "2026-02-24",
+      "pulls": "223.2K"
+    },
+    {
+      "id": "devstral-small-2",
+      "name": "Devstral Small 2",
+      "organization": "Mistral / Devstral",
+      "parameters": "24B",
+      "context_window": "128K",
+      "swe_bench": null,
+      "if_score": 75,
+      "categories": ["coding", "agent"],
+      "provider": "ollama-cloud",
+      "updated": "2026-02-24",
+      "pulls": "838.8K"
+    }
+  ],
+  "if_scores": {
+    "deepseek-v4-pro-max": 89,
+    "deepseek-v4-flash": 86,
+    "kimi-k2.6": 91,
+    "kimi-k2.5": 90,
+    "qwen3-coder-480b": 88,
+    "qwen3.5-122b": 92,
+    "gemma4-27b": 85,
+    "minimax-m2.5": 82,
+    "minimax-m2.7": 80,
+    "glm-5.1": 90,
+    "glm-5": 90,
+    "nemotron-3-super": 78,
+    "nemotron-3-nano": 68,
+    "devstral-2": 80,
+    "devstral-small-2": 75
+  },
+  "agent_model_scores": [
+    {
+      "agent": "lead-developer",
+      "current_model_index": 0,
+      "scores": {
+        "qwen3-coder-480b": 92,
+        "deepseek-v4-pro-max": 88,
+        "deepseek-v4-flash": 85,
+        "kimi-k2.6": 90,
+        "kimi-k2.5": 88,
+        "qwen3.5-122b": 86,
+        "gemma4-27b": 83,
+        "minimax-m2.5": 86,
+        "minimax-m2.7": 82,
+        "glm-5.1": 68,
+        "nemotron-3-super": 70,
+        "devstral-2": 84,
+        "devstral-small-2": 78
+      }
+    },
+    {
+      "agent": "frontend-developer",
+      "scores": {
+        "qwen3-coder-480b": 86,
+        "deepseek-v4-pro-max": 82,
+        "deepseek-v4-flash": 80,
+        "kimi-k2.6": 86,
+        "kimi-k2.5": 84,
+        "qwen3.5-122b": 84,
+        "gemma4-27b": 85,
+        "minimax-m2.5": 92,
+        "minimax-m2.7": 88,
+        "glm-5.1": 56,
+        "nemotron-3-super": 62,
+        "devstral-2": 80,
+        "devstral-small-2": 74
+      }
+    },
+    {
+      "agent": "backend-developer",
+      "scores": {
+        "qwen3-coder-480b": 91,
+        "deepseek-v4-pro-max": 86,
+        "kimi-k2.6": 90,
+        "qwen3.5-122b": 85,
+        "gemma4-27b": 84,
+        "minimax-m2.5": 84,
+        "minimax-m2.7": 80,
+        "glm-5.1": 63,
+        "nemotron-3-super": 68,
+        "devstral-2": 82,
+        "devstral-small-2": 76
+      }
+    },
+    {
+      "agent": "go-developer",
+      "scores": {
+        "qwen3-coder-480b": 85,
+        "deepseek-v4-pro-max": 88,
+        "deepseek-v4-flash": 84,
+        "kimi-k2.6": 86,
+        "qwen3.5-122b": 80,
+        "gemma4-27b": 80,
+        "minimax-m2.5": 78,
+        "minimax-m2.7": 74,
+        "glm-5.1": 58,
+        "nemotron-3-super": 66,
+        "devstral-2": 82,
+        "devstral-small-2": 74
+      }
+    },
+    {
+      "agent": "python-developer",
+      "scores": {
+        "qwen3-coder-480b": 90,
+        "deepseek-v4-pro-max": 78,
+        "kimi-k2.6": 88,
+        "qwen3.5-122b": 86,
+        "gemma4-27b": 82,
+        "minimax-m2.5": 82,
+        "minimax-m2.7": 78,
+        "glm-5.1": 60,
+        "nemotron-3-super": 66,
+        "devstral-2": 86,
+        "devstral-small-2": 80
+      }
+    },
+    {
+      "agent": "php-developer",
+      "scores": {
+        "qwen3-coder-480b": 87,
+        "deepseek-v4-pro-max": 74,
+        "kimi-k2.6": 86,
+        "qwen3.5-122b": 84,
+        "gemma4-27b": 82,
+        "minimax-m2.5": 76,
+        "minimax-m2.7": 72,
+        "glm-5.1": 56,
+        "nemotron-3-super": 64,
+        "devstral-2": 80,
+        "devstral-small-2": 74
+      }
+    },
+    {
+      "agent": "devops-engineer",
+      "scores": {
+        "qwen3-coder-480b": 66,
+        "deepseek-v4-pro-max": 80,
+        "kimi-k2.6": 88,
+        "qwen3.5-122b": 75,
+        "gemma4-27b": 78,
+        "minimax-m2.5": 53,
+        "minimax-m2.7": 48,
+        "glm-5.1": 75,
+        "nemotron-3-super": 78,
+        "devstral-2": 72,
+        "devstral-small-2": 68
+      }
+    },
+    {
+      "agent": "sdet-engineer",
+      "scores": {
+        "qwen3-coder-480b": 88,
+        "deepseek-v4-pro-max": 84,
+        "kimi-k2.6": 87,
+        "qwen3.5-122b": 86,
+        "gemma4-27b": 82,
+        "minimax-m2.5": 84,
+        "minimax-m2.7": 80,
+        "glm-5.1": 63,
+        "nemotron-3-super": 70,
+        "devstral-2": 86,
+        "devstral-small-2": 80
+      }
+    },
+    {
+      "agent": "code-skeptic",
+      "scores": {
+        "qwen3-coder-480b": 82,
+        "deepseek-v4-pro-max": 82,
+        "kimi-k2.6": 82,
+        "qwen3.5-122b": 80,
+        "gemma4-27b": 80,
+        "minimax-m2.5": 85,
+        "minimax-m2.7": 80,
+        "glm-5.1": 72,
+        "nemotron-3-super": 73,
+        "devstral-2": 82,
+        "devstral-small-2": 76
+      }
+    },
+    {
+      "agent": "security-auditor",
+      "scores": {
+        "qwen3-coder-480b": 76,
+        "deepseek-v4-pro-max": 80,
+        "kimi-k2.6": 80,
+        "qwen3.5-122b": 78,
+        "gemma4-27b": 78,
+        "minimax-m2.5": 74,
+        "minimax-m2.7": 68,
+        "glm-5.1": 68,
+        "nemotron-3-super": 76,
+        "devstral-2": 78,
+        "devstral-small-2": 72
+      }
+    },
+    {
+      "agent": "performance-engineer",
+      "scores": {
+        "qwen3-coder-480b": 78,
+        "deepseek-v4-pro-max": 84,
+        "kimi-k2.6": 82,
+        "qwen3.5-122b": 76,
+        "gemma4-27b": 76,
+        "minimax-m2.5": 75,
+        "minimax-m2.7": 70,
+        "glm-5.1": 74,
+        "nemotron-3-super": 78,
+        "devstral-2": 80,
+        "devstral-small-2": 74
+      }
+    },
+    {
+      "agent": "the-fixer",
+      "scores": {
+        "qwen3-coder-480b": 89,
+        "deepseek-v4-pro-max": 88,
+        "kimi-k2.6": 90,
+        "qwen3.5-122b": 86,
+        "gemma4-27b": 82,
+        "minimax-m2.5": 88,
+        "minimax-m2.7": 84,
+        "glm-5.1": 64,
+        "nemotron-3-super": 71,
+        "devstral-2": 86,
+        "devstral-small-2": 82
+      }
+    },
+    {
+      "agent": "browser-automation",
+      "scores": {
+        "qwen3-coder-480b": 87,
+        "deepseek-v4-pro-max": 82,
+        "kimi-k2.6": 86,
+        "qwen3.5-122b": 82,
+        "gemma4-27b": 84,
+        "minimax-m2.5": 72,
+        "minimax-m2.7": 68,
+        "glm-5.1": 53,
+        "nemotron-3-super": 61,
+        "devstral-2": 80,
+        "devstral-small-2": 74
+      }
+    },
+    {
+      "agent": "visual-tester",
+      "scores": {
+        "qwen3-coder-480b": 82,
+        "deepseek-v4-pro-max": 76,
+        "kimi-k2.6": 78,
+        "qwen3.5-122b": 76,
+        "gemma4-27b": 78,
+        "minimax-m2.5": 68,
+        "minimax-m2.7": 64,
+        "glm-5.1": 48,
+        "nemotron-3-super": 55,
+        "devstral-2": 74,
+        "devstral-small-2": 68
+      }
+    },
+    {
+      "agent": "system-analyst",
+      "scores": {
+        "qwen3-coder-480b": 70,
+        "deepseek-v4-pro-max": 88,
+        "kimi-k2.6": 86,
+        "qwen3.5-122b": 82,
+        "gemma4-27b": 82,
+        "minimax-m2.5": 66,
+        "minimax-m2.7": 63,
+        "glm-5.1": 82,
+        "nemotron-3-super": 74,
+        "devstral-2": 80,
+        "devstral-small-2": 74
+      }
+    },
+    {
+      "agent": "capability-analyst",
+      "scores": {
+        "qwen3-coder-480b": 72,
+        "deepseek-v4-pro-max": 82,
+        "kimi-k2.6": 82,
+        "qwen3.5-122b": 80,
+        "gemma4-27b": 80,
+        "minimax-m2.5": 68,
+        "minimax-m2.7": 66,
+        "glm-5.1": 78,
+        "nemotron-3-super": 76,
+        "devstral-2": 78,
+        "devstral-small-2": 72
+      }
+    },
+    {
+      "agent": "orchestrator",
+      "scores": {
+        "qwen3-coder-480b": 74,
+        "deepseek-v4-pro-max": 86,
+        "kimi-k2.6": 92,
+        "qwen3.5-122b": 84,
+        "gemma4-27b": 82,
+        "minimax-m2.5": 70,
+        "minimax-m2.7": 68,
+        "glm-5.1": 82,
+        "nemotron-3-super": 80,
+        "devstral-2": 80,
+        "devstral-small-2": 74
+      }
+    },
+    {
+      "agent": "release-manager",
+      "scores": {
+        "qwen3-coder-480b": 72,
+        "deepseek-v4-pro-max": 78,
+        "kimi-k2.6": 78,
+        "qwen3.5-122b": 76,
+        "gemma4-27b": 76,
+        "minimax-m2.5": 66,
+        "minimax-m2.7": 64,
+        "glm-5.1": 76,
+        "nemotron-3-super": 74,
+        "devstral-2": 76,
+        "devstral-small-2": 70
+      }
+    },
+    {
+      "agent": "evaluator",
+      "scores": {
+        "qwen3-coder-480b": 70,
+        "deepseek-v4-pro-max": 84,
+        "kimi-k2.6": 84,
+        "qwen3.5-122b": 82,
+        "gemma4-27b": 80,
+        "minimax-m2.5": 73,
+        "minimax-m2.7": 70,
+        "glm-5.1": 78,
+        "nemotron-3-super": 78,
+        "devstral-2": 80,
+        "devstral-small-2": 74
+      }
+    },
+    {
+      "agent": "prompt-optimizer",
+      "scores": {
+        "qwen3-coder-480b": 76,
+        "deepseek-v4-pro-max": 80,
+        "kimi-k2.6": 82,
+        "qwen3.5-122b": 82,
+        "gemma4-27b": 80,
+        "minimax-m2.5": 74,
+        "minimax-m2.7": 72,
+        "glm-5.1": 75,
+        "nemotron-3-super": 76,
+        "devstral-2": 80,
+        "devstral-small-2": 74
+      }
+    },
+    {
+      "agent": "product-owner",
+      "scores": {
+        "qwen3-coder-480b": 60,
+        "deepseek-v4-pro-max": 76,
+        "kimi-k2.6": 76,
+        "qwen3.5-122b": 76,
+        "gemma4-27b": 76,
+        "minimax-m2.5": 56,
+        "minimax-m2.7": 54,
+        "glm-5.1": 78,
+        "nemotron-3-super": 74,
+        "devstral-2": 76,
+        "devstral-small-2": 70
+      }
+    },
+    {
+      "agent": "pipeline-judge",
+      "scores": {
+        "qwen3-coder-480b": 64,
+        "deepseek-v4-pro-max": 82,
+        "kimi-k2.6": 84,
+        "qwen3.5-122b": 82,
+        "gemma4-27b": 80,
+        "minimax-m2.5": 68,
+        "minimax-m2.7": 65,
+        "glm-5.1": 76,
+        "nemotron-3-super": 78,
+        "devstral-2": 78,
+        "devstral-small-2": 72
+      }
+    },
+    {
+      "agent": "workflow-architect",
+      "scores": {
+        "qwen3-coder-480b": 68,
+        "deepseek-v4-pro-max": 80,
+        "kimi-k2.6": 82,
+        "qwen3.5-122b": 80,
+        "gemma4-27b": 80,
+        "minimax-m2.5": 62,
+        "minimax-m2.7": 60,
+        "glm-5.1": 76,
+        "nemotron-3-super": 76,
+        "devstral-2": 78,
+        "devstral-small-2": 72
+      }
+    },
+    {
+      "agent": "markdown-validator",
+      "scores": {
+        "qwen3-coder-480b": 43,
+        "deepseek-v4-pro-max": 68,
+        "kimi-k2.6": 56,
+        "qwen3.5-122b": 56,
+        "gemma4-27b": 60,
+        "minimax-m2.5": 38,
+        "minimax-m2.7": 36,
+        "glm-5.1": 55,
+        "nemotron-3-super": 52,
+        "nemotron-3-nano": 70,
+        "devstral-2": 65,
+        "devstral-small-2": 62
+      }
+    },
+    {
+      "agent": "agent-architect",
+      "scores": {
+        "qwen3-coder-480b": 78,
+        "deepseek-v4-pro-max": 82,
+        "kimi-k2.6": 86,
+        "qwen3.5-122b": 80,
+        "gemma4-27b": 82,
+        "minimax-m2.5": 72,
+        "minimax-m2.7": 70,
+        "glm-5.1": 76,
+        "nemotron-3-super": 78,
+        "devstral-2": 80,
+        "devstral-small-2": 74
+      }
+    },
+    {
+      "agent": "planner",
+      "scores": {
+        "qwen3-coder-480b": 72,
+        "deepseek-v4-pro-max": 88,
+        "kimi-k2.6": 86,
+        "qwen3.5-122b": 86,
+        "gemma4-27b": 84,
+        "minimax-m2.5": 68,
+        "minimax-m2.7": 66,
+        "glm-5.1": 78,
+        "nemotron-3-super": 80,
+        "devstral-2": 84,
+        "devstral-small-2": 78
+      }
+    },
+    {
+      "agent": "reflector",
+      "scores": {
+        "qwen3-coder-480b": 68,
+        "deepseek-v4-pro-max": 84,
+        "kimi-k2.6": 80,
+        "qwen3.5-122b": 80,
+        "gemma4-27b": 80,
+        "minimax-m2.5": 66,
+        "minimax-m2.7": 64,
+        "glm-5.1": 76,
+        "nemotron-3-super": 78,
+        "devstral-2": 82,
+        "devstral-small-2": 76
+      }
+    },
+    {
+      "agent": "memory-manager",
+      "scores": {
+        "qwen3-coder-480b": 63,
+        "deepseek-v4-pro-max": 86,
+        "kimi-k2.6": 84,
+        "qwen3.5-122b": 85,
+        "gemma4-27b": 82,
+        "minimax-m2.5": 58,
+        "minimax-m2.7": 56,
+        "glm-5.1": 72,
+        "nemotron-3-super": 86,
+        "devstral-2": 78,
+        "devstral-small-2": 72
+      }
+    },
+    {
+      "agent": "architect-indexer",
+      "scores": {
+        "qwen3-coder-480b": 70,
+        "deepseek-v4-pro-max": 78,
+        "kimi-k2.6": 84,
+        "qwen3.5-122b": 80,
+        "gemma4-27b": 80,
+        "minimax-m2.5": 64,
+        "minimax-m2.7": 62,
+        "glm-5.1": 80,
+        "nemotron-3-super": 74,
+        "devstral-2": 78,
+        "devstral-small-2": 72
+      }
+    },
+    {
+      "agent": "flutter-developer",
+      "scores": {
+        "qwen3-coder-480b": 86,
+        "deepseek-v4-pro-max": 78,
+        "kimi-k2.6": 84,
+        "qwen3.5-122b": 84,
+        "gemma4-27b": 84,
+        "minimax-m2.5": 70,
+        "minimax-m2.7": 66,
+        "glm-5.1": 53,
+        "nemotron-3-super": 60,
+        "devstral-2": 78,
+        "devstral-small-2": 74
+      }
+    }
+  ],
+  "agent_current_config": [
+    { "agent": "lead-developer",         "model": "ollama-cloud/qwen3-coder:480b",    "fit_score": 92, "status": "optimal" },
+    { "agent": "frontend-developer",     "model": "ollama-cloud/minimax-m2.5",        "fit_score": 92, "status": "optimal" },
+    { "agent": "backend-developer",      "model": "ollama-cloud/qwen3-coder:480b",    "fit_score": 91, "status": "optimal" },
+    { "agent": "go-developer",           "model": "ollama-cloud/deepseek-v4-pro-max", "fit_score": 88, "status": "optimal" },
+    { "agent": "python-developer",       "model": "ollama-cloud/qwen3-coder:480b",    "fit_score": 90, "status": "optimal" },
+    { "agent": "php-developer",          "model": "ollama-cloud/qwen3-coder:480b",    "fit_score": 87, "status": "optimal" },
+    { "agent": "flutter-developer",      "model": "ollama-cloud/qwen3-coder:480b",    "fit_score": 86, "status": "optimal" },
+    { "agent": "devops-engineer",        "model": "ollama-cloud/kimi-k2.6",            "fit_score": 88, "status": "optimal" },
+    { "agent": "sdet-engineer",          "model": "ollama-cloud/qwen3-coder:480b",    "fit_score": 88, "status": "optimal" },
+    { "agent": "code-skeptic",           "model": "ollama-cloud/minimax-m2.5",        "fit_score": 85, "status": "optimal" },
+    { "agent": "security-auditor",       "model": "ollama-cloud/deepseek-v4-pro-max", "fit_score": 80, "status": "good" },
+    { "agent": "performance-engineer",   "model": "ollama-cloud/deepseek-v4-pro-max", "fit_score": 84, "status": "optimal" },
+    { "agent": "the-fixer",              "model": "ollama-cloud/kimi-k2.6",            "fit_score": 90, "status": "optimal" },
+    { "agent": "browser-automation",     "model": "ollama-cloud/qwen3-coder:480b",    "fit_score": 87, "status": "optimal" },
+    { "agent": "visual-tester",          "model": "ollama-cloud/qwen3-coder:480b",    "fit_score": 82, "status": "good" },
+    { "agent": "system-analyst",         "model": "ollama-cloud/glm-5.1",              "fit_score": 82, "status": "good" },
+    { "agent": "capability-analyst",     "model": "ollama-cloud/glm-5.1",              "fit_score": 78, "status": "good" },
+    { "agent": "orchestrator",           "model": "ollama-cloud/kimi-k2.6",            "fit_score": 92, "status": "optimal" },
+    { "agent": "release-manager",        "model": "ollama-cloud/glm-5.1",              "fit_score": 76, "status": "good" },
+    { "agent": "evaluator",              "model": "ollama-cloud/glm-5.1",              "fit_score": 78, "status": "good" },
+    { "agent": "prompt-optimizer",       "model": "ollama-cloud/qwen3.5",              "fit_score": 82, "status": "recommended" },
+    { "agent": "product-owner",          "model": "ollama-cloud/glm-5.1",              "fit_score": 78, "status": "good" },
+    { "agent": "pipeline-judge",         "model": "ollama-cloud/glm-5.1",              "fit_score": 76, "status": "good" },
+    { "agent": "workflow-architect",     "model": "ollama-cloud/glm-5.1",              "fit_score": 76, "status": "good" },
+    { "agent": "markdown-validator",     "model": "ollama-cloud/deepseek-v4-pro-max", "fit_score": 68, "status": "poor" },
+    { "agent": "agent-architect",        "model": "ollama-cloud/kimi-k2.6",            "fit_score": 86, "status": "optimal" },
+    { "agent": "planner",              "model": "ollama-cloud/deepseek-v4-pro-max", "fit_score": 88, "status": "optimal" },
+    { "agent": "reflector",              "model": "ollama-cloud/deepseek-v4-pro-max", "fit_score": 84, "status": "optimal" },
+    { "agent": "memory-manager",         "model": "ollama-cloud/qwen3.5",              "fit_score": 85, "status": "recommended" },
+    { "agent": "architect-indexer",      "model": "ollama-cloud/glm-5.1",              "fit_score": 80, "status": "good" }
+  ],
+  "recommendations": [
+    {
+      "agent": "prompt-optimizer",
+      "from_model": "ollama-cloud/qwen3.6-plus (openrouter)",
+      "to_model": "ollama-cloud/qwen3.5",
+      "reason": "Migrated to Ollama Cloud. IF 92, vision+tools+thinking. Same quality, no rate limits.",
+      "impact": "high",
+      "applied": false
+    },
+    {
+      "agent": "memory-manager",
+      "from_model": "ollama-cloud/qwen3.6-plus (openrouter)",
+      "to_model": "ollama-cloud/qwen3.5",
+      "reason": "Migrated to Ollama Cloud. Caveat: qwen3.5 has a 128K context window, not 1M. Longer-context alternatives are kimi-k2.6 (256K) and deepseek-v4 (1M). Matrix scores for this role are close: qwen3.5=85, kimi-k2.6=84, deepseek=86.",
+      "impact": "high",
+      "applied": false
+    },
+    {
+      "agent": "markdown-validator",
+      "from_model": "ollama-cloud/deepseek-v4-pro-max",
+      "to_model": "ollama-cloud/nemotron-3-nano",
+      "reason": "Markdown validator scores are the lowest of any agent (68 on the current model). Nemotron-3-Nano has IF=68 but is tiny (30B/4B) and extremely cheap. For lightweight validation tasks, nano is sufficient.",
+      "impact": "medium",
+      "applied": false
+    },
+    {
+      "agent": "markdown-validator",
+      "from_model": "ollama-cloud/deepseek-v4-pro-max",
+      "to_model": "ollama-cloud/gemma4-27b",
+      "reason": "Gemma 4 is newest (2 days), frontier at each size. It scores 60 for validator versus 70 for nano, so nano is the better fit for this role; gemma4 is newer and more general.",
+      "impact": "low",
+      "applied": false
+    },
+    {
+      "agent": "system-analyst",
+      "from_model": "ollama-cloud/glm-5.1",
+      "to_model": "ollama-cloud/deepseek-v4-pro-max",
+      "reason": "Matrix: deepseek-v4-pro-max=88 vs glm-5.1=82. +6% quality, 1M context for architecture docs. GLM-5.1 still strong for standardization.",
+      "impact": "medium",
+      "applied": false
+    },
+    {
+      "agent": "evaluator",
+      "from_model": "ollama-cloud/glm-5.1",
+      "to_model": "ollama-cloud/kimi-k2.6",
+      "reason": "Matrix: kimi-k2.6=84 vs glm-5.1=78. +6%. IF=91 for scoring accuracy. High reasoning needed.",
+      "impact": "medium",
+      "applied": false
+    },
+    {
+      "agent": "evaluator",
+      "from_model": "ollama-cloud/glm-5.1",
+      "to_model": "ollama-cloud/deepseek-v4-pro-max",
+      "reason": "Alternative to kimi-k2.6. deepseek-v4-pro-max=84 (same as kimi), but 1M context. Could be better for large evaluation tasks.",
+      "impact": "medium",
+      "applied": false
+    },
+    {
+      "agent": "security-auditor",
+      "from_model": "ollama-cloud/deepseek-v4-pro-max",
+      "to_model": "ollama-cloud/kimi-k2.6",
+      "reason": "Matrix: tie, both score 80. kimi-k2.6 adds multimodal (vision) support, which could help with screenshot-based security analysis.",
+      "impact": "low",
+      "applied": false
+    },
+    {
+      "agent": "gemma4-trial",
+      "from_model": "none",
+      "to_model": "ollama-cloud/gemma4-27b",
+      "reason": "Gemma 4 is brand new (2 days), 10.1M pulls, frontier at each size, vision+audio+thinking. Could be game-changer for frontend-dev, browser-automation, visual-tester.",
+      "impact": "high",
+      "applied": false,
+      "note": "Requires A/B test on frontend task."
+    },
+    {
+      "agent": "qwen3.5-trial",
+      "from_model": "none",
+      "to_model": "ollama-cloud/qwen3.5-122b",
+      "reason": "Qwen 3.5 updated 2 days ago, 12.4M pulls, IF=92 (highest!), multimodal. Could replace GLM-5.1 for reasoning tasks and qwen3-coder for some coding tasks.",
+      "impact": "high",
+      "applied": false,
+      "note": "Requires A/B test on planner/evaluator tasks."
+    }
+  ],
+  "new_models_to_consider": [
+    {
+      "id": "gemma4-27b",
+      "priority": "critical",
+      "rationale": "Updated 2 days ago. 10.1M pulls. Frontier-level at each size. Vision + audio + thinking + tools + cloud. Potentially replaces qwen3-coder for some tasks."
+    },
+    {
+      "id": "qwen3.5-122b",
+      "priority": "critical",
+      "rationale": "Updated 2 days ago. 12.4M pulls. IF=92 highest among tracked. Multimodal. Could replace glm-5.1 for reasoning and compete with qwen3-coder for coding."
+    },
+    {
+      "id": "deepseek-v4-flash",
+      "priority": "medium",
+      "rationale": "Same family as pro-max but much faster (13B active vs 49B). Good for low-latency agents: code-skeptic, browser-automation."
+    },
+    {
+      "id": "devstral-2",
+      "priority": "medium",
+      "rationale": "123B model for tool use and codebase exploration. Could be strong for lead-developer on large projects."
+    }
+  ]
+}
diff --git a/agent-evolution/data/model-research-2026-05-24.md b/agent-evolution/data/model-research-2026-05-24.md
new file mode 100644
index 0000000..2d65320
--- /dev/null
+++ b/agent-evolution/data/model-research-2026-05-24.md
@@ -0,0 +1,111 @@
+# Agent Model Research Report — 2026-05-24
+
+## Executive Summary
+
+13 model changes recommended across 38 agents. 2 CRITICAL (prompt-optimizer, memory-manager on non-Ollama-Cloud models that must migrate). 4 HIGH priority. 5 MEDIUM. 2 LOW.
+
+9 models benchmarked but assigned to zero agents—wasted potential.
+
+## Composite Score Formula
+`composite = (IF_score * 0.5) + (SWE_bench * 0.3) + (context_kb / 1000 * 0.2)`
+
+| Model | IF | SWE | Ctx(K) | Composite | Pulls | Assigned |
+|-------|-----|------|--------|-----------|-------|----------|
+| kimi-k2.6 | 91 | 80.2 | 1000 | **69.76** | 259.7K | 7 agents |
+| deepseek-v4-pro-max | 89 | 80.6 | 1000 | **68.88** | 71.6K | 4 agents |
+| kimi-k2.5 | 90 | 78.0 | 256 | **68.45** | 293.2K | **0** |
+| deepseek-v4-flash | 86 | 79.0 | 1000 | **66.90** | 84.4K | **0** |
+| minimax-m2.5 | 82 | 80.2 | 128 | **65.09** | 2.2M | 2 agents |
+| qwen3-coder-480b | 88 | 66.5 | 1000 | **64.15** | N/A | 7 agents |
+| minimax-m2.7 | 80 | 78.0 | 128 | **63.43** | 2.2M | **0** |
+| nemotron-3-super | 78 | 60.5 | 1000 | **57.35** | 2.4M | 2 agents |
+| glm-5.1 | 90 | null | 128 | 45.03* | 2.2M | 8 agents |
+| glm-5 | 90 | null | 128 | 45.03* | 2.3M | **0** |
+| qwen3.5-122b | 92 | null | 128 | 46.03* | **12.4M** | **0** |
+| gemma4-27b | 85 | null | 128 | 42.53* | **10.1M** | **0** |
+| devstral-2 | 80 | null | 128 | 40.03* | 223.2K | **0** |
+| devstral-small-2 | 75 | null | 128 | 37.53* | 838.8K | **0** |
+| nemotron-3-nano | 68 | null | 128 | 34.03* | 453K | **0** |
+
+\* SWE-bench score missing, so the SWE term is counted as 0 and the composite is artificially low. Assuming SWE ≈ 75 would add roughly 20–25 points (75 × 0.3 = 22.5).
+
+## Concentration Risks
+
+| Model | Agents | Risk |
+|-------|--------|------|
+| glm-5.1 | 8 | All agents on model with NO SWE score |
+| kimi-k2.6 | 7 | Highest-quality model over-concentrated |
+| qwen3-coder-480b | 7 | SWE=66.5 below deepseek-v4-flash (79) |
+| deepseek-v4-pro-max | 4 | Expensive (49B active) |
+
+## Idle Models (0 agents assigned — wasted potential)
+
+| Model | Composite | Pulls | Why Idle |
+|-------|-----------|-------|----------|
+| qwen3.5-122b | 46.03* (est. ~68.5) | **12.4M** | Newest, highest IF=92, needs integration |
+| gemma4-27b | 42.53* (est. ~62) | **10.1M** | Multimodal, needs A/B for coding |
+| deepseek-v4-flash | 66.90 | 84.4K | Best efficiency, 13B active |
+| minimax-m2.7 | 63.43 | 2.2M | Self-evolving, could suit meta-agents |
+| glm-5 | 45.03* (est. ~67) | 2.3M | Superseded by glm-5.1 |
+| devstral-2 | 40.03* | 223.2K | Code exploration, alternative for coding |
+| devstral-small-2 | 37.53* | 838.8K | Lightweight, IF too low |
+| kimi-k2.5 | 68.45 | 293.2K | Superseded by k2.6 |
+| nemotron-3-nano | 34.03* | 453K | Ultra-lightweight for simple tasks |
+
+## Recommendations
+
+### CRITICAL
+
+| Agent | From | To | Delta | Rationale |
+|-------|------|-----|-------|-----------|
+| prompt-optimizer | qwen3.6-plus (**not Ollama Cloud**) | qwen3.5-122b (IF=92) | +10 | Must migrate. qwen3.6-plus not in Ollama Cloud. qwen3.5 highest IF=92. 12.4M pulls. |
+| memory-manager | qwen3.6-plus (**not Ollama Cloud**) | deepseek-v4-pro-max (IF=89, 1M ctx) | +1 | Must migrate. Memory-manager needs long context (1M). deepseek-v4-pro-max best for this. |
+
+### HIGH
+
+| Agent | From | To | Delta | Rationale |
+|-------|------|-----|-------|-----------|
+| system-analyst | glm-5.1 (matrix=82) | deepseek-v4-pro-max (matrix=88) | +6 | IF=89, SWE=80.6, 1M context for architecture docs. glm-5.1 has no SWE score. |
+| evaluator | glm-5.1 (matrix=78) | qwen3.5-122b (IF=92, est=82) | +4 | IF-critical role. qwen3.5-122b has highest IF=92. 12.4M pulls. |
+| pipeline-judge | glm-5.1 (matrix=76) | kimi-k2.6 (matrix=84) | +8 | Needs long context (pipeline logs). kimi-k2.6 IF=91, SWE=80.2, 1M ctx. |
+| workflow-architect | glm-5.1 (matrix=76) | qwen3.5-122b (est=80) | +4 | High IF for YAML/structured output. qwen3.5 IF=92. |
+
+### MEDIUM
+
+| Agent | From | To | Delta | Rationale |
+|-------|------|-----|-------|-----------|
+| markdown-validator | deepseek-v4-pro-max (matrix=68, expensive) | nemotron-3-nano (matrix=70, cheap, 4B) | +2 | Overkill to use 49B active model for markdown validation. nano cheaper + higher matrix score. |
+| release-manager | glm-5.1 (matrix=76) | kimi-k2.6 (matrix=78) | +2 | 1M context for large git diffs. IF=91 vs 90. |
+| capability-analyst | glm-5.1 (matrix=78) | deepseek-v4-pro-max (matrix=82) | +4 | 1M context for capability-index analysis. |
+| visual-tester | qwen3-coder-480b (matrix=82, no vision) | kimi-k2.6 (matrix=82, vision) | +0 (capabilities+) | Same matrix but kimi-k2.6 can SEE images. Multimodal advantage. |
+| browser-automation | qwen3-coder-480b (matrix=87, 35B active) | deepseek-v4-flash (IF=86, 13B active, 1M ctx) | ~-5 matrix (trade-off) | 3× faster inference. 1M context for complex DOM. |
+
+### LOW
+
+| Agent | From | To | Delta | Rationale |
+|-------|------|-----|-------|-----------|
+| history-miner | nemotron-3-super (IF=78, composite=57.35) | qwen3.5-122b (IF=92, 12.4M pulls) | +14 IF | Lowest model quality in pipeline. Easy upgrade. |
+| plan (built-in) | nemotron-3-super (IF=78) | deepseek-v4-pro-max (IF=89, matrix=88) | +11 IF | Align with planner subagent. |
+
+## Data Gaps
+
+| Model | Missing | Impact |
+|-------|---------|--------|
+| qwen3.5-122b | SWE-bench | Cannot confirm coding. IF-only role safe. |
+| gemma4-27b | SWE-bench | Newest release. Needs A/B for coding. |
+| glm-5.1 | SWE-bench | 8 agents! Unverified coding capability. |
+| devstral-2 | SWE-bench | Code model with no coding benchmark — risky. |
+| nemotron-3-nano | SWE-bench | Not needed: lightweight tasks only. |
+
+## Recently Updated Models (2 days old)
+
+- **qwen3.5-122b** (2026-05-22): 12.4M pulls since launch
+- **gemma4-27b** (2026-05-22): 10.1M pulls since launch, announced "frontier at each size"
+
+## Next Actions
+
+1. Apply CRITICAL: migrate prompt-optimizer + memory-manager
+2. Apply HIGH: system-analyst + evaluator + pipeline-judge + workflow-architect
+3. Run pipeline A/B test on qwen3.5-122b and deepseek-v4-flash
+4. Fill data gaps: collect SWE-bench for qwen3.5-122b and gemma4-27b
+5. Update dashboard to show idle model alerts
diff --git a/agent-evolution/data/model-research-latest.json b/agent-evolution/data/model-research-latest.json
index a88b409..e9177c2 100644
--- a/agent-evolution/data/model-research-latest.json
+++ b/agent-evolution/data/model-research-latest.json
@@ -1,59 +1,325 @@
 {
   "version": "1.0.0",
-  "generated": "2026-04-27T17:51:36.000Z",
-  "source": "/research model-optimization",
-  "models": [],
+  "generated": "2026-05-24T00:16:00Z",
+  "source": "orchestrator-deep-analysis",
+  "models": [
+    {
+      "id": "deepseek-v4-pro-max",
+      "name": "DeepSeek V4-Pro Max",
+      "organization": "DeepSeek",
+      "parameters": "1.6T/49B active MoE",
+      "context_window": "1M",
+      "swe_bench": 80.6,
+      "if_score": 89,
+      "categories": ["coding", "agent", "reasoning"],
+      "provider": "ollama-cloud"
+    },
+    {
+      "id": "kimi-k2-6",
+      "name": "Kimi K2.6",
+      "organization": "Moonshot AI",
+      "parameters": "1T/32B active MoE",
+      "context_window": "256K→1M",
+      "swe_bench": 80.2,
+      "if_score": 91,
+      "categories": ["coding", "agent", "multimodal"],
+      "provider": "ollama-cloud"
+    },
+    {
+      "id": "qwen3-coder-480b",
+      "name": "Qwen3-Coder 480B",
+      "organization": "Qwen",
+      "parameters": "480B/35B active",
+      "context_window": "256K→1M",
+      "swe_bench": 66.5,
+      "if_score": 88,
+      "categories": ["coding", "agent"],
+      "provider": "ollama-cloud"
+    },
+    {
+      "id": "minimax-m2.5",
+      "name": "MiniMax M2.5",
+      "organization": "MiniMax",
+      "parameters": "MoE undisclosed",
+      "context_window": "128K",
+      "swe_bench": 80.2,
+      "if_score": 82,
+      "categories": ["coding", "agent"],
+      "provider": "ollama-cloud"
+    },
+    {
+      "id": "glm-5.1",
+      "name": "GLM-5.1",
+      "organization": "Z.ai",
+      "parameters": "744B/40B active",
+      "context_window": "128K",
+      "swe_bench": null,
+      "if_score": 90,
+      "categories": ["reasoning", "agent"],
+      "provider": "ollama-cloud"
+    },
+    {
+      "id": "qwen3-6-plus",
+      "name": "Qwen 3.6 Plus",
+      "organization": "Qwen",
+      "parameters": "Hybrid MoE",
+      "context_window": "1M",
+      "swe_bench": 78.8,
+      "if_score": 91,
+      "categories": ["coding", "agent", "reasoning"],
+      "provider": "openrouter",
+      "note": "FREE on OpenRouter. Rate-limited."
+    }
+  ],
   "recommendations": [
     {
-      "agent": "lead-developer",
-      "action": "update_model",
-      "current_model": "ollama-cloud/qwen3-coder:480b",
-      "current_provider": "ollama-cloud",
-      "recommended_model": "ollama-cloud/nemotron-3-super",
-      "recommended_provider": "ollama-cloud",
+      "agent": "frontend-developer",
+      "action": "sync_to_source_of_truth",
+      "current_model_in_agent_versions": "ollama-cloud/qwen3-coder:480b",
+      "source_of_truth_model": "ollama-cloud/minimax-m2.5",
       "impact": "high",
       "expected_improvement": {
-        "quality": "+15%",
-        "speed": "+20%",
-        "context_window": "1M→1M"
+        "quality": "+6% (92 vs 86 in benchmark matrix)",
+        "speed": "~1x",
+        "context_window": "128K"
       },
-      "score_before": 85,
+      "score_before": 86,
       "score_after": 92,
-      "score_delta": 7,
-      "rationale": "Nemotron 3 Super has better reasoning for core development tasks and RULER@1M context window. SWE-bench 68% vs Qwen's 66.5%.",
+      "score_delta": 6,
+      "rationale": "agent-versions.json is stale. kilo-meta.json (source of truth) already has minimax-m2.5. Matrix score for frontend-dev on M2.5 = 92 (highest!). MiniMax also leads SWE-bench at 80.2%.",
       "applied": false,
       "applied_date": null
     },
     {
-      "agent": "devops-engineer",
-      "action": "confirm_model",
-      "current_model": "ollama-cloud/nemotron-3-super",
-      "current_provider": "ollama-cloud",
-      "recommended_model": "ollama-cloud/nemotron-3-super",
-      "recommended_provider": "ollama-cloud",
+      "agent": "lead-developer",
+      "action": "sync_to_source_of_truth",
+      "current_model_in_agent_versions": "ollama-cloud/nemotron-3-super",
+      "source_of_truth_model": "ollama-cloud/qwen3-coder:480b",
+      "impact": "high",
+      "expected_improvement": {
+        "quality": "+22% (92 vs 70 in benchmark matrix)",
+        "speed": "~1x",
+        "context_window": "256K→1M"
+      },
+      "score_before": 70,
+      "score_after": 92,
+      "score_delta": 22,
+      "rationale": "agent-versions.json shows nemotron-3-super (outdated). kilo-meta.json has qwen3-coder:480b. Matrix score: qwen3-coder 92 is the highest for lead-developer. SWE-bench 66.5% and massive coding context make it the SOTA choice.",
+      "applied": false,
+      "applied_date": null
+    },
+    {
+      "agent": "system-analyst",
+      "action": "consider_upgrade",
+      "current_model": "ollama-cloud/glm-5.1",
+      "recommended_model": "ollama-cloud/deepseek-v4-pro-max",
+      "impact": "medium",
+      "expected_improvement": {
+        "quality": "+6% (88 vs 82 in benchmark matrix)",
+        "speed": "~1x",
+        "context_window": "128K→1M"
+      },
+      "score_before": 82,
+      "score_after": 88,
+      "score_delta": 6,
+      "rationale": "system-analyst matrix: glm-5.1 = 82, deepseek-v4-pro-max = 88. 1M context is critical for architecture docs. However GLM-5.1 has Arena ELO 1451 and strong reasoning. Keep GLM-5.1 if standardization across 12 agents matters; otherwise deepseek-v4-pro-max gives measurable gain.",
+      "applied": false,
+      "applied_date": null
+    },
+    {
+      "agent": "evaluator",
+      "action": "consider_upgrade",
+      "current_model": "ollama-cloud/glm-5.1",
+      "recommended_model": "ollama-cloud/kimi-k2.6",
+      "impact": "medium",
+      "expected_improvement": {
+        "quality": "+6% (84 vs 78)",
+        "speed": "~1x",
+        "context_window": "128K→256K"
+      },
+      "score_before": 78,
+      "score_after": 84,
+      "score_delta": 6,
+      "rationale": "evaluator needs high IF and reasoning accuracy. kimi-k2-6 IF=91, matrix score 84 vs glm-5.1 78. Alternative: deepseek-v4-pro-max also 84.",
+      "applied": false,
+      "applied_date": null
+    },
+    {
+      "agent": "planner",
+      "action": "confirm_current",
+      "current_model": "ollama-cloud/deepseek-v4-pro-max",
       "impact": "low",
       "expected_improvement": {
-        "quality": "0%",
-        "speed": "0%",
-        "context_window": "1M→1M"
+        "quality": "0% (already optimal)",
+        "speed": "~1x",
+        "context_window": "1M"
       },
       "score_before": 88,
       "score_after": 88,
       "score_delta": 0,
-      "rationale": "Current model already optimal for DevOps tasks. Nemotron 3 Super's RULER@1M is critical for parsing complex Docker/Compose configs.",
+      "rationale": "planner is already on deepseek-v4-pro-max, which is the best model for this role (88). GPQA 90.1 confirms strong reasoning for chain-of-thought planning. No change needed.",
+      "applied": true,
+      "applied_date": "2026-04-27"
+    },
+    {
+      "agent": "reflector",
+      "action": "confirm_current",
+      "current_model": "ollama-cloud/deepseek-v4-pro-max",
+      "impact": "low",
+      "expected_improvement": {
+        "quality": "0% (already optimal)",
+        "speed": "~1x",
+        "context_window": "1M"
+      },
+      "score_before": 84,
+      "score_after": 84,
+      "score_delta": 0,
+      "rationale": "reflector already on deepseek-v4-pro-max (84), the best fit. Self-reflection requires strong reasoning chains; deepseek-v4 excels here.",
+      "applied": true,
+      "applied_date": "2026-04-27"
+    },
+    {
+      "agent": "workflow-architect",
+      "action": "consider_upgrade",
+      "current_model": "ollama-cloud/glm-5.1",
+      "recommended_model": "ollama-cloud/kimi-k2.6",
+      "impact": "medium",
+      "expected_improvement": {
+        "quality": "+6% (82 vs 76)",
+        "speed": "~1x",
+        "context_window": "128K→256K"
+      },
+      "score_before": 76,
+      "score_after": 82,
+      "score_delta": 6,
+      "rationale": "workflow-architect matrix: glm-5.1 = 76, kimi-k2-6 = 82. Alternative deepseek-v4-pro-max = 80.",
       "applied": false,
       "applied_date": null
+    },
+    {
+      "agent": "pipeline-judge",
+      "action": "consider_free_tier",
+      "current_model": "ollama-cloud/glm-5.1",
+      "recommended_model": "openrouter/qwen3-6-plus:free",
+      "impact": "low",
+      "expected_improvement": {
+        "quality": "+4% (80 vs 76)",
+        "speed": "~1x (rate-limited)",
+        "context_window": "128K→1M"
+      },
+      "score_before": 76,
+      "score_after": 80,
+      "score_delta": 4,
+      "rationale": "qwen3-6-plus is FREE on OpenRouter with IF=91 and SWE-bench 78.8. For pipeline-judge (measurement-only, no code writing) free tier can cut costs. BUT: OpenRouter free has strict rate limits; verify before production.",
+      "applied": false,
+      "applied_date": null
+    },
+    {
+      "agent": "orchestrator",
+      "action": "confirm_current",
+      "current_model": "ollama-cloud/kimi-k2.6",
+      "impact": "low",
+      "expected_improvement": {
+        "quality": "0% (already optimal)",
+        "speed": "~1x",
+        "context_window": "256K"
+      },
+      "score_before": 92,
+      "score_after": 92,
+      "score_delta": 0,
+      "rationale": "orchestrator on kimi-k2.6 is the absolute best fit (92). 300 sub-agent swarm capability aligns with orchestration needs. IF=91 ensures routing accuracy.",
+      "applied": true,
+      "applied_date": "2026-04-27"
+    },
+    {
+      "agent": "the-fixer",
+      "action": "confirm_current",
+      "current_model": "ollama-cloud/kimi-k2.6",
+      "impact": "low",
+      "expected_improvement": {
+        "quality": "0% (already optimal)",
+        "speed": "~1x",
+        "context_window": "256K"
+      },
+      "score_before": 90,
+      "score_after": 90,
+      "score_delta": 0,
+      "rationale": "the-fixer on kimi-k2.6 (90) is optimal. SWE-Pro 58.6 (#1!) and strong bug-fixing capabilities make it the best choice. MiniMax M2.5 and DeepSeek V4-Pro Max tie at 88, but kimi-k2-6 leads.",
+      "applied": true,
+      "applied_date": "2026-04-27"
+    },
+    {
+      "agent": "memory-manager",
+      "action": "confirm_current",
+      "current_model": "ollama-cloud/qwen3.6-plus",
+      "impact": "low",
+      "expected_improvement": {
+        "quality": "0% (already optimal)",
+        "speed": "~1x",
+        "context_window": "1M"
+      },
+      "score_before": 87,
+      "score_after": 87,
+      "score_delta": 0,
+      "rationale": "memory-manager on qwen3.6-plus (87) is the best fit. 1M context is critical for memory operations. DeepSeek V4-Pro Max and Nemotron-3-Super tie at 86.",
+      "applied": true,
+      "applied_date": "2026-04-27"
+    }
+  ],
+  "data_gaps": [
+    {
+      "gap": "performance_log is empty for ALL agents",
+      "severity": "critical",
+      "impact": "Cannot compute Avg Score, Success Rate, Avg Duration",
+      "action": "Instrument agent-executions.jsonl parser into sync-agent-history.ts to populate performance_log from Gitea issue comments"
+    },
+    {
+      "gap": "No latency / TPS per model",
+      "severity": "high",
+      "impact": "Cannot optimize speed or cost-per-token for high-frequency agents (orchestrator, code-skeptic)",
+      "action": "Add timing instrumentation to pipeline-judge and log wall-clock time per agent invocation"
+    },
+    {
+      "gap": "No invocation frequency / heatmap per agent",
+      "severity": "medium",
+      "impact": "Cannot identify bottlenecks or overused agents; no data for load-balancing decisions",
+      "action": "Add invocation counter to agent-executions.jsonl and build frequency heatmap in dashboard"
+    },
+    {
+      "gap": "No A/B test results for model changes",
+      "severity": "medium",
+      "impact": "Recommendations are purely benchmark-based, not validated with real pipeline data",
+      "action": "After any model change, run 5 pipeline iterations and compare fitness scores before/after"
+    },
+    {
+      "gap": "Missing cost data for OpenRouter free-tier agents",
+      "severity": "medium",
+      "impact": "Cannot compute true ROI for pipeline-judge / evaluator if switched to free models",
+      "action": "Track actual token consumption per provider and compute $/task"
+    },
+    {
+      "gap": "Stale agent-versions.json (not synced with kilo-meta.json)",
+      "severity": "high",
+      "impact": "Dashboard shows incorrect current models for 8+ agents; recommendations targeting wrong baseline",
+      "action": "Run sync-agent-history.ts with kilo-meta.json as primary source and fix JSON parse error in kilo.jsonc"
+    },
+    {
+      "gap": "No custom benchmark for markdown-validator",
+      "severity": "low",
+      "impact": "markdown-validator scores are lowest across matrix (68 max). Need lightweight-model benchmark.",
+      "action": "Create micro-benchmark for YAML frontmatter validation and test nano/instant models"
     }
   ],
-  "heatmap": {},
-  "closed_source_comparison": {},
-  "capability_index_patch": [],
   "summary": {
-    "avg_quality_improvement": "+7.5%",
-    "providers_used": ["ollama-cloud"],
-    "key_models": ["nemotron-3-super"],
-    "total_recommendations": 2,
-    "applied_count": 0,
-    "pending_count": 2
+    "agents_total": 34,
+    "agents_optimal": 22,
+    "agents_need_sync": 2,
+    "agents_need_upgrade": 4,
+    "agents_consider_free_tier": 1,
+    "avg_quality_improvement_potential": "+4.2%",
+    "providers_used": ["ollama-cloud", "openrouter"],
+    "key_models": ["kimi-k2.6", "deepseek-v4-pro-max", "qwen3-coder-480b", "minimax-m2.5", "glm-5.1"],
+    "pending_recommendations": 11,
+    "critical_data_gaps": 2
   }
-}
\ No newline at end of file
+}
diff --git a/agent-evolution/docker-compose.yml b/agent-evolution/docker-compose.yml
index 61ebbea..3bd1e69 100644
--- a/agent-evolution/docker-compose.yml
+++ b/agent-evolution/docker-compose.yml
@@ -1,6 +1,11 @@
-# Docker Compose for Agent Evolution Dashboard
-# Usage: docker-compose -f docker-compose.evolution.yml up -d
-
+# Docker Compose for Agent Evolution Dashboard (mount-driven, no-rebuild)
+# Usage:
+#   docker compose --project-directory . -f agent-evolution/docker-compose.yml up -d   # run from repo root; paths below are repo-root-relative
+#   # Edit any file in agent-evolution/ or .kilo/ on host → instant reflection
+#   # Just run:
+#     bun run sync:evolution
+#   # and reload the page
+#
 version: '3.8'
 
 services:
@@ -8,17 +13,16 @@ services:
     build:
       context: .
       dockerfile: agent-evolution/Dockerfile
-      target: production
     container_name: apaw-evolution
     ports:
       - "3001:3001"
     volumes:
-      # Mount data directory for live updates
+      # Mount the generated standalone HTML to the container's web root
+      - ./agent-evolution/index.standalone.html:/app/index.html:ro
+      # Mount data directory for any additional assets
       - ./agent-evolution/data:/app/data:ro
-      # Mount for reading source files (optional, for sync)
-      - ./.kilo/agents:/app/kilo/agents:ro
-      - ./.kilo/capability-index.yaml:/app/kilo/capability-index.yaml:ro
-      - ./.kilo/kilo.jsonc:/app/kilo/kilo.jsonc:ro
+      # Mount .kilo directory for live config access
+      - ./.kilo:/app/kilo:ro
     environment:
       - NODE_ENV=production
       - TZ=UTC
diff --git a/agent-evolution/docker-run.bat b/agent-evolution/docker-run.bat
index 0450ee7..75cdd1c 100644
--- a/agent-evolution/docker-run.bat
+++ b/agent-evolution/docker-run.bat
@@ -1,12 +1,17 @@
 @echo off
 REM Agent Evolution Dashboard - Docker Management Script (Windows)
+REM Mount-driven: no rebuild required after file changes.
+REM
+REM Quick start:
+REM   1. docker-run.bat run           :: start container once
+REM   2. edit files + bun run sync:evolution
+REM   3. docker-run.bat reload         :: restart container to pick up latest files (no rebuild)
 
 setlocal enabledelayedexpansion
 
 set IMAGE_NAME=apaw-evolution
 set CONTAINER_NAME=apaw-evolution-dashboard
 set PORT=3001
-set DATA_DIR=.\agent-evolution\data
 
 REM Colors (limited in Windows CMD)
 set RED=[91m
@@ -20,12 +25,12 @@ if "%1"=="build" goto build
 if "%1"=="run" goto run
 if "%1"=="stop" goto stop
 if "%1"=="restart" goto restart
+if "%1"=="reload" goto reload
 if "%1"=="logs" goto logs
 if "%1"=="open" goto open
 if "%1"=="sync" goto sync
 if "%1"=="status" goto status
 if "%1"=="clean" goto clean
-if "%1"=="dev" goto dev
 if "%1"=="help" goto help
 goto unknown
 
@@ -43,7 +48,7 @@ goto :eof
 
 :build
 call :log_info Building Docker image...
-docker build -t %IMAGE_NAME%:latest -f agent-evolution/Dockerfile --target production .
+docker build -t %IMAGE_NAME%:latest -f agent-evolution/Dockerfile .
 if errorlevel 1 (
     call :log_error Build failed
     exit /b 1
@@ -56,7 +61,8 @@ REM Check if already running
 docker ps -q --filter "name=%CONTAINER_NAME%" 2>nul | findstr /r . >nul
 if not errorlevel 1 (
     call :log_warn Container %CONTAINER_NAME% is already running
-    call :log_info Use 'docker-run.bat restart' to restart it
+    call :log_info Use 'docker-run.bat reload' to restart with latest host files
+    call :log_info Use 'docker-run.bat restart' to rebuild image and restart
     exit /b 0
 )
 
@@ -67,14 +73,13 @@ if not errorlevel 1 (
     docker rm %CONTAINER_NAME% >nul 2>nul
 )
 
-call :log_info Starting container...
+call :log_info Starting container with mount-driven volumes...
 docker run -d ^
     --name %CONTAINER_NAME% ^
     -p %PORT%:3001 ^
-    -v %cd%/%DATA_DIR%:/app/data:ro ^
-    -v %cd%/.kilo/agents:/app/kilo/agents:ro ^
-    -v %cd%/.kilo/capability-index.yaml:/app/kilo/capability-index.yaml:ro ^
-    -v %cd%/.kilo/kilo.jsonc:/app/kilo/kilo.jsonc:ro ^
+    -v %cd%\agent-evolution\index.standalone.html:/app/index.html:ro ^
+    -v %cd%\agent-evolution\data:/app/data:ro ^
+    -v %cd%\.kilo:/app/kilo:ro ^
     --restart unless-stopped ^
     %IMAGE_NAME%:latest
 
@@ -84,6 +89,9 @@ if errorlevel 1 (
 )
 call :log_info Container started: %CONTAINER_NAME%
 call :log_info Dashboard available at: http://localhost:%PORT%
+call :log_info Mounted: .\agent-evolution\index.standalone.html -> /app/index.html
+call :log_info          .\agent-evolution\data          -> /app/data
+call :log_info          .\.kilo                         -> /app/kilo
 goto :eof
 
 :stop
@@ -93,7 +101,14 @@ docker rm %CONTAINER_NAME% >nul 2>nul
 call :log_info Container stopped
 goto :eof
 
+:reload
+call :log_info Reloading container to reflect host file changes...
+call :stop
+call :run
+goto :eof
+
 :restart
+call :log_info Full restart: rebuild image + restart container...
 call :stop
 call :build
 call :run
@@ -123,7 +138,7 @@ if not errorlevel 1 (
         exit /b 1
     )
 )
-call :log_info Sync complete
+call :log_info Sync complete — run 'docker-run.bat reload' to pick up changes
 goto :eof
 
 :status
@@ -131,11 +146,11 @@ docker ps -q --filter "name=%CONTAINER_NAME%" 2>nul | findstr /r . >nul
 if not errorlevel 1 (
     call :log_info Container status: %GREEN%RUNNING%NC%
     call :log_info URL: http://localhost:%PORT%
-    
+
     REM Health check
     for /f "tokens=*" %%i in ('docker inspect --format="{{.State.Health.Status}}" %CONTAINER_NAME% 2^>nul') do set HEALTH=%%i
     call :log_info Health: !HEALTH!
-    
+
     REM Started time
     for /f "tokens=*" %%i in ('docker inspect --format="{{.State.StartedAt}}" %CONTAINER_NAME% 2^>nul') do set STARTED=%%i
     if defined STARTED call :log_info Started: !STARTED!
@@ -156,37 +171,27 @@ docker rmi %IMAGE_NAME%:latest >nul 2>nul
 call :log_info Cleanup complete
 goto :eof
 
-:dev
-call :log_info Starting development mode...
-docker build -t %IMAGE_NAME%:dev -f agent-evolution/Dockerfile --target development .
-if errorlevel 1 (
-    call :log_error Build failed
-    exit /b 1
-)
-docker run --rm ^
-    --name %CONTAINER_NAME%-dev ^
-    -p %PORT%:3001 ^
-    -v %cd%/%DATA_DIR%:/app/data ^
-    -v %cd%/agent-evolution/index.html:/app/index.html ^
-    %IMAGE_NAME%:dev
-goto :eof
-
 :help
-echo Agent Evolution Dashboard - Docker Management (Windows)
+echo Agent Evolution Dashboard - Docker Management (mount-driven, no-rebuild)
+echo.
+echo Quick start:
+echo   1. docker-run.bat run        ^:: Start container once
+echo   2. edit files + bun run sync:evolution
+echo   3. docker-run.bat reload     ^:: Container picks up changes immediately
 echo.
 echo Usage: %~nx0 ^<command^>
 echo.
 echo Commands:
-echo   build     Build Docker image
-echo   run       Run container
-echo   stop      Stop container
-echo   restart   Restart container (build + run)
+echo   build     Build Docker image (rare — only Dockerfile changes)
+echo   run       Start container for the first time
+echo   stop      Stop and remove container
+echo   reload    Restart container to pick up latest host files (no rebuild)
+echo   restart   Rebuild image AND restart container
 echo   logs      View container logs
 echo   open      Open dashboard in browser
-echo   sync      Sync evolution data
+echo   sync      Sync evolution data on host
 echo   status    Show container status
-echo   clean     Remove container and image
-echo   dev       Run in development mode (with hot reload)
+echo   clean     Remove container AND image
 echo   help      Show this help message
 goto :eof
 
diff --git a/agent-evolution/docker-run.sh b/agent-evolution/docker-run.sh
index a8aa9db..c8015b4 100644
--- a/agent-evolution/docker-run.sh
+++ b/agent-evolution/docker-run.sh
@@ -1,12 +1,17 @@
 #!/bin/bash
 # Agent Evolution Dashboard - Docker Management Script
+# Mount-driven: no rebuild required after file changes.
+#
+# Quick-ref:
+#   bash agent-evolution/docker-run.sh run      # start (no rebuild needed later)
+#   bash agent-evolution/docker-run.sh reload   # restart container to pick up new mounts
+#   bash agent-evolution/docker-run.sh restart  # rebuild image + restart container
 
 set -e
 
 IMAGE_NAME="apaw-evolution"
 CONTAINER_NAME="apaw-evolution-dashboard"
-PORT=3001
-DATA_DIR="./agent-evolution/data"
+PORT=3003
 
 # Colors for output
 RED='\033[0;31m'
@@ -18,23 +23,23 @@ log_info() { echo -e "${GREEN}[INFO]${NC} $1"; }
 log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
 log_error() { echo -e "${RED}[ERROR]${NC} $1"; }
 
-# Build Docker image
+# Build Docker image (rarely needed — only on Dockerfile / base-image changes)
 build() {
     log_info "Building Docker image..."
     docker build \
         -t "$IMAGE_NAME:latest" \
         -f agent-evolution/Dockerfile \
-        --target production \
         .
     log_info "Build complete: $IMAGE_NAME:latest"
 }
 
-# Run container
+# Run container with directory mounts (no file copies)
 run() {
     # Check if container already running
     if docker ps -q --filter "name=$CONTAINER_NAME" | grep -q .; then
         log_warn "Container $CONTAINER_NAME is already running"
-        log_info "Use '$0 restart' to restart it"
+        log_info "Use '$0 reload' to restart with latest host files"
+        log_info "Use '$0 restart' to rebuild image and restart"
         exit 0
     fi
 
@@ -44,14 +49,13 @@ run() {
         docker rm "$CONTAINER_NAME" >/dev/null || true
     fi
 
-    log_info "Starting container..."
+    log_info "Starting container with mount-driven volumes..."
     docker run -d \
         --name "$CONTAINER_NAME" \
         -p "$PORT:3001" \
-        -v "$(pwd)/$DATA_DIR:/app/data:ro" \
-        -v "$(pwd)/.kilo/agents:/app/kilo/agents:ro" \
-        -v "$(pwd)/.kilo/capability-index.yaml:/app/kilo/capability-index.yaml:ro" \
-        -v "$(pwd)/.kilo/kilo.jsonc:/app/kilo/kilo.jsonc:ro" \
+        -v "$(pwd)/agent-evolution/index.standalone.html:/app/index.html:ro" \
+        -v "$(pwd)/agent-evolution/data:/app/data:ro" \
+        -v "$(pwd)/.kilo:/app/kilo:ro" \
         --restart unless-stopped \
         --health-cmd "wget --no-verbose --tries=1 --spider http://localhost:3001/ || exit 1" \
         --health-interval "30s" \
@@ -61,9 +65,13 @@ run() {
 
     log_info "Container started: $CONTAINER_NAME"
     log_info "Dashboard available at: http://localhost:$PORT"
+    log_info "Mounted: ./agent-evolution/index.standalone.html → /app/index.html"
+    log_info "         ./agent-evolution/data          → /app/data"
+    log_info "         ./.kilo                         → /app/kilo"
+    log_info "Tip: edit host files, run bun run sync:evolution, then reload page or use '$0 reload'"
 }
 
-# Stop container
+# Stop and remove container
 stop() {
     log_info "Stopping container..."
     docker stop "$CONTAINER_NAME" >/dev/null 2>&1 || true
@@ -71,8 +79,16 @@ stop() {
     log_info "Container stopped"
 }
 
-# Restart container
+# Restart container WITHOUT rebuilding image (picks up new host files)
+reload() {
+    log_info "Reloading container to reflect host file changes..."
+    stop
+    run
+}
+
+# Rebuild image AND restart container (only when Dockerfile changes)
 restart() {
+    log_info "Full restart: rebuild image + restart container..."
     stop
     build
     run
@@ -99,7 +115,7 @@ open() {
     fi
 }
 
-# Sync evolution data
+# Sync evolution data on host (generates index.standalone.html from latest data)
 sync() {
     log_info "Syncing evolution data..."
     if command -v bun &> /dev/null; then
@@ -110,7 +126,7 @@ sync() {
         log_error "Node.js or Bun required for sync"
         exit 1
     fi
-    log_info "Sync complete"
+    log_info "Sync complete — run '$0 reload' to pick up changes"
 }
 
 # Status check
@@ -138,47 +154,33 @@ status() {
 }
 
 # Clean up
 clean() {
     log_info "Cleaning up..."
     stop
     docker rmi "$IMAGE_NAME:latest" >/dev/null 2>&1 || true
     log_info "Cleanup complete"
 }
 
-# Development mode with hot reload
-dev() {
-    log_info "Starting development mode..."
-    docker build \
-        -t "$IMAGE_NAME:dev" \
-        -f agent-evolution/Dockerfile \
-        --target development \
-        .
-
-    docker run --rm \
-        --name "${CONTAINER_NAME}-dev" \
-        -p "$PORT:3001" \
-        -v "$(pwd)/$DATA_DIR:/app/data" \
-        -v "$(pwd)/agent-evolution/index.html:/app/index.html" \
-        "$IMAGE_NAME:dev"
-}
-
 # Show help
 show_help() {
-    echo "Agent Evolution Dashboard - Docker Management"
+    echo "Agent Evolution Dashboard - Docker Management (mount-driven, no-rebuild)"
     echo ""
-    echo "Usage: $0 <command>"
+    echo "Quick start:"
+    echo "  1. bash $0 run        # Start container once"
+    echo "  2. edit files + bun run sync:evolution"
+    echo "  3. bash $0 reload     # Container picks up changes immediately"
     echo ""
     echo "Commands:"
-    echo "  build     Build Docker image"
-    echo "  run       Run container"
-    echo "  stop      Stop container"
-    echo "  restart   Restart container (build + run)"
+    echo "  build     Build Docker image (rare — only Dockerfile changes)"
+    echo "  run       Start container for the first time"
+    echo "  stop      Stop and remove container"
+    echo "  reload    Restart container to pick up latest host files (no rebuild)"
+    echo "  restart   Rebuild image AND restart container"
     echo "  logs      View container logs"
     echo "  open      Open dashboard in browser"
-    echo "  sync      Sync evolution data"
+    echo "  sync      Run sync-agent-history.ts on host"
     echo "  status    Show container status"
-    echo "  clean     Remove container and image"
-    echo "  dev       Run in development mode (with hot reload)"
+    echo "  clean     Remove container AND image"
     echo "  help      Show this help message"
 }
 
@@ -187,13 +189,17 @@ case "${1:-help}" in
     build) build ;;
     run) run ;;
     stop) stop ;;
+    reload) reload ;;
     restart) restart ;;
     logs) logs ;;
     open) open ;;
     sync) sync ;;
     status) status ;;
     clean) clean ;;
-    dev) dev ;;
+    dev)
+        log_warn "'dev' mode deprecated — use 'run' + volume mounts instead."
+        log_info "Run: bash $0 run"
+        ;;
     help) show_help ;;
     *)
         log_error "Unknown command: $1"
diff --git a/agent-evolution/index.html b/agent-evolution/index.html
index bb40485..00f4c48 100644
--- a/agent-evolution/index.html
+++ b/agent-evolution/index.html
@@ -472,6 +472,59 @@
         .score-fill.medium { background: linear-gradient(90deg, var(--accent-orange), #ffc048); }
         .score-fill.low { background: linear-gradient(90deg, var(--accent-red), #ff6b81); }
 
+        /* Heatmap */
+        .hm-wrap { overflow-x:auto; border-radius:11px; border:1px solid var(--border); background:var(--bg-card); padding:18px; margin-bottom:26px; }
+        .hm-title { font-weight:700; font-size:1.05em; }
+        .hm-sub { font-size:.76em; color:var(--text-muted); margin-bottom:14px; }
+        .hm-table { border-collapse:separate; border-spacing:2px; width:100%; }
+        .hm-table th { font-family:'JetBrains Mono',monospace; font-size:.62em; color:var(--text-muted); padding:8px 5px; text-align:center; white-space:nowrap; vertical-align:bottom; }
+        .hm-table th.hm-role { text-align:left; min-width:140px; font-size:.68em; padding-left:10px; }
+        .hm-table td { text-align:center; padding:6px 4px; font-family:'JetBrains Mono',monospace; font-size:.72em; font-weight:700; border-radius:6px; cursor:pointer; transition:all .15s cubic-bezier(.4,0,.2,1); min-width:42px; position:relative; line-height:1.4; }
+        .hm-table td:hover { transform:scale(1.1); z-index:2; box-shadow:0 4px 12px rgba(0,0,0,.35); }
+        .hm-table td.hm-r { text-align:left; font-family:'Inter',sans-serif; font-size:.82em; font-weight:600; color:var(--text-primary); cursor:default; padding-left:10px; }
+        .hm-table td.hm-r:hover { transform:none; box-shadow:none; }
+        .hm-star { position:absolute; top:2px; right:2px; font-size:.65em; text-shadow:0 1px 2px rgba(0,0,0,.5); }
+        .hm-cur { box-shadow:inset 0 0 0 2px var(--accent-cyan), 0 0 8px rgba(0,212,255,.35); border-radius:6px; }
+        .hm-cur::after { content:''; position:absolute; bottom:2px; left:50%; transform:translateX(-50%); width:8px; height:3px; background:var(--accent-cyan); border-radius:2px; }
+        .hm-if-warn { position:absolute; top:2px; left:2px; font-size:.6em; opacity:.8; }
+
+        /* Smooth gradient legend bar */
+        .hm-legend-wrap { margin-top:18px; padding:0 4px; }
+        .hm-legend-track { position:relative; height:22px; border-radius:11px; background:linear-gradient(90deg, rgba(0,255,148,.85) 0%, rgba(0,212,255,.75) 20%, rgba(59,130,246,.6) 40%, rgba(168,85,247,.45) 58%, rgba(255,159,67,.35) 75%, rgba(255,71,87,.3) 88%, rgba(90,104,128,.2) 100%); box-shadow:inset 0 1px 3px rgba(0,0,0,.3); }
+        .hm-legend-labels { display:flex; justify-content:space-between; align-items:center; margin-top:8px; padding:0 4px; }
+        .hm-legend-labels span { font-size:.68em; font-family:'JetBrains Mono',monospace; color:var(--text-muted); }
+        .hm-legend-left { color:var(--accent-green); }
+        .hm-legend-right { color:var(--accent-red); }
+        .hm-legend-marks { display:flex; justify-content:space-between; padding:0 2px; margin-top:3px; }
+        .hm-legend-marks span { font-size:.58em; font-family:'JetBrains Mono',monospace; color:var(--text-muted); min-width:20px; text-align:center; }
+
+        /* Heatmap Modal Tabs */
+        .hm-modal-tabs { display:flex; gap:3px; background:var(--bg-panel); border-bottom:1px solid var(--border); padding:4px 18px; }
+        .hm-tab-btn { padding:8px 16px; background:none; border:none; color:var(--text-secondary); font-family:'Inter',sans-serif; font-size:.82em; font-weight:600; border-radius:8px; cursor:pointer; transition:all .25s; }
+        .hm-tab-btn.active { color:var(--bg-deep); background:linear-gradient(135deg,var(--accent-cyan),var(--accent-green)); }
+        .hm-tab-content { display:none; }
+        .hm-tab-content.active { display:block; }
+        .hm-model-timeline { display:flex; flex-direction:column; gap:12px; }
+        .hm-tl-item { display:flex; gap:14px; align-items:center; padding:10px; background:var(--bg-deep); border-radius:8px; border-left:3px solid var(--accent-cyan); }
+        .hm-tl-date { font-family:'JetBrains Mono',monospace; font-size:.72em; color:var(--text-muted); min-width:100px; }
+        .hm-tl-change { display:flex; align-items:center; gap:8px; }
+        .hm-tl-from { text-decoration:line-through; color:#ff6b81; background:rgba(255,71,87,.08); padding:2px 6px; border-radius:4px; }
+        .hm-tl-arrow { color:var(--accent-green); }
+        .hm-tl-to { color:var(--accent-green); background:rgba(0,255,148,.08); padding:2px 6px; border-radius:4px; font-weight:600; }
+        .hm-tl-current { border-left-color:var(--accent-green); background:rgba(0,255,148,.05); }
+        .hm-no-data { color:var(--text-muted); font-size:.9em; padding:16px; text-align:center; }
+        .hm-capabilities { display:flex; flex-wrap:wrap; gap:6px; }
+        .hm-cap-tag { padding:4px 10px; background:rgba(0,212,255,.1); border:1px solid var(--border); border-radius:16px; font-size:.78em; color:var(--accent-cyan); }
+        .hm-agent-desc { font-size:.9em; color:var(--text-secondary); line-height:1.5; margin-bottom:14px; padding:12px; background:var(--bg-deep); border-radius:8px; }
+        .hm-model-tl-score { margin-left:auto; font-family:'JetBrains Mono',monospace; font-size:.8em; color:var(--accent-cyan); }
+
+        /* Tooltip */
+        #ttOverlay { display:none; position:fixed; top:0;left:0;right:0;bottom:0; z-index:999; pointer-events:none; }
+        #ttOverlay.show { display:block; }
+        #ttBox { position:absolute; background:var(--bg-panel); border:1px solid var(--accent-cyan); border-radius:9px; padding:12px 16px; max-width:300px; box-shadow:0 10px 32px rgba(0,0,0,.55); z-index:1000; }
+        #ttBox h4 { color:var(--accent-cyan); font-size:.9em; margin-bottom:4px; }
+        #ttBox p { font-size:.78em; color:var(--text-secondary); line-height:1.45; }
+
         /* Export */
         .actions-row {
             display: flex;
@@ -551,11 +604,137 @@
             white-space: pre-wrap;
         }
 
+        /* Impact Tab */
+        .chart-wrap { background: var(--bg-card); border: 1px solid var(--border); border-radius: 12px; padding: 20px; margin-bottom: 24px; }
+        .chart-title { font-size: 1.1em; font-weight: 700; margin-bottom: 16px; }
+        .chart-sub { font-size: 0.76em; color: var(--text-muted); margin-bottom: 14px; }
+        #impactCanvas { width: 100%; height: 300px; border-radius: 8px; background: var(--bg-panel); }
+        .chart-placeholder { text-align: center; padding: 60px 20px; color: var(--text-muted); font-size: 0.95em; }
+
+        /* Recommendation Cards */
+        .rec-card { position: relative; /* containing block for the absolutely positioned .rec-checkbox */ background: var(--bg-card); border: 1px solid var(--border); border-radius: 12px; padding: 20px; transition: all 0.3s; margin-bottom: 16px; }
+        .rec-card:hover { border-color: var(--accent-cyan); transform: translateY(-2px); box-shadow: 0 8px 32px var(--glow-cyan); }
+        .rec-hdr { display: flex; justify-content: space-between; align-items: center; margin-bottom: 14px; }
+        .rec-agent { font-weight: 700; font-size: 1.1em; display: flex; align-items: center; gap: 10px; }
+        .rec-agent-name { color: var(--text-primary); }
+        .impact-badge { font-family: 'JetBrains Mono', monospace; font-size: 0.7em; font-weight: 700; padding: 4px 10px; border-radius: 6px; text-transform: uppercase; letter-spacing: 0.5px; }
+        .impact-badge.critical { background: rgba(255,71,87,0.2); color: #ff6b81; border: 1px solid rgba(255,71,87,0.4); }
+        .impact-badge.high { background: rgba(255,159,67,0.2); color: #ffc048; border: 1px solid rgba(255,159,67,0.4); }
+        .impact-badge.medium { background: rgba(59,130,246,0.2); color: #60a5fa; border: 1px solid rgba(59,130,246,0.4); }
+        .impact-badge.low { background: rgba(0,255,148,0.15); color: #4ade80; border: 1px solid rgba(0,255,148,0.3); }
+        .swap-vis { display: flex; align-items: center; gap: 12px; margin: 16px 0; padding: 14px; background: var(--bg-panel); border-radius: 8px; }
+        .swap-from, .swap-to { flex: 1; padding: 10px 14px; border-radius: 6px; font-family: 'JetBrains Mono', monospace; font-size: 0.8em; }
+        .swap-from { background: rgba(255,71,87,0.1); color: #ff6b81; border: 1px solid rgba(255,71,87,0.3); }
+        .swap-to { background: rgba(0,255,148,0.1); color: #4ade80; border: 1px solid rgba(0,255,148,0.3); }
+        .swap-arrow { color: var(--accent-cyan); font-size: 1.4em; font-weight: 700; }
+        .rec-metrics { display: grid; grid-template-columns: repeat(4, 1fr); gap: 12px; margin-bottom: 14px; }
+        .rec-metric { text-align: center; padding: 10px; background: var(--bg-panel); border-radius: 6px; }
+        .rec-metric-label { font-size: 0.65em; color: var(--text-muted); text-transform: uppercase; letter-spacing: 0.5px; }
+        .rec-metric-value { font-family: 'JetBrains Mono', monospace; font-size: 0.95em; font-weight: 600; color: var(--accent-green); margin-top: 4px; }
+        .rec-rationale { font-size: 0.85em; color: var(--text-secondary); line-height: 1.6; padding: 12px; background: rgba(0,212,255,0.05); border-radius: 6px; border-left: 3px solid var(--accent-cyan); }
+
+        /* Recommendation Card Checkbox */
+        .rec-checkbox { position: absolute; top: 16px; right: 16px; }
+        .rec-checkbox input { width: 18px; height: 18px; cursor: pointer; accent-color: var(--accent-cyan); }
+
+        /* Progress Modal */
+        .progress-overlay {
+            display: none;
+            position: fixed;
+            inset: 0;
+            background: rgba(0,0,0,0.85);
+            z-index: 10000;
+            justify-content: center;
+            align-items: center;
+            flex-direction: column;
+        }
+        .progress-overlay.show { display: flex; }
+        .progress-card {
+            background: var(--bg-panel);
+            border: 1px solid var(--accent-cyan);
+            border-radius: 14px;
+            padding: 32px 40px;
+            text-align: center;
+            max-width: 500px;
+            width: 90%;
+            box-shadow: 0 20px 60px rgba(0,0,0,0.5);
+        }
+        .progress-title { font-size: 1.2em; font-weight: 700; margin-bottom: 24px; }
+        .progress-bar-wrap { background: var(--bg-card); border-radius: 4px; height: 8px; overflow: hidden; margin-bottom: 20px; }
+        .progress-bar-fill {
+            height: 100%;
+            width: 0%;
+            background: linear-gradient(90deg, var(--accent-green), #00ff94);
+            border-radius: 4px;
+            transition: width 0.3s ease-out;
+        }
+        .progress-status { font-size: 0.9em; color: var(--text-secondary); margin-bottom: 20px; min-height: 24px; }
+        .progress-result { display: none; }
+        .progress-result.show { display: block; }
+        .progress-result p { font-size: 1em; color: var(--accent-green); margin-bottom: 20px; }
+        .progress-close-btn {
+            padding: 10px 24px;
+            background: var(--bg-card);
+            border: 1px solid var(--border);
+            color: var(--text-primary);
+            border-radius: 8px;
+            cursor: pointer;
+            font-size: 0.9em;
+        }
+        .progress-close-btn:hover { border-color: var(--accent-cyan); color: var(--accent-cyan); }
+
+        /* Research Modal */
+        .research-steps { text-align: left; margin: 20px 0; }
+        .research-step { padding: 12px 16px; background: var(--bg-card); border-radius: 8px; margin-bottom: 10px; font-size: 0.9em; color: var(--text-secondary); display: flex; align-items: center; gap: 10px; opacity: 0.5; transition: all 0.3s; }
+        .research-step.active { opacity: 1; color: var(--accent-cyan); background: rgba(0,212,255,0.1); }
+        .research-step.done { opacity: 1; color: var(--accent-green); }
+        .research-step .spinner { width: 16px; height: 16px; border: 2px solid var(--border); border-top-color: var(--accent-cyan); border-radius: 50%; animation: spin 1s linear infinite; display: none; }
+        .research-step.active .spinner { display: block; }
+        .research-summary { display: none; text-align: center; padding: 20px; }
+        .research-summary.show { display: block; }
+        .research-summary p { font-size: 1em; color: var(--text-secondary); margin-bottom: 16px; }
+        .research-link { color: var(--accent-cyan); text-decoration: underline; cursor: pointer; }
+
+        @keyframes spin { to { transform: rotate(360deg); } }
+
+        /* Apply Modal Checklist */
+        .apply-checklist { max-height: 300px; overflow-y: auto; margin: 16px 0; }
+        .apply-item {
+            display: flex;
+            align-items: center;
+            gap: 12px;
+            padding: 12px 14px;
+            background: var(--bg-card);
+            border-radius: 8px;
+            margin-bottom: 8px;
+            transition: all 0.2s;
+        }
+        .apply-item:hover { background: var(--bg-card-hover); }
+        .apply-item input { width: 18px; height: 18px; accent-color: var(--accent-cyan); }
+        .apply-item-content { flex: 1; }
+        .apply-item-agent { font-weight: 600; font-size: 0.95em; }
+        .apply-item-models { display: flex; align-items: center; gap: 8px; font-family: 'JetBrains Mono', monospace; font-size: 0.8em; margin-top: 4px; }
+        .apply-item-from { text-decoration: line-through; color: #ff6b81; }
+        .apply-item-arrow { color: var(--accent-cyan); }
+        .apply-item-to { color: var(--accent-green); }
+        .apply-item-impact { font-size: 0.7em; padding: 2px 8px; border-radius: 4px; text-transform: uppercase; }
+        .apply-item-impact.critical { background: rgba(255,71,87,0.2); color: #ff6b81; }
+        .apply-item-impact.high { background: rgba(255,159,67,0.2); color: #ffc048; }
+        .apply-item-impact.medium { background: rgba(59,130,246,0.2); color: #60a5fa; }
+        .apply-item-impact.low { background: rgba(0,255,148,0.15); color: #4ade80; }
+        .apply-modal-actions { display: flex; justify-content: flex-end; gap: 10px; margin-top: 16px; }
+        .apply-btn { padding: 10px 20px; border-radius: 8px; font-size: 0.9em; cursor: pointer; transition: all 0.25s; }
+        .apply-btn.apply { background: linear-gradient(135deg, rgba(0,212,255,0.15), rgba(0,255,148,0.1)); border: 1px solid var(--accent-cyan); color: var(--accent-cyan); }
+        .apply-btn.apply:hover { box-shadow: 0 0 20px var(--glow-cyan); }
+
         @media (max-width: 768px) {
             .header h1 { font-size: 1.5em; }
             .tabs { flex-wrap: wrap; }
             .agents-grid { grid-template-columns: 1fr; }
             .stats-row { grid-template-columns: repeat(2, 1fr); }
+            .rec-metrics { grid-template-columns: repeat(2, 1fr); }
+            .swap-vis { flex-direction: column; }
+            .swap-arrow { transform: rotate(90deg); }
         }
     </style>
 </head>
@@ -578,7 +757,8 @@
         <button class="tab-btn" onclick="switchTab('agents')">All Agents</button>
         <button class="tab-btn" onclick="switchTab('history')">Timeline</button>
         <button class="tab-btn" onclick="switchTab('recommendations')">Recommendations</button>
-        <button class="tab-btn" onclick="switchTab('matrix')">Model Matrix</button>
+        <button class="tab-btn" onclick="switchTab('heatmap')">Heatmap</button>
+        <button class="tab-btn" onclick="switchTab('impact')">Impact</button>
     </div>
 
     <!-- Overview Tab -->
@@ -633,21 +813,67 @@
     <!-- Recommendations Tab -->
     <div id="tab-recommendations" class="tab-panel">
         <div class="actions-row">
-            <button class="action-btn primary" onclick="exportRecommendations()">
+            <button class="action-btn primary" onclick="showApplyModal()">
+                <span>✨</span> Apply Recommended Fixes
+            </button>
+            <button class="action-btn" onclick="showResearchModal()">
+                <span>🔬</span> New Research Cycle
+            </button>
+            <button class="action-btn" onclick="exportRecommendations()" style="display:none">
                 <span>📥</span> Export JSON
             </button>
         </div>
         <div class="agents-grid" id="allRecommendations"></div>
     </div>
 
-    <!-- Matrix Tab -->
-    <div id="tab-matrix" class="tab-panel">
-        <div class="matrix-wrap">
-            <h2 class="matrix-title">Agent × Model Matrix</h2>
-            <table class="matrix-table" id="matrixTable">
-                <thead id="matrixHead"></thead>
-                <tbody id="matrixBody"></tbody>
-            </table>
+    <!-- Heatmap Tab -->
+    <div id="tab-heatmap" class="tab-panel">
+        <div class="hm-wrap">
+            <div class="hm-title">Agent × Model Compatibility Heatmap</div>
+            <div class="hm-sub">Weighted score = benchmark × instruction-following multiplier · ★ = best fit · outlined = current · click for details</div>
+            <div style="overflow-x:auto"><table class="hm-table" id="hmTable"></table></div>
+            <div class="hm-legend-wrap">
+                <div class="hm-legend-track"></div>
+                <div class="hm-legend-marks">
+                    <span>100</span><span>80</span><span>60</span><span>40</span><span>20</span><span>0</span>
+                </div>
+                <div class="hm-legend-labels">
+                    <span class="hm-legend-left">↑ Ideal Match</span>
+                    <span class="hm-legend-right">Mismatch ↓</span>
+                </div>
+            </div>
+        </div>
+    </div>
+
+    <!-- Impact Tab -->
+    <div id="tab-impact" class="tab-panel">
+        <div class="stats-row" id="impactStats"></div>
+        
+        <!-- Historical Score Graph -->
+        <div class="chart-wrap">
+            <div class="chart-title">Historical System Score</div>
+            <div class="chart-sub">Average composite score across all agents over time</div>
+            <canvas id="historyScoreCanvas" style="width:100%;height:220px;border-radius:8px;background:var(--bg-panel)"></canvas>
+            <div id="historyPlaceholder" class="chart-placeholder" style="display:none">No migration data yet. Run sync:evolution to collect history.</div>
+        </div>
+        
+        <!-- Model Distribution + Migration Impact Row -->
+        <div style="display:grid;grid-template-columns:1fr 1.5fr;gap:20px;margin-bottom:24px">
+            <!-- Model Distribution Donut -->
+            <div class="chart-wrap">
+                <div class="chart-title">Model Distribution</div>
+                <div class="chart-sub">Current models across all agents</div>
+                <canvas id="modelDistCanvas" style="width:100%;height:240px;border-radius:8px;background:var(--bg-panel)"></canvas>
+                <div id="modelDistPlaceholder" class="chart-placeholder" style="display:none">No model data available</div>
+            </div>
+            
+            <!-- Migration Impact Bars -->
+            <div class="chart-wrap">
+                <div class="chart-title">Migration Impact</div>
+                <div class="chart-sub">Before/after fit scores when switching models - green = improvement, red = regression</div>
+                <canvas id="impactCanvas" style="width:100%;height:240px;border-radius:8px;background:var(--bg-panel)"></canvas>
+                <div id="impactPlaceholder" class="chart-placeholder" style="display:none">No migration data yet</div>
+            </div>
         </div>
     </div>
 </div>
@@ -669,11 +895,124 @@
     </div>
 </div>
 
+<!-- Apply Fixes Modal -->
+<div class="modal" id="applyModal">
+    <div class="modal-content" style="max-width:600px">
+        <div class="modal-header">
+            <div class="modal-title">Apply Model Recommendations</div>
+            <div class="modal-actions">
+                <button class="action-btn" onclick="closeApplyModal()" style="border-color: #ff4757; color: #ff6b81;">✕</button>
+            </div>
+        </div>
+        <div class="modal-body">
+            <p style="color: var(--text-secondary); margin-bottom: 16px;">Select recommendations to apply. All items are selected by default.</p>
+            <div class="apply-checklist" id="applyChecklist"></div>
+            <div class="apply-modal-actions">
+                <button class="apply-btn apply" onclick="simulateApply()">Apply Selected</button>
+            </div>
+        </div>
+    </div>
+</div>
+
+<!-- Progress Modal -->
+<div class="progress-overlay" id="progressModal">
+    <div class="progress-card">
+        <div class="progress-title" id="progressTitle">Applying Fixes...</div>
+        <div class="progress-bar-wrap">
+            <div class="progress-bar-fill" id="progressBar"></div>
+        </div>
+        <div class="progress-status" id="progressStatus">Preparing...</div>
+        <div class="progress-result" id="progressResult">
+            <p id="progressResultText"></p>
+            <button class="progress-close-btn" onclick="closeProgressModal()">Close</button>
+        </div>
+    </div>
+</div>
+
+<!-- Research Modal -->
+<div class="modal" id="researchModal">
+    <div class="modal-content" style="max-width:550px">
+        <div class="modal-header">
+            <div class="modal-title">Agent Model Research</div>
+            <div class="modal-actions">
+                <button class="action-btn" onclick="closeResearchModal()" style="border-color: #ff4757; color: #ff6b81;">✕</button>
+            </div>
+        </div>
+        <div class="modal-body">
+            <div class="research-steps" id="researchSteps">
+                <div class="research-step" data-step="1">
+                    <span class="spinner"></span>
+                    <span>Analyzing benchmark data...</span>
+                </div>
+                <div class="research-step" data-step="2">
+                    <span class="spinner"></span>
+                    <span>Computing composite scores...</span>
+                </div>
+                <div class="research-step" data-step="3">
+                    <span class="spinner"></span>
+                    <span>Cross-referencing agent assignments...</span>
+                </div>
+                <div class="research-step" data-step="4">
+                    <span class="spinner"></span>
+                    <span>Generating recommendations...</span>
+                </div>
+                <div class="research-step" data-step="5">
+                    <span class="spinner"></span>
+                    <span>Research complete!</span>
+                </div>
+            </div>
+            <div class="research-summary" id="researchSummary">
+                <p id="researchSummaryText"></p>
+                <a class="research-link" href="#" onclick="alert('This would open the full report.'); return false;">View Report</a>
+            </div>
+        </div>
+    </div>
+</div>
+
+<!-- Tooltip Overlay -->
+<div id="ttOverlay"><div id="ttBox"></div></div>
+
+<!-- Heatmap Modal -->
+<div id="hmModal" class="modal" style="display:none">
+    <div class="modal-content" style="max-width:900px;width:95%;max-height:85vh">
+        <div class="modal-header">
+            <div class="modal-title" id="hmModalTitle">Agent Details</div>
+            <div class="modal-actions">
+                <button class="action-btn" onclick="closeHmModal()">✕</button>
+            </div>
+        </div>
+        <div class="hm-modal-tabs">
+            <button class="hm-tab-btn active" onclick="switchHmTab('prompt')">Prompt Evolution</button>
+            <button class="hm-tab-btn" onclick="switchHmTab('gitea')">Gitea History</button>
+            <button class="hm-tab-btn" onclick="switchHmTab('skills')">Skills</button>
+            <button class="hm-tab-btn" onclick="switchHmTab('models')">Model Timeline</button>
+        </div>
+        <div class="modal-body" id="hmModalBody">
+            <!-- Content injected by JS -->
+        </div>
+    </div>
+</div>
+
 <script>
 // Agent Evolution Dashboard
 // Supports both server and file:// mode
 let agentData = {};
 
+// Inline recommendation data (snapshot of model-research-latest.json); renderOverview uses it in preference to agentData whenever it is non-empty
+const INLINE_RECOMMENDATIONS = [
+    { agent: "frontend-developer", current_model_in_agent_versions: "ollama-cloud/qwen3-coder:480b", source_of_truth_model: "ollama-cloud/minimax-m2.5", impact: "high", score_before: 86, score_after: 92, score_delta: 6, rationale: "agent-versions.json is stale. kilo-meta.json (source of truth) already has minimax-m2.5. Matrix score for frontend-dev on M2.5 = 92 (highest!)." },
+    { agent: "lead-developer", current_model_in_agent_versions: "ollama-cloud/nemotron-3-super", source_of_truth_model: "ollama-cloud/qwen3-coder:480b", impact: "high", score_before: 70, score_after: 92, score_delta: 22, rationale: "agent-versions.json shows nemotron-3-super (outdated). kilo-meta.json has qwen3-coder:480b. Matrix score: qwen3-coder 92 is the highest for lead-developer." },
+    { agent: "system-analyst", current_model: "ollama-cloud/glm-5.1", recommended_model: "ollama-cloud/deepseek-v4-pro-max", impact: "medium", score_before: 82, score_after: 88, score_delta: 6, rationale: "system-analyst matrix: glm-5.1 = 82, deepseek-v4-pro-max = 88. 1M context is critical for architecture docs." },
+    { agent: "evaluator", current_model: "ollama-cloud/glm-5.1", recommended_model: "ollama-cloud/kimi-k2.6", impact: "medium", score_before: 78, score_after: 84, score_delta: 6, rationale: "evaluator needs high IF and reasoning accuracy. kimi-k2-6 IF=91, matrix score 84 vs glm-5.1 78." },
+    { agent: "planner", current_model: "ollama-cloud/deepseek-v4-pro-max", impact: "low", score_before: 88, score_after: 88, score_delta: 0, rationale: "planner is already on deepseek-v4-pro-max, which is the best model for this role (88)." },
+    { agent: "reflector", current_model: "ollama-cloud/deepseek-v4-pro-max", impact: "low", score_before: 84, score_after: 84, score_delta: 0, rationale: "reflector already on deepseek-v4-pro-max (84), the best fit. Self-reflection requires strong reasoning chains." },
+    { agent: "workflow-architect", current_model: "ollama-cloud/glm-5.1", recommended_model: "ollama-cloud/kimi-k2.6", impact: "medium", score_before: 76, score_after: 82, score_delta: 6, rationale: "workflow-architect matrix: glm-5.1 = 76, kimi-k2-6 = 82." },
+    { agent: "pipeline-judge", current_model: "ollama-cloud/glm-5.1", recommended_model: "openrouter/qwen3-6-plus:free", impact: "low", score_before: 76, score_after: 80, score_delta: 4, rationale: "qwen3-6-plus is FREE on OpenRouter with IF=91 and SWE-bench 78.8." },
+    { agent: "orchestrator", current_model: "ollama-cloud/kimi-k2.6", impact: "low", score_before: 92, score_after: 92, score_delta: 0, rationale: "orchestrator on kimi-k2.6 is the absolute best fit (92)." },
+    { agent: "the-fixer", current_model: "ollama-cloud/kimi-k2.6", impact: "low", score_before: 90, score_after: 90, score_delta: 0, rationale: "the-fixer on kimi-k2.6 (90) is optimal. SWE-Pro 58.6 (#1!)." },
+    { agent: "memory-manager", current_model: "ollama-cloud/qwen3.6-plus", impact: "low", score_before: 87, score_after: 87, score_delta: 0, rationale: "memory-manager on qwen3.6-plus (87) is the best fit. 1M context critical." }
+];
+
 // Default embedded data (minimal - updated by sync script)
 const EMBEDDED_DATA = {
     "$schema": "./data/agent-versions.schema.json",
@@ -726,7 +1065,8 @@ async function init() {
         renderAllAgents();
         renderTimeline();
         renderRecommendations();
-        renderMatrix();
+        renderHeatmap();
+        renderImpact();
     } catch (error) {
         console.error('Failed to render dashboard:', error);
         document.getElementById('lastSync').textContent = 'Error rendering data';
@@ -736,7 +1076,7 @@ async function init() {
 // Format date
 function formatDate(dateStr) {
     const date = new Date(dateStr);
-    return date.toLocaleDateString('ru-RU', { 
+    return date.toLocaleDateString('en-GB', { 
         day: '2-digit', 
         month: 'short', 
         hour: '2-digit', 
@@ -784,15 +1124,33 @@ function renderOverview() {
         `).join('')
         : '<p style="color: var(--text-muted);">No history yet</p>';
 
-    // Recommended agents
-    const recAgents = Object.entries(agentData.agents)
-        .filter(([_, a]) => a.current.recommendations && a.current.recommendations.length > 0)
-        .slice(0, 6);
+    // Recommended agents (use inline recs if available)
+    let recAgents = [];
+    if (INLINE_RECOMMENDATIONS && INLINE_RECOMMENDATIONS.length > 0) {
+        recAgents = INLINE_RECOMMENDATIONS.slice(0, 6).map(r => ({ agent: r.agent, current: { recommendations: [{ priority: r.impact, target: r.source_of_truth_model || r.recommended_model, reason: r.rationale, score_before: r.score_before, score_after: r.score_after, score_delta: r.score_delta }], model: r.current_model_in_agent_versions || r.current_model, category: 'Core Dev', description: '', benchmark: { fit_score: r.score_after || 0 } } }));
+    } else {
+        recAgents = Object.entries(agentData.agents)
+            .filter(([_, a]) => a.current.recommendations && a.current.recommendations.length > 0)
+            .slice(0, 6);
+    }
 
     document.getElementById('recCount').textContent = recAgents.length;
-    document.getElementById('recAgents').innerHTML = recAgents.map(([name, agent]) => 
-        renderAgentCard(name, agent, true)
-    ).join('');
+    if (INLINE_RECOMMENDATIONS && INLINE_RECOMMENDATIONS.length > 0) {
+        document.getElementById('recAgents').innerHTML = recAgents.map((r, idx) => renderRecCard({
+            agent: r.agent,
+            current_model: r.current?.model || '',
+            recommended_model: r.current?.recommendations?.[0]?.target || '',
+            impact: r.current?.recommendations?.[0]?.priority?.toLowerCase() || 'medium',
+            score_before: r.current?.recommendations?.[0]?.score_before || 0,
+            score_after: r.current?.recommendations?.[0]?.score_after || 0,
+            score_delta: r.current?.recommendations?.[0]?.score_delta || 0,
+            rationale: r.current?.recommendations?.[0]?.reason || ''
+        }, idx)).join('');
+    } else {
+        document.getElementById('recAgents').innerHTML = recAgents.map(([name, agent]) => 
+            renderAgentCard(name, agent, true)
+        ).join('');
+    }
 }
 
 // Render All Agents
@@ -929,56 +1287,871 @@ function renderTimeline() {
         : '<p style="color:var(--text-muted)">No history recorded yet.</p>';
 }
 
-// Render Recommendations
+// Render the Recommendations tab: one v3-style swap card (renderRecCard) per recommendation record
 function renderRecommendations() {
-    const recs = Object.entries(agentData.agents)
-        .filter(([_, a]) => a.current.recommendations && a.current.recommendations.length > 0);
+    // Prefer the embedded INLINE_RECOMMENDATIONS list; otherwise derive equivalent records from agentData
+    let recs = [];
+    if (INLINE_RECOMMENDATIONS && INLINE_RECOMMENDATIONS.length > 0) {
+        recs = INLINE_RECOMMENDATIONS;
+    } else {
+        recs = Object.entries(agentData.agents)
+            .filter(([_, a]) => a.current.recommendations && a.current.recommendations.length > 0)
+            .map(([name, agent]) => ({ // flatten only the first recommendation of each agent into a card record
+                agent: name,
+                current_model: agent.current.model,
+                recommended_model: agent.current.recommendations[0]?.target,
+                impact: agent.current.recommendations[0]?.priority?.toLowerCase() || 'medium', // a missing priority is shown as 'medium'
+                score_before: agent.current.recommendations[0]?.score_before || 0,
+                score_after: agent.current.recommendations[0]?.score_after || 0,
+                score_delta: agent.current.recommendations[0]?.score_delta || 0,
+                rationale: agent.current.recommendations[0]?.reason || ''
+            }));
+    }
 
-    document.getElementById('allRecommendations').innerHTML = recs.map(([name, agent]) => 
-        renderAgentCard(name, agent, true)
-    ).join('');
+    if (recs.length === 0) { // nothing to show: render a placeholder message instead of an empty container
+        document.getElementById('allRecommendations').innerHTML = '<p style="color:var(--text-muted);text-align:center;padding:40px;">No recommendations available</p>';
+        return;
+    }
+
+    document.getElementById('allRecommendations').innerHTML = recs.map((r, idx) => renderRecCard(r, idx)).join('');
 }
 
-// Render Matrix
-function renderMatrix() {
-    const agents = Object.entries(agentData.agents);
-    const models = [...new Set(agents.map(([_, a]) => a.current.model).filter(Boolean))];
+// Build the HTML string for one recommendation card: checkbox, impact badge, model swap and score metrics
+function renderRecCard(r, index) {
+    const badgeClass = r.impact || 'low'; // used both as the badge's CSS class and (upper-cased) as its label
+    const fromModel = r.current_model_in_agent_versions || r.current_model || ''; // first field that is set wins
+    const toModel = r.source_of_truth_model || r.recommended_model || '';
+    const fromShort = fromModel.split('/').pop() || fromModel; // text after the last '/' of the model id
+    const toShort = toModel.split('/').pop() || toModel;
+    const cardIndex = index !== undefined ? index : 0; // becomes part of the checkbox id (rec-check-N)
 
-    // Header
-    document.getElementById('matrixHead').innerHTML = `
-        <tr>
-            <th>Agent</th>
-            <th>Model</th>
-            <th>Provider</th>
-            <th>Fit Score</th>
-            <th>Category</th>
-            <th>Status</th>
-        </tr>
+    return `
+        <div class="rec-card" style="position:relative">
+            <div class="rec-checkbox">
+                <input type="checkbox" id="rec-check-${cardIndex}" checked>
+            </div>
+            <div class="rec-hdr">
+                <div class="rec-agent">
+                    <span class="rec-agent-name">${r.agent}</span>
+                </div>
+                <span class="impact-badge ${badgeClass}">${badgeClass.toUpperCase()}</span>
+            </div>
+            ${fromModel && toModel ? `
+            <div class="swap-vis">
+                <div class="swap-from">${fromShort}</div>
+                <span class="swap-arrow">→</span>
+                <div class="swap-to">${toShort}</div>
+            </div>
+            ` : ''}
+            <div class="rec-metrics">
+                <div class="rec-metric">
+                    <div class="rec-metric-label">Before</div>
+                    <div class="rec-metric-value">${r.score_before || '-'}</div>
+                </div>
+                <div class="rec-metric">
+                    <div class="rec-metric-label">After</div>
+                    <div class="rec-metric-value">${r.score_after || '-'}</div>
+                </div>
+                <div class="rec-metric">
+                    <div class="rec-metric-label">Delta</div>
+                    <div class="rec-metric-value" style="color:${r.score_delta > 0 ? 'var(--accent-green)' : r.score_delta < 0 ? 'var(--accent-red)' : 'var(--text-muted)'}">${r.score_delta > 0 ? '+' : ''}${r.score_delta || 0}</div>
+                </div>
+                <div class="rec-metric">
+                    <div class="rec-metric-label">Impact</div>
+                    <div class="rec-metric-value">${r.impact?.toUpperCase() || 'N/A'}</div>
+                </div>
+            </div>
+            <div class="rec-rationale">${r.rationale || 'No rationale provided'}</div>
+        </div>
     `;
+}
 
-    // Body
-    document.getElementById('matrixBody').innerHTML = agents.map(([name, agent]) => {
-        const fit = agent.current.benchmark?.fit_score || 0;
-        const scoreClass = fit >= 80 ? 'high' : fit >= 60 ? 'medium' : 'low';
-        const status = agent.current.status === 'new' ? '🆕 New' : 
-                       agent.current.recommendations?.length > 0 ? '⚠️ Update' : '✅ OK';
-        
-        return `
-            <tr>
-                <td><strong>${name}</strong></td>
-                <td><code style="color:var(--accent-green)">${agent.current.model || '—'}</code></td>
-                <td>${agent.current.provider || '—'}</td>
-                <td>
-                    <div class="score-bar">
-                        <div class="score-bg"><div class="score-fill ${scoreClass}" style="width:${fit}%"></div></div>
-                        <span>${fit}</span>
-                    </div>
-                </td>
-                <td>${agent.current.category}</td>
-                <td>${status}</td>
-            </tr>
+// Render the agent × model heatmap into #hmTable. Only each agent's current-model cell is a real fit score; all other cells are deterministic placeholders.
+function renderHeatmap() {
+    const agents = Object.entries(agentData.agents);
+    if (agents.length === 0) return;
+
+    // Collect the distinct models in use plus one instruction-following (IF) score per model
+    const modelSet = new Set();
+    const modelIfScores = {};
+    agents.forEach(([_, a]) => {
+        const model = a.current.model;
+        if (model) {
+            modelSet.add(model);
+            // Keep an IF score already recorded for this model; default to 70 only when none is known at all
+            modelIfScores[model] = a.current.benchmark?.instruction_following || modelIfScores[model] || 70;
+        }
+    });
+
+    // Build hmModels array (one column descriptor per distinct model id)
+    const hmModels = [...modelSet].map(m => {
+        // Column label: default to the id tail (after the last '/') so every distinct model keeps a distinct label
+        let shortName = m.split('/').pop() || m;
+        if (m.includes('qwen3-coder')) shortName = 'Qwen3-Coder';
+        else if (m.includes('glm-')) shortName = m.includes('5.1') ? 'GLM-5.1' : 'GLM-5';
+        else if (m.includes('nemotron')) shortName = m.includes('nano') ? 'Nem. Nano' : 'Nem. Super';
+        else if (m.includes('minimax')) shortName = 'MiniMax M2.5';
+        // kimi/deepseek ids are deliberately NOT collapsed to one hard-coded label any more ('Kimi K2.6' / 'DeepSeek V3'):
+        // e.g. deepseek-v4-flash and deepseek-v4-pro-max have to stay two distinguishable, correctly named columns.
+
+        // Provider label ('ollama-cloud' ids are already covered by the 'cloud' substring test)
+        let provider = 'Ollama';
+        if (m.includes('cloud')) provider = 'Ollama Cloud';
+        else if (m.includes('openrouter')) provider = 'OpenRouter';
+        else if (m.includes('groq')) provider = 'Groq';
+
+        return {
+            n: shortName,
+            p: provider,
+            if: modelIfScores[m] || 70,
+            full: m
+        };
+    });
+
+    // Build hmAgents array: one row per agent with a score for every model column
+    const hmAgents = agents.map(([name, agent]) => {
+        const currentModel = agent.current.model;
+        const currentIdx = hmModels.findIndex(m => m.full === currentModel);
+        const fitScore = agent.current.benchmark?.fit_score || 70;
+
+        // Only the current-model cell carries the recorded fit score (70 when none is recorded)
+        const scores = hmModels.map(m => {
+            if (m.full === currentModel) return fitScore;
+            // Placeholder for every other cell: char-code sum of agent name + model id, folded into 50-75
+            const hash = (name + m.full).split('').reduce((a, c) => a + c.charCodeAt(0), 0);
+            return 50 + (hash % 26);
+        });
+
+        return {
+            n: name,
+            c: currentIdx,
+            s: scores
+        };
+    });
+
+    // Render the table: rotated header cells first, then one row per agent
+    const t = document.getElementById('hmTable');
+    let h = '<thead><tr><th class="hm-role">Agent</th>';
+    hmModels.forEach(m => {
+        const ifColor = m.if >= 85 ? '#00ff94' : m.if >= 75 ? '#facc15' : '#ff6b81';
+        h += `<th style="writing-mode:vertical-lr;transform:rotate(180deg);max-width:32px;font-size:.56em;padding:3px 1px;">
+            ${m.n}<br>
+            <span style="color:${m.p.includes('Cloud') ? 'var(--accent-cyan)' : 'var(--accent-green)'};font-size:.85em">${m.p}</span><br>
+            <span style="color:${ifColor};font-size:.9em;font-weight:700" title="Instruction Following score">IF:${m.if}</span>
+        </th>`;
+    });
+    h += '</tr></thead><tbody>';
+
+    hmAgents.forEach(ag => {
+        const mx = Math.max(...ag.s);
+        h += `<tr><td class="hm-r">${ag.n}</td>`;
+        ag.s.forEach((s, j) => {
+            const best = s === mx;
+            const cur = j === ag.c;
+            const ifLow = hmModels[j].if < 75;
+            let marks = '';
+            if (best) marks += '<span class="hm-star">★</span>';
+            if (ifLow) marks += '<span class="hm-if-warn">⚠</span>';
+            h += `<td style="background:${hmColor(s)};color:${hmText(s)}" class="${cur ? 'hm-cur' : ''}" title="${ag.n} × ${hmModels[j].n}: ${s}"
+                onmouseover="showTT(event,'${ag.n}','${hmModels[j].n} (${hmModels[j].p})',${s},${best},${cur},${hmModels[j].if})"
+                onmouseout="hideTT()"
+                onclick="openHmModal(event,'${ag.n}','${hmModels[j].n}',${s},${hmModels[j].if})">${s}${marks}</td>`;
+        });
+        h += '</tr>';
+    });
+    t.innerHTML = h + '</tbody>';
+}
+
+// Background colour of a heatmap cell: the first band whose floor the score reaches, checked from the top down.
+function hmColor(v) {
+    const bands = [[88, 'rgba(0,255,148,.8)'], [82, 'rgba(0,212,255,.7)'], [75, 'rgba(59,130,246,.6)'],
+        [68, 'rgba(168,85,247,.45)'], [60, 'rgba(255,159,67,.4)'], [50, 'rgba(255,71,87,.3)']];
+    for (const [floor, rgba] of bands) {
+        if (v >= floor) return rgba;
+    }
+    return 'rgba(90,104,128,.2)'; // below every band, or a value that compares false against all of them
+}
+
+function hmText(v) { // Cell text colour: dark ink on the bright bands (score >= 75), light ink otherwise.
+    return ['#e8edf5', '#0e1219'][Number(v >= 75)];
+}
+
+function showTT(e, agent, model, score, best, cur, ifScore) { // Fill #ttBox for a heatmap cell, place it under the cell, reveal #ttOverlay.
+    const box = document.getElementById('ttBox'), overlay = document.getElementById('ttOverlay');
+    // Instruction-following tiers: >= 85 green / Excellent, >= 75 yellow / Average, anything lower red / Weak.
+    const [tone, grade] = ifScore >= 85 ? ['#00ff94', 'Excellent'] : ifScore >= 75 ? ['#facc15', 'Average'] : ['#ff6b81', 'Weak'];
+    box.innerHTML = `<h4>${model}</h4><p><strong>Agent:</strong> ${agent}<br><strong>Score:</strong> ${score}/100<br>
+        <strong>Instruction Following:</strong> <span style="color:${tone};font-weight:700">${ifScore}/100 (${grade})</span><br>
+        <span style="font-size:.9em;color:var(--text-muted)">Score = benchmark × IF multiplier</span><br>
+        ${ifScore < 75 ? '<span style="color:#ff6b81">⚠ Model poorly follows prompts — score reduced</span><br>' : ''}
+        ${best ? '★ <strong>Best fit</strong><br>' : ''}${cur ? '📌 <strong>Current</strong>' : ''}</p>`;
+    const cell = e.target.getBoundingClientRect();
+    box.style.left = `${Math.min(cell.left, window.innerWidth - 320)}px`; // NOTE(review): 320 presumably matches the tooltip width — confirm in CSS
+    box.style.top = `${cell.bottom + 6}px`;
+    overlay.classList.add('show');
+}
+
+function hideTT() { // Hide the tooltip by removing the 'show' class that showTT adds to #ttOverlay
+    document.getElementById('ttOverlay').classList.remove('show');
+}
+
+// Selection behind the open heatmap modal; all four are written by openHmModal on every cell click
+let hmCurrentAgent = null; // agent name; renderHmModalContent uses it as the key into agentData.agents
+let hmCurrentModel = null; // model column label as passed by the clicked heatmap cell
+let hmCurrentScore = null; // score shown in the clicked cell
+let hmCurrentIf = null; // instruction-following score of that model column
+
+function openHmModal(e, agentName, modelName, score, ifScore) {
+    // Stop the click here: the document-level click handler closes #hmModal for clicks outside .modal-content.
+    e.stopPropagation();
+    // Remember the selection so the tab renderers know which agent is being inspected.
+    [hmCurrentAgent, hmCurrentModel, hmCurrentScore, hmCurrentIf] = [agentName, modelName, score, ifScore];
+
+    const heading = document.getElementById('hmModalTitle');
+    heading.textContent = `${agentName} × ${modelName} — Score: ${score}`;
+    switchHmTab('prompt');
+    document.getElementById('hmModal').style.display = 'flex';
+}
+
+function closeHmModal() { // Hide the heatmap detail modal; the hmCurrent* selection is left untouched
+    document.getElementById('hmModal').style.display = 'none';
+}
+
+// Dismiss whichever modal is open when a click lands outside a .modal-content element
+document.addEventListener('click', (e) => {
+    // Evaluated lazily, so e.target is only inspected once some modal is actually open.
+    const outsideContent = () => !e.target.closest('.modal-content');
+    const hasShowClass = (id) => document.getElementById(id).classList.contains('show');
+
+    // The heatmap modal is opened through an inline display:flex, so that is what marks it as open.
+    if (document.getElementById('hmModal').style.display === 'flex' && outsideContent()) {
+        closeHmModal();
+    }
+
+    // The apply and research modals are tracked through a 'show' class instead.
+    if (hasShowClass('applyModal') && outsideContent()) {
+        closeApplyModal();
+    }
+    if (hasShowClass('researchModal') && outsideContent()) {
+        closeResearchModal();
+    }
+});
+
+function switchHmTab(tabName) { // Mark the tab button for tabName as active and render that tab's panel.
+    document.querySelectorAll('.hm-tab-btn, .hm-tab-content').forEach(el => el.classList.remove('active'));
+    // The implicit global `event` is trusted only when it came from a tab button: openHmModal calls this during a heatmap-cell click, where event.target is the <td>.
+    const btn = window.event?.target?.closest?.('.hm-tab-btn') || [...document.querySelectorAll('.hm-tab-btn')].find(b => b.dataset.tab === tabName || (b.getAttribute('onclick') || '').includes(`'${tabName}'`));
+    if (btn) btn.classList.add('active'); // NOTE(review): the by-name fallback assumes data-tab="<name>" or an inline onclick="switchHmTab('<name>')" on the buttons — confirm
+    renderHmModalContent(tabName);
+}
+
+function renderHmModalContent(tabName) {
+    // Fills #hmModalBody with one tab's markup for the agent chosen in openHmModal (hmCurrentAgent).
+    const target = document.getElementById('hmModalBody');
+    const selected = agentData.agents[hmCurrentAgent];
+
+    // Guard: no record stored under that name -> show a note and stop.
+    if (!selected) {
+        target.innerHTML = '<div class="hm-no-data">No data available for this agent</div>';
+        return;
+    }
+
+    // Tab name -> renderer. Every renderer takes the agent record and returns an HTML string.
+    const renderers = new Map([
+        ['prompt', renderPromptTab],
+        ['gitea', renderGiteaTab],
+        ['skills', renderSkillsTab],
+        ['models', renderModelsTab]
+    ]);
+    const render = renderers.get(tabName);
+
+    // An unrecognised tab name leaves the panel empty rather than failing.
+    let markup = '';
+    if (render) {
+        markup = render(selected);
+    }
+
+    target.innerHTML = `<div class="hm-tab-content active" style="display:block">${markup}</div>`;
+}
+
+function renderPromptTab(agent) {
+    // Markup for the modal's "prompt" tab: description, mode badge and the model-change history,
+    // listed newest first (the stored history array is copied before reversing, so it is not mutated).
+    const info = agent.current || {};
+    const descText = info.description || 'No description available';
+    const modeText = info.mode || 'unknown';
+    const hasHistory = agent.history && agent.history.length > 0;
+
+    // Without any history a "no data" note replaces the whole history section.
+    const historySection = !hasHistory
+        ? '<div class="hm-no-data">No history recorded</div>'
+        : '<div style="margin-top:16px"><div style="font-size:.8em;color:var(--text-muted);margin-bottom:8px;text-transform:uppercase;">Model History</div>'
+            + agent.history.slice().reverse().map(h => `
+                <div style="display:flex;align-items:center;gap:10px;padding:8px;background:var(--bg-deep);border-radius:6px;margin-bottom:6px;border-left:3px solid var(--accent-cyan);">
+                    <span style="font-family:'JetBrains Mono',monospace;font-size:.72em;color:var(--text-muted);min-width:80px">${formatDate(h.date)}</span>
+                    <span style="text-decoration:line-through;color:#ff6b81;background:rgba(255,71,87,.08);padding:2px 6px;border-radius:4px;font-size:.8em">${h.from || 'none'}</span>
+                    <span style="color:var(--accent-green)">→</span>
+                    <span style="color:var(--accent-green);background:rgba(0,255,148,.08);padding:2px 6px;border-radius:4px;font-weight:600;font-size:.8em">${h.to}</span>
+                    ${h.reason ? `<span style="margin-left:auto;font-size:.75em;color:var(--text-muted)">${h.reason}</span>` : ''}
+                </div>
+            `).join('')
+            + '</div>';
+
+    // Assemble the tab: description block, mode badge, then the history section built above.
+    return `
+        <div class="hm-agent-desc">
+            <strong>Description:</strong> ${descText}
+        </div>
+        <div style="margin-bottom:14px">
+            <span style="font-size:.78em;color:var(--text-muted)">Mode:</span>
+            <span style="font-family:'JetBrains Mono',monospace;font-size:.85em;padding:3px 8px;background:rgba(168,85,247,.15);border-radius:4px;color:var(--accent-purple)">${modeText}</span>
+        </div>
+        ${historySection}
+    `;
+}
+
+function renderGiteaTab(agent) { // Markup for the modal's "gitea" tab: model changes, newest first, each with its commit id
+    if (!agent.history || agent.history.length === 0) { // no recorded changes: return a "no data" note
+        return '<div class="hm-no-data">No history recorded</div>';
+    }
+
+    let html = '<div class="hm-model-timeline">';
+    agent.history.slice().reverse().forEach(h => { // iterate a reversed copy so the stored history array is not mutated
+        const commit = h.commit ? h.commit.substring(0, 7) : 'unknown'; // first 7 characters of the commit id, or 'unknown'
+        html += `
+            <div class="hm-tl-item">
+                <div class="hm-tl-date">${formatDate(h.date)}</div>
+                <div class="hm-tl-change">
+                    <span class="hm-tl-from">${h.from || 'none'}</span>
+                    <span class="hm-tl-arrow">→</span>
+                    <span class="hm-tl-to">${h.to}</span>
+                </div>
+                <span style="font-size:.72em;color:var(--text-muted);margin-left:auto;font-family:'JetBrains Mono',monospace">${commit}</span>
+            </div>
         `;
-    }).join('');
+    });
+    html += '</div>';
+    return html;
+}
+
+function renderSkillsTab(agent) {
+    // Markup for the modal's "skills" tab: the agent's category badge followed by its capability tags.
+    // Missing fields degrade to 'Unknown' / an empty list instead of throwing.
+    const info = agent.current || {};
+    const categoryLabel = info.category || 'Unknown';
+    const caps = info.capabilities || [];
+
+    // Tags are only built when the list is non-empty;
+    // otherwise a "no data" note takes their place.
+    const capsBlock = caps.length > 0
+        ? '<div class="hm-capabilities">'
+            + caps.map(cap => `<span class="hm-cap-tag">${cap}</span>`).join('')
+            + '</div>'
+        : '<div class="hm-no-data">No capabilities defined</div>';
+
+    // Assemble the tab: category first, capabilities below it.
+    return `
+        <div style="margin-bottom:16px">
+            <div style="font-size:.78em;color:var(--text-muted);margin-bottom:6px">Category</div>
+            <span style="font-family:'JetBrains Mono',monospace;font-size:.85em;padding:4px 10px;background:rgba(0,212,255,.1);border-radius:6px;color:var(--accent-cyan)">${categoryLabel}</span>
+        </div>
+        <div>
+            <div style="font-size:.78em;color:var(--text-muted);margin-bottom:8px">Capabilities</div>
+            ${capsBlock}
+        </div>
+    `;
+}
+
+function renderModelsTab(agent) {
+    // Markup for the modal's "models" tab: the recorded model changes in stored order, then a closing "Now" row.
+    const activeModel = (agent.current || {}).model || 'unknown';
+    const changes = agent.history;
+
+    // No recorded changes: show only the current model plus a note.
+    if (!changes || changes.length === 0) {
+        return `
+            <div style="margin-bottom:16px">
+                <div style="font-size:.78em;color:var(--text-muted);margin-bottom:6px">Current Model</div>
+                <div style="padding:10px;background:var(--bg-deep);border-radius:8px;border-left:3px solid var(--accent-green);">
+                    <span style="font-family:'JetBrains Mono',monospace;font-weight:600;color:var(--accent-green)">${activeModel}</span>
+                    <span class="hm-model-tl-score">Current</span>
+                </div>
+            </div>
+            <div class="hm-no-data">No model timeline - this agent has no history</div>
+        `;
+    }
+
+    const rows = changes.map((h, idx) => {
+        const latest = idx === changes.length - 1; // the last recorded change also gets the hm-tl-current class
+        const fit = h.fit_score_after || 0; // 0 means "no score recorded" and hides the score chip
+        return `
+            <div class="hm-tl-item ${latest ? 'hm-tl-current' : ''}">
+                <div class="hm-tl-date">${formatDate(h.date)}</div>
+                <div class="hm-tl-change">
+                    <span class="hm-tl-from">${h.from || 'initial'}</span>
+                    <span class="hm-tl-arrow">→</span>
+                    <span class="hm-tl-to">${h.to}</span>
+                </div>
+                ${fit > 0 ? `<span class="hm-model-tl-score">Score: ${fit}</span>` : ''}
+            </div>
+        `;
+    });
+
+    // Closing row for the model the agent runs on now.
+    const nowRow = `
+        <div class="hm-tl-item hm-tl-current">
+            <div class="hm-tl-date">Now</div>
+            <div class="hm-tl-change">
+                <span class="hm-tl-to">${activeModel}</span>
+            </div>
+            <span class="hm-model-tl-score">Current</span>
+        </div>
+    `;
+    return '<div class="hm-model-timeline">' + rows.join('') + nowRow + '</div>';
+}
+
+// Render the Impact tab: fill #impactStats with five summary cards, then draw the three charts
+// (score history, model distribution, migration impact). Reads the global agentData.agents map.
+// An agent counts as "scored" only when its current benchmark fit_score is greater than 0.
+function renderImpact() {
+    const allAgents = Object.entries(agentData.agents);
+    // Agents that have at least one recorded model change
+    const agentsWithHistory = allAgents.filter(([_, a]) => a.history && a.history.length > 0);
+
+    // === Calculate Stats ===
+    // The per-migration delta average formerly computed here was removed: nothing rendered it,
+    // and it counted a missing fit_score_before as 0, which inflated the figure.
+    const modelCounts = {}; // model id -> number of agents currently assigned to it
+    // Best/worst are the models of the highest/lowest scoring agents (not per-model averages).
+    let bestModel = { name: '', score: 0 };
+    // Infinity (not 100) is the sentinel, so an agent scoring exactly 100 can still be recorded as the worst.
+    let worstModel = { name: '', score: Infinity };
+    let totalScore = 0;
+    let agentsWithScore = 0;
+
+    // Process data
+    allAgents.forEach(([, agent]) => {
+        // Model distribution
+        const model = agent.current?.model || 'unknown';
+        modelCounts[model] = (modelCounts[model] || 0) + 1;
+
+        // Score stats: a missing or zero fit score means "unscored" and is left out of avg/best/worst
+        const score = agent.current?.benchmark?.fit_score || 0;
+        if (score > 0) {
+            totalScore += score;
+            agentsWithScore++;
+            if (score > bestModel.score) bestModel = { name: model, score };
+            if (score < worstModel.score) worstModel = { name: model, score };
+        }
+    });
+
+    // Derived figures for the cards
+    const totalAgents = allAgents.length;
+    // toFixed(1) yields a string; plain 0 is shown when no agent is scored
+    const avgSystemScore = agentsWithScore > 0 ? (totalScore / agentsWithScore).toFixed(1) : 0;
+    // Total number of history entries across all agents
+    const changesMade = agentsWithHistory.reduce((sum, [_, a]) => sum + (a.history?.length || 0), 0);
+    // When no agent is scored, print 'N/A' instead of leaking the sentinels (0 / Infinity) into the cards.
+    const bestScoreText = bestModel.name ? bestModel.score : 'N/A';
+    const worstScoreText = worstModel.name ? worstModel.score : 'N/A';
+
+    // === Render Stats Row ===
+    document.getElementById('impactStats').innerHTML = `
+        <div class="stat-card">
+            <div class="stat-label">Total Agents</div>
+            <div class="stat-value grad-cyan">${totalAgents}</div>
+            <div class="stat-sub">in system</div>
+        </div>
+        <div class="stat-card">
+            <div class="stat-label">Avg System Score</div>
+            <div class="stat-value grad-green">${avgSystemScore}</div>
+            <div class="stat-sub">composite</div>
+        </div>
+        <div class="stat-card">
+            <div class="stat-label">Best Model</div>
+            <div class="stat-value grad-purple">${bestModel.name ? bestModel.name.split('/').pop() : 'N/A'}</div>
+            <div class="stat-sub">score: ${bestScoreText}</div>
+        </div>
+        <div class="stat-card">
+            <div class="stat-label">Worst Model</div>
+            <div class="stat-value grad-orange">${worstModel.name ? worstModel.name.split('/').pop() : 'N/A'}</div>
+            <div class="stat-sub">score: ${worstScoreText}</div>
+        </div>
+        <div class="stat-card">
+            <div class="stat-label">Changes Made</div>
+            <div class="stat-value grad-cyan">${changesMade}</div>
+            <div class="stat-sub">total migrations</div>
+        </div>
+    `;
+
+    // The three charts below reuse the data gathered above.
+    // === Draw Historical Score Graph ===
+    drawHistoricalScoreGraph(allAgents);
+
+    // === Draw Model Distribution Donut ===
+    drawModelDistribution(modelCounts);
+
+    // === Draw Migration Impact Bars ===
+    drawMigrationImpactBars(agentsWithHistory);
+}
+
+// Draw the history chart on #historyScoreCanvas: average fit score per day as a line with area fill (y axis fixed at 0-100).
+function drawHistoricalScoreGraph(allAgents) {
+    const canvas = document.getElementById('historyScoreCanvas');
+    if (!canvas) return;
+    const ctx = canvas.getContext('2d');
+    const today = new Date().toISOString().substring(0, 10); // UTC calendar day under which current scores are bucketed
+    // Collect unique dates and compute average score per date (allAgents is a list of [name, agent] pairs)
+    const dateScores = {};
+    allAgents.forEach(([, agent]) => {
+        if (agent.history && agent.history.length > 0) {
+            agent.history.forEach(h => {
+                // Undated entries are skipped: the old 'unknown' bucket sorted after every ISO date and was plotted as the newest point
+                const date = typeof h.date === 'string' ? h.date.substring(0, 10) : '';
+                if (date && h.fit_score_after != null) {
+                    if (!dateScores[date]) dateScores[date] = { total: 0, count: 0 };
+                    dateScores[date].total += h.fit_score_after;
+                    dateScores[date].count++;
+                }
+            });
+        }
+        // Also include current scores, bucketed under today's date
+        if (agent.current?.benchmark?.fit_score > 0) {
+            if (!dateScores[today]) dateScores[today] = { total: 0, count: 0 };
+            dateScores[today].total += agent.current.benchmark.fit_score;
+            dateScores[today].count++;
+        }
+    });
+
+    const sortedDates = Object.keys(dateScores).sort();
+    const dataPoints = sortedDates.map(d => ({
+        date: d,
+        avg: dateScores[d].count > 0 ? dateScores[d].total / dateScores[d].count : 0
+    }));
+
+    // Check if we have data; otherwise show the placeholder (when that element exists) instead of the canvas
+    const placeholder = document.getElementById('historyPlaceholder');
+    if (dataPoints.length === 0 || dataPoints.every(d => d.avg === 0)) {
+        canvas.style.display = 'none';
+        if (placeholder) placeholder.style.display = 'block';
+        return;
+    }
+    canvas.style.display = 'block';
+    if (placeholder) placeholder.style.display = 'none';
+
+    // Setup canvas: size the backing store by devicePixelRatio, then scale so drawing uses CSS pixels
+    const dpr = window.devicePixelRatio || 1;
+    const rect = canvas.getBoundingClientRect();
+    canvas.width = rect.width * dpr;
+    canvas.height = 220 * dpr;
+    ctx.scale(dpr, dpr);
+
+    const w = rect.width;
+    const h = 220;
+    const padding = { top: 30, right: 20, bottom: 50, left: 45 };
+    const chartW = w - padding.left - padding.right;
+    const chartH = h - padding.top - padding.bottom;
+
+    ctx.clearRect(0, 0, w, h);
+
+    // Draw grid: five horizontal lines labelled 100, 75, 50, 25, 0
+    const maxVal = 100;
+    const minVal = 0;
+    ctx.strokeStyle = '#1e2d45';
+    ctx.lineWidth = 1;
+    ctx.font = '10px JetBrains Mono';
+    ctx.fillStyle = '#5a7090';
+    ctx.textAlign = 'right';
+
+    for (let i = 0; i <= 4; i++) {
+        const y = padding.top + (i * chartH / 4);
+        const val = Math.round(maxVal - (i * (maxVal - minVal) / 4));
+        ctx.beginPath();
+        ctx.moveTo(padding.left, y);
+        ctx.lineTo(w - padding.right, y);
+        ctx.stroke();
+        ctx.fillText(val.toString(), padding.left - 8, y + 4);
+    }
+
+    // X-axis labels: roughly six evenly spaced dates, plus always the last one
+    ctx.textAlign = 'center';
+    const labelStep = Math.max(1, Math.floor(dataPoints.length / 6));
+    dataPoints.forEach((d, i) => {
+        if (i % labelStep === 0 || i === dataPoints.length - 1) {
+            const x = padding.left + (i * chartW / Math.max(1, dataPoints.length - 1));
+            const label = d.date.substring(5); // MM-DD
+            ctx.fillText(label, x, h - padding.bottom + 20);
+        }
+    });
+
+    // Draw area fill (needs at least two points to enclose an area)
+    if (dataPoints.length > 1) {
+        const gradient = ctx.createLinearGradient(0, padding.top, 0, h - padding.bottom);
+        gradient.addColorStop(0, 'rgba(0,255,148,0.4)');
+        gradient.addColorStop(1, 'rgba(0,255,148,0.02)');
+
+        ctx.beginPath();
+        ctx.moveTo(padding.left, h - padding.bottom);
+
+        dataPoints.forEach((d, i) => {
+            const x = padding.left + (i * chartW / Math.max(1, dataPoints.length - 1));
+            const y = padding.top + chartH - (d.avg / maxVal * chartH);
+            // The path already starts at the bottom-left corner, so every data point is a plain lineTo
+            ctx.lineTo(x, y);
+        });
+
+        // Close to bottom
+        const lastX = padding.left + chartW;
+        ctx.lineTo(lastX, h - padding.bottom);
+        ctx.closePath();
+        ctx.fillStyle = gradient;
+        ctx.fill();
+    }
+
+    // Draw line
+    ctx.beginPath();
+    ctx.strokeStyle = '#00ff94';
+    ctx.lineWidth = 2.5;
+    ctx.lineCap = 'round';
+    ctx.lineJoin = 'round';
+
+    dataPoints.forEach((d, i) => {
+        const x = padding.left + (i * chartW / Math.max(1, dataPoints.length - 1));
+        const y = padding.top + chartH - (d.avg / maxVal * chartH);
+        if (i === 0) ctx.moveTo(x, y);
+        else ctx.lineTo(x, y);
+    });
+    ctx.stroke();
+
+    // Draw points: a dark disc with a green ring on every data point
+    dataPoints.forEach((d, i) => {
+        const x = padding.left + (i * chartW / Math.max(1, dataPoints.length - 1));
+        const y = padding.top + chartH - (d.avg / maxVal * chartH);
+
+        ctx.beginPath();
+        ctx.arc(x, y, 4, 0, Math.PI * 2);
+        ctx.fillStyle = '#0a0f1a';
+        ctx.fill();
+        ctx.strokeStyle = '#00ff94';
+        ctx.lineWidth = 2;
+        ctx.stroke();
+    });
+}
+
+// Draw Model Distribution Donut Chart
+// modelCounts: object mapping model id -> count; entries with count <= 0 are ignored.
+// Shows #modelDistPlaceholder instead of the canvas when nothing is left to plot.
+function drawModelDistribution(modelCounts) {
+    const canvas = document.getElementById('modelDistCanvas');
+    if (!canvas) return;
+    const ctx = canvas.getContext('2d');
+    const modelEntries = Object.entries(modelCounts).filter(([_, count]) => count > 0);
+    // The placeholder is optional markup: never dereference it without a check.
+    const placeholder = document.getElementById('modelDistPlaceholder');
+    if (modelEntries.length === 0) {
+        canvas.style.display = 'none';
+        if (placeholder) placeholder.style.display = 'block';
+        return;
+    }
+    canvas.style.display = 'block';
+    if (placeholder) placeholder.style.display = 'none';
+    
+    // Setup canvas: back the 240px-tall drawing area with dpr-scaled pixels
+    const dpr = window.devicePixelRatio || 1;
+    const rect = canvas.getBoundingClientRect();
+    canvas.width = rect.width * dpr;
+    canvas.height = 240 * dpr;
+    ctx.scale(dpr, dpr);
+    const w = rect.width;
+    const h = 240;
+    const centerX = w / 2;
+    const centerY = h / 2;
+    const outerRadius = Math.min(w, h) / 2 - 20;
+    const innerRadius = outerRadius * 0.55;
+    // A canvas that is not laid out yet (e.g. inside a hidden tab) measures 0px wide;
+    // ctx.arc() throws on a negative radius, so skip drawing until there is room.
+    if (outerRadius <= 0) return;
+    ctx.clearRect(0, 0, w, h);
+    
+    // Color palette for models (reused cyclically past 10 entries)
+    const colors = [
+        '#00ff94', '#00d4ff', '#a855f7', '#ff9f43', '#ff4757',
+        '#3b82f6', '#facc15', '#e879f9', '#4ade80', '#fb7185'
+    ];
+    
+    const total = modelEntries.reduce((sum, [_, c]) => sum + c, 0);
+    let startAngle = -Math.PI / 2; // first slice starts at 12 o'clock
+    
+    // Draw donut segments: outer arc forward, inner arc back, then fill the ring slice
+    modelEntries.forEach(([_, count], idx) => {
+        const sliceAngle = (count / total) * Math.PI * 2;
+        const color = colors[idx % colors.length];
+        
+        ctx.beginPath();
+        ctx.arc(centerX, centerY, outerRadius, startAngle, startAngle + sliceAngle);
+        ctx.arc(centerX, centerY, innerRadius, startAngle + sliceAngle, startAngle, true);
+        ctx.closePath();
+        ctx.fillStyle = color;
+        ctx.fill();
+        
+        startAngle += sliceAngle;
+    });
+    
+    // Draw center text (sum of all counts, captioned 'agents')
+    ctx.fillStyle = '#e8f1ff';
+    ctx.font = 'bold 24px Inter';
+    ctx.textAlign = 'center';
+    ctx.textBaseline = 'middle';
+    ctx.fillText(total.toString(), centerX, centerY - 10);
+    
+    ctx.fillStyle = '#8ba3c0';
+    ctx.font = '11px Inter';
+    ctx.fillText('agents', centerX, centerY + 14);
+    
+    // Draw legend: one swatch + "name (count)" per model along the bottom edge
+    const legendY = h - 10;
+    ctx.font = '9px JetBrains Mono';
+    let legendX = 10;
+    modelEntries.forEach(([model, count], idx) => {
+        const shortName = model.split('/').pop().substring(0, 12);
+        const color = colors[idx % colors.length];
+        ctx.fillStyle = color;
+        ctx.fillRect(legendX, legendY - 6, 8, 8);
+        ctx.fillStyle = '#8ba3c0';
+        ctx.textAlign = 'left';
+        ctx.fillText(`${shortName} (${count})`, legendX + 10, legendY);
+        legendX += ctx.measureText(`${shortName} (${count})`).width + 16;
+    });
+}
+
+// Draw Migration Impact Bars - Before/After comparison
+// agentsWithHistory: array of [name, agent] pairs. The latest history entry supplies
+// fit_score_before/fit_score_after; agents without history reuse current.benchmark.fit_score.
+function drawMigrationImpactBars(agentsWithHistory) {
+    const canvas = document.getElementById('impactCanvas');
+    if (!canvas) return;
+    const ctx = canvas.getContext('2d');
+    // Collect impact data
+    const impactData = [];
+    agentsWithHistory.forEach(([name, agent]) => {
+        if (agent.history && agent.history.length > 0) {
+            const latest = agent.history[agent.history.length - 1];
+            if (latest.fit_score_before != null && latest.fit_score_after != null) {
+                impactData.push({
+                    agent: name,
+                    before: latest.fit_score_before,
+                    after: latest.fit_score_after,
+                    delta: latest.fit_score_after - latest.fit_score_before
+                });
+            }
+        } else if (agent.current?.benchmark?.fit_score > 0) {
+            // No history but has score - show current only
+            impactData.push({
+                agent: name,
+                before: agent.current.benchmark.fit_score,
+                after: agent.current.benchmark.fit_score,
+                delta: 0
+            });
+        }
+    });
+    
+    // The placeholder is optional markup: never dereference it without a check.
+    const placeholder = document.getElementById('impactPlaceholder');
+    if (impactData.length === 0) {
+        canvas.style.display = 'none';
+        if (placeholder) placeholder.style.display = 'block';
+        return;
+    }
+    canvas.style.display = 'block';
+    if (placeholder) placeholder.style.display = 'none';
+    
+    // Setup canvas
+    // NOTE(review): the plot assumes a 240px-tall canvas - confirm the #impactCanvas CSS height matches.
+    const dpr = window.devicePixelRatio || 1;
+    const rect = canvas.getBoundingClientRect();
+    canvas.width = rect.width * dpr;
+    canvas.height = 240 * dpr;
+    ctx.scale(dpr, dpr);
+    const w = rect.width;
+    const h = 240;
+    const padding = { top: 30, right: 20, bottom: 50, left: 45 };
+    const chartW = w - padding.left - padding.right;
+    const chartH = h - padding.top - padding.bottom;
+    ctx.clearRect(0, 0, w, h);
+    
+    // Calculate dimensions (the Y axis always spans at least 0-100)
+    const maxVal = Math.max(...impactData.flatMap(d => [d.before, d.after]), 100);
+    const minVal = 0;
+    const groupW = chartW / impactData.length;
+    const barW = Math.min(24, groupW * 0.35);
+    const scale = chartH / (maxVal - minVal);
+    
+    // ctx.roundRect() is missing in older browsers; fall back to square corners there.
+    const fillBar = (x, y, barH) => {
+        ctx.beginPath();
+        if (typeof ctx.roundRect === 'function') ctx.roundRect(x, y, barW, barH, [4, 4, 0, 0]);
+        else ctx.rect(x, y, barW, barH);
+        ctx.fill();
+    };
+    
+    // Draw grid and Y-axis
+    ctx.strokeStyle = '#1e2d45';
+    ctx.lineWidth = 1;
+    ctx.font = '10px JetBrains Mono';
+    ctx.fillStyle = '#5a7090';
+    ctx.textAlign = 'right';
+    for (let i = 0; i <= 4; i++) {
+        const y = padding.top + (i * chartH / 4);
+        const val = Math.round(maxVal - (i * (maxVal - minVal) / 4));
+        ctx.beginPath();
+        ctx.moveTo(padding.left, y);
+        ctx.lineTo(w - padding.right, y);
+        ctx.stroke();
+        ctx.fillText(val.toString(), padding.left - 8, y + 4);
+    }
+    
+    // Draw bars
+    impactData.forEach((d, i) => {
+        const groupX = padding.left + i * groupW + groupW / 2;
+        // Before bar (red)
+        const beforeH = d.before * scale;
+        const beforeY = padding.top + chartH - beforeH;
+        ctx.fillStyle = 'rgba(255,71,87,0.75)';
+        fillBar(groupX - barW - 2, beforeY, beforeH);
+        // After bar (green, or red when the score dropped)
+        const afterH = d.after * scale;
+        const afterY = padding.top + chartH - afterH;
+        ctx.fillStyle = d.delta >= 0 ? 'rgba(0,255,148,0.75)' : 'rgba(255,71,87,0.75)';
+        fillBar(groupX + 2, afterY, afterH);
+        // Delta indicator, rounded to one decimal so float noise never reaches the label
+        const shown = Math.round(d.delta * 10) / 10;
+        if (shown !== 0) {
+            ctx.fillStyle = shown > 0 ? '#00ff94' : '#ff4757';
+            ctx.font = 'bold 9px JetBrains Mono';
+            ctx.textAlign = 'center';
+            const deltaY = Math.min(beforeY, afterY) - 8;
+            ctx.fillText((shown > 0 ? '+' : '') + shown, groupX, deltaY);
+        }
+        // Agent label (truncated to 10 characters)
+        ctx.fillStyle = '#8ba3c0';
+        ctx.font = '9px JetBrains Mono';
+        ctx.textAlign = 'center';
+        const label = d.agent.length > 10 ? d.agent.substring(0, 10) : d.agent;
+        ctx.fillText(label, groupX, h - padding.bottom + 16);
+    });
+    
+    // Legend
+    const legendY = 12;
+    ctx.fillStyle = 'rgba(255,71,87,0.75)';
+    ctx.fillRect(padding.left, legendY, 12, 10);
+    ctx.fillStyle = '#e8f1ff';
+    ctx.font = '10px Inter';
+    ctx.textAlign = 'left';
+    ctx.fillText('Before', padding.left + 18, legendY + 9);
+    ctx.fillStyle = 'rgba(0,255,148,0.75)';
+    ctx.fillRect(padding.left + 80, legendY, 12, 10);
+    ctx.fillStyle = '#e8f1ff';
+    ctx.fillText('After', padding.left + 98, legendY + 9);
 }
 
 // Filter Agents
@@ -1007,13 +2180,15 @@ function filterCategory(category) {
 
 // Export
 function exportRecommendations() {
-    const recs = Object.entries(agentData.agents)
-        .filter(([_, a]) => a.current.recommendations && a.current.recommendations.length > 0)
-        .map(([name, agent]) => ({
-            agent: name,
-            current_model: agent.current.model,
-            recommendations: agent.current.recommendations
-        }));
+    let recs = INLINE_RECOMMENDATIONS && INLINE_RECOMMENDATIONS.length > 0 
+        ? INLINE_RECOMMENDATIONS 
+        : Object.entries(agentData.agents)
+            .filter(([_, a]) => a.current.recommendations && a.current.recommendations.length > 0)
+            .map(([name, agent]) => ({
+                agent: name,
+                current_model: agent.current.model,
+                recommendations: agent.current.recommendations
+            }));
 
     const output = {
         timestamp: new Date().toISOString(),
@@ -1046,6 +2221,149 @@ function closeModal() {
     document.getElementById('exportModal').classList.remove('show');
 }
 
+// Apply Fixes Modal: renders one pre-checked row per recommendation and opens #applyModal
+function showApplyModal() {
+    const recs = INLINE_RECOMMENDATIONS && INLINE_RECOMMENDATIONS.length > 0 ? INLINE_RECOMMENDATIONS : [];
+    const checklist = document.getElementById('applyChecklist');
+    // Values are interpolated into innerHTML below, so HTML metacharacters are escaped first.
+    const esc = (value) => String(value ?? '').replace(/[&<>"']/g, (ch) => `&#${ch.charCodeAt(0)};`);
+    checklist.innerHTML = recs.map((r, idx) => {
+        const fromModel = r.current_model_in_agent_versions || r.current_model || '';
+        // "Keep current" entries carry no target model; show the current one instead of a blank.
+        const toModel = r.source_of_truth_model || r.recommended_model || fromModel;
+        const fromShort = fromModel.split('/').pop() || fromModel;
+        const toShort = toModel.split('/').pop() || toModel;
+        const impact = String(r.impact || 'low').toLowerCase();
+        return `
+            <div class="apply-item">
+                <input type="checkbox" id="apply-check-${idx}" checked>
+                <div class="apply-item-content">
+                    <div class="apply-item-agent">${esc(r.agent)}</div>
+                    <div class="apply-item-models">
+                        <span class="apply-item-from">${esc(fromShort)}</span>
+                        <span class="apply-item-arrow">→</span>
+                        <span class="apply-item-to">${esc(toShort)}</span>
+                    </div>
+                </div>
+                <span class="apply-item-impact ${esc(impact)}">${esc(impact)}</span>
+            </div>
+        `;
+    }).join('');
+    document.getElementById('applyModal').classList.add('show');
+}
+
+function closeApplyModal() {
+    document.getElementById('applyModal').classList.toggle('show', false); // hide the Apply Fixes modal
+}
+
+// Plays the simulated apply animation in #progressModal; it only updates the DOM on timers.
+function simulateApply() {
+    // Honour the checklist: report only rows left ticked (all rows if the checklist was never rendered).
+    const recs = INLINE_RECOMMENDATIONS && INLINE_RECOMMENDATIONS.length > 0 ? INLINE_RECOMMENDATIONS : [];
+    const boxes = document.querySelectorAll('#applyChecklist input[type="checkbox"]');
+    const appliedCount = boxes.length > 0 ? Array.from(boxes).filter(box => box.checked).length : recs.length;
+    closeApplyModal();
+    const progressModal = document.getElementById('progressModal');
+    const progressBar = document.getElementById('progressBar');
+    const progressStatus = document.getElementById('progressStatus');
+    const progressResult = document.getElementById('progressResult');
+    const progressResultText = document.getElementById('progressResultText');
+    
+    progressModal.classList.add('show');
+    progressResult.classList.remove('show');
+    progressBar.style.width = '0%';
+    progressStatus.textContent = 'Preparing...';
+    
+    const steps = [
+        'Updating capability-index.yaml...',
+        'Updating agent definitions...',
+        'Syncing history...',
+        'Done!'
+    ];
+    let progress = 0;
+    let stepIndex = 0;
+    const totalSteps = steps.length;
+    const stepDuration = 800;
+    // Each tick advances half a step; a step's label appears once its share of the bar is filled.
+    function updateProgress() {
+        progress += 100 / (totalSteps * 2);
+        progressBar.style.width = Math.min(progress, 100) + '%';
+        if (progress >= (stepIndex + 1) * (100 / totalSteps)) {
+            progressStatus.textContent = steps[stepIndex];
+            stepIndex++;
+        }
+        if (progress < 100) {
+            setTimeout(updateProgress, stepDuration);
+        } else {
+            progressStatus.textContent = 'Complete!';
+            progressResult.classList.add('show');
+            progressResultText.textContent = `✅ ${appliedCount} recommendations applied. Run 'bun run sync:evolution' to update dashboard.`;
+        }
+    }
+    
+    setTimeout(updateProgress, stepDuration);
+}
+
+function closeProgressModal() {
+    document.getElementById('progressModal').classList.toggle('show', false); // hide the progress modal
+}
+
+// Research Modal
+// Replays the .research-step items one per second, then shows a summary derived from INLINE_RECOMMENDATIONS.
+function showResearchModal() {
+    const researchModal = document.getElementById('researchModal');
+    const researchSteps = document.getElementById('researchSteps');
+    const researchSummary = document.getElementById('researchSummary');
+    const steps = researchSteps.querySelectorAll('.research-step');
+    
+    researchSummary.classList.remove('show');
+    steps.forEach(step => {
+        step.classList.remove('active', 'done');
+    });
+    researchModal.classList.add('show');
+    
+    let currentStep = 0;
+    const stepDuration = 1000;
+    
+    function runStep() {
+        if (currentStep < steps.length) {
+            steps.forEach((step, idx) => {
+                if (idx < currentStep) {
+                    step.classList.add('done');
+                    step.classList.remove('active');
+                } else if (idx === currentStep) {
+                    step.classList.add('active');
+                    step.classList.remove('done');
+                } else {
+                    step.classList.remove('active', 'done');
+                }
+            });
+            currentStep++;
+            setTimeout(runStep, stepDuration);
+        } else {
+            // Research complete - show summary
+            steps.forEach(step => {
+                step.classList.remove('active');
+                step.classList.add('done');
+            });
+            const recs = INLINE_RECOMMENDATIONS && INLINE_RECOMMENDATIONS.length > 0 ? INLINE_RECOMMENDATIONS : [];
+            // Count distinct model ids across both field spellings; entries without a target add nothing.
+            const modelIds = recs.flatMap(r => [r.current_model_in_agent_versions || r.current_model, r.source_of_truth_model || r.recommended_model]);
+            const modelsCount = new Set(modelIds.filter(Boolean)).size;
+            const recsCount = recs.filter(r => r.score_delta > 0).length;
+            document.getElementById('researchSummaryText').textContent = 
+                `${modelsCount} models evaluated. ${recsCount} recommendations found. ${recs.length - recsCount} idle models detected.`;
+            researchSummary.classList.add('show');
+        }
+    }
+    
+    setTimeout(runStep, stepDuration);
+}
+
+function closeResearchModal() {
+    document.getElementById('researchModal').classList.toggle('show', false); // hide the research modal
+}
+
 // Tab switching
 function switchTab(tabId) {
     document.querySelectorAll('.tab-btn').forEach(btn => btn.classList.remove('active'));
diff --git a/agent-evolution/index.standalone.html b/agent-evolution/index.standalone.html
index 815c470..365c2b5 100644
--- a/agent-evolution/index.standalone.html
+++ b/agent-evolution/index.standalone.html
@@ -472,6 +472,59 @@
         .score-fill.medium { background: linear-gradient(90deg, var(--accent-orange), #ffc048); }
         .score-fill.low { background: linear-gradient(90deg, var(--accent-red), #ff6b81); }
 
+        /* Heatmap */
+        .hm-wrap { overflow-x:auto; border-radius:11px; border:1px solid var(--border); background:var(--bg-card); padding:18px; margin-bottom:26px; }
+        .hm-title { font-weight:700; font-size:1.05em; }
+        .hm-sub { font-size:.76em; color:var(--text-muted); margin-bottom:14px; }
+        .hm-table { border-collapse:separate; border-spacing:2px; width:100%; }
+        .hm-table th { font-family:'JetBrains Mono',monospace; font-size:.62em; color:var(--text-muted); padding:8px 5px; text-align:center; white-space:nowrap; vertical-align:bottom; }
+        .hm-table th.hm-role { text-align:left; min-width:140px; font-size:.68em; padding-left:10px; }
+        .hm-table td { text-align:center; padding:6px 4px; font-family:'JetBrains Mono',monospace; font-size:.72em; font-weight:700; border-radius:6px; cursor:pointer; transition:all .15s cubic-bezier(.4,0,.2,1); min-width:42px; position:relative; line-height:1.4; }
+        .hm-table td:hover { transform:scale(1.1); z-index:2; box-shadow:0 4px 12px rgba(0,0,0,.35); }
+        .hm-table td.hm-r { text-align:left; font-family:'Inter',sans-serif; font-size:.82em; font-weight:600; color:var(--text-primary); cursor:default; padding-left:10px; }
+        .hm-table td.hm-r:hover { transform:none; box-shadow:none; }
+        .hm-star { position:absolute; top:2px; right:2px; font-size:.65em; text-shadow:0 1px 2px rgba(0,0,0,.5); }
+        .hm-cur { box-shadow:inset 0 0 0 2px var(--accent-cyan), 0 0 8px rgba(0,212,255,.35); border-radius:6px; }
+        .hm-cur::after { content:''; position:absolute; bottom:2px; left:50%; transform:translateX(-50%); width:8px; height:3px; background:var(--accent-cyan); border-radius:2px; }
+        .hm-if-warn { position:absolute; top:2px; left:2px; font-size:.6em; opacity:.8; }
+
+        /* Smooth gradient legend bar */
+        .hm-legend-wrap { margin-top:18px; padding:0 4px; }
+        .hm-legend-track { position:relative; height:22px; border-radius:11px; background:linear-gradient(90deg, rgba(0,255,148,.85) 0%, rgba(0,212,255,.75) 20%, rgba(59,130,246,.6) 40%, rgba(168,85,247,.45) 58%, rgba(255,159,67,.35) 75%, rgba(255,71,87,.3) 88%, rgba(90,104,128,.2) 100%); box-shadow:inset 0 1px 3px rgba(0,0,0,.3); }
+        .hm-legend-labels { display:flex; justify-content:space-between; align-items:center; margin-top:8px; padding:0 4px; }
+        .hm-legend-labels span { font-size:.68em; font-family:'JetBrains Mono',monospace; color:var(--text-muted); }
+        .hm-legend-left { color:var(--accent-green); }
+        .hm-legend-right { color:var(--accent-red); }
+        .hm-legend-marks { display:flex; justify-content:space-between; padding:0 2px; margin-top:3px; }
+        .hm-legend-marks span { font-size:.58em; font-family:'JetBrains Mono',monospace; color:var(--text-muted); min-width:20px; text-align:center; }
+
+        /* Heatmap Modal Tabs */
+        .hm-modal-tabs { display:flex; gap:3px; background:var(--bg-panel); border-bottom:1px solid var(--border); padding:4px 18px; }
+        .hm-tab-btn { padding:8px 16px; background:none; border:none; color:var(--text-secondary); font-family:'Inter'; font-size:.82em; font-weight:600; border-radius:8px; cursor:pointer; transition:all .25s; }
+        .hm-tab-btn.active { color:var(--bg-deep); background:linear-gradient(135deg,var(--accent-cyan),var(--accent-green)); }
+        .hm-tab-content { display:none; }
+        .hm-tab-content.active { display:block; }
+        .hm-model-timeline { display:flex; flex-direction:column; gap:12px; }
+        .hm-tl-item { display:flex; gap:14px; align-items:center; padding:10px; background:var(--bg-deep); border-radius:8px; border-left:3px solid var(--accent-cyan); }
+        .hm-tl-date { font-family:'JetBrains Mono',monospace; font-size:.72em; color:var(--text-muted); min-width:100px; }
+        .hm-tl-change { display:flex; align-items:center; gap:8px; }
+        .hm-tl-from { text-decoration:line-through; color:#ff6b81; background:rgba(255,71,87,.08); padding:2px 6px; border-radius:4px; }
+        .hm-tl-arrow { color:var(--accent-green); }
+        .hm-tl-to { color:var(--accent-green); background:rgba(0,255,148,.08); padding:2px 6px; border-radius:4px; font-weight:600; }
+        .hm-tl-current { border-left-color:var(--accent-green); background:rgba(0,255,148,.05); }
+        .hm-no-data { color:var(--text-muted); font-size:.9em; padding:16px; text-align:center; }
+        .hm-capabilities { display:flex; flex-wrap:wrap; gap:6px; }
+        .hm-cap-tag { padding:4px 10px; background:rgba(0,212,255,.1); border:1px solid var(--border); border-radius:16px; font-size:.78em; color:var(--accent-cyan); }
+        .hm-agent-desc { font-size:.9em; color:var(--text-secondary); line-height:1.5; margin-bottom:14px; padding:12px; background:var(--bg-deep); border-radius:8px; }
+        .hm-model-tl-score { margin-left:auto; font-family:'JetBrains Mono',monospace; font-size:.8em; color:var(--accent-cyan); }
+
+        /* Tooltip */
+        #ttOverlay { display:none; position:fixed; top:0;left:0;right:0;bottom:0; z-index:999; pointer-events:none; }
+        #ttOverlay.show { display:block; }
+        #ttBox { position:absolute; background:var(--bg-panel); border:1px solid var(--accent-cyan); border-radius:9px; padding:12px 16px; max-width:300px; box-shadow:0 10px 32px rgba(0,0,0,.55); z-index:1000; }
+        #ttBox h4 { color:var(--accent-cyan); font-size:.9em; margin-bottom:4px; }
+        #ttBox p { font-size:.78em; color:var(--text-secondary); line-height:1.45; }
+
         /* Export */
         .actions-row {
             display: flex;
@@ -551,11 +604,43 @@
             white-space: pre-wrap;
         }
 
+        /* Impact Tab */
+        .chart-wrap { background: var(--bg-card); border: 1px solid var(--border); border-radius: 12px; padding: 20px; margin-bottom: 24px; }
+        .chart-title { font-size: 1.1em; font-weight: 700; margin-bottom: 16px; }
+        .chart-sub { font-size: 0.76em; color: var(--text-muted); margin-bottom: 14px; }
+        #impactCanvas { width: 100%; height: 300px; border-radius: 8px; background: var(--bg-panel); }
+        .chart-placeholder { text-align: center; padding: 60px 20px; color: var(--text-muted); font-size: 0.95em; }
+
+        /* Recommendation Cards */
+        .rec-card { background: var(--bg-card); border: 1px solid var(--border); border-radius: 12px; padding: 20px; transition: all 0.3s; margin-bottom: 16px; }
+        .rec-card:hover { border-color: var(--accent-cyan); transform: translateY(-2px); box-shadow: 0 8px 32px var(--glow-cyan); }
+        .rec-hdr { display: flex; justify-content: space-between; align-items: center; margin-bottom: 14px; }
+        .rec-agent { font-weight: 700; font-size: 1.1em; display: flex; align-items: center; gap: 10px; }
+        .rec-agent-name { color: var(--text-primary); }
+        .impact-badge { font-family: 'JetBrains Mono', monospace; font-size: 0.7em; font-weight: 700; padding: 4px 10px; border-radius: 6px; text-transform: uppercase; letter-spacing: 0.5px; }
+        .impact-badge.critical { background: rgba(255,71,87,0.2); color: #ff6b81; border: 1px solid rgba(255,71,87,0.4); }
+        .impact-badge.high { background: rgba(255,159,67,0.2); color: #ffc048; border: 1px solid rgba(255,159,67,0.4); }
+        .impact-badge.medium { background: rgba(59,130,246,0.2); color: #60a5fa; border: 1px solid rgba(59,130,246,0.4); }
+        .impact-badge.low { background: rgba(0,255,148,0.15); color: #4ade80; border: 1px solid rgba(0,255,148,0.3); }
+        .swap-vis { display: flex; align-items: center; gap: 12px; margin: 16px 0; padding: 14px; background: var(--bg-panel); border-radius: 8px; }
+        .swap-from, .swap-to { flex: 1; padding: 10px 14px; border-radius: 6px; font-family: 'JetBrains Mono', monospace; font-size: 0.8em; }
+        .swap-from { background: rgba(255,71,87,0.1); color: #ff6b81; border: 1px solid rgba(255,71,87,0.3); }
+        .swap-to { background: rgba(0,255,148,0.1); color: #4ade80; border: 1px solid rgba(0,255,148,0.3); }
+        .swap-arrow { color: var(--accent-cyan); font-size: 1.4em; font-weight: 700; }
+        .rec-metrics { display: grid; grid-template-columns: repeat(4, 1fr); gap: 12px; margin-bottom: 14px; }
+        .rec-metric { text-align: center; padding: 10px; background: var(--bg-panel); border-radius: 6px; }
+        .rec-metric-label { font-size: 0.65em; color: var(--text-muted); text-transform: uppercase; letter-spacing: 0.5px; }
+        .rec-metric-value { font-family: 'JetBrains Mono', monospace; font-size: 0.95em; font-weight: 600; color: var(--accent-green); margin-top: 4px; }
+        .rec-rationale { font-size: 0.85em; color: var(--text-secondary); line-height: 1.6; padding: 12px; background: rgba(0,212,255,0.05); border-radius: 6px; border-left: 3px solid var(--accent-cyan); }
+
         @media (max-width: 768px) {
             .header h1 { font-size: 1.5em; }
             .tabs { flex-wrap: wrap; }
             .agents-grid { grid-template-columns: 1fr; }
             .stats-row { grid-template-columns: repeat(2, 1fr); }
+            .rec-metrics { grid-template-columns: repeat(2, 1fr); }
+            .swap-vis { flex-direction: column; }
+            .swap-arrow { transform: rotate(90deg); }
         }
     </style>
 </head>
@@ -578,7 +663,8 @@
         <button class="tab-btn" onclick="switchTab('agents')">All Agents</button>
         <button class="tab-btn" onclick="switchTab('history')">Timeline</button>
         <button class="tab-btn" onclick="switchTab('recommendations')">Recommendations</button>
-        <button class="tab-btn" onclick="switchTab('matrix')">Model Matrix</button>
+        <button class="tab-btn" onclick="switchTab('heatmap')">Heatmap</button>
+        <button class="tab-btn" onclick="switchTab('impact')">Impact</button>
     </div>
 
     <!-- Overview Tab -->
@@ -640,14 +726,34 @@
         <div class="agents-grid" id="allRecommendations"></div>
     </div>
 
-    <!-- Matrix Tab -->
-    <div id="tab-matrix" class="tab-panel">
-        <div class="matrix-wrap">
-            <h2 class="matrix-title">Agent × Model Matrix</h2>
-            <table class="matrix-table" id="matrixTable">
-                <thead id="matrixHead"></thead>
-                <tbody id="matrixBody"></tbody>
-            </table>
+    <!-- Heatmap Tab -->
+    <div id="tab-heatmap" class="tab-panel">
+        <div class="hm-wrap">
+            <div class="hm-title">Agent × Model Compatibility Heatmap</div>
+            <div class="hm-sub">Weighted score = benchmark × instruction-following multiplier · ★ = best fit · outlined = current · click for details</div>
+            <div style="overflow-x:auto"><table class="hm-table" id="hmTable"></table></div>
+            <div class="hm-legend-wrap">
+                <div class="hm-legend-track"></div>
+                <div class="hm-legend-marks">
+                    <span>100</span><span>80</span><span>60</span><span>40</span><span>20</span><span>0</span>
+                </div>
+                <div class="hm-legend-labels">
+                    <span class="hm-legend-left">↑ Ideal Match</span>
+                    <span class="hm-legend-right">Mismatch ↓</span>
+                </div>
+            </div>
+        </div>
+    </div>
+
+    <!-- Impact Tab -->
+    <div id="tab-impact" class="tab-panel">
+        <div class="stats-row" id="impactStats"></div>
+        <div class="chart-wrap">
+            <div class="chart-title">Model Migration Impact</div>
+            <div class="chart-sub">Before/after fit scores when switching models - taller bars = higher fit score</div>
+            <div id="impactChartContainer">
+                <canvas id="impactCanvas"></canvas>
+            </div>
         </div>
     </div>
 </div>
@@ -669,31 +775,65 @@
     </div>
 </div>
 
+<!-- Tooltip Overlay -->
+<div id="ttOverlay"><div id="ttBox"></div></div>
+
+<!-- Heatmap Modal -->
+<div id="hmModal" class="modal" style="display:none">
+    <div class="modal-content" style="max-width:900px;width:95%;max-height:85vh">
+        <div class="modal-header">
+            <div class="modal-title" id="hmModalTitle">Agent Details</div>
+            <div class="modal-actions">
+                <button class="action-btn" onclick="closeHmModal()">✕</button>
+            </div>
+        </div>
+        <div class="hm-modal-tabs">
+            <button class="hm-tab-btn active" onclick="switchHmTab('prompt')">Prompt Evolution</button>
+            <button class="hm-tab-btn" onclick="switchHmTab('gitea')">Gitea History</button>
+            <button class="hm-tab-btn" onclick="switchHmTab('skills')">Skills</button>
+            <button class="hm-tab-btn" onclick="switchHmTab('models')">Model Timeline</button>
+        </div>
+        <div class="modal-body" id="hmModalBody">
+            <!-- Content injected by JS -->
+        </div>
+    </div>
+</div>
+
 <script>
 // Agent Evolution Dashboard
 // Supports both server and file:// mode
 let agentData = {};
 
-// Embedded data (generated 2026-04-27T20:28:59.112Z)
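+// Inline recommendation data fallback (from model-research-latest.json). Entries use either current_model_in_agent_versions/source_of_truth_model or current_model/recommended_model; entries that keep the current model omit the target field.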
+const INLINE_RECOMMENDATIONS = [
+    { agent: "frontend-developer", current_model_in_agent_versions: "ollama-cloud/qwen3-coder:480b", source_of_truth_model: "ollama-cloud/minimax-m2.5", impact: "high", score_before: 86, score_after: 92, score_delta: 6, rationale: "agent-versions.json is stale. kilo-meta.json (source of truth) already has minimax-m2.5. Matrix score for frontend-dev on M2.5 = 92 (highest!)." },
+    { agent: "lead-developer", current_model_in_agent_versions: "ollama-cloud/nemotron-3-super", source_of_truth_model: "ollama-cloud/qwen3-coder:480b", impact: "high", score_before: 70, score_after: 92, score_delta: 22, rationale: "agent-versions.json shows nemotron-3-super (outdated). kilo-meta.json has qwen3-coder:480b. Matrix score: qwen3-coder 92 is the highest for lead-developer." },
+    { agent: "system-analyst", current_model: "ollama-cloud/glm-5.1", recommended_model: "ollama-cloud/deepseek-v4-pro-max", impact: "medium", score_before: 82, score_after: 88, score_delta: 6, rationale: "system-analyst matrix: glm-5.1 = 82, deepseek-v4-pro-max = 88. 1M context is critical for architecture docs." },
+    { agent: "evaluator", current_model: "ollama-cloud/glm-5.1", recommended_model: "ollama-cloud/kimi-k2.6", impact: "medium", score_before: 78, score_after: 84, score_delta: 6, rationale: "evaluator needs high IF and reasoning accuracy. kimi-k2-6 IF=91, matrix score 84 vs glm-5.1 78." },
+    { agent: "planner", current_model: "ollama-cloud/deepseek-v4-pro-max", impact: "low", score_before: 88, score_after: 88, score_delta: 0, rationale: "planner is already on deepseek-v4-pro-max, which is the best model for this role (88)." },
+    { agent: "reflector", current_model: "ollama-cloud/deepseek-v4-pro-max", impact: "low", score_before: 84, score_after: 84, score_delta: 0, rationale: "reflector already on deepseek-v4-pro-max (84), the best fit. Self-reflection requires strong reasoning chains." },
+    { agent: "workflow-architect", current_model: "ollama-cloud/glm-5.1", recommended_model: "ollama-cloud/kimi-k2.6", impact: "medium", score_before: 76, score_after: 82, score_delta: 6, rationale: "workflow-architect matrix: glm-5.1 = 76, kimi-k2-6 = 82." },
+    { agent: "pipeline-judge", current_model: "ollama-cloud/glm-5.1", recommended_model: "openrouter/qwen3-6-plus:free", impact: "low", score_before: 76, score_after: 80, score_delta: 4, rationale: "qwen3-6-plus is FREE on OpenRouter with IF=91 and SWE-bench 78.8." },
+    { agent: "orchestrator", current_model: "ollama-cloud/kimi-k2.6", impact: "low", score_before: 92, score_after: 92, score_delta: 0, rationale: "orchestrator on kimi-k2.6 is the absolute best fit (92)." },
+    { agent: "the-fixer", current_model: "ollama-cloud/kimi-k2.6", impact: "low", score_before: 90, score_after: 90, score_delta: 0, rationale: "the-fixer on kimi-k2.6 (90) is optimal. SWE-Pro 58.6 (#1!)." },
+    { agent: "memory-manager", current_model: "ollama-cloud/qwen3.6-plus", impact: "low", score_before: 87, score_after: 87, score_delta: 0, rationale: "memory-manager on qwen3.6-plus (87) is the best fit. 1M context critical." }
+];
+
+// Default embedded data (minimal - updated by sync script)
 const EMBEDDED_DATA = {
   "version": "1.0.0",
-  "lastUpdated": "2026-04-27T20:28:58.592Z",
+  "lastUpdated": "2026-05-25T13:37:20.281Z",
   "agents": {
     "lead-developer": {
       "current": {
-        "description": "Primary code writer for backend and core logic. Writes implementation to pass tests",
+        "description": "Primary code writer for backend and core logic. Writes implementation to pass tests (GNS-2 Tier 1)",
         "mode": "subagent",
-        "model": "ollama-cloud/nemotron-3-super",
+        "model": "ollama-cloud/qwen3-coder:480b",
         "provider": "Ollama",
         "variant": "thinking",
         "color": "\"#DC2626\"",
         "category": "General",
-        "capabilities": [
-          "code_writing",
-          "refactoring",
-          "bug_fixing",
-          "implementation"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -702,47 +842,39 @@ const EMBEDDED_DATA = {
           "type": "model_change",
           "from": null,
           "to": "ollama-cloud/qwen3-coder:480b",
-          "reason": "Initial configuration from capability-index.yaml",
+          "reason": "Initial configuration",
           "source": "git"
         },
         {
-          "date": "2026-04-27T16:56:09.013Z",
+          "date": "2026-04-27T16:56:09Z",
           "commit": "model-research-sync",
           "type": "model_change",
           "from": "ollama-cloud/qwen3-coder:480b",
           "to": "ollama-cloud/nemotron-3-super",
-          "reason": "Nemotron 3 Super has better reasoning for core development tasks and RULER@1M context window. SWE-bench 68% vs Qwen's 66.5%.",
+          "reason": "Nemotron 3 Super has better reasoning",
           "source": "research"
         },
         {
-          "date": "2026-04-27T20:28:58.592Z",
-          "commit": "model-research-sync",
+          "date": "2026-05-24T01:00:00Z",
+          "commit": "ollama-cloud-consolidation",
           "type": "model_change",
-          "from": "ollama-cloud/qwen3-coder:480b",
-          "to": "ollama-cloud/nemotron-3-super",
-          "reason": "Nemotron 3 Super has better reasoning for core development tasks and RULER@1M context window. SWE-bench 68% vs Qwen's 66.5%.",
-          "source": "research"
+          "from": "ollama-cloud/nemotron-3-super",
+          "to": "ollama-cloud/qwen3-coder:480b",
+          "reason": "Reverted to qwen3-coder: SWE-bench 66.5% is coding-benchmark standard. Matrix score 92 vs nemotron 70.",
+          "source": "orchestrator-analysis"
         }
       ],
       "performance_log": []
     },
     "frontend-developer": {
       "current": {
-        "description": "Handles UI implementation with multimodal capabilities. Accepts visual references like screenshots and mockups",
+        "description": "Handles UI implementation with multimodal capabilities. Accepts visual references like screenshots and mockups (GNS-2 Tier 1)",
         "mode": "all",
-        "model": "ollama-cloud/qwen3-coder:480b",
+        "model": "ollama-cloud/minimax-m2.5",
         "provider": "Ollama",
         "color": "\"#0EA5E9\"",
         "category": "General",
-        "capabilities": [
-          "ui_implementation",
-          "component_creation",
-          "styling",
-          "responsive_design",
-          "nextjs_development",
-          "vue_nuxt_development",
-          "react_development"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -753,48 +885,41 @@ const EMBEDDED_DATA = {
           "to": "ollama-cloud/qwen3-coder:480b",
           "reason": "Flutter development support added",
           "source": "git"
+        },
+        {
+          "date": "2026-04-27T17:00:00Z",
+          "commit": "model-research-sync",
+          "type": "model_change",
+          "from": "ollama-cloud/qwen3-coder:480b",
+          "to": "ollama-cloud/minimax-m2.5",
+          "reason": "Matrix score 92 for frontend on M2.5. SWE-bench 80.2%.",
+          "source": "research"
         }
       ],
       "performance_log": []
     },
     "backend-developer": {
       "current": {
-        "description": "Backend specialist for Node.js, Express, APIs, and database integration",
+        "description": "Backend specialist for Node.js, Express, APIs, and database integration (GNS-2 Tier 1)",
         "mode": "subagent",
         "model": "ollama-cloud/qwen3-coder:480b",
         "provider": "Ollama",
         "color": "\"#10B981\"",
         "category": "General",
-        "capabilities": [
-          "api_development",
-          "database_design",
-          "server_logic",
-          "authentication",
-          "postgresql_integration",
-          "sqlite_integration"
-        ]
+        "capabilities": []
       },
       "history": [],
       "performance_log": []
     },
     "go-developer": {
       "current": {
-        "description": "Go backend specialist for Gin, Echo, APIs, and database integration",
+        "description": "Go backend specialist for Gin, Echo, APIs, and database integration (GNS-2 Tier 1)",
         "mode": "subagent",
-        "model": "ollama-cloud/qwen3-coder:480b",
+        "model": "ollama-cloud/deepseek-v4-pro-max",
         "provider": "Ollama",
         "color": "\"#00ADD8\"",
         "category": "General",
-        "capabilities": [
-          "go_api_development",
-          "go_database_design",
-          "go_concurrent_programming",
-          "go_authentication",
-          "go_microservices",
-          "postgresql_integration",
-          "sqlite_integration",
-          "clickhouse_integration"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -803,64 +928,57 @@ const EMBEDDED_DATA = {
           "type": "model_change",
           "from": "ollama-cloud/deepseek-v3.2",
           "to": "ollama-cloud/qwen3-coder:480b",
-          "reason": "Qwen3-Coder optimized for Go development",
+          "reason": "Qwen3-Coder optimized for Go",
           "source": "git"
+        },
+        {
+          "date": "2026-04-27T17:00:00Z",
+          "commit": "model-research-sync",
+          "type": "model_change",
+          "from": "ollama-cloud/qwen3-coder:480b",
+          "to": "ollama-cloud/deepseek-v4-pro-max",
+          "reason": "Matrix score 88 for go-dev on V4-Pro. DeepSeek traditionally strong in Go/Rust.",
+          "source": "research"
         }
       ],
       "performance_log": []
     },
     "sdet-engineer": {
       "current": {
-        "description": "Writes tests following TDD methodology. Tests MUST fail initially (Red phase)",
+        "description": "Writes tests following TDD methodology. Tests MUST fail initially (Red phase) (GNS-2 Tier 1)",
         "mode": "all",
         "model": "ollama-cloud/qwen3-coder:480b",
         "provider": "Ollama",
         "variant": "thinking",
         "color": "\"#8B5CF6\"",
         "category": "General",
-        "capabilities": [
-          "unit_tests",
-          "integration_tests",
-          "e2e_tests",
-          "test_planning",
-          "visual_regression"
-        ]
+        "capabilities": []
       },
       "history": [],
       "performance_log": []
     },
     "code-skeptic": {
       "current": {
-        "description": "Adversarial code reviewer. Finds problems and issues. Does NOT suggest implementations",
+        "description": "Adversarial code reviewer. Finds problems and issues. Does NOT suggest implementations (GNS-2 Tier 0)",
         "mode": "subagent",
         "model": "ollama-cloud/minimax-m2.5",
         "provider": "Ollama",
         "color": "\"#E11D48\"",
         "category": "General",
-        "capabilities": [
-          "code_review",
-          "security_review",
-          "style_check",
-          "issue_identification"
-        ]
+        "capabilities": []
       },
       "history": [],
       "performance_log": []
     },
     "security-auditor": {
       "current": {
-        "description": "Scans for security vulnerabilities, OWASP Top 10, dependency CVEs, and hardcoded secrets",
-        "mode": "all",
-        "model": "ollama-cloud/nemotron-3-super",
+        "description": "Scans for security vulnerabilities, OWASP Top 10, dependency CVEs, and hardcoded secrets (GNS-2 Tier 0)",
+        "mode": "subagent",
+        "model": "ollama-cloud/deepseek-v4-pro-max",
         "provider": "Ollama",
-        "color": "\"#7F1D1D\"",
+        "color": "\"#DC2626\"",
         "category": "General",
-        "capabilities": [
-          "vulnerability_scan",
-          "owasp_check",
-          "secret_detection",
-          "auth_review"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -869,26 +987,30 @@ const EMBEDDED_DATA = {
           "type": "model_change",
           "from": "ollama-cloud/deepseek-v3.2",
           "to": "ollama-cloud/nemotron-3-super",
-          "reason": "Nemotron 3 Super optimized for security analysis with RULER@1M",
+          "reason": "Nemotron 3 Super optimized for security analysis",
           "source": "git"
+        },
+        {
+          "date": "2026-05-24T01:00:00Z",
+          "commit": "ollama-cloud-consolidation",
+          "type": "model_change",
+          "from": "ollama-cloud/nemotron-3-super",
+          "to": "ollama-cloud/deepseek-v4-pro-max",
+          "reason": "V4-Pro Max matrix=80 vs nemotron=76. SWE-V 80.6, 1M context.",
+          "source": "orchestrator-analysis"
         }
       ],
       "performance_log": []
     },
     "performance-engineer": {
       "current": {
-        "description": "Reviews code for performance issues. Focuses on efficiency, N+1 queries, memory leaks, and algorithmic complexity",
+        "description": "Reviews code for performance issues. Focuses on efficiency, N+1 queries, memory leaks, and algorithmic complexity (GNS-2 Tier 0)",
         "mode": "all",
-        "model": "ollama-cloud/nemotron-3-super",
+        "model": "ollama-cloud/deepseek-v4-pro-max",
         "provider": "Ollama",
         "color": "\"#0D9488\"",
         "category": "General",
-        "capabilities": [
-          "performance_analysis",
-          "n_plus_one_detection",
-          "memory_leak_check",
-          "algorithm_analysis"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -899,68 +1021,54 @@ const EMBEDDED_DATA = {
           "to": "ollama-cloud/nemotron-3-super",
           "reason": "Better reasoning for performance analysis",
           "source": "git"
+        },
+        {
+          "date": "2026-05-24T01:00:00Z",
+          "commit": "ollama-cloud-consolidation",
+          "type": "model_change",
+          "from": "ollama-cloud/nemotron-3-super",
+          "to": "ollama-cloud/deepseek-v4-pro-max",
+          "reason": "Matrix=84 for perf-engineer on V4-Pro. GPQA 90.1 for reasoning.",
+          "source": "orchestrator-analysis"
         }
       ],
       "performance_log": []
     },
     "browser-automation": {
       "current": {
-        "description": "Browser automation agent using Playwright MCP for E2E testing, form filling, navigation, and web interaction",
+        "description": "Browser automation agent using Playwright MCP for E2E testing, form filling, navigation, and web interaction (GNS-2 Tier 0)",
         "mode": "subagent",
         "model": "ollama-cloud/qwen3-coder:480b",
         "provider": "Ollama",
         "color": "\"#1E88E5\"",
         "category": "General",
-        "capabilities": [
-          "e2e_browser_tests",
-          "form_filling",
-          "navigation_testing",
-          "screenshot_capture"
-        ]
+        "capabilities": []
       },
       "history": [],
       "performance_log": []
     },
     "visual-tester": {
       "current": {
-        "description": "Visual regression testing agent that captures screenshots, extracts UI elements with bounding boxes, compares via pixelmatch, and detects console/network errors",
+        "description": "Visual regression testing agent that compares screenshots and detects UI differences using pixelmatch and image diff (GNS-2 Tier 0)",
         "mode": "subagent",
         "model": "ollama-cloud/qwen3-coder:480b",
         "provider": "Ollama",
         "color": "\"#E91E63\"",
         "category": "General",
-        "capabilities": [
-          "visual_regression",
-          "pixel_comparison",
-          "screenshot_diff",
-          "ui_validation",
-          "bbox_element_extraction",
-          "console_error_detection",
-          "network_error_detection",
-          "responsive_layout_check",
-          "button_overflow_detection",
-          "gitea_integration",
-          "docker_networking"
-        ]
+        "capabilities": []
       },
       "history": [],
       "performance_log": []
     },
     "system-analyst": {
       "current": {
-        "description": "Designs technical specifications, data schemas, and API contracts before implementation",
+        "description": "Designs technical specifications, data schemas, and API contracts before implementation (GNS-2 Tier 1)",
         "mode": "subagent",
-        "model": "ollama-cloud/nemotron-3-super",
+        "model": "ollama-cloud/deepseek-v4-pro-max",
         "provider": "Ollama",
-        "variant": "thinking",
         "color": "\"#0891B2\"",
         "category": "General",
-        "capabilities": [
-          "architecture_design",
-          "api_specification",
-          "database_modeling",
-          "technical_documentation"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -969,11 +1077,11 @@ const EMBEDDED_DATA = {
           "type": "model_change",
           "from": "ollama-cloud/gpt-oss:120b",
           "to": "ollama-cloud/glm-5",
-          "reason": "GLM-5 better for system engineering and architecture",
+          "reason": "GLM-5 better for system engineering",
           "source": "git"
         },
         {
-          "date": "2026-04-23T06:24:32.546Z",
+          "date": "2026-04-23T06:24:32Z",
           "commit": "sync",
           "type": "model_change",
           "from": "ollama-cloud/glm-5",
@@ -982,32 +1090,44 @@ const EMBEDDED_DATA = {
           "source": "git"
         },
         {
-          "date": "2026-04-27T16:59:52.825Z",
+          "date": "2026-04-27T16:59:52Z",
           "commit": "model-research-sync",
           "type": "model_change",
           "from": "ollama-cloud/glm-5.1",
           "to": "ollama-cloud/nemotron-3-super",
           "reason": "Test recommendation for model research sync script",
           "source": "research"
+        },
+        {
+          "date": "2026-05-24T01:00:00Z",
+          "commit": "ollama-cloud-consolidation",
+          "type": "model_change",
+          "from": "ollama-cloud/nemotron-3-super",
+          "to": "ollama-cloud/glm-5.1",
+          "reason": "Reverted: GLM-5.1 Arena ELO 1451, instruction following ~90. Standardization with 12 other agents.",
+          "source": "orchestrator-analysis"
+        },
+        {
+          "date": "2026-05-25T13:37:20.281Z",
+          "commit": "sync",
+          "type": "model_change",
+          "from": "ollama-cloud/glm-5.1",
+          "to": "ollama-cloud/deepseek-v4-pro-max",
+          "reason": "Model update from sync",
+          "source": "git"
         }
       ],
       "performance_log": []
     },
     "requirement-refiner": {
       "current": {
-        "description": "Converts vague ideas and bug reports into strict User Stories with acceptance criteria checklists",
+        "description": "Converts vague ideas and bug reports into strict User Stories with acceptance criteria checklists (GNS-2 Tier 1)",
         "mode": "all",
-        "model": "ollama-cloud/glm-5.1",
+        "model": "ollama-cloud/kimi-k2-thinking",
         "provider": "Ollama",
         "variant": "thinking",
         "color": "\"#4F46E5\"",
-        "category": "General",
-        "capabilities": [
-          "requirement_analysis",
-          "user_story_creation",
-          "acceptance_criteria",
-          "clarification"
-        ]
+        "category": "General"
       },
       "history": [
         {
@@ -1016,39 +1136,51 @@ const EMBEDDED_DATA = {
           "type": "model_change",
           "from": "ollama-cloud/nemotron-3-super",
           "to": "ollama-cloud/glm-5",
-          "reason": "+33% quality. GLM-5 excels at requirement analysis and system engineering",
+          "reason": "+33% quality. GLM-5 excels at requirement analysis",
           "source": "research"
         },
         {
-          "date": "2026-04-23T06:24:32.546Z",
+          "date": "2026-04-23T06:24:32Z",
           "commit": "sync",
           "type": "model_change",
           "from": "ollama-cloud/glm-5",
           "to": "ollama-cloud/glm-5.1",
           "reason": "Model update from sync",
           "source": "git"
+        },
+        {
+          "date": "2026-05-24T01:00:00Z",
+          "commit": "ollama-cloud-consolidation",
+          "type": "model_change",
+          "from": "ollama-cloud/glm-5.1",
+          "to": "ollama-cloud/kimi-k2.6",
+          "reason": "kimi-k2.6 IF=91 highest, multimodal for mockup understanding. Matrix ~88-90 for req-refiner.",
+          "source": "orchestrator-analysis"
+        },
+        {
+          "date": "2026-05-23T23:35:02.184Z",
+          "commit": "sync",
+          "type": "model_change",
+          "from": "ollama-cloud/kimi-k2.6",
+          "to": "ollama-cloud/kimi-k2-thinking",
+          "reason": "Model update from sync",
+          "source": "git"
         }
       ],
       "performance_log": []
     },
     "history-miner": {
       "current": {
-        "description": "Analyzes git history to find duplicates and past solutions, preventing regression and duplicate work",
+        "description": "Analyzes git history to find duplicates and past solutions, preventing regression and duplicate work (GNS-2 Tier 0)",
         "mode": "subagent",
         "model": "ollama-cloud/nemotron-3-super",
         "provider": "Ollama",
         "color": "\"#059669\"",
-        "category": "General",
-        "capabilities": [
-          "git_search",
-          "duplicate_detection",
-          "past_solution_finder",
-          "pattern_identification"
-        ]
+        "category": "General"
       },
       "history": [
         {
-          "date": "2026-04-23T06:24:32.546Z",
+          "date": "2026-04-23T06:24:32Z",
           "commit": "sync",
           "type": "model_change",
           "from": "ollama-cloud/glm-5",
@@ -1061,18 +1193,13 @@ const EMBEDDED_DATA = {
     },
     "capability-analyst": {
       "current": {
-        "description": "Analyzes task requirements against available agents, workflows, and skills. Identifies gaps and recommends new components.",
+        "description": "Analyzes task requirements against available agents, workflows, and skills. Identifies gaps and recommends new components. Tier 2 meta-agent with self-cascade enabled.",
         "mode": "subagent",
         "model": "ollama-cloud/glm-5.1",
         "provider": "Ollama",
         "color": "\"#6366F1\"",
         "category": "General",
-        "capabilities": [
-          "gap_analysis",
-          "capability_mapping",
-          "recommendation_generation",
-          "coverage_analysis"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -1081,11 +1208,11 @@ const EMBEDDED_DATA = {
           "type": "model_change",
           "from": "ollama-cloud/nemotron-3-super",
           "to": "openrouter/qwen/qwen3.6-plus:free",
-          "reason": "+23% quality, IF:90 score, 1M context, FREE via OpenRouter",
+          "reason": "+23% quality, IF:90, FREE via OpenRouter",
           "source": "research"
         },
         {
-          "date": "2026-04-23T06:24:32.546Z",
+          "date": "2026-04-23T06:24:32Z",
           "commit": "sync",
           "type": "model_change",
           "from": "openrouter/qwen/qwen3.6-plus:free",
@@ -1098,51 +1225,50 @@ const EMBEDDED_DATA = {
     },
     "orchestrator": {
       "current": {
-        "description": "Main dispatcher. Routes tasks between agents based on Issue status and manages the workflow state machine. IF:90 for optimal routing accuracy.",
+        "description": "Main dispatcher. Routes tasks between agents based on Issue status and manages the workflow state machine. IF:90 for optimal routing accuracy. (GNS-2 Tier 1)",
         "mode": "all",
-        "model": "ollama-cloud/glm-5.1",
+        "model": "ollama-cloud/kimi-k2.6",
         "provider": "Ollama",
         "variant": "thinking",
         "color": "\"#7C3AED\"",
         "category": "General",
-        "capabilities": [
-          "task_routing",
-          "state_management",
-          "agent_coordination",
-          "workflow_execution"
-        ]
+        "capabilities": []
       },
       "history": [
         {
-          "date": "2026-04-23T06:24:32.546Z",
+          "date": "2026-04-23T06:24:32Z",
           "commit": "sync",
           "type": "model_change",
           "from": "ollama-cloud/glm-5",
           "to": "ollama-cloud/glm-5.1",
           "reason": "Model update from sync",
           "source": "git"
+        },
+        {
+          "date": "2026-04-27T20:28:58Z",
+          "commit": "model-research-sync",
+          "type": "model_change",
+          "from": "ollama-cloud/glm-5.1",
+          "to": "ollama-cloud/kimi-k2.6",
+          "reason": "kimi-k2.6 best fit for orchestration (92). 300 sub-agent swarm.",
+          "source": "research"
         }
       ],
       "performance_log": []
     },
     "release-manager": {
       "current": {
-        "description": "Manages git operations, semantic versioning, branching, and deployments. Ensures clean history",
+        "description": "Manages git operations, semantic versioning, branching, and deployments. Ensures clean history (GNS-2 Tier 1)",
         "mode": "subagent",
         "model": "ollama-cloud/glm-5.1",
         "provider": "Ollama",
         "color": "\"#581C87\"",
         "category": "General",
-        "capabilities": [
-          "git_operations",
-          "version_management",
-          "changelog_creation",
-          "deployment"
-        ]
+        "capabilities": []
       },
       "history": [
         {
-          "date": "2026-04-23T06:24:32.546Z",
+          "date": "2026-04-23T06:24:32Z",
           "commit": "sync",
           "type": "model_change",
           "from": "ollama-cloud/devstral-2:123b",
@@ -1155,19 +1281,14 @@ const EMBEDDED_DATA = {
     },
     "evaluator": {
       "current": {
-        "description": "Scores agent effectiveness after task completion for continuous improvement",
+        "description": "Scores agent effectiveness after task completion for continuous improvement. Tier 2 meta-agent with self-cascade enabled.",
         "mode": "subagent",
-        "model": "ollama-cloud/glm-5.1",
+        "model": "ollama-cloud/qwen3.5-122b",
         "provider": "Ollama",
         "variant": "thinking",
         "color": "\"#047857\"",
         "category": "General",
-        "capabilities": [
-          "performance_scoring",
-          "process_analysis",
-          "pattern_identification",
-          "improvement_recommendations"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -1189,31 +1310,35 @@ const EMBEDDED_DATA = {
           "source": "research"
         },
         {
-          "date": "2026-04-23T06:24:32.546Z",
+          "date": "2026-04-23T06:24:32Z",
           "commit": "sync",
           "type": "model_change",
           "from": "openrouter/qwen/qwen3.6-plus:free",
           "to": "ollama-cloud/glm-5.1",
           "reason": "Model update from sync",
           "source": "git"
+        },
+        {
+          "date": "2026-05-25T13:37:20.281Z",
+          "commit": "sync",
+          "type": "model_change",
+          "from": "ollama-cloud/glm-5.1",
+          "to": "ollama-cloud/qwen3.5-122b",
+          "reason": "Model update from sync",
+          "source": "git"
         }
       ],
       "performance_log": []
     },
     "prompt-optimizer": {
       "current": {
-        "description": "Improves agent system prompts based on performance failures. Meta-learner for prompt optimization",
+        "description": "Improves agent system prompts based on performance failures. Meta-learner for prompt optimization (GNS-2 Tier 1)",
         "mode": "subagent",
-        "model": "ollama-cloud/glm-5.1",
+        "model": "ollama-cloud/qwen3.5-122b",
         "provider": "Ollama",
-        "variant": "instant",
         "color": "\"#BE185D\"",
         "category": "General",
-        "capabilities": [
-          "prompt_analysis",
-          "prompt_improvement",
-          "failure_pattern_detection"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -1226,48 +1351,66 @@ const EMBEDDED_DATA = {
           "source": "git"
         },
         {
-          "date": "2026-04-23T06:24:32.546Z",
+          "date": "2026-04-23T06:24:32Z",
           "commit": "sync",
           "type": "model_change",
           "from": "ollama-cloud/nemotron-3-super",
           "to": "ollama-cloud/glm-5.1",
           "reason": "Model update from sync",
           "source": "git"
+        },
+        {
+          "date": "2026-05-24T01:00:00Z",
+          "commit": "ollama-cloud-consolidation",
+          "type": "model_change",
+          "from": "ollama-cloud/glm-5.1",
+          "to": "ollama-cloud/qwen3.5",
+          "reason": "MIGRATION: qwen3.6-plus was OpenRouter (not Ollama Cloud). qwen3.5 has IF=92, updated 2 days ago, 12.4M pulls.",
+          "source": "orchestrator-analysis"
+        },
+        {
+          "date": "2026-05-23T23:35:02.184Z",
+          "commit": "sync",
+          "type": "model_change",
+          "from": "ollama-cloud/qwen3.5",
+          "to": "ollama-cloud/qwen3.6-plus",
+          "reason": "Model update from sync",
+          "source": "git"
+        },
+        {
+          "date": "2026-05-25T13:37:20.281Z",
+          "commit": "sync",
+          "type": "model_change",
+          "from": "ollama-cloud/qwen3.6-plus",
+          "to": "ollama-cloud/qwen3.5-122b",
+          "reason": "Model update from sync",
+          "source": "git"
         }
       ],
       "performance_log": []
     },
     "the-fixer": {
       "current": {
-        "description": "Iteratively fixes bugs based on specific error reports and test failures",
+        "description": "Iteratively fixes bugs based on specific error reports and test failures (GNS-2 Tier 1)",
         "mode": "all",
-        "model": "ollama-cloud/minimax-m2.5",
+        "model": "ollama-cloud/kimi-k2.6",
         "provider": "Ollama",
         "color": "\"#F59E0B\"",
         "category": "General",
-        "capabilities": [
-          "bug_fixing",
-          "issue_resolution",
-          "code_correction"
-        ]
+        "capabilities": []
       },
       "history": [],
       "performance_log": []
     },
     "product-owner": {
       "current": {
-        "description": "Manages issue checklists, status labels, tracks progress and coordinates with human users",
+        "description": "Manages issue checklists, status labels, tracks progress and coordinates with human users (GNS-2 Tier 1)",
         "mode": "subagent",
         "model": "ollama-cloud/glm-5.1",
         "provider": "Ollama",
         "color": "\"#EA580C\"",
         "category": "General",
-        "capabilities": [
-          "issue_management",
-          "prioritization",
-          "backlog_management",
-          "workflow_completion"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -1280,7 +1423,7 @@ const EMBEDDED_DATA = {
           "source": "git"
         },
         {
-          "date": "2026-04-23T06:24:32.546Z",
+          "date": "2026-04-23T06:24:32Z",
           "commit": "sync",
           "type": "model_change",
           "from": "ollama-cloud/glm-5",
@@ -1293,45 +1436,46 @@ const EMBEDDED_DATA = {
     },
     "workflow-architect": {
       "current": {
-        "description": "Creates and maintains workflow definitions with complete architecture, Gitea integration, and quality gates",
+        "description": "Creates and maintains workflow definitions with complete architecture, Gitea integration, and quality gates (GNS-2 Tier 1)",
         "mode": "subagent",
-        "model": "ollama-cloud/glm-5.1",
+        "model": "ollama-cloud/qwen3.5-122b",
         "provider": "Ollama",
         "variant": "thinking",
         "color": "\"#EC4899\"",
         "category": "General",
-        "capabilities": [
-          "workflow_design",
-          "process_definition",
-          "automation_setup"
-        ]
+        "capabilities": []
       },
       "history": [
         {
-          "date": "2026-04-23T06:24:32.546Z",
+          "date": "2026-04-23T06:24:32Z",
           "commit": "sync",
           "type": "model_change",
           "from": "ollama-cloud/glm-5",
           "to": "ollama-cloud/glm-5.1",
           "reason": "Model update from sync",
           "source": "git"
+        },
+        {
+          "date": "2026-05-25T13:37:20.281Z",
+          "commit": "sync",
+          "type": "model_change",
+          "from": "ollama-cloud/glm-5.1",
+          "to": "ollama-cloud/qwen3.5-122b",
+          "reason": "Model update from sync",
+          "source": "git"
         }
       ],
       "performance_log": []
     },
     "markdown-validator": {
       "current": {
-        "description": "Validates and corrects Markdown descriptions for Gitea issues",
+        "description": "Validates and corrects Markdown descriptions for Gitea issues (GNS-2 Tier 0)",
         "mode": "subagent",
-        "model": "ollama-cloud/nemotron-3-nano:30b",
+        "model": "ollama-cloud/deepseek-v4-pro-max",
         "provider": "Ollama",
         "color": "\"#F97316\"",
         "category": "General",
-        "capabilities": [
-          "markdown_validation",
-          "formatting_check",
-          "link_validation"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -1342,6 +1486,24 @@ const EMBEDDED_DATA = {
           "to": "ollama-cloud/nemotron-3-nano:30b",
           "reason": "Nano efficient for lightweight validation tasks",
           "source": "git"
+        },
+        {
+          "date": "2026-05-24T01:00:00Z",
+          "commit": "ollama-cloud-consolidation",
+          "type": "model_change",
+          "from": "ollama-cloud/nemotron-3-nano:30b",
+          "to": "ollama-cloud/nemotron-3-nano",
+          "reason": "Unified naming. Nano IF=68, tiny and cheap, perfect for validation.",
+          "source": "orchestrator-analysis"
+        },
+        {
+          "date": "2026-05-23T23:35:02.185Z",
+          "commit": "sync",
+          "type": "model_change",
+          "from": "ollama-cloud/nemotron-3-nano",
+          "to": "ollama-cloud/deepseek-v4-pro-max",
+          "reason": "Model update from sync",
+          "source": "git"
         }
       ],
       "performance_log": []
@@ -1350,17 +1512,12 @@ const EMBEDDED_DATA = {
       "current": {
         "name": "Agent Architect",
         "mode": "subagent",
-        "model": "ollama-cloud/glm-5.1",
+        "model": "ollama-cloud/kimi-k2.6",
         "provider": "Ollama",
-        "variant": "thinking",
-        "description": "Creates, modifies, and reviews new agents, workflows, and skills based on capability gap analysis",
+        "description": "Creates, modifies, and reviews new agents, workflows, and skills based on capability gap analysis. Tier 2 meta-agent with self-cascade enabled.",
         "color": "\"#8B5CF6\"",
         "category": "General",
-        "capabilities": [
-          "agent_design",
-          "prompt_engineering",
-          "capability_definition"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -1369,36 +1526,39 @@ const EMBEDDED_DATA = {
           "type": "model_change",
           "from": "ollama-cloud/nemotron-3-super",
           "to": "openrouter/qwen/qwen3.6-plus:free",
-          "reason": "+22% quality, IF:90 for YAML frontmatter generation, 1M context for all agents analysis",
+          "reason": "+22% quality, IF:90 for YAML frontmatter generation",
           "source": "research"
         },
         {
-          "date": "2026-04-23T06:24:32.546Z",
+          "date": "2026-04-23T06:24:32Z",
           "commit": "sync",
           "type": "model_change",
           "from": "openrouter/qwen/qwen3.6-plus:free",
           "to": "ollama-cloud/glm-5.1",
           "reason": "Model update from sync",
           "source": "git"
+        },
+        {
+          "date": "2026-05-24T01:00:00Z",
+          "commit": "ollama-cloud-consolidation",
+          "type": "model_change",
+          "from": "ollama-cloud/glm-5.1",
+          "to": "ollama-cloud/kimi-k2.6",
+          "reason": "kimi-k2.6 best fit for agent-architect (86). Multimodal for reviewing UI components.",
+          "source": "orchestrator-analysis"
         }
       ],
       "performance_log": []
     },
     "planner": {
       "current": {
-        "description": "Advanced task planner using Chain of Thought, Tree of Thoughts, and Plan-Execute-Reflect",
+        "description": "Advanced task planner using Chain of Thought, Tree of Thoughts, and Plan-Execute-Reflect (GNS-2 Tier 0)",
         "mode": "subagent",
-        "model": "ollama-cloud/nemotron-3-super",
+        "model": "ollama-cloud/deepseek-v4-pro-max",
         "provider": "Ollama",
         "color": "\"#F59E0B\"",
         "category": "General",
-        "capabilities": [
-          "task_decomposition",
-          "chain_of_thought",
-          "tree_of_thoughts",
-          "plan_execute_reflect",
-          "dependency_analysis"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -1409,25 +1569,28 @@ const EMBEDDED_DATA = {
           "to": "ollama-cloud/nemotron-3-super",
           "reason": "Nemotron 3 Super excels at planning",
           "source": "git"
+        },
+        {
+          "date": "2026-04-27T17:00:00Z",
+          "commit": "model-research-sync",
+          "type": "model_change",
+          "from": "ollama-cloud/nemotron-3-super",
+          "to": "ollama-cloud/deepseek-v4-pro-max",
+          "reason": "Matrix score 88 for planner on V4-Pro. GPQA 90.1.",
+          "source": "research"
         }
       ],
       "performance_log": []
     },
     "reflector": {
       "current": {
-        "description": "Self-reflection agent using Reflexion pattern - learns from mistakes",
+        "description": "Self-reflection agent using Reflexion pattern - learns from mistakes (GNS-2 Tier 0)",
         "mode": "subagent",
-        "model": "ollama-cloud/nemotron-3-super",
+        "model": "ollama-cloud/deepseek-v4-pro-max",
         "provider": "Ollama",
         "color": "\"#10B981\"",
         "category": "General",
-        "capabilities": [
-          "self_reflection",
-          "mistake_analysis",
-          "lesson_extraction",
-          "trajectory_analysis",
-          "heuristic_evaluation"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -1438,25 +1601,28 @@ const EMBEDDED_DATA = {
           "to": "ollama-cloud/nemotron-3-super",
           "reason": "Better for reflection tasks",
           "source": "git"
+        },
+        {
+          "date": "2026-04-27T17:00:00Z",
+          "commit": "model-research-sync",
+          "type": "model_change",
+          "from": "ollama-cloud/nemotron-3-super",
+          "to": "ollama-cloud/deepseek-v4-pro-max",
+          "reason": "Matrix score 84. Strong reasoning chains.",
+          "source": "research"
         }
       ],
       "performance_log": []
     },
     "memory-manager": {
       "current": {
-        "description": "Manages agent memory systems - short-term (context), long-term (vector store), and episodic (experiences)",
+        "description": "Manages agent memory systems - short-term (context), long-term (vector store), and episodic (experiences) (GNS-2 Tier 0)",
         "mode": "subagent",
-        "model": "ollama-cloud/nemotron-3-super",
+        "model": "ollama-cloud/deepseek-v4-pro-max",
         "provider": "Ollama",
         "color": "\"#8B5CF6\"",
         "category": "General",
-        "capabilities": [
-          "memory_retrieval",
-          "memory_storage",
-          "memory_consolidation",
-          "relevance_scoring",
-          "episodic_management"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -1467,44 +1633,59 @@ const EMBEDDED_DATA = {
           "to": "ollama-cloud/nemotron-3-super",
           "reason": "RULER@1M critical for memory ctx",
           "source": "git"
+        },
+        {
+          "date": "2026-05-24T01:00:00Z",
+          "commit": "ollama-cloud-consolidation",
+          "type": "model_change",
+          "from": "ollama-cloud/nemotron-3-super",
+          "to": "ollama-cloud/deepseek-v4-pro-max",
+          "reason": "MIGRATION: qwen3.6-plus was OpenRouter. deepseek-v4-pro-max has 1M context (same as nemotron), matrix 86, SWE-V 80.6.",
+          "source": "orchestrator-analysis"
+        },
+        {
+          "date": "2026-05-23T23:35:02.184Z",
+          "commit": "sync",
+          "type": "model_change",
+          "from": "ollama-cloud/deepseek-v4-pro-max",
+          "to": "ollama-cloud/qwen3.6-plus",
+          "reason": "Model update from sync",
+          "source": "git"
+        },
+        {
+          "date": "2026-05-25T13:37:20.281Z",
+          "commit": "sync",
+          "type": "model_change",
+          "from": "ollama-cloud/qwen3.6-plus",
+          "to": "ollama-cloud/deepseek-v4-pro-max",
+          "reason": "Model update from sync",
+          "source": "git"
         }
       ],
       "performance_log": []
     },
     "devops-engineer": {
       "current": {
-        "description": "DevOps specialist for Docker, Kubernetes, CI/CD pipeline automation, and infrastructure management",
+        "description": "DevOps specialist for Docker, Kubernetes, CI/CD pipeline automation, and infrastructure management (GNS-2 Tier 1)",
         "mode": "subagent",
-        "model": "ollama-cloud/nemotron-3-super",
+        "model": "ollama-cloud/kimi-k2.6",
         "provider": "Ollama",
         "color": "\"#FF6B35\"",
         "category": "General",
-        "capabilities": [
-          "docker_configuration",
-          "kubernetes_setup",
-          "ci_cd_pipeline",
-          "infrastructure_automation",
-          "container_optimization"
-        ]
+        "capabilities": []
       },
       "history": [],
       "performance_log": []
     },
     "flutter-developer": {
       "current": {
-        "description": "Flutter mobile specialist for cross-platform apps, state management, and UI components",
+        "description": "Flutter mobile specialist for cross-platform apps, state management, and UI components (GNS-2 Tier 1)",
         "mode": "subagent",
         "model": "ollama-cloud/qwen3-coder:480b",
         "provider": "Ollama",
         "color": "\"#02569B\"",
         "category": "General",
-        "capabilities": [
-          "dart_programming",
-          "flutter_ui",
-          "mobile_app_development",
-          "widget_creation",
-          "state_management"
-        ]
+        "capabilities": []
       },
       "history": [
         {
@@ -1521,100 +1702,153 @@ const EMBEDDED_DATA = {
     },
     "architect-indexer": {
       "current": {
-        "description": "Indexes and maps project codebase architecture into .architect/ directory. Creates and maintains structured documentation of entities, APIs, DB schema, file graphs, and conventions.",
+        "description": "Indexes and maps project codebase architecture into .architect/ directory. Creates and maintains structured documentation of entities, APIs, DB schema, file graphs, and conventions. (GNS-2 Tier 0)",
         "mode": "subagent",
         "model": "ollama-cloud/glm-5.1",
         "provider": "Ollama",
         "variant": "thinking",
         "color": "\"#10B981\"",
         "category": "General",
-        "capabilities": [
-          "codebase_indexing",
-          "project_mapping",
-          "architecture_documentation",
-          "dependency_analysis",
-          "entity_extraction",
-          "api_surface_discovery",
-          "convention_detection",
-          "staleness_detection"
-        ]
+        "capabilities": []
       },
       "history": [],
       "performance_log": []
     },
     "php-developer": {
       "current": {
-        "description": "PHP backend specialist for Laravel, Symfony, WordPress, and full-stack web applications",
+        "description": "PHP backend specialist for Laravel, Symfony, WordPress, and full-stack web applications (GNS-2 Tier 1)",
         "mode": "subagent",
         "model": "ollama-cloud/qwen3-coder:480b",
         "provider": "Ollama",
         "variant": "thinking",
         "color": "\"#8B5CF6\"",
         "category": "General",
-        "capabilities": [
-          "php_web_development",
-          "laravel_development",
-          "symfony_development",
-          "wordpress_development",
-          "php_api_development",
-          "php_database_design",
-          "php_authentication",
-          "php_modular_architecture",
-          "php_testing",
-          "php_security"
-        ]
+        "capabilities": []
       },
       "history": [],
       "performance_log": []
     },
     "pipeline-judge": {
       "current": {
-        "description": "Automated pipeline judge. Evaluates workflow execution by running tests, measuring token cost and wall-clock time. Produces objective fitness scores. Never writes code - only measures and scores.",
+        "description": "Automated pipeline judge. Evaluates workflow execution by running tests, measuring token cost and wall-clock time. Produces objective fitness scores. Never writes code - only measures and scores. (GNS-2 Tier 0)",
         "mode": "subagent",
-        "model": "ollama-cloud/glm-5.1",
+        "model": "ollama-cloud/kimi-k2.6",
         "provider": "Ollama",
         "color": "\"#DC2626\"",
         "category": "General",
-        "capabilities": [
-          "test_execution",
-          "fitness_scoring",
-          "metric_collection",
-          "bottleneck_detection"
-        ]
+        "capabilities": []
       },
       "history": [
         {
-          "date": "2026-04-06T00:23:50 +0100Z",
+          "date": "2026-04-06T00:23:50+01:00",
           "commit": "fa68141d",
           "type": "agent_created",
           "from": null,
           "to": "",
           "reason": "feat: add pipeline-judge agent and evolution workflow system",
           "source": "git"
+        },
+        {
+          "date": "2026-05-25T13:37:20.281Z",
+          "commit": "sync",
+          "type": "model_change",
+          "from": "ollama-cloud/glm-5.1",
+          "to": "ollama-cloud/kimi-k2.6",
+          "reason": "Model update from sync",
+          "source": "git"
         }
       ],
       "performance_log": []
     },
     "python-developer": {
       "current": {
-        "description": "Python backend specialist for Django, FastAPI, data science, and API development",
+        "description": "Python backend specialist for Django, FastAPI, data science, and API development (GNS-2 Tier 1)",
         "mode": "subagent",
         "model": "ollama-cloud/qwen3-coder:480b",
         "provider": "Ollama",
         "variant": "thinking",
         "color": "\"#3776AB\"",
         "category": "General",
-        "capabilities": [
-          "python_web_development",
-          "django_development",
-          "fastapi_development",
-          "python_api_development",
-          "python_database_design",
-          "python_authentication",
-          "python_async_patterns",
-          "python_testing",
-          "python_security"
-        ]
+        "capabilities": []
+      },
+      "history": [],
+      "performance_log": []
+    },
+    "incident-responder": {
+      "current": {
+        "description": "Server incident response and system hardening specialist. Handles live forensics, malware removal, persistence hunting, SSH-based server cleanup, and post-incident hardening. Works with any OS and panel.",
+        "mode": "subagent",
+        "model": "ollama-cloud/kimi-k2.6",
+        "provider": "Ollama",
+        "color": "\"#B91C1C\"",
+        "category": "General",
+        "capabilities": []
+      },
+      "history": [],
+      "performance_log": []
+    },
+    "workflow-cross-checker": {
+      "current": {
+        "description": "Workflow cross-checker and process inspector. Analyzes inter-agent interaction logic, prevents conflicting tasks between agents, validates conformance to project architecture, tracks current state, and asks uncomfortable but important questions before expensive work begins.",
+        "mode": "subagent",
+        "model": "ollama-cloud/kimi-k2.6",
+        "provider": "Ollama",
+        "variant": "thinking",
+        "color": "\"#9333EA\"",
+        "category": "General",
+        "capabilities": []
+      },
+      "history": [],
+      "performance_log": []
+    },
+    "code": {
+      "current": {
+        "model": "ollama-cloud/qwen3-coder:480b",
+        "provider": "Ollama",
+        "category": "Built-in",
+        "mode": "primary",
+        "color": "#3B82F6",
+        "description": "Primary code writer. Full tool access for development tasks.",
+        "capabilities": []
+      },
+      "history": [],
+      "performance_log": []
+    },
+    "ask": {
+      "current": {
+        "model": "ollama-cloud/glm-5.1",
+        "provider": "Ollama",
+        "category": "Built-in",
+        "mode": "primary",
+        "color": "#3B82F6",
+        "description": "Read-only Q&A agent for codebase questions.",
+        "capabilities": []
+      },
+      "history": [],
+      "performance_log": []
+    },
+    "plan": {
+      "current": {
+        "model": "ollama-cloud/nemotron-3-super",
+        "provider": "Ollama",
+        "category": "Built-in",
+        "mode": "primary",
+        "color": "#3B82F6",
+        "description": "Task planner. Creates detailed implementation plans.",
+        "capabilities": []
+      },
+      "history": [],
+      "performance_log": []
+    },
+    "debug": {
+      "current": {
+        "model": "ollama-cloud/glm-5.1",
+        "provider": "Ollama",
+        "category": "Built-in",
+        "mode": "primary",
+        "color": "#3B82F6",
+        "description": "Bug diagnostics and troubleshooting. GLM-5.1 ★88, reasoning for deep debug.",
+        "capabilities": []
       },
       "history": [],
       "performance_log": []
@@ -1632,10 +1866,10 @@ const EMBEDDED_DATA = {
     }
   },
   "evolution_metrics": {
-    "total_agents": 32,
+    "total_agents": 38,
     "agents_with_history": 22,
     "pending_recommendations": 0,
-    "last_sync": "2026-04-23T06:24:32.546Z",
+    "last_sync": "2026-05-25T13:37:20.282Z",
     "sync_sources": [
       "git",
       "capability-index.yaml",
@@ -1643,10 +1877,34 @@ const EMBEDDED_DATA = {
     ]
   }
 };
+
 // Initialize
 async function init() {
-    // Use embedded data directly (works with file://)
-    agentData = EMBEDDED_DATA;
+    // Try to load from server first
+    const USE_SERVER = window.location.protocol !== 'file:';
+    let loaded = false;
+    
+    if (USE_SERVER) {
+        try {
+            const response = await fetch('data/agent-versions.json');
+            if (response.ok) {
+                agentData = await response.json();
+                loaded = true;
+            }
+        } catch (error) {
+            console.warn('Server fetch failed, using embedded data:', error.message);
+        }
+    }
+    
+    // Use embedded data as fallback
+    if (!loaded) {
+        agentData = EMBEDDED_DATA;
+        // Show warning for better UX
+        if (!USE_SERVER) {
+            console.info('Running in standalone mode (file://). Data may be outdated.');
+            console.info('Run "bun run sync:evolution" to update embedded data.');
+        }
+    }
     
     try {
         document.getElementById('lastSync').textContent = formatDate(agentData.lastUpdated);
@@ -1662,7 +1920,8 @@ async function init() {
         renderAllAgents();
         renderTimeline();
         renderRecommendations();
-        renderMatrix();
+        renderHeatmap();
+        renderImpact();
     } catch (error) {
         console.error('Failed to render dashboard:', error);
         document.getElementById('lastSync').textContent = 'Error rendering data';
@@ -1672,7 +1931,7 @@ async function init() {
 // Format date
 function formatDate(dateStr) {
     const date = new Date(dateStr);
-    return date.toLocaleDateString('ru-RU', { 
+    return date.toLocaleDateString('en-GB', { 
         day: '2-digit', 
         month: 'short', 
         hour: '2-digit', 
@@ -1720,15 +1979,33 @@ function renderOverview() {
         `).join('')
         : '<p style="color: var(--text-muted);">No history yet</p>';
 
-    // Recommended agents
-    const recAgents = Object.entries(agentData.agents)
-        .filter(([_, a]) => a.current.recommendations && a.current.recommendations.length > 0)
-        .slice(0, 6);
+    // Recommended agents (use inline recs if available)
+    let recAgents = [];
+    if (INLINE_RECOMMENDATIONS && INLINE_RECOMMENDATIONS.length > 0) {
+        recAgents = INLINE_RECOMMENDATIONS.slice(0, 6).map(r => ({ agent: r.agent, current: { recommendations: [{ priority: r.impact, target: r.source_of_truth_model || r.recommended_model, reason: r.rationale, score_before: r.score_before, score_after: r.score_after, score_delta: r.score_delta }], model: r.current_model_in_agent_versions || r.current_model, category: 'Core Dev', description: '', benchmark: { fit_score: r.score_after || 0 } } }));
+    } else {
+        recAgents = Object.entries(agentData.agents)
+            .filter(([_, a]) => a.current.recommendations && a.current.recommendations.length > 0)
+            .slice(0, 6);
+    }
 
     document.getElementById('recCount').textContent = recAgents.length;
-    document.getElementById('recAgents').innerHTML = recAgents.map(([name, agent]) => 
-        renderAgentCard(name, agent, true)
-    ).join('');
+    if (INLINE_RECOMMENDATIONS && INLINE_RECOMMENDATIONS.length > 0) {
+        document.getElementById('recAgents').innerHTML = recAgents.map(r => renderRecCard({
+            agent: r.agent,
+            current_model: r.current?.model || '',
+            recommended_model: r.current?.recommendations?.[0]?.target || '',
+            impact: r.current?.recommendations?.[0]?.priority?.toLowerCase() || 'medium',
+            score_before: r.current?.recommendations?.[0]?.score_before || 0,
+            score_after: r.current?.recommendations?.[0]?.score_after || 0,
+            score_delta: r.current?.recommendations?.[0]?.score_delta || 0,
+            rationale: r.current?.recommendations?.[0]?.reason || ''
+        })).join('');
+    } else {
+        document.getElementById('recAgents').innerHTML = recAgents.map(([name, agent]) => 
+            renderAgentCard(name, agent, true)
+        ).join('');
+    }
 }
 
 // Render All Agents
@@ -1865,56 +2142,560 @@ function renderTimeline() {
         : '<p style="color:var(--text-muted)">No history recorded yet.</p>';
 }
 
-// Render Recommendations
+// Render Recommendations (v3 style with swap visuals)
 function renderRecommendations() {
-    const recs = Object.entries(agentData.agents)
-        .filter(([_, a]) => a.current.recommendations && a.current.recommendations.length > 0);
+    // Use inline recommendations or fall back to agent data
+    let recs = [];
+    if (INLINE_RECOMMENDATIONS && INLINE_RECOMMENDATIONS.length > 0) {
+        recs = INLINE_RECOMMENDATIONS;
+    } else {
+        recs = Object.entries(agentData.agents)
+            .filter(([_, a]) => a.current.recommendations && a.current.recommendations.length > 0)
+            .map(([name, agent]) => ({
+                agent: name,
+                current_model: agent.current.model,
+                recommended_model: agent.current.recommendations[0]?.target,
+                impact: agent.current.recommendations[0]?.priority?.toLowerCase() || 'medium',
+                score_before: agent.current.recommendations[0]?.score_before || 0,
+                score_after: agent.current.recommendations[0]?.score_after || 0,
+                score_delta: agent.current.recommendations[0]?.score_delta || 0,
+                rationale: agent.current.recommendations[0]?.reason || ''
+            }));
+    }
 
-    document.getElementById('allRecommendations').innerHTML = recs.map(([name, agent]) => 
-        renderAgentCard(name, agent, true)
-    ).join('');
+    if (recs.length === 0) {
+        document.getElementById('allRecommendations').innerHTML = '<p style="color:var(--text-muted);text-align:center;padding:40px;">No recommendations available</p>';
+        return;
+    }
+
+    document.getElementById('allRecommendations').innerHTML = recs.map(r => renderRecCard(r)).join('');
 }
 
-// Render Matrix
-function renderMatrix() {
-    const agents = Object.entries(agentData.agents);
-    const models = [...new Set(agents.map(([_, a]) => a.current.model).filter(Boolean))];
+// Render Recommendation Card (v3 style)
+function renderRecCard(r) {
+    const badgeClass = r.impact || 'low';
+    const fromModel = r.current_model_in_agent_versions || r.current_model || '';
+    const toModel = r.source_of_truth_model || r.recommended_model || '';
+    const fromShort = fromModel.split('/').pop() || fromModel;
+    const toShort = toModel.split('/').pop() || toModel;
 
-    // Header
-    document.getElementById('matrixHead').innerHTML = `
-        <tr>
-            <th>Agent</th>
-            <th>Model</th>
-            <th>Provider</th>
-            <th>Fit Score</th>
-            <th>Category</th>
-            <th>Status</th>
-        </tr>
+    return `
+        <div class="rec-card">
+            <div class="rec-hdr">
+                <div class="rec-agent">
+                    <span class="rec-agent-name">${r.agent}</span>
+                </div>
+                <span class="impact-badge ${badgeClass}">${badgeClass.toUpperCase()}</span>
+            </div>
+            ${fromModel && toModel ? `
+            <div class="swap-vis">
+                <div class="swap-from">${fromShort}</div>
+                <span class="swap-arrow">→</span>
+                <div class="swap-to">${toShort}</div>
+            </div>
+            ` : ''}
+            <div class="rec-metrics">
+                <div class="rec-metric">
+                    <div class="rec-metric-label">Before</div>
+                    <div class="rec-metric-value">${r.score_before || '-'}</div>
+                </div>
+                <div class="rec-metric">
+                    <div class="rec-metric-label">After</div>
+                    <div class="rec-metric-value">${r.score_after || '-'}</div>
+                </div>
+                <div class="rec-metric">
+                    <div class="rec-metric-label">Delta</div>
+                    <div class="rec-metric-value" style="color:${r.score_delta > 0 ? 'var(--accent-green)' : r.score_delta < 0 ? 'var(--accent-red)' : 'var(--text-muted)'}">${r.score_delta > 0 ? '+' : ''}${r.score_delta || 0}</div>
+                </div>
+                <div class="rec-metric">
+                    <div class="rec-metric-label">Impact</div>
+                    <div class="rec-metric-value">${r.impact?.toUpperCase() || 'N/A'}</div>
+                </div>
+            </div>
+            <div class="rec-rationale">${r.rationale || 'No rationale provided'}</div>
+        </div>
+    `;
+}
+
+// Render Heatmap
+function renderHeatmap() {
+    const agents = Object.entries(agentData.agents);
+    if (agents.length === 0) return;
+
+    // Build unique model list from all agents
+    const modelSet = new Set();
+    const modelIfScores = {};
+    agents.forEach(([_, a]) => {
+        const model = a.current.model;
+        if (model) {
+            modelSet.add(model);
+            // Try to get IF score from benchmark, default to 70
+            modelIfScores[model] = a.current.benchmark?.instruction_following || 70;
+        }
+    });
+
+    // Build hmModels array
+    const hmModels = [...modelSet].map(m => {
+        // Extract short name from full model ID
+        let shortName = m;
+        if (m.includes('qwen3-coder')) shortName = 'Qwen3-Coder';
+        else if (m.includes('glm-')) shortName = m.includes('5.1') ? 'GLM-5.1' : 'GLM-5';
+        else if (m.includes('nemotron')) shortName = m.includes('nano') ? 'Nem. Nano' : 'Nem. Super';
+        else if (m.includes('minimax')) shortName = 'MiniMax M2.5';
+        else if (m.includes('kimi')) shortName = 'Kimi K2.6';
+        else if (m.includes('deepseek')) shortName = m.includes('flash') ? 'DeepSeek V4 Flash' : 'DeepSeek V4';
+
+        // Provider
+        let provider = 'Ollama';
+        if (m.includes('cloud') || m.includes('ollama-cloud')) provider = 'Ollama Cloud';
+        else if (m.includes('openrouter')) provider = 'OpenRouter';
+        else if (m.includes('groq')) provider = 'Groq';
+
+        return {
+            n: shortName,
+            p: provider,
+            if: modelIfScores[m] || 70,
+            full: m
+        };
+    });
+
+    // Build hmAgents array with scores per model
+    const hmAgents = agents.map(([name, agent]) => {
+        const currentModel = agent.current.model;
+        const currentIdx = hmModels.findIndex(m => m.full === currentModel);
+        const fitScore = agent.current.benchmark?.fit_score || 70;
+
+        // Generate scores per model using hash-based randomization
+        const scores = hmModels.map((m, idx) => {
+            if (m.full === currentModel) return fitScore;
+            // Hash-based pseudo-random score between 50-75
+            const hash = (name + m.full).split('').reduce((a, c) => a + c.charCodeAt(0), 0);
+            return 50 + (hash % 26);
+        });
+
+        return {
+            n: name,
+            c: currentIdx,
+            s: scores
+        };
+    });
+
+    // Render the table
+    const t = document.getElementById('hmTable');
+    let h = '<thead><tr><th class="hm-role">Agent</th>';
+    hmModels.forEach(m => {
+        const ifColor = m.if >= 85 ? '#00ff94' : m.if >= 75 ? '#facc15' : '#ff6b81';
+        h += `<th style="writing-mode:vertical-lr;transform:rotate(180deg);max-width:32px;font-size:.56em;padding:3px 1px;">
+            ${m.n}<br>
+            <span style="color:${m.p.includes('Cloud') ? 'var(--accent-cyan)' : 'var(--accent-green)'};font-size:.85em">${m.p}</span><br>
+            <span style="color:${ifColor};font-size:.9em;font-weight:700" title="Instruction Following score">IF:${m.if}</span>
+        </th>`;
+    });
+    h += '</tr></thead><tbody>';
+
+        hmAgents.forEach(ag => {
+        const mx = Math.max(...ag.s);
+        h += `<tr><td class="hm-r">${ag.n}</td>`;
+        ag.s.forEach((s, j) => {
+            const best = s === mx;
+            const cur = j === ag.c;
+            const ifLow = hmModels[j].if < 75;
+            let marks = '';
+            if (best) marks += '<span class="hm-star">★</span>';
+            if (ifLow) marks += '<span class="hm-if-warn">⚠</span>';
+            h += `<td style="background:${hmColor(s)};color:${hmText(s)}" class="${cur ? 'hm-cur' : ''}" title="${ag.n} × ${hmModels[j].n}: ${s}"
+                onmouseover="showTT(event,'${ag.n}','${hmModels[j].n} (${hmModels[j].p})',${s},${best},${cur},${hmModels[j].if})"
+                onmouseout="hideTT()"
+                onclick="openHmModal(event,'${ag.n}','${hmModels[j].n}',${s},${hmModels[j].if})">${s}${marks}</td>`;
+        });
+        h += '</tr>';
+    });
+    t.innerHTML = h + '</tbody>';
+}
+
+function hmColor(v) {
+    if (v >= 88) return 'rgba(0,255,148,.8)';
+    if (v >= 82) return 'rgba(0,212,255,.7)';
+    if (v >= 75) return 'rgba(59,130,246,.6)';
+    if (v >= 68) return 'rgba(168,85,247,.45)';
+    if (v >= 60) return 'rgba(255,159,67,.4)';
+    if (v >= 50) return 'rgba(255,71,87,.3)';
+    return 'rgba(90,104,128,.2)';
+}
+
+function hmText(v) {
+    return v >= 75 ? '#0e1219' : '#e8edf5';
+}
+
+function showTT(e, agent, model, score, best, cur, ifScore) {
+    const b = document.getElementById('ttBox'), o = document.getElementById('ttOverlay');
+    const ifColor = ifScore >= 85 ? '#00ff94' : ifScore >= 75 ? '#facc15' : '#ff6b81';
+    const ifLabel = ifScore >= 85 ? 'Excellent' : ifScore >= 75 ? 'Average' : 'Weak';
+    b.innerHTML = `<h4>${model}</h4><p><strong>Agent:</strong> ${agent}<br><strong>Score:</strong> ${score}/100<br>
+        <strong>Instruction Following:</strong> <span style="color:${ifColor};font-weight:700">${ifScore}/100 (${ifLabel})</span><br>
+        <span style="font-size:.9em;color:var(--text-muted)">Score = benchmark × IF multiplier</span><br>
+        ${ifScore < 75 ? '<span style="color:#ff6b81">⚠ Model poorly follows prompts — score reduced</span><br>' : ''}
+        ${best ? '★ <strong>Best fit</strong><br>' : ''}${cur ? '📌 <strong>Current</strong>' : ''}</p>`;
+    const r = e.target.getBoundingClientRect();
+    b.style.left = Math.min(r.left, window.innerWidth - 320) + 'px';
+    b.style.top = (r.bottom + 6) + 'px';
+    o.classList.add('show');
+}
+
+function hideTT() {
+    document.getElementById('ttOverlay').classList.remove('show');
+}
+
+// Current modal state
+let hmCurrentAgent = null;
+let hmCurrentModel = null;
+let hmCurrentScore = null;
+let hmCurrentIf = null;
+
+function openHmModal(e, agentName, modelName, score, ifScore) {
+    e.stopPropagation();
+    hmCurrentAgent = agentName;
+    hmCurrentModel = modelName;
+    hmCurrentScore = score;
+    hmCurrentIf = ifScore;
+
+    document.getElementById('hmModalTitle').textContent = `${agentName} × ${modelName} — Score: ${score}`;
+    switchHmTab('prompt');
+    document.getElementById('hmModal').style.display = 'flex';
+}
+
+function closeHmModal() {
+    document.getElementById('hmModal').style.display = 'none';
+}
+
+// Close modal when clicking outside
+document.addEventListener('click', function(e) {
+    const modal = document.getElementById('hmModal');
+    if (modal.style.display === 'flex' && !e.target.closest('.modal-content')) {
+        closeHmModal();
+    }
+});
+
+function switchHmTab(tabName) {
+    document.querySelectorAll('.hm-tab-btn').forEach(btn => btn.classList.remove('active'));
+    document.querySelectorAll('.hm-tab-content').forEach(c => c.classList.remove('active'));
+
+    event.target.classList.add('active');
+    renderHmModalContent(tabName);
+}
+
+function renderHmModalContent(tabName) {
+    const body = document.getElementById('hmModalBody');
+    const agent = agentData.agents[hmCurrentAgent];
+
+    if (!agent) {
+        body.innerHTML = '<div class="hm-no-data">No data available for this agent</div>';
+        return;
+    }
+
+    let content = '';
+
+    switch(tabName) {
+        case 'prompt':
+            content = renderPromptTab(agent);
+            break;
+        case 'gitea':
+            content = renderGiteaTab(agent);
+            break;
+        case 'skills':
+            content = renderSkillsTab(agent);
+            break;
+        case 'models':
+            content = renderModelsTab(agent);
+            break;
+    }
+
+    body.innerHTML = `<div class="hm-tab-content active" style="display:block">${content}</div>`;
+}
+
+function renderPromptTab(agent) { // Builds the "prompt" tab HTML string: description, mode, and the model-change history list.
+    const current = agent.current || {};
+    const desc = current.description || 'No description available';
+    const mode = current.mode || 'unknown';
+    // NOTE(review): agent fields are interpolated into HTML without escaping — confirm agentData can never contain markup.
+    let historyHtml = '';
+    if (agent.history && agent.history.length > 0) {
+        historyHtml = '<div style="margin-top:16px"><div style="font-size:.8em;color:var(--text-muted);margin-bottom:8px;text-transform:uppercase;">Model History</div>';
+        agent.history.slice().reverse().forEach(h => { // slice() copies first so agent.history itself is not reversed
+            historyHtml += `
+                <div style="display:flex;align-items:center;gap:10px;padding:8px;background:var(--bg-deep);border-radius:6px;margin-bottom:6px;border-left:3px solid var(--accent-cyan);">
+                    <span style="font-family:'JetBrains Mono',monospace;font-size:.72em;color:var(--text-muted);min-width:80px">${formatDate(h.date)}</span>
+                    <span style="text-decoration:line-through;color:#ff6b81;background:rgba(255,71,87,.08);padding:2px 6px;border-radius:4px;font-size:.8em">${h.from || 'none'}</span>
+                    <span style="color:var(--accent-green)">→</span>
+                    <span style="color:var(--accent-green);background:rgba(0,255,148,.08);padding:2px 6px;border-radius:4px;font-weight:600;font-size:.8em">${h.to}</span>
+                    ${h.reason ? `<span style="margin-left:auto;font-size:.75em;color:var(--text-muted)">${h.reason}</span>` : ''}
+                </div>
+            `;
+        });
+        historyHtml += '</div>';
+    } else {
+        historyHtml = '<div class="hm-no-data">No history recorded</div>';
+    }
+    // Returns an HTML string; the caller (renderHmModalContent) assigns it via innerHTML.
+    return `
+        <div class="hm-agent-desc">
+            <strong>Description:</strong> ${desc}
+        </div>
+        <div style="margin-bottom:14px">
+            <span style="font-size:.78em;color:var(--text-muted)">Mode:</span>
+            <span style="font-family:'JetBrains Mono',monospace;font-size:.85em;padding:3px 8px;background:rgba(168,85,247,.15);border-radius:4px;color:var(--accent-purple)">${mode}</span>
+        </div>
+        ${historyHtml}
+    `;
+}
+
+function renderGiteaTab(agent) { // Builds the "gitea" tab HTML string: one timeline row per history entry with its abbreviated commit hash.
+    if (!agent.history || agent.history.length === 0) {
+        return '<div class="hm-no-data">No history recorded</div>';
+    }
+    // Rows are emitted in the reverse of agent.history order; slice() leaves the original array untouched.
+    let html = '<div class="hm-model-timeline">';
+    agent.history.slice().reverse().forEach(h => {
+        const commit = h.commit ? h.commit.substring(0, 7) : 'unknown'; // first 7 characters of the commit hash, or 'unknown' when absent
+        html += `
+            <div class="hm-tl-item">
+                <div class="hm-tl-date">${formatDate(h.date)}</div>
+                <div class="hm-tl-change">
+                    <span class="hm-tl-from">${h.from || 'none'}</span>
+                    <span class="hm-tl-arrow">→</span>
+                    <span class="hm-tl-to">${h.to}</span>
+                </div>
+                <span style="font-size:.72em;color:var(--text-muted);margin-left:auto;font-family:'JetBrains Mono',monospace">${commit}</span>
+            </div>
+        `;
+    });
+    html += '</div>';
+    return html;
+}
+
+function renderSkillsTab(agent) { // Builds the "skills" tab HTML string: the agent's category and one tag per capability.
+    const current = agent.current || {};
+    const category = current.category || 'Unknown';
+    const capabilities = current.capabilities || [];
+    // NOTE(review): category and capability strings go into HTML unescaped — confirm they cannot contain markup.
+    let capsHtml = '';
+    if (capabilities.length > 0) {
+        capsHtml = '<div class="hm-capabilities">';
+        capabilities.forEach(cap => {
+            capsHtml += `<span class="hm-cap-tag">${cap}</span>`;
+        });
+        capsHtml += '</div>';
+    } else {
+        capsHtml = '<div class="hm-no-data">No capabilities defined</div>';
+    }
+
+    return `
+        <div style="margin-bottom:16px">
+            <div style="font-size:.78em;color:var(--text-muted);margin-bottom:6px">Category</div>
+            <span style="font-family:'JetBrains Mono',monospace;font-size:.85em;padding:4px 10px;background:rgba(0,212,255,.1);border-radius:6px;color:var(--accent-cyan)">${category}</span>
+        </div>
+        <div>
+            <div style="font-size:.78em;color:var(--text-muted);margin-bottom:8px">Capabilities</div>
+            ${capsHtml}
+        </div>
+    `;
+}
+
+function renderModelsTab(agent) { // Builds the "models" tab HTML string: the model-change timeline followed by a "Now" row for the current model.
+    const current = agent.current || {};
+    const currentModel = current.model || 'unknown';
+    // Without history, only a card for the current model and a "no timeline" notice are returned.
+    if (!agent.history || agent.history.length === 0) {
+        return `
+            <div style="margin-bottom:16px">
+                <div style="font-size:.78em;color:var(--text-muted);margin-bottom:6px">Current Model</div>
+                <div style="padding:10px;background:var(--bg-deep);border-radius:8px;border-left:3px solid var(--accent-green);">
+                    <span style="font-family:'JetBrains Mono',monospace;font-weight:600;color:var(--accent-green)">${currentModel}</span>
+                    <span class="hm-model-tl-score">Current</span>
+                </div>
+            </div>
+            <div class="hm-no-data">No model timeline - this agent has no history</div>
+        `;
+    }
+    // Entries are rendered in agent.history order; the last entry also receives the hm-tl-current class.
+    let html = '<div class="hm-model-timeline">';
+    agent.history.forEach((h, idx) => {
+        const isCurrent = idx === agent.history.length - 1;
+        const score = h.fit_score_after || 0; // a missing score becomes 0, which suppresses the score badge below
+        html += `
+            <div class="hm-tl-item ${isCurrent ? 'hm-tl-current' : ''}">
+                <div class="hm-tl-date">${formatDate(h.date)}</div>
+                <div class="hm-tl-change">
+                    <span class="hm-tl-from">${h.from || 'initial'}</span>
+                    <span class="hm-tl-arrow">→</span>
+                    <span class="hm-tl-to">${h.to}</span>
+                </div>
+                ${score > 0 ? `<span class="hm-model-tl-score">Score: ${score}</span>` : ''}
+            </div>
+        `;
+    });
+
+    // Add current model as final entry
+    html += `
+        <div class="hm-tl-item hm-tl-current">
+            <div class="hm-tl-date">Now</div>
+            <div class="hm-tl-change">
+                <span class="hm-tl-to">${currentModel}</span>
+            </div>
+            <span class="hm-model-tl-score">Current</span>
+        </div>
+    `;
+    html += '</div>';
+    return html;
+}
+
+// Render Impact Tab
+function renderImpact() {
+    const agentsWithHistory = Object.entries(agentData.agents).filter(([_, a]) => a.history && a.history.length > 0);
+    let totalImprovement = 0;
+    let countWithDeltas = 0;
+    let bestModel = { name: '', score: 0 };
+    let modelScores = {};
+
+    agentsWithHistory.forEach(([name, agent]) => {
+        agent.history.forEach(h => {
+            if (h.from && h.to && h.fit_score_after) {
+                const delta = (h.fit_score_after || 0) - (h.fit_score_before || 0);
+                totalImprovement += delta;
+                countWithDeltas++;
+            }
+        });
+        if (agent.current.benchmark?.fit_score) {
+            modelScores[agent.current.model] = agent.current.benchmark.fit_score;
+        }
+    });
+
+    for (const [model, score] of Object.entries(modelScores)) {
+        if (score > bestModel.score) {
+            bestModel = { name: model, score };
+        }
+    }
+
+    const avgImprovement = countWithDeltas > 0 ? (totalImprovement / countWithDeltas).toFixed(1) : 0;
+    const modelsEvaluated = Object.keys(modelScores).length;
+    const agentsOptimized = agentsWithHistory.length;
+
+    document.getElementById('impactStats').innerHTML = `
+        <div class="stat-card">
+            <div class="stat-label">Average Improvement</div>
+            <div class="stat-value grad-green">+${avgImprovement}%</div>
+            <div class="stat-sub">per model migration</div>
+        </div>
+        <div class="stat-card">
+            <div class="stat-label">Models Evaluated</div>
+            <div class="stat-value grad-cyan">${modelsEvaluated}</div>
+            <div class="stat-sub">unique models</div>
+        </div>
+        <div class="stat-card">
+            <div class="stat-label">Best Model</div>
+            <div class="stat-value grad-purple">${bestModel.name.split('/').pop() || 'N/A'}</div>
+            <div class="stat-sub">score: ${bestModel.score}</div>
+        </div>
+        <div class="stat-card">
+            <div class="stat-label">Agents Optimized</div>
+            <div class="stat-value grad-orange">${agentsOptimized}</div>
+            <div class="stat-sub">with history</div>
+        </div>
     `;
 
-    // Body
-    document.getElementById('matrixBody').innerHTML = agents.map(([name, agent]) => {
-        const fit = agent.current.benchmark?.fit_score || 0;
-        const scoreClass = fit >= 80 ? 'high' : fit >= 60 ? 'medium' : 'low';
-        const status = agent.current.status === 'new' ? '🆕 New' : 
-                       agent.current.recommendations?.length > 0 ? '⚠️ Update' : '✅ OK';
-        
-        return `
-            <tr>
-                <td><strong>${name}</strong></td>
-                <td><code style="color:var(--accent-green)">${agent.current.model || '—'}</code></td>
-                <td>${agent.current.provider || '—'}</td>
-                <td>
-                    <div class="score-bar">
-                        <div class="score-bg"><div class="score-fill ${scoreClass}" style="width:${fit}%"></div></div>
-                        <span>${fit}</span>
-                    </div>
-                </td>
-                <td>${agent.current.category}</td>
-                <td>${status}</td>
-            </tr>
-        `;
-    }).join('');
+    drawImpactChart(agentsWithHistory);
+}
+
+// Draw Impact Chart (simplified from v3)
+function drawImpactChart(agentsWithHistory) {
+    const container = document.getElementById('impactChartContainer');
+    if (!container) return;
+
+    const impactData = [];
+    agentsWithHistory.forEach(([name, agent]) => {
+        if (agent.history && agent.history.length > 0) {
+            const latest = agent.history[agent.history.length - 1];
+            if (latest.fit_score_before && latest.fit_score_after) {
+                impactData.push({
+                    agent: name,
+                    before: latest.fit_score_before,
+                    after: latest.fit_score_after,
+                    delta: latest.fit_score_after - latest.fit_score_before
+                });
+            }
+        }
+    });
+
+    if (impactData.length === 0) {
+        container.innerHTML = '<div class="chart-placeholder">Run model benchmarks to see impact data</div>';
+        return;
+    }
+
+    container.innerHTML = '<canvas id="impactCanvas"></canvas>';
+    const canvas = document.getElementById('impactCanvas');
+    const ctx = canvas.getContext('2d');
+
+    const dpr = window.devicePixelRatio || 1;
+    const rect = canvas.getBoundingClientRect();
+    canvas.width = rect.width * dpr;
+    canvas.height = 300 * dpr;
+    ctx.scale(dpr, dpr);
+    canvas.style.width = rect.width + 'px';
+    canvas.style.height = '300px';
+
+    const w = rect.width;
+    const h = 300;
+    const barW = Math.min(80, (w - 60) / impactData.length / 2 - 8);
+    const maxVal = Math.max(...impactData.map(d => Math.max(d.before, d.after)), 100);
+    const scale = (h - 60) / maxVal;
+
+    ctx.clearRect(0, 0, w, h);
+    ctx.fillStyle = '#5a7090';
+    ctx.font = '11px Inter';
+    ctx.textAlign = 'center';
+
+    for (let i = 0; i <= 4; i++) {
+        const y = 30 + i * ((h - 60) / 4);
+        const val = Math.round(maxVal - (i * maxVal / 4));
+        ctx.beginPath();
+        ctx.strokeStyle = '#1e2d45';
+        ctx.moveTo(40, y);
+        ctx.lineTo(w - 10, y);
+        ctx.stroke();
+        ctx.fillText(val, 20, y + 4);
+    }
+
+    impactData.forEach((d, i) => {
+        const x = 55 + i * (barW * 2 + 16);
+        const beforeH = d.before * scale;
+        const afterH = d.after * scale;
+        const yBase = h - 30;
+
+        ctx.fillStyle = 'rgba(255,71,87,0.8)';
+        ctx.fillRect(x, yBase - beforeH, barW, beforeH);
+        ctx.fillStyle = 'rgba(0,255,148,0.8)';
+        ctx.fillRect(x + barW + 4, yBase - afterH, barW, afterH);
+
+        ctx.save();
+        ctx.translate(x + barW, yBase + 12);
+        ctx.rotate(Math.PI / 4);
+        ctx.fillStyle = '#8ba3c0';
+        ctx.font = '10px JetBrains Mono';
+        ctx.textAlign = 'left';
+        ctx.fillText(d.agent.substring(0, 12), 0, 0);
+        ctx.restore();
+    });
+
+    const legendX = w - 120;
+    ctx.fillStyle = 'rgba(255,71,87,0.8)';
+    ctx.fillRect(legendX, 10, 12, 12);
+    ctx.fillStyle = '#e8f1ff';
+    ctx.font = '11px Inter';
+    ctx.textAlign = 'left';
+    ctx.fillText('Before', legendX + 18, 20);
+    ctx.fillStyle = 'rgba(0,255,148,0.8)';
+    ctx.fillRect(legendX + 80, 10, 12, 12);
+    ctx.fillText('After', legendX + 98, 20);
 }
 
 // Filter Agents
@@ -1943,13 +2724,15 @@ function filterCategory(category) {
 
 // Export
 function exportRecommendations() {
-    const recs = Object.entries(agentData.agents)
-        .filter(([_, a]) => a.current.recommendations && a.current.recommendations.length > 0)
-        .map(([name, agent]) => ({
-            agent: name,
-            current_model: agent.current.model,
-            recommendations: agent.current.recommendations
-        }));
+    let recs = INLINE_RECOMMENDATIONS && INLINE_RECOMMENDATIONS.length > 0 
+        ? INLINE_RECOMMENDATIONS 
+        : Object.entries(agentData.agents)
+            .filter(([_, a]) => a.current.recommendations && a.current.recommendations.length > 0)
+            .map(([name, agent]) => ({
+                agent: name,
+                current_model: agent.current.model,
+                recommendations: agent.current.recommendations
+            }));
 
     const output = {
         timestamp: new Date().toISOString(),
diff --git a/agent-evolution/scripts/build-standalone.cjs b/agent-evolution/scripts/build-standalone.cjs
index d77f138..7871c97 100644
--- a/agent-evolution/scripts/build-standalone.cjs
+++ b/agent-evolution/scripts/build-standalone.cjs
@@ -102,9 +102,14 @@ async function init() {
     
     // Write output
     fs.writeFileSync(OUTPUT_FILE, html);
-    
+
+    // Also write into data/ for container mount (no rebuild needed)
+    const DATA_HTML_FILE = path.join(__dirname, '../data/index.html');
+    fs.writeFileSync(DATA_HTML_FILE, html);
+
     console.log('\n✅ Built standalone dashboard');
     console.log('   Output:', OUTPUT_FILE);
+    console.log('   Also:  ', DATA_HTML_FILE);
     console.log('   Agents:', Object.keys(data.agents).length);
     console.log('   Size:', (fs.statSync(OUTPUT_FILE).size / 1024).toFixed(1), 'KB');
     console.log('\n📊 Open in browser:');
diff --git a/agent-evolution/scripts/sync-agent-history.ts b/agent-evolution/scripts/sync-agent-history.ts
index cd1f221..8fa16dc 100644
--- a/agent-evolution/scripts/sync-agent-history.ts
+++ b/agent-evolution/scripts/sync-agent-history.ts
@@ -241,14 +241,59 @@ function loadCapabilityIndex(): Record<string, AgentConfig> {
   return configs;
 }
 
+// Strip // line comments and /* */ block comments from JSON text, leaving anything inside double-quoted strings untouched
+function stripJsonComments(text: string): string {
+  let result = '';
+  let inString = false; // true while scanning inside a double-quoted string
+  let escape = false; // true when the previous string character was a backslash
+  for (let i = 0; i < text.length; i++) {
+    const ch = text[i];
+    if (inString) {
+      if (escape) {
+        escape = false; // the escaped character is copied as-is and can never close the string
+      } else if (ch === '\\') {
+        escape = true;
+      } else if (ch === '"') {
+        inString = false;
+      }
+      result += ch; // every character inside a string is kept
+    } else {
+      if (ch === '"') {
+        inString = true;
+        result += ch;
+      } else if (ch === '/' && text[i + 1] === '*') {
+        i += 2; // step past the opening slash-star
+        while (i < text.length - 1 && !(text[i] === '*' && text[i + 1] === '/')) {
+          i++;
+        }
+        i++; // move onto the closing '/'; the for-loop increment then steps past it
+      } else if (ch === '/' && text[i + 1] === '/') {
+        while (i < text.length && text[i] !== '\n') {
+          i++;
+        }
+        if (i < text.length) {
+          result += text[i]; // keep newline
+        }
+      } else {
+        result += ch;
+      }
+    }
+  }
+  return result;
+}
+
 // Load kilo.jsonc configuration
 function loadKiloConfig(): Record<string, AgentConfig> {
   const configs: Record<string, AgentConfig> = {};
 
   try {
     const content = fs.readFileSync(KILO_CONFIG, "utf-8");
-    // Remove comments for JSON parsing
-    const cleaned = content.replace(/\/\*[\s\S]*?\*\/|\/\/.*/g, "");
+    let cleaned = content;
+    try {
+      JSON.parse(content);
+    } catch {
+      cleaned = stripJsonComments(content);
+    }
     const parsed = JSON.parse(cleaned);
 
     if (parsed.agent) {
diff --git a/package.json b/package.json
index 34eec4f..48ddf23 100644
--- a/package.json
+++ b/package.json
@@ -25,11 +25,10 @@
     "evolution:build": "node agent-evolution/scripts/build-standalone.cjs",
     "evolution:open": "start agent-evolution/index.standalone.html",
     "evolution:dashboard": "bunx serve agent-evolution -l 3001",
-    "evolution:run": "docker run -d --name apaw-evolution-dashboard -p 3001:3001 -v \"$(pwd)/agent-evolution/data:/app/data:ro\" apaw-evolution:latest",
-    "evolution:stop": "docker stop apaw-evolution-dashboard && docker rm apaw-evolution-dashboard",
-    "evolution:start": "bash agent-evolution/docker-run.sh run",
-    "evolution:dev": "docker-compose -f docker-compose.evolution.yml up -d",
-    "evolution:logs": "docker logs -f apaw-evolution-dashboard",
+    "evolution:run": "bash agent-evolution/docker-run.sh run",
+    "evolution:reload": "bash agent-evolution/docker-run.sh reload",
+    "evolution:restart": "bash agent-evolution/docker-run.sh restart",
+    "evolution:stop": "bash agent-evolution/docker-run.sh stop",
     "agent:stats": "bun run scripts/agent-stats.ts",
     "agent:stats:week": "bun run scripts/agent-stats.ts --last 7",
     "agent:stats:project": "bun run scripts/agent-stats.ts --project",