fix: restore optimal v3 models + add fitness gate protection

- Restore all 30 agents to v3.html heatmap optimal models:
  * frontend-developer: qwen3-coder -> minimax-m2.5 (92★)
  * devops-engineer: nemotron-3-super -> kimi-k2.6:cloud (88★)
  * browser-automation: qwen3-coder -> kimi-k2.6:cloud (86★)
  * agent-architect: glm-5.1 -> kimi-k2.6:cloud (86★)
- Add Model Evolution Guard system:
  * agent-evolution/scripts/lib/fitness-gate.cjs
  * Rejects downgrades >3 points or below score 75
  * Produces detailed diff report before any file modifications
  * Normalized model ID lookup (v3.html ':' vs JSON '-')
- Update sync-benchmarks-from-yaml.cjs with fitness gate
- Update model-benchmarks.json with v3 optimal assignments
- Rebuild research-dashboard.html (104KB, 30 agents, 11 models)
- Add model-evolution-guard.md architecture documentation
- Add v3-optimal-models.json as source-of-truth reference

Fixes regression introduced by commit 3badb25 where models were silently downgraded from heatmap optimal to inferior assignments.
2026-04-29 23:19:16 +01:00
parent d1516f4856
commit 9e48a4960e
14 changed files with 2850 additions and 2049 deletions
--- a/agent-evolution/dist/research-dashboard-2026_04_29.html
+++ b/agent-evolution/dist/research-dashboard-2026_04_29.html
@@ -255,7 +255,7 @@
 <div class="container">
    <div class="header">
        <h1>APAW Agent Model Research v2</h1>
-        <div class="sub">Live dashboard • 15 models × 32 agents • 2026-04-29</div>
+        <div class="sub">Live dashboard • 15 models × 30 agents • 2026-04-29</div>
    </div>

    <div class="tabs" id="tabBar">
@@ -419,12 +419,12 @@

 <script>
 // BENCHMARK_DATA_PLACEHOLDER - REPLACED BY BUILD SCRIPT
-// Generated from model-benchmarks.json on 2026-04-29T19:58:05.244Z
+// Generated from model-benchmarks.json on 2026-04-29T22:15:07.925Z
 const EMBEDDED_DATA = {
  "version": "1.0.0",
-  "generated": "2026-04-29T19:56:51.418Z",
-  "source": ".kilo/capability-index.yaml (synced v2)",
-  "total_agents": 32,
+  "generated": "2026-04-29T21:47:05.339Z",
+  "source": ".kilo/capability-index.yaml (synced v3 + fitness-gate)",
+  "total_agents": 30,
  "total_models_tracked": 11,
  "providers": [
    "ollama",
@@ -800,8 +800,8 @@ const EMBEDDED_DATA = {
  "agent_model_scores": [
    {
      "agent": "lead-developer",
-      "current_model_index": 6,
-      "current_model_id": "nemotron-3-super",
+      "current_model_index": 0,
+      "current_model_id": "qwen3-coder-480b",
      "reasoning_effort": "H",
      "scores": {
        "qwen3-coder-480b": 92,
@@ -818,8 +818,8 @@ const EMBEDDED_DATA = {
    },
    {
      "agent": "frontend-developer",
-      "current_model_index": -1,
-      "current_model_id": "qwen3-coder:480b",
+      "current_model_index": 1,
+      "current_model_id": "minimax-m2.5",
      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 86,
@@ -836,8 +836,8 @@ const EMBEDDED_DATA = {
    },
    {
      "agent": "php-developer",
-      "current_model_index": -1,
-      "current_model_id": "qwen3-coder:480b",
+      "current_model_index": 0,
+      "current_model_id": "qwen3-coder-480b",
      "reasoning_effort": "H",
      "scores": {
        "qwen3-coder-480b": 87,
@@ -854,8 +854,8 @@ const EMBEDDED_DATA = {
    },
    {
      "agent": "python-developer",
-      "current_model_index": -1,
-      "current_model_id": "qwen3-coder:480b",
+      "current_model_index": 0,
+      "current_model_id": "qwen3-coder-480b",
      "reasoning_effort": "H",
      "scores": {
        "qwen3-coder-480b": 90,
@@ -872,8 +872,8 @@ const EMBEDDED_DATA = {
    },
    {
      "agent": "backend-developer",
-      "current_model_index": -1,
-      "current_model_id": "qwen3-coder:480b",
+      "current_model_index": 0,
+      "current_model_id": "qwen3-coder-480b",
      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 91,
@@ -890,8 +890,8 @@ const EMBEDDED_DATA = {
    },
    {
      "agent": "go-developer",
-      "current_model_index": -1,
-      "current_model_id": "qwen3-coder:480b",
+      "current_model_index": 0,
+      "current_model_id": "qwen3-coder-480b",
      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 85,
@@ -908,8 +908,8 @@ const EMBEDDED_DATA = {
    },
    {
      "agent": "flutter-developer",
-      "current_model_index": -1,
-      "current_model_id": "qwen3-coder:480b",
+      "current_model_index": 0,
+      "current_model_id": "qwen3-coder-480b",
      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 86,
@@ -926,8 +926,8 @@ const EMBEDDED_DATA = {
    },
    {
      "agent": "devops-engineer",
-      "current_model_index": 6,
-      "current_model_id": "nemotron-3-super",
+      "current_model_index": -1,
+      "current_model_id": "kimi-k2.6",
      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 66,
@@ -944,8 +944,8 @@ const EMBEDDED_DATA = {
    },
    {
      "agent": "sdet-engineer",
-      "current_model_index": -1,
-      "current_model_id": "qwen3-coder:480b",
+      "current_model_index": 0,
+      "current_model_id": "qwen3-coder-480b",
      "reasoning_effort": "H",
      "scores": {
        "qwen3-coder-480b": 88,
@@ -1035,7 +1035,7 @@ const EMBEDDED_DATA = {
    {
      "agent": "browser-automation",
      "current_model_index": -1,
-      "current_model_id": "qwen3-coder:480b",
+      "current_model_id": "kimi-k2.6",
      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 87,
@@ -1052,8 +1052,8 @@ const EMBEDDED_DATA = {
    },
    {
      "agent": "visual-tester",
-      "current_model_index": -1,
-      "current_model_id": "qwen3-coder:480b",
+      "current_model_index": 0,
+      "current_model_id": "qwen3-coder-480b",
      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 82,
@@ -1070,9 +1070,9 @@ const EMBEDDED_DATA = {
    },
    {
      "agent": "system-analyst",
-      "current_model_index": 6,
-      "current_model_id": "nemotron-3-super",
-      "reasoning_effort": "H",
+      "current_model_index": 7,
+      "current_model_id": "glm-5.1",
+      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 70,
        "minimax-m2.5": 66,
@@ -1086,42 +1086,6 @@ const EMBEDDED_DATA = {
        "kimi-k2-6": 86
      }
    },
-    {
-      "agent": "requirement-refiner",
-      "current_model_index": 7,
-      "current_model_id": "glm-5.1",
-      "reasoning_effort": "H",
-      "scores": {
-        "qwen3-coder-480b": 66,
-        "minimax-m2.5": 62,
-        "minimax-m2.7": 60,
-        "nemotron-3-super": 72,
-        "glm-5.1": 80,
-        "deepseek-v4-pro-max": 82,
-        "qwen3-5-122b": 74,
-        "qwen3-coder-next": 54,
-        "qwen3-6-plus": 78,
-        "kimi-k2-6": 82
-      }
-    },
-    {
-      "agent": "history-miner",
-      "current_model_index": 6,
-      "current_model_id": "nemotron-3-super",
-      "reasoning_effort": "M",
-      "scores": {
-        "qwen3-coder-480b": 68,
-        "minimax-m2.5": 60,
-        "minimax-m2.7": 56,
-        "nemotron-3-super": 85,
-        "glm-5.1": 78,
-        "deepseek-v4-pro-max": 86,
-        "qwen3-5-122b": 72,
-        "qwen3-coder-next": 56,
-        "qwen3-6-plus": 84,
-        "kimi-k2-6": 82
-      }
-    },
    {
      "agent": "capability-analyst",
      "current_model_index": 7,
@@ -1143,7 +1107,7 @@ const EMBEDDED_DATA = {
    {
      "agent": "orchestrator",
      "current_model_index": -1,
-      "current_model_id": "kimi-k2.6:cloud",
+      "current_model_id": "kimi-k2.6",
      "reasoning_effort": "H",
      "scores": {
        "qwen3-coder-480b": 74,
@@ -1286,8 +1250,8 @@ const EMBEDDED_DATA = {
    },
    {
      "agent": "agent-architect",
-      "current_model_index": 7,
-      "current_model_id": "glm-5.1",
+      "current_model_index": -1,
+      "current_model_id": "kimi-k2.6",
      "reasoning_effort": "H",
      "scores": {
        "qwen3-coder-480b": 78,
@@ -1391,17 +1355,17 @@ const EMBEDDED_DATA = {
  "agent_current_config": [
    {
      "agent": "lead-developer",
-      "model": "ollama-cloud/nemotron-3-super",
+      "model": "ollama-cloud/qwen3-coder:480b",
      "provider": "Ollama Cloud",
      "category": "Process",
-      "badge_type": "nemotron",
+      "badge_type": "qwen",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "frontend-developer",
-      "model": "ollama-cloud/qwen3-coder:480b",
+      "model": "ollama-cloud/minimax-m2.5",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "qwen",
@@ -1461,7 +1425,7 @@ const EMBEDDED_DATA = {
    },
    {
      "agent": "devops-engineer",
-      "model": "ollama-cloud/nemotron-3-super",
+      "model": "ollama-cloud/kimi-k2.6",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "nemotron",
@@ -1521,7 +1485,7 @@ const EMBEDDED_DATA = {
    },
    {
      "agent": "browser-automation",
-      "model": "ollama-cloud/qwen3-coder:480b",
+      "model": "ollama-cloud/kimi-k2.6",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "qwen",
@@ -1541,16 +1505,6 @@ const EMBEDDED_DATA = {
    },
    {
      "agent": "system-analyst",
-      "model": "ollama-cloud/nemotron-3-super",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "nemotron",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
-    {
-      "agent": "requirement-refiner",
      "model": "ollama-cloud/glm-5.1",
      "provider": "Ollama Cloud",
      "category": "Process",
@@ -1559,16 +1513,6 @@ const EMBEDDED_DATA = {
      "status": "good",
      "previous_model": null
    },
-    {
-      "agent": "history-miner",
-      "model": "ollama-cloud/nemotron-3-super",
-      "provider": "Ollama Cloud",
-      "category": "Process",
-      "badge_type": "nemotron",
-      "fit_score": 0,
-      "status": "good",
-      "previous_model": null
-    },
    {
      "agent": "capability-analyst",
      "model": "ollama-cloud/glm-5.1",
@@ -1581,7 +1525,7 @@ const EMBEDDED_DATA = {
    },
    {
      "agent": "orchestrator",
-      "model": "ollama-cloud/kimi-k2.6:cloud",
+      "model": "ollama-cloud/kimi-k2.6",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "kimi",
@@ -1661,7 +1605,7 @@ const EMBEDDED_DATA = {
    },
    {
      "agent": "agent-architect",
-      "model": "ollama-cloud/glm-5.1",
+      "model": "ollama-cloud/kimi-k2.6",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "glm",