fix: restore optimal v3 models + add fitness gate protection

- Restore all 30 agents to v3.html heatmap optimal models: * frontend-developer: qwen3-coder -> minimax-m2.5 (92★) * devops-engineer: nemotron-3-super -> kimi-k2.6:cloud (88★) * browser-automation: qwen3-coder -> kimi-k2.6:cloud (86★) * agent-architect: glm-5.1 -> kimi-k2.6:cloud (86★) - Add Model Evolution Guard system: * agent-evolution/scripts/lib/fitness-gate.cjs * Rejects downgrades >3 points or below score 75 * Produces detailed diff report before any file modifications * Normalized model ID lookup (v3.html ':' vs JSON '-') - Update sync-benchmarks-from-yaml.cjs with fitness gate - Update model-benchmarks.json with v3 optimal assignments - Rebuild research-dashboard.html (104KB, 30 agents, 11 models) - Add model-evolution-guard.md architecture documentation - Add v3-optimal-models.json as source-of-truth reference Fixes regression introduced by commit 3badb25 where models were silently downgraded from heatmap optimal to inferior assignments.
2026-04-29 23:19:16 +01:00
parent d1516f4856
commit 9e48a4960e
14 changed files with 2850 additions and 2049 deletions
--- a/.kilo/agents/lead-developer.md
+++ b/.kilo/agents/lead-developer.md
@@ -1,7 +1,7 @@
 ---
 description: Primary code writer for backend and core logic. Writes implementation to pass tests
 mode: subagent
-model: ollama-cloud/nemotron-3-super
+model: ollama-cloud/qwen3-coder:480b
 variant: thinking
 color: "#DC2626"
 permission:
--- a/.kilo/agents/orchestrator.md
+++ b/.kilo/agents/orchestrator.md
@@ -40,6 +40,7 @@ permission:
    "planner": allow
    "reflector": allow
    "memory-manager": allow
+    "devops-engineer": allow
 ---

 # Kilo Code: Orchestrator
--- a/.kilo/agents/security-auditor.md
+++ b/.kilo/agents/security-auditor.md
@@ -2,7 +2,7 @@
 description: Scans for security vulnerabilities, OWASP Top 10, dependency CVEs, and hardcoded secrets
 mode: subagent
 model: ollama-cloud/nemotron-3-super
-color: "#DC2626"
+color: #DC2626
 permission:
  read: allow
  bash: allow
--- a/.kilo/agents/system-analyst.md
+++ b/.kilo/agents/system-analyst.md
@@ -1,7 +1,7 @@
 ---
 description: Designs technical specifications, data schemas, and API contracts before implementation
 mode: subagent
-model: ollama-cloud/nemotron-3-super
+model: ollama-cloud/glm-5.1
 color: "#0891B2"
 permission:
  read: allow
--- a/.kilo/capability-index.yaml
+++ b/.kilo/capability-index.yaml
@@ -15,7 +15,7 @@ agents:
    forbidden:
    - test_writing
    - code_review
-    model: ollama-cloud/nemotron-3-super
+    model: ollama-cloud/qwen3-coder:480b
    variant: thinking
    mode: subagent
    delegates_to:
@@ -49,7 +49,7 @@ agents:
    - frontend_tests
    forbidden:
    - backend_code
-    model: ollama-cloud/qwen3-coder:480b
+    model: ollama-cloud/minimax-m2.5
    mode: subagent
    delegates_to:
    - code-skeptic
@@ -245,7 +245,7 @@ agents:
    - ci_cd_config
    forbidden:
    - application_code
-    model: ollama-cloud/nemotron-3-super
+    model: ollama-cloud/kimi-k2.6:cloud
    mode: subagent
    delegates_to:
    - code-skeptic
@@ -399,7 +399,7 @@ agents:
    - screenshots
    forbidden:
    - unit_testing
-    model: ollama-cloud/qwen3-coder:480b
+    model: ollama-cloud/kimi-k2.6:cloud
    mode: subagent
    delegates_to:
    - orchestrator
@@ -463,68 +463,14 @@ agents:
    - database_schemas
    forbidden:
    - implementation
-    model: ollama-cloud/nemotron-3-super
-    variant: thinking
-    mode: subagent
-    delegates_to:
-    - sdet-engineer
-    - orchestrator
-    fallback_models:
-    - ollama-cloud/glm-5.1
-    - ollama-cloud/deepseek-v4-pro-max
-    - ollama-cloud/kimi-k2.6:cloud
-    failover_strategy: downgraded
-  requirement-refiner:
-    capabilities:
-    - requirement_analysis
-    - user_story_creation
-    - acceptance_criteria
-    - clarification
-    receives:
-    - raw_requests
-    - feature_ideas
-    produces:
-    - user_stories
-    - acceptance_criteria
-    - requirements_doc
-    forbidden:
-    - design_decisions
    model: ollama-cloud/glm-5.1
-    variant: thinking
-    mode: subagent
-    delegates_to:
-    - history-miner
-    - system-analyst
-    fallback_models:
-    - ollama-cloud/deepseek-v4-pro-max
-    - ollama-cloud/kimi-k2.6:cloud
-    - groq/llama-3.1-8b-instant
-    - ollama-cloud/glm-5
-    failover_strategy: mixed
-  history-miner:
-    capabilities:
-    - git_search
-    - duplicate_detection
-    - past_solution_finder
-    - pattern_identification
-    receives:
-    - search_query
-    - issue_description
-    produces:
-    - commit_list
-    - duplicate_report
-    - related_files
-    forbidden:
-    - code_changes
-    model: ollama-cloud/nemotron-3-super
    mode: subagent
    delegates_to: []
    fallback_models:
    - ollama-cloud/glm-5.1
    - ollama-cloud/deepseek-v4-pro-max
-    - groq/llama-3.1-8b-instant
-    - openrouter/qwen/qwen3.6-plus:free
-    failover_strategy: mixed
+    - ollama-cloud/kimi-k2.6:cloud
+    failover_strategy: downgraded
  capability-analyst:
    capabilities:
    - gap_analysis
@@ -786,7 +732,7 @@ agents:
    - integration_plan
    forbidden:
    - agent_execution
-    model: ollama-cloud/glm-5.1
+    model: ollama-cloud/kimi-k2.6:cloud
    variant: thinking
    mode: subagent
    delegates_to: