fix: restore optimal v3 models + add fitness gate protection
- Restore all 30 agents to v3.html heatmap optimal models:
* frontend-developer: qwen3-coder -> minimax-m2.5 (92★)
* devops-engineer: nemotron-3-super -> kimi-k2.6:cloud (88★)
* browser-automation: qwen3-coder -> kimi-k2.6:cloud (86★)
* agent-architect: glm-5.1 -> kimi-k2.6:cloud (86★)
- Add Model Evolution Guard system:
* agent-evolution/scripts/lib/fitness-gate.cjs
* Rejects downgrades of more than 3 points or to a score below 75
* Produces detailed diff report before any file modifications
* Normalizes model IDs before lookup (v3.html uses ':' where the JSON uses '-')
- Update sync-benchmarks-from-yaml.cjs with fitness gate
- Update model-benchmarks.json with v3 optimal assignments
- Rebuild research-dashboard.html (104KB, 30 agents, 11 models)
- Add model-evolution-guard.md architecture documentation
- Add v3-optimal-models.json as source-of-truth reference
Fixes a regression introduced by commit 3badb25, in which models were
silently downgraded from their heatmap-optimal assignments to inferior ones.
This commit is contained in:
@@ -255,7 +255,7 @@
|
||||
<div class="container">
|
||||
<div class="header">
|
||||
<h1>APAW Agent Model Research v2</h1>
|
||||
<div class="sub">Live dashboard • 15 models × 32 agents • 2026-04-29</div>
|
||||
<div class="sub">Live dashboard • 15 models × 30 agents • 2026-04-29</div>
|
||||
</div>
|
||||
|
||||
<div class="tabs" id="tabBar">
|
||||
@@ -419,12 +419,12 @@
|
||||
|
||||
<script>
|
||||
// BENCHMARK_DATA_PLACEHOLDER - REPLACED BY BUILD SCRIPT
|
||||
// Generated from model-benchmarks.json on 2026-04-29T19:58:05.244Z
|
||||
// Generated from model-benchmarks.json on 2026-04-29T22:15:07.925Z
|
||||
const EMBEDDED_DATA = {
|
||||
"version": "1.0.0",
|
||||
"generated": "2026-04-29T19:56:51.418Z",
|
||||
"source": ".kilo/capability-index.yaml (synced v2)",
|
||||
"total_agents": 32,
|
||||
"generated": "2026-04-29T21:47:05.339Z",
|
||||
"source": ".kilo/capability-index.yaml (synced v3 + fitness-gate)",
|
||||
"total_agents": 30,
|
||||
"total_models_tracked": 11,
|
||||
"providers": [
|
||||
"ollama",
|
||||
@@ -800,8 +800,8 @@ const EMBEDDED_DATA = {
|
||||
"agent_model_scores": [
|
||||
{
|
||||
"agent": "lead-developer",
|
||||
"current_model_index": 6,
|
||||
"current_model_id": "nemotron-3-super",
|
||||
"current_model_index": 0,
|
||||
"current_model_id": "qwen3-coder-480b",
|
||||
"reasoning_effort": "H",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 92,
|
||||
@@ -818,8 +818,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "frontend-developer",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3-coder:480b",
|
||||
"current_model_index": 1,
|
||||
"current_model_id": "minimax-m2.5",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 86,
|
||||
@@ -836,8 +836,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "php-developer",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3-coder:480b",
|
||||
"current_model_index": 0,
|
||||
"current_model_id": "qwen3-coder-480b",
|
||||
"reasoning_effort": "H",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 87,
|
||||
@@ -854,8 +854,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "python-developer",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3-coder:480b",
|
||||
"current_model_index": 0,
|
||||
"current_model_id": "qwen3-coder-480b",
|
||||
"reasoning_effort": "H",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 90,
|
||||
@@ -872,8 +872,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "backend-developer",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3-coder:480b",
|
||||
"current_model_index": 0,
|
||||
"current_model_id": "qwen3-coder-480b",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 91,
|
||||
@@ -890,8 +890,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "go-developer",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3-coder:480b",
|
||||
"current_model_index": 0,
|
||||
"current_model_id": "qwen3-coder-480b",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 85,
|
||||
@@ -908,8 +908,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "flutter-developer",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3-coder:480b",
|
||||
"current_model_index": 0,
|
||||
"current_model_id": "qwen3-coder-480b",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 86,
|
||||
@@ -926,8 +926,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "devops-engineer",
|
||||
"current_model_index": 6,
|
||||
"current_model_id": "nemotron-3-super",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "kimi-k2.6",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 66,
|
||||
@@ -944,8 +944,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "sdet-engineer",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3-coder:480b",
|
||||
"current_model_index": 0,
|
||||
"current_model_id": "qwen3-coder-480b",
|
||||
"reasoning_effort": "H",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 88,
|
||||
@@ -1035,7 +1035,7 @@ const EMBEDDED_DATA = {
|
||||
{
|
||||
"agent": "browser-automation",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3-coder:480b",
|
||||
"current_model_id": "kimi-k2.6",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 87,
|
||||
@@ -1052,8 +1052,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "visual-tester",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3-coder:480b",
|
||||
"current_model_index": 0,
|
||||
"current_model_id": "qwen3-coder-480b",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 82,
|
||||
@@ -1070,9 +1070,9 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "system-analyst",
|
||||
"current_model_index": 6,
|
||||
"current_model_id": "nemotron-3-super",
|
||||
"reasoning_effort": "H",
|
||||
"current_model_index": 7,
|
||||
"current_model_id": "glm-5.1",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 70,
|
||||
"minimax-m2.5": 66,
|
||||
@@ -1086,42 +1086,6 @@ const EMBEDDED_DATA = {
|
||||
"kimi-k2-6": 86
|
||||
}
|
||||
},
|
||||
{
|
||||
"agent": "requirement-refiner",
|
||||
"current_model_index": 7,
|
||||
"current_model_id": "glm-5.1",
|
||||
"reasoning_effort": "H",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 66,
|
||||
"minimax-m2.5": 62,
|
||||
"minimax-m2.7": 60,
|
||||
"nemotron-3-super": 72,
|
||||
"glm-5.1": 80,
|
||||
"deepseek-v4-pro-max": 82,
|
||||
"qwen3-5-122b": 74,
|
||||
"qwen3-coder-next": 54,
|
||||
"qwen3-6-plus": 78,
|
||||
"kimi-k2-6": 82
|
||||
}
|
||||
},
|
||||
{
|
||||
"agent": "history-miner",
|
||||
"current_model_index": 6,
|
||||
"current_model_id": "nemotron-3-super",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 68,
|
||||
"minimax-m2.5": 60,
|
||||
"minimax-m2.7": 56,
|
||||
"nemotron-3-super": 85,
|
||||
"glm-5.1": 78,
|
||||
"deepseek-v4-pro-max": 86,
|
||||
"qwen3-5-122b": 72,
|
||||
"qwen3-coder-next": 56,
|
||||
"qwen3-6-plus": 84,
|
||||
"kimi-k2-6": 82
|
||||
}
|
||||
},
|
||||
{
|
||||
"agent": "capability-analyst",
|
||||
"current_model_index": 7,
|
||||
@@ -1143,7 +1107,7 @@ const EMBEDDED_DATA = {
|
||||
{
|
||||
"agent": "orchestrator",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "kimi-k2.6:cloud",
|
||||
"current_model_id": "kimi-k2.6",
|
||||
"reasoning_effort": "H",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 74,
|
||||
@@ -1286,8 +1250,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "agent-architect",
|
||||
"current_model_index": 7,
|
||||
"current_model_id": "glm-5.1",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "kimi-k2.6",
|
||||
"reasoning_effort": "H",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 78,
|
||||
@@ -1391,17 +1355,17 @@ const EMBEDDED_DATA = {
|
||||
"agent_current_config": [
|
||||
{
|
||||
"agent": "lead-developer",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"model": "ollama-cloud/qwen3-coder:480b",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "nemotron",
|
||||
"badge_type": "qwen",
|
||||
"fit_score": 0,
|
||||
"status": "good",
|
||||
"previous_model": null
|
||||
},
|
||||
{
|
||||
"agent": "frontend-developer",
|
||||
"model": "ollama-cloud/qwen3-coder:480b",
|
||||
"model": "ollama-cloud/minimax-m2.5",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "qwen",
|
||||
@@ -1461,7 +1425,7 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "devops-engineer",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"model": "ollama-cloud/kimi-k2.6",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "nemotron",
|
||||
@@ -1521,7 +1485,7 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "browser-automation",
|
||||
"model": "ollama-cloud/qwen3-coder:480b",
|
||||
"model": "ollama-cloud/kimi-k2.6",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "qwen",
|
||||
@@ -1541,16 +1505,6 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "system-analyst",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "nemotron",
|
||||
"fit_score": 0,
|
||||
"status": "good",
|
||||
"previous_model": null
|
||||
},
|
||||
{
|
||||
"agent": "requirement-refiner",
|
||||
"model": "ollama-cloud/glm-5.1",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
@@ -1559,16 +1513,6 @@ const EMBEDDED_DATA = {
|
||||
"status": "good",
|
||||
"previous_model": null
|
||||
},
|
||||
{
|
||||
"agent": "history-miner",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "nemotron",
|
||||
"fit_score": 0,
|
||||
"status": "good",
|
||||
"previous_model": null
|
||||
},
|
||||
{
|
||||
"agent": "capability-analyst",
|
||||
"model": "ollama-cloud/glm-5.1",
|
||||
@@ -1581,7 +1525,7 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "orchestrator",
|
||||
"model": "ollama-cloud/kimi-k2.6:cloud",
|
||||
"model": "ollama-cloud/kimi-k2.6",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "kimi",
|
||||
@@ -1661,7 +1605,7 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "agent-architect",
|
||||
"model": "ollama-cloud/glm-5.1",
|
||||
"model": "ollama-cloud/kimi-k2.6",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "glm",
|
||||
|
||||
Reference in New Issue
Block a user