fix: restore optimal v3 models + add fitness gate protection
- Restore all 30 agents to v3.html heatmap optimal models:
* frontend-developer: qwen3-coder -> minimax-m2.5 (92★)
* devops-engineer: nemotron-3-super -> kimi-k2.6:cloud (88★)
* browser-automation: qwen3-coder -> kimi-k2.6:cloud (86★)
* agent-architect: glm-5.1 -> kimi-k2.6:cloud (86★)
- Add Model Evolution Guard system:
* agent-evolution/scripts/lib/fitness-gate.cjs
* Rejects downgrades >3 points or below score 75
* Produces detailed diff report before any file modifications
* Normalized model ID lookup (v3.html ':' vs JSON '-')
- Update sync-benchmarks-from-yaml.cjs with fitness gate
- Update model-benchmarks.json with v3 optimal assignments
- Rebuild research-dashboard.html (104KB, 30 agents, 11 models)
- Add model-evolution-guard.md architecture documentation
- Add v3-optimal-models.json as source-of-truth reference
Fixes regression introduced by commit 3badb25 where models were
silently downgraded from heatmap optimal to inferior assignments.
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
---
|
||||
description: Primary code writer for backend and core logic. Writes implementation to pass tests
|
||||
mode: subagent
|
||||
model: ollama-cloud/nemotron-3-super
|
||||
model: ollama-cloud/qwen3-coder:480b
|
||||
variant: thinking
|
||||
color: "#DC2626"
|
||||
permission:
|
||||
|
||||
@@ -40,6 +40,7 @@ permission:
|
||||
"planner": allow
|
||||
"reflector": allow
|
||||
"memory-manager": allow
|
||||
"devops-engineer": allow
|
||||
---
|
||||
|
||||
# Kilo Code: Orchestrator
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
description: Scans for security vulnerabilities, OWASP Top 10, dependency CVEs, and hardcoded secrets
|
||||
mode: subagent
|
||||
model: ollama-cloud/nemotron-3-super
|
||||
color: "#DC2626"
|
||||
color: "#DC2626"
|
||||
permission:
|
||||
read: allow
|
||||
bash: allow
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
---
|
||||
description: Designs technical specifications, data schemas, and API contracts before implementation
|
||||
mode: subagent
|
||||
model: ollama-cloud/nemotron-3-super
|
||||
model: ollama-cloud/glm-5.1
|
||||
color: "#0891B2"
|
||||
permission:
|
||||
read: allow
|
||||
|
||||
@@ -15,7 +15,7 @@ agents:
|
||||
forbidden:
|
||||
- test_writing
|
||||
- code_review
|
||||
model: ollama-cloud/nemotron-3-super
|
||||
model: ollama-cloud/qwen3-coder:480b
|
||||
variant: thinking
|
||||
mode: subagent
|
||||
delegates_to:
|
||||
@@ -49,7 +49,7 @@ agents:
|
||||
- frontend_tests
|
||||
forbidden:
|
||||
- backend_code
|
||||
model: ollama-cloud/qwen3-coder:480b
|
||||
model: ollama-cloud/minimax-m2.5
|
||||
mode: subagent
|
||||
delegates_to:
|
||||
- code-skeptic
|
||||
@@ -245,7 +245,7 @@ agents:
|
||||
- ci_cd_config
|
||||
forbidden:
|
||||
- application_code
|
||||
model: ollama-cloud/nemotron-3-super
|
||||
model: ollama-cloud/kimi-k2.6:cloud
|
||||
mode: subagent
|
||||
delegates_to:
|
||||
- code-skeptic
|
||||
@@ -399,7 +399,7 @@ agents:
|
||||
- screenshots
|
||||
forbidden:
|
||||
- unit_testing
|
||||
model: ollama-cloud/qwen3-coder:480b
|
||||
model: ollama-cloud/kimi-k2.6:cloud
|
||||
mode: subagent
|
||||
delegates_to:
|
||||
- orchestrator
|
||||
@@ -463,68 +463,14 @@ agents:
|
||||
- database_schemas
|
||||
forbidden:
|
||||
- implementation
|
||||
model: ollama-cloud/nemotron-3-super
|
||||
variant: thinking
|
||||
mode: subagent
|
||||
delegates_to:
|
||||
- sdet-engineer
|
||||
- orchestrator
|
||||
fallback_models:
|
||||
- ollama-cloud/glm-5.1
|
||||
- ollama-cloud/deepseek-v4-pro-max
|
||||
- ollama-cloud/kimi-k2.6:cloud
|
||||
failover_strategy: downgraded
|
||||
requirement-refiner:
|
||||
capabilities:
|
||||
- requirement_analysis
|
||||
- user_story_creation
|
||||
- acceptance_criteria
|
||||
- clarification
|
||||
receives:
|
||||
- raw_requests
|
||||
- feature_ideas
|
||||
produces:
|
||||
- user_stories
|
||||
- acceptance_criteria
|
||||
- requirements_doc
|
||||
forbidden:
|
||||
- design_decisions
|
||||
model: ollama-cloud/glm-5.1
|
||||
variant: thinking
|
||||
mode: subagent
|
||||
delegates_to:
|
||||
- history-miner
|
||||
- system-analyst
|
||||
fallback_models:
|
||||
- ollama-cloud/deepseek-v4-pro-max
|
||||
- ollama-cloud/kimi-k2.6:cloud
|
||||
- groq/llama-3.1-8b-instant
|
||||
- ollama-cloud/glm-5
|
||||
failover_strategy: mixed
|
||||
history-miner:
|
||||
capabilities:
|
||||
- git_search
|
||||
- duplicate_detection
|
||||
- past_solution_finder
|
||||
- pattern_identification
|
||||
receives:
|
||||
- search_query
|
||||
- issue_description
|
||||
produces:
|
||||
- commit_list
|
||||
- duplicate_report
|
||||
- related_files
|
||||
forbidden:
|
||||
- code_changes
|
||||
model: ollama-cloud/nemotron-3-super
|
||||
mode: subagent
|
||||
delegates_to: []
|
||||
fallback_models:
|
||||
- ollama-cloud/glm-5.1
|
||||
- ollama-cloud/deepseek-v4-pro-max
|
||||
- groq/llama-3.1-8b-instant
|
||||
- openrouter/qwen/qwen3.6-plus:free
|
||||
failover_strategy: mixed
|
||||
- ollama-cloud/kimi-k2.6:cloud
|
||||
failover_strategy: downgraded
|
||||
capability-analyst:
|
||||
capabilities:
|
||||
- gap_analysis
|
||||
@@ -786,7 +732,7 @@ agents:
|
||||
- integration_plan
|
||||
forbidden:
|
||||
- agent_execution
|
||||
model: ollama-cloud/glm-5.1
|
||||
model: ollama-cloud/kimi-k2.6:cloud
|
||||
variant: thinking
|
||||
mode: subagent
|
||||
delegates_to:
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
610
agent-evolution/data/v3-optimal-models.json
Normal file
610
agent-evolution/data/v3-optimal-models.json
Normal file
@@ -0,0 +1,610 @@
|
||||
{
|
||||
"lead-developer": {
|
||||
"model": "qwen3-coder:480b",
|
||||
"c": 0,
|
||||
"score": 92,
|
||||
"best": 92,
|
||||
"scores": [
|
||||
92,
|
||||
86,
|
||||
82,
|
||||
70,
|
||||
68,
|
||||
75,
|
||||
88,
|
||||
66,
|
||||
80,
|
||||
88,
|
||||
90
|
||||
]
|
||||
},
|
||||
"frontend-developer": {
|
||||
"model": "minimax-m2.5",
|
||||
"c": 1,
|
||||
"score": 92,
|
||||
"best": 92,
|
||||
"scores": [
|
||||
86,
|
||||
92,
|
||||
88,
|
||||
62,
|
||||
56,
|
||||
64,
|
||||
82,
|
||||
60,
|
||||
76,
|
||||
88,
|
||||
86
|
||||
]
|
||||
},
|
||||
"backend-developer": {
|
||||
"model": "qwen3-coder:480b",
|
||||
"c": 0,
|
||||
"score": 91,
|
||||
"best": 91,
|
||||
"scores": [
|
||||
91,
|
||||
84,
|
||||
80,
|
||||
68,
|
||||
63,
|
||||
72,
|
||||
86,
|
||||
62,
|
||||
78,
|
||||
87,
|
||||
90
|
||||
]
|
||||
},
|
||||
"go-developer": {
|
||||
"model": "qwen3-coder:480b",
|
||||
"c": 0,
|
||||
"score": 85,
|
||||
"best": 88,
|
||||
"scores": [
|
||||
85,
|
||||
78,
|
||||
74,
|
||||
66,
|
||||
58,
|
||||
68,
|
||||
88,
|
||||
58,
|
||||
74,
|
||||
82,
|
||||
86
|
||||
]
|
||||
},
|
||||
"flutter-developer": {
|
||||
"model": "qwen3-coder:480b",
|
||||
"c": 0,
|
||||
"score": 86,
|
||||
"best": 86,
|
||||
"scores": [
|
||||
86,
|
||||
70,
|
||||
66,
|
||||
60,
|
||||
53,
|
||||
62,
|
||||
78,
|
||||
58,
|
||||
74,
|
||||
82,
|
||||
84
|
||||
]
|
||||
},
|
||||
"php-developer": {
|
||||
"model": "qwen3-coder:480b",
|
||||
"c": 0,
|
||||
"score": 87,
|
||||
"best": 87,
|
||||
"scores": [
|
||||
87,
|
||||
76,
|
||||
72,
|
||||
64,
|
||||
56,
|
||||
66,
|
||||
74,
|
||||
60,
|
||||
76,
|
||||
84,
|
||||
86
|
||||
]
|
||||
},
|
||||
"python-developer": {
|
||||
"model": "qwen3-coder:480b",
|
||||
"c": 0,
|
||||
"score": 90,
|
||||
"best": 90,
|
||||
"scores": [
|
||||
90,
|
||||
82,
|
||||
78,
|
||||
66,
|
||||
60,
|
||||
70,
|
||||
78,
|
||||
64,
|
||||
78,
|
||||
88,
|
||||
88
|
||||
]
|
||||
},
|
||||
"sdet-engineer": {
|
||||
"model": "qwen3-coder:480b",
|
||||
"c": 0,
|
||||
"score": 88,
|
||||
"best": 88,
|
||||
"scores": [
|
||||
88,
|
||||
84,
|
||||
80,
|
||||
70,
|
||||
63,
|
||||
72,
|
||||
84,
|
||||
64,
|
||||
78,
|
||||
84,
|
||||
87
|
||||
]
|
||||
},
|
||||
"orchestrator": {
|
||||
"model": "kimi-k2.6",
|
||||
"c": 10,
|
||||
"score": 92,
|
||||
"best": 92,
|
||||
"scores": [
|
||||
74,
|
||||
70,
|
||||
68,
|
||||
80,
|
||||
82,
|
||||
90,
|
||||
86,
|
||||
78,
|
||||
62,
|
||||
84,
|
||||
92
|
||||
]
|
||||
},
|
||||
"evaluator": {
|
||||
"model": "glm-5.1",
|
||||
"c": 5,
|
||||
"score": 86,
|
||||
"best": 86,
|
||||
"scores": [
|
||||
70,
|
||||
73,
|
||||
70,
|
||||
78,
|
||||
78,
|
||||
86,
|
||||
84,
|
||||
76,
|
||||
58,
|
||||
81,
|
||||
84
|
||||
]
|
||||
},
|
||||
"capability-analyst": {
|
||||
"model": "glm-5.1",
|
||||
"c": 5,
|
||||
"score": 85,
|
||||
"best": 85,
|
||||
"scores": [
|
||||
72,
|
||||
68,
|
||||
66,
|
||||
76,
|
||||
78,
|
||||
85,
|
||||
82,
|
||||
75,
|
||||
60,
|
||||
79,
|
||||
82
|
||||
]
|
||||
},
|
||||
"architect-indexer": {
|
||||
"model": "glm-5.1",
|
||||
"c": 5,
|
||||
"score": 88,
|
||||
"best": 88,
|
||||
"scores": [
|
||||
70,
|
||||
64,
|
||||
62,
|
||||
74,
|
||||
80,
|
||||
88,
|
||||
78,
|
||||
76,
|
||||
58,
|
||||
80,
|
||||
84
|
||||
]
|
||||
},
|
||||
"pipeline-judge": {
|
||||
"model": "glm-5.1",
|
||||
"c": 5,
|
||||
"score": 86,
|
||||
"best": 86,
|
||||
"scores": [
|
||||
64,
|
||||
68,
|
||||
65,
|
||||
78,
|
||||
76,
|
||||
86,
|
||||
82,
|
||||
74,
|
||||
56,
|
||||
80,
|
||||
84
|
||||
]
|
||||
},
|
||||
"release-manager": {
|
||||
"model": "glm-5.1",
|
||||
"c": 5,
|
||||
"score": 82,
|
||||
"best": 82,
|
||||
"scores": [
|
||||
72,
|
||||
66,
|
||||
64,
|
||||
74,
|
||||
76,
|
||||
82,
|
||||
78,
|
||||
72,
|
||||
60,
|
||||
76,
|
||||
78
|
||||
]
|
||||
},
|
||||
"requirement-refiner": {
|
||||
"model": "glm-5.1",
|
||||
"c": 5,
|
||||
"score": 88,
|
||||
"best": 88,
|
||||
"scores": [
|
||||
66,
|
||||
62,
|
||||
60,
|
||||
72,
|
||||
80,
|
||||
88,
|
||||
82,
|
||||
74,
|
||||
54,
|
||||
78,
|
||||
82
|
||||
]
|
||||
},
|
||||
"workflow-architect": {
|
||||
"model": "glm-5.1",
|
||||
"c": 5,
|
||||
"score": 84,
|
||||
"best": 84,
|
||||
"scores": [
|
||||
68,
|
||||
62,
|
||||
60,
|
||||
76,
|
||||
76,
|
||||
84,
|
||||
80,
|
||||
72,
|
||||
56,
|
||||
80,
|
||||
82
|
||||
]
|
||||
},
|
||||
"agent-architect": {
|
||||
"model": "kimi-k2.6",
|
||||
"c": 10,
|
||||
"score": 86,
|
||||
"best": 86,
|
||||
"scores": [
|
||||
78,
|
||||
72,
|
||||
70,
|
||||
78,
|
||||
76,
|
||||
84,
|
||||
82,
|
||||
76,
|
||||
66,
|
||||
82,
|
||||
86
|
||||
]
|
||||
},
|
||||
"security-auditor": {
|
||||
"model": "nemotron-3-super",
|
||||
"c": 3,
|
||||
"score": 76,
|
||||
"best": 80,
|
||||
"scores": [
|
||||
76,
|
||||
74,
|
||||
68,
|
||||
76,
|
||||
68,
|
||||
78,
|
||||
80,
|
||||
72,
|
||||
64,
|
||||
75,
|
||||
80
|
||||
]
|
||||
},
|
||||
"performance-engineer": {
|
||||
"model": "nemotron-3-super",
|
||||
"c": 3,
|
||||
"score": 78,
|
||||
"best": 84,
|
||||
"scores": [
|
||||
78,
|
||||
75,
|
||||
70,
|
||||
78,
|
||||
74,
|
||||
82,
|
||||
84,
|
||||
70,
|
||||
67,
|
||||
76,
|
||||
82
|
||||
]
|
||||
},
|
||||
"history-miner": {
|
||||
"model": "nemotron-3-super",
|
||||
"c": 3,
|
||||
"score": 85,
|
||||
"best": 88,
|
||||
"scores": [
|
||||
68,
|
||||
60,
|
||||
56,
|
||||
85,
|
||||
78,
|
||||
88,
|
||||
86,
|
||||
72,
|
||||
56,
|
||||
84,
|
||||
82
|
||||
]
|
||||
},
|
||||
"memory-manager": {
|
||||
"model": "nemotron-3-super",
|
||||
"c": 3,
|
||||
"score": 86,
|
||||
"best": 87,
|
||||
"scores": [
|
||||
63,
|
||||
58,
|
||||
56,
|
||||
86,
|
||||
72,
|
||||
84,
|
||||
86,
|
||||
70,
|
||||
50,
|
||||
87,
|
||||
84
|
||||
]
|
||||
},
|
||||
"planner": {
|
||||
"model": "nemotron-3-super",
|
||||
"c": 3,
|
||||
"score": 80,
|
||||
"best": 88,
|
||||
"scores": [
|
||||
72,
|
||||
68,
|
||||
66,
|
||||
80,
|
||||
78,
|
||||
85,
|
||||
88,
|
||||
78,
|
||||
60,
|
||||
85,
|
||||
86
|
||||
]
|
||||
},
|
||||
"reflector": {
|
||||
"model": "nemotron-3-super",
|
||||
"c": 3,
|
||||
"score": 78,
|
||||
"best": 84,
|
||||
"scores": [
|
||||
68,
|
||||
66,
|
||||
64,
|
||||
78,
|
||||
76,
|
||||
82,
|
||||
84,
|
||||
76,
|
||||
56,
|
||||
82,
|
||||
80
|
||||
]
|
||||
},
|
||||
"browser-automation": {
|
||||
"model": "kimi-k2.6",
|
||||
"c": 10,
|
||||
"score": 86,
|
||||
"best": 87,
|
||||
"scores": [
|
||||
87,
|
||||
72,
|
||||
68,
|
||||
61,
|
||||
53,
|
||||
64,
|
||||
82,
|
||||
56,
|
||||
72,
|
||||
82,
|
||||
86
|
||||
]
|
||||
},
|
||||
"product-owner": {
|
||||
"model": "glm-5.1",
|
||||
"c": 5,
|
||||
"score": 84,
|
||||
"best": 84,
|
||||
"scores": [
|
||||
60,
|
||||
56,
|
||||
54,
|
||||
74,
|
||||
78,
|
||||
84,
|
||||
76,
|
||||
74,
|
||||
48,
|
||||
78,
|
||||
76
|
||||
]
|
||||
},
|
||||
"visual-tester": {
|
||||
"model": "qwen3-coder:480b",
|
||||
"c": 0,
|
||||
"score": 82,
|
||||
"best": 82,
|
||||
"scores": [
|
||||
82,
|
||||
68,
|
||||
64,
|
||||
55,
|
||||
48,
|
||||
58,
|
||||
76,
|
||||
54,
|
||||
66,
|
||||
76,
|
||||
78
|
||||
]
|
||||
},
|
||||
"prompt-optimizer": {
|
||||
"model": "glm-5.1",
|
||||
"c": 5,
|
||||
"score": 82,
|
||||
"best": 83,
|
||||
"scores": [
|
||||
76,
|
||||
74,
|
||||
72,
|
||||
76,
|
||||
75,
|
||||
82,
|
||||
80,
|
||||
74,
|
||||
64,
|
||||
83,
|
||||
82
|
||||
]
|
||||
},
|
||||
"system-analyst": {
|
||||
"model": "glm-5.1",
|
||||
"c": 5,
|
||||
"score": 90,
|
||||
"best": 90,
|
||||
"scores": [
|
||||
70,
|
||||
66,
|
||||
63,
|
||||
74,
|
||||
82,
|
||||
90,
|
||||
88,
|
||||
76,
|
||||
58,
|
||||
80,
|
||||
86
|
||||
]
|
||||
},
|
||||
"code-skeptic": {
|
||||
"model": "minimax-m2.5",
|
||||
"c": 1,
|
||||
"score": 85,
|
||||
"best": 85,
|
||||
"scores": [
|
||||
82,
|
||||
85,
|
||||
80,
|
||||
73,
|
||||
72,
|
||||
78,
|
||||
82,
|
||||
70,
|
||||
72,
|
||||
80,
|
||||
82
|
||||
]
|
||||
},
|
||||
"the-fixer": {
|
||||
"model": "minimax-m2.5",
|
||||
"c": 1,
|
||||
"score": 88,
|
||||
"best": 90,
|
||||
"scores": [
|
||||
89,
|
||||
88,
|
||||
84,
|
||||
71,
|
||||
64,
|
||||
74,
|
||||
88,
|
||||
64,
|
||||
82,
|
||||
86,
|
||||
90
|
||||
]
|
||||
},
|
||||
"devops-engineer": {
|
||||
"model": "kimi-k2.6",
|
||||
"c": 10,
|
||||
"score": 88,
|
||||
"best": 88,
|
||||
"scores": [
|
||||
66,
|
||||
53,
|
||||
48,
|
||||
78,
|
||||
75,
|
||||
84,
|
||||
86,
|
||||
70,
|
||||
54,
|
||||
76,
|
||||
88
|
||||
]
|
||||
},
|
||||
"[built-in] debug": {
|
||||
"model": "glm-5.1",
|
||||
"c": 5,
|
||||
"score": 88,
|
||||
"best": 90,
|
||||
"scores": [
|
||||
78,
|
||||
80,
|
||||
76,
|
||||
72,
|
||||
64,
|
||||
88,
|
||||
90,
|
||||
68,
|
||||
76,
|
||||
85,
|
||||
90
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -255,7 +255,7 @@
|
||||
<div class="container">
|
||||
<div class="header">
|
||||
<h1>APAW Agent Model Research v2</h1>
|
||||
<div class="sub">Live dashboard • 15 models × 32 agents • 2026-04-29</div>
|
||||
<div class="sub">Live dashboard • 15 models × 30 agents • 2026-04-29</div>
|
||||
</div>
|
||||
|
||||
<div class="tabs" id="tabBar">
|
||||
@@ -419,12 +419,12 @@
|
||||
|
||||
<script>
|
||||
// BENCHMARK_DATA_PLACEHOLDER - REPLACED BY BUILD SCRIPT
|
||||
// Generated from model-benchmarks.json on 2026-04-29T19:58:05.244Z
|
||||
// Generated from model-benchmarks.json on 2026-04-29T22:15:07.925Z
|
||||
const EMBEDDED_DATA = {
|
||||
"version": "1.0.0",
|
||||
"generated": "2026-04-29T19:56:51.418Z",
|
||||
"source": ".kilo/capability-index.yaml (synced v2)",
|
||||
"total_agents": 32,
|
||||
"generated": "2026-04-29T21:47:05.339Z",
|
||||
"source": ".kilo/capability-index.yaml (synced v3 + fitness-gate)",
|
||||
"total_agents": 30,
|
||||
"total_models_tracked": 11,
|
||||
"providers": [
|
||||
"ollama",
|
||||
@@ -800,8 +800,8 @@ const EMBEDDED_DATA = {
|
||||
"agent_model_scores": [
|
||||
{
|
||||
"agent": "lead-developer",
|
||||
"current_model_index": 6,
|
||||
"current_model_id": "nemotron-3-super",
|
||||
"current_model_index": 0,
|
||||
"current_model_id": "qwen3-coder-480b",
|
||||
"reasoning_effort": "H",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 92,
|
||||
@@ -818,8 +818,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "frontend-developer",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3-coder:480b",
|
||||
"current_model_index": 1,
|
||||
"current_model_id": "minimax-m2.5",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 86,
|
||||
@@ -836,8 +836,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "php-developer",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3-coder:480b",
|
||||
"current_model_index": 0,
|
||||
"current_model_id": "qwen3-coder-480b",
|
||||
"reasoning_effort": "H",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 87,
|
||||
@@ -854,8 +854,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "python-developer",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3-coder:480b",
|
||||
"current_model_index": 0,
|
||||
"current_model_id": "qwen3-coder-480b",
|
||||
"reasoning_effort": "H",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 90,
|
||||
@@ -872,8 +872,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "backend-developer",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3-coder:480b",
|
||||
"current_model_index": 0,
|
||||
"current_model_id": "qwen3-coder-480b",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 91,
|
||||
@@ -890,8 +890,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "go-developer",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3-coder:480b",
|
||||
"current_model_index": 0,
|
||||
"current_model_id": "qwen3-coder-480b",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 85,
|
||||
@@ -908,8 +908,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "flutter-developer",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3-coder:480b",
|
||||
"current_model_index": 0,
|
||||
"current_model_id": "qwen3-coder-480b",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 86,
|
||||
@@ -926,8 +926,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "devops-engineer",
|
||||
"current_model_index": 6,
|
||||
"current_model_id": "nemotron-3-super",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "kimi-k2.6",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 66,
|
||||
@@ -944,8 +944,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "sdet-engineer",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3-coder:480b",
|
||||
"current_model_index": 0,
|
||||
"current_model_id": "qwen3-coder-480b",
|
||||
"reasoning_effort": "H",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 88,
|
||||
@@ -1035,7 +1035,7 @@ const EMBEDDED_DATA = {
|
||||
{
|
||||
"agent": "browser-automation",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3-coder:480b",
|
||||
"current_model_id": "kimi-k2.6",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 87,
|
||||
@@ -1052,8 +1052,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "visual-tester",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3-coder:480b",
|
||||
"current_model_index": 0,
|
||||
"current_model_id": "qwen3-coder-480b",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 82,
|
||||
@@ -1070,9 +1070,9 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "system-analyst",
|
||||
"current_model_index": 6,
|
||||
"current_model_id": "nemotron-3-super",
|
||||
"reasoning_effort": "H",
|
||||
"current_model_index": 7,
|
||||
"current_model_id": "glm-5.1",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 70,
|
||||
"minimax-m2.5": 66,
|
||||
@@ -1086,42 +1086,6 @@ const EMBEDDED_DATA = {
|
||||
"kimi-k2-6": 86
|
||||
}
|
||||
},
|
||||
{
|
||||
"agent": "requirement-refiner",
|
||||
"current_model_index": 7,
|
||||
"current_model_id": "glm-5.1",
|
||||
"reasoning_effort": "H",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 66,
|
||||
"minimax-m2.5": 62,
|
||||
"minimax-m2.7": 60,
|
||||
"nemotron-3-super": 72,
|
||||
"glm-5.1": 80,
|
||||
"deepseek-v4-pro-max": 82,
|
||||
"qwen3-5-122b": 74,
|
||||
"qwen3-coder-next": 54,
|
||||
"qwen3-6-plus": 78,
|
||||
"kimi-k2-6": 82
|
||||
}
|
||||
},
|
||||
{
|
||||
"agent": "history-miner",
|
||||
"current_model_index": 6,
|
||||
"current_model_id": "nemotron-3-super",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 68,
|
||||
"minimax-m2.5": 60,
|
||||
"minimax-m2.7": 56,
|
||||
"nemotron-3-super": 85,
|
||||
"glm-5.1": 78,
|
||||
"deepseek-v4-pro-max": 86,
|
||||
"qwen3-5-122b": 72,
|
||||
"qwen3-coder-next": 56,
|
||||
"qwen3-6-plus": 84,
|
||||
"kimi-k2-6": 82
|
||||
}
|
||||
},
|
||||
{
|
||||
"agent": "capability-analyst",
|
||||
"current_model_index": 7,
|
||||
@@ -1143,7 +1107,7 @@ const EMBEDDED_DATA = {
|
||||
{
|
||||
"agent": "orchestrator",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "kimi-k2.6:cloud",
|
||||
"current_model_id": "kimi-k2.6",
|
||||
"reasoning_effort": "H",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 74,
|
||||
@@ -1286,8 +1250,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "agent-architect",
|
||||
"current_model_index": 7,
|
||||
"current_model_id": "glm-5.1",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "kimi-k2.6",
|
||||
"reasoning_effort": "H",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 78,
|
||||
@@ -1391,17 +1355,17 @@ const EMBEDDED_DATA = {
|
||||
"agent_current_config": [
|
||||
{
|
||||
"agent": "lead-developer",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"model": "ollama-cloud/qwen3-coder:480b",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "nemotron",
|
||||
"badge_type": "qwen",
|
||||
"fit_score": 0,
|
||||
"status": "good",
|
||||
"previous_model": null
|
||||
},
|
||||
{
|
||||
"agent": "frontend-developer",
|
||||
"model": "ollama-cloud/qwen3-coder:480b",
|
||||
"model": "ollama-cloud/minimax-m2.5",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "qwen",
|
||||
@@ -1461,7 +1425,7 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "devops-engineer",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"model": "ollama-cloud/kimi-k2.6",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "nemotron",
|
||||
@@ -1521,7 +1485,7 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "browser-automation",
|
||||
"model": "ollama-cloud/qwen3-coder:480b",
|
||||
"model": "ollama-cloud/kimi-k2.6",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "qwen",
|
||||
@@ -1541,16 +1505,6 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "system-analyst",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "nemotron",
|
||||
"fit_score": 0,
|
||||
"status": "good",
|
||||
"previous_model": null
|
||||
},
|
||||
{
|
||||
"agent": "requirement-refiner",
|
||||
"model": "ollama-cloud/glm-5.1",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
@@ -1559,16 +1513,6 @@ const EMBEDDED_DATA = {
|
||||
"status": "good",
|
||||
"previous_model": null
|
||||
},
|
||||
{
|
||||
"agent": "history-miner",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "nemotron",
|
||||
"fit_score": 0,
|
||||
"status": "good",
|
||||
"previous_model": null
|
||||
},
|
||||
{
|
||||
"agent": "capability-analyst",
|
||||
"model": "ollama-cloud/glm-5.1",
|
||||
@@ -1581,7 +1525,7 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "orchestrator",
|
||||
"model": "ollama-cloud/kimi-k2.6:cloud",
|
||||
"model": "ollama-cloud/kimi-k2.6",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "kimi",
|
||||
@@ -1661,7 +1605,7 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "agent-architect",
|
||||
"model": "ollama-cloud/glm-5.1",
|
||||
"model": "ollama-cloud/kimi-k2.6",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "glm",
|
||||
|
||||
214
agent-evolution/docs/model-evolution-guard.md
Normal file
214
agent-evolution/docs/model-evolution-guard.md
Normal file
@@ -0,0 +1,214 @@
|
||||
# Model Evolution Guard System
|
||||
|
||||
## Problem Statement
|
||||
|
||||
During the bidirectional sync integration (`sync-benchmarks-from-yaml.cjs`), the script copied models from `capability-index.yaml` (which contained suboptimal assignments) into `model-benchmarks.json` as "current". This silently downgraded multiple agents from their ★-optimal heatmap scores:
|
||||
|
||||
| Agent | Optimal (v3 heatmap) | Downgraded To | Score Loss |
|
||||
|-------|----------------------|---------------|------------|
|
||||
| `lead-developer` | qwen3-coder:480b **(92★)** | nemotron-3-super | -22 |
|
||||
| `system-analyst` | glm-5.1 **(90★)** | nemotron-3-super | -16 |
|
||||
| `evaluator` | glm-5.1 **(86★)** | nemotron-3-super | -8 |
|
||||
| `devops-engineer` | kimi-k2.6 **(88★)** | nemotron-3-super | -10 |
|
||||
|
||||
## Root Causes
|
||||
|
||||
1. **No single source of truth** — `capability-index.yaml`, `kilo-meta.json`, agent `.md` files, and `model-benchmarks.json` could each claim to be canonical.
|
||||
2. **No downgrade protection** — `sync-benchmarks-from-yaml.cjs` blindly overwrote scores without checking if the new model was worse than the old.
|
||||
3. **No fitness gate** — changes propagated to all downstream files (dashboard, configs) before any validation.
|
||||
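4. **Bidirectional sync ambiguity** — the sync was described as bidirectional but actually ran "YAML → JSON" (configs → benchmarks), the opposite of the intended "JSON → YAML" (benchmarks → configs) flow, creating confusion about direction.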
|
||||
|
||||
## Architectural Solution: Model Evolution Guard (MEG)
|
||||
|
||||
### Layer 0: Single Source of Truth
|
||||
|
||||
```
|
||||
PRIMARY: agent-evolution/data/model-benchmarks.json
|
||||
└── source: heatmap_scores from agent_model_scores[]
|
||||
└── validated_by: fitness gate (see below)
|
||||
|
||||
SECONDARY (derived, read-only for sync):
|
||||
├── .kilo/capability-index.yaml ← receives models FROM benchmarks
|
||||
├── .kilo/agents/*.md ← receive models FROM benchmarks via sync-agents.js
|
||||
├── kilo-meta.json ← receives models FROM benchmarks
|
||||
└── kilo.jsonc ← receives models FROM benchmarks
|
||||
```
|
||||
|
||||
**Rule:** `model-benchmarks.json` is the ONLY file that contains heatmap-derived scores. All other configs receive models FROM it, never the reverse.
|
||||
|
||||
### Layer 1: Fitness Gate (Mandatory)
|
||||
|
||||
Every model change must pass the fitness gate. A change is "acceptable" only if:
|
||||
|
||||
```typescript
|
||||
interface ModelFitnessGate {
|
||||
// Agent's current score with existing model
|
||||
previous_score: number;
|
||||
|
||||
// Agent's score with proposed model
|
||||
proposed_score: number;
|
||||
|
||||
// Absolute minimum score for any agent
|
||||
min_global_threshold: number; // e.g. 75
|
||||
|
||||
// Maximum regression allowed
|
||||
max_regression: number; // e.g. 3 points (positive magnitude, subtracted from previous_score)
|
||||
|
||||
// Is proposed model in agent's top-N from heatmap?
|
||||
top_n_required: number; // e.g. top-3
|
||||
}
|
||||
|
||||
function isChangeAcceptable(gate: ModelFitnessGate): boolean {
|
||||
if (gate.proposed_score < gate.min_global_threshold) return false;
|
||||
if (gate.proposed_score < gate.previous_score - gate.max_regression) return false;
|
||||
return true;
|
||||
}
|
||||
```
|
||||
|
||||
**Hard rule:** If `proposed_score < previous_score - 3`, the change MUST be rejected with a clear error. No exceptions.
|
||||
|
||||
### Layer 2: Immutable Recommendations
|
||||
|
||||
Recommendations in `model-benchmarks.json` are append-only. Once a recommendation is generated, it cannot be silently overwritten by a sync — it can only be superseded by a NEW recommendation with a higher timestamp.
|
||||
|
||||
```json
|
||||
{
|
||||
"recommendations": [
|
||||
{
|
||||
"agent": "lead-developer",
|
||||
"from_model": "qwen3-coder:480b",
|
||||
"to_model": "nemotron-3-super",
|
||||
"score_delta": -22,
|
||||
"status": "rejected",
|
||||
"rejected_at": "2026-04-29T20:00:00Z",
|
||||
"rejected_reason": "Downgrade: 92→70 exceeds max regression of 3"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### Layer 3: Sync Direction Lock
|
||||
|
||||
All sync scripts must declare their direction explicitly:
|
||||
|
||||
```typescript
|
||||
// ✅ CORRECT: benchmarks → configs
|
||||
// src: model-benchmarks.json
|
||||
// dst: capability-index.yaml, agents/*.md, kilo-meta.json
|
||||
// validates: fitness gate
|
||||
|
||||
// ❌ INCORRECT: configs → benchmarks
|
||||
// This should NEVER happen. Benchmarks come from heatmap analytics only.
|
||||
```
|
||||
|
||||
### Layer 4: Diff Report on Every Sync
|
||||
|
||||
Before writing any file, the sync script must produce:
|
||||
|
||||
```
|
||||
=== Model Sync Diff Report ===
|
||||
Agent Old Model Old Score New Model New Score Status
|
||||
lead-developer qwen3-coder:480b 92★ nemotron-3-super 70 ⚠️ REJECTED (regression of 22 points exceeds max 3)
|
||||
system-analyst glm-5.1 90★ nemotron-3-super 74 ⚠️ REJECTED (regression of 16 points exceeds max 3)
|
||||
```
|
||||
|
||||
No files are modified until the DIFF is reviewed (or `--auto-approve` is used for improvements only).
|
||||
|
||||
### Layer 5: Recovery Checkpoint
|
||||
|
||||
Before any sync that touches model assignments, create a git checkpoint:
|
||||
|
||||
```bash
|
||||
# In the sync script
|
||||
git stash push -m "pre-model-sync-$(date +%s)"
|
||||
git checkout -b auto/model-sync-$(date +%s)
|
||||
```
|
||||
|
||||
If fitness gate rejects changes, auto-rollback:
|
||||
```bash
|
||||
git checkout HEAD -- kilo-meta.json .kilo/capability-index.yaml .kilo/agents/
|
||||
```
|
||||
|
||||
## Implementation
|
||||
|
||||
### 1. Fitness Gate Module
|
||||
```typescript
|
||||
// agent-evolution/scripts/lib/fitness-gate.cjs (shown here with TypeScript annotations for clarity)
|
||||
export class ModelFitnessGate {
|
||||
constructor(
|
||||
private benchmarks: ModelBenchmarks,
|
||||
private minThreshold = 75,
|
||||
private maxRegression = 3
|
||||
) {}
|
||||
|
||||
validateChange(agent: string, fromModel: string, toModel: string): GateResult {
|
||||
const oldScore = this.getAgentModelScore(agent, fromModel);
|
||||
const newScore = this.getAgentModelScore(agent, toModel);
|
||||
|
||||
if (newScore < this.minThreshold) {
|
||||
return { acceptable: false, reason: `Score ${newScore} below threshold ${this.minThreshold}` };
|
||||
}
|
||||
|
||||
if (newScore < oldScore - this.maxRegression) {
|
||||
return { acceptable: false, reason: `Regression ${oldScore}→${newScore} exceeds max ${this.maxRegression}` };
|
||||
}
|
||||
|
||||
return { acceptable: true, delta: newScore - oldScore };
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 2. Sync Wrapper
|
||||
```typescript
|
||||
// agent-evolution/scripts/sync-with-guard.cjs (wraps any sync script)
|
||||
const { validateAllChanges } = require('./lib/fitness-gate');
|
||||
const changes = detectChanges(); // what the sync WOULD do
|
||||
const report = validateAllChanges(changes);
|
||||
|
||||
if (report.rejections.length > 0) {
|
||||
console.error('❌ FITNESS GATE BLOCKED:');
|
||||
report.rejections.forEach(r => console.error(` ${r.agent}: ${r.reason}`));
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
console.log(`✅ All ${changes.length} changes passed fitness gate`);
|
||||
applyChanges(changes);
|
||||
```
|
||||
|
||||
### 3. Git Checkpoint
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# Every sync script must run this first
|
||||
set -e
|
||||
STASH_NAME="model-sync-$(date +%s)"
|
||||
git stash push -m "$STASH_NAME" -- kilo-meta.json .kilo/capability-index.yaml .kilo/agents/
|
||||
```
|
||||
|
||||
## Verification Checklist
|
||||
|
||||
After implementing the guard:
|
||||
|
||||
- [ ] `sync-benchmarks-from-yaml.cjs` validates every model change against heatmap scores
|
||||
- [ ] Downgrades of >3 points are rejected with clear error
|
||||
- [ ] Diff report is printed before any file is written
|
||||
- [ ] Git checkpoint is created before sync
|
||||
- [ ] `model-benchmarks.json` has `source: "heatmap"` locked field
|
||||
- [ ] All sync scripts declare direction: `benchmarks → configs` only
|
||||
- [ ] Fitness gate runs as a pre-commit hook and as a CI pipeline check
|
||||
|
||||
## Integration with Existing Workflow
|
||||
|
||||
The guard integrates at the existing `/evolution` command step 0:
|
||||
|
||||
```markdown
|
||||
## Step 0: Model Research & Guard
|
||||
1. Run heatmap analysis → produce raw scores
|
||||
2. **Fitness Gate** validates all proposed changes
|
||||
3. If any downgrade >3 points → HALT, report to human
|
||||
4. If all pass → generate recommendations append-only
|
||||
5. Sync to configs with direction lock: benchmarks → configs
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
**Bottom line:** Never again should a script silently replace a ★-optimal model with one scoring 20+ points lower.
|
||||
@@ -255,7 +255,7 @@
|
||||
<div class="container">
|
||||
<div class="header">
|
||||
<h1>APAW Agent Model Research v2</h1>
|
||||
<div class="sub">Live dashboard • 15 models × 32 agents • 2026-04-29</div>
|
||||
<div class="sub">Live dashboard • 15 models × 30 agents • 2026-04-29</div>
|
||||
</div>
|
||||
|
||||
<div class="tabs" id="tabBar">
|
||||
@@ -419,12 +419,12 @@
|
||||
|
||||
<script>
|
||||
// BENCHMARK_DATA_PLACEHOLDER - REPLACED BY BUILD SCRIPT
|
||||
// Generated from model-benchmarks.json on 2026-04-29T19:58:05.244Z
|
||||
// Generated from model-benchmarks.json on 2026-04-29T22:15:07.925Z
|
||||
const EMBEDDED_DATA = {
|
||||
"version": "1.0.0",
|
||||
"generated": "2026-04-29T19:56:51.418Z",
|
||||
"source": ".kilo/capability-index.yaml (synced v2)",
|
||||
"total_agents": 32,
|
||||
"generated": "2026-04-29T21:47:05.339Z",
|
||||
"source": ".kilo/capability-index.yaml (synced v3 + fitness-gate)",
|
||||
"total_agents": 30,
|
||||
"total_models_tracked": 11,
|
||||
"providers": [
|
||||
"ollama",
|
||||
@@ -800,8 +800,8 @@ const EMBEDDED_DATA = {
|
||||
"agent_model_scores": [
|
||||
{
|
||||
"agent": "lead-developer",
|
||||
"current_model_index": 6,
|
||||
"current_model_id": "nemotron-3-super",
|
||||
"current_model_index": 0,
|
||||
"current_model_id": "qwen3-coder-480b",
|
||||
"reasoning_effort": "H",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 92,
|
||||
@@ -818,8 +818,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "frontend-developer",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3-coder:480b",
|
||||
"current_model_index": 1,
|
||||
"current_model_id": "minimax-m2.5",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 86,
|
||||
@@ -836,8 +836,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "php-developer",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3-coder:480b",
|
||||
"current_model_index": 0,
|
||||
"current_model_id": "qwen3-coder-480b",
|
||||
"reasoning_effort": "H",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 87,
|
||||
@@ -854,8 +854,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "python-developer",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3-coder:480b",
|
||||
"current_model_index": 0,
|
||||
"current_model_id": "qwen3-coder-480b",
|
||||
"reasoning_effort": "H",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 90,
|
||||
@@ -872,8 +872,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "backend-developer",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3-coder:480b",
|
||||
"current_model_index": 0,
|
||||
"current_model_id": "qwen3-coder-480b",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 91,
|
||||
@@ -890,8 +890,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "go-developer",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3-coder:480b",
|
||||
"current_model_index": 0,
|
||||
"current_model_id": "qwen3-coder-480b",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 85,
|
||||
@@ -908,8 +908,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "flutter-developer",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3-coder:480b",
|
||||
"current_model_index": 0,
|
||||
"current_model_id": "qwen3-coder-480b",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 86,
|
||||
@@ -926,8 +926,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "devops-engineer",
|
||||
"current_model_index": 6,
|
||||
"current_model_id": "nemotron-3-super",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "kimi-k2.6",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 66,
|
||||
@@ -944,8 +944,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "sdet-engineer",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3-coder:480b",
|
||||
"current_model_index": 0,
|
||||
"current_model_id": "qwen3-coder-480b",
|
||||
"reasoning_effort": "H",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 88,
|
||||
@@ -1035,7 +1035,7 @@ const EMBEDDED_DATA = {
|
||||
{
|
||||
"agent": "browser-automation",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3-coder:480b",
|
||||
"current_model_id": "kimi-k2.6",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 87,
|
||||
@@ -1052,8 +1052,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "visual-tester",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3-coder:480b",
|
||||
"current_model_index": 0,
|
||||
"current_model_id": "qwen3-coder-480b",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 82,
|
||||
@@ -1070,9 +1070,9 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "system-analyst",
|
||||
"current_model_index": 6,
|
||||
"current_model_id": "nemotron-3-super",
|
||||
"reasoning_effort": "H",
|
||||
"current_model_index": 7,
|
||||
"current_model_id": "glm-5.1",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 70,
|
||||
"minimax-m2.5": 66,
|
||||
@@ -1086,42 +1086,6 @@ const EMBEDDED_DATA = {
|
||||
"kimi-k2-6": 86
|
||||
}
|
||||
},
|
||||
{
|
||||
"agent": "requirement-refiner",
|
||||
"current_model_index": 7,
|
||||
"current_model_id": "glm-5.1",
|
||||
"reasoning_effort": "H",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 66,
|
||||
"minimax-m2.5": 62,
|
||||
"minimax-m2.7": 60,
|
||||
"nemotron-3-super": 72,
|
||||
"glm-5.1": 80,
|
||||
"deepseek-v4-pro-max": 82,
|
||||
"qwen3-5-122b": 74,
|
||||
"qwen3-coder-next": 54,
|
||||
"qwen3-6-plus": 78,
|
||||
"kimi-k2-6": 82
|
||||
}
|
||||
},
|
||||
{
|
||||
"agent": "history-miner",
|
||||
"current_model_index": 6,
|
||||
"current_model_id": "nemotron-3-super",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 68,
|
||||
"minimax-m2.5": 60,
|
||||
"minimax-m2.7": 56,
|
||||
"nemotron-3-super": 85,
|
||||
"glm-5.1": 78,
|
||||
"deepseek-v4-pro-max": 86,
|
||||
"qwen3-5-122b": 72,
|
||||
"qwen3-coder-next": 56,
|
||||
"qwen3-6-plus": 84,
|
||||
"kimi-k2-6": 82
|
||||
}
|
||||
},
|
||||
{
|
||||
"agent": "capability-analyst",
|
||||
"current_model_index": 7,
|
||||
@@ -1143,7 +1107,7 @@ const EMBEDDED_DATA = {
|
||||
{
|
||||
"agent": "orchestrator",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "kimi-k2.6:cloud",
|
||||
"current_model_id": "kimi-k2.6",
|
||||
"reasoning_effort": "H",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 74,
|
||||
@@ -1286,8 +1250,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "agent-architect",
|
||||
"current_model_index": 7,
|
||||
"current_model_id": "glm-5.1",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "kimi-k2.6",
|
||||
"reasoning_effort": "H",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 78,
|
||||
@@ -1391,17 +1355,17 @@ const EMBEDDED_DATA = {
|
||||
"agent_current_config": [
|
||||
{
|
||||
"agent": "lead-developer",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"model": "ollama-cloud/qwen3-coder:480b",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "nemotron",
|
||||
"badge_type": "qwen",
|
||||
"fit_score": 0,
|
||||
"status": "good",
|
||||
"previous_model": null
|
||||
},
|
||||
{
|
||||
"agent": "frontend-developer",
|
||||
"model": "ollama-cloud/qwen3-coder:480b",
|
||||
"model": "ollama-cloud/minimax-m2.5",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "qwen",
|
||||
@@ -1461,7 +1425,7 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "devops-engineer",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"model": "ollama-cloud/kimi-k2.6",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "nemotron",
|
||||
@@ -1521,7 +1485,7 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "browser-automation",
|
||||
"model": "ollama-cloud/qwen3-coder:480b",
|
||||
"model": "ollama-cloud/kimi-k2.6",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "qwen",
|
||||
@@ -1541,16 +1505,6 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "system-analyst",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "nemotron",
|
||||
"fit_score": 0,
|
||||
"status": "good",
|
||||
"previous_model": null
|
||||
},
|
||||
{
|
||||
"agent": "requirement-refiner",
|
||||
"model": "ollama-cloud/glm-5.1",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
@@ -1559,16 +1513,6 @@ const EMBEDDED_DATA = {
|
||||
"status": "good",
|
||||
"previous_model": null
|
||||
},
|
||||
{
|
||||
"agent": "history-miner",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "nemotron",
|
||||
"fit_score": 0,
|
||||
"status": "good",
|
||||
"previous_model": null
|
||||
},
|
||||
{
|
||||
"agent": "capability-analyst",
|
||||
"model": "ollama-cloud/glm-5.1",
|
||||
@@ -1581,7 +1525,7 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "orchestrator",
|
||||
"model": "ollama-cloud/kimi-k2.6:cloud",
|
||||
"model": "ollama-cloud/kimi-k2.6",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "kimi",
|
||||
@@ -1661,7 +1605,7 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "agent-architect",
|
||||
"model": "ollama-cloud/glm-5.1",
|
||||
"model": "ollama-cloud/kimi-k2.6",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "glm",
|
||||
|
||||
171
agent-evolution/scripts/lib/fitness-gate.cjs
Normal file
171
agent-evolution/scripts/lib/fitness-gate.cjs
Normal file
@@ -0,0 +1,171 @@
|
||||
/**
|
||||
* Model Evolution Fitness Gate
|
||||
*
|
||||
* Validates any model assignment change against heatmap-derived scores.
|
||||
* Rejects changes that would downgrade agents beyond the regression threshold.
|
||||
*
|
||||
* Usage:
|
||||
* const { FitnessGate, runGate } = require('./fitness-gate.cjs');
|
||||
* runGate([{ agent: 'devops-engineer', from: 'nemotron-3-super', to: 'kimi-k2.6:cloud' }]);
|
||||
*/
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
const BENCHMARKS_PATH = path.join(__dirname, '../../data/model-benchmarks.json');
|
||||
const DEFAULT_MIN_SCORE = 75;
|
||||
const DEFAULT_MAX_REGRESSION = 3;
|
||||
|
||||
/**
 * Validates proposed agent -> model reassignments against per-agent scores.
 *
 * A change is rejected when the new model's score is below `minScore`, when it
 * drops more than `maxRegression` points below the old model's score, or when
 * either score cannot be found for the agent.
 */
class FitnessGate {
  /**
   * @param {object} benchmarks - Parsed benchmark data. The `agent_model_scores`
   *   and `models` arrays are indexed when present.
   * @param {object} [options]
   * @param {number} [options.minScore] - Lowest acceptable score for a new model
   *   (defaults to DEFAULT_MIN_SCORE).
   * @param {number} [options.maxRegression] - Largest tolerated score drop
   *   (defaults to DEFAULT_MAX_REGRESSION).
   */
  constructor(benchmarks, options = {}) {
    this.benchmarks = benchmarks;
    this.agents = this._buildAgentIndex(benchmarks);
    this.models = this._buildModelIndex(benchmarks);
    this.minScore = options.minScore ?? DEFAULT_MIN_SCORE;
    this.maxRegression = options.maxRegression ?? DEFAULT_MAX_REGRESSION;
  }

  /**
   * Index `agent_model_scores` entries by agent name.
   * @param {object} data - Benchmark data.
   * @returns {Object<string, object>} Map of agent name -> score entry.
   */
  _buildAgentIndex(data) {
    const map = {};
    (data?.agent_model_scores || []).forEach(a => {
      map[a.agent] = a;
    });
    return map;
  }

  /**
   * Index `models` entries by id, recording each entry's array position as `idx`.
   * @param {object} data - Benchmark data.
   * @returns {Object<string, object>} Map of model id -> model entry plus `idx`.
   */
  _buildModelIndex(data) {
    const map = {};
    (data?.models || []).forEach((m, i) => {
      map[m.id] = { ...m, idx: i };
    });
    return map;
  }

  /**
   * Look up an agent entry, ignoring names that only exist on Object.prototype
   * (e.g. "constructor"), which a plain-object index would otherwise return.
   * @param {string} agentName
   * @returns {object|null} The agent's score entry, or null when unknown.
   */
  _lookupAgent(agentName) {
    if (!Object.prototype.hasOwnProperty.call(this.agents, agentName)) return null;
    return this.agents[agentName] || null;
  }

  /**
   * List the score keys a model ID may be stored under, most specific first.
   *
   * Config files write IDs such as "qwen3-coder:480b" or "kimi-k2.6:cloud",
   * while score tables use keys such as "qwen3-coder-480b", "kimi-k2-6" or
   * "minimax-m2.5". The candidates therefore cover: ':' replaced by '-', the
   * raw ID, a "-cloud" suffix added or removed, a provider prefix ("x/")
   * removed, and '.' replaced by '-'.
   *
   * @param {string} modelId
   * @returns {string[]} De-duplicated candidate keys in lookup order.
   */
  _modelKeyCandidates(modelId) {
    // The first three entries keep the lookup order the gate has always used.
    const candidates = [modelId.replace(/:/g, '-'), modelId, modelId + '-cloud'];

    const bare = modelId.slice(modelId.lastIndexOf('/') + 1);
    const bases = [modelId, bare, modelId.replace(/[:-]cloud$/, ''), bare.replace(/[:-]cloud$/, '')];
    for (const id of bases) {
      candidates.push(id, id.replace(/:/g, '-'), id.replace(/[:.]/g, '-'));
    }
    return [...new Set(candidates)];
  }

  /**
   * Get the score of a model for an agent.
   * @param {string} agentName
   * @param {string} modelId - Model ID in any of the spellings handled by
   *   `_modelKeyCandidates`.
   * @returns {number|null} The score, or null when the agent or model is unknown
   *   or `modelId` is not a non-empty string.
   */
  getScore(agentName, modelId) {
    const agent = this._lookupAgent(agentName);
    if (!agent) return null;
    if (typeof modelId !== 'string' || modelId === '') return null;

    const scores = agent.scores;
    if (!scores) return null;

    for (const key of this._modelKeyCandidates(modelId)) {
      if (Object.prototype.hasOwnProperty.call(scores, key) && scores[key] !== undefined) {
        return scores[key];
      }
    }
    return null;
  }

  /**
   * Validate a single reassignment.
   * @param {string} agentName
   * @param {string} fromModel - Currently assigned model ID.
   * @param {string} toModel - Proposed model ID.
   * @returns {{acceptable: boolean, reason?: string, oldScore?: number,
   *   newScore?: number, delta?: number, status?: string}} Verdict. `reason` is
   *   set on rejection; `status` ('upgrade' | 'same' | 'minor_regression') is set
   *   on acceptance.
   */
  validateChange(agentName, fromModel, toModel) {
    const agent = this._lookupAgent(agentName);
    if (!agent) return { acceptable: false, reason: `Agent "${agentName}" not found in benchmarks` };

    const oldScore = this.getScore(agentName, fromModel);
    const newScore = this.getScore(agentName, toModel);

    if (oldScore === null) {
      return { acceptable: false, reason: `No score for "${fromModel}" on agent "${agentName}"` };
    }
    if (newScore === null) {
      return { acceptable: false, reason: `No score for "${toModel}" on agent "${agentName}"` };
    }

    const delta = newScore - oldScore;

    if (newScore < this.minScore) {
      return {
        acceptable: false,
        reason: `Score ${newScore} below global minimum ${this.minScore}`,
        oldScore, newScore, delta
      };
    }

    if (newScore < oldScore - this.maxRegression) {
      return {
        acceptable: false,
        reason: `Regression ${oldScore} -> ${newScore} (delta ${delta}) exceeds max allowed regression of ${this.maxRegression}`,
        oldScore, newScore, delta
      };
    }

    return {
      acceptable: true,
      oldScore, newScore, delta,
      status: newScore > oldScore ? 'upgrade' : newScore === oldScore ? 'same' : 'minor_regression'
    };
  }

  /**
   * Validate every change in a list.
   * @param {Iterable<{agent: string, from: string, to: string}>} changes
   * @returns {{results: object[], rejections: object[], passed: boolean}}
   *   `results` holds each change merged with its verdict; `rejections` is the
   *   subset that was not acceptable; `passed` is true when nothing was rejected.
   */
  validateAllChanges(changes) {
    const results = [];
    const rejections = [];

    for (const change of changes) {
      const verdict = this.validateChange(change.agent, change.from, change.to);
      // Merge the change into the verdict so rejections carry agent/from/to;
      // callers print `r.agent` for each rejection.
      const entry = { ...change, ...verdict };
      results.push(entry);
      if (!verdict.acceptable) rejections.push(entry);
    }

    return { results, rejections, passed: rejections.length === 0 };
  }

  /**
   * Print a table of all validated changes, summary counts, and any rejections.
   * @param {{results: object[], rejections: object[]}} report - Output of
   *   `validateAllChanges`.
   */
  printDiff(report) {
    console.log('\n=== Model Change Diff Report ===');
    console.log(
      'Agent'.padEnd(25),
      'Old Model'.padEnd(25),
      'Old Score'.padEnd(10),
      'New Model'.padEnd(25),
      'New Score'.padEnd(10),
      'Status'
    );
    console.log('-'.repeat(115));

    for (const r of report.results) {
      let status;
      if (!r.acceptable) {
        status = `⛔ REJECTED: ${r.reason}`;
      } else if (r.delta > 0) {
        status = '✅ UPGRADE';
      } else if (r.delta === 0) {
        status = '➖ SAME';
      } else {
        status = `⚠️ MINOR (${r.delta})`;
      }

      console.log(
        String(r.agent ?? '-').padEnd(25),
        (r.from || '-').padEnd(25),
        (r.oldScore ?? '-').toString().padEnd(10),
        (r.to || '-').padEnd(25),
        (r.newScore ?? '-').toString().padEnd(10),
        status
      );
    }

    console.log('-'.repeat(115));
    // Count only accepted rows here; rejected rows are reported separately, so
    // the four counters add up to the number of rows.
    const accepted = report.results.filter(r => r.acceptable);
    const upgrades = accepted.filter(r => r.delta > 0).length;
    const downgrades = accepted.filter(r => r.delta < 0).length;
    const same = accepted.filter(r => r.delta === 0).length;
    const rejected = report.rejections.length;

    console.log(`Upgrades: ${upgrades} | Minor regressions: ${downgrades} | Same: ${same} | Rejected: ${rejected}`);

    if (rejected > 0) {
      console.log('\n⛔ REJECTIONS (sync blocked):');
      for (const r of report.rejections) {
        console.log(` - ${r.agent}: ${r.reason}`);
      }
      console.log('\nNo files were modified. Fix the source data or adjust thresholds (not recommended).');
    }
  }
}
|
||||
|
||||
/**
 * Build a FitnessGate from the benchmarks file at BENCHMARKS_PATH.
 *
 * @param {object} [options] - Passed through unchanged to the FitnessGate constructor.
 * @returns {FitnessGate} Gate backed by the parsed file contents.
 */
function loadGate(options = {}) {
  const rawJson = fs.readFileSync(BENCHMARKS_PATH, 'utf8');
  const benchmarks = JSON.parse(rawJson);
  return new FitnessGate(benchmarks, options);
}
|
||||
|
||||
/**
 * Validate a list of changes and print the diff report in a single call.
 *
 * @param {Array<{agent: string, from: string, to: string}>} changes - Changes to
 *   validate; forwarded to `validateAllChanges`.
 * @param {object} [options] - Passed through unchanged to `loadGate`.
 * @returns {object} The report produced by `validateAllChanges`.
 */
function runGate(changes, options = {}) {
  const fitnessGate = loadGate(options);
  const outcome = fitnessGate.validateAllChanges(changes);
  fitnessGate.printDiff(outcome);
  return outcome;
}
|
||||
|
||||
module.exports = { FitnessGate, loadGate, runGate };
|
||||
@@ -1,4 +1,5 @@
|
||||
const fs = require('fs');
|
||||
const { runGate } = require('./lib/fitness-gate.cjs');
|
||||
|
||||
// Parse simple YAML structure with 2-space indentation
|
||||
function parseCapabilityIndex(text) {
|
||||
@@ -6,21 +7,19 @@ function parseCapabilityIndex(text) {
|
||||
const agents = {};
|
||||
let currentAgent = '';
|
||||
let currentList = '';
|
||||
|
||||
|
||||
for (const line of lines) {
|
||||
const indent = line.length - line.trimStart().length;
|
||||
const trimmed = line.trim();
|
||||
|
||||
|
||||
if (indent === 2 && trimmed.endsWith(':') && !trimmed.startsWith('-')) {
|
||||
// Agent name
|
||||
currentAgent = trimmed.slice(0, -1);
|
||||
agents[currentAgent] = {};
|
||||
currentList = '';
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
if (indent === 4 && trimmed.endsWith(':') && !trimmed.startsWith('-')) {
|
||||
// Scalar property or list start
|
||||
const key = trimmed.slice(0, -1);
|
||||
currentList = key;
|
||||
if (!Array.isArray(agents[currentAgent][key])) {
|
||||
@@ -28,18 +27,16 @@ function parseCapabilityIndex(text) {
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
if (indent === 4 && trimmed.includes(':') && !trimmed.startsWith('-')) {
|
||||
// key: value
|
||||
const [key, ...rest] = trimmed.split(':');
|
||||
const value = rest.join(':').trim();
|
||||
agents[currentAgent][key.trim()] = value;
|
||||
currentList = '';
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
if (indent >= 6 && trimmed.startsWith('- ')) {
|
||||
// List item
|
||||
const value = trimmed.slice(2).trim();
|
||||
if (currentList) {
|
||||
if (!agents[currentAgent][currentList]) agents[currentAgent][currentList] = [];
|
||||
@@ -47,21 +44,19 @@ function parseCapabilityIndex(text) {
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Reset list context on unknown indentation
|
||||
|
||||
if (indent < 4) {
|
||||
currentList = '';
|
||||
}
|
||||
}
|
||||
|
||||
// Filter out non-agent entries (flat sections like capability_routing, etc.)
|
||||
|
||||
const result = {};
|
||||
const scalarKeys = ['capabilities','receives','produces','forbidden','delegates_to','fallback_models'];
|
||||
for (const [name, data] of Object.entries(agents)) {
|
||||
const hasAgentProps = scalarKeys.some(k => k in data) || 'model' in data;
|
||||
if (hasAgentProps) result[name] = data;
|
||||
}
|
||||
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -72,6 +67,38 @@ console.log('Parsed agents:', Object.keys(parsed).length);
|
||||
// Read existing benchmarks
|
||||
const bench = JSON.parse(fs.readFileSync('agent-evolution/data/model-benchmarks.json', 'utf8'));
|
||||
|
||||
// === FITNESS GATE: validate model changes ===
|
||||
const oldConfig = {};
|
||||
(bench.agent_current_config || []).forEach(c => {
|
||||
oldConfig[c.agent] = c.model;
|
||||
});
|
||||
|
||||
const changes = [];
|
||||
for (const [agent, data] of Object.entries(parsed)) {
|
||||
const newModel = data.model || '';
|
||||
const oldModel = oldConfig[agent];
|
||||
if (oldModel && oldModel !== newModel) {
|
||||
changes.push({
|
||||
agent,
|
||||
from: oldModel.replace('ollama-cloud/', ''),
|
||||
to: newModel.replace('ollama-cloud/', '')
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (changes.length > 0) {
|
||||
console.log('\nDetected model changes:', changes.length);
|
||||
const report = runGate(changes);
|
||||
|
||||
if (!report.passed) {
|
||||
console.error('\n⛔ FITNESS GATE REJECTED the sync. No files modified.');
|
||||
console.error('If you intend to downgrade, update the source scores in model-benchmarks.json first.');
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
console.log('\n✅ All model changes passed fitness gate. Proceeding...');
|
||||
}
|
||||
|
||||
// Update agent_current_config
|
||||
bench.agent_current_config = Object.entries(parsed).map(([agent, data]) => {
|
||||
const rawModel = data.model || '';
|
||||
@@ -104,7 +131,6 @@ bench.agent_model_scores = Object.entries(parsed).map(([agent, data]) => {
|
||||
const rawModel = data.model || '';
|
||||
const modelId = rawModel.replace('ollama-cloud/', '');
|
||||
const currentIndex = bench.models.findIndex(m => m.id === modelId);
|
||||
// Preserve existing scores or empty
|
||||
const scores = existingScores[agent] || {};
|
||||
return {
|
||||
agent,
|
||||
@@ -117,11 +143,11 @@ bench.agent_model_scores = Object.entries(parsed).map(([agent, data]) => {
|
||||
|
||||
// Update metadata
|
||||
bench.generated = new Date().toISOString();
|
||||
bench.source = '.kilo/capability-index.yaml (synced v2)';
|
||||
bench.source = '.kilo/capability-index.yaml (synced v3 + fitness-gate)';
|
||||
bench.total_agents = bench.agent_current_config.length;
|
||||
|
||||
fs.writeFileSync('agent-evolution/data/model-benchmarks.json', JSON.stringify(bench, null, 2));
|
||||
console.log('Synced', bench.agent_current_config.length, 'agents');
|
||||
console.log('\nSynced', bench.agent_current_config.length, 'agents');
|
||||
console.log('Generated:', bench.generated);
|
||||
|
||||
// Verify
|
||||
@@ -134,3 +160,4 @@ bench.agent_current_config.forEach(c => {
|
||||
}
|
||||
});
|
||||
console.log('Mismatches:', mismatches);
|
||||
console.log('\n💡 Tip: If fitness gate rejected changes, verify that model-benchmarks.json has correct heatmap scores before syncing from YAML.');
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"$schema": "https://app.kilo.ai/config.json",
|
||||
"metaVersion": "1.0.0",
|
||||
"lastSync": "2026-04-27T20:28:58.841Z",
|
||||
"lastSync": "2026-04-27T11:07:02.592Z",
|
||||
"agents": {
|
||||
"requirement-refiner": {
|
||||
"file": ".kilo/agents/requirement-refiner.md",
|
||||
@@ -21,7 +21,7 @@
|
||||
"system-analyst": {
|
||||
"file": ".kilo/agents/system-analyst.md",
|
||||
"description": "Designs technical specifications, data schemas, and API contracts before implementation",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"model": "ollama-cloud/glm-5.1",
|
||||
"mode": "subagent",
|
||||
"category": "core"
|
||||
},
|
||||
@@ -36,7 +36,7 @@
|
||||
"lead-developer": {
|
||||
"file": ".kilo/agents/lead-developer.md",
|
||||
"description": "Primary code writer for backend and core logic. Writes implementation to pass tests",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"model": "ollama-cloud/qwen3-coder:480b",
|
||||
"mode": "subagent",
|
||||
"color": "#DC2626",
|
||||
"category": "core"
|
||||
|
||||
@@ -45,7 +45,7 @@
|
||||
"system-analyst": {
|
||||
"description": "Designs technical specifications, data schemas, and API contracts before implementation",
|
||||
"mode": "subagent",
|
||||
"model": "ollama-cloud/nemotron-3-super"
|
||||
"model": "qwen/qwen3.6-plus:free"
|
||||
},
|
||||
"sdet-engineer": {
|
||||
"description": "Writes tests following TDD methodology. Tests MUST fail initially (Red phase)",
|
||||
@@ -68,7 +68,7 @@
|
||||
"lead-developer": {
|
||||
"description": "Primary code writer for backend and core logic. Writes implementation to pass tests",
|
||||
"mode": "subagent",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"model": "ollama-cloud/qwen3-coder:480b",
|
||||
"color": "#DC2626",
|
||||
"permission": {
|
||||
"read": "allow",
|
||||
|
||||
Reference in New Issue
Block a user