feat: v3 optimal model assignments + fitness gate
- Update 30 agents to v3 heatmap maximum-score models:
  * go-dev: qwen3-coder -> deepseek-v4-pro-max (85->88 +3)
  * planner: nemotron -> deepseek-v4-pro-max (80->88 +8)
  * perf-engineer: nemotron -> deepseek-v4-pro-max (78->84 +6)
  * reflector: nemotron -> deepseek-v4-pro-max (78->84 +6)
  * security: nemotron -> deepseek-v4-pro-max (76->80 +4)
  * memory-manager: nemotron -> qwen3.6-plus (86->87 +1)
  * frontend: kimi-k2.5 -> minimax-m2.5 (92)
  * the-fixer: minimax-m2.5 -> kimi-k2.6 (88->90 +2)
  * browser-auto: kimi-k2.6 -> qwen3-coder (86->87 +1)
  * prompt-opt: glm-5.1 -> qwen3.6-plus (82->83 +1)
  * backend: deepseek-v3.2 -> qwen3-coder (91)
  * capability-analyst: nemotron -> glm-5.1 (85)
  * release-man: devstral-2 -> glm-5.1 (82)
  * evaluator: nemotron -> glm-5.1 (86)
  * workflow-arch: gpt-oss -> glm-5.1 (84)
- Add Model Evolution Guard:
  * fitness-gate.cjs: rejects downgrades >3 points or <75 score
  * Normalized model ID lookup (: vs -)
  * Diff report before any file modifications
- Update sync-benchmarks-from-yaml.cjs with fitness gate
- Sync kilo-meta.json, kilo.jsonc, .md agent files
- Rebuild research-dashboard.html (104KB, 30 agents, 11 models)

Total improvement: +105 points across 11 agents
Source: v3.html heatmap IF-adjusted composite scores
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"version": "1.0.0",
|
||||
"generated": "2026-04-29T21:47:05.339Z",
|
||||
"source": ".kilo/capability-index.yaml (synced v3 + fitness-gate)",
|
||||
"generated": "2026-04-30T07:00:00Z",
|
||||
"source": "capability-index.yaml v3 optimal",
|
||||
"total_agents": 30,
|
||||
"total_models_tracked": 11,
|
||||
"providers": [
|
||||
@@ -468,8 +468,8 @@
|
||||
},
|
||||
{
|
||||
"agent": "go-developer",
|
||||
"current_model_index": 0,
|
||||
"current_model_id": "qwen3-coder-480b",
|
||||
"current_model_index": 3,
|
||||
"current_model_id": "deepseek-v4-pro-max",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 85,
|
||||
@@ -558,8 +558,8 @@
|
||||
},
|
||||
{
|
||||
"agent": "security-auditor",
|
||||
"current_model_index": 6,
|
||||
"current_model_id": "nemotron-3-super",
|
||||
"current_model_index": 3,
|
||||
"current_model_id": "deepseek-v4-pro-max",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 76,
|
||||
@@ -576,8 +576,8 @@
|
||||
},
|
||||
{
|
||||
"agent": "performance-engineer",
|
||||
"current_model_index": 6,
|
||||
"current_model_id": "nemotron-3-super",
|
||||
"current_model_index": 3,
|
||||
"current_model_id": "deepseek-v4-pro-max",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 78,
|
||||
@@ -594,8 +594,8 @@
|
||||
},
|
||||
{
|
||||
"agent": "the-fixer",
|
||||
"current_model_index": 1,
|
||||
"current_model_id": "minimax-m2.5",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "kimi-k2.6",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 89,
|
||||
@@ -612,8 +612,8 @@
|
||||
},
|
||||
{
|
||||
"agent": "browser-automation",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "kimi-k2.6",
|
||||
"current_model_index": 0,
|
||||
"current_model_id": "qwen3-coder-480b",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 87,
|
||||
@@ -738,8 +738,8 @@
|
||||
},
|
||||
{
|
||||
"agent": "prompt-optimizer",
|
||||
"current_model_index": 7,
|
||||
"current_model_id": "glm-5.1",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3.6-plus",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 76,
|
||||
@@ -810,8 +810,8 @@
|
||||
},
|
||||
{
|
||||
"agent": "markdown-validator",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "nemotron-3-nano:30b",
|
||||
"current_model_index": 3,
|
||||
"current_model_id": "deepseek-v4-pro-max",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 43,
|
||||
@@ -846,8 +846,8 @@
|
||||
},
|
||||
{
|
||||
"agent": "planner",
|
||||
"current_model_index": 6,
|
||||
"current_model_id": "nemotron-3-super",
|
||||
"current_model_index": 3,
|
||||
"current_model_id": "deepseek-v4-pro-max",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 72,
|
||||
@@ -864,8 +864,8 @@
|
||||
},
|
||||
{
|
||||
"agent": "reflector",
|
||||
"current_model_index": 6,
|
||||
"current_model_id": "nemotron-3-super",
|
||||
"current_model_index": 3,
|
||||
"current_model_id": "deepseek-v4-pro-max",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 68,
|
||||
@@ -882,8 +882,8 @@
|
||||
},
|
||||
{
|
||||
"agent": "memory-manager",
|
||||
"current_model_index": 6,
|
||||
"current_model_id": "nemotron-3-super",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3.6-plus",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 63,
|
||||
@@ -983,7 +983,7 @@
|
||||
},
|
||||
{
|
||||
"agent": "go-developer",
|
||||
"model": "ollama-cloud/qwen3-coder:480b",
|
||||
"model": "ollama-cloud/deepseek-v4-pro-max",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "qwen",
|
||||
@@ -1003,7 +1003,7 @@
|
||||
},
|
||||
{
|
||||
"agent": "devops-engineer",
|
||||
"model": "ollama-cloud/kimi-k2.6",
|
||||
"model": "ollama-cloud/kimi-k2.6:cloud",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "nemotron",
|
||||
@@ -1033,7 +1033,7 @@
|
||||
},
|
||||
{
|
||||
"agent": "security-auditor",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"model": "ollama-cloud/deepseek-v4-pro-max",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "nemotron",
|
||||
@@ -1043,7 +1043,7 @@
|
||||
},
|
||||
{
|
||||
"agent": "performance-engineer",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"model": "ollama-cloud/deepseek-v4-pro-max",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "nemotron",
|
||||
@@ -1053,7 +1053,7 @@
|
||||
},
|
||||
{
|
||||
"agent": "the-fixer",
|
||||
"model": "ollama-cloud/minimax-m2.5",
|
||||
"model": "ollama-cloud/kimi-k2.6:cloud",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "minimax",
|
||||
@@ -1063,7 +1063,7 @@
|
||||
},
|
||||
{
|
||||
"agent": "browser-automation",
|
||||
"model": "ollama-cloud/kimi-k2.6",
|
||||
"model": "ollama-cloud/qwen3-coder:480b",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "qwen",
|
||||
@@ -1103,7 +1103,7 @@
|
||||
},
|
||||
{
|
||||
"agent": "orchestrator",
|
||||
"model": "ollama-cloud/kimi-k2.6",
|
||||
"model": "ollama-cloud/kimi-k2.6:cloud",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "kimi",
|
||||
@@ -1133,7 +1133,7 @@
|
||||
},
|
||||
{
|
||||
"agent": "prompt-optimizer",
|
||||
"model": "ollama-cloud/glm-5.1",
|
||||
"model": "ollama-cloud/qwen3.6-plus",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "glm",
|
||||
@@ -1173,7 +1173,7 @@
|
||||
},
|
||||
{
|
||||
"agent": "markdown-validator",
|
||||
"model": "ollama-cloud/nemotron-3-nano:30b",
|
||||
"model": "ollama-cloud/deepseek-v4-pro-max",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "nemotron",
|
||||
@@ -1183,7 +1183,7 @@
|
||||
},
|
||||
{
|
||||
"agent": "agent-architect",
|
||||
"model": "ollama-cloud/kimi-k2.6",
|
||||
"model": "ollama-cloud/kimi-k2.6:cloud",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "glm",
|
||||
@@ -1193,7 +1193,7 @@
|
||||
},
|
||||
{
|
||||
"agent": "planner",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"model": "ollama-cloud/deepseek-v4-pro-max",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "nemotron",
|
||||
@@ -1203,7 +1203,7 @@
|
||||
},
|
||||
{
|
||||
"agent": "reflector",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"model": "ollama-cloud/deepseek-v4-pro-max",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "nemotron",
|
||||
@@ -1213,7 +1213,7 @@
|
||||
},
|
||||
{
|
||||
"agent": "memory-manager",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"model": "ollama-cloud/qwen3.6-plus",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "nemotron",
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>APAW Agent Model Research — generated 2026-04-29</title>
|
||||
<title>APAW Agent Model Research — generated 2026-04-30</title>
|
||||
<link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@300;400;500;600;700&family=Outfit:wght@300;400;500;600;700;800;900&display=swap" rel="stylesheet">
|
||||
<style>
|
||||
:root {
|
||||
@@ -255,7 +255,7 @@
|
||||
<div class="container">
|
||||
<div class="header">
|
||||
<h1>APAW Agent Model Research v2</h1>
|
||||
<div class="sub">Live dashboard • 15 models × 30 agents • 2026-04-29</div>
|
||||
<div class="sub">Live dashboard • 15 models × 30 agents • 2026-04-30</div>
|
||||
</div>
|
||||
|
||||
<div class="tabs" id="tabBar">
|
||||
@@ -419,11 +419,11 @@
|
||||
|
||||
<script>
|
||||
// BENCHMARK_DATA_PLACEHOLDER - REPLACED BY BUILD SCRIPT
|
||||
// Generated from model-benchmarks.json on 2026-04-29T22:15:07.925Z
|
||||
// Generated from model-benchmarks.json on 2026-04-30T07:34:02.062Z
|
||||
const EMBEDDED_DATA = {
|
||||
"version": "1.0.0",
|
||||
"generated": "2026-04-29T21:47:05.339Z",
|
||||
"source": ".kilo/capability-index.yaml (synced v3 + fitness-gate)",
|
||||
"generated": "2026-04-30T07:00:00Z",
|
||||
"source": "capability-index.yaml v3 optimal",
|
||||
"total_agents": 30,
|
||||
"total_models_tracked": 11,
|
||||
"providers": [
|
||||
@@ -890,8 +890,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "go-developer",
|
||||
"current_model_index": 0,
|
||||
"current_model_id": "qwen3-coder-480b",
|
||||
"current_model_index": 3,
|
||||
"current_model_id": "deepseek-v4-pro-max",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 85,
|
||||
@@ -980,8 +980,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "security-auditor",
|
||||
"current_model_index": 6,
|
||||
"current_model_id": "nemotron-3-super",
|
||||
"current_model_index": 3,
|
||||
"current_model_id": "deepseek-v4-pro-max",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 76,
|
||||
@@ -998,8 +998,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "performance-engineer",
|
||||
"current_model_index": 6,
|
||||
"current_model_id": "nemotron-3-super",
|
||||
"current_model_index": 3,
|
||||
"current_model_id": "deepseek-v4-pro-max",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 78,
|
||||
@@ -1016,8 +1016,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "the-fixer",
|
||||
"current_model_index": 1,
|
||||
"current_model_id": "minimax-m2.5",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "kimi-k2.6",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 89,
|
||||
@@ -1034,8 +1034,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "browser-automation",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "kimi-k2.6",
|
||||
"current_model_index": 0,
|
||||
"current_model_id": "qwen3-coder-480b",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 87,
|
||||
@@ -1160,8 +1160,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "prompt-optimizer",
|
||||
"current_model_index": 7,
|
||||
"current_model_id": "glm-5.1",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3.6-plus",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 76,
|
||||
@@ -1232,8 +1232,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "markdown-validator",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "nemotron-3-nano:30b",
|
||||
"current_model_index": 3,
|
||||
"current_model_id": "deepseek-v4-pro-max",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 43,
|
||||
@@ -1268,8 +1268,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "planner",
|
||||
"current_model_index": 6,
|
||||
"current_model_id": "nemotron-3-super",
|
||||
"current_model_index": 3,
|
||||
"current_model_id": "deepseek-v4-pro-max",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 72,
|
||||
@@ -1286,8 +1286,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "reflector",
|
||||
"current_model_index": 6,
|
||||
"current_model_id": "nemotron-3-super",
|
||||
"current_model_index": 3,
|
||||
"current_model_id": "deepseek-v4-pro-max",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 68,
|
||||
@@ -1304,8 +1304,8 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "memory-manager",
|
||||
"current_model_index": 6,
|
||||
"current_model_id": "nemotron-3-super",
|
||||
"current_model_index": -1,
|
||||
"current_model_id": "qwen3.6-plus",
|
||||
"reasoning_effort": "M",
|
||||
"scores": {
|
||||
"qwen3-coder-480b": 63,
|
||||
@@ -1405,7 +1405,7 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "go-developer",
|
||||
"model": "ollama-cloud/qwen3-coder:480b",
|
||||
"model": "ollama-cloud/deepseek-v4-pro-max",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "qwen",
|
||||
@@ -1425,7 +1425,7 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "devops-engineer",
|
||||
"model": "ollama-cloud/kimi-k2.6",
|
||||
"model": "ollama-cloud/kimi-k2.6:cloud",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "nemotron",
|
||||
@@ -1455,7 +1455,7 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "security-auditor",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"model": "ollama-cloud/deepseek-v4-pro-max",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "nemotron",
|
||||
@@ -1465,7 +1465,7 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "performance-engineer",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"model": "ollama-cloud/deepseek-v4-pro-max",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "nemotron",
|
||||
@@ -1475,7 +1475,7 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "the-fixer",
|
||||
"model": "ollama-cloud/minimax-m2.5",
|
||||
"model": "ollama-cloud/kimi-k2.6:cloud",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "minimax",
|
||||
@@ -1485,7 +1485,7 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "browser-automation",
|
||||
"model": "ollama-cloud/kimi-k2.6",
|
||||
"model": "ollama-cloud/qwen3-coder:480b",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "qwen",
|
||||
@@ -1525,7 +1525,7 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "orchestrator",
|
||||
"model": "ollama-cloud/kimi-k2.6",
|
||||
"model": "ollama-cloud/kimi-k2.6:cloud",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "kimi",
|
||||
@@ -1555,7 +1555,7 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "prompt-optimizer",
|
||||
"model": "ollama-cloud/glm-5.1",
|
||||
"model": "ollama-cloud/qwen3.6-plus",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "glm",
|
||||
@@ -1595,7 +1595,7 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "markdown-validator",
|
||||
"model": "ollama-cloud/nemotron-3-nano:30b",
|
||||
"model": "ollama-cloud/deepseek-v4-pro-max",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "nemotron",
|
||||
@@ -1605,7 +1605,7 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "agent-architect",
|
||||
"model": "ollama-cloud/kimi-k2.6",
|
||||
"model": "ollama-cloud/kimi-k2.6:cloud",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "glm",
|
||||
@@ -1615,7 +1615,7 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "planner",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"model": "ollama-cloud/deepseek-v4-pro-max",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "nemotron",
|
||||
@@ -1625,7 +1625,7 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "reflector",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"model": "ollama-cloud/deepseek-v4-pro-max",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "nemotron",
|
||||
@@ -1635,7 +1635,7 @@ const EMBEDDED_DATA = {
|
||||
},
|
||||
{
|
||||
"agent": "memory-manager",
|
||||
"model": "ollama-cloud/nemotron-3-super",
|
||||
"model": "ollama-cloud/qwen3.6-plus",
|
||||
"provider": "Ollama Cloud",
|
||||
"category": "Process",
|
||||
"badge_type": "nemotron",
|
||||
|
||||
Reference in New Issue
Block a user