Files
APAW/agent-evolution/data/model-benchmarks.json
¨NW¨ 3badb259cc feat: bidirectional research dashboard + agent config fixes
- Integrate apaw_agent_model_research_v3.html as standalone dashboard
- Add model-benchmarks.json with 32 agents, 11 scored models, 11 recommendations
- Add build-research-dashboard.ts: inject live data into template → standalone HTML
- Add rebuild-template.cjs: regenerate template from v3.html source
- Add sync-benchmarks-from-yaml.cjs: sync YAML → JSON round-trip
- Add sync-model-research.ts: apply recommendation matrix to config files
- Add model-benchmarks.schema.json and model-research.schema.json for validation
- Add bidirectional-data-flow.md architecture documentation
- Add log-execution.cjs pipeline hook
- Update capability-index.yaml: add fallback_models, failover_strategy
- Update kilo-meta.json, kilo.jsonc, KILO_SPEC.md with synced models
- Update evolution.md / research.md / self-evolution.md / evolutionary-sync.md docs
- Fix security-auditor.md: quote YAML color (#DC2626)
- Fix orchestrator.md: remove duplicate devops-engineer key
- Build research-dashboard.html (106KB standalone) + dated archive
2026-04-29 21:04:22 +01:00

1774 lines
45 KiB
JSON
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"version": "1.0.0",
"generated": "2026-04-29T19:56:51.418Z",
"source": ".kilo/capability-index.yaml (synced v2)",
"total_agents": 32,
"total_models_tracked": 11,
"providers": [
"ollama",
"ollama-cloud",
"openrouter",
"groq"
],
"models": [
{
"id": "qwen3-coder-480b",
"name": "Qwen3-Coder 480B",
"organization": "Qwen",
"parameters": "480B/35B active",
"context_window": "256K→1M",
"swe_bench": 66.5,
"if_score": 88,
"categories": [
"coding",
"agent"
],
"description": "SOTA open-source кодинг. Сравним с Claude Sonnet 4.",
"tags": [
"coding",
"agent",
"tools"
],
"openrouter": false,
"provider": "ollama"
},
{
"id": "minimax-m2.5",
"name": "MiniMax M2.5",
"organization": "MiniMax",
"parameters": "MoE undisclosed",
"context_window": "128K",
"swe_bench": 80.2,
"if_score": 82,
"categories": [
"coding",
"agent"
],
"description": "Лидер SWE-bench 80.2%. Полный lifecycle разработки.",
"tags": [
"coding",
"agent"
],
"openrouter": false,
"provider": "ollama"
},
{
"id": "minimax-m2.7",
"name": "MiniMax M2.7",
"organization": "MiniMax",
"parameters": "~10B active",
"context_window": "128K",
"swe_bench": 78,
"if_score": 80,
"categories": [
"coding",
"agent",
"efficient"
],
"description": "Самообучаемая. 56.2% SWE-Pro. 100 TPS. $0.30/M.",
"tags": [
"coding",
"agent",
"self-evolving"
],
"openrouter": false,
"provider": "ollama"
},
{
"id": "deepseek-v4-pro-max",
"name": "DeepSeek V4-Pro",
"organization": "DeepSeek",
"parameters": "1.6T/49B active MoE",
"context_window": "1M",
"swe_bench": 80.6,
"if_score": 89,
"categories": [
"coding",
"agent",
"reasoning"
],
"description": "SWE-V 80.6, LiveCodeBench 93.5(#1!), Terminal-Bench 67.9, Codeforces 3206, 1M ctx, 27% FLOPs vs V3.2. MIT.",
"tags": [
"coding",
"agent",
"thinking",
"tools"
],
"openrouter": false,
"provider": "ollama-cloud"
},
{
"id": "deepseek-v4-flash",
"name": "DeepSeek V4-Flash",
"organization": "DeepSeek",
"parameters": "284B/13B active MoE",
"context_window": "1M",
"swe_bench": 79,
"if_score": 86,
"categories": [
"coding",
"efficient",
"agent"
],
"description": "SWE-V ~79%, Flash Max = Pro уровень reasoning. 13B active = ультрабыстрый. 1M ctx. FP4+FP8. MIT.",
"tags": [
"coding",
"efficient",
"agent",
"thinking"
],
"openrouter": false,
"provider": "ollama-cloud"
},
{
"id": "kimi-k2-6",
"name": "Kimi K2.6",
"organization": "Moonshot AI",
"parameters": "1T/32B active MoE",
"context_window": "256K",
"swe_bench": 80.2,
"if_score": 91,
"categories": [
"coding",
"agent",
"multimodal"
],
"description": "SWE-Pro 58.6(#1!), SWE-V 80.2, Terminal-Bench 66.7, HLE 54.0(#1!), BrowseComp 83.2. 13h autonomous. 300 sub-agent swarm. Modified MIT.",
"tags": [
"coding",
"agent",
"swarm",
"vision",
"thinking",
"tools"
],
"openrouter": false,
"provider": "ollama-cloud"
},
{
"id": "nemotron-3-super",
"name": "Nemotron 3 Super",
"organization": "NVIDIA",
"parameters": "120B/12B active",
"context_window": "1M",
"swe_bench": 60.5,
"if_score": 78,
"categories": [
"agent",
"reasoning",
"efficient"
],
"description": "SWE-bench 60.5%. RULER@1M 91.75%! Но IF ниже — Mamba-layers иногда «теряют» инструкции в длинных промптах.",
"tags": [
"agent",
"1M-ctx",
"thinking"
],
"openrouter": false,
"provider": "ollama"
},
{
"id": "glm-5.1",
"name": "GLM-5.1",
"organization": "Z.ai",
"parameters": "744B/40B active",
"context_window": "128K",
"swe_bench": null,
"if_score": 90,
"categories": [
"reasoning",
"agent"
],
"description": "Мощный reasoning. Arena ELO 1451. Отличный instruction following (IFEval ~90+).",
"tags": [
"reasoning",
"agent"
],
"openrouter": false,
"provider": "ollama"
},
{
"id": "deepseek-v4",
"name": "DeepSeek V4",
"organization": "DeepSeek",
"parameters": "Large MoE",
"context_window": "128K",
"swe_bench": null,
"if_score": 75,
"categories": [
"reasoning"
],
"description": "Хороший reasoning, но IF нестабилен — иногда игнорирует формат вывода.",
"tags": [
"reasoning"
],
"openrouter": false,
"provider": "ollama"
},
{
"id": "qwen3-5-122b",
"name": "Qwen 3.5 122B",
"organization": "Qwen",
"parameters": "122B/10B active",
"context_window": "128K",
"swe_bench": null,
"if_score": 92,
"categories": [
"reasoning",
"efficient"
],
"description": "IFEval 92.6%! Лучший IF среди open-source. Multimodal. Thinking.",
"tags": [
"vision",
"thinking",
"tools"
],
"openrouter": false,
"provider": "ollama"
},
{
"id": "qwen3-coder-next",
"name": "Qwen3-Coder-Next",
"organization": "Qwen",
"parameters": "80B/3B active",
"context_window": "128K",
"swe_bench": 70,
"if_score": 84,
"categories": [
"coding",
"efficient"
],
"description": "70% SWE-bench с 3B active! Хороший IF для кодинга.",
"tags": [
"coding",
"efficient",
"tools"
],
"openrouter": false,
"provider": "ollama"
},
{
"id": "cogito-2-1-671b",
"name": "Cogito 2.1 671B",
"organization": "Deep Cogito",
"parameters": "671B MoE",
"context_window": "128K",
"swe_bench": null,
"if_score": 76,
"categories": [
"reasoning"
],
"description": "MIT лицензия. 671B total. IF неплохой, но уступает GLM/Qwen.",
"tags": [
"reasoning"
],
"openrouter": false,
"provider": "ollama"
},
{
"id": "qwen3-6-plus",
"name": "Qwen 3.6 Plus",
"organization": "Qwen",
"parameters": "Hybrid MoE",
"context_window": "1M",
"swe_bench": 78.8,
"if_score": 91,
"categories": [
"coding",
"agent",
"reasoning"
],
"description": "FREE на OpenRouter! 1M контекст. Always-on CoT. Превосходный IF — наследник Qwen 3.5 (92.6%).",
"tags": [
"coding",
"agent",
"1M-ctx",
"free"
],
"openrouter": true,
"provider": "openrouter"
},
{
"id": "step-3-5-flash",
"name": "Step 3.5 Flash",
"organization": "StepFun",
"parameters": "MoE",
"context_window": "128K",
"swe_bench": null,
"if_score": 79,
"categories": [
"efficient"
],
"description": "Бесплатна на OpenRouter. IF средний.",
"tags": [
"efficient",
"free"
],
"openrouter": true,
"provider": "openrouter"
},
{
"id": "deepseek-r1",
"name": "DeepSeek R1",
"organization": "DeepSeek",
"parameters": "671B MoE",
"context_window": "128K",
"swe_bench": null,
"if_score": 73,
"categories": [
"reasoning"
],
"description": "Мощные reasoning-цепочки. Но IF слабый — часто генерирует лишний reasoning вместо ответа.",
"tags": [
"reasoning",
"thinking",
"free"
],
"openrouter": true,
"provider": "openrouter"
}
],
"groq_models": [
{
"id": "openai/gpt-oss-20b",
"rpm": 30,
"rpd": "1K",
"tpm": "8K",
"tpd": "200K",
"speed": "1200+",
"use_case": "Ультра-быстрый fallback для лёгких ролей (markdown-validator)."
},
{
"id": "llama-3.1-8b-instant",
"rpm": 30,
"rpd": "14.4K",
"tpm": "6K",
"tpd": "500K",
"speed": "~800",
"use_case": "14.4K RPD! Самый высокий лимит. Для health-check / ping ролей."
},
{
"id": "groq/compound",
"rpm": 30,
"rpd": "250",
"tpm": "70K",
"tpd": "—",
"speed": "varies",
"use_case": "Мультимодельная агрегация. Для research-задач."
},
{
"id": "groq/compound-mini",
"rpm": 30,
"rpd": "250",
"tpm": "70K",
"tpd": "—",
"speed": "varies",
"use_case": "Лёгкая версия compound."
},
{
"id": "llama-prompt-guard-2",
"rpm": 30,
"rpd": "14.4K",
"tpm": "15K",
"tpd": "500K",
"speed": "~1K",
"use_case": "Security: входной фильтр для security-auditor (14.4K RPD!)."
}
],
"agent_model_scores": [
{
"agent": "lead-developer",
"current_model_index": 6,
"current_model_id": "nemotron-3-super",
"reasoning_effort": "H",
"scores": {
"qwen3-coder-480b": 92,
"minimax-m2.5": 86,
"minimax-m2.7": 82,
"nemotron-3-super": 70,
"glm-5.1": 68,
"deepseek-v4-pro-max": 88,
"qwen3-5-122b": 66,
"qwen3-coder-next": 80,
"qwen3-6-plus": 88,
"kimi-k2-6": 90
}
},
{
"agent": "frontend-developer",
"current_model_index": -1,
"current_model_id": "qwen3-coder:480b",
"reasoning_effort": "M",
"scores": {
"qwen3-coder-480b": 86,
"minimax-m2.5": 92,
"minimax-m2.7": 88,
"nemotron-3-super": 62,
"glm-5.1": 56,
"deepseek-v4-pro-max": 82,
"qwen3-5-122b": 60,
"qwen3-coder-next": 76,
"qwen3-6-plus": 88,
"kimi-k2-6": 86
}
},
{
"agent": "php-developer",
"current_model_index": -1,
"current_model_id": "qwen3-coder:480b",
"reasoning_effort": "H",
"scores": {
"qwen3-coder-480b": 87,
"minimax-m2.5": 76,
"minimax-m2.7": 72,
"nemotron-3-super": 64,
"glm-5.1": 56,
"deepseek-v4-pro-max": 74,
"qwen3-5-122b": 60,
"qwen3-coder-next": 76,
"qwen3-6-plus": 84,
"kimi-k2-6": 86
}
},
{
"agent": "python-developer",
"current_model_index": -1,
"current_model_id": "qwen3-coder:480b",
"reasoning_effort": "H",
"scores": {
"qwen3-coder-480b": 90,
"minimax-m2.5": 82,
"minimax-m2.7": 78,
"nemotron-3-super": 66,
"glm-5.1": 60,
"deepseek-v4-pro-max": 78,
"qwen3-5-122b": 64,
"qwen3-coder-next": 78,
"qwen3-6-plus": 88,
"kimi-k2-6": 88
}
},
{
"agent": "backend-developer",
"current_model_index": -1,
"current_model_id": "qwen3-coder:480b",
"reasoning_effort": "M",
"scores": {
"qwen3-coder-480b": 91,
"minimax-m2.5": 84,
"minimax-m2.7": 80,
"nemotron-3-super": 68,
"glm-5.1": 63,
"deepseek-v4-pro-max": 86,
"qwen3-5-122b": 62,
"qwen3-coder-next": 78,
"qwen3-6-plus": 87,
"kimi-k2-6": 90
}
},
{
"agent": "go-developer",
"current_model_index": -1,
"current_model_id": "qwen3-coder:480b",
"reasoning_effort": "M",
"scores": {
"qwen3-coder-480b": 85,
"minimax-m2.5": 78,
"minimax-m2.7": 74,
"nemotron-3-super": 66,
"glm-5.1": 58,
"deepseek-v4-pro-max": 88,
"qwen3-5-122b": 58,
"qwen3-coder-next": 74,
"qwen3-6-plus": 82,
"kimi-k2-6": 86
}
},
{
"agent": "flutter-developer",
"current_model_index": -1,
"current_model_id": "qwen3-coder:480b",
"reasoning_effort": "M",
"scores": {
"qwen3-coder-480b": 86,
"minimax-m2.5": 70,
"minimax-m2.7": 66,
"nemotron-3-super": 60,
"glm-5.1": 53,
"deepseek-v4-pro-max": 78,
"qwen3-5-122b": 58,
"qwen3-coder-next": 74,
"qwen3-6-plus": 82,
"kimi-k2-6": 84
}
},
{
"agent": "devops-engineer",
"current_model_index": 6,
"current_model_id": "nemotron-3-super",
"reasoning_effort": "M",
"scores": {
"qwen3-coder-480b": 66,
"minimax-m2.5": 53,
"minimax-m2.7": 48,
"nemotron-3-super": 78,
"glm-5.1": 75,
"deepseek-v4-pro-max": 86,
"qwen3-5-122b": 70,
"qwen3-coder-next": 54,
"qwen3-6-plus": 76,
"kimi-k2-6": 88
}
},
{
"agent": "sdet-engineer",
"current_model_index": -1,
"current_model_id": "qwen3-coder:480b",
"reasoning_effort": "H",
"scores": {
"qwen3-coder-480b": 88,
"minimax-m2.5": 84,
"minimax-m2.7": 80,
"nemotron-3-super": 70,
"glm-5.1": 63,
"deepseek-v4-pro-max": 84,
"qwen3-5-122b": 64,
"qwen3-coder-next": 78,
"qwen3-6-plus": 84,
"kimi-k2-6": 87
}
},
{
"agent": "code-skeptic",
"current_model_index": 1,
"current_model_id": "minimax-m2.5",
"reasoning_effort": "M",
"scores": {
"qwen3-coder-480b": 82,
"minimax-m2.5": 85,
"minimax-m2.7": 80,
"nemotron-3-super": 73,
"glm-5.1": 72,
"deepseek-v4-pro-max": 82,
"qwen3-5-122b": 70,
"qwen3-coder-next": 72,
"qwen3-6-plus": 80,
"kimi-k2-6": 82
}
},
{
"agent": "security-auditor",
"current_model_index": 6,
"current_model_id": "nemotron-3-super",
"reasoning_effort": "M",
"scores": {
"qwen3-coder-480b": 76,
"minimax-m2.5": 74,
"minimax-m2.7": 68,
"nemotron-3-super": 76,
"glm-5.1": 68,
"deepseek-v4-pro-max": 80,
"qwen3-5-122b": 72,
"qwen3-coder-next": 64,
"qwen3-6-plus": 75,
"kimi-k2-6": 80
}
},
{
"agent": "performance-engineer",
"current_model_index": 6,
"current_model_id": "nemotron-3-super",
"reasoning_effort": "M",
"scores": {
"qwen3-coder-480b": 78,
"minimax-m2.5": 75,
"minimax-m2.7": 70,
"nemotron-3-super": 78,
"glm-5.1": 74,
"deepseek-v4-pro-max": 84,
"qwen3-5-122b": 70,
"qwen3-coder-next": 67,
"qwen3-6-plus": 76,
"kimi-k2-6": 82
}
},
{
"agent": "the-fixer",
"current_model_index": 1,
"current_model_id": "minimax-m2.5",
"reasoning_effort": "M",
"scores": {
"qwen3-coder-480b": 89,
"minimax-m2.5": 88,
"minimax-m2.7": 84,
"nemotron-3-super": 71,
"glm-5.1": 64,
"deepseek-v4-pro-max": 88,
"qwen3-5-122b": 64,
"qwen3-coder-next": 82,
"qwen3-6-plus": 86,
"kimi-k2-6": 90
}
},
{
"agent": "browser-automation",
"current_model_index": -1,
"current_model_id": "qwen3-coder:480b",
"reasoning_effort": "M",
"scores": {
"qwen3-coder-480b": 87,
"minimax-m2.5": 72,
"minimax-m2.7": 68,
"nemotron-3-super": 61,
"glm-5.1": 53,
"deepseek-v4-pro-max": 82,
"qwen3-5-122b": 56,
"qwen3-coder-next": 72,
"qwen3-6-plus": 82,
"kimi-k2-6": 86
}
},
{
"agent": "visual-tester",
"current_model_index": -1,
"current_model_id": "qwen3-coder:480b",
"reasoning_effort": "M",
"scores": {
"qwen3-coder-480b": 82,
"minimax-m2.5": 68,
"minimax-m2.7": 64,
"nemotron-3-super": 55,
"glm-5.1": 48,
"deepseek-v4-pro-max": 76,
"qwen3-5-122b": 54,
"qwen3-coder-next": 66,
"qwen3-6-plus": 76,
"kimi-k2-6": 78
}
},
{
"agent": "system-analyst",
"current_model_index": 6,
"current_model_id": "nemotron-3-super",
"reasoning_effort": "H",
"scores": {
"qwen3-coder-480b": 70,
"minimax-m2.5": 66,
"minimax-m2.7": 63,
"nemotron-3-super": 74,
"glm-5.1": 82,
"deepseek-v4-pro-max": 88,
"qwen3-5-122b": 76,
"qwen3-coder-next": 58,
"qwen3-6-plus": 80,
"kimi-k2-6": 86
}
},
{
"agent": "requirement-refiner",
"current_model_index": 7,
"current_model_id": "glm-5.1",
"reasoning_effort": "H",
"scores": {
"qwen3-coder-480b": 66,
"minimax-m2.5": 62,
"minimax-m2.7": 60,
"nemotron-3-super": 72,
"glm-5.1": 80,
"deepseek-v4-pro-max": 82,
"qwen3-5-122b": 74,
"qwen3-coder-next": 54,
"qwen3-6-plus": 78,
"kimi-k2-6": 82
}
},
{
"agent": "history-miner",
"current_model_index": 6,
"current_model_id": "nemotron-3-super",
"reasoning_effort": "M",
"scores": {
"qwen3-coder-480b": 68,
"minimax-m2.5": 60,
"minimax-m2.7": 56,
"nemotron-3-super": 85,
"glm-5.1": 78,
"deepseek-v4-pro-max": 86,
"qwen3-5-122b": 72,
"qwen3-coder-next": 56,
"qwen3-6-plus": 84,
"kimi-k2-6": 82
}
},
{
"agent": "capability-analyst",
"current_model_index": 7,
"current_model_id": "glm-5.1",
"reasoning_effort": "M",
"scores": {
"qwen3-coder-480b": 72,
"minimax-m2.5": 68,
"minimax-m2.7": 66,
"nemotron-3-super": 76,
"glm-5.1": 78,
"deepseek-v4-pro-max": 82,
"qwen3-5-122b": 75,
"qwen3-coder-next": 60,
"qwen3-6-plus": 79,
"kimi-k2-6": 82
}
},
{
"agent": "orchestrator",
"current_model_index": -1,
"current_model_id": "kimi-k2.6:cloud",
"reasoning_effort": "H",
"scores": {
"qwen3-coder-480b": 74,
"minimax-m2.5": 70,
"minimax-m2.7": 68,
"nemotron-3-super": 80,
"glm-5.1": 82,
"deepseek-v4-pro-max": 86,
"qwen3-5-122b": 78,
"qwen3-coder-next": 62,
"qwen3-6-plus": 84,
"kimi-k2-6": 92
}
},
{
"agent": "release-manager",
"current_model_index": 7,
"current_model_id": "glm-5.1",
"reasoning_effort": "M",
"scores": {
"qwen3-coder-480b": 72,
"minimax-m2.5": 66,
"minimax-m2.7": 64,
"nemotron-3-super": 74,
"glm-5.1": 76,
"deepseek-v4-pro-max": 78,
"qwen3-5-122b": 72,
"qwen3-coder-next": 60,
"qwen3-6-plus": 76,
"kimi-k2-6": 78
}
},
{
"agent": "evaluator",
"current_model_index": 7,
"current_model_id": "glm-5.1",
"reasoning_effort": "H",
"scores": {
"qwen3-coder-480b": 70,
"minimax-m2.5": 73,
"minimax-m2.7": 70,
"nemotron-3-super": 78,
"glm-5.1": 78,
"deepseek-v4-pro-max": 84,
"qwen3-5-122b": 76,
"qwen3-coder-next": 58,
"qwen3-6-plus": 81,
"kimi-k2-6": 84
}
},
{
"agent": "prompt-optimizer",
"current_model_index": 7,
"current_model_id": "glm-5.1",
"reasoning_effort": "M",
"scores": {
"qwen3-coder-480b": 76,
"minimax-m2.5": 74,
"minimax-m2.7": 72,
"nemotron-3-super": 76,
"glm-5.1": 75,
"deepseek-v4-pro-max": 80,
"qwen3-5-122b": 74,
"qwen3-coder-next": 64,
"qwen3-6-plus": 83,
"kimi-k2-6": 82
}
},
{
"agent": "product-owner",
"current_model_index": 7,
"current_model_id": "glm-5.1",
"reasoning_effort": "M",
"scores": {
"qwen3-coder-480b": 60,
"minimax-m2.5": 56,
"minimax-m2.7": 54,
"nemotron-3-super": 74,
"glm-5.1": 78,
"deepseek-v4-pro-max": 76,
"qwen3-5-122b": 74,
"qwen3-coder-next": 48,
"qwen3-6-plus": 78,
"kimi-k2-6": 76
}
},
{
"agent": "pipeline-judge",
"current_model_index": 7,
"current_model_id": "glm-5.1",
"reasoning_effort": "M",
"scores": {
"qwen3-coder-480b": 64,
"minimax-m2.5": 68,
"minimax-m2.7": 65,
"nemotron-3-super": 78,
"glm-5.1": 76,
"deepseek-v4-pro-max": 82,
"qwen3-5-122b": 74,
"qwen3-coder-next": 56,
"qwen3-6-plus": 80,
"kimi-k2-6": 84
}
},
{
"agent": "workflow-architect",
"current_model_index": 7,
"current_model_id": "glm-5.1",
"reasoning_effort": "H",
"scores": {
"qwen3-coder-480b": 68,
"minimax-m2.5": 62,
"minimax-m2.7": 60,
"nemotron-3-super": 76,
"glm-5.1": 76,
"deepseek-v4-pro-max": 80,
"qwen3-5-122b": 72,
"qwen3-coder-next": 56,
"qwen3-6-plus": 80,
"kimi-k2-6": 82
}
},
{
"agent": "markdown-validator",
"current_model_index": -1,
"current_model_id": "nemotron-3-nano:30b",
"reasoning_effort": "M",
"scores": {
"qwen3-coder-480b": 43,
"minimax-m2.5": 38,
"minimax-m2.7": 36,
"nemotron-3-super": 52,
"glm-5.1": 55,
"deepseek-v4-pro-max": 68,
"qwen3-5-122b": 56,
"qwen3-coder-next": 40,
"qwen3-6-plus": 50,
"kimi-k2-6": 56
}
},
{
"agent": "agent-architect",
"current_model_index": 7,
"current_model_id": "glm-5.1",
"reasoning_effort": "H",
"scores": {
"qwen3-coder-480b": 78,
"minimax-m2.5": 72,
"minimax-m2.7": 70,
"nemotron-3-super": 78,
"glm-5.1": 76,
"deepseek-v4-pro-max": 82,
"qwen3-5-122b": 76,
"qwen3-coder-next": 66,
"qwen3-6-plus": 82,
"kimi-k2-6": 86
}
},
{
"agent": "planner",
"current_model_index": 6,
"current_model_id": "nemotron-3-super",
"reasoning_effort": "M",
"scores": {
"qwen3-coder-480b": 72,
"minimax-m2.5": 68,
"minimax-m2.7": 66,
"nemotron-3-super": 80,
"glm-5.1": 78,
"deepseek-v4-pro-max": 88,
"qwen3-5-122b": 78,
"qwen3-coder-next": 60,
"qwen3-6-plus": 85,
"kimi-k2-6": 86
}
},
{
"agent": "reflector",
"current_model_index": 6,
"current_model_id": "nemotron-3-super",
"reasoning_effort": "M",
"scores": {
"qwen3-coder-480b": 68,
"minimax-m2.5": 66,
"minimax-m2.7": 64,
"nemotron-3-super": 78,
"glm-5.1": 76,
"deepseek-v4-pro-max": 84,
"qwen3-5-122b": 76,
"qwen3-coder-next": 56,
"qwen3-6-plus": 82,
"kimi-k2-6": 80
}
},
{
"agent": "memory-manager",
"current_model_index": 6,
"current_model_id": "nemotron-3-super",
"reasoning_effort": "M",
"scores": {
"qwen3-coder-480b": 63,
"minimax-m2.5": 58,
"minimax-m2.7": 56,
"nemotron-3-super": 86,
"glm-5.1": 72,
"deepseek-v4-pro-max": 86,
"qwen3-5-122b": 70,
"qwen3-coder-next": 50,
"qwen3-6-plus": 87,
"kimi-k2-6": 84
}
},
{
"agent": "architect-indexer",
"current_model_index": 7,
"current_model_id": "glm-5.1",
"reasoning_effort": "H",
"scores": {
"qwen3-coder-480b": 70,
"minimax-m2.5": 64,
"minimax-m2.7": 62,
"nemotron-3-super": 74,
"glm-5.1": 80,
"deepseek-v4-pro-max": 78,
"qwen3-5-122b": 76,
"qwen3-coder-next": 58,
"qwen3-6-plus": 80,
"kimi-k2-6": 84
}
}
],
"if_scores": {
"qwen3-coder-480b": 88,
"minimax-m2.5": 82,
"minimax-m2.7": 78,
"nemotron-3-super": 85,
"glm-5.1": 80,
"deepseek-v4-pro-max": 88,
"qwen3-5-122b": 86,
"qwen3-coder-next": 84,
"qwen3-6-plus": 90,
"kimi-k2-6": 91,
"deepseek-v4-flash": 86
},
"agent_current_config": [
{
"agent": "lead-developer",
"model": "ollama-cloud/nemotron-3-super",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "nemotron",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "frontend-developer",
"model": "ollama-cloud/qwen3-coder:480b",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "qwen",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "php-developer",
"model": "ollama-cloud/qwen3-coder:480b",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "qwen",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "python-developer",
"model": "ollama-cloud/qwen3-coder:480b",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "qwen",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "backend-developer",
"model": "ollama-cloud/qwen3-coder:480b",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "qwen",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "go-developer",
"model": "ollama-cloud/qwen3-coder:480b",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "qwen",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "flutter-developer",
"model": "ollama-cloud/qwen3-coder:480b",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "qwen",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "devops-engineer",
"model": "ollama-cloud/nemotron-3-super",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "nemotron",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "sdet-engineer",
"model": "ollama-cloud/qwen3-coder:480b",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "qwen",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "code-skeptic",
"model": "ollama-cloud/minimax-m2.5",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "minimax",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "security-auditor",
"model": "ollama-cloud/nemotron-3-super",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "nemotron",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "performance-engineer",
"model": "ollama-cloud/nemotron-3-super",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "nemotron",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "the-fixer",
"model": "ollama-cloud/minimax-m2.5",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "minimax",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "browser-automation",
"model": "ollama-cloud/qwen3-coder:480b",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "qwen",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "visual-tester",
"model": "ollama-cloud/qwen3-coder:480b",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "qwen",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "system-analyst",
"model": "ollama-cloud/nemotron-3-super",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "nemotron",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "requirement-refiner",
"model": "ollama-cloud/glm-5.1",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "glm",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "history-miner",
"model": "ollama-cloud/nemotron-3-super",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "nemotron",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "capability-analyst",
"model": "ollama-cloud/glm-5.1",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "glm",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "orchestrator",
"model": "ollama-cloud/kimi-k2.6:cloud",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "kimi",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "release-manager",
"model": "ollama-cloud/glm-5.1",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "glm",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "evaluator",
"model": "ollama-cloud/glm-5.1",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "glm",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "prompt-optimizer",
"model": "ollama-cloud/glm-5.1",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "glm",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "product-owner",
"model": "ollama-cloud/glm-5.1",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "glm",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "pipeline-judge",
"model": "ollama-cloud/glm-5.1",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "glm",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "workflow-architect",
"model": "ollama-cloud/glm-5.1",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "glm",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "markdown-validator",
"model": "ollama-cloud/nemotron-3-nano:30b",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "nemotron",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "agent-architect",
"model": "ollama-cloud/glm-5.1",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "glm",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "planner",
"model": "ollama-cloud/nemotron-3-super",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "nemotron",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "reflector",
"model": "ollama-cloud/nemotron-3-super",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "nemotron",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "memory-manager",
"model": "ollama-cloud/nemotron-3-super",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "nemotron",
"fit_score": 0,
"status": "good",
"previous_model": null
},
{
"agent": "architect-indexer",
"model": "ollama-cloud/glm-5.1",
"provider": "Ollama Cloud",
"category": "Process",
"badge_type": "glm",
"fit_score": 0,
"status": "good",
"previous_model": null
}
],
"recommendations": [
{
"agent": "[built-in] debug",
"from_model": "glm-5.1 (88)",
"from_provider": "Ollama",
"to_model": "V4-Pro Max (★90) / K2.6 (★90) RE:High",
"to_provider": "Ollama Cloud",
"impact": "high",
"quality_change": "+2%",
"speed_change": "~1x",
"context_change": "200K→1M",
"provider_change": "Ollama Cloud",
"rationale": "★ матрицы: V4-Pro=90 и K2.6=90 (TIE!), GLM-5.1=88. V4-Pro: LiveCodeBench 93.5(#1!), Terminal 67.9, 1M ctx для полного проекта. K2.6: 13h auto sessions. Оба лучше GLM-5.1. RE:High для debug."
},
{
"agent": "planner",
"from_model": "nemotron-3-super (80)",
"from_provider": "Ollama",
"to_model": "V4-Pro Max (★88) RE:High",
"to_provider": "Ollama Cloud",
"impact": "high",
"quality_change": "+10%",
"speed_change": "~1x",
"context_change": "1M",
"provider_change": "Ollama Cloud",
"rationale": "★ матрицы: V4-Pro=88(лучший!), K2.6=86, GLM-5.1=85, Nem=80. V4-Pro: GPQA 90.1 (reasoning), 1M ctx сохраняется (vs потеря при K2.6). RE:High для chain-of-thought planning."
},
{
"agent": "go-developer",
"from_model": "qwen3-coder:480b (85)",
"from_provider": "Ollama",
"to_model": "V4-Pro Max (★88) RE:Medium",
"to_provider": "Ollama Cloud",
"impact": "medium",
"quality_change": "+4%",
"speed_change": "~1x",
"context_change": "256K→1M",
"provider_change": "Ollama Cloud",
"rationale": "★ матрицы: V4-Pro=88(лучший для Go!), K2.6=86, Qwen3Coder=85. DeepSeek модели традиционно сильны в Go/Rust. 1M ctx для крупных Go-проектов."
},
{
"agent": "history-miner",
"from_model": "nemotron-3-super (★85)",
"from_provider": "Ollama",
"to_model": "V4-Pro Max (86) + Nem fallback",
"to_provider": "Hybrid",
"impact": "medium",
"quality_change": "+1%",
"speed_change": "~1x",
"context_change": "1M",
"provider_change": "Ollama Cloud + Ollama",
"rationale": "V4-Pro=86 чуть лучше Nemotron=85. 1M ctx у обоих. MRCR 83.5 у V4-Pro — лучшее long-context retrieval. Nemotron как fallback (RULER 91.75%)."
},
{
"agent": "frontend-dev → M2.5",
"from_model": "qwen3-coder (90)",
"from_provider": "Ollama",
"to_model": "MiniMax M2.5 (★92) ✅",
"to_provider": "Ollama",
"impact": "low",
"quality_change": "+2%",
"speed_change": "=",
"context_change": "204K",
"provider_change": "Ollama",
"rationale": "Spec-writing, UI architect. APPLIED."
},
{
"agent": "devops → K2.6",
"from_model": "deepseek-v3.2",
"from_provider": "",
"to_model": "kimi-k2.6:cloud ✅",
"to_provider": "Ollama Cloud",
"impact": "low",
"quality_change": "+35%",
"speed_change": "=",
"context_change": "256K",
"provider_change": "",
"rationale": "APPLIED."
},
{
"agent": "orchestrator",
"from_model": "glm-5.1 (★90)",
"from_provider": "Ollama",
"to_model": "K2.6 (★92) RE:Medium",
"to_provider": "Ollama Cloud",
"impact": "medium",
"quality_change": "+2%",
"speed_change": "~1x",
"context_change": "200K→256K",
"provider_change": "Ollama Cloud",
"rationale": "K2.6=92★ всё ещё лучший для orchestration. V4-Pro=86 слабее. 300 sub-agent swarm."
},
{
"agent": "the-fixer",
"from_model": "minimax-m2.5 (★88)",
"from_provider": "Ollama",
"to_model": "V4-Pro (★88) / K2.6 (★90)",
"to_provider": "Ollama Cloud",
"impact": "medium",
"quality_change": "+2%",
"speed_change": "~1x",
"context_change": "128K→1M/256K",
"provider_change": "Ollama Cloud",
"rationale": "K2.6=90(лучший), V4-Pro=88=M2.5. M2.5 SWE-bench 80.2% стабильнее. Не срочно."
},
{
"agent": "Qwen3-Coder (7 coding)",
"from_model": "qwen3-coder",
"from_provider": "Ollama",
"to_model": "✅",
"to_provider": "",
"impact": "low",
"quality_change": "=0%",
"speed_change": "=",
"context_change": "256K",
"provider_change": "Ollama",
"rationale": "lead=92★, backend=91★, python=90★."
},
{
"agent": "GLM-5.1 (12 agents)",
"from_model": "glm-5.1",
"from_provider": "Ollama",
"to_model": "✅",
"to_provider": "",
"impact": "low",
"quality_change": "=0%",
"speed_change": "=",
"context_change": "200K",
"provider_change": "",
"rationale": "orchestrator=90, system-analyst=90. SWE-Pro #1."
},
{
"agent": "Kimi K2.6 (3 agents)",
"from_model": "kimi-k2.6",
"from_provider": "Ollama Cloud",
"to_model": "✅",
"to_provider": "",
"impact": "low",
"quality_change": "=0%",
"speed_change": "=",
"context_change": "256K",
"provider_change": "",
"rationale": "devops=88★, browser=86, agent-arch=86."
}
],
"impact_data": [
{
"category": "debug GLM5.1→V4-Pro/K2.6",
"before": 88,
"after": 90,
"delta": 2,
"notes": "LiveCodeBench 93.5, Terminal 67.9"
},
{
"category": "planner Nem→V4-Pro Max",
"before": 80,
"after": 88,
"delta": 8,
"notes": "★88! GPQA 90.1, 1M ctx"
},
{
"category": "go-dev Coder→V4-Pro Max",
"before": 85,
"after": 88,
"delta": 3,
"notes": "★88! Go/Rust specialist, 1M ctx"
},
{
"category": "history-miner →V4-Pro",
"before": 85,
"after": 86,
"delta": 1,
"notes": "MRCR 83.5, long-context"
},
{
"category": "orchestrator →K2.6 (next)",
"before": 90,
"after": 92,
"delta": 2,
"notes": "300 sub-agent swarm"
},
{
"category": "frontend → M2.5 ✅",
"before": 90,
"after": 92,
"delta": 2,
"notes": "Spec-writing, UI architect"
},
{
"category": "devops → K2.6 ✅",
"before": 65,
"after": 88,
"delta": 23,
"notes": "IF:65→91! Terminal 66.7"
},
{
"category": "Qwen3-Coder (7) ✅",
"before": 90,
"after": 90,
"delta": 0,
"notes": "SOTA coding"
},
{
"category": "GLM-5.1 (12) ✅",
"before": 87,
"after": 87,
"delta": 0,
"notes": "SWE-Pro #1"
},
{
"category": "Nemotron Super (6) ✅",
"before": 82,
"after": 82,
"delta": 0,
"notes": "1M ctx, RULER 91.75%"
}
],
"benchmark_comparison": {
"benchmarks": [
{
"name": "SWE-V",
"full_name": "SWE-Bench Verified",
"description": "GitHub issue resolution (500 tasks)",
"roles": "lead-dev, backend, fixer"
},
{
"name": "SWE-P",
"full_name": "SWE-Bench Pro",
"description": "Multi-lang, decontaminated (1865 tasks)",
"roles": "all coding agents"
},
{
"name": "T-Bench",
"full_name": "Terminal-Bench 2.0",
"description": "CLI/shell multi-step tasks",
"roles": "devops, planner, orchestrator"
},
{
"name": "LCB",
"full_name": "LiveCodeBench",
"description": "Code gen from specs (held-out)",
"roles": "sdet, go-dev, python-dev"
},
{
"name": "GPQA",
"full_name": "GPQA Diamond",
"description": "PhD-level reasoning",
"roles": "system-analyst, planner"
},
{
"name": "BComp",
"full_name": "BrowseComp",
"description": "Web research & synthesis",
"roles": "browser-auto, capability-analyst"
},
{
"name": "HLE",
"full_name": "Humanity's Last Exam",
"description": "Frontier knowledge (with tools)",
"roles": "agent-architect, evaluator"
},
{
"name": "Ctx",
"full_name": "Context Window",
"description": "Max tokens in one pass",
"roles": "history-miner, memory-mgr"
},
{
"name": "$/M",
"full_name": "Cost per 1M input",
"description": "API pricing",
"roles": "all agents (ROI)"
}
],
"closed_source_models": [
{
"name": "Claude Opus 4.7",
"organization": "Anthropic",
"scores": [
87.6,
64.3,
69.4,
null,
94.2,
79.3,
53,
"1M",
"$5"
],
"color": "#c084fc",
"note": "#1 апрель 2026"
},
{
"name": "GPT-5.5",
"organization": "OpenAI",
"scores": [
null,
58.6,
82.7,
null,
null,
83.4,
57.2,
"1M",
"$5"
],
"color": "#ff6b81",
"note": "Новейший, Terminal #1"
},
{
"name": "GPT-5.4",
"organization": "OpenAI",
"scores": [
78.2,
59.1,
75.1,
null,
94.4,
82.7,
58.7,
"200K",
"$2.50"
],
"color": "#ff6b81",
"note": "Reasoning, math"
},
{
"name": "Gemini 3.1 Pro",
"organization": "Google",
"scores": [
80.6,
46.1,
68.5,
null,
94.3,
85.9,
51.4,
"2M",
"$2"
],
"color": "#facc15",
"note": "ARC-AGI 77.1%, дешёвый"
},
{
"name": "Claude Sonnet 4.6",
"organization": "Anthropic",
"scores": [
79.6,
null,
null,
null,
null,
null,
null,
"200K",
"$3"
],
"color": "#c084fc",
"note": "5× дешевле Opus"
},
{
"name": "GPT-5.3-Codex",
"organization": "OpenAI",
"scores": [
85,
57,
77.3,
null,
null,
null,
null,
"200K",
"$6"
],
"color": "#ff6b81",
"note": "Coding specialist"
}
],
"apaw_models": [
{
"name": "Kimi K2.6",
"organization": "APAW",
"scores": [
80.2,
58.6,
66.7,
87.2,
null,
83.2,
54,
"256K",
"$0.95"
],
"color": "#00ff94",
"note": "devops, browser, architect (3)"
},
{
"name": "GLM-5.1",
"organization": "APAW",
"scores": [
null,
58.4,
63.5,
null,
86.2,
68.7,
null,
"200K",
"~$0.50"
],
"color": "#00ff94",
"note": "12 agents! orchestrator, eval..."
},
{
"name": "V4-Pro Max",
"organization": "APAW",
"scores": [
80.6,
55.4,
67.9,
93.5,
90.1,
83.4,
48.2,
"1M",
"$0.42"
],
"color": "#00d4ff",
"note": "planner, go-dev (рек.)"
},
{
"name": "Qwen3-Coder 480B",
"organization": "APAW",
"scores": [
66.5,
null,
null,
null,
null,
null,
null,
"256K",
"~$0.50"
],
"color": "#00ff94",
"note": "7 coding agents"
},
{
"name": "MiniMax M2.5",
"organization": "APAW",
"scores": [
80.2,
51.3,
null,
null,
null,
76.3,
null,
"204K",
"$0.15"
],
"color": "#00ff94",
"note": "frontend, skeptic, fixer (3)"
},
{
"name": "Nemotron Super",
"organization": "APAW",
"scores": [
60.5,
null,
null,
null,
null,
null,
null,
"1M",
"~$0.40"
],
"color": "#00ff94",
"note": "6 agents (memory, history)"
}
]
}
}