- Integrate apaw_agent_model_research_v3.html as standalone dashboard - Add model-benchmarks.json with 32 agents, 11 scored models, 11 recommendations - Add build-research-dashboard.ts: inject live data into template → standalone HTML - Add rebuild-template.cjs: regenerate template from v3.html source - Add sync-benchmarks-from-yaml.cjs: sync YAML → JSON round-trip - Add sync-model-research.ts: apply recommendation matrix to config files - Add model-benchmarks.schema.json and model-research.schema.json for validation - Add bidirectional-data-flow.md architecture documentation - Add log-execution.cjs pipeline hook - Update capability-index.yaml: add fallback_models, failover_strategy - Update kilo-meta.json, kilo.jsonc, KILO_SPEC.md with synced models - Update evolution.md / research.md / self-evolution.md / evolutionary-sync.md docs - Fix security-auditor.md: quote YAML color (#DC2626) - Fix orchestrator.md: remove duplicate devops-engineer key - Build research-dashboard.html (106KB standalone) + dated archive
1774 lines
45 KiB
JSON
1774 lines
45 KiB
JSON
{
|
||
"version": "1.0.0",
|
||
"generated": "2026-04-29T19:56:51.418Z",
|
||
"source": ".kilo/capability-index.yaml (synced v2)",
|
||
"total_agents": 32,
|
||
"total_models_tracked": 11,
|
||
"providers": [
|
||
"ollama",
|
||
"ollama-cloud",
|
||
"openrouter",
|
||
"groq"
|
||
],
|
||
"models": [
|
||
{
|
||
"id": "qwen3-coder-480b",
|
||
"name": "Qwen3-Coder 480B",
|
||
"organization": "Qwen",
|
||
"parameters": "480B/35B active",
|
||
"context_window": "256K→1M",
|
||
"swe_bench": 66.5,
|
||
"if_score": 88,
|
||
"categories": [
|
||
"coding",
|
||
"agent"
|
||
],
|
||
"description": "SOTA open-source кодинг. Сравним с Claude Sonnet 4.",
|
||
"tags": [
|
||
"coding",
|
||
"agent",
|
||
"tools"
|
||
],
|
||
"openrouter": false,
|
||
"provider": "ollama"
|
||
},
|
||
{
|
||
"id": "minimax-m2.5",
|
||
"name": "MiniMax M2.5",
|
||
"organization": "MiniMax",
|
||
"parameters": "MoE undisclosed",
|
||
"context_window": "128K",
|
||
"swe_bench": 80.2,
|
||
"if_score": 82,
|
||
"categories": [
|
||
"coding",
|
||
"agent"
|
||
],
|
||
"description": "Лидер SWE-bench 80.2%. Полный lifecycle разработки.",
|
||
"tags": [
|
||
"coding",
|
||
"agent"
|
||
],
|
||
"openrouter": false,
|
||
"provider": "ollama"
|
||
},
|
||
{
|
||
"id": "minimax-m2.7",
|
||
"name": "MiniMax M2.7",
|
||
"organization": "MiniMax",
|
||
"parameters": "~10B active",
|
||
"context_window": "128K",
|
||
"swe_bench": 78,
|
||
"if_score": 80,
|
||
"categories": [
|
||
"coding",
|
||
"agent",
|
||
"efficient"
|
||
],
|
||
"description": "Самообучаемая. 56.2% SWE-Pro. 100 TPS. $0.30/M.",
|
||
"tags": [
|
||
"coding",
|
||
"agent",
|
||
"self-evolving"
|
||
],
|
||
"openrouter": false,
|
||
"provider": "ollama"
|
||
},
|
||
{
|
||
"id": "deepseek-v4-pro-max",
|
||
"name": "DeepSeek V4-Pro",
|
||
"organization": "DeepSeek",
|
||
"parameters": "1.6T/49B active MoE",
|
||
"context_window": "1M",
|
||
"swe_bench": 80.6,
|
||
"if_score": 89,
|
||
"categories": [
|
||
"coding",
|
||
"agent",
|
||
"reasoning"
|
||
],
|
||
"description": "SWE-V 80.6, LiveCodeBench 93.5(#1!), Terminal-Bench 67.9, Codeforces 3206, 1M ctx, 27% FLOPs vs V3.2. MIT.",
|
||
"tags": [
|
||
"coding",
|
||
"agent",
|
||
"thinking",
|
||
"tools"
|
||
],
|
||
"openrouter": false,
|
||
"provider": "ollama-cloud"
|
||
},
|
||
{
|
||
"id": "deepseek-v4-flash",
|
||
"name": "DeepSeek V4-Flash",
|
||
"organization": "DeepSeek",
|
||
"parameters": "284B/13B active MoE",
|
||
"context_window": "1M",
|
||
"swe_bench": 79,
|
||
"if_score": 86,
|
||
"categories": [
|
||
"coding",
|
||
"efficient",
|
||
"agent"
|
||
],
|
||
"description": "SWE-V ~79%, Flash Max = Pro уровень reasoning. 13B active = ультрабыстрый. 1M ctx. FP4+FP8. MIT.",
|
||
"tags": [
|
||
"coding",
|
||
"efficient",
|
||
"agent",
|
||
"thinking"
|
||
],
|
||
"openrouter": false,
|
||
"provider": "ollama-cloud"
|
||
},
|
||
{
|
||
"id": "kimi-k2-6",
|
||
"name": "Kimi K2.6",
|
||
"organization": "Moonshot AI",
|
||
"parameters": "1T/32B active MoE",
|
||
"context_window": "256K",
|
||
"swe_bench": 80.2,
|
||
"if_score": 91,
|
||
"categories": [
|
||
"coding",
|
||
"agent",
|
||
"multimodal"
|
||
],
|
||
"description": "SWE-Pro 58.6(#1!), SWE-V 80.2, Terminal-Bench 66.7, HLE 54.0(#1!), BrowseComp 83.2. 13h autonomous. 300 sub-agent swarm. Modified MIT.",
|
||
"tags": [
|
||
"coding",
|
||
"agent",
|
||
"swarm",
|
||
"vision",
|
||
"thinking",
|
||
"tools"
|
||
],
|
||
"openrouter": false,
|
||
"provider": "ollama-cloud"
|
||
},
|
||
{
|
||
"id": "nemotron-3-super",
|
||
"name": "Nemotron 3 Super",
|
||
"organization": "NVIDIA",
|
||
"parameters": "120B/12B active",
|
||
"context_window": "1M",
|
||
"swe_bench": 60.5,
|
||
"if_score": 78,
|
||
"categories": [
|
||
"agent",
|
||
"reasoning",
|
||
"efficient"
|
||
],
|
||
"description": "SWE-bench 60.5%. RULER@1M 91.75%! Но IF ниже — Mamba-layers иногда «теряют» инструкции в длинных промптах.",
|
||
"tags": [
|
||
"agent",
|
||
"1M-ctx",
|
||
"thinking"
|
||
],
|
||
"openrouter": false,
|
||
"provider": "ollama"
|
||
},
|
||
{
|
||
"id": "glm-5.1",
|
||
"name": "GLM-5.1",
|
||
"organization": "Z.ai",
|
||
"parameters": "744B/40B active",
|
||
"context_window": "128K",
|
||
"swe_bench": null,
|
||
"if_score": 90,
|
||
"categories": [
|
||
"reasoning",
|
||
"agent"
|
||
],
|
||
"description": "Мощный reasoning. Arena ELO 1451. Отличный instruction following (IFEval ~90+).",
|
||
"tags": [
|
||
"reasoning",
|
||
"agent"
|
||
],
|
||
"openrouter": false,
|
||
"provider": "ollama"
|
||
},
|
||
{
|
||
"id": "deepseek-v4",
|
||
"name": "DeepSeek V4",
|
||
"organization": "DeepSeek",
|
||
"parameters": "Large MoE",
|
||
"context_window": "128K",
|
||
"swe_bench": null,
|
||
"if_score": 75,
|
||
"categories": [
|
||
"reasoning"
|
||
],
|
||
"description": "Хороший reasoning, но IF нестабилен — иногда игнорирует формат вывода.",
|
||
"tags": [
|
||
"reasoning"
|
||
],
|
||
"openrouter": false,
|
||
"provider": "ollama"
|
||
},
|
||
{
|
||
"id": "qwen3-5-122b",
|
||
"name": "Qwen 3.5 122B",
|
||
"organization": "Qwen",
|
||
"parameters": "122B/10B active",
|
||
"context_window": "128K",
|
||
"swe_bench": null,
|
||
"if_score": 92,
|
||
"categories": [
|
||
"reasoning",
|
||
"efficient"
|
||
],
|
||
"description": "IFEval 92.6%! Лучший IF среди open-source. Multimodal. Thinking.",
|
||
"tags": [
|
||
"vision",
|
||
"thinking",
|
||
"tools"
|
||
],
|
||
"openrouter": false,
|
||
"provider": "ollama"
|
||
},
|
||
{
|
||
"id": "qwen3-coder-next",
|
||
"name": "Qwen3-Coder-Next",
|
||
"organization": "Qwen",
|
||
"parameters": "80B/3B active",
|
||
"context_window": "128K",
|
||
"swe_bench": 70,
|
||
"if_score": 84,
|
||
"categories": [
|
||
"coding",
|
||
"efficient"
|
||
],
|
||
"description": "70% SWE-bench с 3B active! Хороший IF для кодинга.",
|
||
"tags": [
|
||
"coding",
|
||
"efficient",
|
||
"tools"
|
||
],
|
||
"openrouter": false,
|
||
"provider": "ollama"
|
||
},
|
||
{
|
||
"id": "cogito-2-1-671b",
|
||
"name": "Cogito 2.1 671B",
|
||
"organization": "Cognitive",
|
||
"parameters": "671B MoE",
|
||
"context_window": "128K",
|
||
"swe_bench": null,
|
||
"if_score": 76,
|
||
"categories": [
|
||
"reasoning"
|
||
],
|
||
"description": "MIT лицензия. 671B total. IF неплохой, но уступает GLM/Qwen.",
|
||
"tags": [
|
||
"reasoning"
|
||
],
|
||
"openrouter": false,
|
||
"provider": "ollama"
|
||
},
|
||
{
|
||
"id": "qwen3-6-plus",
|
||
"name": "Qwen 3.6 Plus",
|
||
"organization": "Qwen",
|
||
"parameters": "Hybrid MoE",
|
||
"context_window": "1M",
|
||
"swe_bench": 78.8,
|
||
"if_score": 91,
|
||
"categories": [
|
||
"coding",
|
||
"agent",
|
||
"reasoning"
|
||
],
|
||
"description": "FREE на OpenRouter! 1M контекст. Always-on CoT. Превосходный IF — наследник Qwen 3.5 (92.6%).",
|
||
"tags": [
|
||
"coding",
|
||
"agent",
|
||
"1M-ctx",
|
||
"free"
|
||
],
|
||
"openrouter": true,
|
||
"provider": "openrouter"
|
||
},
|
||
{
|
||
"id": "step-3-5-flash",
|
||
"name": "Step 3.5 Flash",
|
||
"organization": "StepFun",
|
||
"parameters": "MoE",
|
||
"context_window": "128K",
|
||
"swe_bench": null,
|
||
"if_score": 79,
|
||
"categories": [
|
||
"efficient"
|
||
],
|
||
"description": "Бесплатна на OpenRouter. IF средний.",
|
||
"tags": [
|
||
"efficient",
|
||
"free"
|
||
],
|
||
"openrouter": true,
|
||
"provider": "openrouter"
|
||
},
|
||
{
|
||
"id": "deepseek-r1",
|
||
"name": "DeepSeek R1",
|
||
"organization": "DeepSeek",
|
||
"parameters": "671B MoE",
|
||
"context_window": "128K",
|
||
"swe_bench": null,
|
||
"if_score": 73,
|
||
"categories": [
|
||
"reasoning"
|
||
],
|
||
"description": "Мощные reasoning-цепочки. Но IF слабый — часто генерирует лишний reasoning вместо ответа.",
|
||
"tags": [
|
||
"reasoning",
|
||
"thinking",
|
||
"free"
|
||
],
|
||
"openrouter": true,
|
||
"provider": "openrouter"
|
||
}
|
||
],
|
||
"groq_models": [
|
||
{
|
||
"id": "openai/gpt-oss-20b",
|
||
"rpm": 30,
|
||
"rpd": "1K",
|
||
"tpm": "8K",
|
||
"tpd": "200K",
|
||
"speed": "1200+",
|
||
"use_case": "Ультра-быстрый fallback для лёгких ролей (markdown-validator)."
|
||
},
|
||
{
|
||
"id": "llama-3.1-8b-instant",
|
||
"rpm": 30,
|
||
"rpd": "14.4K",
|
||
"tpm": "6K",
|
||
"tpd": "500K",
|
||
"speed": "~800",
|
||
"use_case": "14.4K RPD! Самый высокий лимит. Для health-check / ping ролей."
|
||
},
|
||
{
|
||
"id": "groq/compound",
|
||
"rpm": 30,
|
||
"rpd": "250",
|
||
"tpm": "70K",
|
||
"tpd": "—",
|
||
"speed": "varies",
|
||
"use_case": "Мультимодельная агрегация. Для research-задач."
|
||
},
|
||
{
|
||
"id": "groq/compound-mini",
|
||
"rpm": 30,
|
||
"rpd": "250",
|
||
"tpm": "70K",
|
||
"tpd": "—",
|
||
"speed": "varies",
|
||
"use_case": "Лёгкая версия compound."
|
||
},
|
||
{
|
||
"id": "llama-prompt-guard-2",
|
||
"rpm": 30,
|
||
"rpd": "14.4K",
|
||
"tpm": "15K",
|
||
"tpd": "500K",
|
||
"speed": "~1K",
|
||
"use_case": "Security: входной фильтр для security-auditor (14.4K RPD!)."
|
||
}
|
||
],
|
||
"agent_model_scores": [
|
||
{
|
||
"agent": "lead-developer",
|
||
"current_model_index": 6,
|
||
"current_model_id": "nemotron-3-super",
|
||
"reasoning_effort": "H",
|
||
"scores": {
|
||
"qwen3-coder-480b": 92,
|
||
"minimax-m2.5": 86,
|
||
"minimax-m2.7": 82,
|
||
"nemotron-3-super": 70,
|
||
"glm-5.1": 68,
|
||
"deepseek-v4-pro-max": 88,
|
||
"qwen3-5-122b": 66,
|
||
"qwen3-coder-next": 80,
|
||
"qwen3-6-plus": 88,
|
||
"kimi-k2-6": 90
|
||
}
|
||
},
|
||
{
|
||
"agent": "frontend-developer",
|
||
"current_model_index": -1,
|
||
"current_model_id": "qwen3-coder:480b",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 86,
|
||
"minimax-m2.5": 92,
|
||
"minimax-m2.7": 88,
|
||
"nemotron-3-super": 62,
|
||
"glm-5.1": 56,
|
||
"deepseek-v4-pro-max": 82,
|
||
"qwen3-5-122b": 60,
|
||
"qwen3-coder-next": 76,
|
||
"qwen3-6-plus": 88,
|
||
"kimi-k2-6": 86
|
||
}
|
||
},
|
||
{
|
||
"agent": "php-developer",
|
||
"current_model_index": -1,
|
||
"current_model_id": "qwen3-coder:480b",
|
||
"reasoning_effort": "H",
|
||
"scores": {
|
||
"qwen3-coder-480b": 87,
|
||
"minimax-m2.5": 76,
|
||
"minimax-m2.7": 72,
|
||
"nemotron-3-super": 64,
|
||
"glm-5.1": 56,
|
||
"deepseek-v4-pro-max": 74,
|
||
"qwen3-5-122b": 60,
|
||
"qwen3-coder-next": 76,
|
||
"qwen3-6-plus": 84,
|
||
"kimi-k2-6": 86
|
||
}
|
||
},
|
||
{
|
||
"agent": "python-developer",
|
||
"current_model_index": -1,
|
||
"current_model_id": "qwen3-coder:480b",
|
||
"reasoning_effort": "H",
|
||
"scores": {
|
||
"qwen3-coder-480b": 90,
|
||
"minimax-m2.5": 82,
|
||
"minimax-m2.7": 78,
|
||
"nemotron-3-super": 66,
|
||
"glm-5.1": 60,
|
||
"deepseek-v4-pro-max": 78,
|
||
"qwen3-5-122b": 64,
|
||
"qwen3-coder-next": 78,
|
||
"qwen3-6-plus": 88,
|
||
"kimi-k2-6": 88
|
||
}
|
||
},
|
||
{
|
||
"agent": "backend-developer",
|
||
"current_model_index": -1,
|
||
"current_model_id": "qwen3-coder:480b",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 91,
|
||
"minimax-m2.5": 84,
|
||
"minimax-m2.7": 80,
|
||
"nemotron-3-super": 68,
|
||
"glm-5.1": 63,
|
||
"deepseek-v4-pro-max": 86,
|
||
"qwen3-5-122b": 62,
|
||
"qwen3-coder-next": 78,
|
||
"qwen3-6-plus": 87,
|
||
"kimi-k2-6": 90
|
||
}
|
||
},
|
||
{
|
||
"agent": "go-developer",
|
||
"current_model_index": -1,
|
||
"current_model_id": "qwen3-coder:480b",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 85,
|
||
"minimax-m2.5": 78,
|
||
"minimax-m2.7": 74,
|
||
"nemotron-3-super": 66,
|
||
"glm-5.1": 58,
|
||
"deepseek-v4-pro-max": 88,
|
||
"qwen3-5-122b": 58,
|
||
"qwen3-coder-next": 74,
|
||
"qwen3-6-plus": 82,
|
||
"kimi-k2-6": 86
|
||
}
|
||
},
|
||
{
|
||
"agent": "flutter-developer",
|
||
"current_model_index": -1,
|
||
"current_model_id": "qwen3-coder:480b",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 86,
|
||
"minimax-m2.5": 70,
|
||
"minimax-m2.7": 66,
|
||
"nemotron-3-super": 60,
|
||
"glm-5.1": 53,
|
||
"deepseek-v4-pro-max": 78,
|
||
"qwen3-5-122b": 58,
|
||
"qwen3-coder-next": 74,
|
||
"qwen3-6-plus": 82,
|
||
"kimi-k2-6": 84
|
||
}
|
||
},
|
||
{
|
||
"agent": "devops-engineer",
|
||
"current_model_index": 6,
|
||
"current_model_id": "nemotron-3-super",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 66,
|
||
"minimax-m2.5": 53,
|
||
"minimax-m2.7": 48,
|
||
"nemotron-3-super": 78,
|
||
"glm-5.1": 75,
|
||
"deepseek-v4-pro-max": 86,
|
||
"qwen3-5-122b": 70,
|
||
"qwen3-coder-next": 54,
|
||
"qwen3-6-plus": 76,
|
||
"kimi-k2-6": 88
|
||
}
|
||
},
|
||
{
|
||
"agent": "sdet-engineer",
|
||
"current_model_index": -1,
|
||
"current_model_id": "qwen3-coder:480b",
|
||
"reasoning_effort": "H",
|
||
"scores": {
|
||
"qwen3-coder-480b": 88,
|
||
"minimax-m2.5": 84,
|
||
"minimax-m2.7": 80,
|
||
"nemotron-3-super": 70,
|
||
"glm-5.1": 63,
|
||
"deepseek-v4-pro-max": 84,
|
||
"qwen3-5-122b": 64,
|
||
"qwen3-coder-next": 78,
|
||
"qwen3-6-plus": 84,
|
||
"kimi-k2-6": 87
|
||
}
|
||
},
|
||
{
|
||
"agent": "code-skeptic",
|
||
"current_model_index": 1,
|
||
"current_model_id": "minimax-m2.5",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 82,
|
||
"minimax-m2.5": 85,
|
||
"minimax-m2.7": 80,
|
||
"nemotron-3-super": 73,
|
||
"glm-5.1": 72,
|
||
"deepseek-v4-pro-max": 82,
|
||
"qwen3-5-122b": 70,
|
||
"qwen3-coder-next": 72,
|
||
"qwen3-6-plus": 80,
|
||
"kimi-k2-6": 82
|
||
}
|
||
},
|
||
{
|
||
"agent": "security-auditor",
|
||
"current_model_index": 6,
|
||
"current_model_id": "nemotron-3-super",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 76,
|
||
"minimax-m2.5": 74,
|
||
"minimax-m2.7": 68,
|
||
"nemotron-3-super": 76,
|
||
"glm-5.1": 68,
|
||
"deepseek-v4-pro-max": 80,
|
||
"qwen3-5-122b": 72,
|
||
"qwen3-coder-next": 64,
|
||
"qwen3-6-plus": 75,
|
||
"kimi-k2-6": 80
|
||
}
|
||
},
|
||
{
|
||
"agent": "performance-engineer",
|
||
"current_model_index": 6,
|
||
"current_model_id": "nemotron-3-super",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 78,
|
||
"minimax-m2.5": 75,
|
||
"minimax-m2.7": 70,
|
||
"nemotron-3-super": 78,
|
||
"glm-5.1": 74,
|
||
"deepseek-v4-pro-max": 84,
|
||
"qwen3-5-122b": 70,
|
||
"qwen3-coder-next": 67,
|
||
"qwen3-6-plus": 76,
|
||
"kimi-k2-6": 82
|
||
}
|
||
},
|
||
{
|
||
"agent": "the-fixer",
|
||
"current_model_index": 1,
|
||
"current_model_id": "minimax-m2.5",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 89,
|
||
"minimax-m2.5": 88,
|
||
"minimax-m2.7": 84,
|
||
"nemotron-3-super": 71,
|
||
"glm-5.1": 64,
|
||
"deepseek-v4-pro-max": 88,
|
||
"qwen3-5-122b": 64,
|
||
"qwen3-coder-next": 82,
|
||
"qwen3-6-plus": 86,
|
||
"kimi-k2-6": 90
|
||
}
|
||
},
|
||
{
|
||
"agent": "browser-automation",
|
||
"current_model_index": -1,
|
||
"current_model_id": "qwen3-coder:480b",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 87,
|
||
"minimax-m2.5": 72,
|
||
"minimax-m2.7": 68,
|
||
"nemotron-3-super": 61,
|
||
"glm-5.1": 53,
|
||
"deepseek-v4-pro-max": 82,
|
||
"qwen3-5-122b": 56,
|
||
"qwen3-coder-next": 72,
|
||
"qwen3-6-plus": 82,
|
||
"kimi-k2-6": 86
|
||
}
|
||
},
|
||
{
|
||
"agent": "visual-tester",
|
||
"current_model_index": -1,
|
||
"current_model_id": "qwen3-coder:480b",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 82,
|
||
"minimax-m2.5": 68,
|
||
"minimax-m2.7": 64,
|
||
"nemotron-3-super": 55,
|
||
"glm-5.1": 48,
|
||
"deepseek-v4-pro-max": 76,
|
||
"qwen3-5-122b": 54,
|
||
"qwen3-coder-next": 66,
|
||
"qwen3-6-plus": 76,
|
||
"kimi-k2-6": 78
|
||
}
|
||
},
|
||
{
|
||
"agent": "system-analyst",
|
||
"current_model_index": 6,
|
||
"current_model_id": "nemotron-3-super",
|
||
"reasoning_effort": "H",
|
||
"scores": {
|
||
"qwen3-coder-480b": 70,
|
||
"minimax-m2.5": 66,
|
||
"minimax-m2.7": 63,
|
||
"nemotron-3-super": 74,
|
||
"glm-5.1": 82,
|
||
"deepseek-v4-pro-max": 88,
|
||
"qwen3-5-122b": 76,
|
||
"qwen3-coder-next": 58,
|
||
"qwen3-6-plus": 80,
|
||
"kimi-k2-6": 86
|
||
}
|
||
},
|
||
{
|
||
"agent": "requirement-refiner",
|
||
"current_model_index": 7,
|
||
"current_model_id": "glm-5.1",
|
||
"reasoning_effort": "H",
|
||
"scores": {
|
||
"qwen3-coder-480b": 66,
|
||
"minimax-m2.5": 62,
|
||
"minimax-m2.7": 60,
|
||
"nemotron-3-super": 72,
|
||
"glm-5.1": 80,
|
||
"deepseek-v4-pro-max": 82,
|
||
"qwen3-5-122b": 74,
|
||
"qwen3-coder-next": 54,
|
||
"qwen3-6-plus": 78,
|
||
"kimi-k2-6": 82
|
||
}
|
||
},
|
||
{
|
||
"agent": "history-miner",
|
||
"current_model_index": 6,
|
||
"current_model_id": "nemotron-3-super",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 68,
|
||
"minimax-m2.5": 60,
|
||
"minimax-m2.7": 56,
|
||
"nemotron-3-super": 85,
|
||
"glm-5.1": 78,
|
||
"deepseek-v4-pro-max": 86,
|
||
"qwen3-5-122b": 72,
|
||
"qwen3-coder-next": 56,
|
||
"qwen3-6-plus": 84,
|
||
"kimi-k2-6": 82
|
||
}
|
||
},
|
||
{
|
||
"agent": "capability-analyst",
|
||
"current_model_index": 7,
|
||
"current_model_id": "glm-5.1",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 72,
|
||
"minimax-m2.5": 68,
|
||
"minimax-m2.7": 66,
|
||
"nemotron-3-super": 76,
|
||
"glm-5.1": 78,
|
||
"deepseek-v4-pro-max": 82,
|
||
"qwen3-5-122b": 75,
|
||
"qwen3-coder-next": 60,
|
||
"qwen3-6-plus": 79,
|
||
"kimi-k2-6": 82
|
||
}
|
||
},
|
||
{
|
||
"agent": "orchestrator",
|
||
"current_model_index": -1,
|
||
"current_model_id": "kimi-k2.6:cloud",
|
||
"reasoning_effort": "H",
|
||
"scores": {
|
||
"qwen3-coder-480b": 74,
|
||
"minimax-m2.5": 70,
|
||
"minimax-m2.7": 68,
|
||
"nemotron-3-super": 80,
|
||
"glm-5.1": 82,
|
||
"deepseek-v4-pro-max": 86,
|
||
"qwen3-5-122b": 78,
|
||
"qwen3-coder-next": 62,
|
||
"qwen3-6-plus": 84,
|
||
"kimi-k2-6": 92
|
||
}
|
||
},
|
||
{
|
||
"agent": "release-manager",
|
||
"current_model_index": 7,
|
||
"current_model_id": "glm-5.1",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 72,
|
||
"minimax-m2.5": 66,
|
||
"minimax-m2.7": 64,
|
||
"nemotron-3-super": 74,
|
||
"glm-5.1": 76,
|
||
"deepseek-v4-pro-max": 78,
|
||
"qwen3-5-122b": 72,
|
||
"qwen3-coder-next": 60,
|
||
"qwen3-6-plus": 76,
|
||
"kimi-k2-6": 78
|
||
}
|
||
},
|
||
{
|
||
"agent": "evaluator",
|
||
"current_model_index": 7,
|
||
"current_model_id": "glm-5.1",
|
||
"reasoning_effort": "H",
|
||
"scores": {
|
||
"qwen3-coder-480b": 70,
|
||
"minimax-m2.5": 73,
|
||
"minimax-m2.7": 70,
|
||
"nemotron-3-super": 78,
|
||
"glm-5.1": 78,
|
||
"deepseek-v4-pro-max": 84,
|
||
"qwen3-5-122b": 76,
|
||
"qwen3-coder-next": 58,
|
||
"qwen3-6-plus": 81,
|
||
"kimi-k2-6": 84
|
||
}
|
||
},
|
||
{
|
||
"agent": "prompt-optimizer",
|
||
"current_model_index": 7,
|
||
"current_model_id": "glm-5.1",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 76,
|
||
"minimax-m2.5": 74,
|
||
"minimax-m2.7": 72,
|
||
"nemotron-3-super": 76,
|
||
"glm-5.1": 75,
|
||
"deepseek-v4-pro-max": 80,
|
||
"qwen3-5-122b": 74,
|
||
"qwen3-coder-next": 64,
|
||
"qwen3-6-plus": 83,
|
||
"kimi-k2-6": 82
|
||
}
|
||
},
|
||
{
|
||
"agent": "product-owner",
|
||
"current_model_index": 7,
|
||
"current_model_id": "glm-5.1",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 60,
|
||
"minimax-m2.5": 56,
|
||
"minimax-m2.7": 54,
|
||
"nemotron-3-super": 74,
|
||
"glm-5.1": 78,
|
||
"deepseek-v4-pro-max": 76,
|
||
"qwen3-5-122b": 74,
|
||
"qwen3-coder-next": 48,
|
||
"qwen3-6-plus": 78,
|
||
"kimi-k2-6": 76
|
||
}
|
||
},
|
||
{
|
||
"agent": "pipeline-judge",
|
||
"current_model_index": 7,
|
||
"current_model_id": "glm-5.1",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 64,
|
||
"minimax-m2.5": 68,
|
||
"minimax-m2.7": 65,
|
||
"nemotron-3-super": 78,
|
||
"glm-5.1": 76,
|
||
"deepseek-v4-pro-max": 82,
|
||
"qwen3-5-122b": 74,
|
||
"qwen3-coder-next": 56,
|
||
"qwen3-6-plus": 80,
|
||
"kimi-k2-6": 84
|
||
}
|
||
},
|
||
{
|
||
"agent": "workflow-architect",
|
||
"current_model_index": 7,
|
||
"current_model_id": "glm-5.1",
|
||
"reasoning_effort": "H",
|
||
"scores": {
|
||
"qwen3-coder-480b": 68,
|
||
"minimax-m2.5": 62,
|
||
"minimax-m2.7": 60,
|
||
"nemotron-3-super": 76,
|
||
"glm-5.1": 76,
|
||
"deepseek-v4-pro-max": 80,
|
||
"qwen3-5-122b": 72,
|
||
"qwen3-coder-next": 56,
|
||
"qwen3-6-plus": 80,
|
||
"kimi-k2-6": 82
|
||
}
|
||
},
|
||
{
|
||
"agent": "markdown-validator",
|
||
"current_model_index": -1,
|
||
"current_model_id": "nemotron-3-nano:30b",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 43,
|
||
"minimax-m2.5": 38,
|
||
"minimax-m2.7": 36,
|
||
"nemotron-3-super": 52,
|
||
"glm-5.1": 55,
|
||
"deepseek-v4-pro-max": 68,
|
||
"qwen3-5-122b": 56,
|
||
"qwen3-coder-next": 40,
|
||
"qwen3-6-plus": 50,
|
||
"kimi-k2-6": 56
|
||
}
|
||
},
|
||
{
|
||
"agent": "agent-architect",
|
||
"current_model_index": 7,
|
||
"current_model_id": "glm-5.1",
|
||
"reasoning_effort": "H",
|
||
"scores": {
|
||
"qwen3-coder-480b": 78,
|
||
"minimax-m2.5": 72,
|
||
"minimax-m2.7": 70,
|
||
"nemotron-3-super": 78,
|
||
"glm-5.1": 76,
|
||
"deepseek-v4-pro-max": 82,
|
||
"qwen3-5-122b": 76,
|
||
"qwen3-coder-next": 66,
|
||
"qwen3-6-plus": 82,
|
||
"kimi-k2-6": 86
|
||
}
|
||
},
|
||
{
|
||
"agent": "planner",
|
||
"current_model_index": 6,
|
||
"current_model_id": "nemotron-3-super",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 72,
|
||
"minimax-m2.5": 68,
|
||
"minimax-m2.7": 66,
|
||
"nemotron-3-super": 80,
|
||
"glm-5.1": 78,
|
||
"deepseek-v4-pro-max": 88,
|
||
"qwen3-5-122b": 78,
|
||
"qwen3-coder-next": 60,
|
||
"qwen3-6-plus": 85,
|
||
"kimi-k2-6": 86
|
||
}
|
||
},
|
||
{
|
||
"agent": "reflector",
|
||
"current_model_index": 6,
|
||
"current_model_id": "nemotron-3-super",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 68,
|
||
"minimax-m2.5": 66,
|
||
"minimax-m2.7": 64,
|
||
"nemotron-3-super": 78,
|
||
"glm-5.1": 76,
|
||
"deepseek-v4-pro-max": 84,
|
||
"qwen3-5-122b": 76,
|
||
"qwen3-coder-next": 56,
|
||
"qwen3-6-plus": 82,
|
||
"kimi-k2-6": 80
|
||
}
|
||
},
|
||
{
|
||
"agent": "memory-manager",
|
||
"current_model_index": 6,
|
||
"current_model_id": "nemotron-3-super",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 63,
|
||
"minimax-m2.5": 58,
|
||
"minimax-m2.7": 56,
|
||
"nemotron-3-super": 86,
|
||
"glm-5.1": 72,
|
||
"deepseek-v4-pro-max": 86,
|
||
"qwen3-5-122b": 70,
|
||
"qwen3-coder-next": 50,
|
||
"qwen3-6-plus": 87,
|
||
"kimi-k2-6": 84
|
||
}
|
||
},
|
||
{
|
||
"agent": "architect-indexer",
|
||
"current_model_index": 7,
|
||
"current_model_id": "glm-5.1",
|
||
"reasoning_effort": "H",
|
||
"scores": {
|
||
"qwen3-coder-480b": 70,
|
||
"minimax-m2.5": 64,
|
||
"minimax-m2.7": 62,
|
||
"nemotron-3-super": 74,
|
||
"glm-5.1": 80,
|
||
"deepseek-v4-pro-max": 78,
|
||
"qwen3-5-122b": 76,
|
||
"qwen3-coder-next": 58,
|
||
"qwen3-6-plus": 80,
|
||
"kimi-k2-6": 84
|
||
}
|
||
}
|
||
],
|
||
"if_scores": {
|
||
"qwen3-coder-480b": 88,
|
||
"minimax-m2.5": 82,
|
||
"minimax-m2.7": 78,
|
||
"nemotron-3-super": 85,
|
||
"glm-5.1": 80,
|
||
"deepseek-v4-pro-max": 88,
|
||
"qwen3-5-122b": 86,
|
||
"qwen3-coder-next": 84,
|
||
"qwen3-6-plus": 90,
|
||
"kimi-k2-6": 91,
|
||
"deepseek-v4-flash": 86
|
||
},
|
||
"agent_current_config": [
|
||
{
|
||
"agent": "lead-developer",
|
||
"model": "ollama-cloud/nemotron-3-super",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "nemotron",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "frontend-developer",
|
||
"model": "ollama-cloud/qwen3-coder:480b",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "qwen",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "php-developer",
|
||
"model": "ollama-cloud/qwen3-coder:480b",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "qwen",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "python-developer",
|
||
"model": "ollama-cloud/qwen3-coder:480b",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "qwen",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "backend-developer",
|
||
"model": "ollama-cloud/qwen3-coder:480b",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "qwen",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "go-developer",
|
||
"model": "ollama-cloud/qwen3-coder:480b",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "qwen",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "flutter-developer",
|
||
"model": "ollama-cloud/qwen3-coder:480b",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "qwen",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "devops-engineer",
|
||
"model": "ollama-cloud/nemotron-3-super",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "nemotron",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "sdet-engineer",
|
||
"model": "ollama-cloud/qwen3-coder:480b",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "qwen",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "code-skeptic",
|
||
"model": "ollama-cloud/minimax-m2.5",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "minimax",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "security-auditor",
|
||
"model": "ollama-cloud/nemotron-3-super",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "nemotron",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "performance-engineer",
|
||
"model": "ollama-cloud/nemotron-3-super",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "nemotron",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "the-fixer",
|
||
"model": "ollama-cloud/minimax-m2.5",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "minimax",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "browser-automation",
|
||
"model": "ollama-cloud/qwen3-coder:480b",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "qwen",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "visual-tester",
|
||
"model": "ollama-cloud/qwen3-coder:480b",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "qwen",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "system-analyst",
|
||
"model": "ollama-cloud/nemotron-3-super",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "nemotron",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "requirement-refiner",
|
||
"model": "ollama-cloud/glm-5.1",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "glm",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "history-miner",
|
||
"model": "ollama-cloud/nemotron-3-super",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "nemotron",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "capability-analyst",
|
||
"model": "ollama-cloud/glm-5.1",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "glm",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "orchestrator",
|
||
"model": "ollama-cloud/kimi-k2.6:cloud",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "kimi",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "release-manager",
|
||
"model": "ollama-cloud/glm-5.1",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "glm",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "evaluator",
|
||
"model": "ollama-cloud/glm-5.1",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "glm",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "prompt-optimizer",
|
||
"model": "ollama-cloud/glm-5.1",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "glm",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "product-owner",
|
||
"model": "ollama-cloud/glm-5.1",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "glm",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "pipeline-judge",
|
||
"model": "ollama-cloud/glm-5.1",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "glm",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "workflow-architect",
|
||
"model": "ollama-cloud/glm-5.1",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "glm",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "markdown-validator",
|
||
"model": "ollama-cloud/nemotron-3-nano:30b",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "nemotron",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "agent-architect",
|
||
"model": "ollama-cloud/glm-5.1",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "glm",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "planner",
|
||
"model": "ollama-cloud/nemotron-3-super",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "nemotron",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "reflector",
|
||
"model": "ollama-cloud/nemotron-3-super",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "nemotron",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "memory-manager",
|
||
"model": "ollama-cloud/nemotron-3-super",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "nemotron",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "architect-indexer",
|
||
"model": "ollama-cloud/glm-5.1",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "glm",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
}
|
||
],
|
||
"recommendations": [
|
||
{
|
||
"agent": "[built-in] debug",
|
||
"from_model": "glm-5.1 (88)",
|
||
"from_provider": "Ollama",
|
||
"to_model": "V4-Pro Max (★90) / K2.6 (★90) RE:High",
|
||
"to_provider": "Ollama Cloud",
|
||
"impact": "high",
|
||
"quality_change": "+2%",
|
||
"speed_change": "~1x",
|
||
"context_change": "200K→1M",
|
||
"provider_change": "Ollama Cloud",
|
||
"rationale": "★ матрицы: V4-Pro=90 и K2.6=90 (TIE!), GLM-5.1=88. V4-Pro: LiveCodeBench 93.5(#1!), Terminal 67.9, 1M ctx для полного проекта. K2.6: 13h auto sessions. Оба лучше GLM-5.1. RE:High для debug."
|
||
},
|
||
{
|
||
"agent": "planner",
|
||
"from_model": "nemotron-3-super (80)",
|
||
"from_provider": "Ollama",
|
||
"to_model": "V4-Pro Max (★88) RE:High",
|
||
"to_provider": "Ollama Cloud",
|
||
"impact": "high",
|
||
"quality_change": "+10%",
|
||
"speed_change": "~1x",
|
||
"context_change": "1M",
|
||
"provider_change": "Ollama Cloud",
|
||
"rationale": "★ матрицы: V4-Pro=88(лучший!), K2.6=86, GLM-5.1=85, Nem=80. V4-Pro: GPQA 90.1 (reasoning), 1M ctx сохраняется (vs потеря при K2.6). RE:High для chain-of-thought planning."
|
||
},
|
||
{
|
||
"agent": "go-developer",
|
||
"from_model": "qwen3-coder:480b (85)",
|
||
"from_provider": "Ollama",
|
||
"to_model": "V4-Pro Max (★88) RE:Medium",
|
||
"to_provider": "Ollama Cloud",
|
||
"impact": "medium",
|
||
"quality_change": "+4%",
|
||
"speed_change": "~1x",
|
||
"context_change": "256K→1M",
|
||
"provider_change": "Ollama Cloud",
|
||
"rationale": "★ матрицы: V4-Pro=88(лучший для Go!), K2.6=86, Qwen3Coder=85. DeepSeek модели традиционно сильны в Go/Rust. 1M ctx для крупных Go-проектов."
|
||
},
|
||
{
|
||
"agent": "history-miner",
|
||
"from_model": "nemotron-3-super (★85)",
|
||
"from_provider": "Ollama",
|
||
"to_model": "V4-Pro Max (86) + Nem fallback",
|
||
"to_provider": "Hybrid",
|
||
"impact": "medium",
|
||
"quality_change": "+1%",
|
||
"speed_change": "~1x",
|
||
"context_change": "1M",
|
||
"provider_change": "Ollama Cloud + Ollama",
|
||
"rationale": "V4-Pro=86 чуть лучше Nemotron=85. 1M ctx у обоих. MRCR 83.5 у V4-Pro — лучшее long-context retrieval. Nemotron как fallback (RULER 91.75%)."
|
||
},
|
||
{
|
||
"agent": "frontend-dev → M2.5",
|
||
"from_model": "qwen3-coder (90)",
|
||
"from_provider": "Ollama",
|
||
"to_model": "MiniMax M2.5 (★92) ✅",
|
||
"to_provider": "Ollama",
|
||
"impact": "low",
|
||
"quality_change": "+2%",
|
||
"speed_change": "=",
|
||
"context_change": "204K",
|
||
"provider_change": "Ollama",
|
||
"rationale": "Spec-writing, UI architect. APPLIED."
|
||
},
|
||
{
|
||
"agent": "devops → K2.6",
|
||
"from_model": "deepseek-v3.2",
|
||
"from_provider": "",
|
||
"to_model": "kimi-k2.6:cloud ✅",
|
||
"to_provider": "Ollama Cloud",
|
||
"impact": "low",
|
||
"quality_change": "+35%",
|
||
"speed_change": "=",
|
||
"context_change": "256K",
|
||
"provider_change": "",
|
||
"rationale": "APPLIED."
|
||
},
|
||
{
|
||
"agent": "orchestrator",
|
||
"from_model": "glm-5.1 (★90)",
|
||
"from_provider": "Ollama",
|
||
"to_model": "K2.6 (★92) RE:Medium",
|
||
"to_provider": "Ollama Cloud",
|
||
"impact": "medium",
|
||
"quality_change": "+2%",
|
||
"speed_change": "~1x",
|
||
"context_change": "200K→256K",
|
||
"provider_change": "Ollama Cloud",
|
||
"rationale": "K2.6=92★ всё ещё лучший для orchestration. V4-Pro=86 слабее. 300 sub-agent swarm."
|
||
},
|
||
{
|
||
"agent": "the-fixer",
|
||
"from_model": "minimax-m2.5 (★88)",
|
||
"from_provider": "Ollama",
|
||
"to_model": "V4-Pro (★88) / K2.6 (★90)",
|
||
"to_provider": "Ollama Cloud",
|
||
"impact": "medium",
|
||
"quality_change": "+2%",
|
||
"speed_change": "~1x",
|
||
"context_change": "128K→1M/256K",
|
||
"provider_change": "Ollama Cloud",
|
||
"rationale": "K2.6=90(лучший), V4-Pro=88=M2.5. M2.5 SWE-bench 80.2% стабильнее. Не срочно."
|
||
},
|
||
{
|
||
"agent": "Qwen3-Coder (7 coding)",
|
||
"from_model": "qwen3-coder",
|
||
"from_provider": "Ollama",
|
||
"to_model": "✅",
|
||
"to_provider": "",
|
||
"impact": "low",
|
||
"quality_change": "=0%",
|
||
"speed_change": "=",
|
||
"context_change": "256K",
|
||
"provider_change": "Ollama",
|
||
"rationale": "lead=92★, backend=91★, python=90★."
|
||
},
|
||
{
|
||
"agent": "GLM-5.1 (12 agents)",
|
||
"from_model": "glm-5.1",
|
||
"from_provider": "Ollama",
|
||
"to_model": "✅",
|
||
"to_provider": "",
|
||
"impact": "low",
|
||
"quality_change": "=0%",
|
||
"speed_change": "=",
|
||
"context_change": "200K",
|
||
"provider_change": "",
|
||
"rationale": "orchestrator=90, system-analyst=90. SWE-Pro #1."
|
||
},
|
||
{
|
||
"agent": "Kimi K2.6 (3 agents)",
|
||
"from_model": "kimi-k2.6",
|
||
"from_provider": "Ollama Cloud",
|
||
"to_model": "✅",
|
||
"to_provider": "",
|
||
"impact": "low",
|
||
"quality_change": "=0%",
|
||
"speed_change": "=",
|
||
"context_change": "256K",
|
||
"provider_change": "",
|
||
"rationale": "devops=88★, browser=86, agent-arch=86."
|
||
}
|
||
],
|
||
"impact_data": [
|
||
{
|
||
"category": "debug GLM5.1→V4-Pro/K2.6",
|
||
"before": 88,
|
||
"after": 90,
|
||
"delta": 2,
|
||
"notes": "LiveCodeBench 93.5, Terminal 67.9"
|
||
},
|
||
{
|
||
"category": "planner Nem→V4-Pro Max",
|
||
"before": 80,
|
||
"after": 88,
|
||
"delta": 8,
|
||
"notes": "★88! GPQA 90.1, 1M ctx"
|
||
},
|
||
{
|
||
"category": "go-dev Coder→V4-Pro Max",
|
||
"before": 85,
|
||
"after": 88,
|
||
"delta": 3,
|
||
"notes": "★88! Go/Rust specialist, 1M ctx"
|
||
},
|
||
{
|
||
"category": "history-miner →V4-Pro",
|
||
"before": 85,
|
||
"after": 86,
|
||
"delta": 1,
|
||
"notes": "MRCR 83.5, long-context"
|
||
},
|
||
{
|
||
"category": "orchestrator →K2.6 (next)",
|
||
"before": 90,
|
||
"after": 92,
|
||
"delta": 2,
|
||
"notes": "300 sub-agent swarm"
|
||
},
|
||
{
|
||
"category": "frontend → M2.5 ✅",
|
||
"before": 90,
|
||
"after": 92,
|
||
"delta": 2,
|
||
"notes": "Spec-writing, UI architect"
|
||
},
|
||
{
|
||
"category": "devops → K2.6 ✅",
|
||
"before": 65,
|
||
"after": 88,
|
||
"delta": 23,
|
||
"notes": "IF:65→91! Terminal 66.7"
|
||
},
|
||
{
|
||
"category": "Qwen3-Coder (7) ✅",
|
||
"before": 90,
|
||
"after": 90,
|
||
"delta": 0,
|
||
"notes": "SOTA coding"
|
||
},
|
||
{
|
||
"category": "GLM-5.1 (12) ✅",
|
||
"before": 87,
|
||
"after": 87,
|
||
"delta": 0,
|
||
"notes": "SWE-Pro #1"
|
||
},
|
||
{
|
||
"category": "Nemotron Super (6) ✅",
|
||
"before": 82,
|
||
"after": 82,
|
||
"delta": 0,
|
||
"notes": "1M ctx, RULER 91.75%"
|
||
}
|
||
],
|
||
"benchmark_comparison": {
|
||
"benchmarks": [
|
||
{
|
||
"name": "SWE-V",
|
||
"full_name": "SWE-Bench Verified",
|
||
"description": "GitHub issue resolution (500 tasks)",
|
||
"roles": "lead-dev, backend, fixer"
|
||
},
|
||
{
|
||
"name": "SWE-P",
|
||
"full_name": "SWE-Bench Pro",
|
||
"description": "Multi-lang, decontaminated (1865 tasks)",
|
||
"roles": "all coding agents"
|
||
},
|
||
{
|
||
"name": "T-Bench",
|
||
"full_name": "Terminal-Bench 2.0",
|
||
"description": "CLI/shell multi-step tasks",
|
||
"roles": "devops, planner, orchestrator"
|
||
},
|
||
{
|
||
"name": "LCB",
|
||
"full_name": "LiveCodeBench",
|
||
"description": "Code gen from specs (held-out)",
|
||
"roles": "sdet, go-dev, python-dev"
|
||
},
|
||
{
|
||
"name": "GPQA",
|
||
"full_name": "GPQA Diamond",
|
||
"description": "PhD-level reasoning",
|
||
"roles": "system-analyst, planner"
|
||
},
|
||
{
|
||
"name": "BComp",
|
||
"full_name": "BrowseComp",
|
||
"description": "Web research & synthesis",
|
||
"roles": "browser-auto, capability-analyst"
|
||
},
|
||
{
|
||
"name": "HLE",
|
||
"full_name": "Humanity's Last Exam",
|
||
"description": "Frontier knowledge (with tools)",
|
||
"roles": "agent-architect, evaluator"
|
||
},
|
||
{
|
||
"name": "Ctx",
|
||
"full_name": "Context Window",
|
||
"description": "Max tokens in one pass",
|
||
"roles": "history-miner, memory-mgr"
|
||
},
|
||
{
|
||
"name": "$/M",
|
||
"full_name": "Cost per 1M input",
|
||
"description": "API pricing",
|
||
"roles": "all agents (ROI)"
|
||
}
|
||
],
|
||
"closed_source_models": [
|
||
{
|
||
"name": "Claude Opus 4.7",
|
||
"organization": "Anthropic",
|
||
"scores": [
|
||
87.6,
|
||
64.3,
|
||
69.4,
|
||
null,
|
||
94.2,
|
||
79.3,
|
||
53,
|
||
"1M",
|
||
"$5"
|
||
],
|
||
"color": "#c084fc",
|
||
"note": "#1 апрель 2026"
|
||
},
|
||
{
|
||
"name": "GPT-5.5",
|
||
"organization": "OpenAI",
|
||
"scores": [
|
||
null,
|
||
58.6,
|
||
82.7,
|
||
null,
|
||
null,
|
||
83.4,
|
||
57.2,
|
||
"1M",
|
||
"$5"
|
||
],
|
||
"color": "#ff6b81",
|
||
"note": "Новейший, Terminal #1"
|
||
},
|
||
{
|
||
"name": "GPT-5.4",
|
||
"organization": "OpenAI",
|
||
"scores": [
|
||
78.2,
|
||
59.1,
|
||
75.1,
|
||
null,
|
||
94.4,
|
||
82.7,
|
||
58.7,
|
||
"200K",
|
||
"$2.50"
|
||
],
|
||
"color": "#ff6b81",
|
||
"note": "Reasoning, math"
|
||
},
|
||
{
|
||
"name": "Gemini 3.1 Pro",
|
||
"organization": "Google",
|
||
"scores": [
|
||
80.6,
|
||
46.1,
|
||
68.5,
|
||
null,
|
||
94.3,
|
||
85.9,
|
||
51.4,
|
||
"2M",
|
||
"$2"
|
||
],
|
||
"color": "#facc15",
|
||
"note": "ARC-AGI 77.1%, дешёвый"
|
||
},
|
||
{
|
||
"name": "Claude Sonnet 4.6",
|
||
"organization": "Anthropic",
|
||
"scores": [
|
||
79.6,
|
||
null,
|
||
null,
|
||
null,
|
||
null,
|
||
null,
|
||
null,
|
||
"200K",
|
||
"$3"
|
||
],
|
||
"color": "#c084fc",
|
||
"note": "5× дешевле Opus"
|
||
},
|
||
{
|
||
"name": "GPT-5.3-Codex",
|
||
"organization": "OpenAI",
|
||
"scores": [
|
||
85,
|
||
57,
|
||
77.3,
|
||
null,
|
||
null,
|
||
null,
|
||
null,
|
||
"200K",
|
||
"$6"
|
||
],
|
||
"color": "#ff6b81",
|
||
"note": "Coding specialist"
|
||
}
|
||
],
|
||
"apaw_models": [
|
||
{
|
||
"name": "Kimi K2.6",
|
||
"organization": "APAW",
|
||
"scores": [
|
||
80.2,
|
||
58.6,
|
||
66.7,
|
||
87.2,
|
||
null,
|
||
83.2,
|
||
54,
|
||
"256K",
|
||
"$0.95"
|
||
],
|
||
"color": "#00ff94",
|
||
"note": "devops, browser, architect (3)"
|
||
},
|
||
{
|
||
"name": "GLM-5.1",
|
||
"organization": "APAW",
|
||
"scores": [
|
||
null,
|
||
58.4,
|
||
63.5,
|
||
null,
|
||
86.2,
|
||
68.7,
|
||
null,
|
||
"200K",
|
||
"~$0.50"
|
||
],
|
||
"color": "#00ff94",
|
||
"note": "12 agents! orchestrator, eval..."
|
||
},
|
||
{
|
||
"name": "V4-Pro Max",
|
||
"organization": "APAW",
|
||
"scores": [
|
||
80.6,
|
||
55.4,
|
||
67.9,
|
||
93.5,
|
||
90.1,
|
||
83.4,
|
||
48.2,
|
||
"1M",
|
||
"$0.42"
|
||
],
|
||
"color": "#00d4ff",
|
||
"note": "planner, go-dev (рек.)"
|
||
},
|
||
{
|
||
"name": "Qwen3-Coder 480B",
|
||
"organization": "APAW",
|
||
"scores": [
|
||
66.5,
|
||
null,
|
||
null,
|
||
null,
|
||
null,
|
||
null,
|
||
null,
|
||
"256K",
|
||
"~$0.50"
|
||
],
|
||
"color": "#00ff94",
|
||
"note": "7 coding agents"
|
||
},
|
||
{
|
||
"name": "MiniMax M2.5",
|
||
"organization": "APAW",
|
||
"scores": [
|
||
80.2,
|
||
51.3,
|
||
null,
|
||
null,
|
||
null,
|
||
76.3,
|
||
null,
|
||
"204K",
|
||
"$0.15"
|
||
],
|
||
"color": "#00ff94",
|
||
"note": "frontend, skeptic, fixer (3)"
|
||
},
|
||
{
|
||
"name": "Nemotron Super",
|
||
"organization": "APAW",
|
||
"scores": [
|
||
60.5,
|
||
null,
|
||
null,
|
||
null,
|
||
null,
|
||
null,
|
||
null,
|
||
"1M",
|
||
"~$0.40"
|
||
],
|
||
"color": "#00ff94",
|
||
"note": "6 agents (memory, history)"
|
||
}
|
||
]
|
||
}
|
||
} |