- Update 30 agents to v3 heatmap maximum-score models: * go-dev: qwen3-coder -> deepseek-v4-pro-max (85->88 +3) * planner: nemotron -> deepseek-v4-pro-max (80->88 +8) * perf-engineer: nemotron -> deepseek-v4-pro-max (78->84 +6) * reflector: nemotron -> deepseek-v4-pro-max (78->84 +6) * security: nemotron -> deepseek-v4-pro-max (76->80 +4) * memory-manager: nemotron -> qwen3.6-plus (86->87 +1) * frontend: kimi-k2.5 -> minimax-m2.5 (92) * the-fixer: minimax-m2.5 -> kimi-k2.6 (88->90 +2) * browser-auto: kimi-k2.6 -> qwen3-coder (86->87 +1) * prompt-opt: glm-5.1 -> qwen3.6-plus (82->83 +1) * backend: deepseek-v3.2 -> qwen3-coder (91) * capability-analyst: nemotron -> glm-5.1 (85) * release-man: devstral-2 -> glm-5.1 (82) * evaluator: nemotron -> glm-5.1 (86) * workflow-arch: gpt-oss -> glm-5.1 (84) - Add Model Evolution Guard: * fitness-gate.cjs: rejects downgrades >3 points or <75 score * Normalized model ID lookup (: vs -) * Diff report before any file modifications - Update sync-benchmarks-from-yaml.cjs with fitness gate - Sync kilo-meta.json, kilo.jsonc, .md agent files - Rebuild research-dashboard.html (104KB, 30 agents, 11 models) Total improvement: +105 points across 11 agents Source: v3.html heatmap IF-adjusted composite scores
1718 lines
48 KiB
JSON
1718 lines
48 KiB
JSON
{
|
|
"version": "1.0.0",
|
|
"generated": "2026-04-30T07:00:00Z",
|
|
"source": "capability-index.yaml v3 optimal",
|
|
"total_agents": 30,
|
|
"total_models_tracked": 11,
|
|
"providers": [
|
|
"ollama",
|
|
"ollama-cloud",
|
|
"openrouter",
|
|
"groq"
|
|
],
|
|
"models": [
|
|
{
|
|
"id": "qwen3-coder-480b",
|
|
"name": "Qwen3-Coder 480B",
|
|
"organization": "Qwen",
|
|
"parameters": "480B/35B active",
|
|
"context_window": "256K\u21921M",
|
|
"swe_bench": 66.5,
|
|
"if_score": 88,
|
|
"categories": [
|
|
"coding",
|
|
"agent"
|
|
],
|
|
"description": "SOTA open-source \u043a\u043e\u0434\u0438\u043d\u0433. \u0421\u0440\u0430\u0432\u043d\u0438\u043c \u0441 Claude Sonnet 4.",
|
|
"tags": [
|
|
"coding",
|
|
"agent",
|
|
"tools"
|
|
],
|
|
"openrouter": false,
|
|
"provider": "ollama"
|
|
},
|
|
{
|
|
"id": "minimax-m2.5",
|
|
"name": "MiniMax M2.5",
|
|
"organization": "MiniMax",
|
|
"parameters": "MoE undisclosed",
|
|
"context_window": "128K",
|
|
"swe_bench": 80.2,
|
|
"if_score": 82,
|
|
"categories": [
|
|
"coding",
|
|
"agent"
|
|
],
|
|
"description": "\u041b\u0438\u0434\u0435\u0440 SWE-bench 80.2%. \u041f\u043e\u043b\u043d\u044b\u0439 lifecycle \u0440\u0430\u0437\u0440\u0430\u0431\u043e\u0442\u043a\u0438.",
|
|
"tags": [
|
|
"coding",
|
|
"agent"
|
|
],
|
|
"openrouter": false,
|
|
"provider": "ollama"
|
|
},
|
|
{
|
|
"id": "minimax-m2.7",
|
|
"name": "MiniMax M2.7",
|
|
"organization": "MiniMax",
|
|
"parameters": "~10B active",
|
|
"context_window": "128K",
|
|
"swe_bench": 78,
|
|
"if_score": 80,
|
|
"categories": [
|
|
"coding",
|
|
"agent",
|
|
"efficient"
|
|
],
|
|
"description": "\u0421\u0430\u043c\u043e\u043e\u0431\u0443\u0447\u0430\u0435\u043c\u0430\u044f. 56.2% SWE-Pro. 100 TPS. $0.30/M.",
|
|
"tags": [
|
|
"coding",
|
|
"agent",
|
|
"self-evolving"
|
|
],
|
|
"openrouter": false,
|
|
"provider": "ollama"
|
|
},
|
|
{
|
|
"id": "deepseek-v4-pro-max",
|
|
"name": "DeepSeek V4-Pro",
|
|
"organization": "DeepSeek",
|
|
"parameters": "1.6T/49B active MoE",
|
|
"context_window": "1M",
|
|
"swe_bench": 80.6,
|
|
"if_score": 89,
|
|
"categories": [
|
|
"coding",
|
|
"agent",
|
|
"reasoning"
|
|
],
|
|
"description": "SWE-V 80.6, LiveCodeBench 93.5(#1!), Terminal-Bench 67.9, Codeforces 3206, 1M ctx, 27% FLOPs vs V3.2. MIT.",
|
|
"tags": [
|
|
"coding",
|
|
"agent",
|
|
"thinking",
|
|
"tools"
|
|
],
|
|
"openrouter": false,
|
|
"provider": "ollama-cloud"
|
|
},
|
|
{
|
|
"id": "deepseek-v4-flash",
|
|
"name": "DeepSeek V4-Flash",
|
|
"organization": "DeepSeek",
|
|
"parameters": "284B/13B active MoE",
|
|
"context_window": "1M",
|
|
"swe_bench": 79,
|
|
"if_score": 86,
|
|
"categories": [
|
|
"coding",
|
|
"efficient",
|
|
"agent"
|
|
],
|
|
"description": "SWE-V ~79%, Flash Max = Pro \u0443\u0440\u043e\u0432\u0435\u043d\u044c reasoning. 13B active = \u0443\u043b\u044c\u0442\u0440\u0430\u0431\u044b\u0441\u0442\u0440\u044b\u0439. 1M ctx. FP4+FP8. MIT.",
|
|
"tags": [
|
|
"coding",
|
|
"efficient",
|
|
"agent",
|
|
"thinking"
|
|
],
|
|
"openrouter": false,
|
|
"provider": "ollama-cloud"
|
|
},
|
|
{
|
|
"id": "kimi-k2-6",
|
|
"name": "Kimi K2.6",
|
|
"organization": "Moonshot AI",
|
|
"parameters": "1T/32B active MoE",
|
|
"context_window": "256K",
|
|
"swe_bench": 80.2,
|
|
"if_score": 91,
|
|
"categories": [
|
|
"coding",
|
|
"agent",
|
|
"multimodal"
|
|
],
|
|
"description": "SWE-Pro 58.6(#1!), SWE-V 80.2, Terminal-Bench 66.7, HLE 54.0(#1!), BrowseComp 83.2. 13h autonomous. 300 sub-agent swarm. Modified MIT.",
|
|
"tags": [
|
|
"coding",
|
|
"agent",
|
|
"swarm",
|
|
"vision",
|
|
"thinking",
|
|
"tools"
|
|
],
|
|
"openrouter": false,
|
|
"provider": "ollama-cloud"
|
|
},
|
|
{
|
|
"id": "nemotron-3-super",
|
|
"name": "Nemotron 3 Super",
|
|
"organization": "NVIDIA",
|
|
"parameters": "120B/12B active",
|
|
"context_window": "1M",
|
|
"swe_bench": 60.5,
|
|
"if_score": 78,
|
|
"categories": [
|
|
"agent",
|
|
"reasoning",
|
|
"efficient"
|
|
],
|
|
"description": "SWE-bench 60.5%. RULER@1M 91.75%! \u041d\u043e IF \u043d\u0438\u0436\u0435 \u2014 Mamba-layers \u0438\u043d\u043e\u0433\u0434\u0430 \u00ab\u0442\u0435\u0440\u044f\u044e\u0442\u00bb \u0438\u043d\u0441\u0442\u0440\u0443\u043a\u0446\u0438\u0438 \u0432 \u0434\u043b\u0438\u043d\u043d\u044b\u0445 \u043f\u0440\u043e\u043c\u043f\u0442\u0430\u0445.",
|
|
"tags": [
|
|
"agent",
|
|
"1M-ctx",
|
|
"thinking"
|
|
],
|
|
"openrouter": false,
|
|
"provider": "ollama"
|
|
},
|
|
{
|
|
"id": "glm-5.1",
|
|
"name": "GLM-5.1",
|
|
"organization": "Z.ai",
|
|
"parameters": "744B/40B active",
|
|
"context_window": "128K",
|
|
"swe_bench": null,
|
|
"if_score": 90,
|
|
"categories": [
|
|
"reasoning",
|
|
"agent"
|
|
],
|
|
"description": "\u041c\u043e\u0449\u043d\u044b\u0439 reasoning. Arena ELO 1451. \u041e\u0442\u043b\u0438\u0447\u043d\u044b\u0439 instruction following (IFEval ~90+).",
|
|
"tags": [
|
|
"reasoning",
|
|
"agent"
|
|
],
|
|
"openrouter": false,
|
|
"provider": "ollama"
|
|
},
|
|
{
|
|
"id": "deepseek-v4",
|
|
"name": "DeepSeek V4",
|
|
"organization": "DeepSeek",
|
|
"parameters": "Large MoE",
|
|
"context_window": "128K",
|
|
"swe_bench": null,
|
|
"if_score": 75,
|
|
"categories": [
|
|
"reasoning"
|
|
],
|
|
"description": "\u0425\u043e\u0440\u043e\u0448\u0438\u0439 reasoning, \u043d\u043e IF \u043d\u0435\u0441\u0442\u0430\u0431\u0438\u043b\u0435\u043d \u2014 \u0438\u043d\u043e\u0433\u0434\u0430 \u0438\u0433\u043d\u043e\u0440\u0438\u0440\u0443\u0435\u0442 \u0444\u043e\u0440\u043c\u0430\u0442 \u0432\u044b\u0432\u043e\u0434\u0430.",
|
|
"tags": [
|
|
"reasoning"
|
|
],
|
|
"openrouter": false,
|
|
"provider": "ollama"
|
|
},
|
|
{
|
|
"id": "qwen3-5-122b",
|
|
"name": "Qwen 3.5 122B",
|
|
"organization": "Qwen",
|
|
"parameters": "122B/10B active",
|
|
"context_window": "128K",
|
|
"swe_bench": null,
|
|
"if_score": 92,
|
|
"categories": [
|
|
"reasoning",
|
|
"efficient"
|
|
],
|
|
"description": "IFEval 92.6%! \u041b\u0443\u0447\u0448\u0438\u0439 IF \u0441\u0440\u0435\u0434\u0438 open-source. Multimodal. Thinking.",
|
|
"tags": [
|
|
"vision",
|
|
"thinking",
|
|
"tools"
|
|
],
|
|
"openrouter": false,
|
|
"provider": "ollama"
|
|
},
|
|
{
|
|
"id": "qwen3-coder-next",
|
|
"name": "Qwen3-Coder-Next",
|
|
"organization": "Qwen",
|
|
"parameters": "80B/3B active",
|
|
"context_window": "128K",
|
|
"swe_bench": 70,
|
|
"if_score": 84,
|
|
"categories": [
|
|
"coding",
|
|
"efficient"
|
|
],
|
|
"description": "70% SWE-bench \u0441 3B active! \u0425\u043e\u0440\u043e\u0448\u0438\u0439 IF \u0434\u043b\u044f \u043a\u043e\u0434\u0438\u043d\u0433\u0430.",
|
|
"tags": [
|
|
"coding",
|
|
"efficient",
|
|
"tools"
|
|
],
|
|
"openrouter": false,
|
|
"provider": "ollama"
|
|
},
|
|
{
|
|
"id": "cogito-2-1-671b",
|
|
"name": "Cogito 2.1 671B",
|
|
"organization": "Cognitive",
|
|
"parameters": "671B MoE",
|
|
"context_window": "128K",
|
|
"swe_bench": null,
|
|
"if_score": 76,
|
|
"categories": [
|
|
"reasoning"
|
|
],
|
|
"description": "MIT \u043b\u0438\u0446\u0435\u043d\u0437\u0438\u044f. 671B total. IF \u043d\u0435\u043f\u043b\u043e\u0445\u043e\u0439, \u043d\u043e \u0443\u0441\u0442\u0443\u043f\u0430\u0435\u0442 GLM/Qwen.",
|
|
"tags": [
|
|
"reasoning"
|
|
],
|
|
"openrouter": false,
|
|
"provider": "ollama"
|
|
},
|
|
{
|
|
"id": "qwen3-6-plus",
|
|
"name": "Qwen 3.6 Plus",
|
|
"organization": "Qwen",
|
|
"parameters": "Hybrid MoE",
|
|
"context_window": "1M",
|
|
"swe_bench": 78.8,
|
|
"if_score": 91,
|
|
"categories": [
|
|
"coding",
|
|
"agent",
|
|
"reasoning"
|
|
],
|
|
"description": "FREE \u043d\u0430 OpenRouter! 1M \u043a\u043e\u043d\u0442\u0435\u043a\u0441\u0442. Always-on CoT. \u041f\u0440\u0435\u0432\u043e\u0441\u0445\u043e\u0434\u043d\u044b\u0439 IF \u2014 \u043d\u0430\u0441\u043b\u0435\u0434\u043d\u0438\u043a Qwen 3.5 (92.6%).",
|
|
"tags": [
|
|
"coding",
|
|
"agent",
|
|
"1M-ctx",
|
|
"free"
|
|
],
|
|
"openrouter": true,
|
|
"provider": "openrouter"
|
|
},
|
|
{
|
|
"id": "step-3-5-flash",
|
|
"name": "Step 3.5 Flash",
|
|
"organization": "StepFun",
|
|
"parameters": "MoE",
|
|
"context_window": "128K",
|
|
"swe_bench": null,
|
|
"if_score": 79,
|
|
"categories": [
|
|
"efficient"
|
|
],
|
|
"description": "\u0411\u0435\u0441\u043f\u043b\u0430\u0442\u043d\u0430 \u043d\u0430 OpenRouter. IF \u0441\u0440\u0435\u0434\u043d\u0438\u0439.",
|
|
"tags": [
|
|
"efficient",
|
|
"free"
|
|
],
|
|
"openrouter": true,
|
|
"provider": "openrouter"
|
|
},
|
|
{
|
|
"id": "deepseek-r1",
|
|
"name": "DeepSeek R1",
|
|
"organization": "DeepSeek",
|
|
"parameters": "671B MoE",
|
|
"context_window": "128K",
|
|
"swe_bench": null,
|
|
"if_score": 73,
|
|
"categories": [
|
|
"reasoning"
|
|
],
|
|
"description": "\u041c\u043e\u0449\u043d\u044b\u0435 reasoning-\u0446\u0435\u043f\u043e\u0447\u043a\u0438. \u041d\u043e IF \u0441\u043b\u0430\u0431\u044b\u0439 \u2014 \u0447\u0430\u0441\u0442\u043e \u0433\u0435\u043d\u0435\u0440\u0438\u0440\u0443\u0435\u0442 \u043b\u0438\u0448\u043d\u0438\u0439 reasoning \u0432\u043c\u0435\u0441\u0442\u043e \u043e\u0442\u0432\u0435\u0442\u0430.",
|
|
"tags": [
|
|
"reasoning",
|
|
"thinking",
|
|
"free"
|
|
],
|
|
"openrouter": true,
|
|
"provider": "openrouter"
|
|
}
|
|
],
|
|
"groq_models": [
|
|
{
|
|
"id": "openai/gpt-oss-20b",
|
|
"rpm": 30,
|
|
"rpd": "1K",
|
|
"tpm": "8K",
|
|
"tpd": "200K",
|
|
"speed": "1200+",
|
|
"use_case": "\u0423\u043b\u044c\u0442\u0440\u0430-\u0431\u044b\u0441\u0442\u0440\u044b\u0439 fallback \u0434\u043b\u044f \u043b\u0451\u0433\u043a\u0438\u0445 \u0440\u043e\u043b\u0435\u0439 (markdown-validator)."
|
|
},
|
|
{
|
|
"id": "llama-3.1-8b-instant",
|
|
"rpm": 30,
|
|
"rpd": "14.4K",
|
|
"tpm": "6K",
|
|
"tpd": "500K",
|
|
"speed": "~800",
|
|
"use_case": "14.4K RPD! \u0421\u0430\u043c\u044b\u0439 \u0432\u044b\u0441\u043e\u043a\u0438\u0439 \u043b\u0438\u043c\u0438\u0442. \u0414\u043b\u044f health-check / ping \u0440\u043e\u043b\u0435\u0439."
|
|
},
|
|
{
|
|
"id": "groq/compound",
|
|
"rpm": 30,
|
|
"rpd": "250",
|
|
"tpm": "70K",
|
|
"tpd": "\u2014",
|
|
"speed": "varies",
|
|
"use_case": "\u041c\u0443\u043b\u044c\u0442\u0438\u043c\u043e\u0434\u0435\u043b\u044c\u043d\u0430\u044f \u0430\u0433\u0440\u0435\u0433\u0430\u0446\u0438\u044f. \u0414\u043b\u044f research-\u0437\u0430\u0434\u0430\u0447."
|
|
},
|
|
{
|
|
"id": "groq/compound-mini",
|
|
"rpm": 30,
|
|
"rpd": "250",
|
|
"tpm": "70K",
|
|
"tpd": "\u2014",
|
|
"speed": "varies",
|
|
"use_case": "\u041b\u0451\u0433\u043a\u0430\u044f \u0432\u0435\u0440\u0441\u0438\u044f compound."
|
|
},
|
|
{
|
|
"id": "llama-prompt-guard-2",
|
|
"rpm": 30,
|
|
"rpd": "14.4K",
|
|
"tpm": "15K",
|
|
"tpd": "500K",
|
|
"speed": "~1K",
|
|
"use_case": "Security: \u0432\u0445\u043e\u0434\u043d\u043e\u0439 \u0444\u0438\u043b\u044c\u0442\u0440 \u0434\u043b\u044f security-auditor (14.4K RPD!)."
|
|
}
|
|
],
|
|
"agent_model_scores": [
|
|
{
|
|
"agent": "lead-developer",
|
|
"current_model_index": 0,
|
|
"current_model_id": "qwen3-coder-480b",
|
|
"reasoning_effort": "H",
|
|
"scores": {
|
|
"qwen3-coder-480b": 92,
|
|
"minimax-m2.5": 86,
|
|
"minimax-m2.7": 82,
|
|
"nemotron-3-super": 70,
|
|
"glm-5.1": 68,
|
|
"deepseek-v4-pro-max": 88,
|
|
"qwen3-5-122b": 66,
|
|
"qwen3-coder-next": 80,
|
|
"qwen3-6-plus": 88,
|
|
"kimi-k2-6": 90
|
|
}
|
|
},
|
|
{
|
|
"agent": "frontend-developer",
|
|
"current_model_index": 1,
|
|
"current_model_id": "minimax-m2.5",
|
|
"reasoning_effort": "M",
|
|
"scores": {
|
|
"qwen3-coder-480b": 86,
|
|
"minimax-m2.5": 92,
|
|
"minimax-m2.7": 88,
|
|
"nemotron-3-super": 62,
|
|
"glm-5.1": 56,
|
|
"deepseek-v4-pro-max": 82,
|
|
"qwen3-5-122b": 60,
|
|
"qwen3-coder-next": 76,
|
|
"qwen3-6-plus": 88,
|
|
"kimi-k2-6": 86
|
|
}
|
|
},
|
|
{
|
|
"agent": "php-developer",
|
|
"current_model_index": 0,
|
|
"current_model_id": "qwen3-coder-480b",
|
|
"reasoning_effort": "H",
|
|
"scores": {
|
|
"qwen3-coder-480b": 87,
|
|
"minimax-m2.5": 76,
|
|
"minimax-m2.7": 72,
|
|
"nemotron-3-super": 64,
|
|
"glm-5.1": 56,
|
|
"deepseek-v4-pro-max": 74,
|
|
"qwen3-5-122b": 60,
|
|
"qwen3-coder-next": 76,
|
|
"qwen3-6-plus": 84,
|
|
"kimi-k2-6": 86
|
|
}
|
|
},
|
|
{
|
|
"agent": "python-developer",
|
|
"current_model_index": 0,
|
|
"current_model_id": "qwen3-coder-480b",
|
|
"reasoning_effort": "H",
|
|
"scores": {
|
|
"qwen3-coder-480b": 90,
|
|
"minimax-m2.5": 82,
|
|
"minimax-m2.7": 78,
|
|
"nemotron-3-super": 66,
|
|
"glm-5.1": 60,
|
|
"deepseek-v4-pro-max": 78,
|
|
"qwen3-5-122b": 64,
|
|
"qwen3-coder-next": 78,
|
|
"qwen3-6-plus": 88,
|
|
"kimi-k2-6": 88
|
|
}
|
|
},
|
|
{
|
|
"agent": "backend-developer",
|
|
"current_model_index": 0,
|
|
"current_model_id": "qwen3-coder-480b",
|
|
"reasoning_effort": "M",
|
|
"scores": {
|
|
"qwen3-coder-480b": 91,
|
|
"minimax-m2.5": 84,
|
|
"minimax-m2.7": 80,
|
|
"nemotron-3-super": 68,
|
|
"glm-5.1": 63,
|
|
"deepseek-v4-pro-max": 86,
|
|
"qwen3-5-122b": 62,
|
|
"qwen3-coder-next": 78,
|
|
"qwen3-6-plus": 87,
|
|
"kimi-k2-6": 90
|
|
}
|
|
},
|
|
{
|
|
"agent": "go-developer",
|
|
"current_model_index": 3,
|
|
"current_model_id": "deepseek-v4-pro-max",
|
|
"reasoning_effort": "M",
|
|
"scores": {
|
|
"qwen3-coder-480b": 85,
|
|
"minimax-m2.5": 78,
|
|
"minimax-m2.7": 74,
|
|
"nemotron-3-super": 66,
|
|
"glm-5.1": 58,
|
|
"deepseek-v4-pro-max": 88,
|
|
"qwen3-5-122b": 58,
|
|
"qwen3-coder-next": 74,
|
|
"qwen3-6-plus": 82,
|
|
"kimi-k2-6": 86
|
|
}
|
|
},
|
|
{
|
|
"agent": "flutter-developer",
|
|
"current_model_index": 0,
|
|
"current_model_id": "qwen3-coder-480b",
|
|
"reasoning_effort": "M",
|
|
"scores": {
|
|
"qwen3-coder-480b": 86,
|
|
"minimax-m2.5": 70,
|
|
"minimax-m2.7": 66,
|
|
"nemotron-3-super": 60,
|
|
"glm-5.1": 53,
|
|
"deepseek-v4-pro-max": 78,
|
|
"qwen3-5-122b": 58,
|
|
"qwen3-coder-next": 74,
|
|
"qwen3-6-plus": 82,
|
|
"kimi-k2-6": 84
|
|
}
|
|
},
|
|
{
|
|
"agent": "devops-engineer",
|
|
"current_model_index": 5,
|
|
"current_model_id": "kimi-k2-6",
|
|
"reasoning_effort": "M",
|
|
"scores": {
|
|
"qwen3-coder-480b": 66,
|
|
"minimax-m2.5": 53,
|
|
"minimax-m2.7": 48,
|
|
"nemotron-3-super": 78,
|
|
"glm-5.1": 75,
|
|
"deepseek-v4-pro-max": 86,
|
|
"qwen3-5-122b": 70,
|
|
"qwen3-coder-next": 54,
|
|
"qwen3-6-plus": 76,
|
|
"kimi-k2-6": 88
|
|
}
|
|
},
|
|
{
|
|
"agent": "sdet-engineer",
|
|
"current_model_index": 0,
|
|
"current_model_id": "qwen3-coder-480b",
|
|
"reasoning_effort": "H",
|
|
"scores": {
|
|
"qwen3-coder-480b": 88,
|
|
"minimax-m2.5": 84,
|
|
"minimax-m2.7": 80,
|
|
"nemotron-3-super": 70,
|
|
"glm-5.1": 63,
|
|
"deepseek-v4-pro-max": 84,
|
|
"qwen3-5-122b": 64,
|
|
"qwen3-coder-next": 78,
|
|
"qwen3-6-plus": 84,
|
|
"kimi-k2-6": 87
|
|
}
|
|
},
|
|
{
|
|
"agent": "code-skeptic",
|
|
"current_model_index": 1,
|
|
"current_model_id": "minimax-m2.5",
|
|
"reasoning_effort": "M",
|
|
"scores": {
|
|
"qwen3-coder-480b": 82,
|
|
"minimax-m2.5": 85,
|
|
"minimax-m2.7": 80,
|
|
"nemotron-3-super": 73,
|
|
"glm-5.1": 72,
|
|
"deepseek-v4-pro-max": 82,
|
|
"qwen3-5-122b": 70,
|
|
"qwen3-coder-next": 72,
|
|
"qwen3-6-plus": 80,
|
|
"kimi-k2-6": 82
|
|
}
|
|
},
|
|
{
|
|
"agent": "security-auditor",
|
|
"current_model_index": 3,
|
|
"current_model_id": "deepseek-v4-pro-max",
|
|
"reasoning_effort": "M",
|
|
"scores": {
|
|
"qwen3-coder-480b": 76,
|
|
"minimax-m2.5": 74,
|
|
"minimax-m2.7": 68,
|
|
"nemotron-3-super": 76,
|
|
"glm-5.1": 68,
|
|
"deepseek-v4-pro-max": 80,
|
|
"qwen3-5-122b": 72,
|
|
"qwen3-coder-next": 64,
|
|
"qwen3-6-plus": 75,
|
|
"kimi-k2-6": 80
|
|
}
|
|
},
|
|
{
|
|
"agent": "performance-engineer",
|
|
"current_model_index": 3,
|
|
"current_model_id": "deepseek-v4-pro-max",
|
|
"reasoning_effort": "M",
|
|
"scores": {
|
|
"qwen3-coder-480b": 78,
|
|
"minimax-m2.5": 75,
|
|
"minimax-m2.7": 70,
|
|
"nemotron-3-super": 78,
|
|
"glm-5.1": 74,
|
|
"deepseek-v4-pro-max": 84,
|
|
"qwen3-5-122b": 70,
|
|
"qwen3-coder-next": 67,
|
|
"qwen3-6-plus": 76,
|
|
"kimi-k2-6": 82
|
|
}
|
|
},
|
|
{
|
|
"agent": "the-fixer",
|
|
"current_model_index": 5,
|
|
"current_model_id": "kimi-k2-6",
|
|
"reasoning_effort": "M",
|
|
"scores": {
|
|
"qwen3-coder-480b": 89,
|
|
"minimax-m2.5": 88,
|
|
"minimax-m2.7": 84,
|
|
"nemotron-3-super": 71,
|
|
"glm-5.1": 64,
|
|
"deepseek-v4-pro-max": 88,
|
|
"qwen3-5-122b": 64,
|
|
"qwen3-coder-next": 82,
|
|
"qwen3-6-plus": 86,
|
|
"kimi-k2-6": 90
|
|
}
|
|
},
|
|
{
|
|
"agent": "browser-automation",
|
|
"current_model_index": 0,
|
|
"current_model_id": "qwen3-coder-480b",
|
|
"reasoning_effort": "M",
|
|
"scores": {
|
|
"qwen3-coder-480b": 87,
|
|
"minimax-m2.5": 72,
|
|
"minimax-m2.7": 68,
|
|
"nemotron-3-super": 61,
|
|
"glm-5.1": 53,
|
|
"deepseek-v4-pro-max": 82,
|
|
"qwen3-5-122b": 56,
|
|
"qwen3-coder-next": 72,
|
|
"qwen3-6-plus": 82,
|
|
"kimi-k2-6": 86
|
|
}
|
|
},
|
|
{
|
|
"agent": "visual-tester",
|
|
"current_model_index": 0,
|
|
"current_model_id": "qwen3-coder-480b",
|
|
"reasoning_effort": "M",
|
|
"scores": {
|
|
"qwen3-coder-480b": 82,
|
|
"minimax-m2.5": 68,
|
|
"minimax-m2.7": 64,
|
|
"nemotron-3-super": 55,
|
|
"glm-5.1": 48,
|
|
"deepseek-v4-pro-max": 76,
|
|
"qwen3-5-122b": 54,
|
|
"qwen3-coder-next": 66,
|
|
"qwen3-6-plus": 76,
|
|
"kimi-k2-6": 78
|
|
}
|
|
},
|
|
{
|
|
"agent": "system-analyst",
|
|
"current_model_index": 7,
|
|
"current_model_id": "glm-5.1",
|
|
"reasoning_effort": "M",
|
|
"scores": {
|
|
"qwen3-coder-480b": 70,
|
|
"minimax-m2.5": 66,
|
|
"minimax-m2.7": 63,
|
|
"nemotron-3-super": 74,
|
|
"glm-5.1": 82,
|
|
"deepseek-v4-pro-max": 88,
|
|
"qwen3-5-122b": 76,
|
|
"qwen3-coder-next": 58,
|
|
"qwen3-6-plus": 80,
|
|
"kimi-k2-6": 86
|
|
}
|
|
},
|
|
{
|
|
"agent": "capability-analyst",
|
|
"current_model_index": 7,
|
|
"current_model_id": "glm-5.1",
|
|
"reasoning_effort": "M",
|
|
"scores": {
|
|
"qwen3-coder-480b": 72,
|
|
"minimax-m2.5": 68,
|
|
"minimax-m2.7": 66,
|
|
"nemotron-3-super": 76,
|
|
"glm-5.1": 78,
|
|
"deepseek-v4-pro-max": 82,
|
|
"qwen3-5-122b": 75,
|
|
"qwen3-coder-next": 60,
|
|
"qwen3-6-plus": 79,
|
|
"kimi-k2-6": 82
|
|
}
|
|
},
|
|
{
|
|
"agent": "orchestrator",
|
|
"current_model_index": 5,
|
|
"current_model_id": "kimi-k2-6",
|
|
"reasoning_effort": "H",
|
|
"scores": {
|
|
"qwen3-coder-480b": 74,
|
|
"minimax-m2.5": 70,
|
|
"minimax-m2.7": 68,
|
|
"nemotron-3-super": 80,
|
|
"glm-5.1": 82,
|
|
"deepseek-v4-pro-max": 86,
|
|
"qwen3-5-122b": 78,
|
|
"qwen3-coder-next": 62,
|
|
"qwen3-6-plus": 84,
|
|
"kimi-k2-6": 92
|
|
}
|
|
},
|
|
{
|
|
"agent": "release-manager",
|
|
"current_model_index": 7,
|
|
"current_model_id": "glm-5.1",
|
|
"reasoning_effort": "M",
|
|
"scores": {
|
|
"qwen3-coder-480b": 72,
|
|
"minimax-m2.5": 66,
|
|
"minimax-m2.7": 64,
|
|
"nemotron-3-super": 74,
|
|
"glm-5.1": 76,
|
|
"deepseek-v4-pro-max": 78,
|
|
"qwen3-5-122b": 72,
|
|
"qwen3-coder-next": 60,
|
|
"qwen3-6-plus": 76,
|
|
"kimi-k2-6": 78
|
|
}
|
|
},
|
|
{
|
|
"agent": "evaluator",
|
|
"current_model_index": 7,
|
|
"current_model_id": "glm-5.1",
|
|
"reasoning_effort": "H",
|
|
"scores": {
|
|
"qwen3-coder-480b": 70,
|
|
"minimax-m2.5": 73,
|
|
"minimax-m2.7": 70,
|
|
"nemotron-3-super": 78,
|
|
"glm-5.1": 78,
|
|
"deepseek-v4-pro-max": 84,
|
|
"qwen3-5-122b": 76,
|
|
"qwen3-coder-next": 58,
|
|
"qwen3-6-plus": 81,
|
|
"kimi-k2-6": 84
|
|
}
|
|
},
|
|
{
|
|
"agent": "prompt-optimizer",
|
|
"current_model_index": 12,
|
|
"current_model_id": "qwen3-6-plus",
|
|
"reasoning_effort": "M",
|
|
"scores": {
|
|
"qwen3-coder-480b": 76,
|
|
"minimax-m2.5": 74,
|
|
"minimax-m2.7": 72,
|
|
"nemotron-3-super": 76,
|
|
"glm-5.1": 75,
|
|
"deepseek-v4-pro-max": 80,
|
|
"qwen3-5-122b": 74,
|
|
"qwen3-coder-next": 64,
|
|
"qwen3-6-plus": 83,
|
|
"kimi-k2-6": 82
|
|
}
|
|
},
|
|
{
|
|
"agent": "product-owner",
|
|
"current_model_index": 7,
|
|
"current_model_id": "glm-5.1",
|
|
"reasoning_effort": "M",
|
|
"scores": {
|
|
"qwen3-coder-480b": 60,
|
|
"minimax-m2.5": 56,
|
|
"minimax-m2.7": 54,
|
|
"nemotron-3-super": 74,
|
|
"glm-5.1": 78,
|
|
"deepseek-v4-pro-max": 76,
|
|
"qwen3-5-122b": 74,
|
|
"qwen3-coder-next": 48,
|
|
"qwen3-6-plus": 78,
|
|
"kimi-k2-6": 76
|
|
}
|
|
},
|
|
{
|
|
"agent": "pipeline-judge",
|
|
"current_model_index": 7,
|
|
"current_model_id": "glm-5.1",
|
|
"reasoning_effort": "M",
|
|
"scores": {
|
|
"qwen3-coder-480b": 64,
|
|
"minimax-m2.5": 68,
|
|
"minimax-m2.7": 65,
|
|
"nemotron-3-super": 78,
|
|
"glm-5.1": 76,
|
|
"deepseek-v4-pro-max": 82,
|
|
"qwen3-5-122b": 74,
|
|
"qwen3-coder-next": 56,
|
|
"qwen3-6-plus": 80,
|
|
"kimi-k2-6": 84
|
|
}
|
|
},
|
|
{
|
|
"agent": "workflow-architect",
|
|
"current_model_index": 7,
|
|
"current_model_id": "glm-5.1",
|
|
"reasoning_effort": "H",
|
|
"scores": {
|
|
"qwen3-coder-480b": 68,
|
|
"minimax-m2.5": 62,
|
|
"minimax-m2.7": 60,
|
|
"nemotron-3-super": 76,
|
|
"glm-5.1": 76,
|
|
"deepseek-v4-pro-max": 80,
|
|
"qwen3-5-122b": 72,
|
|
"qwen3-coder-next": 56,
|
|
"qwen3-6-plus": 80,
|
|
"kimi-k2-6": 82
|
|
}
|
|
},
|
|
{
|
|
"agent": "markdown-validator",
|
|
"current_model_index": 3,
|
|
"current_model_id": "deepseek-v4-pro-max",
|
|
"reasoning_effort": "M",
|
|
"scores": {
|
|
"qwen3-coder-480b": 43,
|
|
"minimax-m2.5": 38,
|
|
"minimax-m2.7": 36,
|
|
"nemotron-3-super": 52,
|
|
"glm-5.1": 55,
|
|
"deepseek-v4-pro-max": 68,
|
|
"qwen3-5-122b": 56,
|
|
"qwen3-coder-next": 40,
|
|
"qwen3-6-plus": 50,
|
|
"kimi-k2-6": 56
|
|
}
|
|
},
|
|
{
|
|
"agent": "agent-architect",
|
|
"current_model_index": 5,
|
|
"current_model_id": "kimi-k2-6",
|
|
"reasoning_effort": "H",
|
|
"scores": {
|
|
"qwen3-coder-480b": 78,
|
|
"minimax-m2.5": 72,
|
|
"minimax-m2.7": 70,
|
|
"nemotron-3-super": 78,
|
|
"glm-5.1": 76,
|
|
"deepseek-v4-pro-max": 82,
|
|
"qwen3-5-122b": 76,
|
|
"qwen3-coder-next": 66,
|
|
"qwen3-6-plus": 82,
|
|
"kimi-k2-6": 86
|
|
}
|
|
},
|
|
{
|
|
"agent": "planner",
|
|
"current_model_index": 3,
|
|
"current_model_id": "deepseek-v4-pro-max",
|
|
"reasoning_effort": "M",
|
|
"scores": {
|
|
"qwen3-coder-480b": 72,
|
|
"minimax-m2.5": 68,
|
|
"minimax-m2.7": 66,
|
|
"nemotron-3-super": 80,
|
|
"glm-5.1": 78,
|
|
"deepseek-v4-pro-max": 88,
|
|
"qwen3-5-122b": 78,
|
|
"qwen3-coder-next": 60,
|
|
"qwen3-6-plus": 85,
|
|
"kimi-k2-6": 86
|
|
}
|
|
},
|
|
{
|
|
"agent": "reflector",
|
|
"current_model_index": 3,
|
|
"current_model_id": "deepseek-v4-pro-max",
|
|
"reasoning_effort": "M",
|
|
"scores": {
|
|
"qwen3-coder-480b": 68,
|
|
"minimax-m2.5": 66,
|
|
"minimax-m2.7": 64,
|
|
"nemotron-3-super": 78,
|
|
"glm-5.1": 76,
|
|
"deepseek-v4-pro-max": 84,
|
|
"qwen3-5-122b": 76,
|
|
"qwen3-coder-next": 56,
|
|
"qwen3-6-plus": 82,
|
|
"kimi-k2-6": 80
|
|
}
|
|
},
|
|
{
|
|
"agent": "memory-manager",
|
|
"current_model_index": 12,
|
|
"current_model_id": "qwen3-6-plus",
|
|
"reasoning_effort": "M",
|
|
"scores": {
|
|
"qwen3-coder-480b": 63,
|
|
"minimax-m2.5": 58,
|
|
"minimax-m2.7": 56,
|
|
"nemotron-3-super": 86,
|
|
"glm-5.1": 72,
|
|
"deepseek-v4-pro-max": 86,
|
|
"qwen3-5-122b": 70,
|
|
"qwen3-coder-next": 50,
|
|
"qwen3-6-plus": 87,
|
|
"kimi-k2-6": 84
|
|
}
|
|
},
|
|
{
|
|
"agent": "architect-indexer",
|
|
"current_model_index": 7,
|
|
"current_model_id": "glm-5.1",
|
|
"reasoning_effort": "H",
|
|
"scores": {
|
|
"qwen3-coder-480b": 70,
|
|
"minimax-m2.5": 64,
|
|
"minimax-m2.7": 62,
|
|
"nemotron-3-super": 74,
|
|
"glm-5.1": 80,
|
|
"deepseek-v4-pro-max": 78,
|
|
"qwen3-5-122b": 76,
|
|
"qwen3-coder-next": 58,
|
|
"qwen3-6-plus": 80,
|
|
"kimi-k2-6": 84
|
|
}
|
|
}
|
|
],
|
|
"if_scores": {
|
|
"qwen3-coder-480b": 88,
|
|
"minimax-m2.5": 82,
|
|
"minimax-m2.7": 78,
|
|
"nemotron-3-super": 85,
|
|
"glm-5.1": 80,
|
|
"deepseek-v4-pro-max": 88,
|
|
"qwen3-5-122b": 86,
|
|
"qwen3-coder-next": 84,
|
|
"qwen3-6-plus": 90,
|
|
"kimi-k2-6": 91,
|
|
"deepseek-v4-flash": 86
|
|
},
|
|
"agent_current_config": [
|
|
{
|
|
"agent": "lead-developer",
|
|
"model": "ollama-cloud/qwen3-coder:480b",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "qwen",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "frontend-developer",
|
|
"model": "ollama-cloud/minimax-m2.5",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "minimax",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "php-developer",
|
|
"model": "ollama-cloud/qwen3-coder:480b",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "qwen",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "python-developer",
|
|
"model": "ollama-cloud/qwen3-coder:480b",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "qwen",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "backend-developer",
|
|
"model": "ollama-cloud/qwen3-coder:480b",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "qwen",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "go-developer",
|
|
"model": "ollama-cloud/deepseek-v4-pro-max",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "qwen",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "flutter-developer",
|
|
"model": "ollama-cloud/qwen3-coder:480b",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "qwen",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "devops-engineer",
|
|
"model": "ollama-cloud/kimi-k2.6:cloud",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "kimi",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "sdet-engineer",
|
|
"model": "ollama-cloud/qwen3-coder:480b",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "qwen",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "code-skeptic",
|
|
"model": "ollama-cloud/minimax-m2.5",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "minimax",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "security-auditor",
|
|
"model": "ollama-cloud/deepseek-v4-pro-max",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "nemotron",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "performance-engineer",
|
|
"model": "ollama-cloud/deepseek-v4-pro-max",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "nemotron",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "the-fixer",
|
|
"model": "ollama-cloud/kimi-k2.6:cloud",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "kimi",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "browser-automation",
|
|
"model": "ollama-cloud/qwen3-coder:480b",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "qwen",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "visual-tester",
|
|
"model": "ollama-cloud/qwen3-coder:480b",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "qwen",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "system-analyst",
|
|
"model": "ollama-cloud/glm-5.1",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "glm",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "capability-analyst",
|
|
"model": "ollama-cloud/glm-5.1",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "glm",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "orchestrator",
|
|
"model": "ollama-cloud/kimi-k2.6:cloud",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "kimi",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "release-manager",
|
|
"model": "ollama-cloud/glm-5.1",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "glm",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "evaluator",
|
|
"model": "ollama-cloud/glm-5.1",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "glm",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "prompt-optimizer",
|
|
"model": "ollama-cloud/qwen3.6-plus",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "qwen",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "product-owner",
|
|
"model": "ollama-cloud/glm-5.1",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "glm",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "pipeline-judge",
|
|
"model": "ollama-cloud/glm-5.1",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "glm",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "workflow-architect",
|
|
"model": "ollama-cloud/glm-5.1",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "glm",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "markdown-validator",
|
|
"model": "ollama-cloud/deepseek-v4-pro-max",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "nemotron",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "agent-architect",
|
|
"model": "ollama-cloud/kimi-k2.6:cloud",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "kimi",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "planner",
|
|
"model": "ollama-cloud/deepseek-v4-pro-max",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "nemotron",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "reflector",
|
|
"model": "ollama-cloud/deepseek-v4-pro-max",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "nemotron",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "memory-manager",
|
|
"model": "ollama-cloud/qwen3.6-plus",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "qwen",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
},
|
|
{
|
|
"agent": "architect-indexer",
|
|
"model": "ollama-cloud/glm-5.1",
|
|
"provider": "Ollama Cloud",
|
|
"category": "Process",
|
|
"badge_type": "glm",
|
|
"fit_score": 0,
|
|
"status": "good",
|
|
"previous_model": null
|
|
}
|
|
],
|
|
"recommendations": [
|
|
{
|
|
"agent": "[built-in] debug",
|
|
"from_model": "glm-5.1 (88)",
|
|
"from_provider": "Ollama",
|
|
"to_model": "V4-Pro Max (\u260590) / K2.6 (\u260590) RE:High",
|
|
"to_provider": "Ollama Cloud",
|
|
"impact": "high",
|
|
"quality_change": "+2%",
|
|
"speed_change": "~1x",
|
|
"context_change": "200K\u21921M",
|
|
"provider_change": "Ollama Cloud",
|
|
"rationale": "\u2605 \u043c\u0430\u0442\u0440\u0438\u0446\u044b: V4-Pro=90 \u0438 K2.6=90 (TIE!), GLM-5.1=88. V4-Pro: LiveCodeBench 93.5(#1!), Terminal 67.9, 1M ctx \u0434\u043b\u044f \u043f\u043e\u043b\u043d\u043e\u0433\u043e \u043f\u0440\u043e\u0435\u043a\u0442\u0430. K2.6: 13h auto sessions. \u041e\u0431\u0430 \u043b\u0443\u0447\u0448\u0435 GLM-5.1. RE:High \u0434\u043b\u044f debug."
|
|
},
|
|
{
|
|
"agent": "planner",
|
|
"from_model": "nemotron-3-super (80)",
|
|
"from_provider": "Ollama",
|
|
"to_model": "V4-Pro Max (\u260588) RE:High",
|
|
"to_provider": "Ollama Cloud",
|
|
"impact": "high",
|
|
"quality_change": "+10%",
|
|
"speed_change": "~1x",
|
|
"context_change": "1M",
|
|
"provider_change": "Ollama Cloud",
|
|
"rationale": "\u2605 \u043c\u0430\u0442\u0440\u0438\u0446\u044b: V4-Pro=88(\u043b\u0443\u0447\u0448\u0438\u0439!), K2.6=86, GLM-5.1=85, Nem=80. V4-Pro: GPQA 90.1 (reasoning), 1M ctx \u0441\u043e\u0445\u0440\u0430\u043d\u044f\u0435\u0442\u0441\u044f (vs \u043f\u043e\u0442\u0435\u0440\u044f \u043f\u0440\u0438 K2.6). RE:High \u0434\u043b\u044f chain-of-thought planning."
|
|
},
|
|
{
|
|
"agent": "go-developer",
|
|
"from_model": "qwen3-coder:480b (85)",
|
|
"from_provider": "Ollama",
|
|
"to_model": "V4-Pro Max (\u260588) RE:Medium",
|
|
"to_provider": "Ollama Cloud",
|
|
"impact": "medium",
|
|
"quality_change": "+4%",
|
|
"speed_change": "~1x",
|
|
"context_change": "256K\u21921M",
|
|
"provider_change": "Ollama Cloud",
|
|
"rationale": "\u2605 \u043c\u0430\u0442\u0440\u0438\u0446\u044b: V4-Pro=88(\u043b\u0443\u0447\u0448\u0438\u0439 \u0434\u043b\u044f Go!), K2.6=86, Qwen3Coder=85. DeepSeek \u043c\u043e\u0434\u0435\u043b\u0438 \u0442\u0440\u0430\u0434\u0438\u0446\u0438\u043e\u043d\u043d\u043e \u0441\u0438\u043b\u044c\u043d\u044b \u0432 Go/Rust. 1M ctx \u0434\u043b\u044f \u043a\u0440\u0443\u043f\u043d\u044b\u0445 Go-\u043f\u0440\u043e\u0435\u043a\u0442\u043e\u0432."
|
|
},
|
|
{
|
|
"agent": "history-miner",
|
|
"from_model": "nemotron-3-super (\u260585)",
|
|
"from_provider": "Ollama",
|
|
"to_model": "V4-Pro Max (86) + Nem fallback",
|
|
"to_provider": "Hybrid",
|
|
"impact": "medium",
|
|
"quality_change": "+1%",
|
|
"speed_change": "~1x",
|
|
"context_change": "1M",
|
|
"provider_change": "Ollama Cloud + Ollama",
|
|
"rationale": "V4-Pro=86 \u0447\u0443\u0442\u044c \u043b\u0443\u0447\u0448\u0435 Nemotron=85. 1M ctx \u0443 \u043e\u0431\u043e\u0438\u0445. MRCR 83.5 \u0443 V4-Pro \u2014 \u043b\u0443\u0447\u0448\u0435\u0435 long-context retrieval. Nemotron \u043a\u0430\u043a fallback (RULER 91.75%)."
|
|
},
|
|
{
|
|
"agent": "frontend-dev \u2192 M2.5",
|
|
"from_model": "qwen3-coder (90)",
|
|
"from_provider": "Ollama",
|
|
"to_model": "MiniMax M2.5 (\u260592) \u2705",
|
|
"to_provider": "Ollama",
|
|
"impact": "low",
|
|
"quality_change": "+2%",
|
|
"speed_change": "=",
|
|
"context_change": "204K",
|
|
"provider_change": "Ollama",
|
|
"rationale": "Spec-writing, UI architect. APPLIED."
|
|
},
|
|
{
|
|
"agent": "devops \u2192 K2.6",
|
|
"from_model": "deepseek-v3.2",
|
|
"from_provider": "",
|
|
"to_model": "kimi-k2.6:cloud \u2705",
|
|
"to_provider": "Ollama Cloud",
|
|
"impact": "low",
|
|
"quality_change": "+35%",
|
|
"speed_change": "=",
|
|
"context_change": "256K",
|
|
"provider_change": "",
|
|
"rationale": "APPLIED."
|
|
},
|
|
{
|
|
"agent": "orchestrator",
|
|
"from_model": "glm-5.1 (\u260590)",
|
|
"from_provider": "Ollama",
|
|
"to_model": "K2.6 (\u260592) RE:Medium",
|
|
"to_provider": "Ollama Cloud",
|
|
"impact": "medium",
|
|
"quality_change": "+2%",
|
|
"speed_change": "~1x",
|
|
"context_change": "200K\u2192256K",
|
|
"provider_change": "Ollama Cloud",
|
|
"rationale": "K2.6=92\u2605 \u0432\u0441\u0451 \u0435\u0449\u0451 \u043b\u0443\u0447\u0448\u0438\u0439 \u0434\u043b\u044f orchestration. V4-Pro=86 \u0441\u043b\u0430\u0431\u0435\u0435. 300 sub-agent swarm."
|
|
},
|
|
{
|
|
"agent": "the-fixer",
|
|
"from_model": "minimax-m2.5 (\u260588)",
|
|
"from_provider": "Ollama",
|
|
"to_model": "V4-Pro (\u260588) / K2.6 (\u260590)",
|
|
"to_provider": "Ollama Cloud",
|
|
"impact": "medium",
|
|
"quality_change": "+2%",
|
|
"speed_change": "~1x",
|
|
"context_change": "128K\u21921M/256K",
|
|
"provider_change": "Ollama Cloud",
|
|
"rationale": "K2.6=90(\u043b\u0443\u0447\u0448\u0438\u0439), V4-Pro=88=M2.5. M2.5 SWE-bench 80.2% \u0441\u0442\u0430\u0431\u0438\u043b\u044c\u043d\u0435\u0435. \u041d\u0435 \u0441\u0440\u043e\u0447\u043d\u043e."
|
|
},
|
|
{
|
|
"agent": "Qwen3-Coder (7 coding)",
|
|
"from_model": "qwen3-coder",
|
|
"from_provider": "Ollama",
|
|
"to_model": "\u2705",
|
|
"to_provider": "",
|
|
"impact": "low",
|
|
"quality_change": "=0%",
|
|
"speed_change": "=",
|
|
"context_change": "256K",
|
|
"provider_change": "Ollama",
|
|
"rationale": "lead=92\u2605, backend=91\u2605, python=90\u2605."
|
|
},
|
|
{
|
|
"agent": "GLM-5.1 (12 agents)",
|
|
"from_model": "glm-5.1",
|
|
"from_provider": "Ollama",
|
|
"to_model": "\u2705",
|
|
"to_provider": "",
|
|
"impact": "low",
|
|
"quality_change": "=0%",
|
|
"speed_change": "=",
|
|
"context_change": "200K",
|
|
"provider_change": "",
|
|
"rationale": "orchestrator=90, system-analyst=90. SWE-Pro #1."
|
|
},
|
|
{
|
|
"agent": "Kimi K2.6 (3 agents)",
|
|
"from_model": "kimi-k2.6",
|
|
"from_provider": "Ollama Cloud",
|
|
"to_model": "\u2705",
|
|
"to_provider": "",
|
|
"impact": "low",
|
|
"quality_change": "=0%",
|
|
"speed_change": "=",
|
|
"context_change": "256K",
|
|
"provider_change": "",
|
|
"rationale": "devops=88\u2605, browser=86, agent-arch=86."
|
|
}
|
|
],
|
|
"impact_data": [
|
|
{
|
|
"category": "debug GLM5.1\u2192V4-Pro/K2.6",
|
|
"before": 88,
|
|
"after": 90,
|
|
"delta": 2,
|
|
"notes": "LiveCodeBench 93.5, Terminal 67.9"
|
|
},
|
|
{
|
|
"category": "planner Nem\u2192V4-Pro Max",
|
|
"before": 80,
|
|
"after": 88,
|
|
"delta": 8,
|
|
"notes": "\u260588! GPQA 90.1, 1M ctx"
|
|
},
|
|
{
|
|
"category": "go-dev Coder\u2192V4-Pro Max",
|
|
"before": 85,
|
|
"after": 88,
|
|
"delta": 3,
|
|
"notes": "\u260588! Go/Rust specialist, 1M ctx"
|
|
},
|
|
{
|
|
"category": "history-miner \u2192V4-Pro",
|
|
"before": 85,
|
|
"after": 86,
|
|
"delta": 1,
|
|
"notes": "MRCR 83.5, long-context"
|
|
},
|
|
{
|
|
"category": "orchestrator \u2192K2.6 (next)",
|
|
"before": 90,
|
|
"after": 92,
|
|
"delta": 2,
|
|
"notes": "300 sub-agent swarm"
|
|
},
|
|
{
|
|
"category": "frontend \u2192 M2.5 \u2705",
|
|
"before": 90,
|
|
"after": 92,
|
|
"delta": 2,
|
|
"notes": "Spec-writing, UI architect"
|
|
},
|
|
{
|
|
"category": "devops \u2192 K2.6 \u2705",
|
|
"before": 65,
|
|
"after": 88,
|
|
"delta": 23,
|
|
"notes": "IF:65\u219291! Terminal 66.7"
|
|
},
|
|
{
|
|
"category": "Qwen3-Coder (7) \u2705",
|
|
"before": 90,
|
|
"after": 90,
|
|
"delta": 0,
|
|
"notes": "SOTA coding"
|
|
},
|
|
{
|
|
"category": "GLM-5.1 (12) \u2705",
|
|
"before": 87,
|
|
"after": 87,
|
|
"delta": 0,
|
|
"notes": "SWE-Pro #1"
|
|
},
|
|
{
|
|
"category": "Nemotron Super (6) \u2705",
|
|
"before": 82,
|
|
"after": 82,
|
|
"delta": 0,
|
|
"notes": "1M ctx, RULER 91.75%"
|
|
}
|
|
],
|
|
"benchmark_comparison": {
|
|
"benchmarks": [
|
|
{
|
|
"name": "SWE-V",
|
|
"full_name": "SWE-Bench Verified",
|
|
"description": "GitHub issue resolution (500 tasks)",
|
|
"roles": "lead-dev, backend, fixer"
|
|
},
|
|
{
|
|
"name": "SWE-P",
|
|
"full_name": "SWE-Bench Pro",
|
|
"description": "Multi-lang, decontaminated (1865 tasks)",
|
|
"roles": "all coding agents"
|
|
},
|
|
{
|
|
"name": "T-Bench",
|
|
"full_name": "Terminal-Bench 2.0",
|
|
"description": "CLI/shell multi-step tasks",
|
|
"roles": "devops, planner, orchestrator"
|
|
},
|
|
{
|
|
"name": "LCB",
|
|
"full_name": "LiveCodeBench",
|
|
"description": "Code gen from specs (held-out)",
|
|
"roles": "sdet, go-dev, python-dev"
|
|
},
|
|
{
|
|
"name": "GPQA",
|
|
"full_name": "GPQA Diamond",
|
|
"description": "PhD-level reasoning",
|
|
"roles": "system-analyst, planner"
|
|
},
|
|
{
|
|
"name": "BComp",
|
|
"full_name": "BrowseComp",
|
|
"description": "Web research & synthesis",
|
|
"roles": "browser-auto, capability-analyst"
|
|
},
|
|
{
|
|
"name": "HLE",
|
|
"full_name": "Humanity's Last Exam",
|
|
"description": "Frontier knowledge (with tools)",
|
|
"roles": "agent-architect, evaluator"
|
|
},
|
|
{
|
|
"name": "Ctx",
|
|
"full_name": "Context Window",
|
|
"description": "Max tokens in one pass",
|
|
"roles": "history-miner, memory-mgr"
|
|
},
|
|
{
|
|
"name": "$/M",
|
|
"full_name": "Cost per 1M input",
|
|
"description": "API pricing",
|
|
"roles": "all agents (ROI)"
|
|
}
|
|
],
|
|
"closed_source_models": [
|
|
{
|
|
"name": "Claude Opus 4.7",
|
|
"organization": "Anthropic",
|
|
"scores": [
|
|
87.6,
|
|
64.3,
|
|
69.4,
|
|
null,
|
|
94.2,
|
|
79.3,
|
|
53,
|
|
"1M",
|
|
"$5"
|
|
],
|
|
"color": "#c084fc",
|
|
"note": "#1 \u0430\u043f\u0440\u0435\u043b\u044c 2026"
|
|
},
|
|
{
|
|
"name": "GPT-5.5",
|
|
"organization": "OpenAI",
|
|
"scores": [
|
|
null,
|
|
58.6,
|
|
82.7,
|
|
null,
|
|
null,
|
|
83.4,
|
|
57.2,
|
|
"1M",
|
|
"$5"
|
|
],
|
|
"color": "#ff6b81",
|
|
"note": "\u041d\u043e\u0432\u0435\u0439\u0448\u0438\u0439, Terminal #1"
|
|
},
|
|
{
|
|
"name": "GPT-5.4",
|
|
"organization": "OpenAI",
|
|
"scores": [
|
|
78.2,
|
|
59.1,
|
|
75.1,
|
|
null,
|
|
94.4,
|
|
82.7,
|
|
58.7,
|
|
"200K",
|
|
"$2.50"
|
|
],
|
|
"color": "#ff6b81",
|
|
"note": "Reasoning, math"
|
|
},
|
|
{
|
|
"name": "Gemini 3.1 Pro",
|
|
"organization": "Google",
|
|
"scores": [
|
|
80.6,
|
|
46.1,
|
|
68.5,
|
|
null,
|
|
94.3,
|
|
85.9,
|
|
51.4,
|
|
"2M",
|
|
"$2"
|
|
],
|
|
"color": "#facc15",
|
|
"note": "ARC-AGI 77.1%, \u0434\u0435\u0448\u0451\u0432\u044b\u0439"
|
|
},
|
|
{
|
|
"name": "Claude Sonnet 4.6",
|
|
"organization": "Anthropic",
|
|
"scores": [
|
|
79.6,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
"200K",
|
|
"$3"
|
|
],
|
|
"color": "#c084fc",
|
|
"note": "5\u00d7 \u0434\u0435\u0448\u0435\u0432\u043b\u0435 Opus"
|
|
},
|
|
{
|
|
"name": "GPT-5.3-Codex",
|
|
"organization": "OpenAI",
|
|
"scores": [
|
|
85,
|
|
57,
|
|
77.3,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
"200K",
|
|
"$6"
|
|
],
|
|
"color": "#ff6b81",
|
|
"note": "Coding specialist"
|
|
}
|
|
],
|
|
"apaw_models": [
|
|
{
|
|
"name": "Kimi K2.6",
|
|
"organization": "APAW",
|
|
"scores": [
|
|
80.2,
|
|
58.6,
|
|
66.7,
|
|
87.2,
|
|
null,
|
|
83.2,
|
|
54,
|
|
"256K",
|
|
"$0.95"
|
|
],
|
|
"color": "#00ff94",
|
|
"note": "devops, browser, architect (3)"
|
|
},
|
|
{
|
|
"name": "GLM-5.1",
|
|
"organization": "APAW",
|
|
"scores": [
|
|
null,
|
|
58.4,
|
|
63.5,
|
|
null,
|
|
86.2,
|
|
68.7,
|
|
null,
|
|
"200K",
|
|
"~$0.50"
|
|
],
|
|
"color": "#00ff94",
|
|
"note": "12 agents! orchestrator, eval..."
|
|
},
|
|
{
|
|
"name": "V4-Pro Max",
|
|
"organization": "APAW",
|
|
"scores": [
|
|
80.6,
|
|
55.4,
|
|
67.9,
|
|
93.5,
|
|
90.1,
|
|
83.4,
|
|
48.2,
|
|
"1M",
|
|
"$0.42"
|
|
],
|
|
"color": "#00d4ff",
|
|
"note": "planner, go-dev (\u0440\u0435\u043a.)"
|
|
},
|
|
{
|
|
"name": "Qwen3-Coder 480B",
|
|
"organization": "APAW",
|
|
"scores": [
|
|
66.5,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
"256K",
|
|
"~$0.50"
|
|
],
|
|
"color": "#00ff94",
|
|
"note": "7 coding agents"
|
|
},
|
|
{
|
|
"name": "MiniMax M2.5",
|
|
"organization": "APAW",
|
|
"scores": [
|
|
80.2,
|
|
51.3,
|
|
null,
|
|
null,
|
|
null,
|
|
76.3,
|
|
null,
|
|
"204K",
|
|
"$0.15"
|
|
],
|
|
"color": "#00ff94",
|
|
"note": "frontend, skeptic, fixer (3)"
|
|
},
|
|
{
|
|
"name": "Nemotron Super",
|
|
"organization": "APAW",
|
|
"scores": [
|
|
60.5,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
"1M",
|
|
"~$0.40"
|
|
],
|
|
"color": "#00ff94",
|
|
"note": "6 agents (memory, history)"
|
|
}
|
|
]
|
|
}
|
|
} |