{ "version": "1.0.0", "generated": "2026-04-30T07:00:00Z", "source": "capability-index.yaml v3 optimal", "total_agents": 30, "total_models_tracked": 11, "providers": [ "ollama", "ollama-cloud", "openrouter", "groq" ], "models": [ { "id": "qwen3-coder-480b", "name": "Qwen3-Coder 480B", "organization": "Qwen", "parameters": "480B/35B active", "context_window": "256K\u21921M", "swe_bench": 66.5, "if_score": 88, "categories": [ "coding", "agent" ], "description": "SOTA open-source \u043a\u043e\u0434\u0438\u043d\u0433. \u0421\u0440\u0430\u0432\u043d\u0438\u043c \u0441 Claude Sonnet 4.", "tags": [ "coding", "agent", "tools" ], "openrouter": false, "provider": "ollama" }, { "id": "minimax-m2.5", "name": "MiniMax M2.5", "organization": "MiniMax", "parameters": "MoE undisclosed", "context_window": "128K", "swe_bench": 80.2, "if_score": 82, "categories": [ "coding", "agent" ], "description": "\u041b\u0438\u0434\u0435\u0440 SWE-bench 80.2%. \u041f\u043e\u043b\u043d\u044b\u0439 lifecycle \u0440\u0430\u0437\u0440\u0430\u0431\u043e\u0442\u043a\u0438.", "tags": [ "coding", "agent" ], "openrouter": false, "provider": "ollama" }, { "id": "minimax-m2.7", "name": "MiniMax M2.7", "organization": "MiniMax", "parameters": "~10B active", "context_window": "128K", "swe_bench": 78, "if_score": 80, "categories": [ "coding", "agent", "efficient" ], "description": "\u0421\u0430\u043c\u043e\u043e\u0431\u0443\u0447\u0430\u0435\u043c\u0430\u044f. 56.2% SWE-Pro. 100 TPS. $0.30/M.", "tags": [ "coding", "agent", "self-evolving" ], "openrouter": false, "provider": "ollama" }, { "id": "deepseek-v4-pro-max", "name": "DeepSeek V4-Pro", "organization": "DeepSeek", "parameters": "1.6T/49B active MoE", "context_window": "1M", "swe_bench": 80.6, "if_score": 89, "categories": [ "coding", "agent", "reasoning" ], "description": "SWE-V 80.6, LiveCodeBench 93.5(#1!), Terminal-Bench 67.9, Codeforces 3206, 1M ctx, 27% FLOPs vs V3.2. 
MIT.", "tags": [ "coding", "agent", "thinking", "tools" ], "openrouter": false, "provider": "ollama-cloud" }, { "id": "deepseek-v4-flash", "name": "DeepSeek V4-Flash", "organization": "DeepSeek", "parameters": "284B/13B active MoE", "context_window": "1M", "swe_bench": 79, "if_score": 86, "categories": [ "coding", "efficient", "agent" ], "description": "SWE-V ~79%, Flash Max = Pro \u0443\u0440\u043e\u0432\u0435\u043d\u044c reasoning. 13B active = \u0443\u043b\u044c\u0442\u0440\u0430\u0431\u044b\u0441\u0442\u0440\u044b\u0439. 1M ctx. FP4+FP8. MIT.", "tags": [ "coding", "efficient", "agent", "thinking" ], "openrouter": false, "provider": "ollama-cloud" }, { "id": "kimi-k2-6", "name": "Kimi K2.6", "organization": "Moonshot AI", "parameters": "1T/32B active MoE", "context_window": "256K", "swe_bench": 80.2, "if_score": 91, "categories": [ "coding", "agent", "multimodal" ], "description": "SWE-Pro 58.6(#1!), SWE-V 80.2, Terminal-Bench 66.7, HLE 54.0(#1!), BrowseComp 83.2. 13h autonomous. 300 sub-agent swarm. Modified MIT.", "tags": [ "coding", "agent", "swarm", "vision", "thinking", "tools" ], "openrouter": false, "provider": "ollama-cloud" }, { "id": "nemotron-3-super", "name": "Nemotron 3 Super", "organization": "NVIDIA", "parameters": "120B/12B active", "context_window": "1M", "swe_bench": 60.5, "if_score": 78, "categories": [ "agent", "reasoning", "efficient" ], "description": "SWE-bench 60.5%. RULER@1M 91.75%! 
\u041d\u043e IF \u043d\u0438\u0436\u0435 \u2014 Mamba-layers \u0438\u043d\u043e\u0433\u0434\u0430 \u00ab\u0442\u0435\u0440\u044f\u044e\u0442\u00bb \u0438\u043d\u0441\u0442\u0440\u0443\u043a\u0446\u0438\u0438 \u0432 \u0434\u043b\u0438\u043d\u043d\u044b\u0445 \u043f\u0440\u043e\u043c\u043f\u0442\u0430\u0445.", "tags": [ "agent", "1M-ctx", "thinking" ], "openrouter": false, "provider": "ollama" }, { "id": "glm-5.1", "name": "GLM-5.1", "organization": "Z.ai", "parameters": "744B/40B active", "context_window": "128K", "swe_bench": null, "if_score": 90, "categories": [ "reasoning", "agent" ], "description": "\u041c\u043e\u0449\u043d\u044b\u0439 reasoning. Arena ELO 1451. \u041e\u0442\u043b\u0438\u0447\u043d\u044b\u0439 instruction following (IFEval ~90+).", "tags": [ "reasoning", "agent" ], "openrouter": false, "provider": "ollama" }, { "id": "deepseek-v4", "name": "DeepSeek V4", "organization": "DeepSeek", "parameters": "Large MoE", "context_window": "128K", "swe_bench": null, "if_score": 75, "categories": [ "reasoning" ], "description": "\u0425\u043e\u0440\u043e\u0448\u0438\u0439 reasoning, \u043d\u043e IF \u043d\u0435\u0441\u0442\u0430\u0431\u0438\u043b\u0435\u043d \u2014 \u0438\u043d\u043e\u0433\u0434\u0430 \u0438\u0433\u043d\u043e\u0440\u0438\u0440\u0443\u0435\u0442 \u0444\u043e\u0440\u043c\u0430\u0442 \u0432\u044b\u0432\u043e\u0434\u0430.", "tags": [ "reasoning" ], "openrouter": false, "provider": "ollama" }, { "id": "qwen3-5-122b", "name": "Qwen 3.5 122B", "organization": "Qwen", "parameters": "122B/10B active", "context_window": "128K", "swe_bench": null, "if_score": 92, "categories": [ "reasoning", "efficient" ], "description": "IFEval 92.6%! \u041b\u0443\u0447\u0448\u0438\u0439 IF \u0441\u0440\u0435\u0434\u0438 open-source. Multimodal. 
Thinking.", "tags": [ "vision", "thinking", "tools" ], "openrouter": false, "provider": "ollama" }, { "id": "qwen3-coder-next", "name": "Qwen3-Coder-Next", "organization": "Qwen", "parameters": "80B/3B active", "context_window": "128K", "swe_bench": 70, "if_score": 84, "categories": [ "coding", "efficient" ], "description": "70% SWE-bench \u0441 3B active! \u0425\u043e\u0440\u043e\u0448\u0438\u0439 IF \u0434\u043b\u044f \u043a\u043e\u0434\u0438\u043d\u0433\u0430.", "tags": [ "coding", "efficient", "tools" ], "openrouter": false, "provider": "ollama" }, { "id": "cogito-2-1-671b", "name": "Cogito 2.1 671B", "organization": "Cognitive", "parameters": "671B MoE", "context_window": "128K", "swe_bench": null, "if_score": 76, "categories": [ "reasoning" ], "description": "MIT \u043b\u0438\u0446\u0435\u043d\u0437\u0438\u044f. 671B total. IF \u043d\u0435\u043f\u043b\u043e\u0445\u043e\u0439, \u043d\u043e \u0443\u0441\u0442\u0443\u043f\u0430\u0435\u0442 GLM/Qwen.", "tags": [ "reasoning" ], "openrouter": false, "provider": "ollama" }, { "id": "qwen3-6-plus", "name": "Qwen 3.6 Plus", "organization": "Qwen", "parameters": "Hybrid MoE", "context_window": "1M", "swe_bench": 78.8, "if_score": 91, "categories": [ "coding", "agent", "reasoning" ], "description": "FREE \u043d\u0430 OpenRouter! 1M \u043a\u043e\u043d\u0442\u0435\u043a\u0441\u0442. Always-on CoT. \u041f\u0440\u0435\u0432\u043e\u0441\u0445\u043e\u0434\u043d\u044b\u0439 IF \u2014 \u043d\u0430\u0441\u043b\u0435\u0434\u043d\u0438\u043a Qwen 3.5 (92.6%).", "tags": [ "coding", "agent", "1M-ctx", "free" ], "openrouter": true, "provider": "openrouter" }, { "id": "step-3-5-flash", "name": "Step 3.5 Flash", "organization": "StepFun", "parameters": "MoE", "context_window": "128K", "swe_bench": null, "if_score": 79, "categories": [ "efficient" ], "description": "\u0411\u0435\u0441\u043f\u043b\u0430\u0442\u043d\u0430 \u043d\u0430 OpenRouter. 
IF \u0441\u0440\u0435\u0434\u043d\u0438\u0439.", "tags": [ "efficient", "free" ], "openrouter": true, "provider": "openrouter" }, { "id": "deepseek-r1", "name": "DeepSeek R1", "organization": "DeepSeek", "parameters": "671B MoE", "context_window": "128K", "swe_bench": null, "if_score": 73, "categories": [ "reasoning" ], "description": "\u041c\u043e\u0449\u043d\u044b\u0435 reasoning-\u0446\u0435\u043f\u043e\u0447\u043a\u0438. \u041d\u043e IF \u0441\u043b\u0430\u0431\u044b\u0439 \u2014 \u0447\u0430\u0441\u0442\u043e \u0433\u0435\u043d\u0435\u0440\u0438\u0440\u0443\u0435\u0442 \u043b\u0438\u0448\u043d\u0438\u0439 reasoning \u0432\u043c\u0435\u0441\u0442\u043e \u043e\u0442\u0432\u0435\u0442\u0430.", "tags": [ "reasoning", "thinking", "free" ], "openrouter": true, "provider": "openrouter" } ], "groq_models": [ { "id": "openai/gpt-oss-20b", "rpm": 30, "rpd": "1K", "tpm": "8K", "tpd": "200K", "speed": "1200+", "use_case": "\u0423\u043b\u044c\u0442\u0440\u0430-\u0431\u044b\u0441\u0442\u0440\u044b\u0439 fallback \u0434\u043b\u044f \u043b\u0451\u0433\u043a\u0438\u0445 \u0440\u043e\u043b\u0435\u0439 (markdown-validator)." }, { "id": "llama-3.1-8b-instant", "rpm": 30, "rpd": "14.4K", "tpm": "6K", "tpd": "500K", "speed": "~800", "use_case": "14.4K RPD! \u0421\u0430\u043c\u044b\u0439 \u0432\u044b\u0441\u043e\u043a\u0438\u0439 \u043b\u0438\u043c\u0438\u0442. \u0414\u043b\u044f health-check / ping \u0440\u043e\u043b\u0435\u0439." }, { "id": "groq/compound", "rpm": 30, "rpd": "250", "tpm": "70K", "tpd": "\u2014", "speed": "varies", "use_case": "\u041c\u0443\u043b\u044c\u0442\u0438\u043c\u043e\u0434\u0435\u043b\u044c\u043d\u0430\u044f \u0430\u0433\u0440\u0435\u0433\u0430\u0446\u0438\u044f. \u0414\u043b\u044f research-\u0437\u0430\u0434\u0430\u0447." }, { "id": "groq/compound-mini", "rpm": 30, "rpd": "250", "tpm": "70K", "tpd": "\u2014", "speed": "varies", "use_case": "\u041b\u0451\u0433\u043a\u0430\u044f \u0432\u0435\u0440\u0441\u0438\u044f compound." 
}, { "id": "llama-prompt-guard-2", "rpm": 30, "rpd": "14.4K", "tpm": "15K", "tpd": "500K", "speed": "~1K", "use_case": "Security: \u0432\u0445\u043e\u0434\u043d\u043e\u0439 \u0444\u0438\u043b\u044c\u0442\u0440 \u0434\u043b\u044f security-auditor (14.4K RPD!)." } ], "agent_model_scores": [ { "agent": "lead-developer", "current_model_index": 0, "current_model_id": "qwen3-coder-480b", "reasoning_effort": "H", "scores": { "qwen3-coder-480b": 92, "minimax-m2.5": 86, "minimax-m2.7": 82, "nemotron-3-super": 70, "glm-5.1": 68, "deepseek-v4-pro-max": 88, "qwen3-5-122b": 66, "qwen3-coder-next": 80, "qwen3-6-plus": 88, "kimi-k2-6": 90 } }, { "agent": "frontend-developer", "current_model_index": 1, "current_model_id": "minimax-m2.5", "reasoning_effort": "M", "scores": { "qwen3-coder-480b": 86, "minimax-m2.5": 92, "minimax-m2.7": 88, "nemotron-3-super": 62, "glm-5.1": 56, "deepseek-v4-pro-max": 82, "qwen3-5-122b": 60, "qwen3-coder-next": 76, "qwen3-6-plus": 88, "kimi-k2-6": 86 } }, { "agent": "php-developer", "current_model_index": 0, "current_model_id": "qwen3-coder-480b", "reasoning_effort": "H", "scores": { "qwen3-coder-480b": 87, "minimax-m2.5": 76, "minimax-m2.7": 72, "nemotron-3-super": 64, "glm-5.1": 56, "deepseek-v4-pro-max": 74, "qwen3-5-122b": 60, "qwen3-coder-next": 76, "qwen3-6-plus": 84, "kimi-k2-6": 86 } }, { "agent": "python-developer", "current_model_index": 0, "current_model_id": "qwen3-coder-480b", "reasoning_effort": "H", "scores": { "qwen3-coder-480b": 90, "minimax-m2.5": 82, "minimax-m2.7": 78, "nemotron-3-super": 66, "glm-5.1": 60, "deepseek-v4-pro-max": 78, "qwen3-5-122b": 64, "qwen3-coder-next": 78, "qwen3-6-plus": 88, "kimi-k2-6": 88 } }, { "agent": "backend-developer", "current_model_index": 0, "current_model_id": "qwen3-coder-480b", "reasoning_effort": "M", "scores": { "qwen3-coder-480b": 91, "minimax-m2.5": 84, "minimax-m2.7": 80, "nemotron-3-super": 68, "glm-5.1": 63, "deepseek-v4-pro-max": 86, "qwen3-5-122b": 62, "qwen3-coder-next": 78, 
"qwen3-6-plus": 87, "kimi-k2-6": 90 } }, { "agent": "go-developer", "current_model_index": 3, "current_model_id": "deepseek-v4-pro-max", "reasoning_effort": "M", "scores": { "qwen3-coder-480b": 85, "minimax-m2.5": 78, "minimax-m2.7": 74, "nemotron-3-super": 66, "glm-5.1": 58, "deepseek-v4-pro-max": 88, "qwen3-5-122b": 58, "qwen3-coder-next": 74, "qwen3-6-plus": 82, "kimi-k2-6": 86 } }, { "agent": "flutter-developer", "current_model_index": 0, "current_model_id": "qwen3-coder-480b", "reasoning_effort": "M", "scores": { "qwen3-coder-480b": 86, "minimax-m2.5": 70, "minimax-m2.7": 66, "nemotron-3-super": 60, "glm-5.1": 53, "deepseek-v4-pro-max": 78, "qwen3-5-122b": 58, "qwen3-coder-next": 74, "qwen3-6-plus": 82, "kimi-k2-6": 84 } }, { "agent": "devops-engineer", "current_model_index": 5, "current_model_id": "kimi-k2-6", "reasoning_effort": "M", "scores": { "qwen3-coder-480b": 66, "minimax-m2.5": 53, "minimax-m2.7": 48, "nemotron-3-super": 78, "glm-5.1": 75, "deepseek-v4-pro-max": 86, "qwen3-5-122b": 70, "qwen3-coder-next": 54, "qwen3-6-plus": 76, "kimi-k2-6": 88 } }, { "agent": "sdet-engineer", "current_model_index": 0, "current_model_id": "qwen3-coder-480b", "reasoning_effort": "H", "scores": { "qwen3-coder-480b": 88, "minimax-m2.5": 84, "minimax-m2.7": 80, "nemotron-3-super": 70, "glm-5.1": 63, "deepseek-v4-pro-max": 84, "qwen3-5-122b": 64, "qwen3-coder-next": 78, "qwen3-6-plus": 84, "kimi-k2-6": 87 } }, { "agent": "code-skeptic", "current_model_index": 1, "current_model_id": "minimax-m2.5", "reasoning_effort": "M", "scores": { "qwen3-coder-480b": 82, "minimax-m2.5": 85, "minimax-m2.7": 80, "nemotron-3-super": 73, "glm-5.1": 72, "deepseek-v4-pro-max": 82, "qwen3-5-122b": 70, "qwen3-coder-next": 72, "qwen3-6-plus": 80, "kimi-k2-6": 82 } }, { "agent": "security-auditor", "current_model_index": 3, "current_model_id": "deepseek-v4-pro-max", "reasoning_effort": "M", "scores": { "qwen3-coder-480b": 76, "minimax-m2.5": 74, "minimax-m2.7": 68, "nemotron-3-super": 76, 
"glm-5.1": 68, "deepseek-v4-pro-max": 80, "qwen3-5-122b": 72, "qwen3-coder-next": 64, "qwen3-6-plus": 75, "kimi-k2-6": 80 } }, { "agent": "performance-engineer", "current_model_index": 3, "current_model_id": "deepseek-v4-pro-max", "reasoning_effort": "M", "scores": { "qwen3-coder-480b": 78, "minimax-m2.5": 75, "minimax-m2.7": 70, "nemotron-3-super": 78, "glm-5.1": 74, "deepseek-v4-pro-max": 84, "qwen3-5-122b": 70, "qwen3-coder-next": 67, "qwen3-6-plus": 76, "kimi-k2-6": 82 } }, { "agent": "the-fixer", "current_model_index": 5, "current_model_id": "kimi-k2-6", "reasoning_effort": "M", "scores": { "qwen3-coder-480b": 89, "minimax-m2.5": 88, "minimax-m2.7": 84, "nemotron-3-super": 71, "glm-5.1": 64, "deepseek-v4-pro-max": 88, "qwen3-5-122b": 64, "qwen3-coder-next": 82, "qwen3-6-plus": 86, "kimi-k2-6": 90 } }, { "agent": "browser-automation", "current_model_index": 0, "current_model_id": "qwen3-coder-480b", "reasoning_effort": "M", "scores": { "qwen3-coder-480b": 87, "minimax-m2.5": 72, "minimax-m2.7": 68, "nemotron-3-super": 61, "glm-5.1": 53, "deepseek-v4-pro-max": 82, "qwen3-5-122b": 56, "qwen3-coder-next": 72, "qwen3-6-plus": 82, "kimi-k2-6": 86 } }, { "agent": "visual-tester", "current_model_index": 0, "current_model_id": "qwen3-coder-480b", "reasoning_effort": "M", "scores": { "qwen3-coder-480b": 82, "minimax-m2.5": 68, "minimax-m2.7": 64, "nemotron-3-super": 55, "glm-5.1": 48, "deepseek-v4-pro-max": 76, "qwen3-5-122b": 54, "qwen3-coder-next": 66, "qwen3-6-plus": 76, "kimi-k2-6": 78 } }, { "agent": "system-analyst", "current_model_index": 7, "current_model_id": "glm-5.1", "reasoning_effort": "M", "scores": { "qwen3-coder-480b": 70, "minimax-m2.5": 66, "minimax-m2.7": 63, "nemotron-3-super": 74, "glm-5.1": 82, "deepseek-v4-pro-max": 88, "qwen3-5-122b": 76, "qwen3-coder-next": 58, "qwen3-6-plus": 80, "kimi-k2-6": 86 } }, { "agent": "capability-analyst", "current_model_index": 7, "current_model_id": "glm-5.1", "reasoning_effort": "M", "scores": { 
"qwen3-coder-480b": 72, "minimax-m2.5": 68, "minimax-m2.7": 66, "nemotron-3-super": 76, "glm-5.1": 78, "deepseek-v4-pro-max": 82, "qwen3-5-122b": 75, "qwen3-coder-next": 60, "qwen3-6-plus": 79, "kimi-k2-6": 82 } }, { "agent": "orchestrator", "current_model_index": 5, "current_model_id": "kimi-k2-6", "reasoning_effort": "H", "scores": { "qwen3-coder-480b": 74, "minimax-m2.5": 70, "minimax-m2.7": 68, "nemotron-3-super": 80, "glm-5.1": 82, "deepseek-v4-pro-max": 86, "qwen3-5-122b": 78, "qwen3-coder-next": 62, "qwen3-6-plus": 84, "kimi-k2-6": 92 } }, { "agent": "release-manager", "current_model_index": 7, "current_model_id": "glm-5.1", "reasoning_effort": "M", "scores": { "qwen3-coder-480b": 72, "minimax-m2.5": 66, "minimax-m2.7": 64, "nemotron-3-super": 74, "glm-5.1": 76, "deepseek-v4-pro-max": 78, "qwen3-5-122b": 72, "qwen3-coder-next": 60, "qwen3-6-plus": 76, "kimi-k2-6": 78 } }, { "agent": "evaluator", "current_model_index": 7, "current_model_id": "glm-5.1", "reasoning_effort": "H", "scores": { "qwen3-coder-480b": 70, "minimax-m2.5": 73, "minimax-m2.7": 70, "nemotron-3-super": 78, "glm-5.1": 78, "deepseek-v4-pro-max": 84, "qwen3-5-122b": 76, "qwen3-coder-next": 58, "qwen3-6-plus": 81, "kimi-k2-6": 84 } }, { "agent": "prompt-optimizer", "current_model_index": 12, "current_model_id": "qwen3-6-plus", "reasoning_effort": "M", "scores": { "qwen3-coder-480b": 76, "minimax-m2.5": 74, "minimax-m2.7": 72, "nemotron-3-super": 76, "glm-5.1": 75, "deepseek-v4-pro-max": 80, "qwen3-5-122b": 74, "qwen3-coder-next": 64, "qwen3-6-plus": 83, "kimi-k2-6": 82 } }, { "agent": "product-owner", "current_model_index": 7, "current_model_id": "glm-5.1", "reasoning_effort": "M", "scores": { "qwen3-coder-480b": 60, "minimax-m2.5": 56, "minimax-m2.7": 54, "nemotron-3-super": 74, "glm-5.1": 78, "deepseek-v4-pro-max": 76, "qwen3-5-122b": 74, "qwen3-coder-next": 48, "qwen3-6-plus": 78, "kimi-k2-6": 76 } }, { "agent": "pipeline-judge", "current_model_index": 7, "current_model_id": "glm-5.1", 
"reasoning_effort": "M", "scores": { "qwen3-coder-480b": 64, "minimax-m2.5": 68, "minimax-m2.7": 65, "nemotron-3-super": 78, "glm-5.1": 76, "deepseek-v4-pro-max": 82, "qwen3-5-122b": 74, "qwen3-coder-next": 56, "qwen3-6-plus": 80, "kimi-k2-6": 84 } }, { "agent": "workflow-architect", "current_model_index": 7, "current_model_id": "glm-5.1", "reasoning_effort": "H", "scores": { "qwen3-coder-480b": 68, "minimax-m2.5": 62, "minimax-m2.7": 60, "nemotron-3-super": 76, "glm-5.1": 76, "deepseek-v4-pro-max": 80, "qwen3-5-122b": 72, "qwen3-coder-next": 56, "qwen3-6-plus": 80, "kimi-k2-6": 82 } }, { "agent": "markdown-validator", "current_model_index": 3, "current_model_id": "deepseek-v4-pro-max", "reasoning_effort": "M", "scores": { "qwen3-coder-480b": 43, "minimax-m2.5": 38, "minimax-m2.7": 36, "nemotron-3-super": 52, "glm-5.1": 55, "deepseek-v4-pro-max": 68, "qwen3-5-122b": 56, "qwen3-coder-next": 40, "qwen3-6-plus": 50, "kimi-k2-6": 56 } }, { "agent": "agent-architect", "current_model_index": 5, "current_model_id": "kimi-k2-6", "reasoning_effort": "H", "scores": { "qwen3-coder-480b": 78, "minimax-m2.5": 72, "minimax-m2.7": 70, "nemotron-3-super": 78, "glm-5.1": 76, "deepseek-v4-pro-max": 82, "qwen3-5-122b": 76, "qwen3-coder-next": 66, "qwen3-6-plus": 82, "kimi-k2-6": 86 } }, { "agent": "planner", "current_model_index": 3, "current_model_id": "deepseek-v4-pro-max", "reasoning_effort": "M", "scores": { "qwen3-coder-480b": 72, "minimax-m2.5": 68, "minimax-m2.7": 66, "nemotron-3-super": 80, "glm-5.1": 78, "deepseek-v4-pro-max": 88, "qwen3-5-122b": 78, "qwen3-coder-next": 60, "qwen3-6-plus": 85, "kimi-k2-6": 86 } }, { "agent": "reflector", "current_model_index": 3, "current_model_id": "deepseek-v4-pro-max", "reasoning_effort": "M", "scores": { "qwen3-coder-480b": 68, "minimax-m2.5": 66, "minimax-m2.7": 64, "nemotron-3-super": 78, "glm-5.1": 76, "deepseek-v4-pro-max": 84, "qwen3-5-122b": 76, "qwen3-coder-next": 56, "qwen3-6-plus": 82, "kimi-k2-6": 80 } }, { "agent": 
"memory-manager", "current_model_index": 12, "current_model_id": "qwen3-6-plus", "reasoning_effort": "M", "scores": { "qwen3-coder-480b": 63, "minimax-m2.5": 58, "minimax-m2.7": 56, "nemotron-3-super": 86, "glm-5.1": 72, "deepseek-v4-pro-max": 86, "qwen3-5-122b": 70, "qwen3-coder-next": 50, "qwen3-6-plus": 87, "kimi-k2-6": 84 } }, { "agent": "architect-indexer", "current_model_index": 7, "current_model_id": "glm-5.1", "reasoning_effort": "H", "scores": { "qwen3-coder-480b": 70, "minimax-m2.5": 64, "minimax-m2.7": 62, "nemotron-3-super": 74, "glm-5.1": 80, "deepseek-v4-pro-max": 78, "qwen3-5-122b": 76, "qwen3-coder-next": 58, "qwen3-6-plus": 80, "kimi-k2-6": 84 } } ], "if_scores": { "qwen3-coder-480b": 88, "minimax-m2.5": 82, "minimax-m2.7": 78, "nemotron-3-super": 85, "glm-5.1": 80, "deepseek-v4-pro-max": 88, "qwen3-5-122b": 86, "qwen3-coder-next": 84, "qwen3-6-plus": 90, "kimi-k2-6": 91, "deepseek-v4-flash": 86 }, "agent_current_config": [ { "agent": "lead-developer", "model": "ollama-cloud/qwen3-coder:480b", "provider": "Ollama Cloud", "category": "Process", "badge_type": "qwen", "fit_score": 0, "status": "good", "previous_model": null }, { "agent": "frontend-developer", "model": "ollama-cloud/minimax-m2.5", "provider": "Ollama Cloud", "category": "Process", "badge_type": "qwen", "fit_score": 0, "status": "good", "previous_model": null }, { "agent": "php-developer", "model": "ollama-cloud/qwen3-coder:480b", "provider": "Ollama Cloud", "category": "Process", "badge_type": "qwen", "fit_score": 0, "status": "good", "previous_model": null }, { "agent": "python-developer", "model": "ollama-cloud/qwen3-coder:480b", "provider": "Ollama Cloud", "category": "Process", "badge_type": "qwen", "fit_score": 0, "status": "good", "previous_model": null }, { "agent": "backend-developer", "model": "ollama-cloud/qwen3-coder:480b", "provider": "Ollama Cloud", "category": "Process", "badge_type": "qwen", "fit_score": 0, "status": "good", "previous_model": null }, { "agent": 
"go-developer", "model": "ollama-cloud/deepseek-v4-pro-max", "provider": "Ollama Cloud", "category": "Process", "badge_type": "qwen", "fit_score": 0, "status": "good", "previous_model": null }, { "agent": "flutter-developer", "model": "ollama-cloud/qwen3-coder:480b", "provider": "Ollama Cloud", "category": "Process", "badge_type": "qwen", "fit_score": 0, "status": "good", "previous_model": null }, { "agent": "devops-engineer", "model": "ollama-cloud/kimi-k2.6:cloud", "provider": "Ollama Cloud", "category": "Process", "badge_type": "nemotron", "fit_score": 0, "status": "good", "previous_model": null }, { "agent": "sdet-engineer", "model": "ollama-cloud/qwen3-coder:480b", "provider": "Ollama Cloud", "category": "Process", "badge_type": "qwen", "fit_score": 0, "status": "good", "previous_model": null }, { "agent": "code-skeptic", "model": "ollama-cloud/minimax-m2.5", "provider": "Ollama Cloud", "category": "Process", "badge_type": "minimax", "fit_score": 0, "status": "good", "previous_model": null }, { "agent": "security-auditor", "model": "ollama-cloud/deepseek-v4-pro-max", "provider": "Ollama Cloud", "category": "Process", "badge_type": "nemotron", "fit_score": 0, "status": "good", "previous_model": null }, { "agent": "performance-engineer", "model": "ollama-cloud/deepseek-v4-pro-max", "provider": "Ollama Cloud", "category": "Process", "badge_type": "nemotron", "fit_score": 0, "status": "good", "previous_model": null }, { "agent": "the-fixer", "model": "ollama-cloud/kimi-k2.6:cloud", "provider": "Ollama Cloud", "category": "Process", "badge_type": "minimax", "fit_score": 0, "status": "good", "previous_model": null }, { "agent": "browser-automation", "model": "ollama-cloud/qwen3-coder:480b", "provider": "Ollama Cloud", "category": "Process", "badge_type": "qwen", "fit_score": 0, "status": "good", "previous_model": null }, { "agent": "visual-tester", "model": "ollama-cloud/qwen3-coder:480b", "provider": "Ollama Cloud", "category": "Process", "badge_type": "qwen", 
"fit_score": 0, "status": "good", "previous_model": null }, { "agent": "system-analyst", "model": "ollama-cloud/glm-5.1", "provider": "Ollama Cloud", "category": "Process", "badge_type": "glm", "fit_score": 0, "status": "good", "previous_model": null }, { "agent": "capability-analyst", "model": "ollama-cloud/glm-5.1", "provider": "Ollama Cloud", "category": "Process", "badge_type": "glm", "fit_score": 0, "status": "good", "previous_model": null }, { "agent": "orchestrator", "model": "ollama-cloud/kimi-k2.6:cloud", "provider": "Ollama Cloud", "category": "Process", "badge_type": "kimi", "fit_score": 0, "status": "good", "previous_model": null }, { "agent": "release-manager", "model": "ollama-cloud/glm-5.1", "provider": "Ollama Cloud", "category": "Process", "badge_type": "glm", "fit_score": 0, "status": "good", "previous_model": null }, { "agent": "evaluator", "model": "ollama-cloud/glm-5.1", "provider": "Ollama Cloud", "category": "Process", "badge_type": "glm", "fit_score": 0, "status": "good", "previous_model": null }, { "agent": "prompt-optimizer", "model": "ollama-cloud/qwen3.6-plus", "provider": "Ollama Cloud", "category": "Process", "badge_type": "glm", "fit_score": 0, "status": "good", "previous_model": null }, { "agent": "product-owner", "model": "ollama-cloud/glm-5.1", "provider": "Ollama Cloud", "category": "Process", "badge_type": "glm", "fit_score": 0, "status": "good", "previous_model": null }, { "agent": "pipeline-judge", "model": "ollama-cloud/glm-5.1", "provider": "Ollama Cloud", "category": "Process", "badge_type": "glm", "fit_score": 0, "status": "good", "previous_model": null }, { "agent": "workflow-architect", "model": "ollama-cloud/glm-5.1", "provider": "Ollama Cloud", "category": "Process", "badge_type": "glm", "fit_score": 0, "status": "good", "previous_model": null }, { "agent": "markdown-validator", "model": "ollama-cloud/deepseek-v4-pro-max", "provider": "Ollama Cloud", "category": "Process", "badge_type": "nemotron", "fit_score": 0, 
"status": "good", "previous_model": null }, { "agent": "agent-architect", "model": "ollama-cloud/kimi-k2.6:cloud", "provider": "Ollama Cloud", "category": "Process", "badge_type": "glm", "fit_score": 0, "status": "good", "previous_model": null }, { "agent": "planner", "model": "ollama-cloud/deepseek-v4-pro-max", "provider": "Ollama Cloud", "category": "Process", "badge_type": "nemotron", "fit_score": 0, "status": "good", "previous_model": null }, { "agent": "reflector", "model": "ollama-cloud/deepseek-v4-pro-max", "provider": "Ollama Cloud", "category": "Process", "badge_type": "nemotron", "fit_score": 0, "status": "good", "previous_model": null }, { "agent": "memory-manager", "model": "ollama-cloud/qwen3.6-plus", "provider": "Ollama Cloud", "category": "Process", "badge_type": "nemotron", "fit_score": 0, "status": "good", "previous_model": null }, { "agent": "architect-indexer", "model": "ollama-cloud/glm-5.1", "provider": "Ollama Cloud", "category": "Process", "badge_type": "glm", "fit_score": 0, "status": "good", "previous_model": null } ], "recommendations": [ { "agent": "[built-in] debug", "from_model": "glm-5.1 (88)", "from_provider": "Ollama", "to_model": "V4-Pro Max (\u260590) / K2.6 (\u260590) RE:High", "to_provider": "Ollama Cloud", "impact": "high", "quality_change": "+2%", "speed_change": "~1x", "context_change": "200K\u21921M", "provider_change": "Ollama Cloud", "rationale": "\u2605 \u043c\u0430\u0442\u0440\u0438\u0446\u044b: V4-Pro=90 \u0438 K2.6=90 (TIE!), GLM-5.1=88. V4-Pro: LiveCodeBench 93.5(#1!), Terminal 67.9, 1M ctx \u0434\u043b\u044f \u043f\u043e\u043b\u043d\u043e\u0433\u043e \u043f\u0440\u043e\u0435\u043a\u0442\u0430. K2.6: 13h auto sessions. \u041e\u0431\u0430 \u043b\u0443\u0447\u0448\u0435 GLM-5.1. RE:High \u0434\u043b\u044f debug." 
}, { "agent": "planner", "from_model": "nemotron-3-super (80)", "from_provider": "Ollama", "to_model": "V4-Pro Max (\u260588) RE:High", "to_provider": "Ollama Cloud", "impact": "high", "quality_change": "+10%", "speed_change": "~1x", "context_change": "1M", "provider_change": "Ollama Cloud", "rationale": "\u2605 \u043c\u0430\u0442\u0440\u0438\u0446\u044b: V4-Pro=88(\u043b\u0443\u0447\u0448\u0438\u0439!), K2.6=86, GLM-5.1=85, Nem=80. V4-Pro: GPQA 90.1 (reasoning), 1M ctx \u0441\u043e\u0445\u0440\u0430\u043d\u044f\u0435\u0442\u0441\u044f (vs \u043f\u043e\u0442\u0435\u0440\u044f \u043f\u0440\u0438 K2.6). RE:High \u0434\u043b\u044f chain-of-thought planning." }, { "agent": "go-developer", "from_model": "qwen3-coder:480b (85)", "from_provider": "Ollama", "to_model": "V4-Pro Max (\u260588) RE:Medium", "to_provider": "Ollama Cloud", "impact": "medium", "quality_change": "+4%", "speed_change": "~1x", "context_change": "256K\u21921M", "provider_change": "Ollama Cloud", "rationale": "\u2605 \u043c\u0430\u0442\u0440\u0438\u0446\u044b: V4-Pro=88(\u043b\u0443\u0447\u0448\u0438\u0439 \u0434\u043b\u044f Go!), K2.6=86, Qwen3Coder=85. DeepSeek \u043c\u043e\u0434\u0435\u043b\u0438 \u0442\u0440\u0430\u0434\u0438\u0446\u0438\u043e\u043d\u043d\u043e \u0441\u0438\u043b\u044c\u043d\u044b \u0432 Go/Rust. 1M ctx \u0434\u043b\u044f \u043a\u0440\u0443\u043f\u043d\u044b\u0445 Go-\u043f\u0440\u043e\u0435\u043a\u0442\u043e\u0432." }, { "agent": "history-miner", "from_model": "nemotron-3-super (\u260585)", "from_provider": "Ollama", "to_model": "V4-Pro Max (86) + Nem fallback", "to_provider": "Hybrid", "impact": "medium", "quality_change": "+1%", "speed_change": "~1x", "context_change": "1M", "provider_change": "Ollama Cloud + Ollama", "rationale": "V4-Pro=86 \u0447\u0443\u0442\u044c \u043b\u0443\u0447\u0448\u0435 Nemotron=85. 1M ctx \u0443 \u043e\u0431\u043e\u0438\u0445. MRCR 83.5 \u0443 V4-Pro \u2014 \u043b\u0443\u0447\u0448\u0435\u0435 long-context retrieval. 
Nemotron \u043a\u0430\u043a fallback (RULER 91.75%)." }, { "agent": "frontend-dev \u2192 M2.5", "from_model": "qwen3-coder (90)", "from_provider": "Ollama", "to_model": "MiniMax M2.5 (\u260592) \u2705", "to_provider": "Ollama", "impact": "low", "quality_change": "+2%", "speed_change": "=", "context_change": "204K", "provider_change": "Ollama", "rationale": "Spec-writing, UI architect. APPLIED." }, { "agent": "devops \u2192 K2.6", "from_model": "deepseek-v3.2", "from_provider": "", "to_model": "kimi-k2.6:cloud \u2705", "to_provider": "Ollama Cloud", "impact": "low", "quality_change": "+35%", "speed_change": "=", "context_change": "256K", "provider_change": "", "rationale": "APPLIED." }, { "agent": "orchestrator", "from_model": "glm-5.1 (\u260590)", "from_provider": "Ollama", "to_model": "K2.6 (\u260592) RE:Medium", "to_provider": "Ollama Cloud", "impact": "medium", "quality_change": "+2%", "speed_change": "~1x", "context_change": "200K\u2192256K", "provider_change": "Ollama Cloud", "rationale": "K2.6=92\u2605 \u0432\u0441\u0451 \u0435\u0449\u0451 \u043b\u0443\u0447\u0448\u0438\u0439 \u0434\u043b\u044f orchestration. V4-Pro=86 \u0441\u043b\u0430\u0431\u0435\u0435. 300 sub-agent swarm." }, { "agent": "the-fixer", "from_model": "minimax-m2.5 (\u260588)", "from_provider": "Ollama", "to_model": "V4-Pro (\u260588) / K2.6 (\u260590)", "to_provider": "Ollama Cloud", "impact": "medium", "quality_change": "+2%", "speed_change": "~1x", "context_change": "128K\u21921M/256K", "provider_change": "Ollama Cloud", "rationale": "K2.6=90(\u043b\u0443\u0447\u0448\u0438\u0439), V4-Pro=88=M2.5. M2.5 SWE-bench 80.2% \u0441\u0442\u0430\u0431\u0438\u043b\u044c\u043d\u0435\u0435. \u041d\u0435 \u0441\u0440\u043e\u0447\u043d\u043e." 
}, { "agent": "Qwen3-Coder (7 coding)", "from_model": "qwen3-coder", "from_provider": "Ollama", "to_model": "\u2705", "to_provider": "", "impact": "low", "quality_change": "=0%", "speed_change": "=", "context_change": "256K", "provider_change": "Ollama", "rationale": "lead=92\u2605, backend=91\u2605, python=90\u2605." }, { "agent": "GLM-5.1 (12 agents)", "from_model": "glm-5.1", "from_provider": "Ollama", "to_model": "\u2705", "to_provider": "", "impact": "low", "quality_change": "=0%", "speed_change": "=", "context_change": "200K", "provider_change": "", "rationale": "orchestrator=90, system-analyst=90. SWE-Pro #1." }, { "agent": "Kimi K2.6 (3 agents)", "from_model": "kimi-k2.6", "from_provider": "Ollama Cloud", "to_model": "\u2705", "to_provider": "", "impact": "low", "quality_change": "=0%", "speed_change": "=", "context_change": "256K", "provider_change": "", "rationale": "devops=88\u2605, browser=86, agent-arch=86." } ], "impact_data": [ { "category": "debug GLM5.1\u2192V4-Pro/K2.6", "before": 88, "after": 90, "delta": 2, "notes": "LiveCodeBench 93.5, Terminal 67.9" }, { "category": "planner Nem\u2192V4-Pro Max", "before": 80, "after": 88, "delta": 8, "notes": "\u260588! GPQA 90.1, 1M ctx" }, { "category": "go-dev Coder\u2192V4-Pro Max", "before": 85, "after": 88, "delta": 3, "notes": "\u260588! Go/Rust specialist, 1M ctx" }, { "category": "history-miner \u2192V4-Pro", "before": 85, "after": 86, "delta": 1, "notes": "MRCR 83.5, long-context" }, { "category": "orchestrator \u2192K2.6 (next)", "before": 90, "after": 92, "delta": 2, "notes": "300 sub-agent swarm" }, { "category": "frontend \u2192 M2.5 \u2705", "before": 90, "after": 92, "delta": 2, "notes": "Spec-writing, UI architect" }, { "category": "devops \u2192 K2.6 \u2705", "before": 65, "after": 88, "delta": 23, "notes": "IF:65\u219291! 
Terminal 66.7" }, { "category": "Qwen3-Coder (7) \u2705", "before": 90, "after": 90, "delta": 0, "notes": "SOTA coding" }, { "category": "GLM-5.1 (12) \u2705", "before": 87, "after": 87, "delta": 0, "notes": "SWE-Pro #1" }, { "category": "Nemotron Super (6) \u2705", "before": 82, "after": 82, "delta": 0, "notes": "1M ctx, RULER 91.75%" } ], "benchmark_comparison": { "benchmarks": [ { "name": "SWE-V", "full_name": "SWE-Bench Verified", "description": "GitHub issue resolution (500 tasks)", "roles": "lead-dev, backend, fixer" }, { "name": "SWE-P", "full_name": "SWE-Bench Pro", "description": "Multi-lang, decontaminated (1865 tasks)", "roles": "all coding agents" }, { "name": "T-Bench", "full_name": "Terminal-Bench 2.0", "description": "CLI/shell multi-step tasks", "roles": "devops, planner, orchestrator" }, { "name": "LCB", "full_name": "LiveCodeBench", "description": "Code gen from specs (held-out)", "roles": "sdet, go-dev, python-dev" }, { "name": "GPQA", "full_name": "GPQA Diamond", "description": "PhD-level reasoning", "roles": "system-analyst, planner" }, { "name": "BComp", "full_name": "BrowseComp", "description": "Web research & synthesis", "roles": "browser-auto, capability-analyst" }, { "name": "HLE", "full_name": "Humanity's Last Exam", "description": "Frontier knowledge (with tools)", "roles": "agent-architect, evaluator" }, { "name": "Ctx", "full_name": "Context Window", "description": "Max tokens in one pass", "roles": "history-miner, memory-mgr" }, { "name": "$/M", "full_name": "Cost per 1M input", "description": "API pricing", "roles": "all agents (ROI)" } ], "closed_source_models": [ { "name": "Claude Opus 4.7", "organization": "Anthropic", "scores": [ 87.6, 64.3, 69.4, null, 94.2, 79.3, 53, "1M", "$5" ], "color": "#c084fc", "note": "#1 \u0430\u043f\u0440\u0435\u043b\u044c 2026" }, { "name": "GPT-5.5", "organization": "OpenAI", "scores": [ null, 58.6, 82.7, null, null, 83.4, 57.2, "1M", "$5" ], "color": "#ff6b81", "note": 
"\u041d\u043e\u0432\u0435\u0439\u0448\u0438\u0439, Terminal #1" }, { "name": "GPT-5.4", "organization": "OpenAI", "scores": [ 78.2, 59.1, 75.1, null, 94.4, 82.7, 58.7, "200K", "$2.50" ], "color": "#ff6b81", "note": "Reasoning, math" }, { "name": "Gemini 3.1 Pro", "organization": "Google", "scores": [ 80.6, 46.1, 68.5, null, 94.3, 85.9, 51.4, "2M", "$2" ], "color": "#facc15", "note": "ARC-AGI 77.1%, \u0434\u0435\u0448\u0451\u0432\u044b\u0439" }, { "name": "Claude Sonnet 4.6", "organization": "Anthropic", "scores": [ 79.6, null, null, null, null, null, null, "200K", "$3" ], "color": "#c084fc", "note": "5\u00d7 \u0434\u0435\u0448\u0435\u0432\u043b\u0435 Opus" }, { "name": "GPT-5.3-Codex", "organization": "OpenAI", "scores": [ 85, 57, 77.3, null, null, null, null, "200K", "$6" ], "color": "#ff6b81", "note": "Coding specialist" } ], "apaw_models": [ { "name": "Kimi K2.6", "organization": "APAW", "scores": [ 80.2, 58.6, 66.7, 87.2, null, 83.2, 54, "256K", "$0.95" ], "color": "#00ff94", "note": "devops, browser, architect (3)" }, { "name": "GLM-5.1", "organization": "APAW", "scores": [ null, 58.4, 63.5, null, 86.2, 68.7, null, "200K", "~$0.50" ], "color": "#00ff94", "note": "12 agents! orchestrator, eval..." }, { "name": "V4-Pro Max", "organization": "APAW", "scores": [ 80.6, 55.4, 67.9, 93.5, 90.1, 83.4, 48.2, "1M", "$0.42" ], "color": "#00d4ff", "note": "planner, go-dev (\u0440\u0435\u043a.)" }, { "name": "Qwen3-Coder 480B", "organization": "APAW", "scores": [ 66.5, null, null, null, null, null, null, "256K", "~$0.50" ], "color": "#00ff94", "note": "7 coding agents" }, { "name": "MiniMax M2.5", "organization": "APAW", "scores": [ 80.2, 51.3, null, null, null, 76.3, null, "204K", "$0.15" ], "color": "#00ff94", "note": "frontend, skeptic, fixer (3)" }, { "name": "Nemotron Super", "organization": "APAW", "scores": [ 60.5, null, null, null, null, null, null, "1M", "~$0.40" ], "color": "#00ff94", "note": "6 agents (memory, history)" } ] } }