diff --git a/.kilo/agents/lead-developer.md b/.kilo/agents/lead-developer.md index 1a9b75f..04d46a1 100755 --- a/.kilo/agents/lead-developer.md +++ b/.kilo/agents/lead-developer.md @@ -1,7 +1,7 @@ --- description: Primary code writer for backend and core logic. Writes implementation to pass tests mode: subagent -model: ollama-cloud/nemotron-3-super +model: ollama-cloud/qwen3-coder:480b variant: thinking color: "#DC2626" permission: diff --git a/.kilo/agents/orchestrator.md b/.kilo/agents/orchestrator.md index 3ef8793..0f047e7 100755 --- a/.kilo/agents/orchestrator.md +++ b/.kilo/agents/orchestrator.md @@ -40,6 +40,7 @@ permission: "planner": allow "reflector": allow "memory-manager": allow + "devops-engineer": allow --- # Kilo Code: Orchestrator diff --git a/.kilo/agents/security-auditor.md b/.kilo/agents/security-auditor.md index 1063daa..4698551 100755 --- a/.kilo/agents/security-auditor.md +++ b/.kilo/agents/security-auditor.md @@ -2,7 +2,7 @@ description: Scans for security vulnerabilities, OWASP Top 10, dependency CVEs, and hardcoded secrets mode: subagent model: ollama-cloud/nemotron-3-super -color: "#DC2626" +color: "#DC2626" permission: read: allow bash: allow diff --git a/.kilo/agents/system-analyst.md b/.kilo/agents/system-analyst.md index 23c470a..16c7ec1 100755 --- a/.kilo/agents/system-analyst.md +++ b/.kilo/agents/system-analyst.md @@ -1,7 +1,7 @@ --- description: Designs technical specifications, data schemas, and API contracts before implementation mode: subagent -model: ollama-cloud/nemotron-3-super +model: ollama-cloud/glm-5.1 color: "#0891B2" permission: read: allow diff --git a/.kilo/capability-index.yaml b/.kilo/capability-index.yaml index 802946c..766f30a 100644 --- a/.kilo/capability-index.yaml +++ b/.kilo/capability-index.yaml @@ -15,7 +15,7 @@ agents: forbidden: - test_writing - code_review - model: ollama-cloud/nemotron-3-super + model: ollama-cloud/qwen3-coder:480b variant: thinking mode: subagent delegates_to: @@ -49,7 +49,7 @@ agents: - 
frontend_tests forbidden: - backend_code - model: ollama-cloud/qwen3-coder:480b + model: ollama-cloud/minimax-m2.5 mode: subagent delegates_to: - code-skeptic @@ -245,7 +245,7 @@ agents: - ci_cd_config forbidden: - application_code - model: ollama-cloud/nemotron-3-super + model: ollama-cloud/kimi-k2.6:cloud mode: subagent delegates_to: - code-skeptic @@ -399,7 +399,7 @@ agents: - screenshots forbidden: - unit_testing - model: ollama-cloud/qwen3-coder:480b + model: ollama-cloud/kimi-k2.6:cloud mode: subagent delegates_to: - orchestrator @@ -463,68 +463,14 @@ agents: - database_schemas forbidden: - implementation - model: ollama-cloud/nemotron-3-super - variant: thinking - mode: subagent - delegates_to: - - sdet-engineer - - orchestrator - fallback_models: - - ollama-cloud/glm-5.1 - - ollama-cloud/deepseek-v4-pro-max - - ollama-cloud/kimi-k2.6:cloud - failover_strategy: downgraded - requirement-refiner: - capabilities: - - requirement_analysis - - user_story_creation - - acceptance_criteria - - clarification - receives: - - raw_requests - - feature_ideas - produces: - - user_stories - - acceptance_criteria - - requirements_doc - forbidden: - - design_decisions model: ollama-cloud/glm-5.1 - variant: thinking - mode: subagent - delegates_to: - - history-miner - - system-analyst - fallback_models: - - ollama-cloud/deepseek-v4-pro-max - - ollama-cloud/kimi-k2.6:cloud - - groq/llama-3.1-8b-instant - - ollama-cloud/glm-5 - failover_strategy: mixed - history-miner: - capabilities: - - git_search - - duplicate_detection - - past_solution_finder - - pattern_identification - receives: - - search_query - - issue_description - produces: - - commit_list - - duplicate_report - - related_files - forbidden: - - code_changes - model: ollama-cloud/nemotron-3-super mode: subagent delegates_to: [] fallback_models: - ollama-cloud/glm-5.1 - ollama-cloud/deepseek-v4-pro-max - - groq/llama-3.1-8b-instant - - openrouter/qwen/qwen3.6-plus:free - failover_strategy: mixed + - 
ollama-cloud/kimi-k2.6:cloud + failover_strategy: downgraded capability-analyst: capabilities: - gap_analysis @@ -786,7 +732,7 @@ agents: - integration_plan forbidden: - agent_execution - model: ollama-cloud/glm-5.1 + model: ollama-cloud/kimi-k2.6:cloud variant: thinking mode: subagent delegates_to: diff --git a/agent-evolution/data/model-benchmarks.json b/agent-evolution/data/model-benchmarks.json index ec848fb..74dbd81 100644 --- a/agent-evolution/data/model-benchmarks.json +++ b/agent-evolution/data/model-benchmarks.json @@ -1,1774 +1,1718 @@ -{ - "version": "1.0.0", - "generated": "2026-04-29T19:56:51.418Z", - "source": ".kilo/capability-index.yaml (synced v2)", - "total_agents": 32, - "total_models_tracked": 11, - "providers": [ - "ollama", - "ollama-cloud", - "openrouter", - "groq" - ], - "models": [ - { - "id": "qwen3-coder-480b", - "name": "Qwen3-Coder 480B", - "organization": "Qwen", - "parameters": "480B/35B active", - "context_window": "256K→1M", - "swe_bench": 66.5, - "if_score": 88, - "categories": [ - "coding", - "agent" - ], - "description": "SOTA open-source кодинг. Сравним с Claude Sonnet 4.", - "tags": [ - "coding", - "agent", - "tools" - ], - "openrouter": false, - "provider": "ollama" - }, - { - "id": "minimax-m2.5", - "name": "MiniMax M2.5", - "organization": "MiniMax", - "parameters": "MoE undisclosed", - "context_window": "128K", - "swe_bench": 80.2, - "if_score": 82, - "categories": [ - "coding", - "agent" - ], - "description": "Лидер SWE-bench 80.2%. Полный lifecycle разработки.", - "tags": [ - "coding", - "agent" - ], - "openrouter": false, - "provider": "ollama" - }, - { - "id": "minimax-m2.7", - "name": "MiniMax M2.7", - "organization": "MiniMax", - "parameters": "~10B active", - "context_window": "128K", - "swe_bench": 78, - "if_score": 80, - "categories": [ - "coding", - "agent", - "efficient" - ], - "description": "Самообучаемая. 56.2% SWE-Pro. 100 TPS. 
$0.30/M.", - "tags": [ - "coding", - "agent", - "self-evolving" - ], - "openrouter": false, - "provider": "ollama" - }, - { - "id": "deepseek-v4-pro-max", - "name": "DeepSeek V4-Pro", - "organization": "DeepSeek", - "parameters": "1.6T/49B active MoE", - "context_window": "1M", - "swe_bench": 80.6, - "if_score": 89, - "categories": [ - "coding", - "agent", - "reasoning" - ], - "description": "SWE-V 80.6, LiveCodeBench 93.5(#1!), Terminal-Bench 67.9, Codeforces 3206, 1M ctx, 27% FLOPs vs V3.2. MIT.", - "tags": [ - "coding", - "agent", - "thinking", - "tools" - ], - "openrouter": false, - "provider": "ollama-cloud" - }, - { - "id": "deepseek-v4-flash", - "name": "DeepSeek V4-Pro", - "organization": "DeepSeek", - "parameters": "284B/13B active MoE", - "context_window": "1M", - "swe_bench": 79, - "if_score": 86, - "categories": [ - "coding", - "efficient", - "agent" - ], - "description": "SWE-V ~79%, Flash Max = Pro уровень reasoning. 13B active = ультрабыстрый. 1M ctx. FP4+FP8. MIT.", - "tags": [ - "coding", - "efficient", - "agent", - "thinking" - ], - "openrouter": false, - "provider": "ollama-cloud" - }, - { - "id": "kimi-k2-6", - "name": "Kimi K2.6", - "organization": "Moonshot AI", - "parameters": "1T/32B active MoE", - "context_window": "256K", - "swe_bench": 80.2, - "if_score": 91, - "categories": [ - "coding", - "agent", - "multimodal" - ], - "description": "SWE-Pro 58.6(#1!), SWE-V 80.2, Terminal-Bench 66.7, HLE 54.0(#1!), BrowseComp 83.2. 13h autonomous. 300 sub-agent swarm. Modified MIT.", - "tags": [ - "coding", - "agent", - "swarm", - "vision", - "thinking", - "tools" - ], - "openrouter": false, - "provider": "ollama-cloud" - }, - { - "id": "nemotron-3-super", - "name": "Nemotron 3 Super", - "organization": "NVIDIA", - "parameters": "120B/12B active", - "context_window": "1M", - "swe_bench": 60.5, - "if_score": 78, - "categories": [ - "agent", - "reasoning", - "efficient" - ], - "description": "SWE-bench 60.5%. RULER@1M 91.75%! 
Но IF ниже — Mamba-layers иногда «теряют» инструкции в длинных промптах.", - "tags": [ - "agent", - "1M-ctx", - "thinking" - ], - "openrouter": false, - "provider": "ollama" - }, - { - "id": "glm-5.1", - "name": "GLM-5", - "organization": "Z.ai", - "parameters": "744B/40B active", - "context_window": "128K", - "swe_bench": null, - "if_score": 90, - "categories": [ - "reasoning", - "agent" - ], - "description": "Мощный reasoning. Arena ELO 1451. Отличный instruction following (IFEval ~90+).", - "tags": [ - "reasoning", - "agent" - ], - "openrouter": false, - "provider": "ollama" - }, - { - "id": "deepseek-v4", - "name": "DeepSeek V4-Pro", - "organization": "DeepSeek", - "parameters": "Large MoE", - "context_window": "128K", - "swe_bench": null, - "if_score": 75, - "categories": [ - "reasoning" - ], - "description": "Хороший reasoning, но IF нестабилен — иногда игнорирует формат вывода.", - "tags": [ - "reasoning" - ], - "openrouter": false, - "provider": "ollama" - }, - { - "id": "qwen3-5-122b", - "name": "Qwen 3.5 122B", - "organization": "Qwen", - "parameters": "122B/10B active", - "context_window": "128K", - "swe_bench": null, - "if_score": 92, - "categories": [ - "reasoning", - "efficient" - ], - "description": "IFEval 92.6%! Лучший IF среди open-source. Multimodal. Thinking.", - "tags": [ - "vision", - "thinking", - "tools" - ], - "openrouter": false, - "provider": "ollama" - }, - { - "id": "qwen3-coder-next", - "name": "Qwen3-Coder-Next", - "organization": "Qwen", - "parameters": "80B/3B active", - "context_window": "128K", - "swe_bench": 70, - "if_score": 84, - "categories": [ - "coding", - "efficient" - ], - "description": "70% SWE-bench с 3B active! 
Хороший IF для кодинга.", - "tags": [ - "coding", - "efficient", - "tools" - ], - "openrouter": false, - "provider": "ollama" - }, - { - "id": "cogito-2-1-671b", - "name": "Cogito 2.1 671B", - "organization": "Cognitive", - "parameters": "671B MoE", - "context_window": "128K", - "swe_bench": null, - "if_score": 76, - "categories": [ - "reasoning" - ], - "description": "MIT лицензия. 671B total. IF неплохой, но уступает GLM/Qwen.", - "tags": [ - "reasoning" - ], - "openrouter": false, - "provider": "ollama" - }, - { - "id": "qwen3-6-plus", - "name": "Qwen 3.6 Plus", - "organization": "Qwen", - "parameters": "Hybrid MoE", - "context_window": "1M", - "swe_bench": 78.8, - "if_score": 91, - "categories": [ - "coding", - "agent", - "reasoning" - ], - "description": "FREE на OpenRouter! 1M контекст. Always-on CoT. Превосходный IF — наследник Qwen 3.5 (92.6%).", - "tags": [ - "coding", - "agent", - "1M-ctx", - "free" - ], - "openrouter": true, - "provider": "openrouter" - }, - { - "id": "step-3-5-flash", - "name": "Step 3.5 Flash", - "organization": "StepFun", - "parameters": "MoE", - "context_window": "128K", - "swe_bench": null, - "if_score": 79, - "categories": [ - "efficient" - ], - "description": "Бесплатна на OpenRouter. IF средний.", - "tags": [ - "efficient", - "free" - ], - "openrouter": true, - "provider": "openrouter" - }, - { - "id": "deepseek-r1", - "name": "DeepSeek R1", - "organization": "DeepSeek", - "parameters": "671B MoE", - "context_window": "128K", - "swe_bench": null, - "if_score": 73, - "categories": [ - "reasoning" - ], - "description": "Мощные reasoning-цепочки. Но IF слабый — часто генерирует лишний reasoning вместо ответа.", - "tags": [ - "reasoning", - "thinking", - "free" - ], - "openrouter": true, - "provider": "openrouter" - } - ], - "groq_models": [ - { - "id": "openai/gpt-oss-20b", - "rpm": 30, - "rpd": "1K", - "tpm": "8K", - "tpd": "200K", - "speed": "1200+", - "use_case": "Ультра-быстрый fallback для лёгких ролей (markdown-validator)." 
- }, - { - "id": "llama-3.1-8b-instant", - "rpm": 30, - "rpd": "14.4K", - "tpm": "6K", - "tpd": "500K", - "speed": "~800", - "use_case": "14.4K RPD! Самый высокий лимит. Для health-check / ping ролей." - }, - { - "id": "groq/compound", - "rpm": 30, - "rpd": "250", - "tpm": "70K", - "tpd": "—", - "speed": "varies", - "use_case": "Мультимодельная агрегация. Для research-задач." - }, - { - "id": "groq/compound-mini", - "rpm": 30, - "rpd": "250", - "tpm": "70K", - "tpd": "—", - "speed": "varies", - "use_case": "Лёгкая версия compound." - }, - { - "id": "llama-prompt-guard-2", - "rpm": 30, - "rpd": "14.4K", - "tpm": "15K", - "tpd": "500K", - "speed": "~1K", - "use_case": "Security: входной фильтр для security-auditor (14.4K RPD!)." - } - ], - "agent_model_scores": [ - { - "agent": "lead-developer", - "current_model_index": 6, - "current_model_id": "nemotron-3-super", - "reasoning_effort": "H", - "scores": { - "qwen3-coder-480b": 92, - "minimax-m2.5": 86, - "minimax-m2.7": 82, - "nemotron-3-super": 70, - "glm-5.1": 68, - "deepseek-v4-pro-max": 88, - "qwen3-5-122b": 66, - "qwen3-coder-next": 80, - "qwen3-6-plus": 88, - "kimi-k2-6": 90 - } - }, - { - "agent": "frontend-developer", - "current_model_index": -1, - "current_model_id": "qwen3-coder:480b", - "reasoning_effort": "M", - "scores": { - "qwen3-coder-480b": 86, - "minimax-m2.5": 92, - "minimax-m2.7": 88, - "nemotron-3-super": 62, - "glm-5.1": 56, - "deepseek-v4-pro-max": 82, - "qwen3-5-122b": 60, - "qwen3-coder-next": 76, - "qwen3-6-plus": 88, - "kimi-k2-6": 86 - } - }, - { - "agent": "php-developer", - "current_model_index": -1, - "current_model_id": "qwen3-coder:480b", - "reasoning_effort": "H", - "scores": { - "qwen3-coder-480b": 87, - "minimax-m2.5": 76, - "minimax-m2.7": 72, - "nemotron-3-super": 64, - "glm-5.1": 56, - "deepseek-v4-pro-max": 74, - "qwen3-5-122b": 60, - "qwen3-coder-next": 76, - "qwen3-6-plus": 84, - "kimi-k2-6": 86 - } - }, - { - "agent": "python-developer", - "current_model_index": -1, - 
"current_model_id": "qwen3-coder:480b", - "reasoning_effort": "H", - "scores": { - "qwen3-coder-480b": 90, - "minimax-m2.5": 82, - "minimax-m2.7": 78, - "nemotron-3-super": 66, - "glm-5.1": 60, - "deepseek-v4-pro-max": 78, - "qwen3-5-122b": 64, - "qwen3-coder-next": 78, - "qwen3-6-plus": 88, - "kimi-k2-6": 88 - } - }, - { - "agent": "backend-developer", - "current_model_index": -1, - "current_model_id": "qwen3-coder:480b", - "reasoning_effort": "M", - "scores": { - "qwen3-coder-480b": 91, - "minimax-m2.5": 84, - "minimax-m2.7": 80, - "nemotron-3-super": 68, - "glm-5.1": 63, - "deepseek-v4-pro-max": 86, - "qwen3-5-122b": 62, - "qwen3-coder-next": 78, - "qwen3-6-plus": 87, - "kimi-k2-6": 90 - } - }, - { - "agent": "go-developer", - "current_model_index": -1, - "current_model_id": "qwen3-coder:480b", - "reasoning_effort": "M", - "scores": { - "qwen3-coder-480b": 85, - "minimax-m2.5": 78, - "minimax-m2.7": 74, - "nemotron-3-super": 66, - "glm-5.1": 58, - "deepseek-v4-pro-max": 88, - "qwen3-5-122b": 58, - "qwen3-coder-next": 74, - "qwen3-6-plus": 82, - "kimi-k2-6": 86 - } - }, - { - "agent": "flutter-developer", - "current_model_index": -1, - "current_model_id": "qwen3-coder:480b", - "reasoning_effort": "M", - "scores": { - "qwen3-coder-480b": 86, - "minimax-m2.5": 70, - "minimax-m2.7": 66, - "nemotron-3-super": 60, - "glm-5.1": 53, - "deepseek-v4-pro-max": 78, - "qwen3-5-122b": 58, - "qwen3-coder-next": 74, - "qwen3-6-plus": 82, - "kimi-k2-6": 84 - } - }, - { - "agent": "devops-engineer", - "current_model_index": 6, - "current_model_id": "nemotron-3-super", - "reasoning_effort": "M", - "scores": { - "qwen3-coder-480b": 66, - "minimax-m2.5": 53, - "minimax-m2.7": 48, - "nemotron-3-super": 78, - "glm-5.1": 75, - "deepseek-v4-pro-max": 86, - "qwen3-5-122b": 70, - "qwen3-coder-next": 54, - "qwen3-6-plus": 76, - "kimi-k2-6": 88 - } - }, - { - "agent": "sdet-engineer", - "current_model_index": -1, - "current_model_id": "qwen3-coder:480b", - "reasoning_effort": "H", - 
"scores": { - "qwen3-coder-480b": 88, - "minimax-m2.5": 84, - "minimax-m2.7": 80, - "nemotron-3-super": 70, - "glm-5.1": 63, - "deepseek-v4-pro-max": 84, - "qwen3-5-122b": 64, - "qwen3-coder-next": 78, - "qwen3-6-plus": 84, - "kimi-k2-6": 87 - } - }, - { - "agent": "code-skeptic", - "current_model_index": 1, - "current_model_id": "minimax-m2.5", - "reasoning_effort": "M", - "scores": { - "qwen3-coder-480b": 82, - "minimax-m2.5": 85, - "minimax-m2.7": 80, - "nemotron-3-super": 73, - "glm-5.1": 72, - "deepseek-v4-pro-max": 82, - "qwen3-5-122b": 70, - "qwen3-coder-next": 72, - "qwen3-6-plus": 80, - "kimi-k2-6": 82 - } - }, - { - "agent": "security-auditor", - "current_model_index": 6, - "current_model_id": "nemotron-3-super", - "reasoning_effort": "M", - "scores": { - "qwen3-coder-480b": 76, - "minimax-m2.5": 74, - "minimax-m2.7": 68, - "nemotron-3-super": 76, - "glm-5.1": 68, - "deepseek-v4-pro-max": 80, - "qwen3-5-122b": 72, - "qwen3-coder-next": 64, - "qwen3-6-plus": 75, - "kimi-k2-6": 80 - } - }, - { - "agent": "performance-engineer", - "current_model_index": 6, - "current_model_id": "nemotron-3-super", - "reasoning_effort": "M", - "scores": { - "qwen3-coder-480b": 78, - "minimax-m2.5": 75, - "minimax-m2.7": 70, - "nemotron-3-super": 78, - "glm-5.1": 74, - "deepseek-v4-pro-max": 84, - "qwen3-5-122b": 70, - "qwen3-coder-next": 67, - "qwen3-6-plus": 76, - "kimi-k2-6": 82 - } - }, - { - "agent": "the-fixer", - "current_model_index": 1, - "current_model_id": "minimax-m2.5", - "reasoning_effort": "M", - "scores": { - "qwen3-coder-480b": 89, - "minimax-m2.5": 88, - "minimax-m2.7": 84, - "nemotron-3-super": 71, - "glm-5.1": 64, - "deepseek-v4-pro-max": 88, - "qwen3-5-122b": 64, - "qwen3-coder-next": 82, - "qwen3-6-plus": 86, - "kimi-k2-6": 90 - } - }, - { - "agent": "browser-automation", - "current_model_index": -1, - "current_model_id": "qwen3-coder:480b", - "reasoning_effort": "M", - "scores": { - "qwen3-coder-480b": 87, - "minimax-m2.5": 72, - "minimax-m2.7": 68, - 
"nemotron-3-super": 61, - "glm-5.1": 53, - "deepseek-v4-pro-max": 82, - "qwen3-5-122b": 56, - "qwen3-coder-next": 72, - "qwen3-6-plus": 82, - "kimi-k2-6": 86 - } - }, - { - "agent": "visual-tester", - "current_model_index": -1, - "current_model_id": "qwen3-coder:480b", - "reasoning_effort": "M", - "scores": { - "qwen3-coder-480b": 82, - "minimax-m2.5": 68, - "minimax-m2.7": 64, - "nemotron-3-super": 55, - "glm-5.1": 48, - "deepseek-v4-pro-max": 76, - "qwen3-5-122b": 54, - "qwen3-coder-next": 66, - "qwen3-6-plus": 76, - "kimi-k2-6": 78 - } - }, - { - "agent": "system-analyst", - "current_model_index": 6, - "current_model_id": "nemotron-3-super", - "reasoning_effort": "H", - "scores": { - "qwen3-coder-480b": 70, - "minimax-m2.5": 66, - "minimax-m2.7": 63, - "nemotron-3-super": 74, - "glm-5.1": 82, - "deepseek-v4-pro-max": 88, - "qwen3-5-122b": 76, - "qwen3-coder-next": 58, - "qwen3-6-plus": 80, - "kimi-k2-6": 86 - } - }, - { - "agent": "requirement-refiner", - "current_model_index": 7, - "current_model_id": "glm-5.1", - "reasoning_effort": "H", - "scores": { - "qwen3-coder-480b": 66, - "minimax-m2.5": 62, - "minimax-m2.7": 60, - "nemotron-3-super": 72, - "glm-5.1": 80, - "deepseek-v4-pro-max": 82, - "qwen3-5-122b": 74, - "qwen3-coder-next": 54, - "qwen3-6-plus": 78, - "kimi-k2-6": 82 - } - }, - { - "agent": "history-miner", - "current_model_index": 6, - "current_model_id": "nemotron-3-super", - "reasoning_effort": "M", - "scores": { - "qwen3-coder-480b": 68, - "minimax-m2.5": 60, - "minimax-m2.7": 56, - "nemotron-3-super": 85, - "glm-5.1": 78, - "deepseek-v4-pro-max": 86, - "qwen3-5-122b": 72, - "qwen3-coder-next": 56, - "qwen3-6-plus": 84, - "kimi-k2-6": 82 - } - }, - { - "agent": "capability-analyst", - "current_model_index": 7, - "current_model_id": "glm-5.1", - "reasoning_effort": "M", - "scores": { - "qwen3-coder-480b": 72, - "minimax-m2.5": 68, - "minimax-m2.7": 66, - "nemotron-3-super": 76, - "glm-5.1": 78, - "deepseek-v4-pro-max": 82, - "qwen3-5-122b": 75, - 
"qwen3-coder-next": 60, - "qwen3-6-plus": 79, - "kimi-k2-6": 82 - } - }, - { - "agent": "orchestrator", - "current_model_index": -1, - "current_model_id": "kimi-k2.6:cloud", - "reasoning_effort": "H", - "scores": { - "qwen3-coder-480b": 74, - "minimax-m2.5": 70, - "minimax-m2.7": 68, - "nemotron-3-super": 80, - "glm-5.1": 82, - "deepseek-v4-pro-max": 86, - "qwen3-5-122b": 78, - "qwen3-coder-next": 62, - "qwen3-6-plus": 84, - "kimi-k2-6": 92 - } - }, - { - "agent": "release-manager", - "current_model_index": 7, - "current_model_id": "glm-5.1", - "reasoning_effort": "M", - "scores": { - "qwen3-coder-480b": 72, - "minimax-m2.5": 66, - "minimax-m2.7": 64, - "nemotron-3-super": 74, - "glm-5.1": 76, - "deepseek-v4-pro-max": 78, - "qwen3-5-122b": 72, - "qwen3-coder-next": 60, - "qwen3-6-plus": 76, - "kimi-k2-6": 78 - } - }, - { - "agent": "evaluator", - "current_model_index": 7, - "current_model_id": "glm-5.1", - "reasoning_effort": "H", - "scores": { - "qwen3-coder-480b": 70, - "minimax-m2.5": 73, - "minimax-m2.7": 70, - "nemotron-3-super": 78, - "glm-5.1": 78, - "deepseek-v4-pro-max": 84, - "qwen3-5-122b": 76, - "qwen3-coder-next": 58, - "qwen3-6-plus": 81, - "kimi-k2-6": 84 - } - }, - { - "agent": "prompt-optimizer", - "current_model_index": 7, - "current_model_id": "glm-5.1", - "reasoning_effort": "M", - "scores": { - "qwen3-coder-480b": 76, - "minimax-m2.5": 74, - "minimax-m2.7": 72, - "nemotron-3-super": 76, - "glm-5.1": 75, - "deepseek-v4-pro-max": 80, - "qwen3-5-122b": 74, - "qwen3-coder-next": 64, - "qwen3-6-plus": 83, - "kimi-k2-6": 82 - } - }, - { - "agent": "product-owner", - "current_model_index": 7, - "current_model_id": "glm-5.1", - "reasoning_effort": "M", - "scores": { - "qwen3-coder-480b": 60, - "minimax-m2.5": 56, - "minimax-m2.7": 54, - "nemotron-3-super": 74, - "glm-5.1": 78, - "deepseek-v4-pro-max": 76, - "qwen3-5-122b": 74, - "qwen3-coder-next": 48, - "qwen3-6-plus": 78, - "kimi-k2-6": 76 - } - }, - { - "agent": "pipeline-judge", - 
"current_model_index": 7, - "current_model_id": "glm-5.1", - "reasoning_effort": "M", - "scores": { - "qwen3-coder-480b": 64, - "minimax-m2.5": 68, - "minimax-m2.7": 65, - "nemotron-3-super": 78, - "glm-5.1": 76, - "deepseek-v4-pro-max": 82, - "qwen3-5-122b": 74, - "qwen3-coder-next": 56, - "qwen3-6-plus": 80, - "kimi-k2-6": 84 - } - }, - { - "agent": "workflow-architect", - "current_model_index": 7, - "current_model_id": "glm-5.1", - "reasoning_effort": "H", - "scores": { - "qwen3-coder-480b": 68, - "minimax-m2.5": 62, - "minimax-m2.7": 60, - "nemotron-3-super": 76, - "glm-5.1": 76, - "deepseek-v4-pro-max": 80, - "qwen3-5-122b": 72, - "qwen3-coder-next": 56, - "qwen3-6-plus": 80, - "kimi-k2-6": 82 - } - }, - { - "agent": "markdown-validator", - "current_model_index": -1, - "current_model_id": "nemotron-3-nano:30b", - "reasoning_effort": "M", - "scores": { - "qwen3-coder-480b": 43, - "minimax-m2.5": 38, - "minimax-m2.7": 36, - "nemotron-3-super": 52, - "glm-5.1": 55, - "deepseek-v4-pro-max": 68, - "qwen3-5-122b": 56, - "qwen3-coder-next": 40, - "qwen3-6-plus": 50, - "kimi-k2-6": 56 - } - }, - { - "agent": "agent-architect", - "current_model_index": 7, - "current_model_id": "glm-5.1", - "reasoning_effort": "H", - "scores": { - "qwen3-coder-480b": 78, - "minimax-m2.5": 72, - "minimax-m2.7": 70, - "nemotron-3-super": 78, - "glm-5.1": 76, - "deepseek-v4-pro-max": 82, - "qwen3-5-122b": 76, - "qwen3-coder-next": 66, - "qwen3-6-plus": 82, - "kimi-k2-6": 86 - } - }, - { - "agent": "planner", - "current_model_index": 6, - "current_model_id": "nemotron-3-super", - "reasoning_effort": "M", - "scores": { - "qwen3-coder-480b": 72, - "minimax-m2.5": 68, - "minimax-m2.7": 66, - "nemotron-3-super": 80, - "glm-5.1": 78, - "deepseek-v4-pro-max": 88, - "qwen3-5-122b": 78, - "qwen3-coder-next": 60, - "qwen3-6-plus": 85, - "kimi-k2-6": 86 - } - }, - { - "agent": "reflector", - "current_model_index": 6, - "current_model_id": "nemotron-3-super", - "reasoning_effort": "M", - "scores": { - 
"qwen3-coder-480b": 68, - "minimax-m2.5": 66, - "minimax-m2.7": 64, - "nemotron-3-super": 78, - "glm-5.1": 76, - "deepseek-v4-pro-max": 84, - "qwen3-5-122b": 76, - "qwen3-coder-next": 56, - "qwen3-6-plus": 82, - "kimi-k2-6": 80 - } - }, - { - "agent": "memory-manager", - "current_model_index": 6, - "current_model_id": "nemotron-3-super", - "reasoning_effort": "M", - "scores": { - "qwen3-coder-480b": 63, - "minimax-m2.5": 58, - "minimax-m2.7": 56, - "nemotron-3-super": 86, - "glm-5.1": 72, - "deepseek-v4-pro-max": 86, - "qwen3-5-122b": 70, - "qwen3-coder-next": 50, - "qwen3-6-plus": 87, - "kimi-k2-6": 84 - } - }, - { - "agent": "architect-indexer", - "current_model_index": 7, - "current_model_id": "glm-5.1", - "reasoning_effort": "H", - "scores": { - "qwen3-coder-480b": 70, - "minimax-m2.5": 64, - "minimax-m2.7": 62, - "nemotron-3-super": 74, - "glm-5.1": 80, - "deepseek-v4-pro-max": 78, - "qwen3-5-122b": 76, - "qwen3-coder-next": 58, - "qwen3-6-plus": 80, - "kimi-k2-6": 84 - } - } - ], - "if_scores": { - "qwen3-coder-480b": 88, - "minimax-m2.5": 82, - "minimax-m2.7": 78, - "nemotron-3-super": 85, - "glm-5.1": 80, - "deepseek-v4-pro-max": 88, - "qwen3-5-122b": 86, - "qwen3-coder-next": 84, - "qwen3-6-plus": 90, - "kimi-k2-6": 91, - "deepseek-v4-flash": 86 - }, - "agent_current_config": [ - { - "agent": "lead-developer", - "model": "ollama-cloud/nemotron-3-super", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "nemotron", - "fit_score": 0, - "status": "good", - "previous_model": null - }, - { - "agent": "frontend-developer", - "model": "ollama-cloud/qwen3-coder:480b", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "qwen", - "fit_score": 0, - "status": "good", - "previous_model": null - }, - { - "agent": "php-developer", - "model": "ollama-cloud/qwen3-coder:480b", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "qwen", - "fit_score": 0, - "status": "good", - "previous_model": null - }, - { - 
"agent": "python-developer", - "model": "ollama-cloud/qwen3-coder:480b", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "qwen", - "fit_score": 0, - "status": "good", - "previous_model": null - }, - { - "agent": "backend-developer", - "model": "ollama-cloud/qwen3-coder:480b", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "qwen", - "fit_score": 0, - "status": "good", - "previous_model": null - }, - { - "agent": "go-developer", - "model": "ollama-cloud/qwen3-coder:480b", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "qwen", - "fit_score": 0, - "status": "good", - "previous_model": null - }, - { - "agent": "flutter-developer", - "model": "ollama-cloud/qwen3-coder:480b", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "qwen", - "fit_score": 0, - "status": "good", - "previous_model": null - }, - { - "agent": "devops-engineer", - "model": "ollama-cloud/nemotron-3-super", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "nemotron", - "fit_score": 0, - "status": "good", - "previous_model": null - }, - { - "agent": "sdet-engineer", - "model": "ollama-cloud/qwen3-coder:480b", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "qwen", - "fit_score": 0, - "status": "good", - "previous_model": null - }, - { - "agent": "code-skeptic", - "model": "ollama-cloud/minimax-m2.5", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "minimax", - "fit_score": 0, - "status": "good", - "previous_model": null - }, - { - "agent": "security-auditor", - "model": "ollama-cloud/nemotron-3-super", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "nemotron", - "fit_score": 0, - "status": "good", - "previous_model": null - }, - { - "agent": "performance-engineer", - "model": "ollama-cloud/nemotron-3-super", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "nemotron", - "fit_score": 0, - "status": 
"good", - "previous_model": null - }, - { - "agent": "the-fixer", - "model": "ollama-cloud/minimax-m2.5", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "minimax", - "fit_score": 0, - "status": "good", - "previous_model": null - }, - { - "agent": "browser-automation", - "model": "ollama-cloud/qwen3-coder:480b", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "qwen", - "fit_score": 0, - "status": "good", - "previous_model": null - }, - { - "agent": "visual-tester", - "model": "ollama-cloud/qwen3-coder:480b", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "qwen", - "fit_score": 0, - "status": "good", - "previous_model": null - }, - { - "agent": "system-analyst", - "model": "ollama-cloud/nemotron-3-super", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "nemotron", - "fit_score": 0, - "status": "good", - "previous_model": null - }, - { - "agent": "requirement-refiner", - "model": "ollama-cloud/glm-5.1", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "glm", - "fit_score": 0, - "status": "good", - "previous_model": null - }, - { - "agent": "history-miner", - "model": "ollama-cloud/nemotron-3-super", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "nemotron", - "fit_score": 0, - "status": "good", - "previous_model": null - }, - { - "agent": "capability-analyst", - "model": "ollama-cloud/glm-5.1", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "glm", - "fit_score": 0, - "status": "good", - "previous_model": null - }, - { - "agent": "orchestrator", - "model": "ollama-cloud/kimi-k2.6:cloud", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "kimi", - "fit_score": 0, - "status": "good", - "previous_model": null - }, - { - "agent": "release-manager", - "model": "ollama-cloud/glm-5.1", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "glm", - "fit_score": 0, - "status": 
"good", - "previous_model": null - }, - { - "agent": "evaluator", - "model": "ollama-cloud/glm-5.1", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "glm", - "fit_score": 0, - "status": "good", - "previous_model": null - }, - { - "agent": "prompt-optimizer", - "model": "ollama-cloud/glm-5.1", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "glm", - "fit_score": 0, - "status": "good", - "previous_model": null - }, - { - "agent": "product-owner", - "model": "ollama-cloud/glm-5.1", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "glm", - "fit_score": 0, - "status": "good", - "previous_model": null - }, - { - "agent": "pipeline-judge", - "model": "ollama-cloud/glm-5.1", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "glm", - "fit_score": 0, - "status": "good", - "previous_model": null - }, - { - "agent": "workflow-architect", - "model": "ollama-cloud/glm-5.1", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "glm", - "fit_score": 0, - "status": "good", - "previous_model": null - }, - { - "agent": "markdown-validator", - "model": "ollama-cloud/nemotron-3-nano:30b", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "nemotron", - "fit_score": 0, - "status": "good", - "previous_model": null - }, - { - "agent": "agent-architect", - "model": "ollama-cloud/glm-5.1", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "glm", - "fit_score": 0, - "status": "good", - "previous_model": null - }, - { - "agent": "planner", - "model": "ollama-cloud/nemotron-3-super", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "nemotron", - "fit_score": 0, - "status": "good", - "previous_model": null - }, - { - "agent": "reflector", - "model": "ollama-cloud/nemotron-3-super", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "nemotron", - "fit_score": 0, - "status": "good", - "previous_model": null - 
}, - { - "agent": "memory-manager", - "model": "ollama-cloud/nemotron-3-super", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "nemotron", - "fit_score": 0, - "status": "good", - "previous_model": null - }, - { - "agent": "architect-indexer", - "model": "ollama-cloud/glm-5.1", - "provider": "Ollama Cloud", - "category": "Process", - "badge_type": "glm", - "fit_score": 0, - "status": "good", - "previous_model": null - } - ], - "recommendations": [ - { - "agent": "[built-in] debug", - "from_model": "glm-5.1.1 (88)", - "from_provider": "Ollama", - "to_model": "V4-Pro Max (★90) / K2.6 (★90) RE:High", - "to_provider": "Ollama Cloud", - "impact": "high", - "quality_change": "+2%", - "speed_change": "~1x", - "context_change": "200K→1M", - "provider_change": "Ollama Cloud", - "rationale": "★ матрицы: V4-Pro=90 и K2.6=90 (TIE!), GLM-5.1=88. V4-Pro: LiveCodeBench 93.5(#1!), Terminal 67.9, 1M ctx для полного проекта. K2.6: 13h auto sessions. Оба лучше GLM-5.1. RE:High для debug." - }, - { - "agent": "planner", - "from_model": "nemotron-3-super (80)", - "from_provider": "Ollama", - "to_model": "V4-Pro Max (★88) RE:High", - "to_provider": "Ollama Cloud", - "impact": "high", - "quality_change": "+10%", - "speed_change": "~1x", - "context_change": "1M", - "provider_change": "Ollama Cloud", - "rationale": "★ матрицы: V4-Pro=88(лучший!), K2.6=86, GLM-5.1=85, Nem=80. V4-Pro: GPQA 90.1 (reasoning), 1M ctx сохраняется (vs потеря при K2.6). RE:High для chain-of-thought planning." - }, - { - "agent": "go-developer", - "from_model": "qwen3-coder:480b (85)", - "from_provider": "Ollama", - "to_model": "V4-Pro Max (★88) RE:Medium", - "to_provider": "Ollama Cloud", - "impact": "medium", - "quality_change": "+4%", - "speed_change": "~1x", - "context_change": "256K→1M", - "provider_change": "Ollama Cloud", - "rationale": "★ матрицы: V4-Pro=88(лучший для Go!), K2.6=86, Qwen3Coder=85. DeepSeek модели традиционно сильны в Go/Rust. 1M ctx для крупных Go-проектов." 
- }, - { - "agent": "history-miner", - "from_model": "nemotron-3-super (★85)", - "from_provider": "Ollama", - "to_model": "V4-Pro Max (86) + Nem fallback", - "to_provider": "Hybrid", - "impact": "medium", - "quality_change": "+1%", - "speed_change": "~1x", - "context_change": "1M", - "provider_change": "Ollama Cloud + Ollama", - "rationale": "V4-Pro=86 чуть лучше Nemotron=85. 1M ctx у обоих. MRCR 83.5 у V4-Pro — лучшее long-context retrieval. Nemotron как fallback (RULER 91.75%)." - }, - { - "agent": "frontend-dev → M2.5", - "from_model": "qwen3-coder (90)", - "from_provider": "Ollama", - "to_model": "MiniMax M2.5 (★92) ✅", - "to_provider": "Ollama", - "impact": "low", - "quality_change": "+2%", - "speed_change": "=", - "context_change": "204K", - "provider_change": "Ollama", - "rationale": "Spec-writing, UI architect. APPLIED." - }, - { - "agent": "devops → K2.6", - "from_model": "deepseek-v3.2", - "from_provider": "", - "to_model": "kimi-k2.6:cloud ✅", - "to_provider": "Ollama Cloud", - "impact": "low", - "quality_change": "+35%", - "speed_change": "=", - "context_change": "256K", - "provider_change": "", - "rationale": "APPLIED." - }, - { - "agent": "orchestrator", - "from_model": "glm-5.1.1 (★90)", - "from_provider": "Ollama", - "to_model": "K2.6 (★92) RE:Medium", - "to_provider": "Ollama Cloud", - "impact": "medium", - "quality_change": "+2%", - "speed_change": "~1x", - "context_change": "200K→256K", - "provider_change": "Ollama Cloud", - "rationale": "K2.6=92★ всё ещё лучший для orchestration. V4-Pro=86 слабее. 300 sub-agent swarm." - }, - { - "agent": "the-fixer", - "from_model": "minimax-m2.5 (★88)", - "from_provider": "Ollama", - "to_model": "V4-Pro (★88) / K2.6 (★90)", - "to_provider": "Ollama Cloud", - "impact": "medium", - "quality_change": "+2%", - "speed_change": "~1x", - "context_change": "128K→1M/256K", - "provider_change": "Ollama Cloud", - "rationale": "K2.6=90(лучший), V4-Pro=88=M2.5. M2.5 SWE-bench 80.2% стабильнее. Не срочно." 
- }, - { - "agent": "Qwen3-Coder (7 coding)", - "from_model": "qwen3-coder", - "from_provider": "Ollama", - "to_model": "✅", - "to_provider": "", - "impact": "low", - "quality_change": "=0%", - "speed_change": "=", - "context_change": "256K", - "provider_change": "Ollama", - "rationale": "lead=92★, backend=91★, python=90★." - }, - { - "agent": "GLM-5.1 (12 agents)", - "from_model": "glm-5.1.1", - "from_provider": "Ollama", - "to_model": "✅", - "to_provider": "", - "impact": "low", - "quality_change": "=0%", - "speed_change": "=", - "context_change": "200K", - "provider_change": "", - "rationale": "orchestrator=90, system-analyst=90. SWE-Pro #1." - }, - { - "agent": "Kimi K2.6 (3 agents)", - "from_model": "kimi-k2.6", - "from_provider": "Ollama Cloud", - "to_model": "✅", - "to_provider": "", - "impact": "low", - "quality_change": "=0%", - "speed_change": "=", - "context_change": "256K", - "provider_change": "", - "rationale": "devops=88★, browser=86, agent-arch=86." - } - ], - "impact_data": [ - { - "category": "debug GLM5.1→V4-Pro/K2.6", - "before": 88, - "after": 90, - "delta": 2, - "notes": "LiveCodeBench 93.5, Terminal 67.9" - }, - { - "category": "planner Nem→V4-Pro Max", - "before": 80, - "after": 88, - "delta": 8, - "notes": "★88! GPQA 90.1, 1M ctx" - }, - { - "category": "go-dev Coder→V4-Pro Max", - "before": 85, - "after": 88, - "delta": 3, - "notes": "★88! Go/Rust specialist, 1M ctx" - }, - { - "category": "history-miner →V4-Pro", - "before": 85, - "after": 86, - "delta": 1, - "notes": "MRCR 83.5, long-context" - }, - { - "category": "orchestrator →K2.6 (next)", - "before": 90, - "after": 92, - "delta": 2, - "notes": "300 sub-agent swarm" - }, - { - "category": "frontend → M2.5 ✅", - "before": 90, - "after": 92, - "delta": 2, - "notes": "Spec-writing, UI architect" - }, - { - "category": "devops → K2.6 ✅", - "before": 65, - "after": 88, - "delta": 23, - "notes": "IF:65→91! 
Terminal 66.7" - }, - { - "category": "Qwen3-Coder (7) ✅", - "before": 90, - "after": 90, - "delta": 0, - "notes": "SOTA coding" - }, - { - "category": "GLM-5.1 (12) ✅", - "before": 87, - "after": 87, - "delta": 0, - "notes": "SWE-Pro #1" - }, - { - "category": "Nemotron Super (6) ✅", - "before": 82, - "after": 82, - "delta": 0, - "notes": "1M ctx, RULER 91.75%" - } - ], - "benchmark_comparison": { - "benchmarks": [ - { - "name": "SWE-V", - "full_name": "SWE-Bench Verified", - "description": "GitHub issue resolution (500 tasks)", - "roles": "lead-dev, backend, fixer" - }, - { - "name": "SWE-P", - "full_name": "SWE-Bench Pro", - "description": "Multi-lang, decontaminated (1865 tasks)", - "roles": "all coding agents" - }, - { - "name": "T-Bench", - "full_name": "Terminal-Bench 2.0", - "description": "CLI/shell multi-step tasks", - "roles": "devops, planner, orchestrator" - }, - { - "name": "LCB", - "full_name": "LiveCodeBench", - "description": "Code gen from specs (held-out)", - "roles": "sdet, go-dev, python-dev" - }, - { - "name": "GPQA", - "full_name": "GPQA Diamond", - "description": "PhD-level reasoning", - "roles": "system-analyst, planner" - }, - { - "name": "BComp", - "full_name": "BrowseComp", - "description": "Web research & synthesis", - "roles": "browser-auto, capability-analyst" - }, - { - "name": "HLE", - "full_name": "Humanity Last Exam", - "description": "Frontier knowledge (with tools)", - "roles": "agent-architect, evaluator" - }, - { - "name": "Ctx", - "full_name": "Context Window", - "description": "Max tokens in one pass", - "roles": "history-miner, memory-mgr" - }, - { - "name": "$/M", - "full_name": "Cost per 1M input", - "description": "API pricing", - "roles": "all agents (ROI)" - } - ], - "closed_source_models": [ - { - "name": "Claude Opus 4.7", - "organization": "Anthropic", - "scores": [ - 87.6, - 64.3, - 69.4, - null, - 94.2, - 79.3, - 53, - "1M", - "$5" - ], - "color": "#c084fc", - "note": "#1 апрель 2026" - }, - { - "name": "GPT-5.5", 
- "organization": "OpenAI", - "scores": [ - null, - 58.6, - 82.7, - null, - null, - 83.4, - 57.2, - "1M", - "$5" - ], - "color": "#ff6b81", - "note": "Новейший, Terminal #1" - }, - { - "name": "GPT-5.4", - "organization": "OpenAI", - "scores": [ - 78.2, - 59.1, - 75.1, - null, - 94.4, - 82.7, - 58.7, - "200K", - "$2.50" - ], - "color": "#ff6b81", - "note": "Reasoning, math" - }, - { - "name": "Gemini 3.1 Pro", - "organization": "Google", - "scores": [ - 80.6, - 46.1, - 68.5, - null, - 94.3, - 85.9, - 51.4, - "2M", - "$2" - ], - "color": "#facc15", - "note": "ARC-AGI 77.1%, дешёвый" - }, - { - "name": "Claude Sonnet 4.6", - "organization": "Anthropic", - "scores": [ - 79.6, - null, - null, - null, - null, - null, - null, - "200K", - "$3" - ], - "color": "#c084fc", - "note": "5× дешевле Opus" - }, - { - "name": "GPT-5.3-Codex", - "organization": "OpenAI", - "scores": [ - 85, - 57, - 77.3, - null, - null, - null, - null, - "200K", - "$6" - ], - "color": "#ff6b81", - "note": "Coding specialist" - } - ], - "apaw_models": [ - { - "name": "Kimi K2.6", - "organization": "APAW", - "scores": [ - 80.2, - 58.6, - 66.7, - 87.2, - null, - 83.2, - 54, - "256K", - "$0.95" - ], - "color": "#00ff94", - "note": "devops, browser, architect (3)" - }, - { - "name": "GLM-5.1", - "organization": "APAW", - "scores": [ - null, - 58.4, - 63.5, - null, - 86.2, - 68.7, - null, - "200K", - "~$0.50" - ], - "color": "#00ff94", - "note": "12 agents! orchestrator, eval..." 
- }, - { - "name": "V4-Pro Max", - "organization": "APAW", - "scores": [ - 80.6, - 55.4, - 67.9, - 93.5, - 90.1, - 83.4, - 48.2, - "1M", - "$0.42" - ], - "color": "#00d4ff", - "note": "planner, go-dev (рек.)" - }, - { - "name": "Qwen3-Coder 480B", - "organization": "APAW", - "scores": [ - 66.5, - null, - null, - null, - null, - null, - null, - "256K", - "~$0.50" - ], - "color": "#00ff94", - "note": "7 coding agents" - }, - { - "name": "MiniMax M2.5", - "organization": "APAW", - "scores": [ - 80.2, - 51.3, - null, - null, - null, - 76.3, - null, - "204K", - "$0.15" - ], - "color": "#00ff94", - "note": "frontend, skeptic, fixer (3)" - }, - { - "name": "Nemotron Super", - "organization": "APAW", - "scores": [ - 60.5, - null, - null, - null, - null, - null, - null, - "1M", - "~$0.40" - ], - "color": "#00ff94", - "note": "6 agents (memory, history)" - } - ] - } +{ + "version": "1.0.0", + "generated": "2026-04-29T21:47:05.339Z", + "source": ".kilo/capability-index.yaml (synced v3 + fitness-gate)", + "total_agents": 30, + "total_models_tracked": 11, + "providers": [ + "ollama", + "ollama-cloud", + "openrouter", + "groq" + ], + "models": [ + { + "id": "qwen3-coder-480b", + "name": "Qwen3-Coder 480B", + "organization": "Qwen", + "parameters": "480B/35B active", + "context_window": "256K\u21921M", + "swe_bench": 66.5, + "if_score": 88, + "categories": [ + "coding", + "agent" + ], + "description": "SOTA open-source \u043a\u043e\u0434\u0438\u043d\u0433. \u0421\u0440\u0430\u0432\u043d\u0438\u043c \u0441 Claude Sonnet 4.", + "tags": [ + "coding", + "agent", + "tools" + ], + "openrouter": false, + "provider": "ollama" + }, + { + "id": "minimax-m2.5", + "name": "MiniMax M2.5", + "organization": "MiniMax", + "parameters": "MoE undisclosed", + "context_window": "128K", + "swe_bench": 80.2, + "if_score": 82, + "categories": [ + "coding", + "agent" + ], + "description": "\u041b\u0438\u0434\u0435\u0440 SWE-bench 80.2%. 
\u041f\u043e\u043b\u043d\u044b\u0439 lifecycle \u0440\u0430\u0437\u0440\u0430\u0431\u043e\u0442\u043a\u0438.", + "tags": [ + "coding", + "agent" + ], + "openrouter": false, + "provider": "ollama" + }, + { + "id": "minimax-m2.7", + "name": "MiniMax M2.7", + "organization": "MiniMax", + "parameters": "~10B active", + "context_window": "128K", + "swe_bench": 78, + "if_score": 80, + "categories": [ + "coding", + "agent", + "efficient" + ], + "description": "\u0421\u0430\u043c\u043e\u043e\u0431\u0443\u0447\u0430\u0435\u043c\u0430\u044f. 56.2% SWE-Pro. 100 TPS. $0.30/M.", + "tags": [ + "coding", + "agent", + "self-evolving" + ], + "openrouter": false, + "provider": "ollama" + }, + { + "id": "deepseek-v4-pro-max", + "name": "DeepSeek V4-Pro", + "organization": "DeepSeek", + "parameters": "1.6T/49B active MoE", + "context_window": "1M", + "swe_bench": 80.6, + "if_score": 89, + "categories": [ + "coding", + "agent", + "reasoning" + ], + "description": "SWE-V 80.6, LiveCodeBench 93.5(#1!), Terminal-Bench 67.9, Codeforces 3206, 1M ctx, 27% FLOPs vs V3.2. MIT.", + "tags": [ + "coding", + "agent", + "thinking", + "tools" + ], + "openrouter": false, + "provider": "ollama-cloud" + }, + { + "id": "deepseek-v4-flash", + "name": "DeepSeek V4-Pro", + "organization": "DeepSeek", + "parameters": "284B/13B active MoE", + "context_window": "1M", + "swe_bench": 79, + "if_score": 86, + "categories": [ + "coding", + "efficient", + "agent" + ], + "description": "SWE-V ~79%, Flash Max = Pro \u0443\u0440\u043e\u0432\u0435\u043d\u044c reasoning. 13B active = \u0443\u043b\u044c\u0442\u0440\u0430\u0431\u044b\u0441\u0442\u0440\u044b\u0439. 1M ctx. FP4+FP8. 
MIT.", + "tags": [ + "coding", + "efficient", + "agent", + "thinking" + ], + "openrouter": false, + "provider": "ollama-cloud" + }, + { + "id": "kimi-k2-6", + "name": "Kimi K2.6", + "organization": "Moonshot AI", + "parameters": "1T/32B active MoE", + "context_window": "256K", + "swe_bench": 80.2, + "if_score": 91, + "categories": [ + "coding", + "agent", + "multimodal" + ], + "description": "SWE-Pro 58.6(#1!), SWE-V 80.2, Terminal-Bench 66.7, HLE 54.0(#1!), BrowseComp 83.2. 13h autonomous. 300 sub-agent swarm. Modified MIT.", + "tags": [ + "coding", + "agent", + "swarm", + "vision", + "thinking", + "tools" + ], + "openrouter": false, + "provider": "ollama-cloud" + }, + { + "id": "nemotron-3-super", + "name": "Nemotron 3 Super", + "organization": "NVIDIA", + "parameters": "120B/12B active", + "context_window": "1M", + "swe_bench": 60.5, + "if_score": 78, + "categories": [ + "agent", + "reasoning", + "efficient" + ], + "description": "SWE-bench 60.5%. RULER@1M 91.75%! \u041d\u043e IF \u043d\u0438\u0436\u0435 \u2014 Mamba-layers \u0438\u043d\u043e\u0433\u0434\u0430 \u00ab\u0442\u0435\u0440\u044f\u044e\u0442\u00bb \u0438\u043d\u0441\u0442\u0440\u0443\u043a\u0446\u0438\u0438 \u0432 \u0434\u043b\u0438\u043d\u043d\u044b\u0445 \u043f\u0440\u043e\u043c\u043f\u0442\u0430\u0445.", + "tags": [ + "agent", + "1M-ctx", + "thinking" + ], + "openrouter": false, + "provider": "ollama" + }, + { + "id": "glm-5.1", + "name": "GLM-5", + "organization": "Z.ai", + "parameters": "744B/40B active", + "context_window": "128K", + "swe_bench": null, + "if_score": 90, + "categories": [ + "reasoning", + "agent" + ], + "description": "\u041c\u043e\u0449\u043d\u044b\u0439 reasoning. Arena ELO 1451. 
\u041e\u0442\u043b\u0438\u0447\u043d\u044b\u0439 instruction following (IFEval ~90+).", + "tags": [ + "reasoning", + "agent" + ], + "openrouter": false, + "provider": "ollama" + }, + { + "id": "deepseek-v4", + "name": "DeepSeek V4-Pro", + "organization": "DeepSeek", + "parameters": "Large MoE", + "context_window": "128K", + "swe_bench": null, + "if_score": 75, + "categories": [ + "reasoning" + ], + "description": "\u0425\u043e\u0440\u043e\u0448\u0438\u0439 reasoning, \u043d\u043e IF \u043d\u0435\u0441\u0442\u0430\u0431\u0438\u043b\u0435\u043d \u2014 \u0438\u043d\u043e\u0433\u0434\u0430 \u0438\u0433\u043d\u043e\u0440\u0438\u0440\u0443\u0435\u0442 \u0444\u043e\u0440\u043c\u0430\u0442 \u0432\u044b\u0432\u043e\u0434\u0430.", + "tags": [ + "reasoning" + ], + "openrouter": false, + "provider": "ollama" + }, + { + "id": "qwen3-5-122b", + "name": "Qwen 3.5 122B", + "organization": "Qwen", + "parameters": "122B/10B active", + "context_window": "128K", + "swe_bench": null, + "if_score": 92, + "categories": [ + "reasoning", + "efficient" + ], + "description": "IFEval 92.6%! \u041b\u0443\u0447\u0448\u0438\u0439 IF \u0441\u0440\u0435\u0434\u0438 open-source. Multimodal. Thinking.", + "tags": [ + "vision", + "thinking", + "tools" + ], + "openrouter": false, + "provider": "ollama" + }, + { + "id": "qwen3-coder-next", + "name": "Qwen3-Coder-Next", + "organization": "Qwen", + "parameters": "80B/3B active", + "context_window": "128K", + "swe_bench": 70, + "if_score": 84, + "categories": [ + "coding", + "efficient" + ], + "description": "70% SWE-bench \u0441 3B active! 
\u0425\u043e\u0440\u043e\u0448\u0438\u0439 IF \u0434\u043b\u044f \u043a\u043e\u0434\u0438\u043d\u0433\u0430.", + "tags": [ + "coding", + "efficient", + "tools" + ], + "openrouter": false, + "provider": "ollama" + }, + { + "id": "cogito-2-1-671b", + "name": "Cogito 2.1 671B", + "organization": "Cognitive", + "parameters": "671B MoE", + "context_window": "128K", + "swe_bench": null, + "if_score": 76, + "categories": [ + "reasoning" + ], + "description": "MIT \u043b\u0438\u0446\u0435\u043d\u0437\u0438\u044f. 671B total. IF \u043d\u0435\u043f\u043b\u043e\u0445\u043e\u0439, \u043d\u043e \u0443\u0441\u0442\u0443\u043f\u0430\u0435\u0442 GLM/Qwen.", + "tags": [ + "reasoning" + ], + "openrouter": false, + "provider": "ollama" + }, + { + "id": "qwen3-6-plus", + "name": "Qwen 3.6 Plus", + "organization": "Qwen", + "parameters": "Hybrid MoE", + "context_window": "1M", + "swe_bench": 78.8, + "if_score": 91, + "categories": [ + "coding", + "agent", + "reasoning" + ], + "description": "FREE \u043d\u0430 OpenRouter! 1M \u043a\u043e\u043d\u0442\u0435\u043a\u0441\u0442. Always-on CoT. \u041f\u0440\u0435\u0432\u043e\u0441\u0445\u043e\u0434\u043d\u044b\u0439 IF \u2014 \u043d\u0430\u0441\u043b\u0435\u0434\u043d\u0438\u043a Qwen 3.5 (92.6%).", + "tags": [ + "coding", + "agent", + "1M-ctx", + "free" + ], + "openrouter": true, + "provider": "openrouter" + }, + { + "id": "step-3-5-flash", + "name": "Step 3.5 Flash", + "organization": "StepFun", + "parameters": "MoE", + "context_window": "128K", + "swe_bench": null, + "if_score": 79, + "categories": [ + "efficient" + ], + "description": "\u0411\u0435\u0441\u043f\u043b\u0430\u0442\u043d\u0430 \u043d\u0430 OpenRouter. 
IF \u0441\u0440\u0435\u0434\u043d\u0438\u0439.", + "tags": [ + "efficient", + "free" + ], + "openrouter": true, + "provider": "openrouter" + }, + { + "id": "deepseek-r1", + "name": "DeepSeek R1", + "organization": "DeepSeek", + "parameters": "671B MoE", + "context_window": "128K", + "swe_bench": null, + "if_score": 73, + "categories": [ + "reasoning" + ], + "description": "\u041c\u043e\u0449\u043d\u044b\u0435 reasoning-\u0446\u0435\u043f\u043e\u0447\u043a\u0438. \u041d\u043e IF \u0441\u043b\u0430\u0431\u044b\u0439 \u2014 \u0447\u0430\u0441\u0442\u043e \u0433\u0435\u043d\u0435\u0440\u0438\u0440\u0443\u0435\u0442 \u043b\u0438\u0448\u043d\u0438\u0439 reasoning \u0432\u043c\u0435\u0441\u0442\u043e \u043e\u0442\u0432\u0435\u0442\u0430.", + "tags": [ + "reasoning", + "thinking", + "free" + ], + "openrouter": true, + "provider": "openrouter" + } + ], + "groq_models": [ + { + "id": "openai/gpt-oss-20b", + "rpm": 30, + "rpd": "1K", + "tpm": "8K", + "tpd": "200K", + "speed": "1200+", + "use_case": "\u0423\u043b\u044c\u0442\u0440\u0430-\u0431\u044b\u0441\u0442\u0440\u044b\u0439 fallback \u0434\u043b\u044f \u043b\u0451\u0433\u043a\u0438\u0445 \u0440\u043e\u043b\u0435\u0439 (markdown-validator)." + }, + { + "id": "llama-3.1-8b-instant", + "rpm": 30, + "rpd": "14.4K", + "tpm": "6K", + "tpd": "500K", + "speed": "~800", + "use_case": "14.4K RPD! \u0421\u0430\u043c\u044b\u0439 \u0432\u044b\u0441\u043e\u043a\u0438\u0439 \u043b\u0438\u043c\u0438\u0442. \u0414\u043b\u044f health-check / ping \u0440\u043e\u043b\u0435\u0439." + }, + { + "id": "groq/compound", + "rpm": 30, + "rpd": "250", + "tpm": "70K", + "tpd": "\u2014", + "speed": "varies", + "use_case": "\u041c\u0443\u043b\u044c\u0442\u0438\u043c\u043e\u0434\u0435\u043b\u044c\u043d\u0430\u044f \u0430\u0433\u0440\u0435\u0433\u0430\u0446\u0438\u044f. \u0414\u043b\u044f research-\u0437\u0430\u0434\u0430\u0447." 
+ }, + { + "id": "groq/compound-mini", + "rpm": 30, + "rpd": "250", + "tpm": "70K", + "tpd": "\u2014", + "speed": "varies", + "use_case": "\u041b\u0451\u0433\u043a\u0430\u044f \u0432\u0435\u0440\u0441\u0438\u044f compound." + }, + { + "id": "llama-prompt-guard-2", + "rpm": 30, + "rpd": "14.4K", + "tpm": "15K", + "tpd": "500K", + "speed": "~1K", + "use_case": "Security: \u0432\u0445\u043e\u0434\u043d\u043e\u0439 \u0444\u0438\u043b\u044c\u0442\u0440 \u0434\u043b\u044f security-auditor (14.4K RPD!)." + } + ], + "agent_model_scores": [ + { + "agent": "lead-developer", + "current_model_index": 0, + "current_model_id": "qwen3-coder-480b", + "reasoning_effort": "H", + "scores": { + "qwen3-coder-480b": 92, + "minimax-m2.5": 86, + "minimax-m2.7": 82, + "nemotron-3-super": 70, + "glm-5.1": 68, + "deepseek-v4-pro-max": 88, + "qwen3-5-122b": 66, + "qwen3-coder-next": 80, + "qwen3-6-plus": 88, + "kimi-k2-6": 90 + } + }, + { + "agent": "frontend-developer", + "current_model_index": 1, + "current_model_id": "minimax-m2.5", + "reasoning_effort": "M", + "scores": { + "qwen3-coder-480b": 86, + "minimax-m2.5": 92, + "minimax-m2.7": 88, + "nemotron-3-super": 62, + "glm-5.1": 56, + "deepseek-v4-pro-max": 82, + "qwen3-5-122b": 60, + "qwen3-coder-next": 76, + "qwen3-6-plus": 88, + "kimi-k2-6": 86 + } + }, + { + "agent": "php-developer", + "current_model_index": 0, + "current_model_id": "qwen3-coder-480b", + "reasoning_effort": "H", + "scores": { + "qwen3-coder-480b": 87, + "minimax-m2.5": 76, + "minimax-m2.7": 72, + "nemotron-3-super": 64, + "glm-5.1": 56, + "deepseek-v4-pro-max": 74, + "qwen3-5-122b": 60, + "qwen3-coder-next": 76, + "qwen3-6-plus": 84, + "kimi-k2-6": 86 + } + }, + { + "agent": "python-developer", + "current_model_index": 0, + "current_model_id": "qwen3-coder-480b", + "reasoning_effort": "H", + "scores": { + "qwen3-coder-480b": 90, + "minimax-m2.5": 82, + "minimax-m2.7": 78, + "nemotron-3-super": 66, + "glm-5.1": 60, + "deepseek-v4-pro-max": 78, + "qwen3-5-122b": 64, + 
"qwen3-coder-next": 78, + "qwen3-6-plus": 88, + "kimi-k2-6": 88 + } + }, + { + "agent": "backend-developer", + "current_model_index": 0, + "current_model_id": "qwen3-coder-480b", + "reasoning_effort": "M", + "scores": { + "qwen3-coder-480b": 91, + "minimax-m2.5": 84, + "minimax-m2.7": 80, + "nemotron-3-super": 68, + "glm-5.1": 63, + "deepseek-v4-pro-max": 86, + "qwen3-5-122b": 62, + "qwen3-coder-next": 78, + "qwen3-6-plus": 87, + "kimi-k2-6": 90 + } + }, + { + "agent": "go-developer", + "current_model_index": 0, + "current_model_id": "qwen3-coder-480b", + "reasoning_effort": "M", + "scores": { + "qwen3-coder-480b": 85, + "minimax-m2.5": 78, + "minimax-m2.7": 74, + "nemotron-3-super": 66, + "glm-5.1": 58, + "deepseek-v4-pro-max": 88, + "qwen3-5-122b": 58, + "qwen3-coder-next": 74, + "qwen3-6-plus": 82, + "kimi-k2-6": 86 + } + }, + { + "agent": "flutter-developer", + "current_model_index": 0, + "current_model_id": "qwen3-coder-480b", + "reasoning_effort": "M", + "scores": { + "qwen3-coder-480b": 86, + "minimax-m2.5": 70, + "minimax-m2.7": 66, + "nemotron-3-super": 60, + "glm-5.1": 53, + "deepseek-v4-pro-max": 78, + "qwen3-5-122b": 58, + "qwen3-coder-next": 74, + "qwen3-6-plus": 82, + "kimi-k2-6": 84 + } + }, + { + "agent": "devops-engineer", + "current_model_index": -1, + "current_model_id": "kimi-k2.6", + "reasoning_effort": "M", + "scores": { + "qwen3-coder-480b": 66, + "minimax-m2.5": 53, + "minimax-m2.7": 48, + "nemotron-3-super": 78, + "glm-5.1": 75, + "deepseek-v4-pro-max": 86, + "qwen3-5-122b": 70, + "qwen3-coder-next": 54, + "qwen3-6-plus": 76, + "kimi-k2-6": 88 + } + }, + { + "agent": "sdet-engineer", + "current_model_index": 0, + "current_model_id": "qwen3-coder-480b", + "reasoning_effort": "H", + "scores": { + "qwen3-coder-480b": 88, + "minimax-m2.5": 84, + "minimax-m2.7": 80, + "nemotron-3-super": 70, + "glm-5.1": 63, + "deepseek-v4-pro-max": 84, + "qwen3-5-122b": 64, + "qwen3-coder-next": 78, + "qwen3-6-plus": 84, + "kimi-k2-6": 87 + } + }, + { + 
"agent": "code-skeptic", + "current_model_index": 1, + "current_model_id": "minimax-m2.5", + "reasoning_effort": "M", + "scores": { + "qwen3-coder-480b": 82, + "minimax-m2.5": 85, + "minimax-m2.7": 80, + "nemotron-3-super": 73, + "glm-5.1": 72, + "deepseek-v4-pro-max": 82, + "qwen3-5-122b": 70, + "qwen3-coder-next": 72, + "qwen3-6-plus": 80, + "kimi-k2-6": 82 + } + }, + { + "agent": "security-auditor", + "current_model_index": 6, + "current_model_id": "nemotron-3-super", + "reasoning_effort": "M", + "scores": { + "qwen3-coder-480b": 76, + "minimax-m2.5": 74, + "minimax-m2.7": 68, + "nemotron-3-super": 76, + "glm-5.1": 68, + "deepseek-v4-pro-max": 80, + "qwen3-5-122b": 72, + "qwen3-coder-next": 64, + "qwen3-6-plus": 75, + "kimi-k2-6": 80 + } + }, + { + "agent": "performance-engineer", + "current_model_index": 6, + "current_model_id": "nemotron-3-super", + "reasoning_effort": "M", + "scores": { + "qwen3-coder-480b": 78, + "minimax-m2.5": 75, + "minimax-m2.7": 70, + "nemotron-3-super": 78, + "glm-5.1": 74, + "deepseek-v4-pro-max": 84, + "qwen3-5-122b": 70, + "qwen3-coder-next": 67, + "qwen3-6-plus": 76, + "kimi-k2-6": 82 + } + }, + { + "agent": "the-fixer", + "current_model_index": 1, + "current_model_id": "minimax-m2.5", + "reasoning_effort": "M", + "scores": { + "qwen3-coder-480b": 89, + "minimax-m2.5": 88, + "minimax-m2.7": 84, + "nemotron-3-super": 71, + "glm-5.1": 64, + "deepseek-v4-pro-max": 88, + "qwen3-5-122b": 64, + "qwen3-coder-next": 82, + "qwen3-6-plus": 86, + "kimi-k2-6": 90 + } + }, + { + "agent": "browser-automation", + "current_model_index": -1, + "current_model_id": "kimi-k2.6", + "reasoning_effort": "M", + "scores": { + "qwen3-coder-480b": 87, + "minimax-m2.5": 72, + "minimax-m2.7": 68, + "nemotron-3-super": 61, + "glm-5.1": 53, + "deepseek-v4-pro-max": 82, + "qwen3-5-122b": 56, + "qwen3-coder-next": 72, + "qwen3-6-plus": 82, + "kimi-k2-6": 86 + } + }, + { + "agent": "visual-tester", + "current_model_index": 0, + "current_model_id": 
"qwen3-coder-480b", + "reasoning_effort": "M", + "scores": { + "qwen3-coder-480b": 82, + "minimax-m2.5": 68, + "minimax-m2.7": 64, + "nemotron-3-super": 55, + "glm-5.1": 48, + "deepseek-v4-pro-max": 76, + "qwen3-5-122b": 54, + "qwen3-coder-next": 66, + "qwen3-6-plus": 76, + "kimi-k2-6": 78 + } + }, + { + "agent": "system-analyst", + "current_model_index": 7, + "current_model_id": "glm-5.1", + "reasoning_effort": "M", + "scores": { + "qwen3-coder-480b": 70, + "minimax-m2.5": 66, + "minimax-m2.7": 63, + "nemotron-3-super": 74, + "glm-5.1": 82, + "deepseek-v4-pro-max": 88, + "qwen3-5-122b": 76, + "qwen3-coder-next": 58, + "qwen3-6-plus": 80, + "kimi-k2-6": 86 + } + }, + { + "agent": "capability-analyst", + "current_model_index": 7, + "current_model_id": "glm-5.1", + "reasoning_effort": "M", + "scores": { + "qwen3-coder-480b": 72, + "minimax-m2.5": 68, + "minimax-m2.7": 66, + "nemotron-3-super": 76, + "glm-5.1": 78, + "deepseek-v4-pro-max": 82, + "qwen3-5-122b": 75, + "qwen3-coder-next": 60, + "qwen3-6-plus": 79, + "kimi-k2-6": 82 + } + }, + { + "agent": "orchestrator", + "current_model_index": -1, + "current_model_id": "kimi-k2.6", + "reasoning_effort": "H", + "scores": { + "qwen3-coder-480b": 74, + "minimax-m2.5": 70, + "minimax-m2.7": 68, + "nemotron-3-super": 80, + "glm-5.1": 82, + "deepseek-v4-pro-max": 86, + "qwen3-5-122b": 78, + "qwen3-coder-next": 62, + "qwen3-6-plus": 84, + "kimi-k2-6": 92 + } + }, + { + "agent": "release-manager", + "current_model_index": 7, + "current_model_id": "glm-5.1", + "reasoning_effort": "M", + "scores": { + "qwen3-coder-480b": 72, + "minimax-m2.5": 66, + "minimax-m2.7": 64, + "nemotron-3-super": 74, + "glm-5.1": 76, + "deepseek-v4-pro-max": 78, + "qwen3-5-122b": 72, + "qwen3-coder-next": 60, + "qwen3-6-plus": 76, + "kimi-k2-6": 78 + } + }, + { + "agent": "evaluator", + "current_model_index": 7, + "current_model_id": "glm-5.1", + "reasoning_effort": "H", + "scores": { + "qwen3-coder-480b": 70, + "minimax-m2.5": 73, + "minimax-m2.7": 
70, + "nemotron-3-super": 78, + "glm-5.1": 78, + "deepseek-v4-pro-max": 84, + "qwen3-5-122b": 76, + "qwen3-coder-next": 58, + "qwen3-6-plus": 81, + "kimi-k2-6": 84 + } + }, + { + "agent": "prompt-optimizer", + "current_model_index": 7, + "current_model_id": "glm-5.1", + "reasoning_effort": "M", + "scores": { + "qwen3-coder-480b": 76, + "minimax-m2.5": 74, + "minimax-m2.7": 72, + "nemotron-3-super": 76, + "glm-5.1": 75, + "deepseek-v4-pro-max": 80, + "qwen3-5-122b": 74, + "qwen3-coder-next": 64, + "qwen3-6-plus": 83, + "kimi-k2-6": 82 + } + }, + { + "agent": "product-owner", + "current_model_index": 7, + "current_model_id": "glm-5.1", + "reasoning_effort": "M", + "scores": { + "qwen3-coder-480b": 60, + "minimax-m2.5": 56, + "minimax-m2.7": 54, + "nemotron-3-super": 74, + "glm-5.1": 78, + "deepseek-v4-pro-max": 76, + "qwen3-5-122b": 74, + "qwen3-coder-next": 48, + "qwen3-6-plus": 78, + "kimi-k2-6": 76 + } + }, + { + "agent": "pipeline-judge", + "current_model_index": 7, + "current_model_id": "glm-5.1", + "reasoning_effort": "M", + "scores": { + "qwen3-coder-480b": 64, + "minimax-m2.5": 68, + "minimax-m2.7": 65, + "nemotron-3-super": 78, + "glm-5.1": 76, + "deepseek-v4-pro-max": 82, + "qwen3-5-122b": 74, + "qwen3-coder-next": 56, + "qwen3-6-plus": 80, + "kimi-k2-6": 84 + } + }, + { + "agent": "workflow-architect", + "current_model_index": 7, + "current_model_id": "glm-5.1", + "reasoning_effort": "H", + "scores": { + "qwen3-coder-480b": 68, + "minimax-m2.5": 62, + "minimax-m2.7": 60, + "nemotron-3-super": 76, + "glm-5.1": 76, + "deepseek-v4-pro-max": 80, + "qwen3-5-122b": 72, + "qwen3-coder-next": 56, + "qwen3-6-plus": 80, + "kimi-k2-6": 82 + } + }, + { + "agent": "markdown-validator", + "current_model_index": -1, + "current_model_id": "nemotron-3-nano:30b", + "reasoning_effort": "M", + "scores": { + "qwen3-coder-480b": 43, + "minimax-m2.5": 38, + "minimax-m2.7": 36, + "nemotron-3-super": 52, + "glm-5.1": 55, + "deepseek-v4-pro-max": 68, + "qwen3-5-122b": 56, + 
"qwen3-coder-next": 40, + "qwen3-6-plus": 50, + "kimi-k2-6": 56 + } + }, + { + "agent": "agent-architect", + "current_model_index": -1, + "current_model_id": "kimi-k2.6", + "reasoning_effort": "H", + "scores": { + "qwen3-coder-480b": 78, + "minimax-m2.5": 72, + "minimax-m2.7": 70, + "nemotron-3-super": 78, + "glm-5.1": 76, + "deepseek-v4-pro-max": 82, + "qwen3-5-122b": 76, + "qwen3-coder-next": 66, + "qwen3-6-plus": 82, + "kimi-k2-6": 86 + } + }, + { + "agent": "planner", + "current_model_index": 6, + "current_model_id": "nemotron-3-super", + "reasoning_effort": "M", + "scores": { + "qwen3-coder-480b": 72, + "minimax-m2.5": 68, + "minimax-m2.7": 66, + "nemotron-3-super": 80, + "glm-5.1": 78, + "deepseek-v4-pro-max": 88, + "qwen3-5-122b": 78, + "qwen3-coder-next": 60, + "qwen3-6-plus": 85, + "kimi-k2-6": 86 + } + }, + { + "agent": "reflector", + "current_model_index": 6, + "current_model_id": "nemotron-3-super", + "reasoning_effort": "M", + "scores": { + "qwen3-coder-480b": 68, + "minimax-m2.5": 66, + "minimax-m2.7": 64, + "nemotron-3-super": 78, + "glm-5.1": 76, + "deepseek-v4-pro-max": 84, + "qwen3-5-122b": 76, + "qwen3-coder-next": 56, + "qwen3-6-plus": 82, + "kimi-k2-6": 80 + } + }, + { + "agent": "memory-manager", + "current_model_index": 6, + "current_model_id": "nemotron-3-super", + "reasoning_effort": "M", + "scores": { + "qwen3-coder-480b": 63, + "minimax-m2.5": 58, + "minimax-m2.7": 56, + "nemotron-3-super": 86, + "glm-5.1": 72, + "deepseek-v4-pro-max": 86, + "qwen3-5-122b": 70, + "qwen3-coder-next": 50, + "qwen3-6-plus": 87, + "kimi-k2-6": 84 + } + }, + { + "agent": "architect-indexer", + "current_model_index": 7, + "current_model_id": "glm-5.1", + "reasoning_effort": "H", + "scores": { + "qwen3-coder-480b": 70, + "minimax-m2.5": 64, + "minimax-m2.7": 62, + "nemotron-3-super": 74, + "glm-5.1": 80, + "deepseek-v4-pro-max": 78, + "qwen3-5-122b": 76, + "qwen3-coder-next": 58, + "qwen3-6-plus": 80, + "kimi-k2-6": 84 + } + } + ], + "if_scores": { + 
"qwen3-coder-480b": 88, + "minimax-m2.5": 82, + "minimax-m2.7": 78, + "nemotron-3-super": 85, + "glm-5.1": 80, + "deepseek-v4-pro-max": 88, + "qwen3-5-122b": 86, + "qwen3-coder-next": 84, + "qwen3-6-plus": 90, + "kimi-k2-6": 91, + "deepseek-v4-flash": 86 + }, + "agent_current_config": [ + { + "agent": "lead-developer", + "model": "ollama-cloud/qwen3-coder:480b", + "provider": "Ollama Cloud", + "category": "Process", + "badge_type": "qwen", + "fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "frontend-developer", + "model": "ollama-cloud/minimax-m2.5", + "provider": "Ollama Cloud", + "category": "Process", + "badge_type": "qwen", + "fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "php-developer", + "model": "ollama-cloud/qwen3-coder:480b", + "provider": "Ollama Cloud", + "category": "Process", + "badge_type": "qwen", + "fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "python-developer", + "model": "ollama-cloud/qwen3-coder:480b", + "provider": "Ollama Cloud", + "category": "Process", + "badge_type": "qwen", + "fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "backend-developer", + "model": "ollama-cloud/qwen3-coder:480b", + "provider": "Ollama Cloud", + "category": "Process", + "badge_type": "qwen", + "fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "go-developer", + "model": "ollama-cloud/qwen3-coder:480b", + "provider": "Ollama Cloud", + "category": "Process", + "badge_type": "qwen", + "fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "flutter-developer", + "model": "ollama-cloud/qwen3-coder:480b", + "provider": "Ollama Cloud", + "category": "Process", + "badge_type": "qwen", + "fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "devops-engineer", + "model": "ollama-cloud/kimi-k2.6", + "provider": "Ollama Cloud", + "category": "Process", + 
"badge_type": "nemotron", + "fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "sdet-engineer", + "model": "ollama-cloud/qwen3-coder:480b", + "provider": "Ollama Cloud", + "category": "Process", + "badge_type": "qwen", + "fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "code-skeptic", + "model": "ollama-cloud/minimax-m2.5", + "provider": "Ollama Cloud", + "category": "Process", + "badge_type": "minimax", + "fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "security-auditor", + "model": "ollama-cloud/nemotron-3-super", + "provider": "Ollama Cloud", + "category": "Process", + "badge_type": "nemotron", + "fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "performance-engineer", + "model": "ollama-cloud/nemotron-3-super", + "provider": "Ollama Cloud", + "category": "Process", + "badge_type": "nemotron", + "fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "the-fixer", + "model": "ollama-cloud/minimax-m2.5", + "provider": "Ollama Cloud", + "category": "Process", + "badge_type": "minimax", + "fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "browser-automation", + "model": "ollama-cloud/kimi-k2.6", + "provider": "Ollama Cloud", + "category": "Process", + "badge_type": "qwen", + "fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "visual-tester", + "model": "ollama-cloud/qwen3-coder:480b", + "provider": "Ollama Cloud", + "category": "Process", + "badge_type": "qwen", + "fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "system-analyst", + "model": "ollama-cloud/glm-5.1", + "provider": "Ollama Cloud", + "category": "Process", + "badge_type": "glm", + "fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "capability-analyst", + "model": "ollama-cloud/glm-5.1", + "provider": "Ollama Cloud", + "category": 
"Process", + "badge_type": "glm", + "fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "orchestrator", + "model": "ollama-cloud/kimi-k2.6", + "provider": "Ollama Cloud", + "category": "Process", + "badge_type": "kimi", + "fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "release-manager", + "model": "ollama-cloud/glm-5.1", + "provider": "Ollama Cloud", + "category": "Process", + "badge_type": "glm", + "fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "evaluator", + "model": "ollama-cloud/glm-5.1", + "provider": "Ollama Cloud", + "category": "Process", + "badge_type": "glm", + "fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "prompt-optimizer", + "model": "ollama-cloud/glm-5.1", + "provider": "Ollama Cloud", + "category": "Process", + "badge_type": "glm", + "fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "product-owner", + "model": "ollama-cloud/glm-5.1", + "provider": "Ollama Cloud", + "category": "Process", + "badge_type": "glm", + "fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "pipeline-judge", + "model": "ollama-cloud/glm-5.1", + "provider": "Ollama Cloud", + "category": "Process", + "badge_type": "glm", + "fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "workflow-architect", + "model": "ollama-cloud/glm-5.1", + "provider": "Ollama Cloud", + "category": "Process", + "badge_type": "glm", + "fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "markdown-validator", + "model": "ollama-cloud/nemotron-3-nano:30b", + "provider": "Ollama Cloud", + "category": "Process", + "badge_type": "nemotron", + "fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "agent-architect", + "model": "ollama-cloud/kimi-k2.6", + "provider": "Ollama Cloud", + "category": "Process", + "badge_type": "glm", + 
"fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "planner", + "model": "ollama-cloud/nemotron-3-super", + "provider": "Ollama Cloud", + "category": "Process", + "badge_type": "nemotron", + "fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "reflector", + "model": "ollama-cloud/nemotron-3-super", + "provider": "Ollama Cloud", + "category": "Process", + "badge_type": "nemotron", + "fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "memory-manager", + "model": "ollama-cloud/nemotron-3-super", + "provider": "Ollama Cloud", + "category": "Process", + "badge_type": "nemotron", + "fit_score": 0, + "status": "good", + "previous_model": null + }, + { + "agent": "architect-indexer", + "model": "ollama-cloud/glm-5.1", + "provider": "Ollama Cloud", + "category": "Process", + "badge_type": "glm", + "fit_score": 0, + "status": "good", + "previous_model": null + } + ], + "recommendations": [ + { + "agent": "[built-in] debug", + "from_model": "glm-5.1.1 (88)", + "from_provider": "Ollama", + "to_model": "V4-Pro Max (\u260590) / K2.6 (\u260590) RE:High", + "to_provider": "Ollama Cloud", + "impact": "high", + "quality_change": "+2%", + "speed_change": "~1x", + "context_change": "200K\u21921M", + "provider_change": "Ollama Cloud", + "rationale": "\u2605 \u043c\u0430\u0442\u0440\u0438\u0446\u044b: V4-Pro=90 \u0438 K2.6=90 (TIE!), GLM-5.1=88. V4-Pro: LiveCodeBench 93.5(#1!), Terminal 67.9, 1M ctx \u0434\u043b\u044f \u043f\u043e\u043b\u043d\u043e\u0433\u043e \u043f\u0440\u043e\u0435\u043a\u0442\u0430. K2.6: 13h auto sessions. \u041e\u0431\u0430 \u043b\u0443\u0447\u0448\u0435 GLM-5.1. RE:High \u0434\u043b\u044f debug." 
+ }, + { + "agent": "planner", + "from_model": "nemotron-3-super (80)", + "from_provider": "Ollama", + "to_model": "V4-Pro Max (\u260588) RE:High", + "to_provider": "Ollama Cloud", + "impact": "high", + "quality_change": "+10%", + "speed_change": "~1x", + "context_change": "1M", + "provider_change": "Ollama Cloud", + "rationale": "\u2605 \u043c\u0430\u0442\u0440\u0438\u0446\u044b: V4-Pro=88(\u043b\u0443\u0447\u0448\u0438\u0439!), K2.6=86, GLM-5.1=85, Nem=80. V4-Pro: GPQA 90.1 (reasoning), 1M ctx \u0441\u043e\u0445\u0440\u0430\u043d\u044f\u0435\u0442\u0441\u044f (vs \u043f\u043e\u0442\u0435\u0440\u044f \u043f\u0440\u0438 K2.6). RE:High \u0434\u043b\u044f chain-of-thought planning." + }, + { + "agent": "go-developer", + "from_model": "qwen3-coder:480b (85)", + "from_provider": "Ollama", + "to_model": "V4-Pro Max (\u260588) RE:Medium", + "to_provider": "Ollama Cloud", + "impact": "medium", + "quality_change": "+4%", + "speed_change": "~1x", + "context_change": "256K\u21921M", + "provider_change": "Ollama Cloud", + "rationale": "\u2605 \u043c\u0430\u0442\u0440\u0438\u0446\u044b: V4-Pro=88(\u043b\u0443\u0447\u0448\u0438\u0439 \u0434\u043b\u044f Go!), K2.6=86, Qwen3Coder=85. DeepSeek \u043c\u043e\u0434\u0435\u043b\u0438 \u0442\u0440\u0430\u0434\u0438\u0446\u0438\u043e\u043d\u043d\u043e \u0441\u0438\u043b\u044c\u043d\u044b \u0432 Go/Rust. 1M ctx \u0434\u043b\u044f \u043a\u0440\u0443\u043f\u043d\u044b\u0445 Go-\u043f\u0440\u043e\u0435\u043a\u0442\u043e\u0432." + }, + { + "agent": "history-miner", + "from_model": "nemotron-3-super (\u260585)", + "from_provider": "Ollama", + "to_model": "V4-Pro Max (86) + Nem fallback", + "to_provider": "Hybrid", + "impact": "medium", + "quality_change": "+1%", + "speed_change": "~1x", + "context_change": "1M", + "provider_change": "Ollama Cloud + Ollama", + "rationale": "V4-Pro=86 \u0447\u0443\u0442\u044c \u043b\u0443\u0447\u0448\u0435 Nemotron=85. 1M ctx \u0443 \u043e\u0431\u043e\u0438\u0445. 
MRCR 83.5 \u0443 V4-Pro \u2014 \u043b\u0443\u0447\u0448\u0435\u0435 long-context retrieval. Nemotron \u043a\u0430\u043a fallback (RULER 91.75%)." + }, + { + "agent": "frontend-dev \u2192 M2.5", + "from_model": "qwen3-coder (90)", + "from_provider": "Ollama", + "to_model": "MiniMax M2.5 (\u260592) \u2705", + "to_provider": "Ollama", + "impact": "low", + "quality_change": "+2%", + "speed_change": "=", + "context_change": "204K", + "provider_change": "Ollama", + "rationale": "Spec-writing, UI architect. APPLIED." + }, + { + "agent": "devops \u2192 K2.6", + "from_model": "deepseek-v3.2", + "from_provider": "", + "to_model": "kimi-k2.6:cloud \u2705", + "to_provider": "Ollama Cloud", + "impact": "low", + "quality_change": "+35%", + "speed_change": "=", + "context_change": "256K", + "provider_change": "", + "rationale": "APPLIED." + }, + { + "agent": "orchestrator", + "from_model": "glm-5.1.1 (\u260590)", + "from_provider": "Ollama", + "to_model": "K2.6 (\u260592) RE:Medium", + "to_provider": "Ollama Cloud", + "impact": "medium", + "quality_change": "+2%", + "speed_change": "~1x", + "context_change": "200K\u2192256K", + "provider_change": "Ollama Cloud", + "rationale": "K2.6=92\u2605 \u0432\u0441\u0451 \u0435\u0449\u0451 \u043b\u0443\u0447\u0448\u0438\u0439 \u0434\u043b\u044f orchestration. V4-Pro=86 \u0441\u043b\u0430\u0431\u0435\u0435. 300 sub-agent swarm." + }, + { + "agent": "the-fixer", + "from_model": "minimax-m2.5 (\u260588)", + "from_provider": "Ollama", + "to_model": "V4-Pro (\u260588) / K2.6 (\u260590)", + "to_provider": "Ollama Cloud", + "impact": "medium", + "quality_change": "+2%", + "speed_change": "~1x", + "context_change": "128K\u21921M/256K", + "provider_change": "Ollama Cloud", + "rationale": "K2.6=90(\u043b\u0443\u0447\u0448\u0438\u0439), V4-Pro=88=M2.5. M2.5 SWE-bench 80.2% \u0441\u0442\u0430\u0431\u0438\u043b\u044c\u043d\u0435\u0435. \u041d\u0435 \u0441\u0440\u043e\u0447\u043d\u043e." 
+ }, + { + "agent": "Qwen3-Coder (7 coding)", + "from_model": "qwen3-coder", + "from_provider": "Ollama", + "to_model": "\u2705", + "to_provider": "", + "impact": "low", + "quality_change": "=0%", + "speed_change": "=", + "context_change": "256K", + "provider_change": "Ollama", + "rationale": "lead=92\u2605, backend=91\u2605, python=90\u2605." + }, + { + "agent": "GLM-5.1 (12 agents)", + "from_model": "glm-5.1.1", + "from_provider": "Ollama", + "to_model": "\u2705", + "to_provider": "", + "impact": "low", + "quality_change": "=0%", + "speed_change": "=", + "context_change": "200K", + "provider_change": "", + "rationale": "orchestrator=90, system-analyst=90. SWE-Pro #1." + }, + { + "agent": "Kimi K2.6 (3 agents)", + "from_model": "kimi-k2.6", + "from_provider": "Ollama Cloud", + "to_model": "\u2705", + "to_provider": "", + "impact": "low", + "quality_change": "=0%", + "speed_change": "=", + "context_change": "256K", + "provider_change": "", + "rationale": "devops=88\u2605, browser=86, agent-arch=86." + } + ], + "impact_data": [ + { + "category": "debug GLM5.1\u2192V4-Pro/K2.6", + "before": 88, + "after": 90, + "delta": 2, + "notes": "LiveCodeBench 93.5, Terminal 67.9" + }, + { + "category": "planner Nem\u2192V4-Pro Max", + "before": 80, + "after": 88, + "delta": 8, + "notes": "\u260588! GPQA 90.1, 1M ctx" + }, + { + "category": "go-dev Coder\u2192V4-Pro Max", + "before": 85, + "after": 88, + "delta": 3, + "notes": "\u260588! 
Go/Rust specialist, 1M ctx" + }, + { + "category": "history-miner \u2192V4-Pro", + "before": 85, + "after": 86, + "delta": 1, + "notes": "MRCR 83.5, long-context" + }, + { + "category": "orchestrator \u2192K2.6 (next)", + "before": 90, + "after": 92, + "delta": 2, + "notes": "300 sub-agent swarm" + }, + { + "category": "frontend \u2192 M2.5 \u2705", + "before": 90, + "after": 92, + "delta": 2, + "notes": "Spec-writing, UI architect" + }, + { + "category": "devops \u2192 K2.6 \u2705", + "before": 65, + "after": 88, + "delta": 23, + "notes": "IF:65\u219291! Terminal 66.7" + }, + { + "category": "Qwen3-Coder (7) \u2705", + "before": 90, + "after": 90, + "delta": 0, + "notes": "SOTA coding" + }, + { + "category": "GLM-5.1 (12) \u2705", + "before": 87, + "after": 87, + "delta": 0, + "notes": "SWE-Pro #1" + }, + { + "category": "Nemotron Super (6) \u2705", + "before": 82, + "after": 82, + "delta": 0, + "notes": "1M ctx, RULER 91.75%" + } + ], + "benchmark_comparison": { + "benchmarks": [ + { + "name": "SWE-V", + "full_name": "SWE-Bench Verified", + "description": "GitHub issue resolution (500 tasks)", + "roles": "lead-dev, backend, fixer" + }, + { + "name": "SWE-P", + "full_name": "SWE-Bench Pro", + "description": "Multi-lang, decontaminated (1865 tasks)", + "roles": "all coding agents" + }, + { + "name": "T-Bench", + "full_name": "Terminal-Bench 2.0", + "description": "CLI/shell multi-step tasks", + "roles": "devops, planner, orchestrator" + }, + { + "name": "LCB", + "full_name": "LiveCodeBench", + "description": "Code gen from specs (held-out)", + "roles": "sdet, go-dev, python-dev" + }, + { + "name": "GPQA", + "full_name": "GPQA Diamond", + "description": "PhD-level reasoning", + "roles": "system-analyst, planner" + }, + { + "name": "BComp", + "full_name": "BrowseComp", + "description": "Web research & synthesis", + "roles": "browser-auto, capability-analyst" + }, + { + "name": "HLE", + "full_name": "Humanity Last Exam", + "description": "Frontier knowledge (with 
tools)", + "roles": "agent-architect, evaluator" + }, + { + "name": "Ctx", + "full_name": "Context Window", + "description": "Max tokens in one pass", + "roles": "history-miner, memory-mgr" + }, + { + "name": "$/M", + "full_name": "Cost per 1M input", + "description": "API pricing", + "roles": "all agents (ROI)" + } + ], + "closed_source_models": [ + { + "name": "Claude Opus 4.7", + "organization": "Anthropic", + "scores": [ + 87.6, + 64.3, + 69.4, + null, + 94.2, + 79.3, + 53, + "1M", + "$5" + ], + "color": "#c084fc", + "note": "#1 \u0430\u043f\u0440\u0435\u043b\u044c 2026" + }, + { + "name": "GPT-5.5", + "organization": "OpenAI", + "scores": [ + null, + 58.6, + 82.7, + null, + null, + 83.4, + 57.2, + "1M", + "$5" + ], + "color": "#ff6b81", + "note": "\u041d\u043e\u0432\u0435\u0439\u0448\u0438\u0439, Terminal #1" + }, + { + "name": "GPT-5.4", + "organization": "OpenAI", + "scores": [ + 78.2, + 59.1, + 75.1, + null, + 94.4, + 82.7, + 58.7, + "200K", + "$2.50" + ], + "color": "#ff6b81", + "note": "Reasoning, math" + }, + { + "name": "Gemini 3.1 Pro", + "organization": "Google", + "scores": [ + 80.6, + 46.1, + 68.5, + null, + 94.3, + 85.9, + 51.4, + "2M", + "$2" + ], + "color": "#facc15", + "note": "ARC-AGI 77.1%, \u0434\u0435\u0448\u0451\u0432\u044b\u0439" + }, + { + "name": "Claude Sonnet 4.6", + "organization": "Anthropic", + "scores": [ + 79.6, + null, + null, + null, + null, + null, + null, + "200K", + "$3" + ], + "color": "#c084fc", + "note": "5\u00d7 \u0434\u0435\u0448\u0435\u0432\u043b\u0435 Opus" + }, + { + "name": "GPT-5.3-Codex", + "organization": "OpenAI", + "scores": [ + 85, + 57, + 77.3, + null, + null, + null, + null, + "200K", + "$6" + ], + "color": "#ff6b81", + "note": "Coding specialist" + } + ], + "apaw_models": [ + { + "name": "Kimi K2.6", + "organization": "APAW", + "scores": [ + 80.2, + 58.6, + 66.7, + 87.2, + null, + 83.2, + 54, + "256K", + "$0.95" + ], + "color": "#00ff94", + "note": "devops, browser, architect (3)" + }, + { + "name": 
"GLM-5.1", + "organization": "APAW", + "scores": [ + null, + 58.4, + 63.5, + null, + 86.2, + 68.7, + null, + "200K", + "~$0.50" + ], + "color": "#00ff94", + "note": "12 agents! orchestrator, eval..." + }, + { + "name": "V4-Pro Max", + "organization": "APAW", + "scores": [ + 80.6, + 55.4, + 67.9, + 93.5, + 90.1, + 83.4, + 48.2, + "1M", + "$0.42" + ], + "color": "#00d4ff", + "note": "planner, go-dev (\u0440\u0435\u043a.)" + }, + { + "name": "Qwen3-Coder 480B", + "organization": "APAW", + "scores": [ + 66.5, + null, + null, + null, + null, + null, + null, + "256K", + "~$0.50" + ], + "color": "#00ff94", + "note": "7 coding agents" + }, + { + "name": "MiniMax M2.5", + "organization": "APAW", + "scores": [ + 80.2, + 51.3, + null, + null, + null, + 76.3, + null, + "204K", + "$0.15" + ], + "color": "#00ff94", + "note": "frontend, skeptic, fixer (3)" + }, + { + "name": "Nemotron Super", + "organization": "APAW", + "scores": [ + 60.5, + null, + null, + null, + null, + null, + null, + "1M", + "~$0.40" + ], + "color": "#00ff94", + "note": "6 agents (memory, history)" + } + ] + } } \ No newline at end of file diff --git a/agent-evolution/data/v3-optimal-models.json b/agent-evolution/data/v3-optimal-models.json new file mode 100644 index 0000000..7ba8039 --- /dev/null +++ b/agent-evolution/data/v3-optimal-models.json @@ -0,0 +1,610 @@ +{ + "lead-developer": { + "model": "qwen3-coder:480b", + "c": 0, + "score": 92, + "best": 92, + "scores": [ + 92, + 86, + 82, + 70, + 68, + 75, + 88, + 66, + 80, + 88, + 90 + ] + }, + "frontend-developer": { + "model": "minimax-m2.5", + "c": 1, + "score": 92, + "best": 92, + "scores": [ + 86, + 92, + 88, + 62, + 56, + 64, + 82, + 60, + 76, + 88, + 86 + ] + }, + "backend-developer": { + "model": "qwen3-coder:480b", + "c": 0, + "score": 91, + "best": 91, + "scores": [ + 91, + 84, + 80, + 68, + 63, + 72, + 86, + 62, + 78, + 87, + 90 + ] + }, + "go-developer": { + "model": "qwen3-coder:480b", + "c": 0, + "score": 85, + "best": 88, + "scores": [ + 85, 
+ 78, + 74, + 66, + 58, + 68, + 88, + 58, + 74, + 82, + 86 + ] + }, + "flutter-developer": { + "model": "qwen3-coder:480b", + "c": 0, + "score": 86, + "best": 86, + "scores": [ + 86, + 70, + 66, + 60, + 53, + 62, + 78, + 58, + 74, + 82, + 84 + ] + }, + "php-developer": { + "model": "qwen3-coder:480b", + "c": 0, + "score": 87, + "best": 87, + "scores": [ + 87, + 76, + 72, + 64, + 56, + 66, + 74, + 60, + 76, + 84, + 86 + ] + }, + "python-developer": { + "model": "qwen3-coder:480b", + "c": 0, + "score": 90, + "best": 90, + "scores": [ + 90, + 82, + 78, + 66, + 60, + 70, + 78, + 64, + 78, + 88, + 88 + ] + }, + "sdet-engineer": { + "model": "qwen3-coder:480b", + "c": 0, + "score": 88, + "best": 88, + "scores": [ + 88, + 84, + 80, + 70, + 63, + 72, + 84, + 64, + 78, + 84, + 87 + ] + }, + "orchestrator": { + "model": "kimi-k2.6", + "c": 10, + "score": 92, + "best": 92, + "scores": [ + 74, + 70, + 68, + 80, + 82, + 90, + 86, + 78, + 62, + 84, + 92 + ] + }, + "evaluator": { + "model": "glm-5.1", + "c": 5, + "score": 86, + "best": 86, + "scores": [ + 70, + 73, + 70, + 78, + 78, + 86, + 84, + 76, + 58, + 81, + 84 + ] + }, + "capability-analyst": { + "model": "glm-5.1", + "c": 5, + "score": 85, + "best": 85, + "scores": [ + 72, + 68, + 66, + 76, + 78, + 85, + 82, + 75, + 60, + 79, + 82 + ] + }, + "architect-indexer": { + "model": "glm-5.1", + "c": 5, + "score": 88, + "best": 88, + "scores": [ + 70, + 64, + 62, + 74, + 80, + 88, + 78, + 76, + 58, + 80, + 84 + ] + }, + "pipeline-judge": { + "model": "glm-5.1", + "c": 5, + "score": 86, + "best": 86, + "scores": [ + 64, + 68, + 65, + 78, + 76, + 86, + 82, + 74, + 56, + 80, + 84 + ] + }, + "release-manager": { + "model": "glm-5.1", + "c": 5, + "score": 82, + "best": 82, + "scores": [ + 72, + 66, + 64, + 74, + 76, + 82, + 78, + 72, + 60, + 76, + 78 + ] + }, + "requirement-refiner": { + "model": "glm-5.1", + "c": 5, + "score": 88, + "best": 88, + "scores": [ + 66, + 62, + 60, + 72, + 80, + 88, + 82, + 74, + 54, + 78, + 82 + ] + }, + 
"workflow-architect": { + "model": "glm-5.1", + "c": 5, + "score": 84, + "best": 84, + "scores": [ + 68, + 62, + 60, + 76, + 76, + 84, + 80, + 72, + 56, + 80, + 82 + ] + }, + "agent-architect": { + "model": "kimi-k2.6", + "c": 10, + "score": 86, + "best": 86, + "scores": [ + 78, + 72, + 70, + 78, + 76, + 84, + 82, + 76, + 66, + 82, + 86 + ] + }, + "security-auditor": { + "model": "nemotron-3-super", + "c": 3, + "score": 76, + "best": 80, + "scores": [ + 76, + 74, + 68, + 76, + 68, + 78, + 80, + 72, + 64, + 75, + 80 + ] + }, + "performance-engineer": { + "model": "nemotron-3-super", + "c": 3, + "score": 78, + "best": 84, + "scores": [ + 78, + 75, + 70, + 78, + 74, + 82, + 84, + 70, + 67, + 76, + 82 + ] + }, + "history-miner": { + "model": "nemotron-3-super", + "c": 3, + "score": 85, + "best": 88, + "scores": [ + 68, + 60, + 56, + 85, + 78, + 88, + 86, + 72, + 56, + 84, + 82 + ] + }, + "memory-manager": { + "model": "nemotron-3-super", + "c": 3, + "score": 86, + "best": 87, + "scores": [ + 63, + 58, + 56, + 86, + 72, + 84, + 86, + 70, + 50, + 87, + 84 + ] + }, + "planner": { + "model": "nemotron-3-super", + "c": 3, + "score": 80, + "best": 88, + "scores": [ + 72, + 68, + 66, + 80, + 78, + 85, + 88, + 78, + 60, + 85, + 86 + ] + }, + "reflector": { + "model": "nemotron-3-super", + "c": 3, + "score": 78, + "best": 84, + "scores": [ + 68, + 66, + 64, + 78, + 76, + 82, + 84, + 76, + 56, + 82, + 80 + ] + }, + "browser-automation": { + "model": "kimi-k2.6", + "c": 10, + "score": 86, + "best": 87, + "scores": [ + 87, + 72, + 68, + 61, + 53, + 64, + 82, + 56, + 72, + 82, + 86 + ] + }, + "product-owner": { + "model": "glm-5.1", + "c": 5, + "score": 84, + "best": 84, + "scores": [ + 60, + 56, + 54, + 74, + 78, + 84, + 76, + 74, + 48, + 78, + 76 + ] + }, + "visual-tester": { + "model": "qwen3-coder:480b", + "c": 0, + "score": 82, + "best": 82, + "scores": [ + 82, + 68, + 64, + 55, + 48, + 58, + 76, + 54, + 66, + 76, + 78 + ] + }, + "prompt-optimizer": { + "model": "glm-5.1", + 
"c": 5, + "score": 82, + "best": 83, + "scores": [ + 76, + 74, + 72, + 76, + 75, + 82, + 80, + 74, + 64, + 83, + 82 + ] + }, + "system-analyst": { + "model": "glm-5.1", + "c": 5, + "score": 90, + "best": 90, + "scores": [ + 70, + 66, + 63, + 74, + 82, + 90, + 88, + 76, + 58, + 80, + 86 + ] + }, + "code-skeptic": { + "model": "minimax-m2.5", + "c": 1, + "score": 85, + "best": 85, + "scores": [ + 82, + 85, + 80, + 73, + 72, + 78, + 82, + 70, + 72, + 80, + 82 + ] + }, + "the-fixer": { + "model": "minimax-m2.5", + "c": 1, + "score": 88, + "best": 90, + "scores": [ + 89, + 88, + 84, + 71, + 64, + 74, + 88, + 64, + 82, + 86, + 90 + ] + }, + "devops-engineer": { + "model": "kimi-k2.6", + "c": 10, + "score": 88, + "best": 88, + "scores": [ + 66, + 53, + 48, + 78, + 75, + 84, + 86, + 70, + 54, + 76, + 88 + ] + }, + "[built-in] debug": { + "model": "glm-5.1", + "c": 5, + "score": 88, + "best": 90, + "scores": [ + 78, + 80, + 76, + 72, + 64, + 88, + 90, + 68, + 76, + 85, + 90 + ] + } +} \ No newline at end of file diff --git a/agent-evolution/dist/research-dashboard-2026_04_29.html b/agent-evolution/dist/research-dashboard-2026_04_29.html index f90334b..935509d 100644 --- a/agent-evolution/dist/research-dashboard-2026_04_29.html +++ b/agent-evolution/dist/research-dashboard-2026_04_29.html @@ -255,7 +255,7 @@

APAW Agent Model Research v2

-
Live dashboard • 15 models × 32 agents • 2026-04-29
+
Live dashboard • 15 models × 30 agents • 2026-04-29
@@ -419,12 +419,12 @@