APAW/agent-evolution/data/model-benchmarks.json

{
  "version": "1.0.0",
  "generated": "2026-04-30T07:00:00Z",
  "source": "capability-index.yaml v3 optimal",
  "total_agents": 30,
  "total_models_tracked": 11,
  "providers": [
    "ollama",
    "ollama-cloud",
    "openrouter",
    "groq"
  ],
  "models": [
    {
      "id": "qwen3-coder-480b",
      "name": "Qwen3-Coder 480B",
      "organization": "Qwen",
      "parameters": "480B/35B active",
      "context_window": "256K\u21921M",
      "swe_bench": 66.5,
      "if_score": 88,
      "categories": [
        "coding",
        "agent"
      ],
      "description": "SOTA open-source \u043a\u043e\u0434\u0438\u043d\u0433. \u0421\u0440\u0430\u0432\u043d\u0438\u043c \u0441 Claude Sonnet 4.",
      "tags": [
        "coding",
        "agent",
        "tools"
      ],
      "openrouter": false,
      "provider": "ollama"
    },
    {
      "id": "minimax-m2.5",
      "name": "MiniMax M2.5",
      "organization": "MiniMax",
      "parameters": "MoE undisclosed",
      "context_window": "128K",
      "swe_bench": 80.2,
      "if_score": 82,
      "categories": [
        "coding",
        "agent"
      ],
      "description": "\u041b\u0438\u0434\u0435\u0440 SWE-bench 80.2%. \u041f\u043e\u043b\u043d\u044b\u0439 lifecycle \u0440\u0430\u0437\u0440\u0430\u0431\u043e\u0442\u043a\u0438.",
      "tags": [
        "coding",
        "agent"
      ],
      "openrouter": false,
      "provider": "ollama"
    },
    {
      "id": "minimax-m2.7",
      "name": "MiniMax M2.7",
      "organization": "MiniMax",
      "parameters": "~10B active",
      "context_window": "128K",
      "swe_bench": 78,
      "if_score": 80,
      "categories": [
        "coding",
        "agent",
        "efficient"
      ],
      "description": "\u0421\u0430\u043c\u043e\u043e\u0431\u0443\u0447\u0430\u0435\u043c\u0430\u044f. 56.2% SWE-Pro. 100 TPS. $0.30/M.",
      "tags": [
        "coding",
        "agent",
        "self-evolving"
      ],
      "openrouter": false,
      "provider": "ollama"
    },
    {
      "id": "deepseek-v4-pro-max",
      "name": "DeepSeek V4-Pro",
      "organization": "DeepSeek",
      "parameters": "1.6T/49B active MoE",
      "context_window": "1M",
      "swe_bench": 80.6,
      "if_score": 89,
      "categories": [
        "coding",
        "agent",
        "reasoning"
      ],
      "description": "SWE-V 80.6, LiveCodeBench 93.5(#1!), Terminal-Bench 67.9, Codeforces 3206, 1M ctx, 27% FLOPs vs V3.2. MIT.",
      "tags": [
        "coding",
        "agent",
        "thinking",
        "tools"
      ],
      "openrouter": false,
      "provider": "ollama-cloud"
    },
    {
      "id": "deepseek-v4-flash",
      "name": "DeepSeek V4-Flash",
      "organization": "DeepSeek",
      "parameters": "284B/13B active MoE",
      "context_window": "1M",
      "swe_bench": 79,
      "if_score": 86,
      "categories": [
        "coding",
        "efficient",
        "agent"
      ],
      "description": "SWE-V ~79%, Flash Max = Pro \u0443\u0440\u043e\u0432\u0435\u043d\u044c reasoning. 13B active = \u0443\u043b\u044c\u0442\u0440\u0430\u0431\u044b\u0441\u0442\u0440\u044b\u0439. 1M ctx. FP4+FP8. MIT.",
      "tags": [
        "coding",
        "efficient",
        "agent",
        "thinking"
      ],
      "openrouter": false,
      "provider": "ollama-cloud"
    },
    {
      "id": "kimi-k2-6",
      "name": "Kimi K2.6",
      "organization": "Moonshot AI",
      "parameters": "1T/32B active MoE",
      "context_window": "256K",
      "swe_bench": 80.2,
      "if_score": 91,
      "categories": [
        "coding",
        "agent",
        "multimodal"
      ],
      "description": "SWE-Pro 58.6(#1!), SWE-V 80.2, Terminal-Bench 66.7, HLE 54.0(#1!), BrowseComp 83.2. 13h autonomous. 300 sub-agent swarm. Modified MIT.",
      "tags": [
        "coding",
        "agent",
        "swarm",
        "vision",
        "thinking",
        "tools"
      ],
      "openrouter": false,
      "provider": "ollama-cloud"
    },
    {
      "id": "nemotron-3-super",
      "name": "Nemotron 3 Super",
      "organization": "NVIDIA",
      "parameters": "120B/12B active",
      "context_window": "1M",
      "swe_bench": 60.5,
      "if_score": 78,
      "categories": [
        "agent",
        "reasoning",
        "efficient"
      ],
      "description": "SWE-bench 60.5%. RULER@1M 91.75%! \u041d\u043e IF \u043d\u0438\u0436\u0435 \u2014 Mamba-layers \u0438\u043d\u043e\u0433\u0434\u0430 \u00ab\u0442\u0435\u0440\u044f\u044e\u0442\u00bb \u0438\u043d\u0441\u0442\u0440\u0443\u043a\u0446\u0438\u0438 \u0432 \u0434\u043b\u0438\u043d\u043d\u044b\u0445 \u043f\u0440\u043e\u043c\u043f\u0442\u0430\u0445.",
      "tags": [
        "agent",
        "1M-ctx",
        "thinking"
      ],
      "openrouter": false,
      "provider": "ollama"
    },
    {
      "id": "glm-5.1",
      "name": "GLM-5.1",
      "organization": "Z.ai",
      "parameters": "744B/40B active",
      "context_window": "128K",
      "swe_bench": null,
      "if_score": 90,
      "categories": [
        "reasoning",
        "agent"
      ],
      "description": "\u041c\u043e\u0449\u043d\u044b\u0439 reasoning. Arena ELO 1451. \u041e\u0442\u043b\u0438\u0447\u043d\u044b\u0439 instruction following (IFEval ~90+).",
      "tags": [
        "reasoning",
        "agent"
      ],
      "openrouter": false,
      "provider": "ollama"
    },
    {
      "id": "deepseek-v4",
      "name": "DeepSeek V4",
      "organization": "DeepSeek",
      "parameters": "Large MoE",
      "context_window": "128K",
      "swe_bench": null,
      "if_score": 75,
      "categories": [
        "reasoning"
      ],
      "description": "\u0425\u043e\u0440\u043e\u0448\u0438\u0439 reasoning, \u043d\u043e IF \u043d\u0435\u0441\u0442\u0430\u0431\u0438\u043b\u0435\u043d \u2014 \u0438\u043d\u043e\u0433\u0434\u0430 \u0438\u0433\u043d\u043e\u0440\u0438\u0440\u0443\u0435\u0442 \u0444\u043e\u0440\u043c\u0430\u0442 \u0432\u044b\u0432\u043e\u0434\u0430.",
      "tags": [
        "reasoning"
      ],
      "openrouter": false,
      "provider": "ollama"
    },
    {
      "id": "qwen3-5-122b",
      "name": "Qwen 3.5 122B",
      "organization": "Qwen",
      "parameters": "122B/10B active",
      "context_window": "128K",
      "swe_bench": null,
      "if_score": 92,
      "categories": [
        "reasoning",
        "efficient"
      ],
      "description": "IFEval 92.6%! \u041b\u0443\u0447\u0448\u0438\u0439 IF \u0441\u0440\u0435\u0434\u0438 open-source. Multimodal. Thinking.",
      "tags": [
        "vision",
        "thinking",
        "tools"
      ],
      "openrouter": false,
      "provider": "ollama"
    },
    {
      "id": "qwen3-coder-next",
      "name": "Qwen3-Coder-Next",
      "organization": "Qwen",
      "parameters": "80B/3B active",
      "context_window": "128K",
      "swe_bench": 70,
      "if_score": 84,
      "categories": [
        "coding",
        "efficient"
      ],
      "description": "70% SWE-bench \u0441 3B active! \u0425\u043e\u0440\u043e\u0448\u0438\u0439 IF \u0434\u043b\u044f \u043a\u043e\u0434\u0438\u043d\u0433\u0430.",
      "tags": [
        "coding",
        "efficient",
        "tools"
      ],
      "openrouter": false,
      "provider": "ollama"
    },
    {
      "id": "cogito-2-1-671b",
      "name": "Cogito 2.1 671B",
      "organization": "Deep Cogito",
      "parameters": "671B MoE",
      "context_window": "128K",
      "swe_bench": null,
      "if_score": 76,
      "categories": [
        "reasoning"
      ],
      "description": "MIT \u043b\u0438\u0446\u0435\u043d\u0437\u0438\u044f. 671B total. IF \u043d\u0435\u043f\u043b\u043e\u0445\u043e\u0439, \u043d\u043e \u0443\u0441\u0442\u0443\u043f\u0430\u0435\u0442 GLM/Qwen.",
      "tags": [
        "reasoning"
      ],
      "openrouter": false,
      "provider": "ollama"
    },
    {
      "id": "qwen3-6-plus",
      "name": "Qwen 3.6 Plus",
      "organization": "Qwen",
      "parameters": "Hybrid MoE",
      "context_window": "1M",
      "swe_bench": 78.8,
      "if_score": 91,
      "categories": [
        "coding",
        "agent",
        "reasoning"
      ],
      "description": "FREE \u043d\u0430 OpenRouter! 1M \u043a\u043e\u043d\u0442\u0435\u043a\u0441\u0442. Always-on CoT. \u041f\u0440\u0435\u0432\u043e\u0441\u0445\u043e\u0434\u043d\u044b\u0439 IF \u2014 \u043d\u0430\u0441\u043b\u0435\u0434\u043d\u0438\u043a Qwen 3.5 (92.6%).",
      "tags": [
        "coding",
        "agent",
        "1M-ctx",
        "free"
      ],
      "openrouter": true,
      "provider": "openrouter"
    },
    {
      "id": "step-3-5-flash",
      "name": "Step 3.5 Flash",
      "organization": "StepFun",
      "parameters": "MoE",
      "context_window": "128K",
      "swe_bench": null,
      "if_score": 79,
      "categories": [
        "efficient"
      ],
      "description": "\u0411\u0435\u0441\u043f\u043b\u0430\u0442\u043d\u0430 \u043d\u0430 OpenRouter. IF \u0441\u0440\u0435\u0434\u043d\u0438\u0439.",
      "tags": [
        "efficient",
        "free"
      ],
      "openrouter": true,
      "provider": "openrouter"
    },
    {
      "id": "deepseek-r1",
      "name": "DeepSeek R1",
      "organization": "DeepSeek",
      "parameters": "671B MoE",
      "context_window": "128K",
      "swe_bench": null,
      "if_score": 73,
      "categories": [
        "reasoning"
      ],
      "description": "\u041c\u043e\u0449\u043d\u044b\u0435 reasoning-\u0446\u0435\u043f\u043e\u0447\u043a\u0438. \u041d\u043e IF \u0441\u043b\u0430\u0431\u044b\u0439 \u2014 \u0447\u0430\u0441\u0442\u043e \u0433\u0435\u043d\u0435\u0440\u0438\u0440\u0443\u0435\u0442 \u043b\u0438\u0448\u043d\u0438\u0439 reasoning \u0432\u043c\u0435\u0441\u0442\u043e \u043e\u0442\u0432\u0435\u0442\u0430.",
      "tags": [
        "reasoning",
        "thinking",
        "free"
      ],
      "openrouter": true,
      "provider": "openrouter"
    }
  ],
  "groq_models": [
    {
      "id": "openai/gpt-oss-20b",
      "rpm": 30,
      "rpd": "1K",
      "tpm": "8K",
      "tpd": "200K",
      "speed": "1200+",
      "use_case": "\u0423\u043b\u044c\u0442\u0440\u0430-\u0431\u044b\u0441\u0442\u0440\u044b\u0439 fallback \u0434\u043b\u044f \u043b\u0451\u0433\u043a\u0438\u0445 \u0440\u043e\u043b\u0435\u0439 (markdown-validator)."
    },
    {
      "id": "llama-3.1-8b-instant",
      "rpm": 30,
      "rpd": "14.4K",
      "tpm": "6K",
      "tpd": "500K",
      "speed": "~800",
      "use_case": "14.4K RPD! \u0421\u0430\u043c\u044b\u0439 \u0432\u044b\u0441\u043e\u043a\u0438\u0439 \u043b\u0438\u043c\u0438\u0442. \u0414\u043b\u044f health-check / ping \u0440\u043e\u043b\u0435\u0439."
    },
    {
      "id": "groq/compound",
      "rpm": 30,
      "rpd": "250",
      "tpm": "70K",
      "tpd": "\u2014",
      "speed": "varies",
      "use_case": "\u041c\u0443\u043b\u044c\u0442\u0438\u043c\u043e\u0434\u0435\u043b\u044c\u043d\u0430\u044f \u0430\u0433\u0440\u0435\u0433\u0430\u0446\u0438\u044f. \u0414\u043b\u044f research-\u0437\u0430\u0434\u0430\u0447."
    },
    {
      "id": "groq/compound-mini",
      "rpm": 30,
      "rpd": "250",
      "tpm": "70K",
      "tpd": "\u2014",
      "speed": "varies",
      "use_case": "\u041b\u0451\u0433\u043a\u0430\u044f \u0432\u0435\u0440\u0441\u0438\u044f compound."
    },
    {
      "id": "llama-prompt-guard-2",
      "rpm": 30,
      "rpd": "14.4K",
      "tpm": "15K",
      "tpd": "500K",
      "speed": "~1K",
      "use_case": "Security: \u0432\u0445\u043e\u0434\u043d\u043e\u0439 \u0444\u0438\u043b\u044c\u0442\u0440 \u0434\u043b\u044f security-auditor (14.4K RPD!)."
    }
  ],
  "agent_model_scores": [
    {
      "agent": "lead-developer",
      "current_model_index": 0,
      "current_model_id": "qwen3-coder-480b",
      "reasoning_effort": "H",
      "scores": {
        "qwen3-coder-480b": 92,
        "minimax-m2.5": 86,
        "minimax-m2.7": 82,
        "nemotron-3-super": 70,
        "glm-5.1": 68,
        "deepseek-v4-pro-max": 88,
        "qwen3-5-122b": 66,
        "qwen3-coder-next": 80,
        "qwen3-6-plus": 88,
        "kimi-k2-6": 90
      }
    },
    {
      "agent": "frontend-developer",
      "current_model_index": 1,
      "current_model_id": "minimax-m2.5",
      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 86,
        "minimax-m2.5": 92,
        "minimax-m2.7": 88,
        "nemotron-3-super": 62,
        "glm-5.1": 56,
        "deepseek-v4-pro-max": 82,
        "qwen3-5-122b": 60,
        "qwen3-coder-next": 76,
        "qwen3-6-plus": 88,
        "kimi-k2-6": 86
      }
    },
    {
      "agent": "php-developer",
      "current_model_index": 0,
      "current_model_id": "qwen3-coder-480b",
      "reasoning_effort": "H",
      "scores": {
        "qwen3-coder-480b": 87,
        "minimax-m2.5": 76,
        "minimax-m2.7": 72,
        "nemotron-3-super": 64,
        "glm-5.1": 56,
        "deepseek-v4-pro-max": 74,
        "qwen3-5-122b": 60,
        "qwen3-coder-next": 76,
        "qwen3-6-plus": 84,
        "kimi-k2-6": 86
      }
    },
    {
      "agent": "python-developer",
      "current_model_index": 0,
      "current_model_id": "qwen3-coder-480b",
      "reasoning_effort": "H",
      "scores": {
        "qwen3-coder-480b": 90,
        "minimax-m2.5": 82,
        "minimax-m2.7": 78,
        "nemotron-3-super": 66,
        "glm-5.1": 60,
        "deepseek-v4-pro-max": 78,
        "qwen3-5-122b": 64,
        "qwen3-coder-next": 78,
        "qwen3-6-plus": 88,
        "kimi-k2-6": 88
      }
    },
    {
      "agent": "backend-developer",
      "current_model_index": 0,
      "current_model_id": "qwen3-coder-480b",
      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 91,
        "minimax-m2.5": 84,
        "minimax-m2.7": 80,
        "nemotron-3-super": 68,
        "glm-5.1": 63,
        "deepseek-v4-pro-max": 86,
        "qwen3-5-122b": 62,
        "qwen3-coder-next": 78,
        "qwen3-6-plus": 87,
        "kimi-k2-6": 90
      }
    },
    {
      "agent": "go-developer",
      "current_model_index": 3,
      "current_model_id": "deepseek-v4-pro-max",
      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 85,
        "minimax-m2.5": 78,
        "minimax-m2.7": 74,
        "nemotron-3-super": 66,
        "glm-5.1": 58,
        "deepseek-v4-pro-max": 88,
        "qwen3-5-122b": 58,
        "qwen3-coder-next": 74,
        "qwen3-6-plus": 82,
        "kimi-k2-6": 86
      }
    },
    {
      "agent": "flutter-developer",
      "current_model_index": 0,
      "current_model_id": "qwen3-coder-480b",
      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 86,
        "minimax-m2.5": 70,
        "minimax-m2.7": 66,
        "nemotron-3-super": 60,
        "glm-5.1": 53,
        "deepseek-v4-pro-max": 78,
        "qwen3-5-122b": 58,
        "qwen3-coder-next": 74,
        "qwen3-6-plus": 82,
        "kimi-k2-6": 84
      }
    },
    {
      "agent": "devops-engineer",
      "current_model_index": 5,
      "current_model_id": "kimi-k2-6",
      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 66,
        "minimax-m2.5": 53,
        "minimax-m2.7": 48,
        "nemotron-3-super": 78,
        "glm-5.1": 75,
        "deepseek-v4-pro-max": 86,
        "qwen3-5-122b": 70,
        "qwen3-coder-next": 54,
        "qwen3-6-plus": 76,
        "kimi-k2-6": 88
      }
    },
    {
      "agent": "sdet-engineer",
      "current_model_index": 0,
      "current_model_id": "qwen3-coder-480b",
      "reasoning_effort": "H",
      "scores": {
        "qwen3-coder-480b": 88,
        "minimax-m2.5": 84,
        "minimax-m2.7": 80,
        "nemotron-3-super": 70,
        "glm-5.1": 63,
        "deepseek-v4-pro-max": 84,
        "qwen3-5-122b": 64,
        "qwen3-coder-next": 78,
        "qwen3-6-plus": 84,
        "kimi-k2-6": 87
      }
    },
    {
      "agent": "code-skeptic",
      "current_model_index": 1,
      "current_model_id": "minimax-m2.5",
      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 82,
        "minimax-m2.5": 85,
        "minimax-m2.7": 80,
        "nemotron-3-super": 73,
        "glm-5.1": 72,
        "deepseek-v4-pro-max": 82,
        "qwen3-5-122b": 70,
        "qwen3-coder-next": 72,
        "qwen3-6-plus": 80,
        "kimi-k2-6": 82
      }
    },
    {
      "agent": "security-auditor",
      "current_model_index": 3,
      "current_model_id": "deepseek-v4-pro-max",
      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 76,
        "minimax-m2.5": 74,
        "minimax-m2.7": 68,
        "nemotron-3-super": 76,
        "glm-5.1": 68,
        "deepseek-v4-pro-max": 80,
        "qwen3-5-122b": 72,
        "qwen3-coder-next": 64,
        "qwen3-6-plus": 75,
        "kimi-k2-6": 80
      }
    },
    {
      "agent": "performance-engineer",
      "current_model_index": 3,
      "current_model_id": "deepseek-v4-pro-max",
      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 78,
        "minimax-m2.5": 75,
        "minimax-m2.7": 70,
        "nemotron-3-super": 78,
        "glm-5.1": 74,
        "deepseek-v4-pro-max": 84,
        "qwen3-5-122b": 70,
        "qwen3-coder-next": 67,
        "qwen3-6-plus": 76,
        "kimi-k2-6": 82
      }
    },
    {
      "agent": "the-fixer",
      "current_model_index": 5,
      "current_model_id": "kimi-k2-6",
      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 89,
        "minimax-m2.5": 88,
        "minimax-m2.7": 84,
        "nemotron-3-super": 71,
        "glm-5.1": 64,
        "deepseek-v4-pro-max": 88,
        "qwen3-5-122b": 64,
        "qwen3-coder-next": 82,
        "qwen3-6-plus": 86,
        "kimi-k2-6": 90
      }
    },
    {
      "agent": "browser-automation",
      "current_model_index": 0,
      "current_model_id": "qwen3-coder-480b",
      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 87,
        "minimax-m2.5": 72,
        "minimax-m2.7": 68,
        "nemotron-3-super": 61,
        "glm-5.1": 53,
        "deepseek-v4-pro-max": 82,
        "qwen3-5-122b": 56,
        "qwen3-coder-next": 72,
        "qwen3-6-plus": 82,
        "kimi-k2-6": 86
      }
    },
    {
      "agent": "visual-tester",
      "current_model_index": 0,
      "current_model_id": "qwen3-coder-480b",
      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 82,
        "minimax-m2.5": 68,
        "minimax-m2.7": 64,
        "nemotron-3-super": 55,
        "glm-5.1": 48,
        "deepseek-v4-pro-max": 76,
        "qwen3-5-122b": 54,
        "qwen3-coder-next": 66,
        "qwen3-6-plus": 76,
        "kimi-k2-6": 78
      }
    },
    {
      "agent": "system-analyst",
      "current_model_index": 7,
      "current_model_id": "glm-5.1",
      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 70,
        "minimax-m2.5": 66,
        "minimax-m2.7": 63,
        "nemotron-3-super": 74,
        "glm-5.1": 82,
        "deepseek-v4-pro-max": 88,
        "qwen3-5-122b": 76,
        "qwen3-coder-next": 58,
        "qwen3-6-plus": 80,
        "kimi-k2-6": 86
      }
    },
    {
      "agent": "capability-analyst",
      "current_model_index": 7,
      "current_model_id": "glm-5.1",
      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 72,
        "minimax-m2.5": 68,
        "minimax-m2.7": 66,
        "nemotron-3-super": 76,
        "glm-5.1": 78,
        "deepseek-v4-pro-max": 82,
        "qwen3-5-122b": 75,
        "qwen3-coder-next": 60,
        "qwen3-6-plus": 79,
        "kimi-k2-6": 82
      }
    },
    {
      "agent": "orchestrator",
      "current_model_index": 5,
      "current_model_id": "kimi-k2-6",
      "reasoning_effort": "H",
      "scores": {
        "qwen3-coder-480b": 74,
        "minimax-m2.5": 70,
        "minimax-m2.7": 68,
        "nemotron-3-super": 80,
        "glm-5.1": 82,
        "deepseek-v4-pro-max": 86,
        "qwen3-5-122b": 78,
        "qwen3-coder-next": 62,
        "qwen3-6-plus": 84,
        "kimi-k2-6": 92
      }
    },
    {
      "agent": "release-manager",
      "current_model_index": 7,
      "current_model_id": "glm-5.1",
      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 72,
        "minimax-m2.5": 66,
        "minimax-m2.7": 64,
        "nemotron-3-super": 74,
        "glm-5.1": 76,
        "deepseek-v4-pro-max": 78,
        "qwen3-5-122b": 72,
        "qwen3-coder-next": 60,
        "qwen3-6-plus": 76,
        "kimi-k2-6": 78
      }
    },
    {
      "agent": "evaluator",
      "current_model_index": 7,
      "current_model_id": "glm-5.1",
      "reasoning_effort": "H",
      "scores": {
        "qwen3-coder-480b": 70,
        "minimax-m2.5": 73,
        "minimax-m2.7": 70,
        "nemotron-3-super": 78,
        "glm-5.1": 78,
        "deepseek-v4-pro-max": 84,
        "qwen3-5-122b": 76,
        "qwen3-coder-next": 58,
        "qwen3-6-plus": 81,
        "kimi-k2-6": 84
      }
    },
    {
      "agent": "prompt-optimizer",
      "current_model_index": 12,
      "current_model_id": "qwen3-6-plus",
      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 76,
        "minimax-m2.5": 74,
        "minimax-m2.7": 72,
        "nemotron-3-super": 76,
        "glm-5.1": 75,
        "deepseek-v4-pro-max": 80,
        "qwen3-5-122b": 74,
        "qwen3-coder-next": 64,
        "qwen3-6-plus": 83,
        "kimi-k2-6": 82
      }
    },
    {
      "agent": "product-owner",
      "current_model_index": 7,
      "current_model_id": "glm-5.1",
      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 60,
        "minimax-m2.5": 56,
        "minimax-m2.7": 54,
        "nemotron-3-super": 74,
        "glm-5.1": 78,
        "deepseek-v4-pro-max": 76,
        "qwen3-5-122b": 74,
        "qwen3-coder-next": 48,
        "qwen3-6-plus": 78,
        "kimi-k2-6": 76
      }
    },
    {
      "agent": "pipeline-judge",
      "current_model_index": 7,
      "current_model_id": "glm-5.1",
      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 64,
        "minimax-m2.5": 68,
        "minimax-m2.7": 65,
        "nemotron-3-super": 78,
        "glm-5.1": 76,
        "deepseek-v4-pro-max": 82,
        "qwen3-5-122b": 74,
        "qwen3-coder-next": 56,
        "qwen3-6-plus": 80,
        "kimi-k2-6": 84
      }
    },
    {
      "agent": "workflow-architect",
      "current_model_index": 7,
      "current_model_id": "glm-5.1",
      "reasoning_effort": "H",
      "scores": {
        "qwen3-coder-480b": 68,
        "minimax-m2.5": 62,
        "minimax-m2.7": 60,
        "nemotron-3-super": 76,
        "glm-5.1": 76,
        "deepseek-v4-pro-max": 80,
        "qwen3-5-122b": 72,
        "qwen3-coder-next": 56,
        "qwen3-6-plus": 80,
        "kimi-k2-6": 82
      }
    },
    {
      "agent": "markdown-validator",
      "current_model_index": 3,
      "current_model_id": "deepseek-v4-pro-max",
      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 43,
        "minimax-m2.5": 38,
        "minimax-m2.7": 36,
        "nemotron-3-super": 52,
        "glm-5.1": 55,
        "deepseek-v4-pro-max": 68,
        "qwen3-5-122b": 56,
        "qwen3-coder-next": 40,
        "qwen3-6-plus": 50,
        "kimi-k2-6": 56
      }
    },
    {
      "agent": "agent-architect",
      "current_model_index": 5,
      "current_model_id": "kimi-k2-6",
      "reasoning_effort": "H",
      "scores": {
        "qwen3-coder-480b": 78,
        "minimax-m2.5": 72,
        "minimax-m2.7": 70,
        "nemotron-3-super": 78,
        "glm-5.1": 76,
        "deepseek-v4-pro-max": 82,
        "qwen3-5-122b": 76,
        "qwen3-coder-next": 66,
        "qwen3-6-plus": 82,
        "kimi-k2-6": 86
      }
    },
    {
      "agent": "planner",
      "current_model_index": 3,
      "current_model_id": "deepseek-v4-pro-max",
      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 72,
        "minimax-m2.5": 68,
        "minimax-m2.7": 66,
        "nemotron-3-super": 80,
        "glm-5.1": 78,
        "deepseek-v4-pro-max": 88,
        "qwen3-5-122b": 78,
        "qwen3-coder-next": 60,
        "qwen3-6-plus": 85,
        "kimi-k2-6": 86
      }
    },
    {
      "agent": "reflector",
      "current_model_index": 3,
      "current_model_id": "deepseek-v4-pro-max",
      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 68,
        "minimax-m2.5": 66,
        "minimax-m2.7": 64,
        "nemotron-3-super": 78,
        "glm-5.1": 76,
        "deepseek-v4-pro-max": 84,
        "qwen3-5-122b": 76,
        "qwen3-coder-next": 56,
        "qwen3-6-plus": 82,
        "kimi-k2-6": 80
      }
    },
    {
      "agent": "memory-manager",
      "current_model_index": 12,
      "current_model_id": "qwen3-6-plus",
      "reasoning_effort": "M",
      "scores": {
        "qwen3-coder-480b": 63,
        "minimax-m2.5": 58,
        "minimax-m2.7": 56,
        "nemotron-3-super": 86,
        "glm-5.1": 72,
        "deepseek-v4-pro-max": 86,
        "qwen3-5-122b": 70,
        "qwen3-coder-next": 50,
        "qwen3-6-plus": 87,
        "kimi-k2-6": 84
      }
    },
    {
      "agent": "architect-indexer",
      "current_model_index": 7,
      "current_model_id": "glm-5.1",
      "reasoning_effort": "H",
      "scores": {
        "qwen3-coder-480b": 70,
        "minimax-m2.5": 64,
        "minimax-m2.7": 62,
        "nemotron-3-super": 74,
        "glm-5.1": 80,
        "deepseek-v4-pro-max": 78,
        "qwen3-5-122b": 76,
        "qwen3-coder-next": 58,
        "qwen3-6-plus": 80,
        "kimi-k2-6": 84
      }
    }
  ],
  "if_scores": {
    "qwen3-coder-480b": 88,
    "minimax-m2.5": 82,
    "minimax-m2.7": 78,
    "nemotron-3-super": 85,
    "glm-5.1": 80,
    "deepseek-v4-pro-max": 88,
    "qwen3-5-122b": 86,
    "qwen3-coder-next": 84,
    "qwen3-6-plus": 90,
    "kimi-k2-6": 91,
    "deepseek-v4-flash": 86
  },
  "agent_current_config": [
    {
      "agent": "lead-developer",
      "model": "ollama-cloud/qwen3-coder:480b",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "qwen",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "frontend-developer",
      "model": "ollama-cloud/minimax-m2.5",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "minimax",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "php-developer",
      "model": "ollama-cloud/qwen3-coder:480b",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "qwen",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "python-developer",
      "model": "ollama-cloud/qwen3-coder:480b",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "qwen",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "backend-developer",
      "model": "ollama-cloud/qwen3-coder:480b",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "qwen",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "go-developer",
      "model": "ollama-cloud/deepseek-v4-pro-max",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "qwen",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "flutter-developer",
      "model": "ollama-cloud/qwen3-coder:480b",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "qwen",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "devops-engineer",
      "model": "ollama-cloud/kimi-k2.6:cloud",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "kimi",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "sdet-engineer",
      "model": "ollama-cloud/qwen3-coder:480b",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "qwen",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "code-skeptic",
      "model": "ollama-cloud/minimax-m2.5",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "minimax",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "security-auditor",
      "model": "ollama-cloud/deepseek-v4-pro-max",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "nemotron",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "performance-engineer",
      "model": "ollama-cloud/deepseek-v4-pro-max",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "nemotron",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "the-fixer",
      "model": "ollama-cloud/kimi-k2.6:cloud",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "kimi",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "browser-automation",
      "model": "ollama-cloud/qwen3-coder:480b",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "qwen",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "visual-tester",
      "model": "ollama-cloud/qwen3-coder:480b",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "qwen",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "system-analyst",
      "model": "ollama-cloud/glm-5.1",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "glm",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "capability-analyst",
      "model": "ollama-cloud/glm-5.1",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "glm",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "orchestrator",
      "model": "ollama-cloud/kimi-k2.6:cloud",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "kimi",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "release-manager",
      "model": "ollama-cloud/glm-5.1",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "glm",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "evaluator",
      "model": "ollama-cloud/glm-5.1",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "glm",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "prompt-optimizer",
      "model": "ollama-cloud/qwen3.6-plus",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "qwen",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "product-owner",
      "model": "ollama-cloud/glm-5.1",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "glm",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "pipeline-judge",
      "model": "ollama-cloud/glm-5.1",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "glm",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "workflow-architect",
      "model": "ollama-cloud/glm-5.1",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "glm",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "markdown-validator",
      "model": "ollama-cloud/deepseek-v4-pro-max",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "nemotron",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "agent-architect",
      "model": "ollama-cloud/kimi-k2.6:cloud",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "glm",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "planner",
      "model": "ollama-cloud/deepseek-v4-pro-max",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "nemotron",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "reflector",
      "model": "ollama-cloud/deepseek-v4-pro-max",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "nemotron",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "memory-manager",
      "model": "ollama-cloud/qwen3.6-plus",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "nemotron",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    },
    {
      "agent": "architect-indexer",
      "model": "ollama-cloud/glm-5.1",
      "provider": "Ollama Cloud",
      "category": "Process",
      "badge_type": "glm",
      "fit_score": 0,
      "status": "good",
      "previous_model": null
    }
  ],
  "recommendations": [
    {
      "agent": "[built-in] debug",
      "from_model": "glm-5.1.1 (88)",
      "from_provider": "Ollama",
      "to_model": "V4-Pro Max (\u260590) / K2.6 (\u260590) RE:High",
      "to_provider": "Ollama Cloud",
      "impact": "high",
      "quality_change": "+2%",
      "speed_change": "~1x",
      "context_change": "200K\u21921M",
      "provider_change": "Ollama Cloud",
      "rationale": "\u2605 \u043c\u0430\u0442\u0440\u0438\u0446\u044b: V4-Pro=90 \u0438 K2.6=90 (TIE!), GLM-5.1=88. V4-Pro: LiveCodeBench 93.5(#1!), Terminal 67.9, 1M ctx \u0434\u043b\u044f \u043f\u043e\u043b\u043d\u043e\u0433\u043e \u043f\u0440\u043e\u0435\u043a\u0442\u0430. K2.6: 13h auto sessions. \u041e\u0431\u0430 \u043b\u0443\u0447\u0448\u0435 GLM-5.1. RE:High \u0434\u043b\u044f debug."
    },
    {
      "agent": "planner",
      "from_model": "nemotron-3-super (80)",
      "from_provider": "Ollama",
      "to_model": "V4-Pro Max (\u260588) RE:High",
      "to_provider": "Ollama Cloud",
      "impact": "high",
      "quality_change": "+10%",
      "speed_change": "~1x",
      "context_change": "1M",
      "provider_change": "Ollama Cloud",
      "rationale": "\u2605 \u043c\u0430\u0442\u0440\u0438\u0446\u044b: V4-Pro=88(\u043b\u0443\u0447\u0448\u0438\u0439!), K2.6=86, GLM-5.1=85, Nem=80. V4-Pro: GPQA 90.1 (reasoning), 1M ctx \u0441\u043e\u0445\u0440\u0430\u043d\u044f\u0435\u0442\u0441\u044f (vs \u043f\u043e\u0442\u0435\u0440\u044f \u043f\u0440\u0438 K2.6). RE:High \u0434\u043b\u044f chain-of-thought planning."
    },
    {
      "agent": "go-developer",
      "from_model": "qwen3-coder:480b (85)",
      "from_provider": "Ollama",
      "to_model": "V4-Pro Max (\u260588) RE:Medium",
      "to_provider": "Ollama Cloud",
      "impact": "medium",
      "quality_change": "+4%",
      "speed_change": "~1x",
      "context_change": "256K\u21921M",
      "provider_change": "Ollama Cloud",
      "rationale": "\u2605 \u043c\u0430\u0442\u0440\u0438\u0446\u044b: V4-Pro=88(\u043b\u0443\u0447\u0448\u0438\u0439 \u0434\u043b\u044f Go!), K2.6=86, Qwen3Coder=85. DeepSeek \u043c\u043e\u0434\u0435\u043b\u0438 \u0442\u0440\u0430\u0434\u0438\u0446\u0438\u043e\u043d\u043d\u043e \u0441\u0438\u043b\u044c\u043d\u044b \u0432 Go/Rust. 1M ctx \u0434\u043b\u044f \u043a\u0440\u0443\u043f\u043d\u044b\u0445 Go-\u043f\u0440\u043e\u0435\u043a\u0442\u043e\u0432."
    },
    {
      "agent": "history-miner",
      "from_model": "nemotron-3-super (\u260585)",
      "from_provider": "Ollama",
      "to_model": "V4-Pro Max (86) + Nem fallback",
      "to_provider": "Hybrid",
      "impact": "medium",
      "quality_change": "+1%",
      "speed_change": "~1x",
      "context_change": "1M",
      "provider_change": "Ollama Cloud + Ollama",
      "rationale": "V4-Pro=86 \u0447\u0443\u0442\u044c \u043b\u0443\u0447\u0448\u0435 Nemotron=85. 1M ctx \u0443 \u043e\u0431\u043e\u0438\u0445. MRCR 83.5 \u0443 V4-Pro \u2014 \u043b\u0443\u0447\u0448\u0435\u0435 long-context retrieval. Nemotron \u043a\u0430\u043a fallback (RULER 91.75%)."
    },
    {
      "agent": "frontend-dev \u2192 M2.5",
      "from_model": "qwen3-coder (90)",
      "from_provider": "Ollama",
      "to_model": "MiniMax M2.5 (\u260592) \u2705",
      "to_provider": "Ollama",
      "impact": "low",
      "quality_change": "+2%",
      "speed_change": "=",
      "context_change": "204K",
      "provider_change": "Ollama",
      "rationale": "Spec-writing, UI architect. APPLIED."
    },
    {
      "agent": "devops \u2192 K2.6",
      "from_model": "deepseek-v3.2",
      "from_provider": "",
      "to_model": "kimi-k2.6:cloud \u2705",
      "to_provider": "Ollama Cloud",
      "impact": "low",
      "quality_change": "+35%",
      "speed_change": "=",
      "context_change": "256K",
      "provider_change": "",
      "rationale": "APPLIED."
    },
    {
      "agent": "orchestrator",
      "from_model": "glm-5.1.1 (\u260590)",
      "from_provider": "Ollama",
      "to_model": "K2.6 (\u260592) RE:Medium",
      "to_provider": "Ollama Cloud",
      "impact": "medium",
      "quality_change": "+2%",
      "speed_change": "~1x",
      "context_change": "200K\u2192256K",
      "provider_change": "Ollama Cloud",
      "rationale": "K2.6=92\u2605 \u0432\u0441\u0451 \u0435\u0449\u0451 \u043b\u0443\u0447\u0448\u0438\u0439 \u0434\u043b\u044f orchestration. V4-Pro=86 \u0441\u043b\u0430\u0431\u0435\u0435. 300 sub-agent swarm."
    },
    {
      "agent": "the-fixer",
      "from_model": "minimax-m2.5 (\u260588)",
      "from_provider": "Ollama",
      "to_model": "V4-Pro (\u260588) / K2.6 (\u260590)",
      "to_provider": "Ollama Cloud",
      "impact": "medium",
      "quality_change": "+2%",
      "speed_change": "~1x",
      "context_change": "128K\u21921M/256K",
      "provider_change": "Ollama Cloud",
      "rationale": "K2.6=90(\u043b\u0443\u0447\u0448\u0438\u0439), V4-Pro=88=M2.5. M2.5 SWE-bench 80.2% \u0441\u0442\u0430\u0431\u0438\u043b\u044c\u043d\u0435\u0435. \u041d\u0435 \u0441\u0440\u043e\u0447\u043d\u043e."
    },
    {
      "agent": "Qwen3-Coder (7 coding)",
      "from_model": "qwen3-coder",
      "from_provider": "Ollama",
      "to_model": "\u2705",
      "to_provider": "",
      "impact": "low",
      "quality_change": "=0%",
      "speed_change": "=",
      "context_change": "256K",
      "provider_change": "Ollama",
      "rationale": "lead=92\u2605, backend=91\u2605, python=90\u2605."
    },
    {
      "agent": "GLM-5.1 (12 agents)",
      "from_model": "glm-5.1.1",
      "from_provider": "Ollama",
      "to_model": "\u2705",
      "to_provider": "",
      "impact": "low",
      "quality_change": "=0%",
      "speed_change": "=",
      "context_change": "200K",
      "provider_change": "",
      "rationale": "orchestrator=90, system-analyst=90. SWE-Pro #1."
    },
    {
      "agent": "Kimi K2.6 (3 agents)",
      "from_model": "kimi-k2.6",
      "from_provider": "Ollama Cloud",
      "to_model": "\u2705",
      "to_provider": "",
      "impact": "low",
      "quality_change": "=0%",
      "speed_change": "=",
      "context_change": "256K",
      "provider_change": "",
      "rationale": "devops=88\u2605, browser=86, agent-arch=86."
    }
  ],
  "impact_data": [
    {
      "category": "debug GLM5.1\u2192V4-Pro/K2.6",
      "before": 88,
      "after": 90,
      "delta": 2,
      "notes": "LiveCodeBench 93.5, Terminal 67.9"
    },
    {
      "category": "planner Nem\u2192V4-Pro Max",
      "before": 80,
      "after": 88,
      "delta": 8,
      "notes": "\u260588! GPQA 90.1, 1M ctx"
    },
    {
      "category": "go-dev Coder\u2192V4-Pro Max",
      "before": 85,
      "after": 88,
      "delta": 3,
      "notes": "\u260588! Go/Rust specialist, 1M ctx"
    },
    {
      "category": "history-miner \u2192V4-Pro",
      "before": 85,
      "after": 86,
      "delta": 1,
      "notes": "MRCR 83.5, long-context"
    },
    {
      "category": "orchestrator \u2192K2.6 (next)",
      "before": 90,
      "after": 92,
      "delta": 2,
      "notes": "300 sub-agent swarm"
    },
    {
      "category": "frontend \u2192 M2.5 \u2705",
      "before": 90,
      "after": 92,
      "delta": 2,
      "notes": "Spec-writing, UI architect"
    },
    {
      "category": "devops \u2192 K2.6 \u2705",
      "before": 65,
      "after": 88,
      "delta": 23,
      "notes": "IF:65\u219291! Terminal 66.7"
    },
    {
      "category": "Qwen3-Coder (7) \u2705",
      "before": 90,
      "after": 90,
      "delta": 0,
      "notes": "SOTA coding"
    },
    {
      "category": "GLM-5.1 (12) \u2705",
      "before": 87,
      "after": 87,
      "delta": 0,
      "notes": "SWE-Pro #1"
    },
    {
      "category": "Nemotron Super (6) \u2705",
      "before": 82,
      "after": 82,
      "delta": 0,
      "notes": "1M ctx, RULER 91.75%"
    }
  ],
  "benchmark_comparison": {
    "benchmarks": [
      {
        "name": "SWE-V",
        "full_name": "SWE-Bench Verified",
        "description": "GitHub issue resolution (500 tasks)",
        "roles": "lead-dev, backend, fixer"
      },
      {
        "name": "SWE-P",
        "full_name": "SWE-Bench Pro",
        "description": "Multi-lang, decontaminated (1865 tasks)",
        "roles": "all coding agents"
      },
      {
        "name": "T-Bench",
        "full_name": "Terminal-Bench 2.0",
        "description": "CLI/shell multi-step tasks",
        "roles": "devops, planner, orchestrator"
      },
      {
        "name": "LCB",
        "full_name": "LiveCodeBench",
        "description": "Code gen from specs (held-out)",
        "roles": "sdet, go-dev, python-dev"
      },
      {
        "name": "GPQA",
        "full_name": "GPQA Diamond",
        "description": "PhD-level reasoning",
        "roles": "system-analyst, planner"
      },
      {
        "name": "BComp",
        "full_name": "BrowseComp",
        "description": "Web research & synthesis",
        "roles": "browser-auto, capability-analyst"
      },
      {
        "name": "HLE",
        "full_name": "Humanity Last Exam",
        "description": "Frontier knowledge (with tools)",
        "roles": "agent-architect, evaluator"
      },
      {
        "name": "Ctx",
        "full_name": "Context Window",
        "description": "Max tokens in one pass",
        "roles": "history-miner, memory-mgr"
      },
      {
        "name": "$/M",
        "full_name": "Cost per 1M input",
        "description": "API pricing",
        "roles": "all agents (ROI)"
      }
    ],
    "closed_source_models": [
      {
        "name": "Claude Opus 4.7",
        "organization": "Anthropic",
        "scores": [
          87.6,
          64.3,
          69.4,
          null,
          94.2,
          79.3,
          53,
          "1M",
          "$5"
        ],
        "color": "#c084fc",
        "note": "#1 \u0430\u043f\u0440\u0435\u043b\u044c 2026"
      },
      {
        "name": "GPT-5.5",
        "organization": "OpenAI",
        "scores": [
          null,
          58.6,
          82.7,
          null,
          null,
          83.4,
          57.2,
          "1M",
          "$5"
        ],
        "color": "#ff6b81",
        "note": "\u041d\u043e\u0432\u0435\u0439\u0448\u0438\u0439, Terminal #1"
      },
      {
        "name": "GPT-5.4",
        "organization": "OpenAI",
        "scores": [
          78.2,
          59.1,
          75.1,
          null,
          94.4,
          82.7,
          58.7,
          "200K",
          "$2.50"
        ],
        "color": "#ff6b81",
        "note": "Reasoning, math"
      },
      {
        "name": "Gemini 3.1 Pro",
        "organization": "Google",
        "scores": [
          80.6,
          46.1,
          68.5,
          null,
          94.3,
          85.9,
          51.4,
          "2M",
          "$2"
        ],
        "color": "#facc15",
        "note": "ARC-AGI 77.1%, \u0434\u0435\u0448\u0451\u0432\u044b\u0439"
      },
      {
        "name": "Claude Sonnet 4.6",
        "organization": "Anthropic",
        "scores": [
          79.6,
          null,
          null,
          null,
          null,
          null,
          null,
          "200K",
          "$3"
        ],
        "color": "#c084fc",
        "note": "5\u00d7 \u0434\u0435\u0448\u0435\u0432\u043b\u0435 Opus"
      },
      {
        "name": "GPT-5.3-Codex",
        "organization": "OpenAI",
        "scores": [
          85,
          57,
          77.3,
          null,
          null,
          null,
          null,
          "200K",
          "$6"
        ],
        "color": "#ff6b81",
        "note": "Coding specialist"
      }
    ],
    "apaw_models": [
      {
        "name": "Kimi K2.6",
        "organization": "APAW",
        "scores": [
          80.2,
          58.6,
          66.7,
          87.2,
          null,
          83.2,
          54,
          "256K",
          "$0.95"
        ],
        "color": "#00ff94",
        "note": "devops, browser, architect (3)"
      },
      {
        "name": "GLM-5.1",
        "organization": "APAW",
        "scores": [
          null,
          58.4,
          63.5,
          null,
          86.2,
          68.7,
          null,
          "200K",
          "~$0.50"
        ],
        "color": "#00ff94",
        "note": "12 agents! orchestrator, eval..."
      },
      {
        "name": "V4-Pro Max",
        "organization": "APAW",
        "scores": [
          80.6,
          55.4,
          67.9,
          93.5,
          90.1,
          83.4,
          48.2,
          "1M",
          "$0.42"
        ],
        "color": "#00d4ff",
        "note": "planner, go-dev (\u0440\u0435\u043a.)"
      },
      {
        "name": "Qwen3-Coder 480B",
        "organization": "APAW",
        "scores": [
          66.5,
          null,
          null,
          null,
          null,
          null,
          null,
          "256K",
          "~$0.50"
        ],
        "color": "#00ff94",
        "note": "7 coding agents"
      },
      {
        "name": "MiniMax M2.5",
        "organization": "APAW",
        "scores": [
          80.2,
          51.3,
          null,
          null,
          null,
          76.3,
          null,
          "204K",
          "$0.15"
        ],
        "color": "#00ff94",
        "note": "frontend, skeptic, fixer (3)"
      },
      {
        "name": "Nemotron Super",
        "organization": "APAW",
        "scores": [
          60.5,
          null,
          null,
          null,
          null,
          null,
          null,
          "1M",
          "~$0.40"
        ],
        "color": "#00ff94",
        "note": "6 agents (memory, history)"
      }
    ]
  }
}