APAW/agent-evolution/data/model-benchmarks.json

{
  "version": "1.0.0",
  "generated": "2026-05-24T01:00:00Z",
  "source": "ollama-cloud-models-v2026-05-24",
  "total_agents": 34,
  "total_models_tracked": 15,
  "providers": ["ollama-cloud"],
  "models": [
    {
      "id": "deepseek-v4-pro-max",
      "name": "DeepSeek V4-Pro Max",
      "organization": "DeepSeek",
      "parameters": "1.6T/49B active MoE",
      "context_window": "1M",
      "swe_bench": 80.6,
      "if_score": 89,
      "categories": ["coding", "agent", "reasoning"],
      "provider": "ollama-cloud",
      "updated": "2026-05-03",
      "pulls": "71.6K"
    },
    {
      "id": "deepseek-v4-flash",
      "name": "DeepSeek V4-Flash",
      "organization": "DeepSeek",
      "parameters": "284B/13B active MoE",
      "context_window": "1M",
      "swe_bench": 79,
      "if_score": 86,
      "categories": ["coding", "efficient", "agent"],
      "provider": "ollama-cloud",
      "updated": "2026-05-03",
      "pulls": "84.4K"
    },
    {
      "id": "kimi-k2.6",
      "name": "Kimi K2.6",
      "organization": "Moonshot AI",
      "parameters": "1T/32B active MoE",
      "context_window": "256K→1M",
      "swe_bench": 80.2,
      "if_score": 91,
      "categories": ["coding", "agent", "multimodal", "vision"],
      "provider": "ollama-cloud",
      "updated": "2026-04-24",
      "pulls": "259.7K"
    },
    {
      "id": "kimi-k2.5",
      "name": "Kimi K2.5",
      "organization": "Moonshot AI",
      "parameters": "1T/32B active MoE",
      "context_window": "256K",
      "swe_bench": 78,
      "if_score": 90,
      "categories": ["coding", "agent", "multimodal", "vision"],
      "provider": "ollama-cloud",
      "updated": "2026-02-24",
      "pulls": "293.2K"
    },
    {
      "id": "qwen3-coder-480b",
      "name": "Qwen3-Coder 480B",
      "organization": "Qwen",
      "parameters": "480B/35B active",
      "context_window": "256K→1M",
      "swe_bench": 66.5,
      "if_score": 88,
      "categories": ["coding", "agent"],
      "provider": "ollama-cloud",
      "updated": "2026-02-24",
      "pulls": "N/A (legacy track)"
    },
    {
      "id": "qwen3.5-122b",
      "name": "Qwen 3.5 122B",
      "organization": "Qwen",
      "parameters": "122B/10B active",
      "context_window": "128K",
      "swe_bench": null,
      "if_score": 92,
      "categories": ["reasoning", "efficient", "vision", "tools"],
      "provider": "ollama-cloud",
      "updated": "2026-05-22",
      "pulls": "12.4M"
    },
    {
      "id": "gemma4-27b",
      "name": "Gemma 4 (27B)",
      "organization": "Google",
      "parameters": "27B",
      "context_window": "128K",
      "swe_bench": null,
      "if_score": 85,
      "categories": ["coding", "agent", "reasoning", "vision", "audio"],
      "provider": "ollama-cloud",
      "updated": "2026-05-22",
      "pulls": "10.1M",
      "note": "Updated 2 days ago. Frontier-level performance at each size."
    },
    {
      "id": "minimax-m2.5",
      "name": "MiniMax M2.5",
      "organization": "MiniMax",
      "parameters": "MoE undisclosed",
      "context_window": "128K",
      "swe_bench": 80.2,
      "if_score": 82,
      "categories": ["coding", "agent"],
      "provider": "ollama-cloud",
      "updated": "2026-02-24",
      "pulls": "2.2M"
    },
    {
      "id": "minimax-m2.7",
      "name": "MiniMax M2.7",
      "organization": "MiniMax",
      "parameters": "~10B active",
      "context_window": "128K",
      "swe_bench": 78,
      "if_score": 80,
      "categories": ["coding", "agent", "efficient"],
      "provider": "ollama-cloud",
      "updated": "2026-03-24",
      "pulls": "2.2M"
    },
    {
      "id": "glm-5.1",
      "name": "GLM-5.1",
      "organization": "Z.ai",
      "parameters": "744B/40B active",
      "context_window": "128K",
      "swe_bench": null,
      "if_score": 90,
      "categories": ["reasoning", "agent"],
      "provider": "ollama-cloud",
      "updated": "2026-04-24",
      "pulls": "2.2M",
      "note": "Next-gen flagship. SWE-Bench Pro SOTA."
    },
    {
      "id": "glm-5",
      "name": "GLM-5",
      "organization": "Z.ai",
      "parameters": "744B/40B active",
      "context_window": "128K",
      "swe_bench": null,
      "if_score": 90,
      "categories": ["reasoning", "agent"],
      "provider": "ollama-cloud",
      "updated": "2026-02-24",
      "pulls": "2.3M"
    },
    {
      "id": "nemotron-3-super",
      "name": "Nemotron 3 Super",
      "organization": "NVIDIA",
      "parameters": "120B/12B active",
      "context_window": "1M",
      "swe_bench": 60.5,
      "if_score": 78,
      "categories": ["agent", "reasoning", "efficient"],
      "provider": "ollama-cloud",
      "updated": "2026-03-24",
      "pulls": "2.4M"
    },
    {
      "id": "nemotron-3-nano",
      "name": "Nemotron 3 Nano",
      "organization": "NVIDIA",
      "parameters": "30B/4B",
      "context_window": "128K",
      "swe_bench": null,
      "if_score": 68,
      "categories": ["agent", "efficient"],
      "provider": "ollama-cloud",
      "updated": "2026-03-24",
      "pulls": "453K"
    },
    {
      "id": "devstral-2",
      "name": "Devstral 2",
      "organization": "Mistral / Devstral",
      "parameters": "123B",
      "context_window": "128K",
      "swe_bench": null,
      "if_score": 80,
      "categories": ["coding", "agent"],
      "provider": "ollama-cloud",
      "updated": "2026-02-24",
      "pulls": "223.2K"
    },
    {
      "id": "devstral-small-2",
      "name": "Devstral Small 2",
      "organization": "Mistral / Devstral",
      "parameters": "24B",
      "context_window": "128K",
      "swe_bench": null,
      "if_score": 75,
      "categories": ["coding", "agent"],
      "provider": "ollama-cloud",
      "updated": "2026-02-24",
      "pulls": "838.8K"
    }
  ],
  "if_scores": {
    "deepseek-v4-pro-max": 89,
    "deepseek-v4-flash": 86,
    "kimi-k2.6": 91,
    "kimi-k2.5": 90,
    "qwen3-coder-480b": 88,
    "qwen3.5-122b": 92,
    "gemma4-27b": 85,
    "minimax-m2.5": 82,
    "minimax-m2.7": 80,
    "glm-5.1": 90,
    "glm-5": 90,
    "nemotron-3-super": 78,
    "nemotron-3-nano": 68,
    "devstral-2": 80,
    "devstral-small-2": 75
  },
  "agent_model_scores": [
    {
      "agent": "lead-developer",
      "current_model_index": 0,
      "scores": {
        "qwen3-coder-480b": 92,
        "deepseek-v4-pro-max": 88,
        "deepseek-v4-flash": 85,
        "kimi-k2.6": 90,
        "kimi-k2.5": 88,
        "qwen3.5-122b": 86,
        "gemma4-27b": 83,
        "minimax-m2.5": 86,
        "minimax-m2.7": 82,
        "glm-5.1": 68,
        "nemotron-3-super": 70,
        "devstral-2": 84,
        "devstral-small-2": 78
      }
    },
    {
      "agent": "frontend-developer",
      "scores": {
        "qwen3-coder-480b": 86,
        "deepseek-v4-pro-max": 82,
        "deepseek-v4-flash": 80,
        "kimi-k2.6": 86,
        "kimi-k2.5": 84,
        "qwen3.5-122b": 84,
        "gemma4-27b": 85,
        "minimax-m2.5": 92,
        "minimax-m2.7": 88,
        "glm-5.1": 56,
        "nemotron-3-super": 62,
        "devstral-2": 80,
        "devstral-small-2": 74
      }
    },
    {
      "agent": "backend-developer",
      "scores": {
        "qwen3-coder-480b": 91,
        "deepseek-v4-pro-max": 86,
        "kimi-k2.6": 90,
        "qwen3.5-122b": 85,
        "gemma4-27b": 84,
        "minimax-m2.5": 84,
        "minimax-m2.7": 80,
        "glm-5.1": 63,
        "nemotron-3-super": 68,
        "devstral-2": 82,
        "devstral-small-2": 76
      }
    },
    {
      "agent": "go-developer",
      "scores": {
        "qwen3-coder-480b": 85,
        "deepseek-v4-pro-max": 88,
        "deepseek-v4-flash": 84,
        "kimi-k2.6": 86,
        "qwen3.5-122b": 80,
        "gemma4-27b": 80,
        "minimax-m2.5": 78,
        "minimax-m2.7": 74,
        "glm-5.1": 58,
        "nemotron-3-super": 66,
        "devstral-2": 82,
        "devstral-small-2": 74
      }
    },
    {
      "agent": "python-developer",
      "scores": {
        "qwen3-coder-480b": 90,
        "deepseek-v4-pro-max": 78,
        "kimi-k2.6": 88,
        "qwen3.5-122b": 86,
        "gemma4-27b": 82,
        "minimax-m2.5": 82,
        "minimax-m2.7": 78,
        "glm-5.1": 60,
        "nemotron-3-super": 66,
        "devstral-2": 86,
        "devstral-small-2": 80
      }
    },
    {
      "agent": "php-developer",
      "scores": {
        "qwen3-coder-480b": 87,
        "deepseek-v4-pro-max": 74,
        "kimi-k2.6": 86,
        "qwen3.5-122b": 84,
        "gemma4-27b": 82,
        "minimax-m2.5": 76,
        "minimax-m2.7": 72,
        "glm-5.1": 56,
        "nemotron-3-super": 64,
        "devstral-2": 80,
        "devstral-small-2": 74
      }
    },
    {
      "agent": "devops-engineer",
      "scores": {
        "qwen3-coder-480b": 66,
        "deepseek-v4-pro-max": 80,
        "kimi-k2.6": 88,
        "qwen3.5-122b": 75,
        "gemma4-27b": 78,
        "minimax-m2.5": 53,
        "minimax-m2.7": 48,
        "glm-5.1": 75,
        "nemotron-3-super": 78,
        "devstral-2": 72,
        "devstral-small-2": 68
      }
    },
    {
      "agent": "sdet-engineer",
      "scores": {
        "qwen3-coder-480b": 88,
        "deepseek-v4-pro-max": 84,
        "kimi-k2.6": 87,
        "qwen3.5-122b": 86,
        "gemma4-27b": 82,
        "minimax-m2.5": 84,
        "minimax-m2.7": 80,
        "glm-5.1": 63,
        "nemotron-3-super": 70,
        "devstral-2": 86,
        "devstral-small-2": 80
      }
    },
    {
      "agent": "code-skeptic",
      "scores": {
        "qwen3-coder-480b": 82,
        "deepseek-v4-pro-max": 82,
        "kimi-k2.6": 82,
        "qwen3.5-122b": 80,
        "gemma4-27b": 80,
        "minimax-m2.5": 85,
        "minimax-m2.7": 80,
        "glm-5.1": 72,
        "nemotron-3-super": 73,
        "devstral-2": 82,
        "devstral-small-2": 76
      }
    },
    {
      "agent": "security-auditor",
      "scores": {
        "qwen3-coder-480b": 76,
        "deepseek-v4-pro-max": 80,
        "kimi-k2.6": 80,
        "qwen3.5-122b": 78,
        "gemma4-27b": 78,
        "minimax-m2.5": 74,
        "minimax-m2.7": 68,
        "glm-5.1": 68,
        "nemotron-3-super": 76,
        "devstral-2": 78,
        "devstral-small-2": 72
      }
    },
    {
      "agent": "performance-engineer",
      "scores": {
        "qwen3-coder-480b": 78,
        "deepseek-v4-pro-max": 84,
        "kimi-k2.6": 82,
        "qwen3.5-122b": 76,
        "gemma4-27b": 76,
        "minimax-m2.5": 75,
        "minimax-m2.7": 70,
        "glm-5.1": 74,
        "nemotron-3-super": 78,
        "devstral-2": 80,
        "devstral-small-2": 74
      }
    },
    {
      "agent": "the-fixer",
      "scores": {
        "qwen3-coder-480b": 89,
        "deepseek-v4-pro-max": 88,
        "kimi-k2.6": 90,
        "qwen3.5-122b": 86,
        "gemma4-27b": 82,
        "minimax-m2.5": 88,
        "minimax-m2.7": 84,
        "glm-5.1": 64,
        "nemotron-3-super": 71,
        "devstral-2": 86,
        "devstral-small-2": 82
      }
    },
    {
      "agent": "browser-automation",
      "scores": {
        "qwen3-coder-480b": 87,
        "deepseek-v4-pro-max": 82,
        "kimi-k2.6": 86,
        "qwen3.5-122b": 82,
        "gemma4-27b": 84,
        "minimax-m2.5": 72,
        "minimax-m2.7": 68,
        "glm-5.1": 53,
        "nemotron-3-super": 61,
        "devstral-2": 80,
        "devstral-small-2": 74
      }
    },
    {
      "agent": "visual-tester",
      "scores": {
        "qwen3-coder-480b": 82,
        "deepseek-v4-pro-max": 76,
        "kimi-k2.6": 78,
        "qwen3.5-122b": 76,
        "gemma4-27b": 78,
        "minimax-m2.5": 68,
        "minimax-m2.7": 64,
        "glm-5.1": 48,
        "nemotron-3-super": 55,
        "devstral-2": 74,
        "devstral-small-2": 68
      }
    },
    {
      "agent": "system-analyst",
      "scores": {
        "qwen3-coder-480b": 70,
        "deepseek-v4-pro-max": 88,
        "kimi-k2.6": 86,
        "qwen3.5-122b": 82,
        "gemma4-27b": 82,
        "minimax-m2.5": 66,
        "minimax-m2.7": 63,
        "glm-5.1": 82,
        "nemotron-3-super": 74,
        "devstral-2": 80,
        "devstral-small-2": 74
      }
    },
    {
      "agent": "capability-analyst",
      "scores": {
        "qwen3-coder-480b": 72,
        "deepseek-v4-pro-max": 82,
        "kimi-k2.6": 82,
        "qwen3.5-122b": 80,
        "gemma4-27b": 80,
        "minimax-m2.5": 68,
        "minimax-m2.7": 66,
        "glm-5.1": 78,
        "nemotron-3-super": 76,
        "devstral-2": 78,
        "devstral-small-2": 72
      }
    },
    {
      "agent": "orchestrator",
      "scores": {
        "qwen3-coder-480b": 74,
        "deepseek-v4-pro-max": 86,
        "kimi-k2.6": 92,
        "qwen3.5-122b": 84,
        "gemma4-27b": 82,
        "minimax-m2.5": 70,
        "minimax-m2.7": 68,
        "glm-5.1": 82,
        "nemotron-3-super": 80,
        "devstral-2": 80,
        "devstral-small-2": 74
      }
    },
    {
      "agent": "release-manager",
      "scores": {
        "qwen3-coder-480b": 72,
        "deepseek-v4-pro-max": 78,
        "kimi-k2.6": 78,
        "qwen3.5-122b": 76,
        "gemma4-27b": 76,
        "minimax-m2.5": 66,
        "minimax-m2.7": 64,
        "glm-5.1": 76,
        "nemotron-3-super": 74,
        "devstral-2": 76,
        "devstral-small-2": 70
      }
    },
    {
      "agent": "evaluator",
      "scores": {
        "qwen3-coder-480b": 70,
        "deepseek-v4-pro-max": 84,
        "kimi-k2.6": 84,
        "qwen3.5-122b": 82,
        "gemma4-27b": 80,
        "minimax-m2.5": 73,
        "minimax-m2.7": 70,
        "glm-5.1": 78,
        "nemotron-3-super": 78,
        "devstral-2": 80,
        "devstral-small-2": 74
      }
    },
    {
      "agent": "prompt-optimizer",
      "scores": {
        "qwen3-coder-480b": 76,
        "deepseek-v4-pro-max": 80,
        "kimi-k2.6": 82,
        "qwen3.5-122b": 82,
        "gemma4-27b": 80,
        "minimax-m2.5": 74,
        "minimax-m2.7": 72,
        "glm-5.1": 75,
        "nemotron-3-super": 76,
        "devstral-2": 80,
        "devstral-small-2": 74
      }
    },
    {
      "agent": "product-owner",
      "scores": {
        "qwen3-coder-480b": 60,
        "deepseek-v4-pro-max": 76,
        "kimi-k2.6": 76,
        "qwen3.5-122b": 76,
        "gemma4-27b": 76,
        "minimax-m2.5": 56,
        "minimax-m2.7": 54,
        "glm-5.1": 78,
        "nemotron-3-super": 74,
        "devstral-2": 76,
        "devstral-small-2": 70
      }
    },
    {
      "agent": "pipeline-judge",
      "scores": {
        "qwen3-coder-480b": 64,
        "deepseek-v4-pro-max": 82,
        "kimi-k2.6": 84,
        "qwen3.5-122b": 82,
        "gemma4-27b": 80,
        "minimax-m2.5": 68,
        "minimax-m2.7": 65,
        "glm-5.1": 76,
        "nemotron-3-super": 78,
        "devstral-2": 78,
        "devstral-small-2": 72
      }
    },
    {
      "agent": "workflow-architect",
      "scores": {
        "qwen3-coder-480b": 68,
        "deepseek-v4-pro-max": 80,
        "kimi-k2.6": 82,
        "qwen3.5-122b": 80,
        "gemma4-27b": 80,
        "minimax-m2.5": 62,
        "minimax-m2.7": 60,
        "glm-5.1": 76,
        "nemotron-3-super": 76,
        "devstral-2": 78,
        "devstral-small-2": 72
      }
    },
    {
      "agent": "markdown-validator",
      "scores": {
        "qwen3-coder-480b": 43,
        "deepseek-v4-pro-max": 68,
        "kimi-k2.6": 56,
        "qwen3.5-122b": 56,
        "gemma4-27b": 60,
        "minimax-m2.5": 38,
        "minimax-m2.7": 36,
        "glm-5.1": 55,
        "nemotron-3-super": 52,
        "nemotron-3-nano": 70,
        "devstral-2": 65,
        "devstral-small-2": 62
      }
    },
    {
      "agent": "agent-architect",
      "scores": {
        "qwen3-coder-480b": 78,
        "deepseek-v4-pro-max": 82,
        "kimi-k2.6": 86,
        "qwen3.5-122b": 80,
        "gemma4-27b": 82,
        "minimax-m2.5": 72,
        "minimax-m2.7": 70,
        "glm-5.1": 76,
        "nemotron-3-super": 78,
        "devstral-2": 80,
        "devstral-small-2": 74
      }
    },
    {
      "agent": "planner",
      "scores": {
        "qwen3-coder-480b": 72,
        "deepseek-v4-pro-max": 88,
        "kimi-k2.6": 86,
        "qwen3.5-122b": 86,
        "gemma4-27b": 84,
        "minimax-m2.5": 68,
        "minimax-m2.7": 66,
        "glm-5.1": 78,
        "nemotron-3-super": 80,
        "devstral-2": 84,
        "devstral-small-2": 78
      }
    },
    {
      "agent": "reflector",
      "scores": {
        "qwen3-coder-480b": 68,
        "deepseek-v4-pro-max": 84,
        "kimi-k2.6": 80,
        "qwen3.5-122b": 80,
        "gemma4-27b": 80,
        "minimax-m2.5": 66,
        "minimax-m2.7": 64,
        "glm-5.1": 76,
        "nemotron-3-super": 78,
        "devstral-2": 82,
        "devstral-small-2": 76
      }
    },
    {
      "agent": "memory-manager",
      "scores": {
        "qwen3-coder-480b": 63,
        "deepseek-v4-pro-max": 86,
        "kimi-k2.6": 84,
        "qwen3.5-122b": 85,
        "gemma4-27b": 82,
        "minimax-m2.5": 58,
        "minimax-m2.7": 56,
        "glm-5.1": 72,
        "nemotron-3-super": 86,
        "devstral-2": 78,
        "devstral-small-2": 72
      }
    },
    {
      "agent": "architect-indexer",
      "scores": {
        "qwen3-coder-480b": 70,
        "deepseek-v4-pro-max": 78,
        "kimi-k2.6": 84,
        "qwen3.5-122b": 80,
        "gemma4-27b": 80,
        "minimax-m2.5": 64,
        "minimax-m2.7": 62,
        "glm-5.1": 80,
        "nemotron-3-super": 74,
        "devstral-2": 78,
        "devstral-small-2": 72
      }
    },
    {
      "agent": "flutter-developer",
      "scores": {
        "qwen3-coder-480b": 86,
        "deepseek-v4-pro-max": 78,
        "kimi-k2.6": 84,
        "qwen3.5-122b": 84,
        "gemma4-27b": 84,
        "minimax-m2.5": 70,
        "minimax-m2.7": 66,
        "glm-5.1": 53,
        "nemotron-3-super": 60,
        "devstral-2": 78,
        "devstral-small-2": 74
      }
    }
  ],
  "agent_current_config": [
    { "agent": "lead-developer",         "model": "ollama-cloud/qwen3-coder:480b",    "fit_score": 92, "status": "optimal" },
    { "agent": "frontend-developer",     "model": "ollama-cloud/minimax-m2.5",        "fit_score": 92, "status": "optimal" },
    { "agent": "backend-developer",      "model": "ollama-cloud/qwen3-coder:480b",    "fit_score": 91, "status": "optimal" },
    { "agent": "go-developer",           "model": "ollama-cloud/deepseek-v4-pro-max", "fit_score": 88, "status": "optimal" },
    { "agent": "python-developer",       "model": "ollama-cloud/qwen3-coder:480b",    "fit_score": 90, "status": "optimal" },
    { "agent": "php-developer",          "model": "ollama-cloud/qwen3-coder:480b",    "fit_score": 87, "status": "optimal" },
    { "agent": "flutter-developer",      "model": "ollama-cloud/qwen3-coder:480b",    "fit_score": 86, "status": "optimal" },
    { "agent": "devops-engineer",        "model": "ollama-cloud/kimi-k2.6",            "fit_score": 88, "status": "optimal" },
    { "agent": "sdet-engineer",          "model": "ollama-cloud/qwen3-coder:480b",    "fit_score": 88, "status": "optimal" },
    { "agent": "code-skeptic",           "model": "ollama-cloud/minimax-m2.5",        "fit_score": 85, "status": "optimal" },
    { "agent": "security-auditor",       "model": "ollama-cloud/deepseek-v4-pro-max", "fit_score": 80, "status": "good" },
    { "agent": "performance-engineer",   "model": "ollama-cloud/deepseek-v4-pro-max", "fit_score": 84, "status": "optimal" },
    { "agent": "the-fixer",              "model": "ollama-cloud/kimi-k2.6",            "fit_score": 90, "status": "optimal" },
    { "agent": "browser-automation",     "model": "ollama-cloud/qwen3-coder:480b",    "fit_score": 87, "status": "optimal" },
    { "agent": "visual-tester",          "model": "ollama-cloud/qwen3-coder:480b",    "fit_score": 82, "status": "good" },
    { "agent": "system-analyst",         "model": "ollama-cloud/glm-5.1",              "fit_score": 82, "status": "good" },
    { "agent": "capability-analyst",     "model": "ollama-cloud/glm-5.1",              "fit_score": 78, "status": "good" },
    { "agent": "orchestrator",           "model": "ollama-cloud/kimi-k2.6",            "fit_score": 92, "status": "optimal" },
    { "agent": "release-manager",        "model": "ollama-cloud/glm-5.1",              "fit_score": 76, "status": "good" },
    { "agent": "evaluator",              "model": "ollama-cloud/glm-5.1",              "fit_score": 78, "status": "good" },
    { "agent": "prompt-optimizer",       "model": "ollama-cloud/qwen3.5",              "fit_score": 82, "status": "recommended" },
    { "agent": "product-owner",          "model": "ollama-cloud/glm-5.1",              "fit_score": 78, "status": "good" },
    { "agent": "pipeline-judge",         "model": "ollama-cloud/glm-5.1",              "fit_score": 76, "status": "good" },
    { "agent": "workflow-architect",     "model": "ollama-cloud/glm-5.1",              "fit_score": 76, "status": "good" },
    { "agent": "markdown-validator",     "model": "ollama-cloud/deepseek-v4-pro-max", "fit_score": 68, "status": "poor" },
    { "agent": "agent-architect",        "model": "ollama-cloud/kimi-k2.6",            "fit_score": 86, "status": "optimal" },
    { "agent": "planner",              "model": "ollama-cloud/deepseek-v4-pro-max", "fit_score": 88, "status": "optimal" },
    { "agent": "reflector",              "model": "ollama-cloud/deepseek-v4-pro-max", "fit_score": 84, "status": "optimal" },
    { "agent": "memory-manager",         "model": "ollama-cloud/qwen3.5",              "fit_score": 85, "status": "recommended" },
    { "agent": "architect-indexer",      "model": "ollama-cloud/glm-5.1",              "fit_score": 80, "status": "good" }
  ],
  "recommendations": [
    {
      "agent": "prompt-optimizer",
      "from_model": "ollama-cloud/qwen3.6-plus (openrouter)",
      "to_model": "ollama-cloud/qwen3.5",
      "reason": "Migrated to Ollama Cloud. IF 92, vision+tools+thinking. Same quality, no rate limits.",
      "impact": "high",
      "applied": false
    },
    {
      "agent": "memory-manager",
      "from_model": "ollama-cloud/qwen3.6-plus (openrouter)",
      "to_model": "ollama-cloud/qwen3.5",
      "reason": "Migrated to Ollama Cloud. Caveat: qwen3.5 has a 128K context window, not 1M; longer-context alternatives are kimi-k2.6 (256K) and deepseek-v4 (1M). Matrix scores for this agent: qwen3.5=85, kimi-k2.6=84, deepseek-v4-pro-max=86.",
      "impact": "high",
      "applied": false
    },
    {
      "agent": "markdown-validator",
      "from_model": "ollama-cloud/deepseek-v4-pro-max",
      "to_model": "ollama-cloud/nemotron-3-nano",
      "reason": "Markdown-validator scores are the lowest (the current model scores 68). Nemotron-3-Nano scores 70 in the matrix; its IF score is only 68, but it is tiny (30B/4B) and extremely cheap. For lightweight validation tasks, nano is sufficient.",
      "impact": "medium",
      "applied": false
    },
    {
      "agent": "markdown-validator",
      "from_model": "ollama-cloud/deepseek-v4-pro-max",
      "to_model": "ollama-cloud/gemma4-27b",
      "reason": "Gemma 4 is the newest model (updated 2 days ago) and frontier-level at each size. It scores 60 for the validator role versus 70 for nemotron-3-nano, so nano is the better fit for this role; Gemma 4 is listed as a newer, more general alternative.",
      "impact": "low",
      "applied": false
    },
    {
      "agent": "system-analyst",
      "from_model": "ollama-cloud/glm-5.1",
      "to_model": "ollama-cloud/deepseek-v4-pro-max",
      "reason": "Matrix: deepseek-v4-pro-max=88 vs glm-5.1=82 (+6 points), plus 1M context for architecture docs. GLM-5.1 is still strong for standardization.",
      "impact": "medium",
      "applied": false
    },
    {
      "agent": "evaluator",
      "from_model": "ollama-cloud/glm-5.1",
      "to_model": "ollama-cloud/kimi-k2.6",
      "reason": "Matrix: kimi-k2.6=84 vs glm-5.1=78 (+6 points). IF=91 for scoring accuracy. High reasoning needed.",
      "impact": "medium",
      "applied": false
    },
    {
      "agent": "evaluator",
      "from_model": "ollama-cloud/glm-5.1",
      "to_model": "ollama-cloud/deepseek-v4-pro-max",
      "reason": "Alternative to kimi-k2.6. deepseek-v4-pro-max=84 (same as kimi), but 1M context. Could be better for large evaluation tasks.",
      "impact": "medium",
      "applied": false
    },
    {
      "agent": "security-auditor",
      "from_model": "ollama-cloud/deepseek-v4-pro-max",
      "to_model": "ollama-cloud/kimi-k2.6",
      "reason": "Matrix: tie, both score 80. However, kimi-k2.6 is multimodal (vision), which could help with screenshot-based security analysis.",
      "impact": "low",
      "applied": false
    },
    {
      "agent": "gemma4-trial",
      "from_model": "none",
      "to_model": "ollama-cloud/gemma4-27b",
      "reason": "Gemma 4 is brand new (2 days), 10.1M pulls, frontier at each size, vision+audio+thinking. Could be a game-changer for frontend-dev, browser-automation, visual-tester.",
      "impact": "high",
      "applied": false,
      "note": "Requires A/B test on frontend task."
    },
    {
      "agent": "qwen3.5-trial",
      "from_model": "none",
      "to_model": "ollama-cloud/qwen3.5-122b",
      "reason": "Qwen 3.5 updated 2 days ago, 12.4M pulls, IF=92 (highest!), multimodal. Could replace GLM-5.1 for reasoning tasks and qwen3-coder for some coding tasks.",
      "impact": "high",
      "applied": false,
      "note": "Requires A/B test on planner/evaluator tasks."
    }
  ],
  "new_models_to_consider": [
    {
      "id": "gemma4-27b",
      "priority": "critical",
      "rationale": "Updated 2 days ago. 10.1M pulls. Frontier-level at each size. Vision + audio + thinking + tools + cloud. Potentially replaces qwen3-coder for some tasks."
    },
    {
      "id": "qwen3.5-122b",
      "priority": "critical",
      "rationale": "Updated 2 days ago. 12.4M pulls. IF=92 highest among tracked. Multimodal. Could replace glm-5.1 for reasoning and compete with qwen3-coder for coding."
    },
    {
      "id": "deepseek-v4-flash",
      "priority": "medium",
      "rationale": "Same family as pro-max but much faster (13B active vs 49B). Good for low-latency agents: code-skeptic, browser-automation."
    },
    {
      "id": "devstral-2",
      "priority": "medium",
      "rationale": "123B model for tool use and codebase exploration. Could be strong for lead-developer on large projects."
    }
  ]
}