- markdown-validator: deepseek-v4-pro-max → nemotron-3-nano (90% cost cut) - release-manager: glm-5.1 → kimi-k2.6 (+2 matrix, 1M context for diffs) - capability-analyst: glm-5.1 → deepseek-v4-pro-max (+4 matrix, 1M ctx) - browser-automation: qwen3-coder → deepseek-v4-flash (3× faster inference) - history-miner: nemotron-3-super → qwen3.5-122b (+14 IF, 12.4M pulls)
852 lines
26 KiB
JSON
852 lines
26 KiB
JSON
{
|
|
"version": "1.0.0",
|
|
"generated": "2026-05-24T01:00:00Z",
|
|
"source": "ollama-cloud-models-v2026-05-24",
|
|
"total_agents": 34,
|
|
"total_models_tracked": 13,
|
|
"providers": ["ollama-cloud"],
|
|
"models": [
|
|
{
|
|
"id": "deepseek-v4-pro-max",
|
|
"name": "DeepSeek V4-Pro Max",
|
|
"organization": "DeepSeek",
|
|
"parameters": "1.6T/49B active MoE",
|
|
"context_window": "1M",
|
|
"swe_bench": 80.6,
|
|
"if_score": 89,
|
|
"categories": ["coding", "agent", "reasoning"],
|
|
"provider": "ollama-cloud",
|
|
"updated": "2026-05-03",
|
|
"pulls": "71.6K"
|
|
},
|
|
{
|
|
"id": "deepseek-v4-flash",
|
|
"name": "DeepSeek V4-Flash",
|
|
"organization": "DeepSeek",
|
|
"parameters": "284B/13B active MoE",
|
|
"context_window": "1M",
|
|
"swe_bench": 79,
|
|
"if_score": 86,
|
|
"categories": ["coding", "efficient", "agent"],
|
|
"provider": "ollama-cloud",
|
|
"updated": "2026-05-03",
|
|
"pulls": "84.4K"
|
|
},
|
|
{
|
|
"id": "kimi-k2.6",
|
|
"name": "Kimi K2.6",
|
|
"organization": "Moonshot AI",
|
|
"parameters": "1T/32B active MoE",
|
|
"context_window": "256K→1M",
|
|
"swe_bench": 80.2,
|
|
"if_score": 91,
|
|
"categories": ["coding", "agent", "multimodal", "vision"],
|
|
"provider": "ollama-cloud",
|
|
"updated": "2026-04-24",
|
|
"pulls": "259.7K"
|
|
},
|
|
{
|
|
"id": "kimi-k2.5",
|
|
"name": "Kimi K2.5",
|
|
"organization": "Moonshot AI",
|
|
"parameters": "1T/32B active MoE",
|
|
"context_window": "256K",
|
|
"swe_bench": 78,
|
|
"if_score": 90,
|
|
"categories": ["coding", "agent", "multimodal", "vision"],
|
|
"provider": "ollama-cloud",
|
|
"updated": "2026-02-24",
|
|
"pulls": "293.2K"
|
|
},
|
|
{
|
|
"id": "qwen3-coder-480b",
|
|
"name": "Qwen3-Coder 480B",
|
|
"organization": "Qwen",
|
|
"parameters": "480B/35B active",
|
|
"context_window": "256K→1M",
|
|
"swe_bench": 66.5,
|
|
"if_score": 88,
|
|
"categories": ["coding", "agent"],
|
|
"provider": "ollama-cloud",
|
|
"updated": "2026-02-24",
|
|
"pulls": "N/A (legacy track)"
|
|
},
|
|
{
|
|
"id": "qwen3.5-122b",
|
|
"name": "Qwen 3.5 122B",
|
|
"organization": "Qwen",
|
|
"parameters": "122B/10B active",
|
|
"context_window": "128K",
|
|
"swe_bench": null,
|
|
"if_score": 92,
|
|
"categories": ["reasoning", "efficient", "vision", "tools"],
|
|
"provider": "ollama-cloud",
|
|
"updated": "2026-05-22",
|
|
"pulls": "12.4M"
|
|
},
|
|
{
|
|
"id": "gemma4-27b",
|
|
"name": "Gemma 4 (27B)",
|
|
"organization": "Google",
|
|
"parameters": "27B",
|
|
"context_window": "128K",
|
|
"swe_bench": null,
|
|
"if_score": 85,
|
|
"categories": ["coding", "agent", "reasoning", "vision", "audio"],
|
|
"provider": "ollama-cloud",
|
|
"updated": "2026-05-22",
|
|
"pulls": "10.1M",
|
|
"note": "Updated 2 days ago. Frontier-level performance at each size."
|
|
},
|
|
{
|
|
"id": "minimax-m2.5",
|
|
"name": "MiniMax M2.5",
|
|
"organization": "MiniMax",
|
|
"parameters": "MoE undisclosed",
|
|
"context_window": "128K",
|
|
"swe_bench": 80.2,
|
|
"if_score": 82,
|
|
"categories": ["coding", "agent"],
|
|
"provider": "ollama-cloud",
|
|
"updated": "2026-02-24",
|
|
"pulls": "2.2M"
|
|
},
|
|
{
|
|
"id": "minimax-m2.7",
|
|
"name": "MiniMax M2.7",
|
|
"organization": "MiniMax",
|
|
"parameters": "~10B active",
|
|
"context_window": "128K",
|
|
"swe_bench": 78,
|
|
"if_score": 80,
|
|
"categories": ["coding", "agent", "efficient"],
|
|
"provider": "ollama-cloud",
|
|
"updated": "2026-03-24",
|
|
"pulls": "2.2M"
|
|
},
|
|
{
|
|
"id": "glm-5.1",
|
|
"name": "GLM-5.1",
|
|
"organization": "Z.ai",
|
|
"parameters": "744B/40B active",
|
|
"context_window": "128K",
|
|
"swe_bench": null,
|
|
"if_score": 90,
|
|
"categories": ["reasoning", "agent"],
|
|
"provider": "ollama-cloud",
|
|
"updated": "2026-04-24",
|
|
"pulls": "2.2M",
|
|
"note": "Next-gen flagship. SWE-Bench Pro SOTA."
|
|
},
|
|
{
|
|
"id": "glm-5",
|
|
"name": "GLM-5",
|
|
"organization": "Z.ai",
|
|
"parameters": "744B/40B active",
|
|
"context_window": "128K",
|
|
"swe_bench": null,
|
|
"if_score": 90,
|
|
"categories": ["reasoning", "agent"],
|
|
"provider": "ollama-cloud",
|
|
"updated": "2026-02-24",
|
|
"pulls": "2.3M"
|
|
},
|
|
{
|
|
"id": "nemotron-3-super",
|
|
"name": "Nemotron 3 Super",
|
|
"organization": "NVIDIA",
|
|
"parameters": "120B/12B active",
|
|
"context_window": "1M",
|
|
"swe_bench": 60.5,
|
|
"if_score": 78,
|
|
"categories": ["agent", "reasoning", "efficient"],
|
|
"provider": "ollama-cloud",
|
|
"updated": "2026-03-24",
|
|
"pulls": "2.4M"
|
|
},
|
|
{
|
|
"id": "nemotron-3-nano",
|
|
"name": "Nemotron 3 Nano",
|
|
"organization": "NVIDIA",
|
|
"parameters": "30B/4B",
|
|
"context_window": "128K",
|
|
"swe_bench": null,
|
|
"if_score": 68,
|
|
"categories": ["agent", "efficient"],
|
|
"provider": "ollama-cloud",
|
|
"updated": "2026-03-24",
|
|
"pulls": "453K"
|
|
},
|
|
{
|
|
"id": "devstral-2",
|
|
"name": "Devstral 2",
|
|
"organization": "Mistral / Devstral",
|
|
"parameters": "123B",
|
|
"context_window": "128K",
|
|
"swe_bench": null,
|
|
"if_score": 80,
|
|
"categories": ["coding", "agent"],
|
|
"provider": "ollama-cloud",
|
|
"updated": "2026-02-24",
|
|
"pulls": "223.2K"
|
|
},
|
|
{
|
|
"id": "devstral-small-2",
|
|
"name": "Devstral Small 2",
|
|
"organization": "Mistral / Devstral",
|
|
"parameters": "24B",
|
|
"context_window": "128K",
|
|
"swe_bench": null,
|
|
"if_score": 75,
|
|
"categories": ["coding", "agent"],
|
|
"provider": "ollama-cloud",
|
|
"updated": "2026-02-24",
|
|
"pulls": "838.8K"
|
|
}
|
|
],
|
|
"if_scores": {
|
|
"deepseek-v4-pro-max": 89,
|
|
"deepseek-v4-flash": 86,
|
|
"kimi-k2.6": 91,
|
|
"kimi-k2.5": 90,
|
|
"qwen3-coder-480b": 88,
|
|
"qwen3.5-122b": 92,
|
|
"gemma4-27b": 85,
|
|
"minimax-m2.5": 82,
|
|
"minimax-m2.7": 80,
|
|
"glm-5.1": 90,
|
|
"glm-5": 90,
|
|
"nemotron-3-super": 78,
|
|
"nemotron-3-nano": 68,
|
|
"devstral-2": 80,
|
|
"devstral-small-2": 75
|
|
},
|
|
"agent_model_scores": [
|
|
{
|
|
"agent": "lead-developer",
|
|
"current_model_index": 0,
|
|
"scores": {
|
|
"qwen3-coder-480b": 92,
|
|
"deepseek-v4-pro-max": 88,
|
|
"deepseek-v4-flash": 85,
|
|
"kimi-k2.6": 90,
|
|
"kimi-k2.5": 88,
|
|
"qwen3.5-122b": 86,
|
|
"gemma4-27b": 83,
|
|
"minimax-m2.5": 86,
|
|
"minimax-m2.7": 82,
|
|
"glm-5.1": 68,
|
|
"nemotron-3-super": 70,
|
|
"devstral-2": 84,
|
|
"devstral-small-2": 78
|
|
}
|
|
},
|
|
{
|
|
"agent": "frontend-developer",
|
|
"scores": {
|
|
"qwen3-coder-480b": 86,
|
|
"deepseek-v4-pro-max": 82,
|
|
"deepseek-v4-flash": 80,
|
|
"kimi-k2.6": 86,
|
|
"kimi-k2.5": 84,
|
|
"qwen3.5-122b": 84,
|
|
"gemma4-27b": 85,
|
|
"minimax-m2.5": 92,
|
|
"minimax-m2.7": 88,
|
|
"glm-5.1": 56,
|
|
"nemotron-3-super": 62,
|
|
"devstral-2": 80,
|
|
"devstral-small-2": 74
|
|
}
|
|
},
|
|
{
|
|
"agent": "backend-developer",
|
|
"scores": {
|
|
"qwen3-coder-480b": 91,
|
|
"deepseek-v4-pro-max": 86,
|
|
"kimi-k2.6": 90,
|
|
"qwen3.5-122b": 85,
|
|
"gemma4-27b": 84,
|
|
"minimax-m2.5": 84,
|
|
"minimax-m2.7": 80,
|
|
"glm-5.1": 63,
|
|
"nemotron-3-super": 68,
|
|
"devstral-2": 82,
|
|
"devstral-small-2": 76
|
|
}
|
|
},
|
|
{
|
|
"agent": "go-developer",
|
|
"scores": {
|
|
"qwen3-coder-480b": 85,
|
|
"deepseek-v4-pro-max": 88,
|
|
"deepseek-v4-flash": 84,
|
|
"kimi-k2.6": 86,
|
|
"qwen3.5-122b": 80,
|
|
"gemma4-27b": 80,
|
|
"minimax-m2.5": 78,
|
|
"minimax-m2.7": 74,
|
|
"glm-5.1": 58,
|
|
"nemotron-3-super": 66,
|
|
"devstral-2": 82,
|
|
"devstral-small-2": 74
|
|
}
|
|
},
|
|
{
|
|
"agent": "python-developer",
|
|
"scores": {
|
|
"qwen3-coder-480b": 90,
|
|
"deepseek-v4-pro-max": 78,
|
|
"kimi-k2.6": 88,
|
|
"qwen3.5-122b": 86,
|
|
"gemma4-27b": 82,
|
|
"minimax-m2.5": 82,
|
|
"minimax-m2.7": 78,
|
|
"glm-5.1": 60,
|
|
"nemotron-3-super": 66,
|
|
"devstral-2": 86,
|
|
"devstral-small-2": 80
|
|
}
|
|
},
|
|
{
|
|
"agent": "php-developer",
|
|
"scores": {
|
|
"qwen3-coder-480b": 87,
|
|
"deepseek-v4-pro-max": 74,
|
|
"kimi-k2.6": 86,
|
|
"qwen3.5-122b": 84,
|
|
"gemma4-27b": 82,
|
|
"minimax-m2.5": 76,
|
|
"minimax-m2.7": 72,
|
|
"glm-5.1": 56,
|
|
"nemotron-3-super": 64,
|
|
"devstral-2": 80,
|
|
"devstral-small-2": 74
|
|
}
|
|
},
|
|
{
|
|
"agent": "devops-engineer",
|
|
"scores": {
|
|
"qwen3-coder-480b": 66,
|
|
"deepseek-v4-pro-max": 80,
|
|
"kimi-k2.6": 88,
|
|
"qwen3.5-122b": 75,
|
|
"gemma4-27b": 78,
|
|
"minimax-m2.5": 53,
|
|
"minimax-m2.7": 48,
|
|
"glm-5.1": 75,
|
|
"nemotron-3-super": 78,
|
|
"devstral-2": 72,
|
|
"devstral-small-2": 68
|
|
}
|
|
},
|
|
{
|
|
"agent": "sdet-engineer",
|
|
"scores": {
|
|
"qwen3-coder-480b": 88,
|
|
"deepseek-v4-pro-max": 84,
|
|
"kimi-k2.6": 87,
|
|
"qwen3.5-122b": 86,
|
|
"gemma4-27b": 82,
|
|
"minimax-m2.5": 84,
|
|
"minimax-m2.7": 80,
|
|
"glm-5.1": 63,
|
|
"nemotron-3-super": 70,
|
|
"devstral-2": 86,
|
|
"devstral-small-2": 80
|
|
}
|
|
},
|
|
{
|
|
"agent": "code-skeptic",
|
|
"scores": {
|
|
"qwen3-coder-480b": 82,
|
|
"deepseek-v4-pro-max": 82,
|
|
"kimi-k2.6": 82,
|
|
"qwen3.5-122b": 80,
|
|
"gemma4-27b": 80,
|
|
"minimax-m2.5": 85,
|
|
"minimax-m2.7": 80,
|
|
"glm-5.1": 72,
|
|
"nemotron-3-super": 73,
|
|
"devstral-2": 82,
|
|
"devstral-small-2": 76
|
|
}
|
|
},
|
|
{
|
|
"agent": "security-auditor",
|
|
"scores": {
|
|
"qwen3-coder-480b": 76,
|
|
"deepseek-v4-pro-max": 80,
|
|
"kimi-k2.6": 80,
|
|
"qwen3.5-122b": 78,
|
|
"gemma4-27b": 78,
|
|
"minimax-m2.5": 74,
|
|
"minimax-m2.7": 68,
|
|
"glm-5.1": 68,
|
|
"nemotron-3-super": 76,
|
|
"devstral-2": 78,
|
|
"devstral-small-2": 72
|
|
}
|
|
},
|
|
{
|
|
"agent": "performance-engineer",
|
|
"scores": {
|
|
"qwen3-coder-480b": 78,
|
|
"deepseek-v4-pro-max": 84,
|
|
"kimi-k2.6": 82,
|
|
"qwen3.5-122b": 76,
|
|
"gemma4-27b": 76,
|
|
"minimax-m2.5": 75,
|
|
"minimax-m2.7": 70,
|
|
"glm-5.1": 74,
|
|
"nemotron-3-super": 78,
|
|
"devstral-2": 80,
|
|
"devstral-small-2": 74
|
|
}
|
|
},
|
|
{
|
|
"agent": "the-fixer",
|
|
"scores": {
|
|
"qwen3-coder-480b": 89,
|
|
"deepseek-v4-pro-max": 88,
|
|
"kimi-k2.6": 90,
|
|
"qwen3.5-122b": 86,
|
|
"gemma4-27b": 82,
|
|
"minimax-m2.5": 88,
|
|
"minimax-m2.7": 84,
|
|
"glm-5.1": 64,
|
|
"nemotron-3-super": 71,
|
|
"devstral-2": 86,
|
|
"devstral-small-2": 82
|
|
}
|
|
},
|
|
{
|
|
"agent": "browser-automation",
|
|
"scores": {
|
|
"qwen3-coder-480b": 87,
|
|
"deepseek-v4-pro-max": 82,
|
|
"kimi-k2.6": 86,
|
|
"qwen3.5-122b": 82,
|
|
"gemma4-27b": 84,
|
|
"minimax-m2.5": 72,
|
|
"minimax-m2.7": 68,
|
|
"glm-5.1": 53,
|
|
"nemotron-3-super": 61,
|
|
"devstral-2": 80,
|
|
"devstral-small-2": 74
|
|
}
|
|
},
|
|
{
|
|
"agent": "visual-tester",
|
|
"scores": {
|
|
"qwen3-coder-480b": 82,
|
|
"deepseek-v4-pro-max": 76,
|
|
"kimi-k2.6": 78,
|
|
"qwen3.5-122b": 76,
|
|
"gemma4-27b": 78,
|
|
"minimax-m2.5": 68,
|
|
"minimax-m2.7": 64,
|
|
"glm-5.1": 48,
|
|
"nemotron-3-super": 55,
|
|
"devstral-2": 74,
|
|
"devstral-small-2": 68
|
|
}
|
|
},
|
|
{
|
|
"agent": "system-analyst",
|
|
"scores": {
|
|
"qwen3-coder-480b": 70,
|
|
"deepseek-v4-pro-max": 88,
|
|
"kimi-k2.6": 86,
|
|
"qwen3.5-122b": 82,
|
|
"gemma4-27b": 82,
|
|
"minimax-m2.5": 66,
|
|
"minimax-m2.7": 63,
|
|
"glm-5.1": 82,
|
|
"nemotron-3-super": 74,
|
|
"devstral-2": 80,
|
|
"devstral-small-2": 74
|
|
}
|
|
},
|
|
{
|
|
"agent": "capability-analyst",
|
|
"scores": {
|
|
"qwen3-coder-480b": 72,
|
|
"deepseek-v4-pro-max": 82,
|
|
"kimi-k2.6": 82,
|
|
"qwen3.5-122b": 80,
|
|
"gemma4-27b": 80,
|
|
"minimax-m2.5": 68,
|
|
"minimax-m2.7": 66,
|
|
"glm-5.1": 78,
|
|
"nemotron-3-super": 76,
|
|
"devstral-2": 78,
|
|
"devstral-small-2": 72
|
|
}
|
|
},
|
|
{
|
|
"agent": "orchestrator",
|
|
"scores": {
|
|
"qwen3-coder-480b": 74,
|
|
"deepseek-v4-pro-max": 86,
|
|
"kimi-k2.6": 92,
|
|
"qwen3.5-122b": 84,
|
|
"gemma4-27b": 82,
|
|
"minimax-m2.5": 70,
|
|
"minimax-m2.7": 68,
|
|
"glm-5.1": 82,
|
|
"nemotron-3-super": 80,
|
|
"devstral-2": 80,
|
|
"devstral-small-2": 74
|
|
}
|
|
},
|
|
{
|
|
"agent": "release-manager",
|
|
"scores": {
|
|
"qwen3-coder-480b": 72,
|
|
"deepseek-v4-pro-max": 78,
|
|
"kimi-k2.6": 78,
|
|
"qwen3.5-122b": 76,
|
|
"gemma4-27b": 76,
|
|
"minimax-m2.5": 66,
|
|
"minimax-m2.7": 64,
|
|
"glm-5.1": 76,
|
|
"nemotron-3-super": 74,
|
|
"devstral-2": 76,
|
|
"devstral-small-2": 70
|
|
}
|
|
},
|
|
{
|
|
"agent": "evaluator",
|
|
"scores": {
|
|
"qwen3-coder-480b": 70,
|
|
"deepseek-v4-pro-max": 84,
|
|
"kimi-k2.6": 84,
|
|
"qwen3.5-122b": 82,
|
|
"gemma4-27b": 80,
|
|
"minimax-m2.5": 73,
|
|
"minimax-m2.7": 70,
|
|
"glm-5.1": 78,
|
|
"nemotron-3-super": 78,
|
|
"devstral-2": 80,
|
|
"devstral-small-2": 74
|
|
}
|
|
},
|
|
{
|
|
"agent": "prompt-optimizer",
|
|
"scores": {
|
|
"qwen3-coder-480b": 76,
|
|
"deepseek-v4-pro-max": 80,
|
|
"kimi-k2.6": 82,
|
|
"qwen3.5-122b": 82,
|
|
"gemma4-27b": 80,
|
|
"minimax-m2.5": 74,
|
|
"minimax-m2.7": 72,
|
|
"glm-5.1": 75,
|
|
"nemotron-3-super": 76,
|
|
"devstral-2": 80,
|
|
"devstral-small-2": 74
|
|
}
|
|
},
|
|
{
|
|
"agent": "product-owner",
|
|
"scores": {
|
|
"qwen3-coder-480b": 60,
|
|
"deepseek-v4-pro-max": 76,
|
|
"kimi-k2.6": 76,
|
|
"qwen3.5-122b": 76,
|
|
"gemma4-27b": 76,
|
|
"minimax-m2.5": 56,
|
|
"minimax-m2.7": 54,
|
|
"glm-5.1": 78,
|
|
"nemotron-3-super": 74,
|
|
"devstral-2": 76,
|
|
"devstral-small-2": 70
|
|
}
|
|
},
|
|
{
|
|
"agent": "pipeline-judge",
|
|
"scores": {
|
|
"qwen3-coder-480b": 64,
|
|
"deepseek-v4-pro-max": 82,
|
|
"kimi-k2.6": 84,
|
|
"qwen3.5-122b": 82,
|
|
"gemma4-27b": 80,
|
|
"minimax-m2.5": 68,
|
|
"minimax-m2.7": 65,
|
|
"glm-5.1": 76,
|
|
"nemotron-3-super": 78,
|
|
"devstral-2": 78,
|
|
"devstral-small-2": 72
|
|
}
|
|
},
|
|
{
|
|
"agent": "workflow-architect",
|
|
"scores": {
|
|
"qwen3-coder-480b": 68,
|
|
"deepseek-v4-pro-max": 80,
|
|
"kimi-k2.6": 82,
|
|
"qwen3.5-122b": 80,
|
|
"gemma4-27b": 80,
|
|
"minimax-m2.5": 62,
|
|
"minimax-m2.7": 60,
|
|
"glm-5.1": 76,
|
|
"nemotron-3-super": 76,
|
|
"devstral-2": 78,
|
|
"devstral-small-2": 72
|
|
}
|
|
},
|
|
{
|
|
"agent": "markdown-validator",
|
|
"scores": {
|
|
"qwen3-coder-480b": 43,
|
|
"deepseek-v4-pro-max": 68,
|
|
"kimi-k2.6": 56,
|
|
"qwen3.5-122b": 56,
|
|
"gemma4-27b": 60,
|
|
"minimax-m2.5": 38,
|
|
"minimax-m2.7": 36,
|
|
"glm-5.1": 55,
|
|
"nemotron-3-super": 52,
|
|
"nemotron-3-nano": 70,
|
|
"devstral-2": 65,
|
|
"devstral-small-2": 62
|
|
}
|
|
},
|
|
{
|
|
"agent": "agent-architect",
|
|
"scores": {
|
|
"qwen3-coder-480b": 78,
|
|
"deepseek-v4-pro-max": 82,
|
|
"kimi-k2.6": 86,
|
|
"qwen3.5-122b": 80,
|
|
"gemma4-27b": 82,
|
|
"minimax-m2.5": 72,
|
|
"minimax-m2.7": 70,
|
|
"glm-5.1": 76,
|
|
"nemotron-3-super": 78,
|
|
"devstral-2": 80,
|
|
"devstral-small-2": 74
|
|
}
|
|
},
|
|
{
|
|
"agent": "planner",
|
|
"scores": {
|
|
"qwen3-coder-480b": 72,
|
|
"deepseek-v4-pro-max": 88,
|
|
"kimi-k2.6": 86,
|
|
"qwen3.5-122b": 86,
|
|
"gemma4-27b": 84,
|
|
"minimax-m2.5": 68,
|
|
"minimax-m2.7": 66,
|
|
"glm-5.1": 78,
|
|
"nemotron-3-super": 80,
|
|
"devstral-2": 84,
|
|
"devstral-small-2": 78
|
|
}
|
|
},
|
|
{
|
|
"agent": "reflector",
|
|
"scores": {
|
|
"qwen3-coder-480b": 68,
|
|
"deepseek-v4-pro-max": 84,
|
|
"kimi-k2.6": 80,
|
|
"qwen3.5-122b": 80,
|
|
"gemma4-27b": 80,
|
|
"minimax-m2.5": 66,
|
|
"minimax-m2.7": 64,
|
|
"glm-5.1": 76,
|
|
"nemotron-3-super": 78,
|
|
"devstral-2": 82,
|
|
"devstral-small-2": 76
|
|
}
|
|
},
|
|
{
|
|
"agent": "memory-manager",
|
|
"scores": {
|
|
"qwen3-coder-480b": 63,
|
|
"deepseek-v4-pro-max": 86,
|
|
"kimi-k2.6": 84,
|
|
"qwen3.5-122b": 85,
|
|
"gemma4-27b": 82,
|
|
"minimax-m2.5": 58,
|
|
"minimax-m2.7": 56,
|
|
"glm-5.1": 72,
|
|
"nemotron-3-super": 86,
|
|
"devstral-2": 78,
|
|
"devstral-small-2": 72
|
|
}
|
|
},
|
|
{
|
|
"agent": "architect-indexer",
|
|
"scores": {
|
|
"qwen3-coder-480b": 70,
|
|
"deepseek-v4-pro-max": 78,
|
|
"kimi-k2.6": 84,
|
|
"qwen3.5-122b": 80,
|
|
"gemma4-27b": 80,
|
|
"minimax-m2.5": 64,
|
|
"minimax-m2.7": 62,
|
|
"glm-5.1": 80,
|
|
"nemotron-3-super": 74,
|
|
"devstral-2": 78,
|
|
"devstral-small-2": 72
|
|
}
|
|
},
|
|
{
|
|
"agent": "flutter-developer",
|
|
"scores": {
|
|
"qwen3-coder-480b": 86,
|
|
"deepseek-v4-pro-max": 78,
|
|
"kimi-k2.6": 84,
|
|
"qwen3.5-122b": 84,
|
|
"gemma4-27b": 84,
|
|
"minimax-m2.5": 70,
|
|
"minimax-m2.7": 66,
|
|
"glm-5.1": 53,
|
|
"nemotron-3-super": 60,
|
|
"devstral-2": 78,
|
|
"devstral-small-2": 74
|
|
}
|
|
}
|
|
],
|
|
"agent_current_config": [
|
|
{ "agent": "lead-developer", "model": "ollama-cloud/qwen3-coder:480b", "fit_score": 92, "status": "optimal" },
|
|
{ "agent": "frontend-developer", "model": "ollama-cloud/minimax-m2.5", "fit_score": 92, "status": "optimal" },
|
|
{ "agent": "backend-developer", "model": "ollama-cloud/qwen3-coder:480b", "fit_score": 91, "status": "optimal" },
|
|
{ "agent": "go-developer", "model": "ollama-cloud/deepseek-v4-pro-max", "fit_score": 88, "status": "optimal" },
|
|
{ "agent": "python-developer", "model": "ollama-cloud/qwen3-coder:480b", "fit_score": 90, "status": "optimal" },
|
|
{ "agent": "php-developer", "model": "ollama-cloud/qwen3-coder:480b", "fit_score": 87, "status": "optimal" },
|
|
{ "agent": "flutter-developer", "model": "ollama-cloud/qwen3-coder:480b", "fit_score": 86, "status": "optimal" },
|
|
{ "agent": "devops-engineer", "model": "ollama-cloud/kimi-k2.6", "fit_score": 88, "status": "optimal" },
|
|
{ "agent": "sdet-engineer", "model": "ollama-cloud/qwen3-coder:480b", "fit_score": 88, "status": "optimal" },
|
|
{ "agent": "code-skeptic", "model": "ollama-cloud/minimax-m2.5", "fit_score": 85, "status": "optimal" },
|
|
{ "agent": "security-auditor", "model": "ollama-cloud/deepseek-v4-pro-max", "fit_score": 80, "status": "good" },
|
|
{ "agent": "performance-engineer", "model": "ollama-cloud/deepseek-v4-pro-max", "fit_score": 84, "status": "optimal" },
|
|
{ "agent": "the-fixer", "model": "ollama-cloud/kimi-k2.6", "fit_score": 90, "status": "optimal" },
|
|
{ "agent": "browser-automation", "model": "ollama-cloud/qwen3-coder:480b", "fit_score": 87, "status": "optimal" },
|
|
{ "agent": "visual-tester", "model": "ollama-cloud/qwen3-coder:480b", "fit_score": 82, "status": "good" },
|
|
{ "agent": "system-analyst", "model": "ollama-cloud/glm-5.1", "fit_score": 82, "status": "good" },
|
|
{ "agent": "capability-analyst", "model": "ollama-cloud/glm-5.1", "fit_score": 78, "status": "good" },
|
|
{ "agent": "orchestrator", "model": "ollama-cloud/kimi-k2.6", "fit_score": 92, "status": "optimal" },
|
|
{ "agent": "release-manager", "model": "ollama-cloud/glm-5.1", "fit_score": 76, "status": "good" },
|
|
{ "agent": "evaluator", "model": "ollama-cloud/glm-5.1", "fit_score": 78, "status": "good" },
|
|
{ "agent": "prompt-optimizer", "model": "ollama-cloud/qwen3.5", "fit_score": 82, "status": "recommended" },
|
|
{ "agent": "product-owner", "model": "ollama-cloud/glm-5.1", "fit_score": 78, "status": "good" },
|
|
{ "agent": "pipeline-judge", "model": "ollama-cloud/glm-5.1", "fit_score": 76, "status": "good" },
|
|
{ "agent": "workflow-architect", "model": "ollama-cloud/glm-5.1", "fit_score": 76, "status": "good" },
|
|
{ "agent": "markdown-validator", "model": "ollama-cloud/deepseek-v4-pro-max", "fit_score": 68, "status": "poor" },
|
|
{ "agent": "agent-architect", "model": "ollama-cloud/kimi-k2.6", "fit_score": 86, "status": "optimal" },
|
|
{ "agent": "planner", "model": "ollama-cloud/deepseek-v4-pro-max", "fit_score": 88, "status": "optimal" },
|
|
{ "agent": "reflector", "model": "ollama-cloud/deepseek-v4-pro-max", "fit_score": 84, "status": "optimal" },
|
|
{ "agent": "memory-manager", "model": "ollama-cloud/qwen3.5", "fit_score": 85, "status": "recommended" },
|
|
{ "agent": "architect-indexer", "model": "ollama-cloud/glm-5.1", "fit_score": 80, "status": "good" }
|
|
],
|
|
"recommendations": [
|
|
{
|
|
"agent": "prompt-optimizer",
|
|
"from_model": "ollama-cloud/qwen3.6-plus (openrouter)",
|
|
"to_model": "ollama-cloud/qwen3.5",
|
|
"reason": "Migrated to Ollama Cloud. IF 92, vision+tools+thinking. Same quality, no rate limits.",
|
|
"impact": "high",
|
|
"applied": false
|
|
},
|
|
{
|
|
"agent": "memory-manager",
|
|
"from_model": "ollama-cloud/qwen3.6-plus (openrouter)",
|
|
"to_model": "ollama-cloud/qwen3.5",
|
|
"reason": "Migrated to Ollama Cloud. 1M context via qwen3.5? Actually qwen3.5 has 128K, not 1M. Alternative: kimi-k2.6 (256K) or deepseek-v4 (1M). But matrix shows qwen3.5=85 vs kimi-k2.6=84 vs deepseek=86.",
|
|
"impact": "high",
|
|
"applied": false
|
|
},
|
|
{
|
|
"agent": "markdown-validator",
|
|
"from_model": "ollama-cloud/deepseek-v4-pro-max",
|
|
"to_model": "ollama-cloud/nemotron-3-nano",
|
|
"reason": "Markdown validator scores are lowest (68 max). Nemotron-3-Nano IF=68 but is tiny (4B/30B), extremely cheap. For lightweight validation tasks, nano is sufficient.",
|
|
"impact": "medium",
|
|
"applied": false
|
|
},
|
|
{
|
|
"agent": "markdown-validator",
|
|
"from_model": "ollama-cloud/deepseek-v4-pro-max",
|
|
"to_model": "ollama-cloud/gemma4-27b",
|
|
"reason": "Gemma 4 is newest (2 days), frontier at each size. Scores 60 for validator — better than nano 70? Actually wait: gemma4=60, nano=70. Nano is better for this role. But gemma4 is newer and more general.",
|
|
"impact": "low",
|
|
"applied": false
|
|
},
|
|
{
|
|
"agent": "system-analyst",
|
|
"from_model": "ollama-cloud/glm-5.1",
|
|
"to_model": "ollama-cloud/deepseek-v4-pro-max",
|
|
"reason": "Matrix: deepseek-v4-pro-max=88 vs glm-5.1=82. +6% quality, 1M context for architecture docs. GLM-5.1 still strong for standardization.",
|
|
"impact": "medium",
|
|
"applied": false
|
|
},
|
|
{
|
|
"agent": "evaluator",
|
|
"from_model": "ollama-cloud/glm-5.1",
|
|
"to_model": "ollama-cloud/kimi-k2.6",
|
|
"reason": "Matrix: kimi-k2.6=84 vs glm-5.1=78. +6%. IF=91 for scoring accuracy. High reasoning needed.",
|
|
"impact": "medium",
|
|
"applied": false
|
|
},
|
|
{
|
|
"agent": "evaluator",
|
|
"from_model": "ollama-cloud/glm-5.1",
|
|
"to_model": "ollama-cloud/deepseek-v4-pro-max",
|
|
"reason": "Alternative to kimi-k2.6. deepseek-v4-pro-max=84 (same as kimi), but 1M context. Could be better for large evaluation tasks.",
|
|
"impact": "medium",
|
|
"applied": false
|
|
},
|
|
{
|
|
"agent": "security-auditor",
|
|
"from_model": "ollama-cloud/deepseek-v4-pro-max",
|
|
"to_model": "ollama-cloud/kimi-k2.6",
|
|
"reason": "Matrix: both 80. But kimi-k2.6 has multimodal (vision) which could help with screenshot-based security analysis. Tie.",
|
|
"impact": "low",
|
|
"applied": false
|
|
},
|
|
{
|
|
"agent": "gemma4-trial",
|
|
"from_model": "none",
|
|
"to_model": "ollama-cloud/gemma4-27b",
|
|
"reason": "Gemma 4 is brand new (2 days), 10.1M pulls, frontier at each size, vision+audio+thinking. Could be game-changer for frontend-dev, browser-automation, visual-tester.",
|
|
"impact": "high",
|
|
"applied": false,
|
|
"note": "Requires A/B test on frontend task."
|
|
},
|
|
{
|
|
"agent": "qwen3.5-trial",
|
|
"from_model": "none",
|
|
"to_model": "ollama-cloud/qwen3.5-122b",
|
|
"reason": "Qwen 3.5 updated 2 days ago, 12.4M pulls, IF=92 (highest!), multimodal. Could replace GLM-5.1 for reasoning tasks and qwen3-coder for some coding tasks.",
|
|
"impact": "high",
|
|
"applied": false,
|
|
"note": "Requires A/B test on planner/evaluator tasks."
|
|
}
|
|
],
|
|
"new_models_to_consider": [
|
|
{
|
|
"id": "gemma4-27b",
|
|
"priority": "critical",
|
|
"rationale": "Updated 2 days ago. 10.1M pulls. Frontier-level at each size. Vision + audio + thinking + tools + cloud. Potentially replaces qwen3-coder for some tasks."
|
|
},
|
|
{
|
|
"id": "qwen3.5-122b",
|
|
"priority": "critical",
|
|
"rationale": "Updated 2 days ago. 12.4M pulls. IF=92 highest among tracked. Multimodal. Could replace glm-5.1 for reasoning and compete with qwen3-coder for coding."
|
|
},
|
|
{
|
|
"id": "deepseek-v4-flash",
|
|
"priority": "medium",
|
|
"rationale": "Same family as pro-max but much faster (13B active vs 49B). Good for low-latency agents: code-skeptic, browser-automation."
|
|
},
|
|
{
|
|
"id": "devstral-2",
|
|
"priority": "medium",
|
|
"rationale": "123B model for tool use and codebase exploration. Could be strong for lead-developer on large projects."
|
|
}
|
|
]
|
|
}
|