APAW/agent-evolution/data/agent-versions.json

{
  "$schema": "./agent-versions.schema.json",
  "version": "1.0.0",
  "lastUpdated": "2026-04-05T22:30:00Z",
  "agents": {
    "lead-developer": {
      "current": {
        "model": "ollama-cloud/qwen3-coder:480b",
        "provider": "Ollama",
        "category": "Core Dev",
        "mode": "subagent",
        "color": "#DC2626",
        "description": "Primary code writer for backend and core logic. Writes implementation to pass tests",
        "benchmark": {
          "swe_bench": 66.5,
          "ruler_1m": null,
          "terminal_bench": null,
          "fit_score": 92
        },
        "capabilities": ["code_writing", "refactoring", "bug_fixing", "implementation"]
      },
      "history": [
        {
          "date": "2026-04-05T05:21:00Z",
          "commit": "caf77f53c8",
          "type": "model_change",
          "from": null,
          "to": "ollama-cloud/qwen3-coder:480b",
          "reason": "Initial configuration from capability-index.yaml",
          "source": "git"
        }
      ],
      "performance_log": []
    },
    "frontend-developer": {
      "current": {
        "model": "ollama-cloud/qwen3-coder:480b",
        "provider": "Ollama",
        "category": "Core Dev",
        "mode": "subagent",
        "color": "#3B82F6",
        "description": "UI implementation specialist with multimodal capabilities",
        "benchmark": {
          "swe_bench": 66.5,
          "ruler_1m": null,
          "terminal_bench": null,
          "fit_score": 90
        },
        "capabilities": ["ui_implementation", "component_creation", "styling", "responsive_design"]
      },
      "history": [
        {
          "date": "2026-04-05T05:21:00Z",
          "commit": "af5f401",
          "type": "agent_created",
          "from": null,
          "to": "ollama-cloud/qwen3-coder:480b",
          "reason": "Flutter development support added",
          "source": "git"
        }
      ],
      "performance_log": []
    },
    "backend-developer": {
      "current": {
        "model": "ollama-cloud/qwen3-coder:480b",
        "provider": "Ollama",
        "category": "Core Dev",
        "mode": "subagent",
        "color": "#10B981",
        "description": "Node.js, Express, APIs, database specialist",
        "benchmark": {
          "swe_bench": 66.5,
          "ruler_1m": null,
          "terminal_bench": null,
          "fit_score": 91
        },
        "capabilities": ["api_development", "database_design", "server_logic", "authentication"]
      },
      "history": [],
      "performance_log": []
    },
    "go-developer": {
      "current": {
        "model": "ollama-cloud/qwen3-coder:480b",
        "provider": "Ollama",
        "category": "Core Dev",
        "mode": "subagent",
        "color": "#00ADD8",
        "description": "Go backend services specialist",
        "benchmark": {
          "swe_bench": 66.5,
          "ruler_1m": null,
          "terminal_bench": null,
          "fit_score": 85
        },
        "capabilities": ["go_api_development", "go_database_design", "go_concurrent_programming", "go_authentication"]
      },
      "history": [
        {
          "date": "2026-04-05T05:21:00Z",
          "commit": "caf77f53c8",
          "type": "model_change",
          "from": "ollama-cloud/deepseek-v3.2",
          "to": "ollama-cloud/qwen3-coder:480b",
          "reason": "Qwen3-Coder optimized for Go development",
          "source": "git"
        }
      ],
      "performance_log": []
    },
    "sdet-engineer": {
      "current": {
        "model": "ollama-cloud/qwen3-coder:480b",
        "provider": "Ollama",
        "category": "QA",
        "mode": "subagent",
        "color": "#8B5CF6",
        "description": "Writes tests following TDD methodology. Tests MUST fail initially",
        "benchmark": {
          "swe_bench": 66.5,
          "ruler_1m": null,
          "terminal_bench": null,
          "fit_score": 88
        },
        "capabilities": ["unit_tests", "integration_tests", "e2e_tests", "test_planning", "visual_regression"]
      },
      "history": [],
      "performance_log": []
    },
    "code-skeptic": {
      "current": {
        "model": "ollama-cloud/minimax-m2.5",
        "provider": "Ollama",
        "category": "QA",
        "mode": "subagent",
        "color": "#EF4444",
        "description": "Adversarial code reviewer. Finds problems and issues. Does NOT suggest implementations",
        "benchmark": {
          "swe_bench": 80.2,
          "ruler_1m": null,
          "terminal_bench": null,
          "fit_score": 85
        },
        "capabilities": ["code_review", "security_review", "style_check", "issue_identification"]
      },
      "history": [],
      "performance_log": []
    },
    "security-auditor": {
      "current": {
        "model": "ollama-cloud/nemotron-3-super",
        "provider": "Ollama",
        "category": "Security",
        "mode": "subagent",
        "color": "#DC2626",
        "description": "Scans for security vulnerabilities, OWASP Top 10, dependency CVEs",
        "benchmark": {
          "swe_bench": 60.5,
          "ruler_1m": 91.75,
          "pinch_bench": 85.6,
          "fit_score": 80
        },
        "capabilities": ["vulnerability_scan", "owasp_check", "secret_detection", "auth_review"]
      },
      "history": [
        {
          "date": "2026-04-05T05:21:00Z",
          "commit": "caf77f53c8",
          "type": "model_change",
          "from": "ollama-cloud/deepseek-v3.2",
          "to": "ollama-cloud/nemotron-3-super",
          "reason": "Nemotron 3 Super optimized for security analysis with RULER@1M",
          "source": "git"
        }
      ],
      "performance_log": []
    },
    "performance-engineer": {
      "current": {
        "model": "ollama-cloud/nemotron-3-super",
        "provider": "Ollama",
        "category": "Performance",
        "mode": "subagent",
        "color": "#F59E0B",
        "description": "Reviews code for performance issues: N+1 queries, memory leaks, algorithmic complexity",
        "benchmark": {
          "swe_bench": 60.5,
          "ruler_1m": 91.75,
          "pinch_bench": 85.6,
          "fit_score": 82
        },
        "capabilities": ["performance_analysis", "n_plus_one_detection", "memory_leak_check", "algorithm_analysis"]
      },
      "history": [
        {
          "date": "2026-04-05T05:21:00Z",
          "commit": "caf77f53c8",
          "type": "model_change",
          "from": "ollama-cloud/gpt-oss:120b",
          "to": "ollama-cloud/nemotron-3-super",
          "reason": "Better reasoning for performance analysis",
          "source": "git"
        }
      ],
      "performance_log": []
    },
    "browser-automation": {
      "current": {
        "model": "ollama-cloud/qwen3-coder:480b",
        "provider": "Ollama",
        "category": "Testing",
        "mode": "subagent",
        "color": "#0EA5E9",
        "description": "Browser automation agent using Playwright MCP for E2E testing",
        "benchmark": {
          "swe_bench": 66.5,
          "fit_score": 87
        },
        "capabilities": ["e2e_browser_tests", "form_filling", "navigation_testing", "screenshot_capture"]
      },
      "history": [],
      "performance_log": []
    },
    "visual-tester": {
      "current": {
        "model": "ollama-cloud/qwen3-coder:480b",
        "provider": "Ollama",
        "category": "Testing",
        "mode": "subagent",
        "color": "#EC4899",
        "description": "Visual regression testing agent that compares screenshots",
        "benchmark": {
          "swe_bench": 66.5,
          "fit_score": 82
        },
        "capabilities": ["visual_regression", "pixel_comparison", "screenshot_diff", "ui_validation"]
      },
      "history": [],
      "performance_log": []
    },
    "system-analyst": {
      "current": {
        "model": "ollama-cloud/glm-5",
        "provider": "Ollama",
        "category": "Analysis",
        "mode": "subagent",
        "color": "#6366F1",
        "description": "Designs technical specifications, data schemas, and API contracts",
        "benchmark": {
          "swe_bench": null,
          "fit_score": 82
        },
        "capabilities": ["architecture_design", "api_specification", "database_modeling", "technical_documentation"]
      },
      "history": [
        {
          "date": "2026-04-05T05:21:00Z",
          "commit": "caf77f53c8",
          "type": "model_change",
          "from": "ollama-cloud/gpt-oss:120b",
          "to": "ollama-cloud/glm-5",
          "reason": "GLM-5 better for system engineering and architecture",
          "source": "git"
        }
      ],
      "performance_log": []
    },
    "requirement-refiner": {
      "current": {
        "model": "ollama-cloud/glm-5",
        "provider": "Ollama",
        "category": "Analysis",
        "mode": "subagent",
        "color": "#8B5CF6",
        "description": "Converts vague ideas into strict User Stories with acceptance criteria",
        "benchmark": {
          "swe_bench": null,
          "fit_score": 80,
          "context": "128K"
        },
        "capabilities": ["requirement_analysis", "user_story_creation", "acceptance_criteria", "clarification"]
      },
      "history": [
        {
          "date": "2026-04-05T22:30:00Z",
          "commit": "auto",
          "type": "model_change",
          "from": "ollama-cloud/nemotron-3-super",
          "to": "ollama-cloud/glm-5",
          "reason": "+33% quality. GLM-5 excels at requirement analysis and system engineering",
          "source": "research"
        }
      ],
      "performance_log": []
    },
    "history-miner": {
      "current": {
        "model": "ollama-cloud/glm-5",
        "provider": "Ollama",
        "category": "Analysis",
        "mode": "subagent",
        "color": "#A855F7",
        "description": "Analyzes git history for duplicates and past solutions",
        "benchmark": {
          "swe_bench": null,
          "fit_score": 78
        },
        "capabilities": ["git_search", "duplicate_detection", "past_solution_finder", "pattern_identification"]
      },
      "history": [],
      "performance_log": []
    },
    "capability-analyst": {
      "current": {
        "model": "openrouter/qwen/qwen3.6-plus:free",
        "provider": "OpenRouter",
        "category": "Analysis",
        "mode": "subagent",
        "color": "#14B8A6",
        "description": "Analyzes task coverage and identifies gaps",
        "benchmark": {
          "swe_bench": 78.8,
          "fit_score": 90,
          "context": "1M",
          "free": true
        },
        "capabilities": ["gap_analysis", "capability_mapping", "recommendation_generation", "coverage_analysis"]
      },
      "history": [
        {
          "date": "2026-04-05T22:30:00Z",
          "commit": "auto",
          "type": "model_change",
          "from": "ollama-cloud/nemotron-3-super",
          "to": "openrouter/qwen/qwen3.6-plus:free",
          "reason": "+23% quality, IF:90 score, 1M context, FREE via OpenRouter",
          "source": "research"
        }
      ],
      "performance_log": []
    },
    "orchestrator": {
      "current": {
        "model": "ollama-cloud/glm-5",
        "provider": "Ollama",
        "category": "Process",
        "mode": "primary",
        "color": "#0EA5E9",
        "description": "Process manager. Distributes tasks between agents",
        "benchmark": {
          "swe_bench": null,
          "fit_score": 80
        },
        "capabilities": ["task_routing", "state_management", "agent_coordination", "workflow_execution"]
      },
      "history": [],
      "performance_log": []
    },
    "release-manager": {
      "current": {
        "model": "ollama-cloud/devstral-2:123b",
        "provider": "Ollama",
        "category": "Process",
        "mode": "subagent",
        "color": "#22C55E",
        "description": "Manages git operations, semantic versioning, deployments",
        "benchmark": {
          "swe_bench": null,
          "fit_score": 75
        },
        "capabilities": ["git_operations", "version_management", "changelog_creation", "deployment"]
      },
      "history": [],
      "performance_log": []
    },
    "evaluator": {
      "current": {
        "model": "openrouter/qwen/qwen3.6-plus:free",
        "provider": "OpenRouter",
        "category": "Process",
        "mode": "subagent",
        "color": "#F97316",
        "description": "Scores agent effectiveness after task completion",
        "benchmark": {
          "swe_bench": 78.8,
          "fit_score": 90,
          "context": "1M",
          "free": true
        },
        "capabilities": ["performance_scoring", "process_analysis", "pattern_identification", "improvement_recommendations"]
      },
      "history": [
        {
          "date": "2026-04-05T05:21:00Z",
          "commit": "caf77f53c8",
          "type": "model_change",
          "from": "ollama-cloud/gpt-oss:120b",
          "to": "ollama-cloud/nemotron-3-super",
          "reason": "Nemotron 3 Super better for evaluation tasks",
          "source": "git"
        },
        {
          "date": "2026-04-05T22:30:00Z",
          "commit": "auto",
          "type": "model_change",
          "from": "ollama-cloud/nemotron-3-super",
          "to": "openrouter/qwen/qwen3.6-plus:free",
          "reason": "+4% quality, IF:90 for scoring accuracy, FREE",
          "source": "research"
        }
      ],
      "performance_log": []
    },
    "prompt-optimizer": {
      "current": {
        "model": "ollama-cloud/nemotron-3-super",
        "provider": "Ollama",
        "category": "Process",
        "mode": "subagent",
        "color": "#EC4899",
        "description": "Improves agent system prompts based on performance failures",
        "benchmark": {
          "swe_bench": 60.5,
          "fit_score": 80
        },
        "capabilities": ["prompt_analysis", "prompt_improvement", "failure_pattern_detection"],
        "recommendations": [
          {
            "target": "openrouter/qwen/qwen3.6-plus:free",
            "reason": "Terminal-Bench 61.6% > Nemotron, always-on CoT",
            "priority": "high"
          }
        ]
      },
      "history": [
        {
          "date": "2026-04-05T05:21:00Z",
          "commit": "caf77f53c8",
          "type": "model_change",
          "from": "openrouter/qwen/qwen3.6-plus:free",
          "to": "ollama-cloud/nemotron-3-super",
          "reason": "Research recommendation applied",
          "source": "git"
        }
      ],
      "performance_log": []
    },
    "the-fixer": {
      "current": {
        "model": "ollama-cloud/minimax-m2.5",
        "provider": "Ollama",
        "category": "Fixes",
        "mode": "subagent",
        "color": "#EF4444",
        "description": "Iteratively fixes bugs based on specific error reports",
        "benchmark": {
          "swe_bench": 80.2,
          "fit_score": 88
        },
        "capabilities": ["bug_fixing", "issue_resolution", "code_correction"]
      },
      "history": [],
      "performance_log": []
    },
    "product-owner": {
      "current": {
        "model": "ollama-cloud/glm-5",
        "provider": "Ollama",
        "category": "Management",
        "mode": "subagent",
        "color": "#10B981",
        "description": "Manages issue checklists, status labels, progress tracking",
        "benchmark": {
          "swe_bench": null,
          "fit_score": 76
        },
        "capabilities": ["issue_management", "prioritization", "backlog_management", "workflow_completion"]
      },
      "history": [
        {
          "date": "2026-04-05T05:21:00Z",
          "commit": "caf77f53c8",
          "type": "model_change",
          "from": "openrouter/qwen/qwen3.6-plus:free",
          "to": "ollama-cloud/glm-5",
          "reason": "GLM-5 good for management tasks",
          "source": "git"
        }
      ],
      "performance_log": []
    },
    "workflow-architect": {
      "current": {
        "model": "ollama-cloud/glm-5",
        "provider": "Ollama",
        "category": "Workflow",
        "mode": "subagent",
        "color": "#6366F1",
        "description": "Creates workflow definitions",
        "benchmark": {
          "swe_bench": null,
          "fit_score": 74
        },
        "capabilities": ["workflow_design", "process_definition", "automation_setup"]
      },
      "history": [],
      "performance_log": []
    },
    "markdown-validator": {
      "current": {
        "model": "ollama-cloud/nemotron-3-nano:30b",
        "provider": "Ollama",
        "category": "Validation",
        "mode": "subagent",
        "color": "#84CC16",
        "description": "Validates Markdown formatting",
        "benchmark": {
          "swe_bench": null,
          "fit_score": 72
        },
        "capabilities": ["markdown_validation", "formatting_check", "link_validation"]
      },
      "history": [
        {
          "date": "2026-04-05T05:21:00Z",
          "commit": "caf77f53c8",
          "type": "model_change",
          "from": "openrouter/qwen/qwen3.6-plus:free",
          "to": "ollama-cloud/nemotron-3-nano:30b",
          "reason": "Nano efficient for lightweight validation tasks",
          "source": "git"
        }
      ],
      "performance_log": []
    },
    "agent-architect": {
      "current": {
        "model": "openrouter/qwen/qwen3.6-plus:free",
        "provider": "OpenRouter",
        "category": "Meta",
        "mode": "subagent",
        "color": "#A855F7",
        "description": "Creates new agents when gaps identified",
        "benchmark": {
          "swe_bench": 78.8,
          "fit_score": 90,
          "context": "1M",
          "free": true
        },
        "capabilities": ["agent_design", "prompt_engineering", "capability_definition"]
      },
      "history": [
        {
          "date": "2026-04-05T22:30:00Z",
          "commit": "auto",
          "type": "model_change",
          "from": "ollama-cloud/nemotron-3-super",
          "to": "openrouter/qwen/qwen3.6-plus:free",
          "reason": "+22% quality, IF:90 for YAML frontmatter generation, 1M context for all agents analysis",
          "source": "research"
        }
      ],
      "performance_log": []
    },
    "planner": {
      "current": {
        "model": "ollama-cloud/nemotron-3-super",
        "provider": "Ollama",
        "category": "Cognitive",
        "mode": "subagent",
        "color": "#3B82F6",
        "description": "Task decomposition, CoT, ToT planning",
        "benchmark": {
          "swe_bench": 60.5,
          "fit_score": 84
        },
        "capabilities": ["task_decomposition", "chain_of_thought", "tree_of_thoughts", "plan_execute_reflect"]
      },
      "history": [
        {
          "date": "2026-04-05T05:21:00Z",
          "commit": "caf77f53c8",
          "type": "model_change",
          "from": "ollama-cloud/gpt-oss:120b",
          "to": "ollama-cloud/nemotron-3-super",
          "reason": "Nemotron 3 Super excels at planning",
          "source": "git"
        }
      ],
      "performance_log": []
    },
    "reflector": {
      "current": {
        "model": "ollama-cloud/nemotron-3-super",
        "provider": "Ollama",
        "category": "Cognitive",
        "mode": "subagent",
        "color": "#14B8A6",
        "description": "Self-reflection agent using Reflexion pattern",
        "benchmark": {
          "swe_bench": 60.5,
          "fit_score": 82
        },
        "capabilities": ["self_reflection", "mistake_analysis", "lesson_extraction"]
      },
      "history": [
        {
          "date": "2026-04-05T05:21:00Z",
          "commit": "caf77f53c8",
          "type": "model_change",
          "from": "ollama-cloud/gpt-oss:120b",
          "to": "ollama-cloud/nemotron-3-super",
          "reason": "Better for reflection tasks",
          "source": "git"
        }
      ],
      "performance_log": []
    },
    "memory-manager": {
      "current": {
        "model": "ollama-cloud/nemotron-3-super",
        "provider": "Ollama",
        "category": "Cognitive",
        "mode": "subagent",
        "color": "#F59E0B",
        "description": "Manages agent memory systems",
        "benchmark": {
          "swe_bench": 60.5,
          "ruler_1m": 91.75,
          "fit_score": 90
        },
        "capabilities": ["memory_retrieval", "memory_storage", "memory_consolidation", "relevance_scoring"]
      },
      "history": [
        {
          "date": "2026-04-05T05:21:00Z",
          "commit": "caf77f53c8",
          "type": "model_change",
          "from": "ollama-cloud/gpt-oss:120b",
          "to": "ollama-cloud/nemotron-3-super",
          "reason": "RULER@1M critical for memory context",
          "source": "git"
        }
      ],
      "performance_log": []
    },
    "devops-engineer": {
      "current": {
        "model": null,
        "provider": null,
        "category": "DevOps",
        "mode": "subagent",
        "color": "#2563EB",
        "description": "Docker, Kubernetes, CI/CD pipeline automation",
        "benchmark": {
          "fit_score": 0
        },
        "capabilities": ["docker", "kubernetes", "ci_cd", "infrastructure"],
        "status": "new",
        "recommendations": [
          {
            "target": "ollama-cloud/nemotron-3-super",
            "reason": "DevOps requires strong reasoning",
            "priority": "critical"
          }
        ]
      },
      "history": [],
      "performance_log": []
    },
    "flutter-developer": {
      "current": {
        "model": "ollama-cloud/qwen3-coder:480b",
        "provider": "Ollama",
        "category": "Core Dev",
        "mode": "subagent",
        "color": "#0EA5E9",
        "description": "Flutter mobile specialist",
        "benchmark": {
          "fit_score": 86
        },
        "capabilities": ["flutter_development", "state_management", "ui_components", "cross_platform"]
      },
      "history": [
        {
          "date": "2026-04-05T15:00:00Z",
          "commit": "af5f401",
          "type": "agent_created",
          "from": null,
          "to": "ollama-cloud/qwen3-coder:480b",
          "reason": "New agent for Flutter development",
          "source": "git"
        }
      ],
      "performance_log": []
    }
  },
  "providers": {
    "Ollama": {
      "models": [
        {"id": "qwen3-coder:480b", "swe_bench": 66.5, "context": "256K", "active_params": "35B"},
        {"id": "minimax-m2.5", "swe_bench": 80.2, "context": "128K"},
        {"id": "nemotron-3-super", "swe_bench": 60.5, "ruler_1m": 91.75, "context": "1M"},
        {"id": "nemotron-3-nano:30b", "swe_bench": null, "context": "128K"},
        {"id": "glm-5", "swe_bench": null, "context": "128K"},
        {"id": "gpt-oss:120b", "swe_bench": 62.4, "context": "128K"},
        {"id": "gpt-oss:20b", "swe_bench": null, "context": "128K"},
        {"id": "devstral-2:123b", "swe_bench": null, "context": "128K"},
        {"id": "deepseek-v3.2", "swe_bench": null, "context": "128K"}
      ]
    },
    "OpenRouter": {
      "models": [
        {"id": "qwen3.6-plus:free", "swe_bench": 78.8, "terminal_bench": 61.6, "context": "1M", "free": true},
        {"id": "gemma4:31b", "intelligence_index": 39, "context": "256K", "free": true}
      ]
    },
    "Groq": {
      "models": [
        {"id": "gpt-oss-120b", "speed_tps": 500, "rpd": 1000, "tpd": "200K"},
        {"id": "gpt-oss-20b", "speed_tps": 1200, "rpd": 1000},
        {"id": "kimi-k2-instruct", "speed_tps": 300, "rpm": 60},
        {"id": "qwen3-32b", "speed_tps": 400, "rpd": 1000, "tpd": "500K"},
        {"id": "llama-4-scout", "speed_tps": 350, "tpm": "30K"}
      ]
    }
  },
  "evolution_metrics": {
    "total_agents": 28,
    "agents_with_history": 17,
    "pending_recommendations": 2,
    "last_sync": "2026-04-05T22:30:00Z",
    "sync_sources": ["git", "capability-index.yaml", "kilo.jsonc", "research"]
  }
}