{ "$schema": "./agent-versions.schema.json", "version": "1.0.0", "lastUpdated": "2026-04-05T22:30:00Z", "agents": { "lead-developer": { "current": { "model": "ollama-cloud/qwen3-coder:480b", "provider": "Ollama", "category": "Core Dev", "mode": "subagent", "color": "#DC2626", "description": "Primary code writer for backend and core logic. Writes implementation to pass tests", "benchmark": { "swe_bench": 66.5, "ruler_1m": null, "terminal_bench": null, "fit_score": 92 }, "capabilities": ["code_writing", "refactoring", "bug_fixing", "implementation"] }, "history": [ { "date": "2026-04-05T05:21:00Z", "commit": "caf77f53c8", "type": "model_change", "from": null, "to": "ollama-cloud/qwen3-coder:480b", "reason": "Initial configuration from capability-index.yaml", "source": "git" } ], "performance_log": [] }, "frontend-developer": { "current": { "model": "ollama-cloud/qwen3-coder:480b", "provider": "Ollama", "category": "Core Dev", "mode": "subagent", "color": "#3B82F6", "description": "UI implementation specialist with multimodal capabilities", "benchmark": { "swe_bench": null, "ruler_1m": null, "terminal_bench": null, "fit_score": 90 }, "capabilities": ["ui_implementation", "component_creation", "styling", "responsive_design"] }, "history": [ { "date": "2026-04-05T05:21:00Z", "commit": "af5f401", "type": "agent_created", "from": null, "to": "ollama-cloud/qwen3-coder:480b", "reason": "Flutter development support added", "source": "git" } ], "performance_log": [] }, "backend-developer": { "current": { "model": "ollama-cloud/qwen3-coder:480b", "provider": "Ollama", "category": "Core Dev", "mode": "subagent", "color": "#10B981", "description": "Node.js, Express, APIs, database specialist", "benchmark": { "swe_bench": null, "ruler_1m": null, "terminal_bench": null, "fit_score": 91 }, "capabilities": ["api_development", "database_design", "server_logic", "authentication"] }, "history": [], "performance_log": [] }, "go-developer": { "current": { "model": 
"ollama-cloud/qwen3-coder:480b", "provider": "Ollama", "category": "Core Dev", "mode": "subagent", "color": "#00ADD8", "description": "Go backend services specialist", "benchmark": { "swe_bench": null, "ruler_1m": null, "terminal_bench": null, "fit_score": 85 }, "capabilities": ["go_api_development", "go_database_design", "go_concurrent_programming", "go_authentication"] }, "history": [ { "date": "2026-04-05T05:21:00Z", "commit": "caf77f53c8", "type": "model_change", "from": "ollama-cloud/deepseek-v3.2", "to": "ollama-cloud/qwen3-coder:480b", "reason": "Qwen3-Coder optimized for Go development", "source": "git" } ], "performance_log": [] }, "sdet-engineer": { "current": { "model": "ollama-cloud/qwen3-coder:480b", "provider": "Ollama", "category": "QA", "mode": "subagent", "color": "#8B5CF6", "description": "Writes tests following TDD methodology. Tests MUST fail initially", "benchmark": { "swe_bench": null, "ruler_1m": null, "terminal_bench": null, "fit_score": 88 }, "capabilities": ["unit_tests", "integration_tests", "e2e_tests", "test_planning", "visual_regression"] }, "history": [], "performance_log": [] }, "code-skeptic": { "current": { "model": "ollama-cloud/minimax-m2.5", "provider": "Ollama", "category": "QA", "mode": "subagent", "color": "#EF4444", "description": "Adversarial code reviewer. Finds problems and issues. 
Does NOT suggest implementations", "benchmark": { "swe_bench": 80.2, "ruler_1m": null, "terminal_bench": null, "fit_score": 85 }, "capabilities": ["code_review", "security_review", "style_check", "issue_identification"] }, "history": [], "performance_log": [] }, "security-auditor": { "current": { "model": "ollama-cloud/nemotron-3-super", "provider": "Ollama", "category": "Security", "mode": "subagent", "color": "#DC2626", "description": "Scans for security vulnerabilities, OWASP Top 10, dependency CVEs", "benchmark": { "swe_bench": 60.5, "ruler_1m": 91.75, "pinch_bench": 85.6, "fit_score": 80 }, "capabilities": ["vulnerability_scan", "owasp_check", "secret_detection", "auth_review"] }, "history": [ { "date": "2026-04-05T05:21:00Z", "commit": "caf77f53c8", "type": "model_change", "from": "ollama-cloud/deepseek-v3.2", "to": "ollama-cloud/nemotron-3-super", "reason": "Nemotron 3 Super optimized for security analysis with RULER@1M", "source": "git" } ], "performance_log": [] }, "performance-engineer": { "current": { "model": "ollama-cloud/nemotron-3-super", "provider": "Ollama", "category": "Performance", "mode": "subagent", "color": "#F59E0B", "description": "Reviews code for performance issues: N+1 queries, memory leaks, algorithmic complexity", "benchmark": { "swe_bench": 60.5, "ruler_1m": 91.75, "pinch_bench": 85.6, "fit_score": 82 }, "capabilities": ["performance_analysis", "n_plus_one_detection", "memory_leak_check", "algorithm_analysis"] }, "history": [ { "date": "2026-04-05T05:21:00Z", "commit": "caf77f53c8", "type": "model_change", "from": "ollama-cloud/gpt-oss:120b", "to": "ollama-cloud/nemotron-3-super", "reason": "Better reasoning for performance analysis", "source": "git" } ], "performance_log": [] }, "browser-automation": { "current": { "model": "ollama-cloud/qwen3-coder:480b", "provider": "Ollama", "category": "Testing", "mode": "subagent", "color": "#0EA5E9", "description": "Browser automation agent using Playwright MCP for E2E testing", "benchmark": { 
"swe_bench": null, "fit_score": 87 }, "capabilities": ["e2e_browser_tests", "form_filling", "navigation_testing", "screenshot_capture"] }, "history": [], "performance_log": [] }, "visual-tester": { "current": { "model": "ollama-cloud/qwen3-coder:480b", "provider": "Ollama", "category": "Testing", "mode": "subagent", "color": "#EC4899", "description": "Visual regression testing agent that compares screenshots", "benchmark": { "swe_bench": null, "fit_score": 82 }, "capabilities": ["visual_regression", "pixel_comparison", "screenshot_diff", "ui_validation"] }, "history": [], "performance_log": [] }, "system-analyst": { "current": { "model": "ollama-cloud/glm-5", "provider": "Ollama", "category": "Analysis", "mode": "subagent", "color": "#6366F1", "description": "Designs technical specifications, data schemas, and API contracts", "benchmark": { "swe_bench": null, "fit_score": 82 }, "capabilities": ["architecture_design", "api_specification", "database_modeling", "technical_documentation"] }, "history": [ { "date": "2026-04-05T05:21:00Z", "commit": "caf77f53c8", "type": "model_change", "from": "ollama-cloud/gpt-oss:120b", "to": "ollama-cloud/glm-5", "reason": "GLM-5 better for system engineering and architecture", "source": "git" } ], "performance_log": [] }, "requirement-refiner": { "current": { "model": "ollama-cloud/glm-5", "provider": "Ollama", "category": "Analysis", "mode": "subagent", "color": "#8B5CF6", "description": "Converts vague ideas into strict User Stories with acceptance criteria", "benchmark": { "swe_bench": null, "fit_score": 80, "context": "128K" }, "capabilities": ["requirement_analysis", "user_story_creation", "acceptance_criteria", "clarification"] }, "history": [ { "date": "2026-04-05T22:30:00Z", "commit": "auto", "type": "model_change", "from": "ollama-cloud/nemotron-3-super", "to": "ollama-cloud/glm-5", "reason": "+33% quality. 
GLM-5 excels at requirement analysis and system engineering", "source": "research" } ], "performance_log": [] }, "history-miner": { "current": { "model": "ollama-cloud/glm-5", "provider": "Ollama", "category": "Analysis", "mode": "subagent", "color": "#A855F7", "description": "Analyzes git history for duplicates and past solutions", "benchmark": { "swe_bench": null, "fit_score": 78 }, "capabilities": ["git_search", "duplicate_detection", "past_solution_finder", "pattern_identification"] }, "history": [], "performance_log": [] }, "capability-analyst": { "current": { "model": "openrouter/qwen/qwen3.6-plus:free", "provider": "OpenRouter", "category": "Analysis", "mode": "subagent", "color": "#14B8A6", "description": "Analyzes task coverage and identifies gaps", "benchmark": { "swe_bench": 78.8, "fit_score": 90, "context": "1M", "free": true }, "capabilities": ["gap_analysis", "capability_mapping", "recommendation_generation", "coverage_analysis"] }, "history": [ { "date": "2026-04-05T22:30:00Z", "commit": "auto", "type": "model_change", "from": "ollama-cloud/nemotron-3-super", "to": "openrouter/qwen/qwen3.6-plus:free", "reason": "+23% quality, IF:90 score, 1M context, FREE via OpenRouter", "source": "research" } ], "performance_log": [] }, "orchestrator": { "current": { "model": "ollama-cloud/glm-5", "provider": "Ollama", "category": "Process", "mode": "primary", "color": "#0EA5E9", "description": "Process manager. 
Distributes tasks between agents", "benchmark": { "swe_bench": null, "fit_score": 80 }, "capabilities": ["task_routing", "state_management", "agent_coordination", "workflow_execution"] }, "history": [], "performance_log": [] }, "release-manager": { "current": { "model": "ollama-cloud/devstral-2:123b", "provider": "Ollama", "category": "Process", "mode": "subagent", "color": "#22C55E", "description": "Manages git operations, semantic versioning, deployments", "benchmark": { "swe_bench": null, "fit_score": 75 }, "capabilities": ["git_operations", "version_management", "changelog_creation", "deployment"] }, "history": [], "performance_log": [] }, "evaluator": { "current": { "model": "openrouter/qwen/qwen3.6-plus:free", "provider": "OpenRouter", "category": "Process", "mode": "subagent", "color": "#F97316", "description": "Scores agent effectiveness after task completion", "benchmark": { "swe_bench": 78.8, "fit_score": 90, "context": "1M", "free": true }, "capabilities": ["performance_scoring", "process_analysis", "pattern_identification", "improvement_recommendations"] }, "history": [ { "date": "2026-04-05T05:21:00Z", "commit": "caf77f53c8", "type": "model_change", "from": "ollama-cloud/gpt-oss:120b", "to": "ollama-cloud/nemotron-3-super", "reason": "Nemotron 3 Super better for evaluation tasks", "source": "git" }, { "date": "2026-04-05T22:30:00Z", "commit": "auto", "type": "model_change", "from": "ollama-cloud/nemotron-3-super", "to": "openrouter/qwen/qwen3.6-plus:free", "reason": "+4% quality, IF:90 for scoring accuracy, FREE", "source": "research" } ], "performance_log": [] }, "prompt-optimizer": { "current": { "model": "ollama-cloud/nemotron-3-super", "provider": "Ollama", "category": "Process", "mode": "subagent", "color": "#EC4899", "description": "Improves agent system prompts based on performance failures", "benchmark": { "swe_bench": 60.5, "fit_score": 80 }, "capabilities": ["prompt_analysis", "prompt_improvement", "failure_pattern_detection"], 
"recommendations": [ { "target": "openrouter/qwen/qwen3.6-plus:free", "reason": "Terminal-Bench 61.6% > Nemotron, always-on CoT", "priority": "high" } ] }, "history": [ { "date": "2026-04-05T05:21:00Z", "commit": "caf77f53c8", "type": "model_change", "from": "openrouter/qwen/qwen3.6-plus:free", "to": "ollama-cloud/nemotron-3-super", "reason": "Research recommendation applied", "source": "git" } ], "performance_log": [] }, "the-fixer": { "current": { "model": "ollama-cloud/minimax-m2.5", "provider": "Ollama", "category": "Fixes", "mode": "subagent", "color": "#EF4444", "description": "Iteratively fixes bugs based on specific error reports", "benchmark": { "swe_bench": 80.2, "fit_score": 88 }, "capabilities": ["bug_fixing", "issue_resolution", "code_correction"] }, "history": [], "performance_log": [] }, "product-owner": { "current": { "model": "ollama-cloud/glm-5", "provider": "Ollama", "category": "Management", "mode": "subagent", "color": "#10B981", "description": "Manages issue checklists, status labels, progress tracking", "benchmark": { "swe_bench": null, "fit_score": 76 }, "capabilities": ["issue_management", "prioritization", "backlog_management", "workflow_completion"] }, "history": [ { "date": "2026-04-05T05:21:00Z", "commit": "caf77f53c8", "type": "model_change", "from": "openrouter/qwen/qwen3.6-plus:free", "to": "ollama-cloud/glm-5", "reason": "GLM-5 good for management tasks", "source": "git" } ], "performance_log": [] }, "workflow-architect": { "current": { "model": "ollama-cloud/glm-5", "provider": "Ollama", "category": "Workflow", "mode": "subagent", "color": "#6366F1", "description": "Creates workflow definitions", "benchmark": { "swe_bench": null, "fit_score": 74 }, "capabilities": ["workflow_design", "process_definition", "automation_setup"] }, "history": [], "performance_log": [] }, "markdown-validator": { "current": { "model": "ollama-cloud/nemotron-3-nano:30b", "provider": "Ollama", "category": "Validation", "mode": "subagent", "color": 
"#84CC16", "description": "Validates Markdown formatting", "benchmark": { "swe_bench": null, "fit_score": 72 }, "capabilities": ["markdown_validation", "formatting_check", "link_validation"] }, "history": [ { "date": "2026-04-05T05:21:00Z", "commit": "caf77f53c8", "type": "model_change", "from": "openrouter/qwen/qwen3.6-plus:free", "to": "ollama-cloud/nemotron-3-nano:30b", "reason": "Nano efficient for lightweight validation tasks", "source": "git" } ], "performance_log": [] }, "agent-architect": { "current": { "model": "openrouter/qwen/qwen3.6-plus:free", "provider": "OpenRouter", "category": "Meta", "mode": "subagent", "color": "#A855F7", "description": "Creates new agents when gaps identified", "benchmark": { "swe_bench": 78.8, "fit_score": 90, "context": "1M", "free": true }, "capabilities": ["agent_design", "prompt_engineering", "capability_definition"] }, "history": [ { "date": "2026-04-05T22:30:00Z", "commit": "auto", "type": "model_change", "from": "ollama-cloud/nemotron-3-super", "to": "openrouter/qwen/qwen3.6-plus:free", "reason": "+22% quality, IF:90 for YAML frontmatter generation, 1M context for all agents analysis", "source": "research" } ], "performance_log": [] }, "planner": { "current": { "model": "ollama-cloud/nemotron-3-super", "provider": "Ollama", "category": "Cognitive", "mode": "subagent", "color": "#3B82F6", "description": "Task decomposition, CoT, ToT planning", "benchmark": { "swe_bench": 60.5, "fit_score": 84 }, "capabilities": ["task_decomposition", "chain_of_thought", "tree_of_thoughts", "plan_execute_reflect"] }, "history": [ { "date": "2026-04-05T05:21:00Z", "commit": "caf77f53c8", "type": "model_change", "from": "ollama-cloud/gpt-oss:120b", "to": "ollama-cloud/nemotron-3-super", "reason": "Nemotron 3 Super excels at planning", "source": "git" } ], "performance_log": [] }, "reflector": { "current": { "model": "ollama-cloud/nemotron-3-super", "provider": "Ollama", "category": "Cognitive", "mode": "subagent", "color": "#14B8A6", 
"description": "Self-reflection agent using Reflexion pattern", "benchmark": { "swe_bench": 60.5, "fit_score": 82 }, "capabilities": ["self_reflection", "mistake_analysis", "lesson_extraction"] }, "history": [ { "date": "2026-04-05T05:21:00Z", "commit": "caf77f53c8", "type": "model_change", "from": "ollama-cloud/gpt-oss:120b", "to": "ollama-cloud/nemotron-3-super", "reason": "Better for reflection tasks", "source": "git" } ], "performance_log": [] }, "memory-manager": { "current": { "model": "ollama-cloud/nemotron-3-super", "provider": "Ollama", "category": "Cognitive", "mode": "subagent", "color": "#F59E0B", "description": "Manages agent memory systems", "benchmark": { "swe_bench": 60.5, "ruler_1m": 91.75, "fit_score": 90 }, "capabilities": ["memory_retrieval", "memory_storage", "memory_consolidation", "relevance_scoring"] }, "history": [ { "date": "2026-04-05T05:21:00Z", "commit": "caf77f53c8", "type": "model_change", "from": "ollama-cloud/gpt-oss:120b", "to": "ollama-cloud/nemotron-3-super", "reason": "RULER@1M critical for memory ctx", "source": "git" } ], "performance_log": [] }, "devops-engineer": { "current": { "model": null, "provider": null, "category": "DevOps", "mode": "subagent", "color": "#2563EB", "description": "Docker, Kubernetes, CI/CD pipeline automation", "benchmark": { "fit_score": 0 }, "capabilities": ["docker", "kubernetes", "ci_cd", "infrastructure"], "status": "new", "recommendations": [ { "target": "ollama-cloud/nemotron-3-super", "reason": "DevOps requires strong reasoning", "priority": "critical" } ] }, "history": [], "performance_log": [] }, "flutter-developer": { "current": { "model": "ollama-cloud/qwen3-coder:480b", "provider": "Ollama", "category": "Core Dev", "mode": "subagent", "color": "#0EA5E9", "description": "Flutter mobile specialist", "benchmark": { "fit_score": 86 }, "capabilities": ["flutter_development", "state_management", "ui_components", "cross_platform"] }, "history": [ { "date": "2026-04-05T15:00:00Z", "commit": 
"af5f401", "type": "agent_created", "from": null, "to": "ollama-cloud/qwen3-coder:480b", "reason": "New agent for Flutter development", "source": "git" } ], "performance_log": [] } }, "providers": { "Ollama": { "models": [ {"id": "qwen3-coder:480b", "swe_bench": 66.5, "context": "256K", "active_params": "35B"}, {"id": "minimax-m2.5", "swe_bench": 80.2, "context": "128K"}, {"id": "nemotron-3-super", "swe_bench": 60.5, "ruler_1m": 91.75, "context": "1M"}, {"id": "nemotron-3-nano:30b", "swe_bench": null, "context": "128K"}, {"id": "glm-5", "swe_bench": null, "context": "128K"}, {"id": "gpt-oss:120b", "swe_bench": 62.4, "context": "130K"}, {"id": "gpt-oss:20b", "swe_bench": null, "context": "128K"}, {"id": "devstral-2:123b", "swe_bench": null, "context": "128K"}, {"id": "deepseek-v3.2", "swe_bench": null, "context": "128K"} ] }, "OpenRouter": { "models": [ {"id": "qwen3.6-plus:free", "swe_bench": 78.8, "terminal_bench": 61.6, "context": "1M", "free": true}, {"id": "gemma4:31b", "intelligence_index": 39, "context": "256K", "free": true} ] }, "Groq": { "models": [ {"id": "gpt-oss-120b", "speed_tps": 500, "rpd": 1000, "tpd": "200K"}, {"id": "gpt-oss-20b", "speed_tps": 1200, "rpd": 1000}, {"id": "kimi-k2-instruct", "speed_tps": 300, "rpm": 60}, {"id": "qwen3-32b", "speed_tps": 400, "rpd": 1000, "tpd": "500K"}, {"id": "llama-4-scout", "speed_tps": 350, "tpm": "30K"} ] } }, "evolution_metrics": { "total_agents": 28, "agents_with_history": 17, "pending_recommendations": 2, "last_sync": "2026-04-05T22:30:00Z", "sync_sources": ["git", "capability-index.yaml", "kilo.jsonc", "research"] } }