Files
APAW/agent-evolution/data/agent-versions.json
NW 1ab9939c92 fix: correct OpenRouter model paths across all files
Fixed format from 'qwen/...' to 'openrouter/qwen/...' for:
- product-owner.md
- prompt-optimizer.md
- workflow-architect.md
- status.md, blog.md, booking.md, commerce.md
- kilo.jsonc (default model + ask agent)
- agent-frontmatter-validation.md
- agent-versions.json (recommendations and history)
2026-04-05 23:47:14 +01:00

736 lines
23 KiB
JSON

{
"$schema": "./agent-versions.schema.json",
"version": "1.0.0",
"lastUpdated": "2026-04-05T22:30:00Z",
"agents": {
"lead-developer": {
"current": {
"model": "ollama-cloud/qwen3-coder:480b",
"provider": "Ollama",
"category": "Core Dev",
"mode": "subagent",
"color": "#DC2626",
"description": "Primary code writer for backend and core logic. Writes implementation to pass tests",
"benchmark": {
"swe_bench": 66.5,
"ruler_1m": null,
"terminal_bench": null,
"fit_score": 92
},
"capabilities": ["code_writing", "refactoring", "bug_fixing", "implementation"]
},
"history": [
{
"date": "2026-04-05T05:21:00Z",
"commit": "caf77f53c8",
"type": "model_change",
"from": null,
"to": "ollama-cloud/qwen3-coder:480b",
"reason": "Initial configuration from capability-index.yaml",
"source": "git"
}
],
"performance_log": []
},
"frontend-developer": {
"current": {
"model": "ollama-cloud/qwen3-coder:480b",
"provider": "Ollama",
"category": "Core Dev",
"mode": "subagent",
"color": "#3B82F6",
"description": "UI implementation specialist with multimodal capabilities",
"benchmark": {
"swe_bench": null,
"ruler_1m": null,
"terminal_bench": null,
"fit_score": 90
},
"capabilities": ["ui_implementation", "component_creation", "styling", "responsive_design"]
},
"history": [
{
"date": "2026-04-05T05:21:00Z",
"commit": "af5f401",
"type": "agent_created",
"from": null,
"to": "ollama-cloud/qwen3-coder:480b",
"reason": "Flutter development support added",
"source": "git"
}
],
"performance_log": []
},
"backend-developer": {
"current": {
"model": "ollama-cloud/qwen3-coder:480b",
"provider": "Ollama",
"category": "Core Dev",
"mode": "subagent",
"color": "#10B981",
"description": "Node.js, Express, APIs, database specialist",
"benchmark": {
"swe_bench": null,
"ruler_1m": null,
"terminal_bench": null,
"fit_score": 91
},
"capabilities": ["api_development", "database_design", "server_logic", "authentication"]
},
"history": [],
"performance_log": []
},
"go-developer": {
"current": {
"model": "ollama-cloud/qwen3-coder:480b",
"provider": "Ollama",
"category": "Core Dev",
"mode": "subagent",
"color": "#00ADD8",
"description": "Go backend services specialist",
"benchmark": {
"swe_bench": null,
"ruler_1m": null,
"terminal_bench": null,
"fit_score": 85
},
"capabilities": ["go_api_development", "go_database_design", "go_concurrent_programming", "go_authentication"]
},
"history": [
{
"date": "2026-04-05T05:21:00Z",
"commit": "caf77f53c8",
"type": "model_change",
"from": "ollama-cloud/deepseek-v3.2",
"to": "ollama-cloud/qwen3-coder:480b",
"reason": "Qwen3-Coder optimized for Go development",
"source": "git"
}
],
"performance_log": []
},
"sdet-engineer": {
"current": {
"model": "ollama-cloud/qwen3-coder:480b",
"provider": "Ollama",
"category": "QA",
"mode": "subagent",
"color": "#8B5CF6",
"description": "Writes tests following TDD methodology. Tests MUST fail initially",
"benchmark": {
"swe_bench": null,
"ruler_1m": null,
"terminal_bench": null,
"fit_score": 88
},
"capabilities": ["unit_tests", "integration_tests", "e2e_tests", "test_planning", "visual_regression"]
},
"history": [],
"performance_log": []
},
"code-skeptic": {
"current": {
"model": "ollama-cloud/minimax-m2.5",
"provider": "Ollama",
"category": "QA",
"mode": "subagent",
"color": "#EF4444",
"description": "Adversarial code reviewer. Finds problems and issues. Does NOT suggest implementations",
"benchmark": {
"swe_bench": 80.2,
"ruler_1m": null,
"terminal_bench": null,
"fit_score": 85
},
"capabilities": ["code_review", "security_review", "style_check", "issue_identification"]
},
"history": [],
"performance_log": []
},
"security-auditor": {
"current": {
"model": "ollama-cloud/nemotron-3-super",
"provider": "Ollama",
"category": "Security",
"mode": "subagent",
"color": "#DC2626",
"description": "Scans for security vulnerabilities, OWASP Top 10, dependency CVEs",
"benchmark": {
"swe_bench": 60.5,
"ruler_1m": 91.75,
"pinch_bench": 85.6,
"fit_score": 80
},
"capabilities": ["vulnerability_scan", "owasp_check", "secret_detection", "auth_review"]
},
"history": [
{
"date": "2026-04-05T05:21:00Z",
"commit": "caf77f53c8",
"type": "model_change",
"from": "ollama-cloud/deepseek-v3.2",
"to": "ollama-cloud/nemotron-3-super",
"reason": "Nemotron 3 Super optimized for security analysis with RULER@1M",
"source": "git"
}
],
"performance_log": []
},
"performance-engineer": {
"current": {
"model": "ollama-cloud/nemotron-3-super",
"provider": "Ollama",
"category": "Performance",
"mode": "subagent",
"color": "#F59E0B",
"description": "Reviews code for performance issues: N+1 queries, memory leaks, algorithmic complexity",
"benchmark": {
"swe_bench": 60.5,
"ruler_1m": 91.75,
"pinch_bench": 85.6,
"fit_score": 82
},
"capabilities": ["performance_analysis", "n_plus_one_detection", "memory_leak_check", "algorithm_analysis"]
},
"history": [
{
"date": "2026-04-05T05:21:00Z",
"commit": "caf77f53c8",
"type": "model_change",
"from": "ollama-cloud/gpt-oss:120b",
"to": "ollama-cloud/nemotron-3-super",
"reason": "Better reasoning for performance analysis",
"source": "git"
}
],
"performance_log": []
},
"browser-automation": {
"current": {
"model": "ollama-cloud/qwen3-coder:480b",
"provider": "Ollama",
"category": "Testing",
"mode": "subagent",
"color": "#0EA5E9",
"description": "Browser automation agent using Playwright MCP for E2E testing",
"benchmark": {
"swe_bench": null,
"fit_score": 87
},
"capabilities": ["e2e_browser_tests", "form_filling", "navigation_testing", "screenshot_capture"]
},
"history": [],
"performance_log": []
},
"visual-tester": {
"current": {
"model": "ollama-cloud/qwen3-coder:480b",
"provider": "Ollama",
"category": "Testing",
"mode": "subagent",
"color": "#EC4899",
"description": "Visual regression testing agent that compares screenshots",
"benchmark": {
"swe_bench": null,
"fit_score": 82
},
"capabilities": ["visual_regression", "pixel_comparison", "screenshot_diff", "ui_validation"]
},
"history": [],
"performance_log": []
},
"system-analyst": {
"current": {
"model": "ollama-cloud/glm-5",
"provider": "Ollama",
"category": "Analysis",
"mode": "subagent",
"color": "#6366F1",
"description": "Designs technical specifications, data schemas, and API contracts",
"benchmark": {
"swe_bench": null,
"fit_score": 82
},
"capabilities": ["architecture_design", "api_specification", "database_modeling", "technical_documentation"]
},
"history": [
{
"date": "2026-04-05T05:21:00Z",
"commit": "caf77f53c8",
"type": "model_change",
"from": "ollama-cloud/gpt-oss:120b",
"to": "ollama-cloud/glm-5",
"reason": "GLM-5 better for system engineering and architecture",
"source": "git"
}
],
"performance_log": []
},
"requirement-refiner": {
"current": {
"model": "ollama-cloud/glm-5",
"provider": "Ollama",
"category": "Analysis",
"mode": "subagent",
"color": "#8B5CF6",
"description": "Converts vague ideas into strict User Stories with acceptance criteria",
"benchmark": {
"swe_bench": null,
"fit_score": 80,
"context": "128K"
},
"capabilities": ["requirement_analysis", "user_story_creation", "acceptance_criteria", "clarification"]
},
"history": [
{
"date": "2026-04-05T22:30:00Z",
"commit": "auto",
"type": "model_change",
"from": "ollama-cloud/nemotron-3-super",
"to": "ollama-cloud/glm-5",
"reason": "+33% quality. GLM-5 excels at requirement analysis and system engineering",
"source": "research"
}
],
"performance_log": []
},
"history-miner": {
"current": {
"model": "ollama-cloud/glm-5",
"provider": "Ollama",
"category": "Analysis",
"mode": "subagent",
"color": "#A855F7",
"description": "Analyzes git history for duplicates and past solutions",
"benchmark": {
"swe_bench": null,
"fit_score": 78
},
"capabilities": ["git_search", "duplicate_detection", "past_solution_finder", "pattern_identification"]
},
"history": [],
"performance_log": []
},
"capability-analyst": {
"current": {
"model": "openrouter/qwen/qwen3.6-plus:free",
"provider": "OpenRouter",
"category": "Analysis",
"mode": "subagent",
"color": "#14B8A6",
"description": "Analyzes task coverage and identifies gaps",
"benchmark": {
"swe_bench": 78.8,
"fit_score": 90,
"context": "1M",
"free": true
},
"capabilities": ["gap_analysis", "capability_mapping", "recommendation_generation", "coverage_analysis"]
},
"history": [
{
"date": "2026-04-05T22:30:00Z",
"commit": "auto",
"type": "model_change",
"from": "ollama-cloud/nemotron-3-super",
"to": "openrouter/qwen/qwen3.6-plus:free",
"reason": "+23% quality, IF:90 score, 1M context, FREE via OpenRouter",
"source": "research"
}
],
"performance_log": []
},
"orchestrator": {
"current": {
"model": "ollama-cloud/glm-5",
"provider": "Ollama",
"category": "Process",
"mode": "primary",
"color": "#0EA5E9",
"description": "Process manager. Distributes tasks between agents",
"benchmark": {
"swe_bench": null,
"fit_score": 80
},
"capabilities": ["task_routing", "state_management", "agent_coordination", "workflow_execution"]
},
"history": [],
"performance_log": []
},
"release-manager": {
"current": {
"model": "ollama-cloud/devstral-2:123b",
"provider": "Ollama",
"category": "Process",
"mode": "subagent",
"color": "#22C55E",
"description": "Manages git operations, semantic versioning, deployments",
"benchmark": {
"swe_bench": null,
"fit_score": 75
},
"capabilities": ["git_operations", "version_management", "changelog_creation", "deployment"]
},
"history": [],
"performance_log": []
},
"evaluator": {
"current": {
"model": "openrouter/qwen/qwen3.6-plus:free",
"provider": "OpenRouter",
"category": "Process",
"mode": "subagent",
"color": "#F97316",
"description": "Scores agent effectiveness after task completion",
"benchmark": {
"swe_bench": 78.8,
"fit_score": 90,
"context": "1M",
"free": true
},
"capabilities": ["performance_scoring", "process_analysis", "pattern_identification", "improvement_recommendations"]
},
"history": [
{
"date": "2026-04-05T05:21:00Z",
"commit": "caf77f53c8",
"type": "model_change",
"from": "ollama-cloud/gpt-oss:120b",
"to": "ollama-cloud/nemotron-3-super",
"reason": "Nemotron 3 Super better for evaluation tasks",
"source": "git"
},
{
"date": "2026-04-05T22:30:00Z",
"commit": "auto",
"type": "model_change",
"from": "ollama-cloud/nemotron-3-super",
"to": "openrouter/qwen/qwen3.6-plus:free",
"reason": "+4% quality, IF:90 for scoring accuracy, FREE",
"source": "research"
}
],
"performance_log": []
},
"prompt-optimizer": {
"current": {
"model": "ollama-cloud/nemotron-3-super",
"provider": "Ollama",
"category": "Process",
"mode": "subagent",
"color": "#EC4899",
"description": "Improves agent system prompts based on performance failures",
"benchmark": {
"swe_bench": 60.5,
"fit_score": 80
},
"capabilities": ["prompt_analysis", "prompt_improvement", "failure_pattern_detection"],
"recommendations": [
{
"target": "openrouter/qwen/qwen3.6-plus:free",
"reason": "Terminal-Bench 61.6% > Nemotron, always-on CoT",
"priority": "high"
}
]
},
"history": [
{
"date": "2026-04-05T05:21:00Z",
"commit": "caf77f53c8",
"type": "model_change",
"from": "openrouter/qwen/qwen3.6-plus:free",
"to": "ollama-cloud/nemotron-3-super",
"reason": "Research recommendation applied",
"source": "git"
}
],
"performance_log": []
},
"the-fixer": {
"current": {
"model": "ollama-cloud/minimax-m2.5",
"provider": "Ollama",
"category": "Fixes",
"mode": "subagent",
"color": "#EF4444",
"description": "Iteratively fixes bugs based on specific error reports",
"benchmark": {
"swe_bench": 80.2,
"fit_score": 88
},
"capabilities": ["bug_fixing", "issue_resolution", "code_correction"]
},
"history": [],
"performance_log": []
},
"product-owner": {
"current": {
"model": "ollama-cloud/glm-5",
"provider": "Ollama",
"category": "Management",
"mode": "subagent",
"color": "#10B981",
"description": "Manages issue checklists, status labels, progress tracking",
"benchmark": {
"swe_bench": null,
"fit_score": 76
},
"capabilities": ["issue_management", "prioritization", "backlog_management", "workflow_completion"]
},
"history": [
{
"date": "2026-04-05T05:21:00Z",
"commit": "caf77f53c8",
"type": "model_change",
"from": "openrouter/qwen/qwen3.6-plus:free",
"to": "ollama-cloud/glm-5",
"reason": "GLM-5 good for management tasks",
"source": "git"
}
],
"performance_log": []
},
"workflow-architect": {
"current": {
"model": "ollama-cloud/glm-5",
"provider": "Ollama",
"category": "Workflow",
"mode": "subagent",
"color": "#6366F1",
"description": "Creates workflow definitions",
"benchmark": {
"swe_bench": null,
"fit_score": 74
},
"capabilities": ["workflow_design", "process_definition", "automation_setup"]
},
"history": [],
"performance_log": []
},
"markdown-validator": {
"current": {
"model": "ollama-cloud/nemotron-3-nano:30b",
"provider": "Ollama",
"category": "Validation",
"mode": "subagent",
"color": "#84CC16",
"description": "Validates Markdown formatting",
"benchmark": {
"swe_bench": null,
"fit_score": 72
},
"capabilities": ["markdown_validation", "formatting_check", "link_validation"]
},
"history": [
{
"date": "2026-04-05T05:21:00Z",
"commit": "caf77f53c8",
"type": "model_change",
"from": "openrouter/qwen/qwen3.6-plus:free",
"to": "ollama-cloud/nemotron-3-nano:30b",
"reason": "Nano efficient for lightweight validation tasks",
"source": "git"
}
],
"performance_log": []
},
"agent-architect": {
"current": {
"model": "openrouter/qwen/qwen3.6-plus:free",
"provider": "OpenRouter",
"category": "Meta",
"mode": "subagent",
"color": "#A855F7",
"description": "Creates new agents when gaps identified",
"benchmark": {
"swe_bench": 78.8,
"fit_score": 90,
"context": "1M",
"free": true
},
"capabilities": ["agent_design", "prompt_engineering", "capability_definition"]
},
"history": [
{
"date": "2026-04-05T22:30:00Z",
"commit": "auto",
"type": "model_change",
"from": "ollama-cloud/nemotron-3-super",
"to": "openrouter/qwen/qwen3.6-plus:free",
"reason": "+22% quality, IF:90 for YAML frontmatter generation, 1M context for all agents analysis",
"source": "research"
}
],
"performance_log": []
},
"planner": {
"current": {
"model": "ollama-cloud/nemotron-3-super",
"provider": "Ollama",
"category": "Cognitive",
"mode": "subagent",
"color": "#3B82F6",
"description": "Task decomposition, CoT, ToT planning",
"benchmark": {
"swe_bench": 60.5,
"fit_score": 84
},
"capabilities": ["task_decomposition", "chain_of_thought", "tree_of_thoughts", "plan_execute_reflect"]
},
"history": [
{
"date": "2026-04-05T05:21:00Z",
"commit": "caf77f53c8",
"type": "model_change",
"from": "ollama-cloud/gpt-oss:120b",
"to": "ollama-cloud/nemotron-3-super",
"reason": "Nemotron 3 Super excels at planning",
"source": "git"
}
],
"performance_log": []
},
"reflector": {
"current": {
"model": "ollama-cloud/nemotron-3-super",
"provider": "Ollama",
"category": "Cognitive",
"mode": "subagent",
"color": "#14B8A6",
"description": "Self-reflection agent using Reflexion pattern",
"benchmark": {
"swe_bench": 60.5,
"fit_score": 82
},
"capabilities": ["self_reflection", "mistake_analysis", "lesson_extraction"]
},
"history": [
{
"date": "2026-04-05T05:21:00Z",
"commit": "caf77f53c8",
"type": "model_change",
"from": "ollama-cloud/gpt-oss:120b",
"to": "ollama-cloud/nemotron-3-super",
"reason": "Better for reflection tasks",
"source": "git"
}
],
"performance_log": []
},
"memory-manager": {
"current": {
"model": "ollama-cloud/nemotron-3-super",
"provider": "Ollama",
"category": "Cognitive",
"mode": "subagent",
"color": "#F59E0B",
"description": "Manages agent memory systems",
"benchmark": {
"swe_bench": 60.5,
"ruler_1m": 91.75,
"fit_score": 90
},
"capabilities": ["memory_retrieval", "memory_storage", "memory_consolidation", "relevance_scoring"]
},
"history": [
{
"date": "2026-04-05T05:21:00Z",
"commit": "caf77f53c8",
"type": "model_change",
"from": "ollama-cloud/gpt-oss:120b",
"to": "ollama-cloud/nemotron-3-super",
"reason": "RULER@1M is critical for memory context",
"source": "git"
}
],
"performance_log": []
},
"devops-engineer": {
"current": {
"model": null,
"provider": null,
"category": "DevOps",
"mode": "subagent",
"color": "#2563EB",
"description": "Docker, Kubernetes, CI/CD pipeline automation",
"benchmark": {
"fit_score": 0
},
"capabilities": ["docker", "kubernetes", "ci_cd", "infrastructure"],
"status": "new",
"recommendations": [
{
"target": "ollama-cloud/nemotron-3-super",
"reason": "DevOps requires strong reasoning",
"priority": "critical"
}
]
},
"history": [],
"performance_log": []
},
"flutter-developer": {
"current": {
"model": "ollama-cloud/qwen3-coder:480b",
"provider": "Ollama",
"category": "Core Dev",
"mode": "subagent",
"color": "#0EA5E9",
"description": "Flutter mobile specialist",
"benchmark": {
"fit_score": 86
},
"capabilities": ["flutter_development", "state_management", "ui_components", "cross_platform"]
},
"history": [
{
"date": "2026-04-05T15:00:00Z",
"commit": "af5f401",
"type": "agent_created",
"from": null,
"to": "ollama-cloud/qwen3-coder:480b",
"reason": "New agent for Flutter development",
"source": "git"
}
],
"performance_log": []
}
},
"providers": {
"Ollama": {
"models": [
{"id": "qwen3-coder:480b", "swe_bench": 66.5, "context": "256K", "active_params": "35B"},
{"id": "minimax-m2.5", "swe_bench": 80.2, "context": "128K"},
{"id": "nemotron-3-super", "swe_bench": 60.5, "ruler_1m": 91.75, "context": "1M"},
{"id": "nemotron-3-nano:30b", "swe_bench": null, "context": "128K"},
{"id": "glm-5", "swe_bench": null, "context": "128K"},
{"id": "gpt-oss:120b", "swe_bench": 62.4, "context": "130K"},
{"id": "gpt-oss:20b", "swe_bench": null, "context": "128K"},
{"id": "devstral-2:123b", "swe_bench": null, "context": "128K"},
{"id": "deepseek-v3.2", "swe_bench": null, "context": "128K"}
]
},
"OpenRouter": {
"models": [
{"id": "qwen3.6-plus:free", "swe_bench": null, "terminal_bench": 61.6, "context": "1M", "free": true},
{"id": "gemma4:31b", "intelligence_index": 39, "context": "256K", "free": true}
]
},
"Groq": {
"models": [
{"id": "gpt-oss-120b", "speed_tps": 500, "rpd": 1000, "tpd": "200K"},
{"id": "gpt-oss-20b", "speed_tps": 1200, "rpd": 1000},
{"id": "kimi-k2-instruct", "speed_tps": 300, "rpm": 60},
{"id": "qwen3-32b", "speed_tps": 400, "rpd": 1000, "tpd": "500K"},
{"id": "llama-4-scout", "speed_tps": 350, "tpm": "30K"}
]
}
},
"evolution_metrics": {
"total_agents": 32,
"agents_with_history": 17,
"pending_recommendations": 2,
"last_sync": "2026-04-05T22:30:00Z",
"sync_sources": ["git", "capability-index.yaml", "kilo.jsonc", "research"]
}
}