diff --git a/.kilo/KILO_SPEC.md b/.kilo/KILO_SPEC.md index 82c2cce..94d95dd 100644 --- a/.kilo/KILO_SPEC.md +++ b/.kilo/KILO_SPEC.md @@ -434,7 +434,7 @@ Provider availability depends on configuration. Common providers include: | Agent | Role | Model | |-------|------|-------| | `@RequirementRefiner` | Converts vague ideas and bug reports into strict User Stories with acceptance criteria checklists. | ollama-cloud/deepseek-v4-pro | -| `@HistoryMiner` | Analyzes git history to find duplicates and past solutions, preventing regression and duplicate work. | ollama-cloud/qwen3-coder:480b | +| `@HistoryMiner` | Analyzes git history to find duplicates and past solutions, preventing regression and duplicate work. | ollama-cloud/deepseek-v4-pro | | `@SystemAnalyst` | Designs technical specifications, data schemas, and API contracts before implementation. | ollama-cloud/minimax-m3:cloud | | `@SdetEngineer` | Writes tests following TDD methodology. | ollama-cloud/deepseek-v4-pro | | `@LeadDeveloper` | Primary code writer for backend and core logic. | ollama-cloud/deepseek-v4-pro | @@ -451,7 +451,7 @@ Provider availability depends on configuration. Common providers include: | `@ReleaseManager` | Manages git operations, semantic versioning, branching, and deployments. | ollama-cloud/deepseek-v4-pro | | `@Evaluator` | Scores agent effectiveness after task completion for continuous improvement. | ollama-cloud/deepseek-v4-pro | | `@PromptOptimizer` | Improves agent system prompts based on performance failures. | ollama-cloud/minimax-m3:cloud | -| `@ProductOwner` | Manages issue checklists, status labels, tracks progress and coordinates with human users. | ollama-cloud/kimi-k2.6 | +| `@ProductOwner` | Manages issue checklists, status labels, tracks progress and coordinates with human users. | ollama-cloud/minimax-m2.5:cloud | | `@AgentArchitect` | Creates, modifies, and reviews new agents, workflows, and skills based on capability gap analysis. 
| ollama-cloud/minimax-m3:cloud | | `@CapabilityAnalyst` | Analyzes task requirements against available agents, workflows, and skills. | ollama-cloud/minimax-m3:cloud | | `@WorkflowArchitect` | Creates and maintains workflow definitions with complete architecture, Gitea integration, and quality gates. | ollama-cloud/glm-5.1 | @@ -460,13 +460,13 @@ Provider availability depends on configuration. Common providers include: | `@Planner` | Advanced task planner using Chain of Thought, Tree of Thoughts, and Plan-Execute-Reflect. | ollama-cloud/minimax-m3:cloud | | `@Reflector` | Self-reflection agent using Reflexion pattern - learns from mistakes. | ollama-cloud/glm-5.1 | | `@MemoryManager` | Manages agent memory systems - short-term (context), long-term (vector store), and episodic (experiences). | ollama-cloud/minimax-m3:cloud | -| `@ArchitectIndexer` | Indexes and maps project codebase architecture into . | ollama-cloud/qwen3-coder:480b | +| `@ArchitectIndexer` | Indexes and maps project codebase architecture into .architect/ directory. | ollama-cloud/deepseek-v4-pro | | `@FlutterDeveloper` | Flutter mobile specialist for cross-platform apps, state management, and UI components. | ollama-cloud/minimax-m2.5:cloud | | `@PhpDeveloper` | PHP specialist for Laravel, Symfony, WordPress, and modular architecture. | ollama-cloud/deepseek-v4-pro | -| `@PipelineJudge` | Automated pipeline judge. | ollama-cloud/qwen3-coder:480b | +| `@PipelineJudge` | Automated pipeline judge. | ollama-cloud/deepseek-v4-pro | | `@PythonDeveloper` | Python specialist for Django, FastAPI, data processing, and ML pipelines. | ollama-cloud/deepseek-v4-pro | -| `@IncidentResponder` | Server incident response and system hardening specialist. 
| ollama-cloud/glm-5.1 | +| `@WorkflowCrossChecker` | Workflow cross-checker and process inspector. | ollama-cloud/deepseek-v4-pro | | `@EvolutionSkeptic` | Evaluates model responses against role-specific rubrics with detailed scoring and commentary. | ollama-cloud/deepseek-v4-pro | | `@EvolutionPrompt` | Generates role-specific stress-test prompts by analyzing agent definitions. | ollama-cloud/minimax-m3:cloud | diff --git a/.kilo/agents/architect-indexer.md b/.kilo/agents/architect-indexer.md index b34b7e6..b968471 100644 --- a/.kilo/agents/architect-indexer.md +++ b/.kilo/agents/architect-indexer.md @@ -1,7 +1,7 @@ --- description: Indexes and maps project codebase architecture into .architect/ directory. Creates and maintains structured documentation of entities, APIs, DB schema, file graphs, and conventions. (GNS-2 Tier 0) mode: subagent -model: ollama-cloud/qwen3-coder:480b +model: ollama-cloud/deepseek-v4-pro variant: thinking color: "#10B981" permission: diff --git a/.kilo/agents/history-miner.md b/.kilo/agents/history-miner.md index 9c0ee2a..d09ad7f 100755 --- a/.kilo/agents/history-miner.md +++ b/.kilo/agents/history-miner.md @@ -1,7 +1,7 @@ --- description: Analyzes git history to find duplicates and past solutions, preventing regression and duplicate work (GNS-2 Tier 0) mode: subagent -model: ollama-cloud/qwen3-coder:480b +model: ollama-cloud/deepseek-v4-pro color: "#059669" permission: read: allow diff --git a/.kilo/agents/incident-responder.md b/.kilo/agents/incident-responder.md index 8a74c3f..7b54eec 100644 --- a/.kilo/agents/incident-responder.md +++ b/.kilo/agents/incident-responder.md @@ -1,7 +1,7 @@ --- description: Server incident response and system hardening specialist. Handles live forensics, malware removal, persistence hunting, SSH-based server cleanup, and post-incident hardening. Works with any OS and panel. 
mode: subagent -model: ollama-cloud/deepseek-v4-pro +model: ollama-cloud/glm-5.1 color: "#B91C1C" permission: read: allow diff --git a/.kilo/agents/pipeline-judge.md b/.kilo/agents/pipeline-judge.md index 8b1a23b..5faebab 100755 --- a/.kilo/agents/pipeline-judge.md +++ b/.kilo/agents/pipeline-judge.md @@ -1,7 +1,7 @@ --- description: Automated pipeline judge. Evaluates workflow execution by running tests, measuring token cost and wall-clock time. Produces objective fitness scores. Never writes code - only measures and scores. (GNS-2 Tier 0) mode: subagent -model: ollama-cloud/qwen3-coder:480b +model: ollama-cloud/deepseek-v4-pro color: "#DC2626" permission: read: allow diff --git a/.kilo/agents/product-owner.md b/.kilo/agents/product-owner.md index a4596a8..76e298f 100755 --- a/.kilo/agents/product-owner.md +++ b/.kilo/agents/product-owner.md @@ -1,7 +1,7 @@ --- description: Manages issue checklists, status labels, tracks progress and coordinates with human users (GNS-2 Tier 1) mode: subagent -model: ollama-cloud/kimi-k2.6 +model: ollama-cloud/minimax-m2.5:cloud color: "#EA580C" permission: read: allow diff --git a/.kilo/agents/workflow-cross-checker.md b/.kilo/agents/workflow-cross-checker.md index d58cc3c..4d20a77 100644 --- a/.kilo/agents/workflow-cross-checker.md +++ b/.kilo/agents/workflow-cross-checker.md @@ -1,7 +1,7 @@ --- description: Workflow cross-checker and process inspector. Analyzes inter-agent interaction logic, prevents conflicting tasks between agents, validates conformance to project architecture, tracks current state, and asks uncomfortable but important questions before expensive work begins. 
mode: subagent -model: ollama-cloud/qwen3-coder:480b +model: ollama-cloud/deepseek-v4-pro variant: thinking color: "#9333EA" permission: diff --git a/.kilo/capability-index.yaml b/.kilo/capability-index.yaml index 7d21455..c96b4a9 100644 --- a/.kilo/capability-index.yaml +++ b/.kilo/capability-index.yaml @@ -22,7 +22,6 @@ agents: - code-skeptic - orchestrator fallback_models: - - ollama-cloud/qwen3-coder:480b - ollama-cloud/kimi-k2.6 - ollama-cloud/deepseek-v4-pro failover_strategy: downgraded @@ -296,7 +295,6 @@ agents: - performance-engineer - orchestrator fallback_models: - - ollama-cloud/qwen3-coder:480b - ollama-cloud/deepseek-v4-pro - ollama-cloud/kimi-k2.6 failover_strategy: mixed @@ -373,7 +371,6 @@ agents: - orchestrator fallback_models: - ollama-cloud/kimi-k2.6 - - ollama-cloud/qwen3-coder:480b - ollama-cloud/deepseek-v4-pro failover_strategy: mixed reasoning_effort: medium @@ -628,10 +625,10 @@ agents: produces: - priority_order - issue_labels - - issue closures + - issue_closures forbidden: - implementation - model: ollama-cloud/kimi-k2.6 + model: ollama-cloud/minimax-m2.5:cloud mode: subagent delegates_to: [] fallback_models: @@ -656,7 +653,7 @@ agents: - code_writing - code_changes - prompt_changes - model: ollama-cloud/qwen3-coder:480b + model: ollama-cloud/deepseek-v4-pro mode: subagent delegates_to: - prompt-optimizer @@ -835,7 +832,7 @@ agents: forbidden: - code_changes - implementation - model: ollama-cloud/qwen3-coder:480b + model: ollama-cloud/deepseek-v4-pro variant: thinking mode: subagent delegates_to: @@ -846,31 +843,51 @@ agents: - ollama-cloud/kimi-k2.6 failover_strategy: downgraded reasoning_effort: low - incident-responder: + history-miner: capabilities: - - inter_agent_conflict_detection - - architecture_conformance_validation - - state_tracking_sanity - - process_inspection - - uncomfortable_questions_protocol - - pre_flight_validation - - mid_flight_revalidation + - git_history_analysis + - duplicate_detection + - 
regression_prevention + - pattern_matching + - past_solution_retrieval receives: - - checkpoint_yaml - - task_claims - - agent_chain - - architecture_docs - - capability_index + - task_description + - codebase_context produces: - - cross_check_report - - verdict_approved_conditional_blocked - - risk_flags - - mitigation_suggestions + - historical_findings + - regression_warnings + - recommended_solutions forbidden: - - code_writing - implementation model: ollama-cloud/deepseek-v4-pro mode: subagent + delegates_to: [] + fallback_models: + - ollama-cloud/glm-5.1 + failover_strategy: downgraded + reasoning_effort: low + incident-responder: + capabilities: + - incident_response + - live_forensics + - malware_removal + - persistence_hunting + - ssh_cleanup + - post_incident_hardening + - cross_platform_hardening + receives: + - incident_report + - server_logs + - threat_indicators + produces: + - forensics_report + - cleanup_actions + - hardening_recommendations + forbidden: + - feature_development + - code_changes + model: ollama-cloud/glm-5.1 + mode: subagent delegates_to: - orchestrator - reflector @@ -881,8 +898,6 @@ agents: - ollama-cloud/kimi-k2.6 failover_strategy: downgraded reasoning_effort: high - workflow-cross-checker: null - variant: thinking evolution-prompt: capabilities: - prompt_generation @@ -906,8 +921,6 @@ agents: fallback_models: - ollama-cloud/deepseek-v4-pro - ollama-cloud/kimi-k2.6 - - ollama-cloud/kimi-k2.6 - - ollama-cloud/qwen3-coder:480b failover_strategy: downgraded reasoning_effort: high capability_routing: diff --git a/agent-evolution/data/evolution-summary.json b/agent-evolution/data/evolution-summary.json index d3b97e0..c403c37 100644 --- a/agent-evolution/data/evolution-summary.json +++ b/agent-evolution/data/evolution-summary.json @@ -1,15 +1,15 @@ { - "ts": "2026-06-01T20:35:00Z", + "ts": "2026-06-01T21:30:00Z", "event": "evolution_complete_report", "trigger": "user_request_objective_evolution", - "methodology": 
"capability-analyst_research_report + deterministic_sync", - "agents_changed": 29, + "methodology": "capability-analyst_research_report + deterministic_sync + code_skeptic_review", + "agents_changed": 32, "model_distribution": { - "deepseek-v4-pro": 14, - "minimax-m3:cloud": 8, - "glm-5.1": 4, - "minimax-m2.5:cloud": 2, - "kimi-k2.6": 1 + "deepseek-v4-pro": 16, + "minimax-m3:cloud": 10, + "glm-5.1": 5, + "kimi-k2.6": 5, + "minimax-m2.5:cloud": 3 }, "evidence_file": "agent-evolution/data/research-report.json", "evidence_sources": [ @@ -21,18 +21,39 @@ "ollama.com/library/minimax-m2.5", "minimax.io/models/text/m3", "minimax.io/news/minimax-m25", - "qwenlm.github.io/blog/qwen3-coder" + "qwenlm.github.io/blog/qwen3-coder", + "api.llm-stats.com/v1/ (pricing/provider metadata only, no benchmark scores)" ], + "code_skeptic_findings": { + "issues_fixed": [ + "incident-responder in capability-index.yaml had copy-pasted workflow-cross-checker capabilities; replaced with correct incident_response capabilities", + "removed orphaned 'workflow-cross-checker: null' field and unjustified 'variant: thinking' from incident-responder", + "added missing history-miner entry to capability-index.yaml", + "3 model mismatches fixed: product-owner (kimi-k2.6 → minimax-m2.5:cloud), incident-responder (deepseek-v4-pro → glm-5.1), history-miner (qwen3-coder:480b → deepseek-v4-pro)", + "3 additional mismatches fixed: architect-indexer, pipeline-judge, workflow-cross-checker (all qwen3-coder:480b → deepseek-v4-pro)" + ], + "total_model_mismatches_fixed": 6 + }, "opencompass_container": { "files": ["docker/docker-compose.opencompass.yml", "docker/Dockerfile.opencompass", "scripts/opencompass-eval.sh", "scripts/opencompass-setup.sh"], "status": "config_complete_build_blocked_network", - "note": "Docker build requires internet access for pip install. Files validated and ready." + "note": "Docker build requires internet access for pip install. Files validated and ready. 
Not needed — no benchmark endpoint available." + }, + "llm_stats_api": { + "status": "pricing_registry_only", + "benchmarks_available": false, + "models_with_metadata": ["deepseek-v4-pro-max", "glm-5.1", "kimi-k2.6", "minimax-m2.5", "minimax-m2.7"], + "models_not_found": ["minimax-m3", "qwen3-coder-480b"], + "finding": "LLM Stats API (api.llm-stats.com/v1/) provides model registry, pricing, provider metadata, and param_count but has NO benchmark score endpoints. Manual research remains the sole source of benchmark data." }, "data_gaps": [ - "minimax-m3: ALL benchmark tables on ollama.com and minimax.io are IMAGE-ONLY. Specific coding scores unavailable.", - "qwen3-coder-480b: ALL benchmarks image-only. Lowest confidence assignment.", - "kimi-k2.6: Ollama page image-only. Using K2 Instruct as proxy (likely understates performance).", - "minimax-m2.5: Ollama images + partial blog text. Reasoning benchmarks missing." + "minimax-m3: Not found in LLM Stats API. ALL benchmark tables on ollama.com and minimax.io are IMAGE-ONLY. Specific coding scores unavailable.", + "qwen3-coder-480b: Not found in LLM Stats API. ALL benchmarks image-only. No longer assigned to any agent.", + "kimi-k2.6: Ollama page image-only. Using K2 Instruct as proxy (likely understates performance). API provides pricing/providers.", + "minimax-m2.5: Ollama images + partial blog text. Reasoning benchmarks missing. API provides pricing/providers and a 1M context discrepancy (manual said 198K, API shows 1M).", + "minimax-m2.7: Not in manual research. Found in API with release_date 2026-03-18. param_count null in API. SWE-Pro 56.22% from API description." 
], - "verification": "scripts/sync-agents.cjs --check PASSED" + "verification": "scripts/sync-agents.cjs --check PASSED", + "total_agents_assigned": 36, + "zero_unassigned": true } diff --git a/agent-evolution/data/research-report.json b/agent-evolution/data/research-report.json index a20b17f..0bc00d0 100644 --- a/agent-evolution/data/research-report.json +++ b/agent-evolution/data/research-report.json @@ -28,6 +28,83 @@ ], "confidence": "high-for-text-extracted, medium-for-image-only-models" }, + "api_metadata": { + "source": "LLM Stats API (api.llm-stats.com/v1/)", + "fetched_at": "2026-06-01T20:57:00+01:00", + "api_key": "REDACTED", + "endpoints_probed": ["/v1/models", "/v1/models/{id}", "/v1/benchmarks", "/v1/scores", "/v1/rankings", "/v1/evaluations"], + "benchmark_endpoint_status": "NOT_FOUND — API is a pricing/registry API, not a benchmark aggregator", + "models_found_in_api": { + "deepseek-v4-pro-max": { + "param_count": 1600000000000, + "release_date": "2026-04-23", + "description_summary": "DeepSeek-V4-Pro-Max is the maximum reasoning effort mode of DeepSeek-V4-Pro, a 1.6T-parameter MoE model with 49B activated parameters and a 1M-token context window...", + "cheapest_provider": "DeepInfra", + "cheapest_input_price_per_1m": 1.74, + "cheapest_output_price_per_1m": 3.48, + "context_max": 1048576, + "available_in_zeroeval": true + }, + "glm-5.1": { + "param_count": 754000000000, + "release_date": "2026-04-07", + "description_summary": "GLM-5.1 is Z.AI's next-generation flagship foundation model designed for long-horizon agentic engineering tasks...", + "cheapest_provider": "FriendliAI", + "cheapest_input_price_per_1m": 1.4, + "cheapest_output_price_per_1m": 4.4, + "context_max": 200000, + "available_in_zeroeval": true + }, + "kimi-k2.6": { + "param_count": 1000000000000, + "release_date": "2026-04-20", + "description_summary": "Kimi K2.6 is Moonshot AI's open-source, native multimodal agentic model...", + "cheapest_provider": "Fireworks", + 
"cheapest_input_price_per_1m": 0.95, + "cheapest_output_price_per_1m": 4.0, + "context_max": 262144, + "available_in_zeroeval": true + }, + "minimax-m2.5": { + "param_count": 230000000000, + "release_date": "2026-02-12", + "description_summary": "MiniMax M2.5 is the world's first production-level model designed natively for Agent scenarios...", + "cheapest_provider": "MiniMax", + "cheapest_input_price_per_1m": 0.3, + "cheapest_output_price_per_1m": 1.2, + "context_max": 1000000, + "available_in_zeroeval": true + }, + "minimax-m2.7": { + "param_count": null, + "release_date": "2026-03-18", + "description_summary": "MiniMax M2.7 features model self-improvement driving productivity innovation...", + "cheapest_provider": "Fireworks", + "cheapest_input_price_per_1m": 0.3, + "cheapest_output_price_per_1m": 1.2, + "context_max": 204800, + "available_in_zeroeval": true + } + }, + "models_not_found": [ + { "model": "minimax-m3", "reason": "No entry in API catalog. All data from manual research." }, + { "model": "qwen3-coder-480b", "reason": "No entry in API catalog. Only smaller Qwen3 variants present. All data from manual research." } + ], + "discrepancies_with_manual_research": [ + { + "field": "minimax-m2.5.context", + "manual_value": "198K tokens", + "api_value": "1000000 (MiniMax provider)", + "verdict": "API shows 1M context for MiniMax provider, manual said 198K. Likely manual refers to a different provider or older spec." + }, + { + "field": "minimax-m2.7.param_count", + "manual_value": "not researched (not in manual report)", + "api_value": "null", + "verdict": "API does not provide param_count for M2.7." + } + ] + }, "models": { "deepseek-v4-pro": { "vendor": "DeepSeek", @@ -416,13 +493,13 @@ }, "planner": { "best_model": "minimax-m3", - "rationale": "PostTrainBench #3 demonstrates autonomous planning + execution. 12h autonomous tasks. 300-agent swarm coordination. 
Best for complex task decomposition.", + "rationale": "PostTrainBench #3 (37.1) demonstrates autonomous planning + execution. 12h autonomous tasks (ICLR replication, 18 commits). Best for SOLO deep task decomposition. CRITICAL CORRECTION: '300-agent swarm' claim belongs to kimi-k2.6 (unverified marketing prose); minimax-m3 has ZERO verified multi-agent swarm capability.", "fallback": "glm-5.1 (sustained multi-round planning without plateauing)" }, "orchestrator": { "best_model": "glm-5.1", "rationale": "UNIQUE CLAIM: sustained performance over hundreds of rounds, thousands of tool calls. Does not plateau. Designed for agentic engineering. Vending Bench $5,634 (economic task competence).", - "fallback": "minimax-m3 (agent swarm coordination, 12h autonomous runs)" + "fallback": "minimax-m3 (solo long-horizon autonomous tasks, 12h autonomous runs)" }, "agent-architect": { "best_model": "minimax-m3", diff --git a/kilo-meta.json b/kilo-meta.json index f575d63..8da415e 100644 --- a/kilo-meta.json +++ b/kilo-meta.json @@ -1,7 +1,7 @@ { "$schema": "https://app.kilo.ai/config.json", "metaVersion": "1.0.0", - "lastSync": "2026-06-01T19:50:01.425Z", + "lastSync": "2026-06-01T21:00:25.859Z", "agents": { "requirement-refiner": { "file": ".kilo/agents/requirement-refiner.md", @@ -14,7 +14,7 @@ "history-miner": { "file": ".kilo/agents/history-miner.md", "description": "Analyzes git history to find duplicates and past solutions, preventing regression and duplicate work", - "model": "ollama-cloud/qwen3-coder:480b", + "model": "ollama-cloud/deepseek-v4-pro", "mode": "subagent", "category": "core" }, @@ -145,7 +145,7 @@ "product-owner": { "file": ".kilo/agents/product-owner.md", "description": "Manages issue checklists, status labels, tracks progress and coordinates with human users", - "model": "ollama-cloud/kimi-k2.6", + "model": "ollama-cloud/minimax-m2.5:cloud", "mode": "subagent", "category": "meta" }, @@ -211,7 +211,7 @@ "architect-indexer": { "file": 
".kilo/agents/architect-indexer.md", "description": "Indexes and maps project codebase architecture into .architect/ directory", - "model": "ollama-cloud/qwen3-coder:480b", + "model": "ollama-cloud/deepseek-v4-pro", "mode": "subagent", "color": "#10B981", "category": "core" @@ -235,7 +235,7 @@ "pipeline-judge": { "file": ".kilo/agents/pipeline-judge.md", "description": "Automated pipeline judge. Evaluates workflow execution by running tests, measuring token cost and wall-clock time. Produces objective fitness scores. Never writes code - only measures and scores.", - "model": "ollama-cloud/qwen3-coder:480b", + "model": "ollama-cloud/deepseek-v4-pro", "mode": "subagent", "color": "#DC2626", "category": "meta" @@ -251,7 +251,7 @@ "incident-responder": { "file": ".kilo/agents/incident-responder.md", "description": "Server incident response and system hardening specialist. Handles live forensics, malware removal, persistence hunting, SSH-based server cleanup, and post-incident hardening. Works with any OS and panel.", - "model": "ollama-cloud/deepseek-v4-pro", + "model": "ollama-cloud/glm-5.1", "mode": "subagent", "color": "#B91C1C", "category": "core" @@ -259,7 +259,7 @@ "workflow-cross-checker": { "file": ".kilo/agents/workflow-cross-checker.md", "description": "Workflow cross-checker and process inspector. 
Analyzes inter-agent interaction logic, prevents conflicting tasks between agents, validates conformance to project architecture, tracks current state, and asks uncomfortable but important questions before expensive work begins.", - "model": "ollama-cloud/qwen3-coder:480b", + "model": "ollama-cloud/deepseek-v4-pro", "mode": "subagent", "color": "#9333EA", "category": "meta" diff --git a/kilo.jsonc b/kilo.jsonc index c6271c8..6001f13 100644 --- a/kilo.jsonc +++ b/kilo.jsonc @@ -43,7 +43,7 @@ "history-miner": { "description": "Analyzes git history to find duplicates and past solutions, preventing regression and duplicate work", "mode": "subagent", - "model": "ollama-cloud/qwen3-coder:480b", + "model": "ollama-cloud/deepseek-v4-pro", "permission": { "task": { "*": "deny", @@ -358,7 +358,7 @@ "product-owner": { "description": "Manages issue checklists, status labels, tracks progress and coordinates with human users", "mode": "subagent", - "model": "ollama-cloud/kimi-k2.6", + "model": "ollama-cloud/minimax-m2.5:cloud", "permission": { "read": "allow", "edit": "allow", @@ -502,7 +502,7 @@ "architect-indexer": { "description": "Indexes and maps project codebase architecture into .architect/ directory", "mode": "subagent", - "model": "ollama-cloud/qwen3-coder:480b", + "model": "ollama-cloud/deepseek-v4-pro", "color": "#10B981" }, "flutter-developer": { @@ -520,7 +520,7 @@ "pipeline-judge": { "description": "Automated pipeline judge. Evaluates workflow execution by running tests, measuring token cost and wall-clock time. Produces objective fitness scores. Never writes code - only measures and scores.", "mode": "subagent", - "model": "ollama-cloud/qwen3-coder:480b", + "model": "ollama-cloud/deepseek-v4-pro", "color": "#DC2626" }, "python-developer": { @@ -532,7 +532,7 @@ "incident-responder": { "description": "Server incident response and system hardening specialist. 
Handles live forensics, malware removal, persistence hunting, SSH-based server cleanup, and post-incident hardening. Works with any OS and panel.", "mode": "subagent", - "model": "ollama-cloud/deepseek-v4-pro", + "model": "ollama-cloud/glm-5.1", "color": "#B91C1C", "permission": { "read": "allow", @@ -552,7 +552,7 @@ "workflow-cross-checker": { "description": "Workflow cross-checker and process inspector. Analyzes inter-agent interaction logic, prevents conflicting tasks between agents, validates conformance to project architecture, tracks current state, and asks uncomfortable but important questions before expensive work begins.", "mode": "subagent", - "model": "ollama-cloud/qwen3-coder:480b", + "model": "ollama-cloud/deepseek-v4-pro", "color": "#9333EA", "variant": "thinking", "permission": {