From ccca685fdcd7e4d1113fed27d1dc3569e9a127e7 Mon Sep 17 00:00:00 2001 From: Deploy Bot Date: Thu, 28 May 2026 13:46:34 +0100 Subject: [PATCH] feat(agent-models): assign best-fit models from real-fit evaluation report Updated all 36 agents to their highest-scoring model per real-fit-report.json: - kimi-k2.6: code-skeptic(91.2), system-analyst(92.0), sdet-engineer(97.0), lead-developer(72.5), security-auditor(63.8), history-miner, browser-automation, evolution-prompt, product-owner, orchestrator, release-manager, reflector - glm-5.1: devops-engineer(96.2), evaluator, the-fixer, memory-manager, performance-engineer, prompt-optimizer, workflow-architect, visual-tester, flutter-developer, incident-responder - qwen3-coder:480b: architect-indexer, frontend-developer, go-developer, markdown-validator, pipeline-judge, workflow-cross-checker, evolution-skeptic, requirement-refiner - deepseek-v4-pro: backend-developer, capability-analyst, planner, php-developer, python-developer Files updated: - kilo-meta.json (source of truth) - kilo.jsonc (runtime config) - capability-index.yaml (routing) - 30 agent .md frontmatters (via sync-agents.cjs) - KILO_SPEC.md + AGENTS.md (auto-synced) - real-fit-report.json (regenerated from DB) --- .kilo/KILO_SPEC.md | 71 +++--- .kilo/agents/architect-indexer.md | 2 +- .kilo/agents/backend-developer.md | 2 +- .kilo/agents/capability-analyst.md | 2 +- .kilo/agents/devops-engineer.md | 2 +- .kilo/agents/evaluator.md | 2 +- .kilo/agents/evolution-prompt.md | 2 +- .kilo/agents/evolution-skeptic.md | 2 +- .kilo/agents/flutter-developer.md | 2 +- .kilo/agents/frontend-developer.md | 2 +- .kilo/agents/go-developer.md | 2 +- .kilo/agents/incident-responder.md | 2 +- .kilo/agents/markdown-validator.md | 2 +- .kilo/agents/memory-manager.md | 2 +- .kilo/agents/performance-engineer.md | 2 +- .kilo/agents/php-developer.md | 2 +- .kilo/agents/pipeline-judge.md | 2 +- .kilo/agents/planner.md | 2 +- .kilo/agents/prompt-optimizer.md | 2 +- 
.kilo/agents/python-developer.md | 2 +- .kilo/agents/reflector.md | 2 +- .kilo/agents/release-manager.md | 2 +- .kilo/agents/requirement-refiner.md | 2 +- .kilo/agents/security-auditor.md | 2 +- .kilo/agents/system-analyst.md | 2 +- .kilo/agents/the-fixer.md | 2 +- .kilo/agents/visual-tester.md | 2 +- .kilo/agents/workflow-architect.md | 2 +- .kilo/agents/workflow-cross-checker.md | 2 +- .kilo/capability-index.yaml | 170 ++++++--------- AGENTS.md | 3 +- agent-evolution/data/real-fit-report.json | 252 +++++++++++----------- kilo-meta.json | 56 ++--- kilo.jsonc | 32 +-- 34 files changed, 306 insertions(+), 334 deletions(-) diff --git a/.kilo/KILO_SPEC.md b/.kilo/KILO_SPEC.md index 366eb67..1531291 100644 --- a/.kilo/KILO_SPEC.md +++ b/.kilo/KILO_SPEC.md @@ -433,42 +433,42 @@ Provider availability depends on configuration. Common providers include: | Agent | Role | Model | |-------|------|-------| -| `@RequirementRefiner` | Converts vague ideas and bug reports into strict User Stories with acceptance criteria checklists. | ollama-cloud/kimi-k2.6 | +| `@RequirementRefiner` | Converts vague ideas and bug reports into strict User Stories with acceptance criteria checklists. | ollama-cloud/qwen3-coder:480b | | `@HistoryMiner` | Analyzes git history to find duplicates and past solutions, preventing regression and duplicate work. | ollama-cloud/kimi-k2.6 | -| `@SystemAnalyst` | Designs technical specifications, data schemas, and API contracts before implementation. | ollama-cloud/glm-5.1 | +| `@SystemAnalyst` | Designs technical specifications, data schemas, and API contracts before implementation. | ollama-cloud/kimi-k2.6 | | `@SdetEngineer` | Writes tests following TDD methodology. | ollama-cloud/kimi-k2.6 | | `@LeadDeveloper` | Primary code writer for backend and core logic. | ollama-cloud/kimi-k2.6 | -| `@FrontendDeveloper` | Handles UI implementation with multimodal capabilities. | ollama-cloud/kimi-k2.6 | -| `@BackendDeveloper` | Backend specialist for Node. 
| ollama-cloud/kimi-k2.6 | -| `@GoDeveloper` | Go backend specialist for Gin, Echo, APIs, and database integration. | ollama-cloud/deepseek-v4-pro-max | -| `@DevopsEngineer` | DevOps specialist for Docker, Kubernetes, CI/CD pipeline automation, and infrastructure management. | ollama-cloud/kimi-k2.6 | +| `@FrontendDeveloper` | Handles UI implementation with multimodal capabilities. | ollama-cloud/qwen3-coder:480b | +| `@BackendDeveloper` | Backend specialist for Node. | ollama-cloud/deepseek-v4-pro | +| `@GoDeveloper` | Go backend specialist for Gin, Echo, APIs, and database integration. | ollama-cloud/qwen3-coder:480b | +| `@DevopsEngineer` | DevOps specialist for Docker, Kubernetes, CI/CD pipeline automation, and infrastructure management. | ollama-cloud/glm-5.1 | | `@CodeSkeptic` | Adversarial code reviewer. | ollama-cloud/kimi-k2.6 | -| `@TheFixer` | Iteratively fixes bugs based on specific error reports and test failures. | ollama-cloud/kimi-k2.6 | -| `@PerformanceEngineer` | Reviews code for performance issues. | ollama-cloud/deepseek-v4-pro-max | -| `@SecurityAuditor` | Scans for security vulnerabilities, OWASP Top 10, dependency CVEs, and hardcoded secrets. | ollama-cloud/deepseek-v4-pro-max | -| `@VisualTester` | Visual regression testing agent that compares screenshots and detects UI differences using pixelmatch and image diff. | ollama-cloud/kimi-k2.6 | +| `@TheFixer` | Iteratively fixes bugs based on specific error reports and test failures. | ollama-cloud/glm-5.1 | +| `@PerformanceEngineer` | Reviews code for performance issues. | ollama-cloud/glm-5.1 | +| `@SecurityAuditor` | Scans for security vulnerabilities, OWASP Top 10, dependency CVEs, and hardcoded secrets. | ollama-cloud/kimi-k2.6 | +| `@VisualTester` | Visual regression testing agent that compares screenshots and detects UI differences using pixelmatch and image diff. | ollama-cloud/glm-5.1 | | `@Orchestrator` | Main dispatcher. 
| ollama-cloud/kimi-k2.6 | -| `@ReleaseManager` | Manages git operations, semantic versioning, branching, and deployments. | ollama-cloud/glm-5.1 | -| `@Evaluator` | Scores agent effectiveness after task completion for continuous improvement. | ollama-cloud/kimi-k2.6 | -| `@PromptOptimizer` | Improves agent system prompts based on performance failures. | ollama-cloud/kimi-k2.6 | +| `@ReleaseManager` | Manages git operations, semantic versioning, branching, and deployments. | ollama-cloud/kimi-k2.6 | +| `@Evaluator` | Scores agent effectiveness after task completion for continuous improvement. | ollama-cloud/glm-5.1 | +| `@PromptOptimizer` | Improves agent system prompts based on performance failures. | ollama-cloud/glm-5.1 | | `@ProductOwner` | Manages issue checklists, status labels, tracks progress and coordinates with human users. | ollama-cloud/kimi-k2.6 | | `@AgentArchitect` | Creates, modifies, and reviews new agents, workflows, and skills based on capability gap analysis. | ollama-cloud/kimi-k2.6 | -| `@CapabilityAnalyst` | Analyzes task requirements against available agents, workflows, and skills. | ollama-cloud/glm-5.1 | -| `@WorkflowArchitect` | Creates and maintains workflow definitions with complete architecture, Gitea integration, and quality gates. | ollama-cloud/kimi-k2.6 | -| `@MarkdownValidator` | Validates and corrects Markdown descriptions for Gitea issues. | ollama-cloud/kimi-k2.6 | +| `@CapabilityAnalyst` | Analyzes task requirements against available agents, workflows, and skills. | ollama-cloud/deepseek-v4-pro | +| `@WorkflowArchitect` | Creates and maintains workflow definitions with complete architecture, Gitea integration, and quality gates. | ollama-cloud/glm-5.1 | +| `@MarkdownValidator` | Validates and corrects Markdown descriptions for Gitea issues. | ollama-cloud/qwen3-coder:480b | | `@BrowserAutomation` | Browser automation agent using Playwright MCP for E2E testing, form filling, navigation, and web interaction. 
| ollama-cloud/kimi-k2.6 | -| `@Planner` | Advanced task planner using Chain of Thought, Tree of Thoughts, and Plan-Execute-Reflect. | ollama-cloud/deepseek-v4-pro-max | -| `@Reflector` | Self-reflection agent using Reflexion pattern - learns from mistakes. | ollama-cloud/deepseek-v4-pro-max | -| `@MemoryManager` | Manages agent memory systems - short-term (context), long-term (vector store), and episodic (experiences). | ollama-cloud/deepseek-v4-pro-max | -| `@ArchitectIndexer` | Indexes and maps project codebase architecture into . | ollama-cloud/kimi-k2.6 | -| `@FlutterDeveloper` | Flutter mobile specialist for cross-platform apps, state management, and UI components. | ollama-cloud/kimi-k2.6 | -| `@PhpDeveloper` | PHP specialist for Laravel, Symfony, WordPress, and modular architecture. | ollama-cloud/kimi-k2.6 | -| `@PipelineJudge` | Automated pipeline judge. | ollama-cloud/glm-5.1 | -| `@PythonDeveloper` | Python specialist for Django, FastAPI, data processing, and ML pipelines. | ollama-cloud/kimi-k2.6 | -| `@IncidentResponder` | Server incident response and system hardening specialist. | ollama-cloud/kimi-k2.6 | -| `@WorkflowCrossChecker` | Workflow cross-checker and process inspector. | ollama-cloud/kimi-k2.6 | -| `@EvolutionSkeptic` | Evaluates model responses against role-specific rubrics with detailed scoring and commentary. | ollama-cloud/deepseek-v4-pro-max | -| `@EvolutionPrompt` | Generates role-specific stress-test prompts by analyzing agent definitions. | ollama-cloud/deepseek-v4-pro-max | +| `@Planner` | Advanced task planner using Chain of Thought, Tree of Thoughts, and Plan-Execute-Reflect. | ollama-cloud/deepseek-v4-pro | +| `@Reflector` | Self-reflection agent using Reflexion pattern - learns from mistakes. | ollama-cloud/kimi-k2.6 | +| `@MemoryManager` | Manages agent memory systems - short-term (context), long-term (vector store), and episodic (experiences). 
| ollama-cloud/glm-5.1 | +| `@ArchitectIndexer` | Indexes and maps project codebase architecture into . | ollama-cloud/qwen3-coder:480b | +| `@FlutterDeveloper` | Flutter mobile specialist for cross-platform apps, state management, and UI components. | ollama-cloud/glm-5.1 | +| `@PhpDeveloper` | PHP specialist for Laravel, Symfony, WordPress, and modular architecture. | ollama-cloud/deepseek-v4-pro | +| `@PipelineJudge` | Automated pipeline judge. | ollama-cloud/qwen3-coder:480b | +| `@PythonDeveloper` | Python specialist for Django, FastAPI, data processing, and ML pipelines. | ollama-cloud/deepseek-v4-pro | +| `@IncidentResponder` | Server incident response and system hardening specialist. | ollama-cloud/glm-5.1 | +| `@WorkflowCrossChecker` | Workflow cross-checker and process inspector. | ollama-cloud/qwen3-coder:480b | +| `@EvolutionSkeptic` | Evaluates model responses against role-specific rubrics with detailed scoring and commentary. | ollama-cloud/qwen3-coder:480b | +| `@EvolutionPrompt` | Generates role-specific stress-test prompts by analyzing agent definitions. | ollama-cloud/kimi-k2.6 | @@ -480,20 +480,21 @@ Provider availability depends on configuration. Common providers include: |---------|-------------|-------| | `/status` | Check pipeline status for issue. | ollama-cloud/qwen3.5-122b | | `/evaluate` | Generate performance report. | ollama-cloud/gpt-oss:120b | -| `/plan` | Creates detailed task plans. | ollama-cloud/deepseek-v4-pro-max | +| `/plan` | Creates detailed task plans. | ollama-cloud/deepseek-v4-pro | | `/ask` | Answers codebase questions. | ollama-cloud/qwen3.5-122b | | `/debug` | Analyzes and fixes bugs. | ollama-cloud/gpt-oss:20b | -| `/code` | Quick code generation. | ollama-cloud/deepseek-v4-pro-max | +| `/code` | Quick code generation. | ollama-cloud/deepseek-v4-pro | | `/research` | Run research and self-improvement. | ollama-cloud/glm-5 | -| `/feature` | Full feature development pipeline. 
| ollama-cloud/deepseek-v4-pro-max | -| `/hotfix` | Hotfix workflow. | ollama-cloud/deepseek-v4-pro-max | +| `/feature` | Full feature development pipeline. | ollama-cloud/deepseek-v4-pro | +| `/hotfix` | Hotfix workflow. | ollama-cloud/deepseek-v4-pro | | `/review` | Code review workflow. | ollama-cloud/kimi-k2.6 | | `/review-watcher` | Auto-validate review results. | ollama-cloud/glm-5 | | `/workflow` | Run complete workflow with quality gates. | ollama-cloud/glm-5 | | `/landing-page` | Create landing page CMS from HTML mockups. | ollama-cloud/kimi-k2.5 | -| `/commerce` | Create e-commerce site with products, cart, payments. | ollama-cloud/deepseek-v4-pro-max | -| `/blog` | Create blog/CMS with posts, comments, SEO. | ollama-cloud/deepseek-v4-pro-max | -| `/booking` | Create booking system for services/appointments. | ollama-cloud/deepseek-v4-pro-max | +| `/commerce` | Create e-commerce site with products, cart, payments. | ollama-cloud/deepseek-v4-pro | +| `/blog` | Create blog/CMS with posts, comments, SEO. | ollama-cloud/deepseek-v4-pro | +| `/booking` | Create booking system for services/appointments. | ollama-cloud/deepseek-v4-pro | +| `/evolve-agent` | Pre-deployment role-fit testing — evaluate which model best fits a specific agent role via stress-test prompts and rubric scoring. | ollama-cloud/kimi-k2.6 | diff --git a/.kilo/agents/architect-indexer.md b/.kilo/agents/architect-indexer.md index e623c0e..b34b7e6 100644 --- a/.kilo/agents/architect-indexer.md +++ b/.kilo/agents/architect-indexer.md @@ -1,7 +1,7 @@ --- description: Indexes and maps project codebase architecture into .architect/ directory. Creates and maintains structured documentation of entities, APIs, DB schema, file graphs, and conventions. 
(GNS-2 Tier 0) mode: subagent -model: ollama-cloud/kimi-k2.6 +model: ollama-cloud/qwen3-coder:480b variant: thinking color: "#10B981" permission: diff --git a/.kilo/agents/backend-developer.md b/.kilo/agents/backend-developer.md index b41544e..e320c5e 100755 --- a/.kilo/agents/backend-developer.md +++ b/.kilo/agents/backend-developer.md @@ -1,7 +1,7 @@ --- description: Backend specialist for Node.js, Express, APIs, and database integration (GNS-2 Tier 1) mode: subagent -model: ollama-cloud/kimi-k2.6 +model: ollama-cloud/deepseek-v4-pro color: "#10B981" permission: read: allow diff --git a/.kilo/agents/capability-analyst.md b/.kilo/agents/capability-analyst.md index 851ec22..fb2aab6 100755 --- a/.kilo/agents/capability-analyst.md +++ b/.kilo/agents/capability-analyst.md @@ -1,7 +1,7 @@ --- description: Analyzes task requirements against available agents, workflows, and skills. Identifies gaps and recommends new components. Tier 2 meta-agent with self-cascade enabled. mode: subagent -model: ollama-cloud/glm-5.1 +model: ollama-cloud/deepseek-v4-pro color: "#6366F1" permission: read: allow diff --git a/.kilo/agents/devops-engineer.md b/.kilo/agents/devops-engineer.md index bb4fbc4..80984f0 100755 --- a/.kilo/agents/devops-engineer.md +++ b/.kilo/agents/devops-engineer.md @@ -1,7 +1,7 @@ --- description: DevOps specialist for Docker, Kubernetes, CI/CD pipeline automation, and infrastructure management (GNS-2 Tier 1) mode: subagent -model: ollama-cloud/kimi-k2.6 +model: ollama-cloud/glm-5.1 color: "#FF6B35" permission: read: allow diff --git a/.kilo/agents/evaluator.md b/.kilo/agents/evaluator.md index b113834..1c130ba 100755 --- a/.kilo/agents/evaluator.md +++ b/.kilo/agents/evaluator.md @@ -1,7 +1,7 @@ --- description: Scores agent effectiveness after task completion for continuous improvement. Tier 2 meta-agent with self-cascade enabled. 
mode: subagent -model: ollama-cloud/kimi-k2.6 +model: ollama-cloud/glm-5.1 variant: thinking color: "#047857" permission: diff --git a/.kilo/agents/evolution-prompt.md b/.kilo/agents/evolution-prompt.md index c20676c..cbcc292 100644 --- a/.kilo/agents/evolution-prompt.md +++ b/.kilo/agents/evolution-prompt.md @@ -1,7 +1,7 @@ --- description: Generates role-specific stress-test prompts by analyzing agent definitions. Reads .kilo/agents/*.md to create adversarial test scenarios that validate role adherence, edge-case handling, and instruction following. (GNS-2 Tier 1) mode: subagent -model: ollama-cloud/deepseek-v4-pro-max +model: ollama-cloud/kimi-k2.6 color: "#FF6B00" permission: read: allow diff --git a/.kilo/agents/evolution-skeptic.md b/.kilo/agents/evolution-skeptic.md index 1ad922d..c71637d 100644 --- a/.kilo/agents/evolution-skeptic.md +++ b/.kilo/agents/evolution-skeptic.md @@ -1,7 +1,7 @@ --- description: Evaluates model responses against role-specific rubrics with detailed scoring and commentary. Scores role adherence, reasoning quality, instruction following, boundary awareness, and output quality. Produces per-dimension scores with explanations. 
(GNS-2 Tier 1) mode: subagent -model: ollama-cloud/deepseek-v4-pro-max +model: ollama-cloud/qwen3-coder:480b color: "#C026D3" permission: read: allow diff --git a/.kilo/agents/flutter-developer.md b/.kilo/agents/flutter-developer.md index 113cd9c..1f65c7d 100755 --- a/.kilo/agents/flutter-developer.md +++ b/.kilo/agents/flutter-developer.md @@ -1,7 +1,7 @@ --- description: Flutter mobile specialist for cross-platform apps, state management, and UI components (GNS-2 Tier 1) mode: subagent -model: ollama-cloud/kimi-k2.6 +model: ollama-cloud/glm-5.1 color: "#02569B" permission: read: allow diff --git a/.kilo/agents/frontend-developer.md b/.kilo/agents/frontend-developer.md index 663f69c..725392a 100755 --- a/.kilo/agents/frontend-developer.md +++ b/.kilo/agents/frontend-developer.md @@ -1,7 +1,7 @@ --- description: Handles UI implementation with multimodal capabilities. Accepts visual references like screenshots and mockups (GNS-2 Tier 1) mode: all -model: ollama-cloud/kimi-k2.6 +model: ollama-cloud/qwen3-coder:480b color: "#0EA5E9" permission: read: allow diff --git a/.kilo/agents/go-developer.md b/.kilo/agents/go-developer.md index 9bebcde..f9d9aab 100755 --- a/.kilo/agents/go-developer.md +++ b/.kilo/agents/go-developer.md @@ -1,7 +1,7 @@ --- description: Go backend specialist for Gin, Echo, APIs, and database integration (GNS-2 Tier 1) mode: subagent -model: ollama-cloud/deepseek-v4-pro-max +model: ollama-cloud/qwen3-coder:480b color: "#00ADD8" permission: read: allow diff --git a/.kilo/agents/incident-responder.md b/.kilo/agents/incident-responder.md index 54fe63d..7b54eec 100644 --- a/.kilo/agents/incident-responder.md +++ b/.kilo/agents/incident-responder.md @@ -1,7 +1,7 @@ --- description: Server incident response and system hardening specialist. Handles live forensics, malware removal, persistence hunting, SSH-based server cleanup, and post-incident hardening. Works with any OS and panel. 
mode: subagent -model: ollama-cloud/kimi-k2.6 +model: ollama-cloud/glm-5.1 color: "#B91C1C" permission: read: allow diff --git a/.kilo/agents/markdown-validator.md b/.kilo/agents/markdown-validator.md index 5580ae0..7d518b2 100755 --- a/.kilo/agents/markdown-validator.md +++ b/.kilo/agents/markdown-validator.md @@ -1,7 +1,7 @@ --- description: Validates and corrects Markdown descriptions for Gitea issues (GNS-2 Tier 0) mode: subagent -model: ollama-cloud/kimi-k2.6 +model: ollama-cloud/qwen3-coder:480b color: "#F97316" permission: read: allow diff --git a/.kilo/agents/memory-manager.md b/.kilo/agents/memory-manager.md index f46c761..a90a0e7 100755 --- a/.kilo/agents/memory-manager.md +++ b/.kilo/agents/memory-manager.md @@ -1,7 +1,7 @@ --- description: Manages agent memory systems - short-term (context), long-term (vector store), and episodic (experiences) (GNS-2 Tier 0) mode: subagent -model: ollama-cloud/deepseek-v4-pro-max +model: ollama-cloud/glm-5.1 color: "#8B5CF6" permission: read: allow diff --git a/.kilo/agents/performance-engineer.md b/.kilo/agents/performance-engineer.md index 6467677..2222077 100755 --- a/.kilo/agents/performance-engineer.md +++ b/.kilo/agents/performance-engineer.md @@ -1,7 +1,7 @@ --- description: Reviews code for performance issues. 
Focuses on efficiency, N+1 queries, memory leaks, and algorithmic complexity (GNS-2 Tier 0) mode: all -model: ollama-cloud/deepseek-v4-pro-max +model: ollama-cloud/glm-5.1 color: "#0D9488" permission: read: allow diff --git a/.kilo/agents/php-developer.md b/.kilo/agents/php-developer.md index c5e1d4c..e1b962a 100644 --- a/.kilo/agents/php-developer.md +++ b/.kilo/agents/php-developer.md @@ -1,7 +1,7 @@ --- description: PHP backend specialist for Laravel, Symfony, WordPress, and full-stack web applications (GNS-2 Tier 1) mode: subagent -model: ollama-cloud/kimi-k2.6 +model: ollama-cloud/deepseek-v4-pro variant: thinking color: "#8B5CF6" permission: diff --git a/.kilo/agents/pipeline-judge.md b/.kilo/agents/pipeline-judge.md index a25c586..8b1a23b 100755 --- a/.kilo/agents/pipeline-judge.md +++ b/.kilo/agents/pipeline-judge.md @@ -1,7 +1,7 @@ --- description: Automated pipeline judge. Evaluates workflow execution by running tests, measuring token cost and wall-clock time. Produces objective fitness scores. Never writes code - only measures and scores. (GNS-2 Tier 0) mode: subagent -model: ollama-cloud/glm-5.1 +model: ollama-cloud/qwen3-coder:480b color: "#DC2626" permission: read: allow diff --git a/.kilo/agents/planner.md b/.kilo/agents/planner.md index 399f677..10f42b8 100755 --- a/.kilo/agents/planner.md +++ b/.kilo/agents/planner.md @@ -1,7 +1,7 @@ --- description: Advanced task planner using Chain of Thought, Tree of Thoughts, and Plan-Execute-Reflect (GNS-2 Tier 0) mode: subagent -model: ollama-cloud/deepseek-v4-pro-max +model: ollama-cloud/deepseek-v4-pro color: "#F59E0B" permission: read: allow diff --git a/.kilo/agents/prompt-optimizer.md b/.kilo/agents/prompt-optimizer.md index 2bb5685..59f0696 100755 --- a/.kilo/agents/prompt-optimizer.md +++ b/.kilo/agents/prompt-optimizer.md @@ -1,7 +1,7 @@ --- description: Improves agent system prompts based on performance failures. 
Meta-learner for prompt optimization (GNS-2 Tier 1) mode: subagent -model: ollama-cloud/kimi-k2.6 +model: ollama-cloud/glm-5.1 color: "#BE185D" permission: read: allow diff --git a/.kilo/agents/python-developer.md b/.kilo/agents/python-developer.md index b9c3288..a380f8e 100644 --- a/.kilo/agents/python-developer.md +++ b/.kilo/agents/python-developer.md @@ -1,7 +1,7 @@ --- description: Python backend specialist for Django, FastAPI, data science, and API development (GNS-2 Tier 1) mode: subagent -model: ollama-cloud/kimi-k2.6 +model: ollama-cloud/deepseek-v4-pro variant: thinking color: "#3776AB" permission: diff --git a/.kilo/agents/reflector.md b/.kilo/agents/reflector.md index 218539b..e395360 100755 --- a/.kilo/agents/reflector.md +++ b/.kilo/agents/reflector.md @@ -1,7 +1,7 @@ --- description: Self-reflection agent using Reflexion pattern - learns from mistakes (GNS-2 Tier 0) mode: subagent -model: ollama-cloud/deepseek-v4-pro-max +model: ollama-cloud/kimi-k2.6 color: "#10B981" permission: read: allow diff --git a/.kilo/agents/release-manager.md b/.kilo/agents/release-manager.md index e02809b..180f9e7 100755 --- a/.kilo/agents/release-manager.md +++ b/.kilo/agents/release-manager.md @@ -1,7 +1,7 @@ --- description: Manages git operations, semantic versioning, branching, and deployments. 
Ensures clean history (GNS-2 Tier 1) mode: subagent -model: ollama-cloud/glm-5.1 +model: ollama-cloud/kimi-k2.6 color: "#581C87" permission: read: allow diff --git a/.kilo/agents/requirement-refiner.md b/.kilo/agents/requirement-refiner.md index 7f76b8a..c4c3d99 100755 --- a/.kilo/agents/requirement-refiner.md +++ b/.kilo/agents/requirement-refiner.md @@ -1,7 +1,7 @@ --- description: Converts vague ideas and bug reports into strict User Stories with acceptance criteria checklists (GNS-2 Tier 1) mode: all -model: ollama-cloud/kimi-k2.6 +model: ollama-cloud/qwen3-coder:480b variant: thinking color: "#4F46E5" permission: diff --git a/.kilo/agents/security-auditor.md b/.kilo/agents/security-auditor.md index 4a897ce..ead7864 100755 --- a/.kilo/agents/security-auditor.md +++ b/.kilo/agents/security-auditor.md @@ -1,7 +1,7 @@ --- description: Scans for security vulnerabilities, OWASP Top 10, dependency CVEs, and hardcoded secrets (GNS-2 Tier 0) mode: subagent -model: ollama-cloud/deepseek-v4-pro-max +model: ollama-cloud/kimi-k2.6 color: "#DC2626" permission: read: allow diff --git a/.kilo/agents/system-analyst.md b/.kilo/agents/system-analyst.md index fcdaafa..2b67076 100755 --- a/.kilo/agents/system-analyst.md +++ b/.kilo/agents/system-analyst.md @@ -1,7 +1,7 @@ --- description: Designs technical specifications, data schemas, and API contracts before implementation (GNS-2 Tier 1) mode: subagent -model: ollama-cloud/glm-5.1 +model: ollama-cloud/kimi-k2.6 color: "#0891B2" permission: read: allow diff --git a/.kilo/agents/the-fixer.md b/.kilo/agents/the-fixer.md index d3e0700..0bae335 100755 --- a/.kilo/agents/the-fixer.md +++ b/.kilo/agents/the-fixer.md @@ -1,7 +1,7 @@ --- description: Iteratively fixes bugs based on specific error reports and test failures (GNS-2 Tier 1) mode: all -model: ollama-cloud/kimi-k2.6 +model: ollama-cloud/glm-5.1 color: "#F59E0B" permission: read: allow diff --git a/.kilo/agents/visual-tester.md b/.kilo/agents/visual-tester.md index 
94622d1..9ab05ae 100755 --- a/.kilo/agents/visual-tester.md +++ b/.kilo/agents/visual-tester.md @@ -1,7 +1,7 @@ --- description: Visual regression testing agent that compares screenshots and detects UI differences using pixelmatch and image diff (GNS-2 Tier 0) mode: subagent -model: ollama-cloud/kimi-k2.6 +model: ollama-cloud/glm-5.1 color: "#E91E63" permission: read: allow diff --git a/.kilo/agents/workflow-architect.md b/.kilo/agents/workflow-architect.md index d610c53..595a477 100755 --- a/.kilo/agents/workflow-architect.md +++ b/.kilo/agents/workflow-architect.md @@ -1,7 +1,7 @@ --- description: Creates and maintains workflow definitions with complete architecture, Gitea integration, and quality gates (GNS-2 Tier 1) mode: subagent -model: ollama-cloud/kimi-k2.6 +model: ollama-cloud/glm-5.1 variant: thinking color: "#EC4899" permission: diff --git a/.kilo/agents/workflow-cross-checker.md b/.kilo/agents/workflow-cross-checker.md index 7a323f2..d58cc3c 100644 --- a/.kilo/agents/workflow-cross-checker.md +++ b/.kilo/agents/workflow-cross-checker.md @@ -1,7 +1,7 @@ --- description: Workflow cross-checker and process inspector. Analyzes inter-agent interaction logic, prevents conflicting tasks between agents, validates conformance to project architecture, tracks current state, and asks uncomfortable but important questions before expensive work begins. 
mode: subagent -model: ollama-cloud/kimi-k2.6 +model: ollama-cloud/qwen3-coder:480b variant: thinking color: "#9333EA" permission: diff --git a/.kilo/capability-index.yaml b/.kilo/capability-index.yaml index 46d9112..b395e75 100644 --- a/.kilo/capability-index.yaml +++ b/.kilo/capability-index.yaml @@ -15,7 +15,7 @@ agents: forbidden: - test_writing - code_review - model: ollama-cloud/qwen3-coder:480b + model: ollama-cloud/kimi-k2.6 variant: thinking mode: subagent delegates_to: @@ -50,7 +50,7 @@ agents: - frontend_tests forbidden: - backend_code - model: ollama-cloud/minimax-m2.5 + model: ollama-cloud/qwen3-coder:480b mode: subagent delegates_to: - code-skeptic @@ -90,7 +90,7 @@ agents: forbidden: - frontend_code - non_php_backend - model: ollama-cloud/qwen3-coder:480b + model: ollama-cloud/deepseek-v4-pro variant: thinking mode: subagent delegates_to: @@ -129,7 +129,7 @@ agents: forbidden: - frontend_code - non_python_backend - model: ollama-cloud/qwen3-coder:480b + model: ollama-cloud/deepseek-v4-pro variant: thinking mode: subagent delegates_to: @@ -160,7 +160,7 @@ agents: - api_documentation forbidden: - frontend_code - model: ollama-cloud/qwen3-coder:480b + model: ollama-cloud/deepseek-v4-pro mode: subagent delegates_to: - code-skeptic @@ -193,7 +193,7 @@ agents: - concurrent_solutions forbidden: - frontend_code - model: ollama-cloud/deepseek-v4-pro-max + model: ollama-cloud/qwen3-coder:480b mode: subagent delegates_to: - code-skeptic @@ -223,7 +223,7 @@ agents: forbidden: - backend_code - web_development - model: ollama-cloud/qwen3-coder:480b + model: ollama-cloud/glm-5.1 mode: subagent delegates_to: - code-skeptic @@ -252,7 +252,7 @@ agents: - ci_cd_config forbidden: - application_code - model: ollama-cloud/kimi-k2.6 + model: ollama-cloud/glm-5.1 mode: subagent delegates_to: - code-skeptic @@ -280,7 +280,7 @@ agents: - coverage_reports forbidden: - implementation_code - model: ollama-cloud/qwen3-coder:480b + model: ollama-cloud/kimi-k2.6 variant: thinking 
mode: subagent delegates_to: @@ -308,7 +308,7 @@ agents: forbidden: - suggest_implementations - write_code - model: ollama-cloud/minimax-m2.5 + model: ollama-cloud/kimi-k2.6 mode: subagent delegates_to: - the-fixer @@ -335,7 +335,7 @@ agents: - vulnerability_list forbidden: - fix_vulnerabilities - model: ollama-cloud/deepseek-v4-pro-max + model: ollama-cloud/kimi-k2.6 mode: subagent delegates_to: - the-fixer @@ -361,7 +361,7 @@ agents: - optimization_suggestions forbidden: - write_code - model: ollama-cloud/deepseek-v4-pro-max + model: ollama-cloud/glm-5.1 mode: subagent delegates_to: - the-fixer @@ -386,7 +386,7 @@ agents: - resolution_notes forbidden: - feature_development - model: ollama-cloud/kimi-k2.6 + model: ollama-cloud/glm-5.1 mode: subagent delegates_to: - code-skeptic @@ -412,7 +412,7 @@ agents: - screenshots forbidden: - unit_testing - model: ollama-cloud/deepseek-v4-flash + model: ollama-cloud/kimi-k2.6 mode: subagent delegates_to: - orchestrator @@ -451,7 +451,7 @@ agents: - gitea_attachments forbidden: - code_changes - model: ollama-cloud/qwen3-coder:480b + model: ollama-cloud/glm-5.1 mode: subagent delegates_to: - the-fixer @@ -478,7 +478,7 @@ agents: - database_schemas forbidden: - implementation - model: ollama-cloud/deepseek-v4-pro-max + model: ollama-cloud/kimi-k2.6 mode: subagent delegates_to: [] fallback_models: @@ -501,7 +501,7 @@ agents: - new_agent_specs forbidden: - implementation - model: ollama-cloud/deepseek-v4-pro-max + model: ollama-cloud/deepseek-v4-pro mode: subagent delegates_to: - agent-architect @@ -611,7 +611,7 @@ agents: - recommendations forbidden: - code_changes - model: ollama-cloud/qwen3.5-122b + model: ollama-cloud/glm-5.1 variant: thinking mode: subagent delegates_to: @@ -637,7 +637,7 @@ agents: - optimization_report forbidden: - agent_creation - model: ollama-cloud/qwen3.5-122b + model: ollama-cloud/glm-5.1 variant: instant mode: subagent delegates_to: [] @@ -662,7 +662,7 @@ agents: - issue closures forbidden: - 
implementation - model: ollama-cloud/glm-5.1 + model: ollama-cloud/kimi-k2.6 mode: subagent delegates_to: [] fallback_models: @@ -689,7 +689,7 @@ agents: - code_writing - code_changes - prompt_changes - model: ollama-cloud/kimi-k2.6 + model: ollama-cloud/qwen3-coder:480b mode: subagent delegates_to: - prompt-optimizer @@ -712,7 +712,7 @@ agents: - command_files forbidden: - execution - model: ollama-cloud/qwen3.5-122b + model: ollama-cloud/glm-5.1 variant: thinking mode: subagent delegates_to: [] @@ -734,7 +734,7 @@ agents: - corrections forbidden: - content_creation - model: ollama-cloud/nemotron-3-nano + model: ollama-cloud/qwen3-coder:480b mode: subagent delegates_to: - orchestrator @@ -787,7 +787,7 @@ agents: forbidden: - implementation - execution - model: ollama-cloud/deepseek-v4-pro-max + model: ollama-cloud/deepseek-v4-pro mode: subagent delegates_to: [] fallback_models: @@ -813,7 +813,7 @@ agents: forbidden: - implementation - code_changes - model: ollama-cloud/deepseek-v4-pro-max + model: ollama-cloud/kimi-k2.6 mode: subagent delegates_to: [] fallback_models: @@ -839,7 +839,7 @@ agents: forbidden: - code_changes - implementation - model: ollama-cloud/deepseek-v4-pro-max + model: ollama-cloud/glm-5.1 mode: subagent delegates_to: [] fallback_models: @@ -876,7 +876,7 @@ agents: forbidden: - code_changes - implementation - model: ollama-cloud/glm-5.1 + model: ollama-cloud/qwen3-coder:480b variant: thinking mode: subagent delegates_to: @@ -890,100 +890,66 @@ agents: reasoning_effort: low incident-responder: capabilities: - - server_forensics - - malware_detection - - persistence_hunting - - malware_removal - - ssh_access_management - - file_integrity_check - - system_hardening - - backup_verification - - incident_reporting - - ioc_collection + - inter_agent_conflict_detection + - architecture_conformance_validation + - state_tracking_sanity + - process_inspection + - uncomfortable_questions_protocol + - pre_flight_validation + - mid_flight_revalidation 
receives: - - ssh_credentials - - server_info - - incident_description + - checkpoint_yaml + - task_claims + - agent_chain + - architecture_docs + - capability_index produces: - - forensic_report - - cleanup_confirmations - - hardening_report - - backup_packages + - cross_check_report + - verdict_approved_conditional_blocked + - risk_flags + - mitigation_suggestions forbidden: - code_writing - - deployment - model: ollama-cloud/kimi-k2.6 + - implementation + model: ollama-cloud/glm-5.1 mode: subagent delegates_to: - - code-skeptic - orchestrator + - reflector + - planner fallback_models: - - ollama-cloud/kimi-k2.6 - ollama-cloud/deepseek-v4-pro-max - ollama-cloud/glm-5.1 + - ollama-cloud/kimi-k2.6 failover_strategy: downgraded reasoning_effort: high - workflow-cross-checker: - capabilities: - - inter_agent_conflict_detection - - architecture_conformance_validation - - state_tracking_sanity - - process_inspection - - uncomfortable_questions_protocol - - pre_flight_validation - - mid_flight_revalidation - receives: - - checkpoint_yaml - - task_claims - - agent_chain - - architecture_docs - - capability_index - produces: - - cross_check_report - - verdict_approved_conditional_blocked - - risk_flags - - mitigation_suggestions - forbidden: - - code_writing - - implementation - model: ollama-cloud/kimi-k2.6 + workflow-cross-checker: null variant: thinking - mode: subagent - delegates_to: - - orchestrator - - reflector - - planner - fallback_models: - - ollama-cloud/deepseek-v4-pro-max - - ollama-cloud/glm-5.1 - - ollama-cloud/kimi-k2.6 - failover_strategy: downgraded - reasoning_effort: high evolution-prompt: capabilities: - - prompt_generation - - role_analysis - - adversarial_scenario_design - - test_case_creation + - prompt_generation + - role_analysis + - adversarial_scenario_design + - test_case_creation receives: - - agent_role_definition - - capability_index + - agent_role_definition + - capability_index produces: - - test_prompts - - evaluation_rubrics + - 
test_prompts + - evaluation_rubrics forbidden: - - direct_evaluation - - model_execution - model: ollama-cloud/deepseek-v4-pro-max + - direct_evaluation + - model_execution + model: ollama-cloud/kimi-k2.6 mode: subagent delegates_to: - - evolution-skeptic - - orchestrator + - evolution-skeptic + - orchestrator fallback_models: - - ollama-cloud/deepseek-v4-pro-max - - ollama-cloud/kimi-k2.6 - - ollama-cloud/glm-5.1 - - ollama-cloud/qwen3-coder:480b + - ollama-cloud/deepseek-v4-pro-max + - ollama-cloud/kimi-k2.6 + - ollama-cloud/glm-5.1 + - ollama-cloud/qwen3-coder:480b failover_strategy: downgraded reasoning_effort: high capability_routing: @@ -1067,7 +1033,7 @@ parallel_groups: trigger: code_ready_for_review criteria: all_must_complete_before_next_phase aggregator: orchestrator - overlap_check: none # read-only, no file writes + overlap_check: none testing_phase: agents: - sdet-engineer @@ -1076,7 +1042,7 @@ parallel_groups: trigger: tests_needed criteria: independent_test_types aggregator: orchestrator - overlap_check: none # read-only, no file writes + overlap_check: none implementation_phase: agents: - lead-developer diff --git a/AGENTS.md b/AGENTS.md index 13bb9f7..f4925f5 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -77,8 +77,7 @@ These agents are invoked automatically by `/pipeline` or manually via `@mention` ### Testing | Agent | Role | When Invoked | |-------|------|--------------| -| `@EvolutionPrompt` | Generates role-specific stress-test prompts by analyzing agent definitions | Manual invocation | -| `@EvolutionSkeptic` | Evaluates model responses against role-specific rubrics with detailed scoring and commentary | Manual invocation | +| `@BrowserAutomation` | Browser automation agent using Playwright MCP for E2E testing, form filling, navigation, and web interaction | E2E testing needed | ### Cognitive Enhancement | Agent | Role | When Invoked | diff --git a/agent-evolution/data/real-fit-report.json b/agent-evolution/data/real-fit-report.json index 
7599888..f3b3287 100644 --- a/agent-evolution/data/real-fit-report.json +++ b/agent-evolution/data/real-fit-report.json @@ -1,7 +1,7 @@ { - "generated": "2026-05-28T12:07:59Z", - "source": "real-fit-engine-db-filtered", - "total_evaluations": 144, + "generated": "2026-05-28T12:17:26.011791+00:00", + "source": "real-fit-engine", + "total_evaluations": 150, "agents": { "agent-architect": { "name": "agent-architect", @@ -9,6 +9,7 @@ "deepseek-v4-pro": 48.3, "glm-5.1": 48.3, "kimi-k2.6": 53.5, + "minimax-m2.5": 30.9, "qwen3-coder:480b": 48.3 }, "info": [ @@ -89,6 +90,7 @@ "deepseek-v4-pro": 22.8, "glm-5.1": 89.1, "kimi-k2.6": 91.2, + "minimax-m2.5": 45.0, "qwen3-coder:480b": 90.6 }, "info": [ @@ -131,6 +133,38 @@ "best_model": "glm-5.1", "best_score": 58.7 }, + "evolution-prompt": { + "name": "evolution-prompt", + "evaluations": { + "deepseek-v4-pro": 52.6, + "glm-5.1": 44.7, + "kimi-k2.6": 53.5, + "qwen3-coder:480b": 21.3 + }, + "info": [ + "Generates role-specific stress-test prompts by analyzing agent definitions", + "meta", + "ollama-cloud/deepseek-v4-pro" + ], + "best_model": "kimi-k2.6", + "best_score": 53.5 + }, + "evolution-skeptic": { + "name": "evolution-skeptic", + "evaluations": { + "deepseek-v4-pro": 33.1, + "glm-5.1": 31.6, + "kimi-k2.6": 37.3, + "qwen3-coder:480b": 42.9 + }, + "info": [ + "Evaluates model responses against role-specific rubrics with detailed scoring and commentary", + "meta", + "ollama-cloud/deepseek-v4-pro" + ], + "best_model": "qwen3-coder:480b", + "best_score": 42.9 + }, "flutter-developer": { "name": "flutter-developer", "evaluations": { @@ -153,6 +187,7 @@ "deepseek-v4-pro": 31.6, "glm-5.1": 53.2, "kimi-k2.6": 38.8, + "minimax-m2.5": 48.3, "qwen3-coder:480b": 56.0 }, "info": [ @@ -262,9 +297,11 @@ "orchestrator": { "name": "orchestrator", "evaluations": { + "deepseek-v4-flash": 27.0, "deepseek-v4-pro": 19.6, "glm-5.1": 36.2, "kimi-k2.6": 40.0, + "minimax-m2.5": 36.3, "qwen3-coder:480b": 39.1 }, "info": [ @@ -441,6 +478,7 @@ 
"deepseek-v4-pro": 58.7, "glm-5.1": 86.0, "kimi-k2.6": 97.0, + "minimax-m2.5": 53.5, "qwen3-coder:480b": 97.0 }, "info": [ @@ -546,220 +584,188 @@ ], "best_model": "qwen3-coder:480b", "best_score": 65.6 - }, - "evolution-skeptic": { - "name": "evolution-skeptic", - "evaluations": { - "deepseek-v4-pro": 33.1, - "glm-5.1": 31.6, - "kimi-k2.6": 37.3, - "qwen3-coder:480b": 42.9 - }, - "info": [ - "Evaluates model responses against role-specific rubrics with detailed scoring and commentary", - "meta", - "ollama-cloud/deepseek-v4-pro" - ], - "best_model": "qwen3-coder:480b", - "best_score": 42.9 - }, - "evolution-prompt": { - "name": "evolution-prompt", - "evaluations": { - "deepseek-v4-pro": 52.6, - "glm-5.1": 44.7, - "kimi-k2.6": 53.5, - "qwen3-coder:480b": 21.3 - }, - "info": [ - "Generates role-specific stress-test prompts by analyzing agent definitions", - "meta", - "ollama-cloud/deepseek-v4-pro" - ], - "best_model": "kimi-k2.6", - "best_score": 53.5 } }, "fit_scores": { "agent-architect": { - "model": "qwen3-coder:480b", - "fit": 48.3, - "explanation": "Best model for agent-architect is qwen3-coder:480b with avg score 48.3. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "model": "kimi-k2.6", + "fit": 53.5, + "explanation": "Best model for agent-architect is kimi-k2.6 with avg score 53.5. Strongest dimension: code_presence." }, "architect-indexer": { "model": "qwen3-coder:480b", "fit": 54.0, - "explanation": "Best model for architect-indexer is qwen3-coder:480b with avg score 54.0. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "explanation": "Best model for architect-indexer is qwen3-coder:480b with avg score 54.0. Strongest dimension: code_presence." }, "backend-developer": { - "model": "qwen3-coder:480b", - "fit": 43.2, - "explanation": "Best model for backend-developer is qwen3-coder:480b with avg score 43.2. 
Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "model": "deepseek-v4-pro", + "fit": 53.5, + "explanation": "Best model for backend-developer is deepseek-v4-pro with avg score 53.5. Strongest dimension: code_presence." }, "browser-automation": { - "model": "qwen3-coder:480b", - "fit": 48.9, - "explanation": "Best model for browser-automation is qwen3-coder:480b with avg score 48.9. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "model": "kimi-k2.6", + "fit": 63.8, + "explanation": "Best model for browser-automation is kimi-k2.6 with avg score 63.8. Strongest dimension: code_presence." }, "capability-analyst": { - "model": "qwen3-coder:480b", - "fit": 52.3, - "explanation": "Best model for capability-analyst is qwen3-coder:480b with avg score 52.3. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "model": "deepseek-v4-pro", + "fit": 58.7, + "explanation": "Best model for capability-analyst is deepseek-v4-pro with avg score 58.7. Strongest dimension: code_presence." }, "code-skeptic": { - "model": "qwen3-coder:480b", - "fit": 90.6, - "explanation": "Best model for code-skeptic is qwen3-coder:480b with avg score 90.6. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "model": "kimi-k2.6", + "fit": 91.2, + "explanation": "Best model for code-skeptic is kimi-k2.6 with avg score 91.2. Strongest dimension: code_presence." }, "devops-engineer": { - "model": "qwen3-coder:480b", - "fit": 87.2, - "explanation": "Best model for devops-engineer is qwen3-coder:480b with avg score 87.2. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "model": "glm-5.1", + "fit": 96.2, + "explanation": "Best model for devops-engineer is glm-5.1 with avg score 96.2. Strongest dimension: keyword_coverage." 
}, "evaluator": { - "model": "qwen3-coder:480b", - "fit": 43.8, - "explanation": "Best model for evaluator is qwen3-coder:480b with avg score 43.8. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "model": "glm-5.1", + "fit": 58.7, + "explanation": "Best model for evaluator is glm-5.1 with avg score 58.7. Strongest dimension: code_presence." }, "evolution-prompt": { - "model": "qwen3-coder:480b", - "fit": 21.3, - "explanation": "Best model for evolution-prompt is qwen3-coder:480b with avg score 21.3. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "model": "kimi-k2.6", + "fit": 53.5, + "explanation": "Best model for evolution-prompt is kimi-k2.6 with avg score 53.5. Strongest dimension: code_presence." }, "evolution-skeptic": { "model": "qwen3-coder:480b", "fit": 42.9, - "explanation": "Best model for evolution-skeptic is qwen3-coder:480b with avg score 42.9. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "explanation": "Best model for evolution-skeptic is qwen3-coder:480b with avg score 42.9. Strongest dimension: structure." }, "flutter-developer": { - "model": "qwen3-coder:480b", + "model": "glm-5.1", "fit": 54.9, - "explanation": "Best model for flutter-developer is qwen3-coder:480b with avg score 54.9. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "explanation": "Best model for flutter-developer is glm-5.1 with avg score 54.9. Strongest dimension: code_presence." }, "frontend-developer": { "model": "qwen3-coder:480b", "fit": 56.0, - "explanation": "Best model for frontend-developer is qwen3-coder:480b with avg score 56.0. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "explanation": "Best model for frontend-developer is qwen3-coder:480b with avg score 56.0. Strongest dimension: code_presence." 
}, "go-developer": { "model": "qwen3-coder:480b", "fit": 58.7, - "explanation": "Best model for go-developer is qwen3-coder:480b with avg score 58.7. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "explanation": "Best model for go-developer is qwen3-coder:480b with avg score 58.7. Strongest dimension: code_presence." }, "history-miner": { - "model": "qwen3-coder:480b", - "fit": 44.8, - "explanation": "Best model for history-miner is qwen3-coder:480b with avg score 44.8. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "model": "kimi-k2.6", + "fit": 46.9, + "explanation": "Best model for history-miner is kimi-k2.6 with avg score 46.9. Strongest dimension: code_presence." }, "incident-responder": { - "model": "qwen3-coder:480b", - "fit": 56.4, - "explanation": "Best model for incident-responder is qwen3-coder:480b with avg score 56.4. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "model": "glm-5.1", + "fit": 65.6, + "explanation": "Best model for incident-responder is glm-5.1 with avg score 65.6. Strongest dimension: code_presence." }, "lead-developer": { - "model": "qwen3-coder:480b", + "model": "kimi-k2.6", "fit": 72.5, - "explanation": "Best model for lead-developer is qwen3-coder:480b with avg score 72.5. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "explanation": "Best model for lead-developer is kimi-k2.6 with avg score 72.5. Strongest dimension: keyword_coverage." }, "markdown-validator": { "model": "qwen3-coder:480b", "fit": 47.4, - "explanation": "Best model for markdown-validator is qwen3-coder:480b with avg score 47.4. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "explanation": "Best model for markdown-validator is qwen3-coder:480b with avg score 47.4. Strongest dimension: code_presence." 
}, "memory-manager": { - "model": "qwen3-coder:480b", - "fit": 46.8, - "explanation": "Best model for memory-manager is qwen3-coder:480b with avg score 46.8. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "model": "glm-5.1", + "fit": 48.3, + "explanation": "Best model for memory-manager is glm-5.1 with avg score 48.3. Strongest dimension: code_presence." }, "orchestrator": { - "model": "qwen3-coder:480b", - "fit": 39.1, - "explanation": "Best model for orchestrator is qwen3-coder:480b with avg score 39.1. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "model": "kimi-k2.6", + "fit": 40.0, + "explanation": "Best model for orchestrator is kimi-k2.6 with avg score 40.0. Strongest dimension: code_presence." }, "performance-engineer": { - "model": "qwen3-coder:480b", - "fit": 36.3, - "explanation": "Best model for performance-engineer is qwen3-coder:480b with avg score 36.3. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "model": "glm-5.1", + "fit": 63.8, + "explanation": "Best model for performance-engineer is glm-5.1 with avg score 63.8. Strongest dimension: code_presence." }, "php-developer": { - "model": "qwen3-coder:480b", - "fit": 48.3, - "explanation": "Best model for php-developer is qwen3-coder:480b with avg score 48.3. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "model": "deepseek-v4-pro", + "fit": 53.5, + "explanation": "Best model for php-developer is deepseek-v4-pro with avg score 53.5. Strongest dimension: code_presence." }, "pipeline-judge": { "model": "qwen3-coder:480b", "fit": 52.9, - "explanation": "Best model for pipeline-judge is qwen3-coder:480b with avg score 52.9. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "explanation": "Best model for pipeline-judge is qwen3-coder:480b with avg score 52.9. Strongest dimension: code_presence." 
}, "planner": { - "model": "qwen3-coder:480b", - "fit": 33.7, - "explanation": "Best model for planner is qwen3-coder:480b with avg score 33.7. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "model": "deepseek-v4-pro", + "fit": 41.7, + "explanation": "Best model for planner is deepseek-v4-pro with avg score 41.7. Strongest dimension: code_presence." }, "product-owner": { - "model": "qwen3-coder:480b", - "fit": 27.0, - "explanation": "Best model for product-owner is qwen3-coder:480b with avg score 27.0. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "model": "kimi-k2.6", + "fit": 34.6, + "explanation": "Best model for product-owner is kimi-k2.6 with avg score 34.6. Strongest dimension: actionability." }, "prompt-optimizer": { - "model": "qwen3-coder:480b", - "fit": 31.8, - "explanation": "Best model for prompt-optimizer is qwen3-coder:480b with avg score 31.8. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "model": "glm-5.1", + "fit": 48.3, + "explanation": "Best model for prompt-optimizer is glm-5.1 with avg score 48.3. Strongest dimension: code_presence." }, "python-developer": { - "model": "qwen3-coder:480b", + "model": "deepseek-v4-pro", "fit": 48.3, - "explanation": "Best model for python-developer is qwen3-coder:480b with avg score 48.3. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "explanation": "Best model for python-developer is deepseek-v4-pro with avg score 48.3. Strongest dimension: code_presence." }, "reflector": { - "model": "qwen3-coder:480b", - "fit": 20.9, - "explanation": "Best model for reflector is qwen3-coder:480b with avg score 20.9. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "model": "kimi-k2.6", + "fit": 58.7, + "explanation": "Best model for reflector is kimi-k2.6 with avg score 58.7. Strongest dimension: code_presence." 
}, "release-manager": { - "model": "qwen3-coder:480b", - "fit": 41.7, - "explanation": "Best model for release-manager is qwen3-coder:480b with avg score 41.7. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "model": "kimi-k2.6", + "fit": 50.2, + "explanation": "Best model for release-manager is kimi-k2.6 with avg score 50.2. Strongest dimension: code_presence." }, "requirement-refiner": { "model": "qwen3-coder:480b", "fit": 45.3, - "explanation": "Best model for requirement-refiner is qwen3-coder:480b with avg score 45.3. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "explanation": "Best model for requirement-refiner is qwen3-coder:480b with avg score 45.3. Strongest dimension: code_presence." }, "sdet-engineer": { - "model": "qwen3-coder:480b", + "model": "kimi-k2.6", "fit": 97.0, - "explanation": "Best model for sdet-engineer is qwen3-coder:480b with avg score 97.0. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "explanation": "Best model for sdet-engineer is kimi-k2.6 with avg score 97.0. Strongest dimension: keyword_coverage." }, "security-auditor": { - "model": "qwen3-coder:480b", - "fit": 41.5, - "explanation": "Best model for security-auditor is qwen3-coder:480b with avg score 41.5. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "model": "kimi-k2.6", + "fit": 63.8, + "explanation": "Best model for security-auditor is kimi-k2.6 with avg score 63.8. Strongest dimension: code_presence." }, "system-analyst": { - "model": "qwen3-coder:480b", - "fit": 77.0, - "explanation": "Best model for system-analyst is qwen3-coder:480b with avg score 77.0. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "model": "kimi-k2.6", + "fit": 92.0, + "explanation": "Best model for system-analyst is kimi-k2.6 with avg score 92.0. Strongest dimension: keyword_coverage." 
}, "the-fixer": { - "model": "qwen3-coder:480b", - "fit": 42.9, - "explanation": "Best model for the-fixer is qwen3-coder:480b with avg score 42.9. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "model": "glm-5.1", + "fit": 46.6, + "explanation": "Best model for the-fixer is glm-5.1 with avg score 46.6. Strongest dimension: code_presence." }, "visual-tester": { - "model": "qwen3-coder:480b", - "fit": 53.5, - "explanation": "Best model for visual-tester is qwen3-coder:480b with avg score 53.5. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "model": "glm-5.1", + "fit": 58.7, + "explanation": "Best model for visual-tester is glm-5.1 with avg score 58.7. Strongest dimension: code_presence." }, "workflow-architect": { - "model": "qwen3-coder:480b", - "fit": 36.3, - "explanation": "Best model for workflow-architect is qwen3-coder:480b with avg score 36.3. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "model": "glm-5.1", + "fit": 48.3, + "explanation": "Best model for workflow-architect is glm-5.1 with avg score 48.3. Strongest dimension: code_presence." }, "workflow-cross-checker": { "model": "qwen3-coder:480b", "fit": 65.6, - "explanation": "Best model for workflow-cross-checker is qwen3-coder:480b with avg score 65.6. Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)." + "explanation": "Best model for workflow-cross-checker is qwen3-coder:480b with avg score 65.6. Strongest dimension: code_presence." 
} } } \ No newline at end of file diff --git a/kilo-meta.json b/kilo-meta.json index 29c626f..9da01cc 100644 --- a/kilo-meta.json +++ b/kilo-meta.json @@ -1,12 +1,12 @@ { "$schema": "https://app.kilo.ai/config.json", "metaVersion": "1.0.0", - "lastSync": "2026-05-27T22:05:59.064Z", + "lastSync": "2026-05-28T12:44:21.540Z", "agents": { "requirement-refiner": { "file": ".kilo/agents/requirement-refiner.md", "description": "Converts vague ideas and bug reports into strict User Stories with acceptance criteria checklists", - "model": "ollama-cloud/kimi-k2.6", + "model": "ollama-cloud/qwen3-coder:480b", "mode": "all", "color": "#4F46E5", "category": "core" @@ -21,7 +21,7 @@ "system-analyst": { "file": ".kilo/agents/system-analyst.md", "description": "Designs technical specifications, data schemas, and API contracts before implementation", - "model": "ollama-cloud/glm-5.1", + "model": "ollama-cloud/kimi-k2.6", "mode": "subagent", "category": "core" }, @@ -44,7 +44,7 @@ "frontend-developer": { "file": ".kilo/agents/frontend-developer.md", "description": "Handles UI implementation with multimodal capabilities. 
Accepts visual references like screenshots and mockups", - "model": "ollama-cloud/kimi-k2.6", + "model": "ollama-cloud/qwen3-coder:480b", "mode": "all", "color": "#0EA5E9", "category": "core" @@ -52,7 +52,7 @@ "backend-developer": { "file": ".kilo/agents/backend-developer.md", "description": "Backend specialist for Node.js, Express, APIs, and database integration", - "model": "ollama-cloud/kimi-k2.6", + "model": "ollama-cloud/deepseek-v4-pro", "mode": "subagent", "color": "#10B981", "category": "core" @@ -60,7 +60,7 @@ "go-developer": { "file": ".kilo/agents/go-developer.md", "description": "Go backend specialist for Gin, Echo, APIs, and database integration", - "model": "ollama-cloud/deepseek-v4-pro", + "model": "ollama-cloud/qwen3-coder:480b", "mode": "subagent", "color": "#00ADD8", "category": "core" @@ -68,7 +68,7 @@ "devops-engineer": { "file": ".kilo/agents/devops-engineer.md", "description": "DevOps specialist for Docker, Kubernetes, CI/CD pipeline automation, and infrastructure management", - "model": "ollama-cloud/kimi-k2.6", + "model": "ollama-cloud/glm-5.1", "mode": "subagent", "color": "#FF6B35", "category": "core" @@ -84,7 +84,7 @@ "the-fixer": { "file": ".kilo/agents/the-fixer.md", "description": "Iteratively fixes bugs based on specific error reports and test failures", - "model": "ollama-cloud/kimi-k2.6", + "model": "ollama-cloud/glm-5.1", "mode": "all", "color": "#F59E0B", "category": "quality" @@ -92,7 +92,7 @@ "performance-engineer": { "file": ".kilo/agents/performance-engineer.md", "description": "Reviews code for performance issues. 
Focuses on efficiency, N+1 queries, memory leaks, and algorithmic complexity", - "model": "ollama-cloud/deepseek-v4-pro", + "model": "ollama-cloud/glm-5.1", "mode": "all", "color": "#0D9488", "category": "quality" @@ -100,7 +100,7 @@ "security-auditor": { "file": ".kilo/agents/security-auditor.md", "description": "Scans for security vulnerabilities, OWASP Top 10, dependency CVEs, and hardcoded secrets", - "model": "ollama-cloud/deepseek-v4-pro", + "model": "ollama-cloud/kimi-k2.6", "mode": "subagent", "color": "#DC2626", "category": "quality" @@ -108,7 +108,7 @@ "visual-tester": { "file": ".kilo/agents/visual-tester.md", "description": "Visual regression testing agent that compares screenshots and detects UI differences using pixelmatch and image diff", - "model": "ollama-cloud/kimi-k2.6", + "model": "ollama-cloud/glm-5.1", "mode": "subagent", "category": "quality" }, @@ -123,14 +123,14 @@ "release-manager": { "file": ".kilo/agents/release-manager.md", "description": "Manages git operations, semantic versioning, branching, and deployments. Ensures clean history", - "model": "ollama-cloud/glm-5.1", + "model": "ollama-cloud/kimi-k2.6", "mode": "subagent", "category": "meta" }, "evaluator": { "file": ".kilo/agents/evaluator.md", "description": "Scores agent effectiveness after task completion for continuous improvement", - "model": "ollama-cloud/kimi-k2.6", + "model": "ollama-cloud/glm-5.1", "mode": "subagent", "color": "#047857", "category": "meta" @@ -138,7 +138,7 @@ "prompt-optimizer": { "file": ".kilo/agents/prompt-optimizer.md", "description": "Improves agent system prompts based on performance failures. Meta-learner for prompt optimization", - "model": "ollama-cloud/kimi-k2.6", + "model": "ollama-cloud/glm-5.1", "mode": "subagent", "category": "meta" }, @@ -159,21 +159,21 @@ "capability-analyst": { "file": ".kilo/agents/capability-analyst.md", "description": "Analyzes task requirements against available agents, workflows, and skills. 
Identifies gaps and recommends new components.", - "model": "ollama-cloud/glm-5.1", + "model": "ollama-cloud/deepseek-v4-pro", "mode": "subagent", "category": "meta" }, "workflow-architect": { "file": ".kilo/agents/workflow-architect.md", "description": "Creates and maintains workflow definitions with complete architecture, Gitea integration, and quality gates", - "model": "ollama-cloud/kimi-k2.6", + "model": "ollama-cloud/glm-5.1", "mode": "subagent", "category": "meta" }, "markdown-validator": { "file": ".kilo/agents/markdown-validator.md", "description": "Validates and corrects Markdown descriptions for Gitea issues", - "model": "ollama-cloud/kimi-k2.6", + "model": "ollama-cloud/qwen3-coder:480b", "mode": "subagent", "category": "meta" }, @@ -195,7 +195,7 @@ "reflector": { "file": ".kilo/agents/reflector.md", "description": "Self-reflection agent using Reflexion pattern - learns from mistakes", - "model": "ollama-cloud/deepseek-v4-pro", + "model": "ollama-cloud/kimi-k2.6", "mode": "subagent", "color": "#10B981", "category": "cognitive" @@ -203,7 +203,7 @@ "memory-manager": { "file": ".kilo/agents/memory-manager.md", "description": "Manages agent memory systems - short-term (context), long-term (vector store), and episodic (experiences)", - "model": "ollama-cloud/deepseek-v4-pro", + "model": "ollama-cloud/glm-5.1", "mode": "subagent", "color": "#8B5CF6", "category": "cognitive" @@ -211,7 +211,7 @@ "architect-indexer": { "file": ".kilo/agents/architect-indexer.md", "description": "Indexes and maps project codebase architecture into .architect/ directory", - "model": "ollama-cloud/kimi-k2.6", + "model": "ollama-cloud/qwen3-coder:480b", "mode": "subagent", "color": "#10B981", "category": "core" @@ -219,7 +219,7 @@ "flutter-developer": { "file": ".kilo/agents/flutter-developer.md", "description": "Flutter mobile specialist for cross-platform apps, state management, and UI components", - "model": "ollama-cloud/kimi-k2.6", + "model": "ollama-cloud/glm-5.1", "mode": 
"subagent", "color": "#02569B", "category": "core" @@ -227,7 +227,7 @@ "php-developer": { "file": ".kilo/agents/php-developer.md", "description": "PHP specialist for Laravel, Symfony, WordPress, and modular architecture", - "model": "ollama-cloud/kimi-k2.6", + "model": "ollama-cloud/deepseek-v4-pro", "mode": "subagent", "color": "#8B5CF6", "category": "core" @@ -235,7 +235,7 @@ "pipeline-judge": { "file": ".kilo/agents/pipeline-judge.md", "description": "Automated pipeline judge. Evaluates workflow execution by running tests, measuring token cost and wall-clock time. Produces objective fitness scores. Never writes code - only measures and scores.", - "model": "ollama-cloud/glm-5.1", + "model": "ollama-cloud/qwen3-coder:480b", "mode": "subagent", "color": "#DC2626", "category": "meta" @@ -243,7 +243,7 @@ "python-developer": { "file": ".kilo/agents/python-developer.md", "description": "Python specialist for Django, FastAPI, data processing, and ML pipelines", - "model": "ollama-cloud/kimi-k2.6", + "model": "ollama-cloud/deepseek-v4-pro", "mode": "subagent", "color": "#3776AB", "category": "core" @@ -251,7 +251,7 @@ "incident-responder": { "file": ".kilo/agents/incident-responder.md", "description": "Server incident response and system hardening specialist. Handles live forensics, malware removal, persistence hunting, SSH-based server cleanup, and post-incident hardening. Works with any OS and panel.", - "model": "ollama-cloud/kimi-k2.6", + "model": "ollama-cloud/glm-5.1", "mode": "subagent", "color": "#B91C1C", "category": "core" @@ -259,7 +259,7 @@ "workflow-cross-checker": { "file": ".kilo/agents/workflow-cross-checker.md", "description": "Workflow cross-checker and process inspector. 
Analyzes inter-agent interaction logic, prevents conflicting tasks between agents, validates conformance to project architecture, tracks current state, and asks uncomfortable but important questions before expensive work begins.", - "model": "ollama-cloud/kimi-k2.6", + "model": "ollama-cloud/qwen3-coder:480b", "mode": "subagent", "color": "#9333EA", "category": "meta" @@ -267,7 +267,7 @@ "evolution-skeptic": { "file": ".kilo/agents/evolution-skeptic.md", "description": "Evaluates model responses against role-specific rubrics with detailed scoring and commentary", - "model": "ollama-cloud/deepseek-v4-pro", + "model": "ollama-cloud/qwen3-coder:480b", "mode": "subagent", "color": "#C026D3", "category": "meta" @@ -275,7 +275,7 @@ "evolution-prompt": { "file": ".kilo/agents/evolution-prompt.md", "description": "Generates role-specific stress-test prompts by analyzing agent definitions", - "model": "ollama-cloud/deepseek-v4-pro", + "model": "ollama-cloud/kimi-k2.6", "mode": "subagent", "color": "#FF6B00", "category": "meta" diff --git a/kilo.jsonc b/kilo.jsonc index 9ac75dd..7c30eec 100644 --- a/kilo.jsonc +++ b/kilo.jsonc @@ -23,7 +23,7 @@ "requirement-refiner": { "description": "Converts vague ideas and bug reports into strict User Stories with acceptance criteria checklists", "mode": "all", - "model": "ollama-cloud/kimi-k2-thinking", + "model": "ollama-cloud/qwen3-coder:480b", "color": "#4F46E5", "permission": { "read": "allow", @@ -43,7 +43,7 @@ "history-miner": { "description": "Analyzes git history to find duplicates and past solutions, preventing regression and duplicate work", "mode": "subagent", - "model": "ollama-cloud/glm-5.1", + "model": "ollama-cloud/kimi-k2.6", "permission": { "task": { "*": "deny", @@ -65,7 +65,7 @@ "sdet-engineer": { "description": "Writes tests following TDD methodology. 
Tests MUST fail initially (Red phase)", "mode": "all", - "model": "ollama-cloud/qwen3-coder:480b", + "model": "ollama-cloud/kimi-k2.6", "color": "#8B5CF6", "permission": { "read": "allow", @@ -84,7 +84,7 @@ "lead-developer": { "description": "Primary code writer for backend and core logic. Writes implementation to pass tests", "mode": "subagent", - "model": "ollama-cloud/qwen3-coder:480b", + "model": "ollama-cloud/kimi-k2.6", "color": "#DC2626", "permission": { "read": "allow", @@ -103,7 +103,7 @@ "frontend-developer": { "description": "Handles UI implementation with multimodal capabilities. Accepts visual references like screenshots and mockups", "mode": "all", - "model": "ollama-cloud/minimax-m2.5", + "model": "ollama-cloud/kimi-k2.6", "color": "#0EA5E9", "permission": { "read": "allow", @@ -122,7 +122,7 @@ "backend-developer": { "description": "Backend specialist for Node.js, Express, APIs, and database integration", "mode": "subagent", - "model": "ollama-cloud/minimax-m2.5", + "model": "ollama-cloud/deepseek-v4-pro", "color": "#10B981", "permission": { "read": "allow", @@ -141,7 +141,7 @@ "go-developer": { "description": "Go backend specialist for Gin, Echo, APIs, and database integration", "mode": "subagent", - "model": "ollama-cloud/minimax-m2.5", + "model": "ollama-cloud/qwen3-coder:480b", "color": "#00ADD8", "permission": { "read": "allow", @@ -160,7 +160,7 @@ "devops-engineer": { "description": "DevOps specialist for Docker, Kubernetes, CI/CD pipeline automation, and infrastructure management", "mode": "subagent", - "model": "ollama-cloud/minimax-m2.5", + "model": "ollama-cloud/glm-5.1", "color": "#FF6B35", "permission": { "read": "allow", @@ -180,7 +180,7 @@ "code-skeptic": { "description": "Adversarial code reviewer. Finds problems and issues. 
Does NOT suggest implementations", "mode": "subagent", - "model": "ollama-cloud/deepseek-v4-pro-max", + "model": "ollama-cloud/kimi-k2.6", "color": "#E11D48", "permission": { "read": "allow", @@ -198,7 +198,7 @@ "the-fixer": { "description": "Iteratively fixes bugs based on specific error reports and test failures", "mode": "all", - "model": "ollama-cloud/kimi-k2.6", + "model": "ollama-cloud/glm-5.1", "color": "#F59E0B", "permission": { "read": "allow", @@ -254,7 +254,7 @@ "visual-tester": { "description": "Visual regression testing agent that compares screenshots and detects UI differences using pixelmatch and image diff", "mode": "subagent", - "model": "ollama-cloud/glm-5.1", + "model": "ollama-cloud/glm-5.1", "permission": { "read": "allow", "bash": "allow", @@ -307,7 +307,7 @@ "release-manager": { "description": "Manages git operations, semantic versioning, branching, and deployments. Ensures clean history", "mode": "subagent", - "model": "ollama-cloud/qwen3.6-plus", + "model": "ollama-cloud/kimi-k2.6", "permission": { "read": "allow", "edit": "allow", @@ -392,7 +392,7 @@ "capability-analyst": { "description": "Analyzes task requirements against available agents, workflows, and skills. Identifies gaps and recommends new components.", "mode": "subagent", - "model": "ollama-cloud/glm-5.1", + "model": "ollama-cloud/deepseek-v4-pro", "permission": { "read": "allow", "glob": "allow", @@ -422,7 +422,7 @@ "markdown-validator": { "description": "Validates and corrects Markdown descriptions for Gitea issues", "mode": "subagent", - "model": "ollama-cloud/deepseek-v4-pro-max", + "model": "ollama-cloud/qwen3-coder:480b", "permission": { "read": "allow", "edit": "allow", @@ -502,7 +502,7 @@ "incident-responder": { "description": "Server incident response and system hardening specialist. Handles live forensics, malware removal, persistence hunting, SSH-based server cleanup, and post-incident hardening. 
Works with any OS and panel.", "mode": "subagent", - "model": "ollama-cloud/kimi-k2.6", + "model": "ollama-cloud/glm-5.1", "color": "#B91C1C", "permission": { "read": "allow", @@ -522,7 +522,7 @@ "workflow-cross-checker": { "description": "Workflow cross-checker and process inspector. Analyzes inter-agent interaction logic, prevents conflicting tasks between agents, validates conformance to project architecture, tracks current state, and asks uncomfortable but important questions before expensive work begins.", "mode": "subagent", - "model": "ollama-cloud/kimi-k2.6", + "model": "ollama-cloud/qwen3-coder:480b", "color": "#9333EA", "variant": "thinking", "permission": {