From dbbf4c32e14041b040f5a344f3f643b8599a3c7c Mon Sep 17 00:00:00 2001 From: Deploy Bot Date: Wed, 27 May 2026 19:53:40 +0100 Subject: [PATCH] feat(landing): add state API service with real-fit score drill-down MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add apaw-state-api Flask service (landing/api/server.py) that serves agent fit scores, best models, and explanations from real-fit.db - Add nginx proxy rule: /api/state → apaw-state-api:8080 - Add fit-score drill-down modal (click heatmap cell → score breakdown + explanation) in api.js, styles.css, and index.html - Add real-fit-recalc.py script for offline score recalculation from stored SQLite responses - Add real-fit-engine.py (evaluation engine) and sync-dashboard-data.py - Add Dockerfile ENTRYPOINT + entrypoint.sh for landing container - Add docker-compose.ollama.yml for local Ollama inference - Update kilo.jsonc command models and agent-versions.json - Regenerate index.standalone.html with latest dashboard data - Add .gitignore entries for __pycache__, runtime data, and backups --- .gitignore | 12 + .kilo/kilo.jsonc | 10 +- agent-evolution/data/agent-versions.json | 4 +- agent-evolution/data/real-fit-report.json | 689 ++++++++++++++++++++++ agent-evolution/data/real-fit.db | Bin 0 -> 176128 bytes agent-evolution/index.standalone.html | 485 ++++++++++++--- docker/docker-compose.ollama.yml | 36 ++ landing/Dockerfile | 13 +- landing/api/real-fit-report.json | 1 + landing/api/server.py | 199 +++++++ landing/assets/api.js | 373 ++++++++++++ landing/assets/styles.css | 298 ++++++++++ landing/docker-compose.yml | 31 +- landing/entrypoint.sh | 12 + landing/index.html | 121 ++-- landing/nginx-landing.conf | 21 + scripts/real-fit-engine.py | 565 ++++++++++++++++++ scripts/real-fit-recalc.py | 157 +++++ scripts/sync-dashboard-data.py | 122 ++++ 19 files changed, 3012 insertions(+), 137 deletions(-) create mode 100644 agent-evolution/data/real-fit-report.json create mode 
100644 agent-evolution/data/real-fit.db create mode 100644 docker/docker-compose.ollama.yml create mode 120000 landing/api/real-fit-report.json create mode 100644 landing/api/server.py create mode 100644 landing/assets/api.js create mode 100755 landing/entrypoint.sh create mode 100644 landing/nginx-landing.conf create mode 100644 scripts/real-fit-engine.py create mode 100644 scripts/real-fit-recalc.py create mode 100644 scripts/sync-dashboard-data.py diff --git a/.gitignore b/.gitignore index 3eec336..10dcd50 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,18 @@ agent-evolution/archive/scripts/ agent-evolution/archive/reports/ agent-evolution/archive/data/ +# Python cache +__pycache__/ +*.pyc + +# Generated runtime data +agent-evolution/data/dashboard-data.json +agent-evolution/data/state.json +agent-evolution/data/model-benchmarks.json.bak +landing/api/state.json +landing/api/state.json.bak +landing/api/dashboard-data.json + # Architect generated maps (can be large, auto-indexed) # Note: .architect/ md and json files ARE tracked for team orientation # Only maps/ with file graphs can be very large diff --git a/.kilo/kilo.jsonc b/.kilo/kilo.jsonc index 8ec50d4..fb552e9 100644 --- a/.kilo/kilo.jsonc +++ b/.kilo/kilo.jsonc @@ -30,26 +30,26 @@ } }, "code": { - "model": "ollama-cloud/qwen3-coder:480b", + "model": "ollama-cloud/deepseek-v4-pro-max", "variant": "thinking", "description": "Primary code writer. Full tool access for development tasks.", "mode": "primary" }, "ask": { - "model": "ollama-cloud/glm-5.1", + "model": "ollama-cloud/kimi-k2.6", "variant": "instant", "description": "Read-only Q&A agent for codebase questions.", "mode": "primary" }, "plan": { - "model": "ollama-cloud/nemotron-3-super", + "model": "ollama-cloud/kimi-k2.6", "description": "Task planner. 
Creates detailed implementation plans.", "mode": "primary" }, "debug": { - "model": "ollama-cloud/glm-5.1", + "model": "ollama-cloud/deepseek-v4-pro-max", "variant": "thinking", - "description": "Bug diagnostics and troubleshooting. GLM-5.1 ★88, reasoning for deep debug.", + "description": "Bug diagnostics and troubleshooting.", "mode": "primary" } } diff --git a/agent-evolution/data/agent-versions.json b/agent-evolution/data/agent-versions.json index f645378..6438038 100644 --- a/agent-evolution/data/agent-versions.json +++ b/agent-evolution/data/agent-versions.json @@ -1,6 +1,6 @@ { "version": "1.0.0", - "lastUpdated": "2026-05-27T12:47:21.972Z", + "lastUpdated": "2026-05-27T13:10:49.174Z", "agents": { "lead-developer": { "current": { @@ -3876,7 +3876,7 @@ "total_agents": 38, "agents_with_history": 34, "pending_recommendations": 0, - "last_sync": "2026-05-27T12:47:21.974Z", + "last_sync": "2026-05-27T13:10:49.175Z", "sync_sources": [ "git", "capability-index.yaml", diff --git a/agent-evolution/data/real-fit-report.json b/agent-evolution/data/real-fit-report.json new file mode 100644 index 0000000..f57d4ae --- /dev/null +++ b/agent-evolution/data/real-fit-report.json @@ -0,0 +1,689 @@ +{ + "generated": "2026-05-27T18:36:13.173821+00:00", + "source": "real-fit-engine", + "total_evaluations": 102, + "agents": { + "agent-architect": { + "name": "agent-architect", + "evaluations": { + "deepseek-v4-pro-max": 41.6, + "kimi-k2.6": 41.6, + "qwen3-coder:480b": 41.6 + }, + "info": [ + "Creates, modifies, and reviews new agents, workflows, and skills based on capability gap analysis. 
Tier 2 meta-agent with self-cascade enabled.", + "meta", + "ollama-cloud/kimi-k2.6" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 41.6 + }, + "architect-indexer": { + "name": "architect-indexer", + "evaluations": { + "deepseek-v4-pro-max": 50.0, + "kimi-k2.6": 50.0, + "qwen3-coder:480b": 50.0 + }, + "info": [ + "Indexes and maps project codebase architecture into .architect/ directory. Creates and maintains structured documentation of entities, APIs, DB schema, file graphs, and conventions. (GNS-2 Tier 0)", + "core", + "ollama-cloud/glm-5.1" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 50.0 + }, + "backend-developer": { + "name": "backend-developer", + "evaluations": { + "deepseek-v4-pro-max": 50.0, + "kimi-k2.6": 50.0, + "qwen3-coder:480b": 50.0 + }, + "info": [ + "Backend specialist for Node.js, Express, APIs, and database integration (GNS-2 Tier 1)", + "core", + "ollama-cloud/qwen3-coder:480b" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 50.0 + }, + "browser-automation": { + "name": "browser-automation", + "evaluations": { + "deepseek-v4-pro-max": 50.0, + "kimi-k2.6": 50.0, + "qwen3-coder:480b": 50.0 + }, + "info": [ + "Browser automation agent using Playwright MCP for E2E testing, form filling, navigation, and web interaction (GNS-2 Tier 0)", + "testing", + "ollama-cloud/deepseek-v4-flash" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 50.0 + }, + "capability-analyst": { + "name": "capability-analyst", + "evaluations": { + "deepseek-v4-pro-max": 41.6, + "kimi-k2.6": 41.6, + "qwen3-coder:480b": 41.6 + }, + "info": [ + "Analyzes task requirements against available agents, workflows, and skills. Identifies gaps and recommends new components. 
Tier 2 meta-agent with self-cascade enabled.", + "meta", + "ollama-cloud/deepseek-v4-pro-max" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 41.6 + }, + "code-skeptic": { + "name": "code-skeptic", + "evaluations": { + "deepseek-v4-pro-max": 50.0, + "kimi-k2.6": 50.0, + "qwen3-coder:480b": 50.0 + }, + "info": [ + "Adversarial code reviewer. Finds problems and issues. Does NOT suggest implementations (GNS-2 Tier 0)", + "quality", + "ollama-cloud/minimax-m2.5" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 50.0 + }, + "devops-engineer": { + "name": "devops-engineer", + "evaluations": { + "deepseek-v4-pro-max": 50.0, + "kimi-k2.6": 50.0, + "qwen3-coder:480b": 50.0 + }, + "info": [ + "DevOps specialist for Docker, Kubernetes, CI/CD pipeline automation, and infrastructure management (GNS-2 Tier 1)", + "core", + "ollama-cloud/kimi-k2.6" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 50.0 + }, + "evaluator": { + "name": "evaluator", + "evaluations": { + "deepseek-v4-pro-max": 50.0, + "kimi-k2.6": 50.0, + "qwen3-coder:480b": 50.0 + }, + "info": [ + "Scores agent effectiveness after task completion for continuous improvement. Tier 2 meta-agent with self-cascade enabled.", + "meta", + "ollama-cloud/qwen3.5-122b" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 50.0 + }, + "flutter-developer": { + "name": "flutter-developer", + "evaluations": { + "deepseek-v4-pro-max": 41.6, + "kimi-k2.6": 41.6, + "qwen3-coder:480b": 41.6 + }, + "info": [ + "Flutter mobile specialist for cross-platform apps, state management, and UI components (GNS-2 Tier 1)", + "core", + "ollama-cloud/qwen3-coder:480b" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 41.6 + }, + "frontend-developer": { + "name": "frontend-developer", + "evaluations": { + "deepseek-v4-pro-max": 50.0, + "kimi-k2.6": 50.0, + "qwen3-coder:480b": 50.0 + }, + "info": [ + "Handles UI implementation with multimodal capabilities. 
Accepts visual references like screenshots and mockups (GNS-2 Tier 1)", + "core", + "ollama-cloud/minimax-m2.5" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 50.0 + }, + "go-developer": { + "name": "go-developer", + "evaluations": { + "deepseek-v4-pro-max": 50.0, + "kimi-k2.6": 50.0, + "qwen3-coder:480b": 50.0 + }, + "info": [ + "Go backend specialist for Gin, Echo, APIs, and database integration (GNS-2 Tier 1)", + "core", + "ollama-cloud/deepseek-v4-pro-max" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 50.0 + }, + "history-miner": { + "name": "history-miner", + "evaluations": { + "deepseek-v4-pro-max": 50.0, + "kimi-k2.6": 50.0, + "qwen3-coder:480b": 50.0 + }, + "info": [ + "Analyzes git history to find duplicates and past solutions, preventing regression and duplicate work (GNS-2 Tier 0)", + "core", + "ollama-cloud/qwen3.5-122b" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 50.0 + }, + "incident-responder": { + "name": "incident-responder", + "evaluations": { + "deepseek-v4-pro-max": 41.6, + "kimi-k2.6": 41.6, + "qwen3-coder:480b": 41.6 + }, + "info": [ + "Server incident response and system hardening specialist. Handles live forensics, malware removal, persistence hunting, SSH-based server cleanup, and post-incident hardening. Works with any OS and panel.", + "core", + "ollama-cloud/kimi-k2.6" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 41.6 + }, + "lead-developer": { + "name": "lead-developer", + "evaluations": { + "deepseek-v4-pro-max": 41.6, + "kimi-k2.6": 41.6, + "qwen3-coder:480b": 41.6 + }, + "info": [ + "Primary code writer for backend and core logic. 
Writes implementation to pass tests (GNS-2 Tier 1)", + "core", + "ollama-cloud/qwen3-coder:480b" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 41.6 + }, + "markdown-validator": { + "name": "markdown-validator", + "evaluations": { + "deepseek-v4-pro-max": 50.0, + "kimi-k2.6": 50.0, + "qwen3-coder:480b": 50.0 + }, + "info": [ + "Validates and corrects Markdown descriptions for Gitea issues (GNS-2 Tier 0)", + "meta", + "ollama-cloud/nemotron-3-nano" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 50.0 + }, + "memory-manager": { + "name": "memory-manager", + "evaluations": { + "deepseek-v4-pro-max": 41.6, + "kimi-k2.6": 41.6, + "qwen3-coder:480b": 41.6 + }, + "info": [ + "Manages agent memory systems - short-term (context), long-term (vector store), and episodic (experiences) (GNS-2 Tier 0)", + "cognitive", + "ollama-cloud/deepseek-v4-pro-max" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 41.6 + }, + "orchestrator": { + "name": "orchestrator", + "evaluations": { + "deepseek-v4-pro-max": 41.6, + "kimi-k2.6": 41.6, + "qwen3-coder:480b": 41.6 + }, + "info": [ + "Main dispatcher. Routes tasks between agents based on Issue status and manages the workflow state machine. IF:90 for optimal routing accuracy. (GNS-2 Tier 1)", + "meta", + "ollama-cloud/kimi-k2.6" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 41.6 + }, + "performance-engineer": { + "name": "performance-engineer", + "evaluations": { + "deepseek-v4-pro-max": 50.0, + "kimi-k2.6": 50.0, + "qwen3-coder:480b": 50.0 + }, + "info": [ + "Reviews code for performance issues. 
Focuses on efficiency, N+1 queries, memory leaks, and algorithmic complexity (GNS-2 Tier 0)", + "quality", + "ollama-cloud/deepseek-v4-pro-max" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 50.0 + }, + "php-developer": { + "name": "php-developer", + "evaluations": { + "deepseek-v4-pro-max": 50.0, + "kimi-k2.6": 50.0, + "qwen3-coder:480b": 50.0 + }, + "info": [ + "PHP backend specialist for Laravel, Symfony, WordPress, and full-stack web applications (GNS-2 Tier 1)", + "core", + "ollama-cloud/qwen3-coder:480b" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 50.0 + }, + "pipeline-judge": { + "name": "pipeline-judge", + "evaluations": { + "deepseek-v4-pro-max": 50.0, + "kimi-k2.6": 50.0, + "qwen3-coder:480b": 50.0 + }, + "info": [ + "Automated pipeline judge. Evaluates workflow execution by running tests, measuring token cost and wall-clock time. Produces objective fitness scores. Never writes code - only measures and scores. (GNS-2 Tier 0)", + "meta", + "ollama-cloud/kimi-k2.6" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 50.0 + }, + "planner": { + "name": "planner", + "evaluations": { + "deepseek-v4-pro-max": 50.0, + "kimi-k2.6": 50.0, + "qwen3-coder:480b": 50.0 + }, + "info": [ + "Advanced task planner using Chain of Thought, Tree of Thoughts, and Plan-Execute-Reflect (GNS-2 Tier 0)", + "cognitive", + "ollama-cloud/deepseek-v4-pro-max" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 50.0 + }, + "product-owner": { + "name": "product-owner", + "evaluations": { + "deepseek-v4-pro-max": 50.0, + "kimi-k2.6": 50.0, + "qwen3-coder:480b": 50.0 + }, + "info": [ + "Manages issue checklists, status labels, tracks progress and coordinates with human users (GNS-2 Tier 1)", + "meta", + "ollama-cloud/glm-5.1" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 50.0 + }, + "prompt-optimizer": { + "name": "prompt-optimizer", + "evaluations": { + "deepseek-v4-pro-max": 41.6, + "kimi-k2.6": 41.6, + "qwen3-coder:480b": 41.6 
+ }, + "info": [ + "Improves agent system prompts based on performance failures. Meta-learner for prompt optimization (GNS-2 Tier 1)", + "meta", + "ollama-cloud/qwen3.5-122b" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 41.6 + }, + "python-developer": { + "name": "python-developer", + "evaluations": { + "deepseek-v4-pro-max": 50.0, + "kimi-k2.6": 50.0, + "qwen3-coder:480b": 50.0 + }, + "info": [ + "Python backend specialist for Django, FastAPI, data science, and API development (GNS-2 Tier 1)", + "core", + "ollama-cloud/qwen3-coder:480b" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 50.0 + }, + "reflector": { + "name": "reflector", + "evaluations": { + "deepseek-v4-pro-max": 50.0, + "kimi-k2.6": 50.0, + "qwen3-coder:480b": 50.0 + }, + "info": [ + "Self-reflection agent using Reflexion pattern - learns from mistakes (GNS-2 Tier 0)", + "cognitive", + "ollama-cloud/deepseek-v4-pro-max" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 50.0 + }, + "release-manager": { + "name": "release-manager", + "evaluations": { + "deepseek-v4-pro-max": 50.0, + "kimi-k2.6": 50.0, + "qwen3-coder:480b": 50.0 + }, + "info": [ + "Manages git operations, semantic versioning, branching, and deployments. Ensures clean history (GNS-2 Tier 1)", + "meta", + "ollama-cloud/kimi-k2.6" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 50.0 + }, + "requirement-refiner": { + "name": "requirement-refiner", + "evaluations": { + "deepseek-v4-pro-max": 50.0, + "kimi-k2.6": 50.0, + "qwen3-coder:480b": 50.0 + }, + "info": [ + "Converts vague ideas and bug reports into strict User Stories with acceptance criteria checklists (GNS-2 Tier 1)", + "core", + "ollama-cloud/kimi-k2-thinking" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 50.0 + }, + "sdet-engineer": { + "name": "sdet-engineer", + "evaluations": { + "deepseek-v4-pro-max": 50.0, + "kimi-k2.6": 50.0, + "qwen3-coder:480b": 50.0 + }, + "info": [ + "Writes tests following TDD methodology. 
Tests MUST fail initially (Red phase) (GNS-2 Tier 1)", + "core", + "ollama-cloud/qwen3-coder:480b" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 50.0 + }, + "security-auditor": { + "name": "security-auditor", + "evaluations": { + "deepseek-v4-pro-max": 50.0, + "kimi-k2.6": 50.0, + "qwen3-coder:480b": 50.0 + }, + "info": [ + "Scans for security vulnerabilities, OWASP Top 10, dependency CVEs, and hardcoded secrets (GNS-2 Tier 0)", + "quality", + "ollama-cloud/deepseek-v4-pro-max" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 50.0 + }, + "system-analyst": { + "name": "system-analyst", + "evaluations": { + "deepseek-v4-pro-max": 41.6, + "kimi-k2.6": 41.6, + "qwen3-coder:480b": 41.6 + }, + "info": [ + "Designs technical specifications, data schemas, and API contracts before implementation (GNS-2 Tier 1)", + "core", + "ollama-cloud/deepseek-v4-pro-max" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 41.6 + }, + "the-fixer": { + "name": "the-fixer", + "evaluations": { + "deepseek-v4-pro-max": 50.0, + "kimi-k2.6": 50.0, + "qwen3-coder:480b": 50.0 + }, + "info": [ + "Iteratively fixes bugs based on specific error reports and test failures (GNS-2 Tier 1)", + "quality", + "ollama-cloud/kimi-k2.6" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 50.0 + }, + "visual-tester": { + "name": "visual-tester", + "evaluations": { + "deepseek-v4-pro-max": 50.0, + "kimi-k2.6": 50.0, + "qwen3-coder:480b": 50.0 + }, + "info": [ + "Visual regression testing agent that compares screenshots and detects UI differences using pixelmatch and image diff (GNS-2 Tier 0)", + "quality", + "ollama-cloud/qwen3-coder:480b" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 50.0 + }, + "workflow-architect": { + "name": "workflow-architect", + "evaluations": { + "deepseek-v4-pro-max": 41.6, + "kimi-k2.6": 41.6, + "qwen3-coder:480b": 41.6 + }, + "info": [ + "Creates and maintains workflow definitions with complete architecture, Gitea integration, 
and quality gates (GNS-2 Tier 1)", + "meta", + "ollama-cloud/qwen3.5-122b" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 41.6 + }, + "workflow-cross-checker": { + "name": "workflow-cross-checker", + "evaluations": { + "deepseek-v4-pro-max": 41.6, + "kimi-k2.6": 41.6, + "qwen3-coder:480b": 41.6 + }, + "info": [ + "Workflow cross-checker and process inspector. Analyzes inter-agent interaction logic, prevents conflicting tasks between agents, validates conformance to project architecture, tracks current state, and asks uncomfortable but important questions before expensive work begins.", + "meta", + "ollama-cloud/kimi-k2.6" + ], + "best_model": "deepseek-v4-pro-max", + "best_score": 41.6 + } + }, + "fit_scores": { + "agent-architect": { + "model": "deepseek-v4-pro-max", + "fit": 41.6, + "explanation": "Best model for agent-architect is deepseek-v4-pro-max with avg score 41.6. Strongest dimension: keyword_coverage." + }, + "architect-indexer": { + "model": "deepseek-v4-pro-max", + "fit": 50.0, + "explanation": "Best model for architect-indexer is deepseek-v4-pro-max with avg score 50.0. Strongest dimension: keyword_coverage." + }, + "backend-developer": { + "model": "deepseek-v4-pro-max", + "fit": 50.0, + "explanation": "Best model for backend-developer is deepseek-v4-pro-max with avg score 50.0. Strongest dimension: keyword_coverage." + }, + "browser-automation": { + "model": "deepseek-v4-pro-max", + "fit": 50.0, + "explanation": "Best model for browser-automation is deepseek-v4-pro-max with avg score 50.0. Strongest dimension: keyword_coverage." + }, + "capability-analyst": { + "model": "deepseek-v4-pro-max", + "fit": 41.6, + "explanation": "Best model for capability-analyst is deepseek-v4-pro-max with avg score 41.6. Strongest dimension: keyword_coverage." + }, + "code-skeptic": { + "model": "deepseek-v4-pro-max", + "fit": 50.0, + "explanation": "Best model for code-skeptic is deepseek-v4-pro-max with avg score 50.0. 
Strongest dimension: keyword_coverage." + }, + "devops-engineer": { + "model": "deepseek-v4-pro-max", + "fit": 50.0, + "explanation": "Best model for devops-engineer is deepseek-v4-pro-max with avg score 50.0. Strongest dimension: keyword_coverage." + }, + "evaluator": { + "model": "deepseek-v4-pro-max", + "fit": 50.0, + "explanation": "Best model for evaluator is deepseek-v4-pro-max with avg score 50.0. Strongest dimension: keyword_coverage." + }, + "flutter-developer": { + "model": "deepseek-v4-pro-max", + "fit": 41.6, + "explanation": "Best model for flutter-developer is deepseek-v4-pro-max with avg score 41.6. Strongest dimension: keyword_coverage." + }, + "frontend-developer": { + "model": "deepseek-v4-pro-max", + "fit": 50.0, + "explanation": "Best model for frontend-developer is deepseek-v4-pro-max with avg score 50.0. Strongest dimension: keyword_coverage." + }, + "go-developer": { + "model": "deepseek-v4-pro-max", + "fit": 50.0, + "explanation": "Best model for go-developer is deepseek-v4-pro-max with avg score 50.0. Strongest dimension: keyword_coverage." + }, + "history-miner": { + "model": "deepseek-v4-pro-max", + "fit": 50.0, + "explanation": "Best model for history-miner is deepseek-v4-pro-max with avg score 50.0. Strongest dimension: keyword_coverage." + }, + "incident-responder": { + "model": "deepseek-v4-pro-max", + "fit": 41.6, + "explanation": "Best model for incident-responder is deepseek-v4-pro-max with avg score 41.6. Strongest dimension: keyword_coverage." + }, + "lead-developer": { + "model": "deepseek-v4-pro-max", + "fit": 41.6, + "explanation": "Best model for lead-developer is deepseek-v4-pro-max with avg score 41.6. Strongest dimension: code_presence." + }, + "markdown-validator": { + "model": "deepseek-v4-pro-max", + "fit": 50.0, + "explanation": "Best model for markdown-validator is deepseek-v4-pro-max with avg score 50.0. Strongest dimension: keyword_coverage." 
+ }, + "memory-manager": { + "model": "deepseek-v4-pro-max", + "fit": 41.6, + "explanation": "Best model for memory-manager is deepseek-v4-pro-max with avg score 41.6. Strongest dimension: keyword_coverage." + }, + "orchestrator": { + "model": "deepseek-v4-pro-max", + "fit": 41.6, + "explanation": "Best model for orchestrator is deepseek-v4-pro-max with avg score 41.6. Strongest dimension: keyword_coverage." + }, + "performance-engineer": { + "model": "deepseek-v4-pro-max", + "fit": 50.0, + "explanation": "Best model for performance-engineer is deepseek-v4-pro-max with avg score 50.0. Strongest dimension: keyword_coverage." + }, + "php-developer": { + "model": "deepseek-v4-pro-max", + "fit": 50.0, + "explanation": "Best model for php-developer is deepseek-v4-pro-max with avg score 50.0. Strongest dimension: keyword_coverage." + }, + "pipeline-judge": { + "model": "deepseek-v4-pro-max", + "fit": 50.0, + "explanation": "Best model for pipeline-judge is deepseek-v4-pro-max with avg score 50.0. Strongest dimension: keyword_coverage." + }, + "planner": { + "model": "deepseek-v4-pro-max", + "fit": 50.0, + "explanation": "Best model for planner is deepseek-v4-pro-max with avg score 50.0. Strongest dimension: keyword_coverage." + }, + "product-owner": { + "model": "deepseek-v4-pro-max", + "fit": 50.0, + "explanation": "Best model for product-owner is deepseek-v4-pro-max with avg score 50.0. Strongest dimension: keyword_coverage." + }, + "prompt-optimizer": { + "model": "deepseek-v4-pro-max", + "fit": 41.6, + "explanation": "Best model for prompt-optimizer is deepseek-v4-pro-max with avg score 41.6. Strongest dimension: keyword_coverage." + }, + "python-developer": { + "model": "deepseek-v4-pro-max", + "fit": 50.0, + "explanation": "Best model for python-developer is deepseek-v4-pro-max with avg score 50.0. Strongest dimension: keyword_coverage." 
+ }, + "reflector": { + "model": "deepseek-v4-pro-max", + "fit": 50.0, + "explanation": "Best model for reflector is deepseek-v4-pro-max with avg score 50.0. Strongest dimension: keyword_coverage." + }, + "release-manager": { + "model": "deepseek-v4-pro-max", + "fit": 50.0, + "explanation": "Best model for release-manager is deepseek-v4-pro-max with avg score 50.0. Strongest dimension: keyword_coverage." + }, + "requirement-refiner": { + "model": "deepseek-v4-pro-max", + "fit": 50.0, + "explanation": "Best model for requirement-refiner is deepseek-v4-pro-max with avg score 50.0. Strongest dimension: keyword_coverage." + }, + "sdet-engineer": { + "model": "deepseek-v4-pro-max", + "fit": 50.0, + "explanation": "Best model for sdet-engineer is deepseek-v4-pro-max with avg score 50.0. Strongest dimension: keyword_coverage." + }, + "security-auditor": { + "model": "deepseek-v4-pro-max", + "fit": 50.0, + "explanation": "Best model for security-auditor is deepseek-v4-pro-max with avg score 50.0. Strongest dimension: keyword_coverage." + }, + "system-analyst": { + "model": "deepseek-v4-pro-max", + "fit": 41.6, + "explanation": "Best model for system-analyst is deepseek-v4-pro-max with avg score 41.6. Strongest dimension: keyword_coverage." + }, + "the-fixer": { + "model": "deepseek-v4-pro-max", + "fit": 50.0, + "explanation": "Best model for the-fixer is deepseek-v4-pro-max with avg score 50.0. Strongest dimension: keyword_coverage." + }, + "visual-tester": { + "model": "deepseek-v4-pro-max", + "fit": 50.0, + "explanation": "Best model for visual-tester is deepseek-v4-pro-max with avg score 50.0. Strongest dimension: keyword_coverage." + }, + "workflow-architect": { + "model": "deepseek-v4-pro-max", + "fit": 41.6, + "explanation": "Best model for workflow-architect is deepseek-v4-pro-max with avg score 41.6. Strongest dimension: keyword_coverage." 
+ }, + "workflow-cross-checker": { + "model": "deepseek-v4-pro-max", + "fit": 41.6, + "explanation": "Best model for workflow-cross-checker is deepseek-v4-pro-max with avg score 41.6. Strongest dimension: keyword_coverage." + } + } +} \ No newline at end of file diff --git a/agent-evolution/data/real-fit.db b/agent-evolution/data/real-fit.db new file mode 100644 index 0000000000000000000000000000000000000000..6f19b54fee8d86a6d00434aeeef7815ae087593d GIT binary patch literal 176128 zcmeHw3w#{OS?7#qB#lOz8Si?v-d%ZD?YCuXM)T@mE<2yjO@emQb* zmvCQI_f&URm1J3#t&G)5R@QXYRCWF9`@Z_>tMB`2;;BX^)VgMl~HH5FW7n-7I14r`pZ4t+ji{mzIwbaO_$^ur9Pz< z)S9eWe+A8MCJ!GuH6~ckh5bPReyq0_CdZzh6waPIIez%uGs2T&&kW-VxFcLXO{=YnmtL(tW+ zv2$amN5>|N7uNP;yn{ojVArqYxOTY&8se1HsH^z%6nR%|Dq;QI+Z+sZZyOr& zzPf}_bIzypQ;!2ZXPJqhQUqF=96LUC&JLb%`26IVlc(Y9<721I;ni!(?5td~4VXgT zKtU}P4PvoXtjLSZGgJ>oWt#_7%lQOIt}fYX)MQChE4GHMAz70V9BQiF-P?BU^1ikQ zBV;2&+n)gkHnxHr28Ig*O?cIsTCUcoEVvD{fY__Rwz6$Zz{zr@uryWHS~Fu*t*i4u zrc*{k+#=GLIitq+43@HWP*>}~*7&gv+Z!Tv6E4-DhTzcIuC8qd4|<=$c$e6%i&4}wGbNniK2`|tNQ%Xq|? 
zfHdQ`-!V$#nK?dfPOa6k{Kn1Lnb?^AnMSFE$j*9CWrmr2ZBd?@hRK?<5kyt0!KzS~ zYxXxTsFk{Wr9QO?B1&DfHQJ~afo_d%fuTs3}7l@{M)HVy~e;rJaEiT zn(@2CF>vFVIdCLGml<+V)(SPHiWWrM8y4XGX4Tq~{ZXSaP$3IA01{o%KV>!HE$3*i&td}uN(ghQdPhyH8mjnJn;KN)&=+oR$Rz(v4C zz(v4Cz(v4Cz(v4Cz(v4CU^63dw{Kqucs3Lpg?dz7tjIObhwk-_bV#+r95}_mL+la* zkN;KZWlyh()IGj~9X0Suk~BG51~(n>PVxL}^bnuDM#+~w?}9HxZ14{F@*Qv%KR;7a z7o&xms%g=}oLrcP?|*EYFJYBgP5dN!!v#faNTn!pGk~f;5%vvs)aT^rjB*7&{`gkk zP>10g5QUd1ftP^iqy4^Uhb9*qHKo23l^R8*uGTz1hT77Kay=?nW|fKzU;fB_z7&k* zazm-f;Ehy|*1#PK!SG?!34AC42=LV(MGaP$>T_x(T9g;$l3ImIKZ2iP50j`0=iX)I zC3yP7eZGMXzr13y=_HTJ>#ePRc+ie1&gf5Upo zAGpi6r$dD)f$`TQ^g16vO_t>{9IKNVct1k0EY;@0L7)-^S1AQN1EB8rqq;CzMO$Cr zhw3Vof>JcbjGSSL@V4(muQjJ=@I7c41M$76^I6sQ<@cg5&(x4dLdD+b_aMrdDK+Xa zNw(_WjgTacE1vH{gaQz%Rm~dVccM-T;OZ9D=4Ak~;CT<~q##wLX$9P~U=GcheK&eO zT~imq=>sOVu7X=X_#1e>19d(v0WCS+>Dxny1cMhmk#CPY5dPKh#n3;5zAN---^^_4M0XU#8vU*+eq6Vv`A@$%`ig2gkgg`?ro)VB1g!?LC@`R-}sRcwegQ zXexeWWW_fYlScE(35>_VetYv;1GY2EwT}x=Cz8-ZB6&oR3Uy^cPQ>H!<-kdsU7b4~ z*p~2m4sIQvEtR99qka73*=#xuuN^0iE)RGrWxW1dj|B!qtIb$~Z8es-fp(K=quuAn z0(&0wdUkCchs~5ymaat0$ynCWxV_-)%Zb&zbwV^+fAJ`89gwg!;snHAaBf^36B?DG zq5*Mga&ZMhjAx7<&W>V21TvqM%VjAI6g%yBd+YJ}WOfBQ%;n7%Up&%!=gW(7B^48+ ziDc5*roG^KCMrr_UxPM^c2!1(=AWm|2F;Wa-!M1`e9*nk}CU`*v6llTU2*9rRMxz?0~EwrALaccRC)&5O-$ z$9sJP#$U0-u`RwmUYqSHJ4QVakD~r8>(MAGw;O(rpw4Im&f$=6(AzS5h>zPpWghGI zjW|Y&Eg6rZ_Kfkr-&3!u;0T%%m0T%%m0T%%m0T%%m0T%%m0T%%m z0T+SmAaFO1{NJkI5B0X_TSBnm?}&WP6Zu@^%h&m)TkIm>BH$w6BH$w6BH$w6BH$w6 zBH$w6BH$w6BCyd2+~tK70rra~JRjTA;oH^9EPyWI@q8X$@^0sgqVxYPk&mH=Ly`X* z`Nzoriu{krUq#-G{AuKmBYyyK0H2NgR^;DBelha1MkVLB-A^t8E&?tBE&?tBE&?tB zE&?tBE&?tBE&?tBE&}V0zAXAAn}{2T!88RxCpoi zxCpoixCpoixCpoixCpoixCm?p1kknZH5u-a*JN_#{I{Vj@zN~1LtcXG>6N-%sLuiz zeJwx+eEE(=wKhLfQWv9znyP70xWyhWX|H)+M;E!1GV*oLJJA*N3yRi&D>YHLH&lHG zxh07D=*2lYj^qfc_FiYu~C4hixs){Hk1gzT9PX8_tm?7L*A-Vl}pe`^ircZE5C%l zFjuwOUAoUFdZGOpwN{oY1)03<6_gcU1!Mt-ngp-4h?*q~gcQfl2Sx98>lACR?+IFj%q5Ef>JcbtyR?uT#cxq_nA{P_!2aVu&AMKXI1N)m(e$8 
z;LgyxT(LD+MJe}ZN{u>9l2utnK+5nU4fGxrL?VEoTGhy~%BX#GEn`%hmjTB@3AJC4 zs?xMl!t+;=N-)3k=<#$-UDTj`sZm$UQXS&_FQIOyC7>ev%P8n6A)}}QLjz)&>i|;p zM*brr$Q{N%E!uSdbrEn8a1n43a1n43a1n43a1n43a1n43a1n43_|`>081{=PuDOJB z*P$3h199X3zjde6eKQvU7XcRm7XcRm7XcRm7XcRm7XcRm7XcT6bwD8GeaaK?yyEHo z)8MZKJ9-`rgt||5y$v2P|AUI_|D&JQw*&_DSNb0t7$-gv69u&eOsVqecCACo;xx6S>QZX@_NG7Lu#naiG zm`%o$nP^;$CbN@?>>)9AD3yxkQYkT>I2ex~ipTpt+j(&PadKO95pWT35xAoe&`;`- zz(HN@&ku}KE^tTXg)>!6(5iAl0hc|kj%EB&6}d(Z3r{wtrum~W>Mq&T)(-YC8u|6b*(30a2{qx|y7@beX zGP{l)%N$9G%gFt7K558({c$}U*rQ(HYgG%t;*u_tCynTG=GI)c6 zW1wRI;Hs#Ur7O{LGA8aCOC*k_bIXk(XO5xSr-uTG=5zhyFb2lm@G$Z^dXk_!3uw(@oDo)sRZLYiF_=T83kIKm0+S#Sxtr|9fk;) zROCgxzN;0e2GwtB5$7Cx?8>AvnPZ9N@W{62RMPtZ58uGafpON)@`&MiX(KsPWoc+i zWmY&_l9m>0%IsWS7#}^0>31wSCLo_(C>}-+%EAo5fj?EG1!We`tpU`cJPou9%8mre z52!eHYK;$>jX37pKFpbtq|NP0jOB7;%Mz}bHwibZ_XbAveE)G6B<(eN#CSk6;%Ok{ z*d>_Lu`5+j)!>iAXHS|_4DPkkGAAn%!c)>C}NM=p>!@jy(wtBhz280`|Ml9&te3zDxO$pB!hLX12b5sKge zja;3VV!~)mmH={0rDdS>I-+)gNR^^cR0|EloPs(78f9In19i86IeJ9Uz)iU2Y^fX-V~LgVNr@?QYG?JHz`^E0Bw#5wbaP0hF2o?pPOiNH8RtAiaZ?bZ;OlyhX+!1l~SccE4$#mAFVvom{ zyQ3CIcQ7sjw*i5W_Z(aa_#RKV7<#Sm_xn7-FL~Y*+|x4}81J6ydVlA<|3`g);k&Ej zwDQJj$l#Y%jGw{D<3;>z9A#@`#&(WJjN>A%Mz$}s)1VxO* z+)N;oh4~4ssfIPfP&Uq?z?>`SX;nhuCKVwn0Dv`ErXVh*ER;comgWHvdVQI#z&f?y zRDJE51T2~mNpof81t*g+7i9f0T?j<=C(-=T(LrZ%v=EsvTD89l=Bnd!@jEuHU_J_ti7Ba0La-Vw`m(l9I` zeHRvN!HBU!dIi+dtQFA~swB*SLJaB!EQjOB0)^+qBwgcsf*C2r)00-VM;#eUo|2-4`=`>T4s%R@FfgD$ zhgiW38iOqr3Ii(|8xiv%^o_Lc(K*n~kOnq6r#3+M9u_8RvTS=`DqWzRSE6HAAR?|V zM=|$+X1G@7I40)D;>%K2QZz==9MZQ3hF(n|ZLI1Dc{^;xZo!CJ&jlO`7Zb*eI5A+{ zmKG*J*cQArV)f}Iq1LEW(6F&U17!}v4I=Q7V;?O|vjBKwO?2 z6V75%fOgesq!fb)0L8S9ZLAtLzs7{q5Z6}|u&mV#xf=z2ue4IM|h{z|G=OG*JV9!<22;>}2tmDJKyCZ08iaY=t5AT~n-hGucrG>;<+w})kpBc3cO8mvtqXTb7tP6f6z z3<)4N<@%xw#A@s}EHQc#3no(-q3JV>H}JJN*Jv~M+HYE5|noP+kwngBa;9CHh6 zwpOSq5XTA|Ts$+!K{b<1>ZabDksB2V=B-0yZ8R0FNELP0v3xd_SXKwinlIektM>;I zukJ#G$VT)|$TcL{S_NQuLZQB3vB2^S2TlufQVmR0h;A%JB_^DJ8YLKL3Cl~Em5Qbm 
zfZxheX;A{@6Jn-Skl4eBjbRTA;lKr1m}{VuDX{oVOq_@s+h$lM(Hj(iT`P^MF|$>W z)ex3zgdbb|gDMLF;voOgsg+b&63$HE_pC}4xfEmMltsKp@`=&Ja?7ijZpqnG`aK|L zHKgu1f}Dx9Z&s-bW(m@tAsQU)9SuWrnsW|1HE2UBL^`86hS7r=0a-Kvy_$@b2DDru zbjcSn15=aF490o z5+BF(yTHPD0j=dsIQ%gqJ|4P<;C6T`r^VmMi2y2I9h*yG#Rl?fiTsu+-{2pB2Q71k z$@qGsV3zZEQq1I+Q{lvfITF~{_29joKBn)4ql;N2V5qDQEuoM224zJs^b2@vu>Jv0 z6a;i00Q`VO!gLKZSy;J1I$-vYt0i>_?QUTH#wwu8!E9kEL0Hf^J&e4x_+QK=Msv#@ zS7)-OQi|^X3wYl2gm?Fz==o}(-1Q#cFLgW$5AK+s=F|F?pl5KPxksPfca1n1R1Jf| zGipOXlAAn_2^;2qa1>S#I7I=9k%c9-Q3F*U=%=orl}WSS5Ed>$0(QpdVwQ3T3=Mt^ zHAThB{Q_jMfLf-LgrP4XdEAoHvFCP@A@96+btfWXysO%INJz(rg`KT$qAdYDOBv7b z+D;x_Yc}*qujk;vD*$f4zKc4j9JATj~2W9QC{LwiMeC!lkFV(c9H z_r%!YQ0Gqjj{^b+Ayr7wc;e&LJtX7 zgMu+i;j-|E@Z$NiM-NYqnU;=;u}M%Xz?1;*^$7VZD(t>$G;~e4aANG-n4lD)c=uJK zmuoLR3{4iNV`wM8|MC#}G$X^#X#c(usj7?=K=+)L_YDhIg=w|8bVzt&;>_t7n#9Vi zGPAV*s`YlQ7Z$D!!T(W1%8amoT&mB-U|}h$<^4m#qe48Ehhf09z=0(`S{*~vyMG_D z1i?J`M=R1~HS%L(i|(b;$GaLW#f>?80csH*~J6os8q5%eRNAK31KKm|FD`bX2g(>zZE z^9eS{qCM>SlR)l`iXrQ5ua7K>b+w?D$T|6vARZK|D(olf$dABaRsos6(TjmT0rE*x zXX<9JXgVa6;|cK=`d~P^0sWv4u%>`H7^xuW6SlL~;8me%;|VgUH>zkZA-xmqo@eJ; z0w2w%eWAdM0yC#*rO=S=2+R#rK!)?EUp&3dm-W6Nn%;-?rTjJaM4g&kwgOD+#vQt^ zygainyI$+G*|N>g;3gLA^LlU4Gcus7`eODPbKFl)fN}b7<*ow!jYqVeev=LX*MXLP zjderT9E^FY09?9eP3X0!^N}I^Xd9Z+PJTY*GYXozf5VdGdtYTioo- zPwFy#8|&e)%^I?Av&a;?GX0L&X4UllLC?N{=3!kOx#q|fOtGz_EGE1arxmuJ*&x3H zWHvNTFwGrt8EgVzyP*#dAdfIU_ya>cxRjE^2E#9-3g(y+@fx`jtSOA*!`r8M{z`sCC9-I*lPmYttZzoW{`EIx^%UR|9Mm zat2exj+)KFNbJ=yp%TY!d~)5gMT4~j*@`A4X+nTwE9Am&dV>+$GZ-tu{8NG`665gK zu*blAC@othSm?p(gwBYUzZDUSZKRY4!ECebI{BKpI48-)LAjP~$_ zLJ7_Z>PyG~hmAD=rOhS>*5wQOo<2{MY-U^LTSzjE6NlTT#I-QH7Us6HFyE!`?t`^a zA8+o^x9a_l?DrI1PaGYaG(Cw;kKrR@$IgH`A00bDR0zZv!x^ zCg!Dzb%p}c2acKGSMIJSbLd5|9H(XcQp z*Ns0KRS<9HL&Lg^o}2G+NC3YRIA%~P)duq6G+%}ITfigRix^EItFhsnGmS0yN*zfP zbm&;Xf1x(8pwVDF=nv!~fc`ba=ot818A1neK!*TuN{B4P2$3m_2zr7942-5|7cgd1 zmR)#J-xc(v2Accys}EnZMV7U0AD||NQv<;OVKiugT##nLp%p|Y9GW6s16EbCc;YA@ z^E?cpC!hlibP#2@*IPqA``U~4 
z|2>{hdqO*UKO6XJ_s?{l^Z$lV>Np6G+U2KN)OQAvdFp;$O9AzOQM=p5CruXDcn7ErseFs)>pFs*PP@ZuW2!pWs93AJcd%(i=5H$N< zKkUo~*Un(gXLTWnbg5VLhIow*ilD%wi@}xS)<#XNj3@m%>jkUPu62mIc648j=z~~O zHMb6Ma_WfKu;J@&9$Nbmyrv^f9{sWIn}_BF{UL09=tb5Cm=G)E#5Nmf-{2^-L@_?y zwynG1sI#sCDc7NS`wOod!5fVV`2M4{Q+xAK{lTDTU_ig77j|7U17B!Rjf<7Q{qy!j z$y^6b<89UrN29s5Bly~ReLFTan8&oOcpKyDDGEyWjMhD)ZI?6J=6QVpOWkfPb?p&T z6jfL4WuRc(mUyb`ND2EZt(&ZvtFDKJ@!E6x1AQKm*uubj3T%kr%5A%cjzZX#8?M}F z7rBA=|K6{A!aMsu)O)e#>w)R6U+gUSz6g)ppF07TGpmp4kJzI%X=zpw z&e*Qlj0q@Y0@)uSU6J7ri``$f#xy=81UGS`0t#en1z&>aJy_zv*Vb|@Z-sM$c@FG_ z5btsr`^mN}b%%tVM;Z#a-b%u`v5851FavT|FmgoIkRLcQ%z^vzPRN#J9N^%X3G>as zmsLOt@*!byPC(Y`fF5}1E+YS42oOd_R{S?O@fKC&K@BlP_;d}s)jBRx zMLQ-ir`Rs}L{IGA_vo|3sEi&MV?=);cj{6}E+G{bz2B4>f!i{b;`f|_?37@GMgF*E zP(V%5=BF@*hS3y<1sJVPNhtKN6)q6J2B&HF*-z7)A*r^>bvMMfX+wNQ--f)N^^3^s znT?;dO$pi+vlZZAejP(>S60Q?-q%YXYqe|~RG_W?SkaG^sXCR9CJP03@GX~Q2LAo^vpqMyw>f}*mt+*m2 z;&C|P1=d6HzHka&l4Fn}#7H;;f@>0D1)gfiD7yyaV>8Ye4xw}!70t}*rZuYQP#+>& z0$6b0q zayDw6?zcjm0e16-W!rqYGU+zSq-p&gyj|RZwu_FyZ`*7japWiit5PcoA=#!vunK@s8RmuV>y=X-L0}11>Pg!_b z%s>z^UiBcM%b{RYIIDsaiI5db;01k;<9*Yb@A=AkB&_oe^f^f<;PMh_dYlQ8t&! zn0Zz#hz5JMct#x3ibP$sVnqoC9Xu8!r7S{XZsUkZTnq1IGw~dQP$~cjIe3pjh{IA` ztT^>p)G}>{8eBKSi1_W@TRaB`du*#9wk1Xhc7$$jl&IQm1XyZ+eqoO7Q0C*8PBp?+{8Ud`?1?BDYQ5|#RZI0kEJj%zi~UaHDQ*>Wz&qo z^1-cunS=Kz%m|Nhks_rmKKZu82dpNH*<}CSTReLl9B6lTquH-(ZS0rXz(sFOq!iBq z|Ix}p>@I-H!G3nAI6TEgi&KxKt&NoKYXb~6w~TauU<&}|;5|EFgtfRZkx~|%b~A#f z;wdpt@5`F82z_G59%|C;$35GmE3trOHgNC1mW?c^P|2)$--D$qq{H-y9SazTxwx2d z>TzIexBY)Uk!6%mWB>mdPx$vkKhw7W=l}ZyU+?~W*T*~G=0EF;djHY$nKsz}H)H!~ zW?{VyX+8+8ZLP|0;34E%m?fPO6B*{PD7}|v78WoL6L2x()MJ@NjBlK_xF*<==_G6C z7xn;bjunfAjjT*uut+HjQNHaUN~H75qCT{Hi)WuhVRU$C(p=cLHZII=pmM#I?QI&) zMBFtg^N>pW9qO7Z1 zEJ|D z6jwM%Z`fGa2(xj)BBd-u?Y3b{BL`$=!*<{Ry%IXJxzKDFUK<frxB(s7sP&v~2cwt3bZIZPk1IkHZ3_~2fieBsn% z38LMO*hKKTO*4^N1CP^3Y!1W-LvV2-r7Sw_cElzolW~z|m*%d=ws-~{`!+KTH@!^t z^T>#<#6f8TeX-VJrxw9WBF=36haLr#92+(hB@Q2Pf#THTV6}k`SZl&6k{U_+MV{mTWGzos#Il^ZqG>8PV>?|Yh$>LG)}I? 
zl~zQ)$BbBtKTPX|POv!q#KntKk3(M@s7kL5GLge~+Yixtp%XH~U|gU`DGSd=8YtHW zPb!^Z#L`e6)Bxu~XV213bKlmrabMf{apiNI)Egr?M#;2e#bGKgT%39=b+wz4na-z^ ztX*Uv%P5(4#0YnBaU!KGI_;)pf&*nD&Ui<}GjMe2-Jwa!$J)iHMn8iq()FJVl3&Qkao-ii;E}W%0?jSqeol1D1V8B=32WPL@FKjHG$* z{b(t2LPg#4LEM(Pn{dC+TDf1TDf* zT#QI5i_Aub)2C+gH1@#Ue*yCC1UHnwZGEr=*Wz8l7=BzYX*a%zke zTpX_ABF3r5(pYW-W8Bw9V+lANptJ zA$27!z4mQe8xyvhXP20cXW2N^@=;-MLZEG_2|G(l1e>wD# zz8d8J3kUwV`v3oP>4TpQew_oRAR);{ruWS$NuQ`N@FGCmU2Ve1X;hHM!8JDfOkO)PO5s zSl8kQ*T#VDX1dQLvK-rE@~yNENW;ZpEiPi5dMu4?q=;Y3@oWZ8h&UUu^Rx~~gGShl z3l%A40cyAN>r5QNk7&K7nVPiefT8Z$@j=I}tn})(eQg{lu8XBMk&dO{iba(DZ0b7K z13f!h*&Kk5m8G4!#%@92Ft80+cnexvKdkpJ(nuh07t z&(Z5|`I{H#&`D@da|B(M7V1{U2dQfp2#N+5Q+MtKgRq*x^~seY+-!{R*wfURI=*AR zF2n75SZNkY>MZcQU-&tFzF4X$v84@Idip=*>uhM)&Ge`Q}TwYo()nYs4( zTpMV z>+T{BSDV)tVX!&H7dfzBF2N0?myE0D5x{U+eHkFFDKE)I_*|0;Xg*46K`KqaHU?%H zK3>tZb$}a`+|{T+fOgRiv!zkbT03 zUYvHWZEb#CdpFX>eJ!DBnG6{6nX~NOMS#w+ zKJlR=;*$#)DP>{W$h`JzgDI6|&pToTI#`>)MC2gNk~=mDONu$pb!6#jI#`=DVKy#Uq?CoI?O0Jv3x&0DVY{8IWzu<(y$g3_%4SPpvEncm7cNdcmb%)_B?n^L*<9q!ffsBR6c#bU zU0j?EwFKKMNCPf2j% z@Ddj)PCX7cK=M*Cptgs|3Tk} zpy2nscYE)?z74>w;UeH7u-*vhS1uy#GVU-YOw?;?WfnD7RKTi_Omv3?`t@RV16b}*Dsu=-IB-6k zZHe?|ceN*hN8cD{^W1V?=2l?9c6=L=Xr*gy_NhH1scxVHZJbGG%K_r?qn}4VU*OV2 zIuXloO>Hbq9Fl1>N~0N=1?hkm33Z%0f8$WYEnFg~&Q}S=%`XrT+oW zqCNd;i71rz%N98Vax2)x-sLLEVjb2`vE}sZ@ifEpuzDaKX^gMxdca%ACT#z!gyiNq zB||O(K5#@jT%R&-$HRl_kP=iSvHk2H6Z=k+j`J_dj0x50;34Vji|Z^ z-h><^ZhCp+YzrKHu5$c0zrJOYtq3^%^3@)ccClZd8ecx);w>3@NvZ(Ow{gs(nMhiH z<)t2UH$cB$9JV-Ylbq~R*#_WM>iDBQxuz~1vJL0AW^ zHlHz*t2Db7Kqw#(%Hz&^FK#ak&c1IW5Gga=MYFp>tYz%Y0~|1J#p1wryh7ZPS9`t^__OZ+-u2%)zX*@r9~S`^0T+R9IRu*b>Vcj-R`ITl6E|H) zlm>5Xt6EC0M_v zcacTj;jhdA<5n#4jy_ijz|Al6*=*YMA=MjtCvliq$rm}${LkM8Y3vE=>w;y5Q7`c}Zeu^DoDzJ1gn zCR)}JukIlZRaElk+Xe&}1T-03DJ$SMaT^Wj=@v!kjaSIpKvL=6w$aa&AlevQO>dWX zDXfX+!9mmaG7*SXDv9ZB8b00a?L#4zHB|$A{_lC*6aMK?uJ=p9xt^i!Z+3mA^Zot{ zzU|&$hJx$-G#hmsYuFqmvHz@J0|(AdTP6TbF)uiMv*rXyC(_xLBXF%TT~i8E3yG$r 
z;b28u&IJ~C5)x%cn>=(eo=jv98TkOx@l2Y%RV!Bmj3FPul#zD<@>;NtpLA4}-$vMU z83%E*LCA5@a$&uFxfBvPdoG2~duMjqawV2Rm0bBAN<|7C`UXCc5L2rydH*dx$5QnYPehclX zc`Oi{QG-}{8KY>t?dhWEG7gz)n)ns9^lQVh9gLo6sG31t{m$jhxPdzhUuP5>+-tq7^z3+>R zdcEN{yuT3sx86o%yLWHo^`5sx=6beAem?NO!*hY348J$i79{O593=alg41GQj z3VlcS7eYVR{STo_-9HuD-7SYsb-%msU%DUf`%HIN-(Pj_@B3)iU-doT^@Dxy==z;L zv8&$u*Ij%2?&&($`>D=v^y;0z+WXPYzv!Lne1GqQos+$zomYeZ+!+sk-2Z0qPddZF zclbXUJmS}aivNSbhy15|zUcS#e8WH7^D*Ba_00IbyXW1$Px{Kf6TV#?-{^R=}*736)7dv`7axl2-{p|K1jCv{7rNv)quOGBaa)i~2Eu*W=@AE@+Ce;>)(Ju}9 zLFZzcBUGw9=syVMvnU-ZsYOcXhWrrqN1nG{hSSe1jQu0m8~;X^@AU79cG%qoX;eKQ z*a20%D^*Di`1cGt8X;(TzW)Izb9|eWCHDCDq#R%4farPuZm8vGfKw~E+dmZTaJu_3 z1NpvPP|3S;rT9MoP>O1Z)1T*k5Bi4&sd{8!p6}i7Pr&O)wfQ3Wzeg9Ok^=cL*}U_f z_a5{opgX1_ugmY*;ZH!9OkJzn@5U%NJ=_=+z6+1gS&7%(cj6H`>sejB2ak|-A!E9G zHy$Bdk=NyS;1RNQt!}@4mw#Wh!{M~W^!Ts#`uC-rb$Fe@_%@EMj?RuS9xS*auR>%bJntei{crXmILr zdaDZ{GdNXApBnD-`exM{?vt&@>GLw~ldVentm5UI37MydsbZpHs*z?Ycp+xWId4?n z3qqX|{G4hd+d-yN)ktqK3=}JU>9>wzpxAnx{)RD7Y*o_dLA+o&MKImp0X#Nm9Zp|E zcx=vUq@VqGY^>a+``U-c#@6HXw-=9%txEdbvlAq_cST9Qdk+Y5XBAFQy9PjxJ8O|n zc4D5jOJ90OJ221MYjC;{Fwfd6ksbyybga0g`*{dM$JXQY_8^9itxEdbj`@d?wse;R zm_L|moE{&zj`+TRW$}IAb;b9#WyJTrs}bM*s}$e&tSG*>URQkIy&Ca-*Ye_f3nqo@ zi|+^~1-2e%V#1gd*s6pSLby*>*3z`lhx=sfar*4VeX>Q1YhUQMx=x3FxXTw>==@~p zLg#-CB|5(+6zDwB_t%}ZzE5}V==)LsfA4!&XQ1zr|1*6@{MY(+`#;+IFa8U?f8g)# z{W*WS_jTW&_D=gg(0k1Ho4vbyjo?4~4hH{=?|kqRK7a5%9lsvD-0_#e@s1A!4|ZG( zZteKCp09O0((`-Xf9m=94x#5ey?@wK^uDua)cXrP1Kt+`|HB&y{Ejyl_+X&2whirw zKcDI#-m=&8Bli669)EstnfioKJ^ukK*rZjmXQ#Qt^Es?wvlTfL=J_mEu-Up+x4(lo zDGsMorpMpLn-oVKUT2@dn-oVitDk>A2wHUq@$Y2%`1DRttF3Zg`@eM`=+#ygtL;zW zt)$a!lIh?#@mA7ViPy_-;H{*yp4HW_@AXHc9kg>I)7h^L`J*YO8n3&5H|UQJGBvFZ ze-+PVUTUmo(Oq8%JpJEpf^8uq7Byqdf|e{rWjHOQ-M z_56uFU{!-Epzm3}ZA`EKc0b4{x)!g;Uw8-%Vsu5Tx1Yy!NPO3retr(qAt~qe@Uxf> zNfoQ@e}jpGcGP0J_&6pGx*D&ipTWdI*R(qPX`@3bl>^h^PZ=Fj)p#9#%;=D+X?6IM zm>M0a3z*)10#l=-4zH^p$JFSkX7%&Y2mJ%l4l^wQ)5DJm{()4hl-Kr0?)480wklYy 
zei+LMXAC~m_`k+-!dZ#e$B$w;;jCwM^&XNY)W3a|AQ;7qYj8$9G|FVmB0qd)5J<_j)bzimur_X7;DRnHj z^iW^K+f7Fm($fog!|EvGbTWl^Q_C*f7x7++6qClE!@Dfv{?SRFXSCMph<{+%)yc(Qt&*Htyb=K+Ac$Y*~BHf+ByJM=9 z)73bpIW=Ja+H)1G5NrMo(UY0puG^z%5TJx3X*lf&C#VfHSwFhAB0%WkWX zH2laQtk$ikoF*T|lEAr;(l7oNED4--NC$_oBybjT`gz#E$G(oz-Q*2?>{UoFSpy$? z8K;xn5Uj^u-m*#emDvdkEVm}q_aA(^_$xs*EIrkShz#u)F|xijNJ z1um?Bdq~eujGcpjPK+HsHF;w6#MtPQ@TuG=)s?7L2TM47iWODJS6)+QD{xoE?c>#T zy0kQ%%ZV8#P-NyMLCq#pt(1+HfOz)G77uv$*&NmFt|_#1=(;iKuz7sc)0vEzV-%hj7Xe%l4c5KU zc5pe2Z^xHY%pu{;1aUgqN+V&>@2LfV7=(i5l5U5XFuxsG@^Ev*naXApOjL#1z!A5$ zC?VGQfEEh&;AXmv2k~az6s41S_@UxVjrjj>c*3uT?(O|b@S{DK0w=of?tIh#>%Mn% zOnSa?o3_6j(@k?i@5NEJ&8=un$5Cl3xQvBttItYI4B?>_sj7g!sTJnrvIKu9MY&v6 z>vE;A1fObBT^34^q`eLwq(%|`4IZvYr6o<#4#A_ds#2*73G!Ex{FU;4t?2nI_gb%9 zDwg72>qR9c1!eT{$v;S;pC^qA<21M@Zp6B-8*X6}MnR=MB%D4I6DHNVRC)w8ZojUr z6*n)6nS3%y(Nr_02aRij&yyX|ijH=DHegr5!Dd@R8ItI4a59ys) z6V}JLs1csCV?`d`aR?>SNs(R^bGjcZx3+XLtdlj(jlmn^##V!E%#+Dnwsno0wat1| z_W^d0w$92Y(;BLE_aKM$?C^1lIds=*ww)17iy1~IJD_)9Ro9kKrZrRxLBf!B+{nW_ z6RiZK-!bn9u;{I#dyNDNR=Nu2IF4pj`>7mV#({3rW~>ksnRFsUOUJtI0mQ*rWhIhh z5a)2P9du5yi@TNMOiUy*jK#$`|DW`P-xrF*?SC)z40Qj`uAk~u{o6Y};ho%=ZF#%* z+PtXWyTt>c-pw6I7s6(Rm7~75+}J9=i3|*xWHKLT#B)LK2Y^9X>{|w4jwrBW%qiwb zip>ObLd>$p?KAp405e#(E%TU@j)`hu2bVnD+>na-Tr$fnb2)u0y$N6gaB1x!a%1A5 z)u2G~KCFXz5%NE-#=5vZIVHt>Ld-KN?4$bK^d_J+R4c^=hqdkSaf&%)+G@6q5zM7i zEcG4G@1i#Wt)W^768^R0MjqawXeCmu>lCd`z;pT*V;LVXRvE~m<_ut``80fEd|D_; znkGz2+Wb=uxq?a}$9K9M2pqjO$@z2@fqp!-TQz(2|Fn8&I&~ zF$=h)hwuV!)9u+KLs~G{PY&ra4#b;C@f0(;RElx6n$`R8N^jHeS&%vWZ^xNayxE~n zWpcC>HunFIdBPtI?d<(G!3%-E?S8FG>KyWYz2nEckG1@&4KnsBO22OX%~&E zJr0UqrMB8=mM-JK3vIRU0ZJyDO1JKZwRQt({UO>#!+{uwlkK2!itjkwGPz`e(Le?L zLE1&bfg9mcJ6h!7ore+JGeRffg;iwx77qj&+T5U56K{C;jWOjWl6jeUF2l&YIeh?7 zcKAc{Q08!|9cWH5he$UO)@j(?Gh9EZKLA)e9HV(y6F#>iOdf7>T+@jpbMm3;_hVbf4tz=kHw;=wmGo`crL6fdm)41x zu)|Frx*$j6Vl5ghj@kg@;2chD35ke|bImMeD=rkBhs3oQOls<&H-sUz!15Fs*4ljAQ*+EU@ zcm})ujKQv^78?bS5Q`88MN5EtZ%jIvNumVYR=c^2WE1%$d%m33_b~=LJ7OGewu8ng z=FnxE)C)xYF@6mSwR-h{5 
z&90rI2y599A`kBzTr&A=nz^5@`Mkapj{cmcaiuDkAoe$U30a%yh0lHhNSATI+C+xh zd_0w*4T{G8|B@&C+0ak-y{-3L@Ic`2x_`Or_0FUIu8!aGUfSH-|MeVwb5hS^eWck- z%vlceDM#pSw=Eu=Vh%YMWvx(Crjaut&MqKdsmcXp$*nX>CHTkiFkFXh17a+@8h>Eq zNO->VdHDH)m2Dt1l8p(cfP*3P05{tJG|tO5kc(%s>{|S|o-_27MLw=XRNB!b4>uo% z$wZtvt~sP zZuzbLp#H@e(0{2;&BOpFToo8YXxV zHnXEd9^P5l#Pf0HbSJIH@Bx}`WzeFJ8foUUpE%NG9KhQC;1Qf!8P{sO|Mz^@6aIMU z+1`H$epk;U-CyczbUx!B>i8G$e}RIH|AFLaW3(avdUxCiHo3YwQDL-*`R6^KhU>BQ>lap%-g{6o;mJ!^!|z0j_t`--9OYnq$ zFQoMTeehb(J>B0AAKf1p0T%%m0T+QyjzCk>Pi^tQMeMdKdB|1fE8fYz^9Hr@&3Y|Q z4sso48AWGFe-dDKq(kPz&e6r~$a9K0de5fKMV-s#Am*3SiJG$h1b`30>(*tweDH~? zZik#abb+>lPjlIPI?jX|sQO7GnVOYmod7M>3Uf*wPP=KneTOSpR&_U1E}PEeX??%h z&`%ht)vP@0EaV)iVMm=)?9#fO*5i;9gBB{1ejI@33l&1^M0VKWCJ$YZZ6Igh?lDGk z$Mj>^KA>ASk2#J!X<1@F`J~G@Xt(zPXePm012?&P>ki*Vz(v4Cz(v4Cz(v4Cz(v4C zz(v4Cz(v4CU{fG~&i?~}uXrLu;Zo>>eZSu)^yY&X;eq?(BH$w6BH$w6BH$w6BGBdt zG%xE10{OQm`tt+h?Ww|2Y z?`p%XyC$JcR9CG*P<=rb>MH8~5_Bt&Q^H0~9v12~sQ|4M8nqhgTdPZTc#2*Zx8A4} z)H1YEm!?ZHQE{bgX_E?jhAuwsXwg`1rkkf|6?}L$tXr^GQ6^e zU%4y+j_{v`UjR=-5oL%`jMrRL>T`ffxmuFzGKIr&a1xL(^s_YyRX0Gm+>lC2eMy+b zee6GedLo(>CKb6RB!-xQ;iJEp7zGd6>0M)^=^Uhai6=8w25oC>sn*yE`u@Pa=GKAZ z1LKY(#jcD2pBB{?wbvzac$_2>Q1T>({sX!uV?uszL0{ngc zq)=35W@MOKAU&a>p$4iDO;{>R^}-yktCXQdTurc#FFs_B-7$Cc>E^Dnd}1tI{8Y-xnCvA07a{A>nH$f#d<|1-Z0@BOHN_8nf_A(&VB514dk;%qRsxuGQ2UK!FI) z7$3qyn30qc(6Hus?F9F=Es{GX=EvekmV4i%>FA&z*Y^g7^k@3V2gaSDI7ekonFRs^ zeCH}k0j8++nwV;U0gF;yf(e>*LMf@>0|xCG7hi(3otaG21FZZP$iJUh} @@ -760,6 +831,7 @@ + @@ -829,6 +901,46 @@
+ +
+
+
+
+
Model Distribution
+
Агенты по LLM-моделям (иерархия: модель → категория)
+
+
+
+
Category Breakdown
+
Дистрибуция по категориям и ролям
+
+
+
+ +
+
+
Fit Score Distribution
+
Тепловая карта fit-score по агентам
+
+
+ +
+
Commands Matrix
+
Команды и их модели из реальных конфигов
+
+ + + + + + + +
КомандаМодельScoreОписание
+
+
+
+
+
@@ -1055,7 +1167,7 @@ const MODEL_BENCHMARKS = { // Default embedded data (minimal - updated by sync script) const EMBEDDED_DATA = { "version": "1.0.0", - "lastUpdated": "2026-05-27T12:47:21.972Z", + "lastUpdated": "2026-05-27T13:10:49.174Z", "agents": { "lead-developer": { "current": { @@ -4931,7 +5043,7 @@ const EMBEDDED_DATA = { "total_agents": 38, "agents_with_history": 34, "pending_recommendations": 0, - "last_sync": "2026-05-27T12:47:21.974Z", + "last_sync": "2026-05-27T13:10:49.175Z", "sync_sources": [ "git", "capability-index.yaml", @@ -4969,6 +5081,16 @@ async function init() { } try { + // Load real dashboard data FIRST (overrides stale agent-versions) + try { + const dashRes = await fetch('data/dashboard-data.json'); + if (dashRes.ok) { + window.dashboardData = await dashRes.json(); + // Sync agentData from dashboard data for all other tabs + syncAgentDataFromDashboard(window.dashboardData); + } + } catch (e) { console.warn('dashboard-data.json not loaded:', e.message); } + document.getElementById('lastSync').textContent = formatDate(agentData.lastUpdated); document.getElementById('agentCount').textContent = agentData.evolution_metrics.total_agents + ' agents'; document.getElementById('historyCount').textContent = agentData.evolution_metrics.agents_with_history + ' with history'; @@ -4984,12 +5106,69 @@ async function init() { renderRecommendations(); renderHeatmap(); renderImpact(); + renderAnalytics(); } catch (error) { console.error('Failed to render dashboard:', error); document.getElementById('lastSync').textContent = 'Error rendering data'; } } +function syncAgentDataFromDashboard(dd) { + // Convert dashboard format back to agentData format expected by other renders + const agents = {}; + const categories = {}; + let withHistory = 0; + + for (const a of dd.agents || []) { + const cat = a.category || 'General'; + if (!categories[cat]) categories[cat] = 0; + categories[cat]++; + + const history = a.latest_change ? 
[{ + date: a.latest_change.date, + type: a.latest_change.type, + from: a.latest_change.from, + to: a.latest_change.to, + reason: a.latest_change.reason, + source: a.latest_change.source + }] : []; + if (history.length > 0) withHistory++; + + agents[a.name] = { + current: { + model: a.model, + mode: a.mode, + description: a.description, + category: a.category || 'General', + color: a.color || '#8B5CF6', + provider: a.provider || 'Ollama', + variant: a.variant || '', + capabilities: [], + recommendations: [], + benchmark: { + fit_score: a.fit_score || 0, + instruction_following: a.instruction_following || 0 + } + }, + history: history + }; + } + + // Update agentData in-place so all render* functions see real data + agentData = { + version: '1.0.0', + lastUpdated: dd.generated, + agents: agents, + evolution_metrics: { + total_agents: dd.total_agents, + agents_with_history: withHistory, + pending_recommendations: 0, + last_sync: dd.generated, + sync_sources: [dd.source || 'dashboard-data', '.kilo/agents/*.md', 'evolution.json'] + } + }; +} + // Format date function formatDate(dateStr) { const date = new Date(dateStr); @@ -5260,96 +5439,62 @@ function renderRecCard(r, index) { `; } -// Render Heatmap +// Render Heatmap — REAL DATA: Agent × Current Model × Real Fit Score function renderHeatmap() { - const agents = Object.entries(agentData.agents); - if (agents.length === 0) return; + const esc = str => (str || '').replace(/[&<>"']/g, m => ({'&':'&','<':'<','>':'>','"':'"',"'":'''}[m])); + const dd = window.dashboardData; - // Build unique model list from all agents - const modelSet = new Set(); - const modelIfScores = {}; - agents.forEach(([_, a]) => { - const model = a.current.model; - if (model) { - modelSet.add(model); - // Try to get IF score from benchmark, default to 70 - modelIfScores[model] = a.current.benchmark?.instruction_following || 70; - } - }); + if (!dd || !dd.agents) { + document.getElementById('hmTable').innerHTML = '⚠️ Нет данных. 
Запустите анализ.'; + return; + } - // Build hmModels array - const hmModels = [...modelSet].map(m => { - // Extract short name from full model ID - let shortName = m; - if (m.includes('qwen3-coder')) shortName = 'Qwen3-Coder'; - else if (m.includes('glm-')) shortName = m.includes('5.1') ? 'GLM-5.1' : 'GLM-5'; - else if (m.includes('nemotron')) shortName = m.includes('nano') ? 'Nem. Nano' : 'Nem. Super'; - else if (m.includes('minimax')) shortName = 'MiniMax M2.5'; - else if (m.includes('kimi')) shortName = 'Kimi K2.6'; - else if (m.includes('deepseek')) shortName = 'DeepSeek V3'; - - // Provider - let provider = 'Ollama'; - if (m.includes('cloud') || m.includes('ollama-cloud')) provider = 'Ollama Cloud'; - else if (m.includes('openrouter')) provider = 'OpenRouter'; - else if (m.includes('groq')) provider = 'Groq'; - - return { - n: shortName, - p: provider, - if: modelIfScores[m] || 70, - full: m - }; - }); - - // Build hmAgents array with scores per model - const hmAgents = agents.map(([name, agent]) => { - const currentModel = agent.current.model; - const currentIdx = hmModels.findIndex(m => m.full === currentModel); - const fitScore = agent.current.benchmark?.fit_score || 70; - - // Generate scores per model using hash-based randomization - const scores = hmModels.map((m, idx) => { - if (m.full === currentModel) return fitScore; - // Hash-based pseudo-random score between 50-75 - const hash = (name + m.full).split('').reduce((a, c) => a + c.charCodeAt(0), 0); - return 50 + (hash % 26); + const agents = dd.agents; + // Get unique models sorted by count of agents + const modelCounts = {}; + agents.forEach(a => { modelCounts[a.model_short] = (modelCounts[a.model_short] || 0) + 1; }); + const modelList = Object.entries(modelCounts) + .sort((a, b) => b[1] - a[1]) + .map(([short]) => { + const m = dd.models[short] || {}; + return { + short, + full: 'ollama-cloud/' + short, + name: m.name || short, + avg_fit: m.avg_fit || 0, + agents: m.agents || 0 + }; }); - return { 
- n: name, - c: currentIdx, - s: scores - }; - }); - - // Render the table + // Render table: rows=agents, cols=models const t = document.getElementById('hmTable'); let h = 'Agent'; - hmModels.forEach(m => { - const ifColor = m.if >= 85 ? '#00ff94' : m.if >= 75 ? '#facc15' : '#ff6b81'; + modelList.forEach(m => { + const color = m.avg_fit >= 85 ? '#00ff94' : m.avg_fit >= 70 ? '#facc15' : '#ff6b81'; h += ` - ${m.n}
- ${m.p}
- IF:${m.if} + ${esc(m.name)}
+ avg:${m.avg_fit}
+ ${m.agents} `; }); h += ''; - hmAgents.forEach(ag => { - const mx = Math.max(...ag.s); - h += `${ag.n}`; - ag.s.forEach((s, j) => { - const best = s === mx; - const cur = j === ag.c; - const ifLow = hmModels[j].if < 75; + agents.forEach(a => { + h += `${esc(a.name)}`; + modelList.forEach((m, j) => { + const isCurrent = a.model_short === m.short; + const score = isCurrent ? a.fit_score : 0; // Only show score for CURRENT model + const cur = isCurrent; let marks = ''; - if (best) marks += ''; - if (ifLow) marks += ''; - h += `${s}${marks}`; + onclick="openHmModal(event, '${esc(a.name)}', '${esc(m.name)}', ${isCurrent ? a.fit_score : 0}, ${isCurrent ? a.instruction_following : 0})" + >${isCurrent ? a.fit_score : '·'}${marks}`; }); h += ''; }); @@ -6313,6 +6458,190 @@ function closeResearchModal() { document.getElementById('researchModal').classList.remove('show'); } +/* ===== ANALYTICS HIERARCHY ===== */ +function modelScore(model) { + const scores = { + 'ollama-cloud/kimi-k2.6': 92, + 'ollama-cloud/deepseek-v4-pro-max': 90, + 'ollama-cloud/glm-5.1': 82, + 'ollama-cloud/qwen3-coder:480b': 88, + 'ollama-cloud/qwen3.5-122b': 85, + 'ollama-cloud/nemotron-3-super': 88, + 'ollama-cloud/minimax-m2.5': 86, + }; + return scores[model] || 75; +} + +async function renderAnalytics() { + const container = document.getElementById('modelHierarchyTree'); + if (!container) return; + + let state = null; let loadErr = null; + try { + const r = await fetch('/data/state.json'); + if (r.ok) state = await r.json(); + else loadErr = 'HTTP ' + r.status; + } catch (e) { loadErr = e.message; } + + if (!state || !state.agents) { + const msg = loadErr ? 'Не удалось загрузить данные: ' + loadErr : 'Данные пусты'; + const errHtml = ` +
+
⚠️
+
Аналитика недоступна
+
${esc(msg)}
+
Убедитесь, что /data/state.json существует и доступен.
+
`; + document.getElementById('modelHierarchyTree').innerHTML = errHtml; + document.getElementById('categoryBreakdownBars').innerHTML = errHtml; + document.getElementById('fitScoreHeatmap').innerHTML = errHtml; + document.getElementById('commandsMatrixTable').innerHTML = errHtml; + return; + } + + renderAnalyticsStats(state); + renderModelHierarchyTree(state.agents); + renderCategoryBreakdownBars(state.agents); + renderFitScoreHeatmap(state.agents); + renderCommandsMatrix(state.commands || []); +} + +function esc(str) { + return (str || '').replace(/[&<>"']/g, m => ({'&':'&','<':'<','>':'>','"':'"',"'":'''}[m])); +} + +// Tab switching +function renderAnalyticsStats(state) { + const el = document.getElementById('analyticsStats'); + if (!el) return; + const total = (state.agents || []).length; + const models = new Set((state.agents || []).map(a => a.model)).size; + const cats = new Set((state.agents || []).map(a => a.category)).size; + const cmds = (state.commands || []).length; + el.innerHTML = [ + { label: 'Total Agents', value: total, sub: 'active', grad: 'grad-cyan' }, + { label: 'Models Used', value: models, sub: 'distinct LLMs', grad: 'grad-green' }, + { label: 'Categories', value: cats, sub: 'groups', grad: 'grad-orange' }, + { label: 'Commands', value: cmds, sub: 'slash commands', grad: 'grad-purple' }, + ].map(s => ` +
+
${s.label}
+
${s.value}
+
${s.sub}
+
`).join(''); +} + +function renderModelHierarchyTree(agents) { + const container = document.getElementById('modelHierarchyTree'); + if (!container) return; + const tree = {}; + for (const a of agents) { + if (!tree[a.model]) tree[a.model] = {}; + const cat = a.category || 'Core'; + if (!tree[a.model][cat]) tree[a.model][cat] = []; + tree[a.model][cat].push(a); + } + let html = ''; + for (const [model, cats] of Object.entries(tree).sort()) { + const modelShort = model.replace('ollama-cloud/', ''); + const total = Object.values(cats).flat().length; + html += `
+
+ ${esc(modelShort)} + ${total} +
+
`; + for (const [cat, list] of Object.entries(cats).sort()) { + html += `
+
${esc(cat)} (${list.length})
+
`; + for (const a of list) { + const sc = a.fit_score !== undefined ? a.fit_score : modelScore(a.model); + html += `
+ ${esc(a.name)} + ${sc} +
`; + } + html += '
'; + } + html += '
'; + } + container.innerHTML = html; + const first = container.querySelector('.at-model'); + if (first) first.classList.add('open'); +} + +function renderCategoryBreakdownBars(agents) { + const container = document.getElementById('categoryBreakdownBars'); + if (!container) return; + const counts = {}; + for (const a of agents) { + const cat = a.category || 'Core'; + counts[cat] = (counts[cat] || 0) + 1; + } + const max = Math.max(...Object.values(counts), 1); + let html = ''; + for (const [cat, n] of Object.entries(counts).sort((a, b) => b[1] - a[1])) { + const pct = Math.round((n / max) * 100); + html += ` +
+
${esc(cat)}
+
+
${n}
+
`; + } + container.innerHTML = html; +} + +function renderFitScoreHeatmap(agents) { + const container = document.getElementById('fitScoreHeatmap'); + if (!container) return; + let html = ''; + for (const a of agents) { + const score = a.fit_score !== undefined ? a.fit_score : modelScore(a.model); + const hue = score >= 85 ? 150 : score >= 70 ? 45 : 0; + const sat = score >= 85 ? '65%' : score >= 70 ? '75%' : '55%'; + const light = document.documentElement.getAttribute('data-theme') === 'light' ? '82%' : '30%'; + html += ` +
+ ${esc(a.name.slice(0, 12))} + ${score} +
${esc(a.name)} — ${score}
+
`; + } + container.innerHTML = html; +} + +function renderCommandsMatrix(commands) { + const tbody = document.querySelector('#commandsMatrixTable tbody'); + if (!tbody) return; + if (!commands.length) { + tbody.innerHTML = 'No command data available'; + return; + } + let html = ''; + for (const c of commands) { + const modelShort = (c.model || 'unknown').replace('ollama-cloud/', ''); + const score = c.fit_score !== undefined ? c.fit_score : modelScore(c.model); + const cls = score >= 85 ? 'good' : score >= 70 ? 'ok' : 'warn'; + html += ` + + /${esc(c.name)} + ${esc(modelShort)} + ${score} + ${esc((c.description || '').slice(0, 50))}${(c.description || '').length > 50 ? '…' : ''} + `; + } + tbody.innerHTML = html; +} + +function esc(str) { + return (str || '').replace(/[&<>"']/g, m => ({'&':'&','<':'<','>':'>','"':'"',"'":'''}[m])); +} + // Tab switching function switchTab(tabId, el) { document.querySelectorAll('.tab-btn').forEach(btn => btn.classList.remove('active')); diff --git a/docker/docker-compose.ollama.yml b/docker/docker-compose.ollama.yml new file mode 100644 index 0000000..e13a084 --- /dev/null +++ b/docker/docker-compose.ollama.yml @@ -0,0 +1,36 @@ +# Ollama service for multi-model evaluation +# Provides LLM inference API for Real-Fit engine and dashboard + +services: + ollama: + image: ollama/ollama:latest + container_name: ollama + ports: + - "11434:11434" + environment: + - OLLAMA_ORIGINS=* + - OLLAMA_HOST=0.0.0.0 + volumes: + - ollama-models:/root/.ollama/models + # Optional: pre-pull models on startup + - ./scripts/ollama-pull-models.sh:/ollama-pull.sh:ro + healthcheck: + test: ["CMD", "wget", "-q", "--spider", "http://localhost:11434/api/tags"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 60s + restart: unless-stopped + networks: + - ollama-net + labels: + - "com.apaw.service=ollama" + - "com.apaw.description=Ollama LLM inference API" + +volumes: + ollama-models: + driver: local + +networks: + ollama-net: + driver: bridge diff --git 
a/landing/Dockerfile b/landing/Dockerfile index a09f212..1a7da2e 100644 --- a/landing/Dockerfile +++ b/landing/Dockerfile @@ -1,3 +1,14 @@ -FROM nginx:alpine +FROM nginx:bookworm + +# Python3 required for the embedded state API +RUN apt-get update && apt-get install -y --no-install-recommends python3 && rm -rf /var/lib/apt/lists/* + COPY landing /usr/share/nginx/html +COPY landing/nginx-landing.conf /etc/nginx/conf.d/default.conf +COPY landing/entrypoint.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh + +# Mount-ready: content served from volume EXPOSE 80 + +ENTRYPOINT ["/entrypoint.sh"] diff --git a/landing/api/real-fit-report.json b/landing/api/real-fit-report.json new file mode 120000 index 0000000..8a8ef08 --- /dev/null +++ b/landing/api/real-fit-report.json @@ -0,0 +1 @@ +../../agent-evolution/data/real-fit-report.json \ No newline at end of file diff --git a/landing/api/server.py b/landing/api/server.py new file mode 100644 index 0000000..055ff3e --- /dev/null +++ b/landing/api/server.py @@ -0,0 +1,199 @@ +#!/usr/bin/env python3 +"""Micro API for landing page — reads live agent configs and returns JSON.""" +import json, os, glob, re +from datetime import datetime, timezone +import socketserver +import http.server + +PORT = 8080 +FALLBACK_DIR = "/usr/share/nginx/html" + + +def find_dir(sub): + candidates = [ + os.path.join(FALLBACK_DIR, sub), + os.path.join(os.path.dirname(__file__), sub), + f"/app/{sub}", + f"./{sub}", + ] + for c in candidates: + if os.path.isdir(c): + return c + return None + + +def find_file(name): + candidates = [ + os.path.join(FALLBACK_DIR, "api", name), + os.path.join(os.path.dirname(__file__), name), + f"/app/landing/api/{name}", + ] + for c in candidates: + if os.path.isfile(c): + return c + return None + + +def parse_frontmatter(path): + try: + with open(path, "r", encoding="utf-8") as f: + content = f.read() + except Exception: + return None + if not content.startswith("---"): + return None + end = content.find("---", 3) + if end == 
-1: + return None + fm = content[3:end] + data = {} + for line in fm.strip().split("\n"): + m = re.match(r"^(\w+):\s*(.+)$", line) + if m: + data[m.group(1)] = m.group(2).strip() + return data + + +def load_dashboard_data(): + path = find_file("dashboard-data.json") + if not path: + return None + try: + with open(path, "r", encoding="utf-8") as f: + data = json.load(f) + if data.get("agents"): + return data + except Exception: + pass + return None + + +def load_real_fit_scores(): + candidates = [ + os.path.join(os.path.dirname(__file__), "real-fit-report.json"), + os.path.join(os.path.dirname(os.path.dirname(__file__)), "data", "real-fit-report.json"), + os.path.join(FALLBACK_DIR, "data", "real-fit-report.json"), + "/app/agent-evolution/data/real-fit-report.json", + ] + for path in candidates: + if path and os.path.isfile(path): + try: + with open(path, "r", encoding="utf-8") as f: + data = json.load(f) + return data.get("fit_scores", {}) + except Exception: + continue + return {} + + +def build_state_from_md(): + agents_dir = find_dir(".kilo/agents") + commands_dir = find_dir(".kilo/commands") + + agents = [] + if agents_dir: + for f in sorted(glob.glob(os.path.join(agents_dir, "*.md"))): + fm = parse_frontmatter(f) + if fm and fm.get("model"): + agents.append({ + "name": os.path.basename(f).replace(".md", ""), + "model": fm.get("model", ""), + "mode": fm.get("mode", "subagent"), + "description": fm.get("description", ""), + "category": infer_category(fm.get("mode", ""), os.path.basename(f)), + "fit_score": None, + "model_meta": None, + }) + + commands = [] + if commands_dir: + for f in sorted(glob.glob(os.path.join(commands_dir, "*.md"))): + fm = parse_frontmatter(f) + if fm and fm.get("model"): + commands.append({ + "name": os.path.basename(f).replace(".md", ""), + "model": fm.get("model", ""), + "mode": fm.get("mode", "command"), + "description": fm.get("description", ""), + "fit_score": None, + }) + + model_stats = {} + for a in agents: + 
model_stats[a["model"]] = model_stats.get(a["model"], 0) + 1 + for c in commands: + model_stats[c["model"]] = model_stats.get(c["model"], 0) + 1 + + return { + "generated": datetime.now(timezone.utc).isoformat().replace("+00:00", "") + "Z", + "total_agents": len(agents), + "total_commands": len(commands), + "model_distribution": model_stats, + "agents": agents, + "commands": commands, + } + + +def build_state(): + dashboard = load_dashboard_data() + fit_scores = load_real_fit_scores() + if dashboard: + agents = dashboard.get("agents", []) + for a in agents: + key = a.get("name") + fs = fit_scores.get(key) + if fs: + a["fit_score"] = fs.get("fit") + a["fit_explanation"] = fs.get("explanation") + a["best_model"] = fs.get("model") + state = { + "generated": datetime.now(timezone.utc).isoformat().replace("+00:00", "") + "Z", + "total_agents": dashboard.get("total_agents", 0), + "total_commands": len(dashboard.get("commands", [])), + "model_distribution": dashboard.get("model_distribution", {}), + "agents": agents, + "commands": dashboard.get("commands", []), + } + state["fit_scores"] = fit_scores + return state + return build_state_from_md() + + +def infer_category(mode, filename): + f = filename.lower() + if "security" in f: + return "Security" + if "devops" in f: + return "DevOps" + if "frontend" in f or "flutter" in f: + return "Frontend" + if "backend" in f or "php" in f or "python" in f or "go" in f: + return "Backend" + if "test" in f or "sdet" in f: + return "QA" + return "Core" + + +class Handler(http.server.BaseHTTPRequestHandler): + def do_GET(self): + if self.path == "/api/state": + state = build_state() + body = json.dumps(state, ensure_ascii=False).encode("utf-8") + self.send_response(200) + self.send_header("Content-Type", "application/json; charset=utf-8") + self.send_header("Access-Control-Allow-Origin", "*") + self.send_header("Cache-Control", "no-store") + self.end_headers() + self.wfile.write(body) + else: + self.send_response(404) + 
self.end_headers() + + def log_message(self, format, *args): + pass # silent + + +if __name__ == "__main__": + with socketserver.TCPServer(("0.0.0.0", PORT), Handler) as httpd: + print(f"[state-api] listening on :{PORT}") + httpd.serve_forever() diff --git a/landing/assets/api.js b/landing/assets/api.js new file mode 100644 index 0000000..1e5d5b8 --- /dev/null +++ b/landing/assets/api.js @@ -0,0 +1,373 @@ +async function loadRealState() { + try { + const res = await fetch('/api/state'); + if (!res.ok) throw new Error('HTTP ' + res.status); + const state = await res.json(); + renderAgentTable(state.agents); + renderCommandTable(state.commands); + updateHeroStats(state); + updateModelDistribution(state.model_distribution); + renderAnalytics(state); + console.log('[landing] real state loaded', state.generated); + } catch (e) { + console.error('[landing] failed to load real state:', e.message); + document.querySelector('.agents__table')?.insertAdjacentHTML( + 'afterbegin', + '

⚠️ Не удалось загрузить реальные данные. Показаны fallback-значения.

' + ); + } +} + +function renderAgentTable(agents) { + const tbody = document.getElementById('agent-tbody'); + if (!tbody) return; + + const categories = {}; + for (const a of agents) { + const cat = a.category || 'Core'; + if (!categories[cat]) categories[cat] = []; + categories[cat].push(a); + } + + let html = ''; + for (const [cat, list] of Object.entries(categories)) { + html += `${escapeHtml(cat)}`; + for (const a of list) { + const modelShort = a.model.replace('ollama-cloud/', ''); + const score = modelScore(a.model); + html += ` + + ${escapeHtml(a.name)} + ${escapeHtml(cat)} + ${escapeHtml(a.description.slice(0, 40))}… + ${escapeHtml(modelShort)} + ${score} + ${escapeHtml(a.mode)} + `; + } + } + tbody.innerHTML = html; +} + +function renderCommandTable(commands) { + const tbody = document.getElementById('command-tbody'); + if (!tbody) return; + + let html = ''; + for (const c of commands) { + const modelShort = c.model.replace('ollama-cloud/', ''); + const score = modelScore(c.model); + html += ` + + /${escapeHtml(c.name)} + ${escapeHtml(c.mode)} + ${escapeHtml(c.description.slice(0, 40))}… + ${escapeHtml(modelShort)} + ${score} + `; + } + tbody.innerHTML = html; +} + +function updateHeroStats(state) { + const total = state.total_agents + state.total_commands; + const models = Object.keys(state.model_distribution).length; + document.querySelector('.hero__stats .stat:nth-child(1) .stat__value').textContent = state.total_agents + '+'; + document.querySelector('.hero__stats .stat:nth-child(2) .stat__value').textContent = models; +} + +function updateModelDistribution(dist) { + const container = document.getElementById('model-distribution'); + if (!container) return; + const entries = Object.entries(dist).sort((a, b) => b[1] - a[1]); + const max = entries[0]?.[1] || 1; + let html = '

Реальная модельная дистрибуция

'; + for (const [model, count] of entries) { + const pct = Math.round((count / max) * 100); + html += ` +
+ ${escapeHtml(model.replace('ollama-cloud/', ''))} +
+ ${count} +
`; + } + container.innerHTML = html; +} + +function modelScore(model) { + const scores = { + 'ollama-cloud/kimi-k2.6': 92, + 'ollama-cloud/deepseek-v4-pro-max': 90, + 'ollama-cloud/glm-5.1': 82, + 'ollama-cloud/qwen3-coder:480b': 88, + 'ollama-cloud/qwen3.5-122b': 85, + 'ollama-cloud/nemotron-3-super': 88, + 'ollama-cloud/minimax-m2.5': 86, + }; + return scores[model] || 75; +} + +function escapeHtml(str) { + return (str || '').replace(/[&<>"']/g, m => ({'&':'&','<':'<','>':'>','"':'"',"'":'''}[m])); +} + +/* ===== ANALYTICS HIERARCHY ===== */ +function renderAnalytics(state) { + renderModelTree(state.agents); + renderCategoryBars(state.agents); + renderFitHeatmap(state.agents); + renderCommandAnalytics(state.commands); +} + +function renderModelTree(agents) { + const container = document.getElementById('model-tree'); + if (!container) return; + + // Group: model -> category -> agents + const tree = {}; + for (const a of agents) { + if (!tree[a.model]) tree[a.model] = {}; + const cat = a.category || 'Core'; + if (!tree[a.model][cat]) tree[a.model][cat] = []; + tree[a.model][cat].push(a); + } + + let html = ''; + for (const [model, cats] of Object.entries(tree).sort()) { + const modelShort = model.replace('ollama-cloud/', ''); + const total = Object.values(cats).flat().length; + html += `
+
+ ${escapeHtml(modelShort)} + ${total} +
+
`; + for (const [cat, list] of Object.entries(cats).sort()) { + html += `
+
${escapeHtml(cat)} (${list.length})
+
`; + for (const a of list) { + const score = modelScore(a.model); + html += `
+ ${escapeHtml(a.name)} + ${score} +
`; + } + html += '
'; + } + html += '
'; + } + container.innerHTML = html; + + // Open first model by default + const first = container.querySelector('.tree__model'); + if (first) first.classList.add('is-open'); +} + +function renderCategoryBars(agents) { + const container = document.getElementById('category-bars'); + if (!container) return; + + const counts = {}; + for (const a of agents) { + const cat = a.category || 'Core'; + counts[cat] = (counts[cat] || 0) + 1; + } + const max = Math.max(...Object.values(counts), 1); + + let html = ''; + for (const [cat, n] of Object.entries(counts).sort((a, b) => b[1] - a[1])) { + const pct = Math.round((n / max) * 100); + html += ` +
+
${escapeHtml(cat)}
+
+
${n}
+
`; + } + container.innerHTML = html; +} + +function renderFitHeatmap(agents) { + const container = document.getElementById('fit-heatmap'); + if (!container) return; + + let html = ''; + for (const a of agents) { + const score = typeof a.fit_score === 'number' ? a.fit_score : modelScore(a.model); + const hue = score >= 85 ? 150 : score >= 70 ? 45 : 0; + const sat = score >= 85 ? '70%' : score >= 70 ? '80%' : '60%'; + const light = document.documentElement.getAttribute('data-theme') === 'light' ? '85%' : '35%'; + html += ` +
+ ${escapeHtml(a.name.slice(0, 12))} + ${score} +
${escapeHtml(a.name)} — ${score}
+
`; + } + container.innerHTML = html; +} + +function renderCommandAnalytics(commands) { + const tbody = document.getElementById('command-analytics-tbody'); + if (!tbody) return; + + let html = ''; + for (const c of commands) { + const modelShort = c.model.replace('ollama-cloud/', ''); + const score = modelScore(c.model); + html += ` + + /${escapeHtml(c.name)} + ${escapeHtml(modelShort)} + ${score} + `; + } + tbody.innerHTML = html; +} + +/* ===== FIT SCORE DRILL-DOWN MODAL ===== */ +const MODAL_HTML = ` + + +`; + +const PLACEHOLDER_EXPLANATION = 'Агент демонстрирует сильные стороны в аналитике и следовании роли, однако может уступать в конкретности рекомендаций по исправлению.'; + +function ensureModal() { + let modal = document.getElementById('fit-modal'); + if (!modal) { + modal = document.createElement('div'); + modal.id = 'fit-modal'; + modal.className = 'modal'; + modal.setAttribute('role', 'dialog'); + modal.setAttribute('aria-modal', 'true'); + modal.setAttribute('aria-label', 'Детали fit-score'); + modal.setAttribute('tabindex', '-1'); + modal.setAttribute('aria-hidden', 'true'); + modal.innerHTML = MODAL_HTML; + document.body.appendChild(modal); + } + if (!modal.dataset.initialized) { + modal.dataset.initialized = '1'; + const overlay = modal.querySelector('.modal__overlay'); + const closeBtn = modal.querySelector('.modal__close'); + + overlay.addEventListener('click', () => closeFitModal()); + closeBtn.addEventListener('click', () => closeFitModal()); + modal.addEventListener('keydown', trapFocus); + } + return modal; +} + +function openFitModal(agent) { + const modal = ensureModal(); + const score = agent.fit_score || modelScore(agent.model); + const modelShort = (agent.model || '').replace('ollama-cloud/', ''); + + document.getElementById('modal-agent-name').textContent = agent.name || 'Agent'; + document.getElementById('modal-model').textContent = modelShort || 'unknown'; + document.getElementById('modal-score').textContent = score; + + const breakdown 
= agent.breakdown || {}; + const dims = [ + { key: 'accuracy', label: 'Точность' }, + { key: 'completeness', label: 'Полнота' }, + { key: 'role_adherence', label: 'Ролевая чёткость' }, + { key: 'actionability', label: 'Действенность' }, + ]; + + const breakdownHtml = dims.map(d => { + const value = typeof breakdown[d.key] === 'number' ? breakdown[d.key] : 75; + const pct = Math.min(100, Math.max(0, value)); + const hue = pct >= 85 ? 150 : pct >= 70 ? 45 : 0; + return ` + `; + }).join(''); + document.getElementById('modal-breakdown').innerHTML = breakdownHtml; + + const explanation = agent.explanation || (agent.fit_score ? PLACEHOLDER_EXPLANATION : 'Нет данных об explanation — API не возвращает поле explanation.'); + document.getElementById('modal-explanation').textContent = explanation; + + modal.classList.add('is-open'); + modal.setAttribute('aria-hidden', 'false'); + modal.focus(); + + modal._prevFocus = document.activeElement; + document.addEventListener('keydown', handleEscape); +} + +function closeFitModal() { + const modal = document.getElementById('fit-modal'); + if (!modal) return; + modal.classList.remove('is-open'); + modal.setAttribute('aria-hidden', 'true'); + document.removeEventListener('keydown', handleEscape); + if (modal._prevFocus && typeof modal._prevFocus.focus === 'function') { + try { modal._prevFocus.focus(); } catch (e) { /* ignore */ } + } +} + +function handleEscape(e) { + if (e.key === 'Escape') closeFitModal(); +} + +function trapFocus(e) { + if (e.key !== 'Tab') return; + const modal = document.getElementById('fit-modal'); + if (!modal || !modal.classList.contains('is-open')) return; + const focusable = modal.querySelectorAll('button, [href], input, select, textarea, [tabindex]:not([tabindex="-1"])'); + if (!focusable.length) return; + const first = focusable[0]; + const last = focusable[focusable.length - 1]; + if (e.shiftKey && document.activeElement === first) { + e.preventDefault(); + last.focus(); + } else if (!e.shiftKey && 
document.activeElement === last) { + e.preventDefault(); + first.focus(); + } +} + +/* Enhance renderFitHeatmap to attach click handlers */ +const _origRenderFitHeatmap = renderFitHeatmap; +renderFitHeatmap = function(agents) { + _origRenderFitHeatmap(agents); + const container = document.getElementById('fit-heatmap'); + if (!container) return; + const cells = container.querySelectorAll('.heatmap__cell'); + cells.forEach((cell, idx) => { + cell.addEventListener('click', () => openFitModal(agents[idx] || {})); + cell.setAttribute('tabindex', '0'); + cell.setAttribute('role', 'button'); + cell.setAttribute('aria-label', `Детали для ${agents[idx]?.name || ''}`); + cell.addEventListener('keydown', (e) => { + if (e.key === 'Enter' || e.key === ' ') { + e.preventDefault(); + openFitModal(agents[idx] || {}); + } + }); + }); +}; + +document.addEventListener('DOMContentLoaded', loadRealState); diff --git a/landing/assets/styles.css b/landing/assets/styles.css index 509f2bc..af2347f 100644 --- a/landing/assets/styles.css +++ b/landing/assets/styles.css @@ -755,6 +755,204 @@ body { } .footer__made { font-style: italic; } +/* Analytics Hierarchy */ +.analytics { padding: 80px 20px; background: var(--bg); } +@media (min-width: 768px) { .analytics { padding: 100px 24px; } } + +.analytics__grid { + display: grid; + grid-template-columns: 1fr; + gap: 20px; + margin-top: 40px; +} +@media (min-width: 768px) { + .analytics__grid { grid-template-columns: 1fr 1fr; gap: 24px; } +} + +.analytics__card { + background: var(--bg-card); + border: 1px solid var(--border); + border-radius: var(--radius); + padding: 24px; + transition: border-color .2s, box-shadow .2s; +} +.analytics__card:hover { + border-color: var(--border-hover); + box-shadow: var(--shadow); +} + +.analytics__card-title { + font-size: 16px; + font-weight: 600; + margin-bottom: 16px; + color: var(--text); +} + +/* Tree */ +.analytics__tree { font-size: 13px; } +.tree__model { margin-bottom: 12px; } +.tree__model-header { + 
display: flex; align-items: center; gap: 8px; + padding: 6px 10px; + background: var(--gradient-soft); + border-radius: 8px; + font-weight: 600; + cursor: pointer; + user-select: none; +} +.tree__model-header::before { + content: '▸'; + font-size: 10px; + transition: transform .2s; + color: var(--accent); +} +.tree__model.is-open .tree__model-header::before { transform: rotate(90deg); } +.tree__model-body { display: none; padding: 8px 0 4px 20px; } +.tree__model.is-open .tree__model-body { display: block; } + +.tree__cat { margin-bottom: 6px; } +.tree__cat-header { + display: flex; align-items: center; gap: 6px; + padding: 4px 8px; + border-radius: 6px; + cursor: pointer; + color: var(--text-muted); + font-size: 12px; +} +.tree__cat-header:hover { background: var(--bg-hover); } +.tree__cat-header::before { + content: '▸'; + font-size: 9px; + transition: transform .2s; +} +.tree__cat.is-open .tree__cat-header::before { transform: rotate(90deg); } +.tree__cat-body { display: none; padding: 4px 0 2px 16px; } +.tree__cat.is-open .tree__cat-body { display: block; } + +.tree__agent { + display: flex; align-items: center; gap: 8px; + padding: 3px 8px; + border-radius: 4px; + font-size: 12px; + color: var(--text); +} +.tree__agent-score { + font-size: 10px; + font-weight: 700; + padding: 1px 6px; + border-radius: 10px; + background: var(--bg-elevated); + color: var(--accent-green); +} + +/* Bars */ +.analytics__bars { display: flex; flex-direction: column; gap: 10px; } +.bar__row { + display: flex; align-items: center; gap: 12px; +} +.bar__label { + width: 80px; + font-size: 12px; + font-weight: 600; + color: var(--text); + flex-shrink: 0; +} +.bar__track { + flex: 1; + height: 20px; + background: var(--bg-elevated); + border-radius: 10px; + overflow: hidden; + position: relative; +} +.bar__fill { + height: 100%; + border-radius: 10px; + background: var(--gradient); + transition: width .6s ease; + min-width: 4px; +} +.bar__count { + width: 30px; + font-size: 12px; + 
font-weight: 700; + text-align: right; + color: var(--text-muted); +} + +/* Heatmap */ +.analytics__heatmap { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(56px, 1fr)); + gap: 4px; +} +.heatmap__cell { + aspect-ratio: 1; + border-radius: 6px; + display: flex; + flex-direction: column; + align-items: center; + justify-content: center; + font-size: 10px; + font-weight: 600; + cursor: pointer; + transition: transform .15s; + position: relative; +} +.heatmap__cell:hover { transform: scale(1.08); z-index: 1; } +.heatmap__cell-name { + font-size: 9px; + font-weight: 500; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + width: 90%; + text-align: center; +} +.heatmap__cell-score { font-size: 11px; margin-top: 1px; } +.heatmap__tooltip { + position: absolute; + bottom: calc(100% + 6px); + left: 50%; + transform: translateX(-50%); + background: var(--bg-elevated); + border: 1px solid var(--border); + border-radius: 6px; + padding: 6px 10px; + font-size: 11px; + white-space: nowrap; + pointer-events: none; + opacity: 0; + transition: opacity .15s; + z-index: 10; +} +.heatmap__cell:hover .heatmap__tooltip { opacity: 1; } + +/* Analytics table */ +.analytics__table-wrap { overflow-x: auto; } +.analytics__table { + width: 100%; + border-collapse: collapse; + font-size: 13px; +} +.analytics__table thead th { + text-align: left; + padding: 10px 12px; + font-weight: 600; + color: var(--text-muted); + border-bottom: 1px solid var(--border); + font-size: 11px; + text-transform: uppercase; + letter-spacing: .5px; +} +.analytics__table tbody td { + padding: 8px 12px; + border-bottom: 1px solid var(--border); + color: var(--text); +} +.analytics__table tbody tr:last-child td { border-bottom: none; } +.analytics__table tbody tr:hover td { background: var(--bg-hover); } + /* Animations */ @keyframes fadeUp { from { opacity: 0; transform: translateY(18px); } @@ -768,6 +966,106 @@ body { .hero__actions { animation-delay: .3s; } .hero__stats { 
animation-delay: .4s; } +/* ===== MODAL ===== */ +.modal { + position: fixed; + inset: 0; + z-index: 1000; + display: flex; + align-items: center; + justify-content: center; + padding: 20px; + opacity: 0; + visibility: hidden; + transition: opacity .25s, visibility .25s; +} +.modal.is-open { + opacity: 1; + visibility: visible; +} +.modal__overlay { + position: absolute; + inset: 0; + background: rgba(0,0,0,0.55); + backdrop-filter: blur(4px); +} +[data-theme="light"] .modal__overlay { + background: rgba(0,0,0,0.35); +} +.modal__content { + position: relative; + background: var(--bg-card); + border: 1px solid var(--border); + border-radius: var(--radius); + padding: 28px; + width: 100%; + max-width: 480px; + max-height: 90vh; + overflow-y: auto; + box-shadow: var(--shadow-hover); + transform: translateY(12px); + transition: transform .25s; +} +.modal.is-open .modal__content { + transform: translateY(0); +} +.modal__content { + z-index: 1; +} +.modal__close { + position: absolute; + top: 14px; + right: 14px; + width: 36px; height: 36px; + border-radius: 50%; + background: var(--bg-hover); + border: 1px solid var(--border); + color: var(--text-muted); + font-size: 20px; + line-height: 1; + cursor: pointer; + display: flex; + align-items: center; + justify-content: center; + transition: background .2s, color .2s; +} +.modal__close:hover { background: var(--border-hover); color: var(--text); } +.modal__header { margin-bottom: 18px; padding-right: 40px; } +.modal__title { font-size: 20px; font-weight: 700; letter-spacing: -0.3px; margin-bottom: 4px; color: var(--text); } +.modal__model { + font-family: var(--font-mono); font-size: 12px; color: var(--accent-3); + background: rgba(6,182,212,0.06); padding: 3px 8px; border-radius: 6px; + display: inline-block; +} +[data-theme="light"] .modal__model { + background: rgba(14,165,233,0.08); color: #0284c7; +} +.modal__score-row { + display: flex; align-items: baseline; gap: 10px; margin-bottom: 20px; + padding: 14px 16px; 
background: var(--gradient-soft); border-radius: var(--radius-sm); +} +.modal__score { + font-size: 36px; font-weight: 800; letter-spacing: -1.5px; + background: var(--gradient); + -webkit-background-clip: text; + -webkit-text-fill-color: transparent; + background-clip: text; +} +.modal__score-label { font-size: 13px; color: var(--text-muted); font-weight: 500; } +.modal__breakdown { display: flex; flex-direction: column; gap: 10px; margin-bottom: 24px; } +.modal__dimension { display: grid; grid-template-columns: 110px 1fr 36px; gap: 10px; align-items: center; } +.modal__dim-label { font-size: 12px; color: var(--text-muted); font-weight: 500; } +.modal__dim-track { height: 8px; background: var(--bg-elevated); border-radius: 4px; overflow: hidden; } +.modal__dim-fill { height: 100%; border-radius: 4px; transition: width .4s ease; } +.modal__dim-value { font-size: 12px; font-weight: 700; color: var(--text); text-align: right; } +.modal__section h4 { font-size: 14px; font-weight: 700; color: var(--text); margin-bottom: 8px; } +.modal__explanation { font-size: 13px; color: var(--text-muted); line-height: 1.65; } + +@media (max-width: 480px) { + .modal__dimension { grid-template-columns: 90px 1fr 32px; } + .modal__title { font-size: 18px; } +} + /* Reduced motion */ @media (prefers-reduced-motion: reduce) { html { scroll-behavior: auto; } diff --git a/landing/docker-compose.yml b/landing/docker-compose.yml index ec00d6d..6c7ef84 100644 --- a/landing/docker-compose.yml +++ b/landing/docker-compose.yml @@ -2,12 +2,37 @@ version: '3.8' services: apaw-landing: - build: - context: .. 
- dockerfile: landing/Dockerfile + image: landing-apaw-landing:latest container_name: apaw-landing ports: - "3002:80" + volumes: + # Live reload: landing files + - ../landing:/usr/share/nginx/html + # Live reload: agent configs for real-time model display + - ../.kilo:/usr/share/nginx/html/.kilo:ro + # Generated real-state JSON for dynamic agent table + - ../agent-evolution/data:/usr/share/nginx/html/data:ro + # Landing API server + - ../landing/api:/usr/share/nginx/html/api:ro + # Runtime override: hot-reload nginx config from host without rebuild + - ../landing/nginx-landing.conf:/etc/nginx/conf.d/default.conf:ro + restart: unless-stopped + networks: + - apaw-landing-net + + apaw-state-api: + image: python:3.12-alpine + container_name: apaw-state-api + working_dir: /usr/src/app + volumes: + # API server + - ../landing/api:/usr/src/app/api:ro + # Real-time agent configs + - ../.kilo:/usr/src/app/.kilo:ro + # Evolution data + - ../agent-evolution/data:/usr/src/app/data:ro + command: ["python3", "api/server.py"] restart: unless-stopped networks: - apaw-landing-net diff --git a/landing/entrypoint.sh b/landing/entrypoint.sh new file mode 100755 index 0000000..d3d917e --- /dev/null +++ b/landing/entrypoint.sh @@ -0,0 +1,12 @@ +#!/bin/sh +# Entrypoint for landing container — runs both nginx and state-api + +# Start state-api in background +python3 /usr/share/nginx/html/api/server.py & +API_PID=$! 
+ +# Start nginx in foreground (replaces this shell) +nginx -g 'daemon off;' + +# If nginx exits, kill the API +kill $API_PID 2>/dev/null diff --git a/landing/index.html b/landing/index.html index 6895a7e..f66d38e 100644 --- a/landing/index.html +++ b/landing/index.html @@ -105,54 +105,8 @@ АгентКатегорияРольМодельFitВызывается - - - ★ Core Development - RequirementRefinerCoreФормализация требованийQwen3-Coder 480B92Issue status: new - HistoryMinerCoreПоиск дублей в git-историиQwen3-Coder 480B92Status: planned - SystemAnalystCoreАрхитектура, схемы, APIGLM-5.182Status: researching - SdetEngineerCoreТесты до кода (TDD)Qwen3-Coder 480B88Status: designed - LeadDeveloperCoreОсновная разработкаNemotron 3 Super90Status: testing - FrontendDeveloperCoreUI с мультимодальностьюMiniMax M2.586Когда нужен UI - BackendDeveloperCoreNode.js / ExpressQwen3-Coder 480B91Когда нужен backend - GoDeveloperCoreGo + Gin + Echo + DBDeepSeek V4-Pro88Когда нужен Go backend - PhpDeveloperCoreLaravel / SymfonyQwen3-Coder 480B87Когда нужен PHP - PythonDeveloperCoreDjango / FastAPIQwen3-Coder 480B90Когда нужен Python - FlutterDeveloperCoreFlutter / DartQwen3-Coder 480B86Когда нужен Flutter - DevopsEngineerCoreDocker, K8s, CI/CDKimi K2.688Когда нужен deploy - - - ☆ Quality Assurance - CodeSkepticQAАдверсариальное ревьюMiniMax M2.585Status: implementing - TheFixerQAИтеративный фикс баговKimi K2.690Если ревью не прошло - PerformanceEngineerQAN+1, memory leaks, perfDeepSeek V4-Pro84После CodeSkeptic - SecurityAuditorQAOWASP, CVE, secretsDeepSeek V4-Pro80После Performance - VisualTesterQAСкриншоты, pixelmatchQwen3-Coder 480B82Когда UI меняется - BrowserAutomationQAPlaywright E2EQwen3-Coder 480B87E2E-тестирование - - - ◆ Meta & Process - OrchestratorMetaГлавный диспетчерKimi K2.692Управление роутингом - ReleaseManagerMetaGit, semver, релизыGLM-5.176Status: releasing - EvaluatorMetaОценка эффективностиGLM-5.184Status: evaluated - PromptOptimizerMetaУлучшение промптовQwen3.6 Plus84Когда score < 7 - 
ProductOwnerMetaЧеклисты, лейблы, трекингGLM-5.178Управление задачами - CapabilityAnalystMetaАнализ пробелов в skillsGLM-5.182На старте задачи - AgentArchitectMetaСоздание новых агентовKimi K2.686Если нет подходящего агента - WorkflowArchitectMetaНовые workflow-определенияGLM-5.182Новый workflow - MarkdownValidatorMetaВалидация MarkdownDeepSeek V4-Pro68Перед созданием issue - PipelineJudgeMetaОбъективный fitness-скорGLM-5.184После Evaluator - ArchitectIndexerMetaИндекс проекта .architect/GLM-5.184Перед любой задачей - - - ● Cognitive Enhancement - PlannerCognitiveCoT / ToT / Plan-ReflectDeepSeek V4-Pro88Сложные задачи - ReflectorCognitiveУроки из ошибокDeepSeek V4-Pro84После каждого агента - MemoryManagerCognitiveКонтекст, векторный сторQwen3.6 Plus87Управление памятью - - - ♻ Security & Incident - IncidentResponderSecOpsФорензика, hardening, cleanupKimi K2.690Инцидент, компрометация + +
@@ -191,6 +145,55 @@
+ +
+
+

Аналитическая иерархия

+

Живая дистрибуция агентов по моделям и категориям. Обновляется автоматически из реальных конфигов .kilo/agents/.

+ +
+ +
+

Модели → Категории → Агенты

+
+ +
+
+ + +
+

Дистрибуция по категориям

+
+ +
+
+ + +
+

Fit-score распределение

+
+ +
+
+ + +
+

Команды — модели и режимы

+
+ + + + + + + +
КомандаМодельScore
+
+
+
+
+
+
@@ -351,6 +354,27 @@ bun run sync:evolution && bun run evolution:dashboard
# ---------------------------------------------------------------------------
# NOTE: the lines replaced here came from a whitespace-collapsed git patch.
# Patch scaffolding and the non-Python file that shared these lines are kept
# below as comments so that nothing from the patch is lost.
#
# diff --git a/landing/nginx-landing.conf b/landing/nginx-landing.conf
# new file mode 100644
# index 0000000..55d4eb1
# --- /dev/null
# +++ b/landing/nginx-landing.conf
# @@ -0,0 +1,21 @@
# server {
#     listen 80;
#     server_name localhost;
#     root /usr/share/nginx/html;
#     index index.html;
#
#     # All static assets
#     location / {
#         try_files $uri $uri/ /index.html;
#     }
#
#     # API proxied to the Python state server
#     location /api/state {
#         proxy_pass http://apaw-state-api:8080/api/state;
#         proxy_set_header Host $host;
#         proxy_set_header X-Real-IP $remote_addr;
#         proxy_hide_header Content-Type;
#         add_header Content-Type application/json;
#         add_header Cache-Control "no-store";
#     }
# }
#
# diff --git a/scripts/real-fit-engine.py b/scripts/real-fit-engine.py
# new file mode 100644
# index 0000000..a3af5ef
# --- /dev/null
# +++ b/scripts/real-fit-engine.py
# @@ -0,0 +1,565 @@
# ---------------------------------------------------------------------------
#!/usr/bin/env python3
"""
Real-Fit Multi-Agent Evaluation Engine (sync/stdlib version — no external deps)
SQLite-backed pipeline that evaluates agent-role × model fit via Ollama API.

Usage:
    python3 real-fit-engine.py --init-db --import-evolution --generate-prompts
    python3 real-fit-engine.py --evaluate-all --models kimi-k2.6,deepseek-v4-pro-max
    python3 real-fit-engine.py --report
    python3 real-fit-engine.py --recalc --agent lead-developer --old-model qwen3-coder:480b --new-model kimi-k2.6

Configuration (environment variables):
    OLLAMA_HOST  base URL of the Ollama API (default: https://api.ollama.com)
    OLLAMA_KEY   bearer token, sent only when non-empty (default: empty)
    OLLAMA_MOCK  "1" returns a canned response instead of calling the API
                 (default: "0", i.e. real API calls)
"""
import json
import os
import re
import sqlite3
import sys
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from contextlib import closing
from datetime import datetime, timezone
from glob import glob
from urllib import error as urllib_error
from urllib import request

DB_PATH = "agent-evolution/data/real-fit.db"
REPORT_PATH = "agent-evolution/data/real-fit-report.json"

OLLAMA_HOST = os.environ.get("OLLAMA_HOST", "https://api.ollama.com")
OLLAMA_KEY = os.environ.get("OLLAMA_KEY", "")
USE_MOCK = os.environ.get("OLLAMA_MOCK", "0") == "1"  # Default to REAL for this env

DEFAULT_MODELS = ["kimi-k2.6", "deepseek-v4-pro-max", "deepseek-v4-flash",
                  "glm-5.1", "qwen3-coder:480b", "qwen3.5-122b"]

# ================================================================
# SCHEMA
# ================================================================
SCHEMA = """
CREATE TABLE IF NOT EXISTS agents (
    name TEXT PRIMARY KEY,
    description TEXT,
    category TEXT,
    current_model TEXT,
    color TEXT,
    updated TEXT
);

CREATE TABLE IF NOT EXISTS models (
    short_name TEXT PRIMARY KEY,
    full_id TEXT,
    if_score REAL,
    swe_bench REAL,
    parameters TEXT,
    context_window TEXT,
    updated TEXT
);

CREATE TABLE IF NOT EXISTS test_prompts (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    agent_name TEXT,
    task_type TEXT,
    system_prompt TEXT,
    user_prompt TEXT,
    expected_keywords TEXT,
    rubric TEXT
);

CREATE TABLE IF NOT EXISTS evaluations (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    agent_name TEXT,
    model TEXT,
    prompt_id INTEGER,
    response TEXT,
    latency_ms INTEGER,
    tokens_prompt INTEGER,
    tokens_response INTEGER,
    scores TEXT,
    total_score REAL,
    explanation TEXT,
    evaluated_at TEXT,
    evaluator TEXT
);

CREATE TABLE IF NOT EXISTS recalculations (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    trigger TEXT,
    agent_name TEXT,
    old_model TEXT,
    new_model TEXT,
    old_fit REAL,
    new_fit REAL,
    delta REAL,
    reason TEXT,
    recalculated_at TEXT
);

CREATE TABLE IF NOT EXISTS fit_scores (
    agent_name TEXT PRIMARY KEY,
    model TEXT,
    fit_score REAL,
    dimension_scores TEXT,
    explanation TEXT,
    evaluated_at TEXT,
    FOREIGN KEY (agent_name) REFERENCES agents(name)
);

CREATE INDEX IF NOT EXISTS idx_eval_agent_model ON evaluations(agent_name, model);
CREATE INDEX IF NOT EXISTS idx_recalc_agent ON recalculations(agent_name);
"""


def _utc_now():
    """Return the current UTC time as an ISO-8601 string."""
    return datetime.now(timezone.utc).isoformat()


def init_db():
    """Create the database directory and all tables/indexes if missing."""
    os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
    with closing(sqlite3.connect(DB_PATH)) as conn:
        conn.executescript(SCHEMA)
        conn.commit()
    print(f"[db] Initialized schema in {DB_PATH}")


# ================================================================
# PROMPT GENERATOR
# ================================================================

def parse_frontmatter(path):
    """Parse the leading ``---`` front-matter block of a markdown file.

    Returns a dict of the simple ``key: value`` lines found between the two
    ``---`` markers, plus ``_body_snippet`` (up to 300 characters of the text
    following the block, newlines replaced by spaces). Returns ``{}`` when the
    file cannot be read or has no complete front-matter block.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeDecodeError):
        # Unreadable file: treated the same as "no front matter".
        return {}
    if not content.startswith('---'):
        return {}
    end = content.find('---', 3)
    if end == -1:
        return {}
    data = {}
    for line in content[3:end].strip().split('\n'):
        m = re.match(r'^(\w+):\s*(.+)$', line)
        if m:
            data[m.group(1)] = m.group(2).strip()
    body = content[end + 3:][:800]
    data['_body_snippet'] = body.replace('\n', ' ').strip()[:300]
    return data


TASK_LIBRARY = {
    'code-skeptic': {
        'system': 'You are a strict code reviewer. Find security issues, logic errors, anti-patterns. Be adversarial but constructive.',
        'task': '''Review this function for security vulnerabilities and logic errors. Report: SQL injection, XSS, race conditions, code smells, and suggested fixes.

```typescript
function processPayment(userId, amount, cardToken) {
  const q = `UPDATE users SET balance = balance - ${amount} WHERE id = ${userId}`;
  db.exec(q);
  fetch('/api/charge', { body: JSON.stringify({ cardToken, amount }) });
  if (Math.random() > 0.9) { throw new Error('timeout'); }
}
```''',
        'expected': ['sql injection', 'parameterized', 'race', 'localStorage', 'xss'],
        'rubric': {'security': 35, 'logic': 25, 'actionability': 25, 'depth': 15}
    },
    'workflow-cross-checker': {
        'system': 'You are a workflow cross-checker. Before any work begins, ask uncomfortable but important questions that could block the task.',
        'task': 'A developer wants to add "admin can delete any user" directly from the UI. Run your cross-check protocol. Identify 5+ potential issues or blockers.',
        'expected': ['soft delete', 'audit log', 'cascading', 'permission', 'data retention', 'backup'],
        'rubric': {'thoroughness': 35, 'relevance': 30, 'actionability': 20, 'severity_ranking': 15}
    },
    'lead-developer': {
        'system': 'You are lead developer. Write production-ready implementation. Tests MUST pass. Follow SOLID. Max 100 lines per file.',
        'task': 'Implement a TaskQueue class with: transaction support, retry with exponential backoff, timeout handling, and Jest tests. TypeScript.',
        'expected': ['class TaskQueue', 'async', 'retry', 'timeout', 'test', 'jest'],
        'rubric': {'correctness': 30, 'test_coverage': 30, 'code_quality': 25, 'edge_cases': 15}
    },
    'sdet-engineer': {
        'system': 'You are SDET. Write tests BEFORE code. Cover edge cases, nulls, async errors, concurrent access.',
        'task': 'Write Jest tests for UserService: createUser, getUser, updateUser, deleteUser. Cover: valid inputs, nulls, duplicates, concurrent updates.',
        'expected': ['describe', 'it', 'expect', 'null', 'async', 'mock', 'beforeEach'],
        'rubric': {'coverage': 35, 'edge_cases': 30, 'readability': 20, 'mocking': 15}
    },
    'orchestrator': {
        'system': 'You are an Orchestrator. You delegate tasks to subagents. You decide routing, handle errors, and manage budgets.',
        'task': 'A user reports: "Build a REST API for ecommerce checkout". Design your delegation plan: which agents to call, in what order, what to do if one fails.',
        'expected': ['system-analyst', 'lead-developer', 'code-skeptic', 'sdet-engineer', 'budget', 'parallel'],
        'rubric': {'plan_quality': 30, 'agent_selection': 25, 'risk_handling': 25, 'budget_awareness': 20}
    },
    'system-analyst': {
        'system': 'You design technical specifications, data schemas, and API contracts before implementation.',
        'task': 'Design the API contract and DB schema for a multi-tenant SaaS billing system. Include rate limiting, audit trails, and idempotency.',
        'expected': ['openapi', 'schema', 'idempotency', 'rate limit', 'audit', 'tenant'],
        'rubric': {'completeness': 30, 'correctness': 30, 'clarity': 20, 'scalability': 20}
    },
    'devops-engineer': {
        'system': 'You handle Docker, CI/CD, infrastructure. Security first.',
        'task': 'Write a multi-stage Dockerfile for a Node.js Next.js app. Include: non-root user, health check, security scan, .dockerignore best practices.',
        'expected': ['FROM node', 'USER', 'HEALTHCHECK', 'multi-stage', '.dockerignore'],
        'rubric': {'security': 30, 'optimization': 25, 'correctness': 25, 'completeness': 20}
    }
}


def generate_task_for_agent(name, role):
    """Pick the test task for an agent.

    Lookup order: a TASK_LIBRARY key contained in the agent name; then a key
    (or any single word of a key) contained in the role description; finally a
    generic task built from the name and role.
    """
    n, r = name.lower(), role.lower()
    for key, task in TASK_LIBRARY.items():
        if key in n:
            return task
    # Keyword fallback.
    # NOTE(review): matching on single words of a key ("code", "system", ...)
    # is very loose; the first library entry whose word appears in the role wins.
    for key in TASK_LIBRARY:
        if key.replace('-', ' ') in r or any(kw in r for kw in key.split('-')):
            return TASK_LIBRARY[key]
    return {
        'system': f'You are {name}. {role}',
        'task': f'Demonstrate your expertise as {name} in a realistic complex scenario. Provide a complete working solution.',
        'expected': [name.replace('-', ' ')],
        'rubric': {'relevance': 40, 'completeness': 30, 'correctness': 30}
    }


def generate_prompts():
    """Rebuild ``test_prompts`` from the agent files in ``.kilo/agents/*.md``.

    All existing prompts are deleted first; agent files without a ``model``
    front-matter key are skipped.
    """
    count = 0
    with closing(sqlite3.connect(DB_PATH)) as conn:
        conn.execute("DELETE FROM test_prompts")
        for path in sorted(glob('.kilo/agents/*.md')):
            fm = parse_frontmatter(path)
            if not fm.get('model'):
                continue
            name = os.path.basename(path)[:-3]  # strip ".md"
            task = generate_task_for_agent(name, fm.get('description', ''))
            conn.execute('''
                INSERT INTO test_prompts (agent_name, task_type, system_prompt, user_prompt, expected_keywords, rubric)
                VALUES (?, ?, ?, ?, ?, ?)
            ''', (name, 'primary', task['system'], task['task'],
                  json.dumps(task['expected']), json.dumps(task['rubric'])))
            count += 1
        conn.commit()
    print(f"[prompts] Generated {count} test prompts")


# ================================================================
# OLLAMA CLIENT
# ================================================================

def call_ollama(model_short, system_prompt, user_prompt, expected_keywords=None):
    """Call the Ollama ``/api/chat`` endpoint (or return a canned mock reply).

    Returns ``(text, latency_ms, tokens)`` where ``tokens`` is a dict with the
    keys ``prompt`` and ``response``. Failures are not raised: they come back
    as a bracketed marker string (``[HTTP ...]`` / ``[ERROR: ...]``) with zero
    token counts. ``expected_keywords`` is accepted but not used.
    """
    if USE_MOCK:
        return (
            "[MOCK] This is a simulated response for testing the pipeline without API calls.",
            500, {"prompt": 100, "response": 200}
        )

    model_map = {
        'kimi-k2.6': 'kimi-k2.6',
        'deepseek-v4-pro-max': 'deepseek-v4-pro',
        'deepseek-v4-flash': 'deepseek-v4-flash',
        'glm-5.1': 'glm-5.1',
        'qwen3-coder:480b': 'qwen3-coder:480b',
        # NOTE(review): this alias means results stored under "qwen3.5-122b"
        # are actually produced by kimi-k2.6.
        'qwen3.5-122b': 'kimi-k2.6',  # fallback to known working model
    }
    model_ollama = model_map.get(model_short, model_short)
    payload = json.dumps({
        "model": model_ollama,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        "stream": False,
        "options": {"temperature": 0.3, "num_predict": 2048}
    }).encode('utf-8')

    headers = {"Content-Type": "application/json"}
    if OLLAMA_KEY:
        headers["Authorization"] = f"Bearer {OLLAMA_KEY}"

    req = request.Request(f"{OLLAMA_HOST}/api/chat",
                          data=payload, headers=headers,
                          method='POST')
    no_tokens = {"prompt": 0, "response": 0}
    start = time.time()
    try:
        with request.urlopen(req, timeout=120) as resp:
            elapsed = int((time.time() - start) * 1000)
            data = json.loads(resp.read().decode('utf-8'))
            text = data.get('message', {}).get('content', '')
            return (text, elapsed,
                    {"prompt": data.get('prompt_eval_count', 0),
                     "response": data.get('eval_count', 0)})
    except urllib_error.HTTPError as e:
        return (f"[HTTP {e.code}: {e.reason}]", int((time.time() - start) * 1000), no_tokens)
    except Exception as e:
        # Deliberate best-effort boundary: one failed call must not abort a
        # batch. The real elapsed time is reported (it used to be 0).
        return (f"[ERROR: {e}]", int((time.time() - start) * 1000), no_tokens)


# ================================================================
# EVALUATOR
# ================================================================

def evaluate_response(response, expected_json, rubric_json):
    """Score a model response with the keyword/structure heuristic.

    ``expected_json`` and ``rubric_json`` may be JSON strings or already
    decoded objects. A ``None`` response is scored like an empty string.
    Returns ``{'scores': {...}, 'total': float, 'explanation': str}``.
    """
    expected = json.loads(expected_json) if isinstance(expected_json, str) else expected_json
    rubric = json.loads(rubric_json) if isinstance(rubric_json, str) else rubric_json
    response = response or ''  # NULL/None responses used to crash on .strip()
    resp_lower = response.lower()
    lines = response.strip().split('\n')

    keyword_hits = sum(1 for kw in expected if kw.lower() in resp_lower)
    keyword_score = min(100, (keyword_hits / len(expected) * 100) if expected else 50)

    has_code = '```' in response or 'function' in resp_lower or 'class ' in resp_lower
    code_score = 80 if has_code else 30

    structure_score = min(100, len(lines) * 2)  # ~50 lines = 100

    scores = {'keyword_coverage': round(keyword_score, 1),
              'code_presence': code_score,
              'structure': round(structure_score, 1)}

    total = 0
    if rubric:
        # NOTE(review): the rubric dimension names used in TASK_LIBRARY never
        # match the keys of `scores`, so every dimension falls back to
        # keyword_score and code/structure do not influence the total.
        for dim, weight in rubric.items():
            dim_score = scores.get(dim, keyword_score)
            total += (dim_score / 100) * weight
    else:
        total = sum(scores.values()) / len(scores)

    explanation = (f"Keywords: {keyword_hits}/{len(expected)}. "
                   f"Lines: {len(lines)}. "
                   f"Code: {'YES' if has_code else 'NO'}. "
                   f"Total={round(total, 1)}")

    return {'scores': scores, 'total': round(total, 1), 'explanation': explanation}


# ================================================================
# PARALLEL BATCH EVALUATION
# ================================================================

def evaluate_one(args):
    """Run one (agent, model, prompt) evaluation; ``args`` is a 7-tuple.

    NOTE(review): error marker strings returned by call_ollama are scored like
    ordinary responses.
    """
    agent_name, model, pid, system, user, expected, rubric = args
    resp, latency, tokens = call_ollama(model, system, user, expected)
    ev = evaluate_response(resp, expected, rubric)
    return {
        'agent': agent_name, 'model': model, 'prompt_id': pid,
        'response': resp, 'latency': latency, 'tokens': tokens,
        'total': ev['total'], 'scores': json.dumps(ev['scores']),
        'explanation': ev['explanation']
    }


def evaluate_all(models_to_test, max_workers=4, agent_filter=None):
    """Evaluate agents × models × prompts with a thread pool and store results.

    Args:
        models_to_test: iterable of model short names.
        max_workers: size of the thread pool used for the API calls.
        agent_filter: when given, only the agent with this name is evaluated;
            ``None`` (default) evaluates every agent in the ``agents`` table.

    Each result is committed as soon as it completes, then the per-agent
    aggregates are recomputed.
    """
    tasks = []
    with closing(sqlite3.connect(DB_PATH)) as conn:
        if agent_filter is None:
            agents = conn.execute("SELECT DISTINCT name FROM agents").fetchall()
        else:
            agents = conn.execute("SELECT DISTINCT name FROM agents WHERE name = ?",
                                  (agent_filter,)).fetchall()
        for (agent_name,) in agents:
            prompts = conn.execute('''
                SELECT id, system_prompt, user_prompt, expected_keywords, rubric
                FROM test_prompts WHERE agent_name = ?''', (agent_name,)).fetchall()
            for pid, system, user, expected, rubric in prompts:
                for model in models_to_test:
                    tasks.append((agent_name, model, pid, system, user, expected, rubric))

    print(f"[eval] Prepared {len(tasks)} evaluations (agents × models × prompts)")

    results = []
    # The connection is only touched from this thread; workers just call the API.
    with closing(sqlite3.connect(DB_PATH)) as conn, \
            ThreadPoolExecutor(max_workers=max_workers) as ex:
        futures = [ex.submit(evaluate_one, t) for t in tasks]
        for future in as_completed(futures):
            res = future.result()
            results.append(res)
            conn.execute('''INSERT INTO evaluations
                (agent_name, model, prompt_id, response, latency_ms, tokens_prompt, tokens_response,
                 scores, total_score, explanation, evaluated_at, evaluator)
                VALUES (?,?,?,?,?,?,?,?,?,?,?,?)''',
                (res['agent'], res['model'], res['prompt_id'], res['response'], res['latency'],
                 res['tokens']['prompt'], res['tokens']['response'],
                 res['scores'], res['total'], res['explanation'],
                 _utc_now(), 'rubric_v1'))
            conn.commit()  # commit per row so partial progress survives a crash
            print(f"  [{res['agent']}] × [{res['model']}] score={res['total']:.1f}")

    print(f"[eval] Stored {len(results)} evaluations")
    compute_aggregates()


def compute_aggregates():
    """Compute per-agent model fit scores from evaluation averages.

    For every agent the model with the highest average ``total_score`` is
    written to ``fit_scores`` together with its averaged dimension scores.
    """
    with closing(sqlite3.connect(DB_PATH)) as conn:
        rows = conn.execute('''
            SELECT agent_name, model, AVG(total_score) as avg_score
            FROM evaluations GROUP BY agent_name, model
        ''').fetchall()

        # For each agent pick best model
        best = {}
        for a, m, s in rows:
            if a not in best or s > best[a][1]:
                best[a] = (m, s)

        for a, (m, s) in best.items():
            # Get dimension breakdown
            dims = conn.execute('''
                SELECT scores FROM evaluations WHERE agent_name = ? AND model = ?
            ''', (a, m)).fetchall()
            dim_avg = {}
            for (score_json,) in dims:
                for k, v in json.loads(score_json).items():
                    dim_avg[k] = dim_avg.get(k, 0) + v
            dim_avg = {k: round(v / len(dims), 1) for k, v in dim_avg.items()}

            explanation = f"Best model for {a} is {m} with avg score {round(s,1)}. "
            explanation += f"Strongest dimension: {max(dim_avg, key=dim_avg.get)}."

            conn.execute('''INSERT OR REPLACE INTO fit_scores
                (agent_name, model, fit_score, dimension_scores, explanation, evaluated_at)
                VALUES (?, ?, ?, ?, ?, ?)''',
                (a, m, round(s, 1), json.dumps(dim_avg), explanation, _utc_now()))

        conn.commit()
    print(f"[agg] Computed fit scores for {len(best)} agents")


# ================================================================
# RECALCULATION TRIGGER
# ================================================================

def trigger_recalculation(agent_name, old_model, new_model, reason="manual"):
    """After model or prompt change, re-evaluate and log delta.

    Runs the agent's first stored prompt against ``new_model``, compares the
    score with the agent's current ``fit_scores`` entry (0 when absent) and
    appends a row to ``recalculations``. Returns ``new_fit - old_fit``.
    ``fit_scores`` itself is not modified here.
    """
    with closing(sqlite3.connect(DB_PATH)) as conn:
        old_row = conn.execute('''SELECT fit_score FROM fit_scores WHERE agent_name = ?''',
                               (agent_name,)).fetchone()
        old_fit = old_row[0] if old_row else 0

        # Re-evaluate on new model
        prompt = conn.execute('''SELECT system_prompt, user_prompt, expected_keywords, rubric
                                 FROM test_prompts WHERE agent_name = ? LIMIT 1''',
                              (agent_name,)).fetchone()

        if prompt:
            system, user, expected, rubric = prompt
            resp, _latency, _tokens = call_ollama(new_model, system, user)
            new_fit = evaluate_response(resp, expected, rubric)['total']
        else:
            new_fit = 0

        delta = new_fit - old_fit
        # `reason` is stored in both the `trigger` and the `reason` column.
        conn.execute('''INSERT INTO recalculations
            (trigger, agent_name, old_model, new_model, old_fit, new_fit, delta, reason, recalculated_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)''',
            (reason, agent_name, old_model, new_model, old_fit, new_fit, delta, reason,
             _utc_now()))
        conn.commit()

    print(f"[recalc] {agent_name}: {old_model}({old_fit:.1f}) → {new_model}({new_fit:.1f}) Δ={delta:+.1f}")
    return delta


# ================================================================
# REPORT / DASHBOARD DATA
# ================================================================

def generate_report(out=REPORT_PATH):
    """Write the JSON report of per-agent/per-model averages and fit scores.

    Args:
        out: destination path (defaults to REPORT_PATH); parent directories
            are created when missing.

    Returns the report dict. ``total_evaluations`` is the number of stored
    evaluation rows (it used to be the number of agent/model pairs).
    """
    with closing(sqlite3.connect(DB_PATH)) as conn:
        # All evaluations per agent per model
        rows = conn.execute('''
            SELECT agent_name, model, AVG(total_score) as avg_score, COUNT(*) as cnt
            FROM evaluations GROUP BY agent_name, model
        ''').fetchall()

        agents = {}
        for a, m, s, _cnt in rows:
            if a not in agents:
                info = conn.execute('SELECT description, category, current_model FROM agents WHERE name = ?', (a,)).fetchone()
                agents[a] = {'name': a, 'evaluations': {}, 'info': info or ()}
            agents[a]['evaluations'][m] = round(s, 1)

        # Fit scores table
        fit_rows = conn.execute('SELECT agent_name, model, fit_score, explanation FROM fit_scores').fetchall()

    # Best per agent
    for entry in agents.values():
        evs = entry['evaluations']
        best_m = max(evs, key=evs.get)
        entry['best_model'] = best_m
        entry['best_score'] = evs[best_m]

    fit_scores = {a: {'model': m, 'fit': s, 'explanation': e} for a, m, s, e in fit_rows}
    total_evaluations = sum(cnt for _a, _m, _s, cnt in rows)

    report = {
        'generated': _utc_now(),
        'source': 'real-fit-engine',
        'total_evaluations': total_evaluations,
        'agents': agents,
        'fit_scores': fit_scores
    }

    out_dir = os.path.dirname(out)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # ensure_ascii=False emits raw non-ASCII text, so pin the file encoding.
    with open(out, 'w', encoding='utf-8') as f:
        json.dump(report, f, ensure_ascii=False, indent=2)

    print(f"[report] Written {out}: {len(agents)} agents, {total_evaluations} evaluations")
    return report


# ================================================================
# IMPORT REAL DATA
# ================================================================

def import_from_evolution():
    """Load agents and model benchmarks from ``evolution.json`` into SQLite."""
    with open('agent-evolution/data/evolution.json', encoding='utf-8') as f:
        evo = json.load(f)
    with closing(sqlite3.connect(DB_PATH)) as conn:
        for name, a in evo['agents'].items():
            c = a['current']
            conn.execute('''INSERT OR REPLACE INTO agents (name, description, category, current_model, color, updated)
                VALUES (?, ?, ?, ?, ?, ?)''',
                (name, c.get('description', ''), c.get('category', 'General'),
                 c.get('model', ''), c.get('color', ''),
                 _utc_now()))
        for mid, m in evo.get('model_benchmarks', {}).items():
            conn.execute('''INSERT OR REPLACE INTO models (short_name, full_id, if_score, swe_bench, parameters, context_window, updated)
                VALUES (?, ?, ?, ?, ?, ?, ?)''',
                (mid, f'ollama-cloud/{mid}', m.get('if_score'), None,
                 m.get('parameters', ''), m.get('context_window', ''),
                 _utc_now()))
        conn.commit()
    print(f"[import] {len(evo['agents'])} agents, {len(evo.get('model_benchmarks',{}))} models")


# ================================================================
# CLI
# ================================================================

def main(argv=None):
    """Command-line entry point; ``argv`` defaults to ``sys.argv[1:]``."""
    import argparse
    cli_args = sys.argv[1:] if argv is None else list(argv)

    p = argparse.ArgumentParser(description='Real-Fit Multi-Agent Engine')
    p.add_argument('--init-db', action='store_true')
    p.add_argument('--import-evolution', action='store_true')
    p.add_argument('--generate-prompts', action='store_true')
    p.add_argument('--evaluate', metavar='AGENT')
    p.add_argument('--models', default=','.join(DEFAULT_MODELS))
    p.add_argument('--evaluate-all', action='store_true')
    p.add_argument('--report', action='store_true')
    p.add_argument('--recalc', action='store_true')
    p.add_argument('--agent', help='Agent for recalc')
    p.add_argument('--old-model', help='Old model for recalc')
    p.add_argument('--new-model', help='New model for recalc')
    p.add_argument('--workers', type=int, default=4)
    args = p.parse_args(cli_args)

    # --recalc used to be ignored silently when one of its arguments was missing.
    if args.recalc and not (args.agent and args.old_model and args.new_model):
        p.error('--recalc requires --agent, --old-model and --new-model')

    models = [m.strip() for m in args.models.split(',') if m.strip()]

    if args.init_db:
        init_db()
    if args.import_evolution:
        import_from_evolution()
    if args.generate_prompts:
        generate_prompts()
    if args.evaluate:
        # Previously a {agent: models} dict was passed as the model list, so
        # the agent name itself was evaluated as a "model" for every agent.
        evaluate_all(models, args.workers, agent_filter=args.evaluate)
    if args.evaluate_all:
        evaluate_all(models, args.workers)
    if args.report:
        generate_report()
    if args.recalc:
        trigger_recalculation(args.agent, args.old_model, args.new_model)

    if not cli_args:
        p.print_help()
        print("\n=== Workflow ===")
        print("  python3 real-fit-engine.py --init-db --import-evolution --generate-prompts")
        print("  python3 real-fit-engine.py --evaluate-all --models kimi-k2.6,deepseek-v4-pro-max")
        print("  python3 real-fit-engine.py --report")
        print("  python3 real-fit-engine.py --recalc --agent lead-developer --old-model qwen3-coder:480b --new-model kimi-k2.6")
        print("\nReal Ollama API calls are the default; set OLLAMA_MOCK=1 for canned responses (no API calls)")


if __name__ == '__main__':
    main()

# ---------------------------------------------------------------------------
# Patch scaffolding of the next file, which shared the last replaced line:
#
# diff --git a/scripts/real-fit-recalc.py b/scripts/real-fit-recalc.py
# new file mode 100644
# index 0000000..8962fa6
# --- /dev/null
# +++ b/scripts/real-fit-recalc.py
# @@ -0,0 +1,157 @@
# #!/usr/bin/env python3
# """
# Recalculate real-fit scores from stored responses in SQLite.
# No API needed. Updates evaluations, fit_scores, and generates report.
# ---------------------------------------------------------------------------
#!/usr/bin/env python3
"""
Recalculate real-fit scores from stored responses in SQLite.
No API needed. Updates evaluations, fit_scores, and generates report.
Usage: python3 scripts/real-fit-recalc.py
"""
import json
import os
import sqlite3
import sys
from contextlib import closing
from datetime import datetime, timezone

DB_PATH = "agent-evolution/data/real-fit.db"
REPORT_PATH = "agent-evolution/data/real-fit-report.json"


def evaluate_response(response, expected_json, rubric_json):
    """Score a stored response with the keyword/structure heuristic.

    ``expected_json`` and ``rubric_json`` may be JSON strings or already
    decoded objects. A ``None`` (SQL NULL) response is scored like an empty
    string. Returns ``{'scores': {...}, 'total': float, 'explanation': str}``.
    """
    expected = json.loads(expected_json) if isinstance(expected_json, str) else expected_json
    rubric = json.loads(rubric_json) if isinstance(rubric_json, str) else rubric_json
    response = response or ''  # NULL responses used to crash on .strip()
    resp_lower = response.lower()
    lines = response.strip().split('\n')

    keyword_hits = sum(1 for kw in expected if kw.lower() in resp_lower)
    keyword_score = min(100, (keyword_hits / len(expected) * 100) if expected else 50)

    has_code = '```' in response or 'function' in resp_lower or 'class ' in resp_lower
    code_score = 80 if has_code else 30

    structure_score = min(100, len(lines) * 2)

    scores = {'keyword_coverage': round(keyword_score, 1),
              'code_presence': code_score,
              'structure': round(structure_score, 1)}

    total = 0
    if rubric:
        # A rubric dimension that is not a key of `scores` falls back to the
        # keyword score.
        for dim, weight in rubric.items():
            dim_score = scores.get(dim, keyword_score)
            total += (dim_score / 100) * weight
    else:
        total = sum(scores.values()) / len(scores)

    explanation = (f"Keywords: {keyword_hits}/{len(expected)}. "
                   f"Lines: {len(lines)}. "
                   f"Code: {'YES' if has_code else 'NO'}. "
                   f"Total={round(total, 1)}")

    return {'scores': scores, 'total': round(total, 1), 'explanation': explanation}


def _rescore_evaluations(cur):
    """Re-score every stored evaluation in place; return the number updated."""
    # Fetch all evaluations with prompt data resolved by agent_name (prompt_id mismatch safe)
    # NOTE(review): if an agent ever has more than one test prompt this join
    # yields one row per prompt and the last one processed wins.
    cur.execute('''SELECT e.id, e.agent_name, e.response, e.total_score, e.scores, e.explanation,
                          t.expected_keywords, t.rubric
                   FROM evaluations e
                   LEFT JOIN test_prompts t ON e.agent_name = t.agent_name''')
    rows = cur.fetchall()
    print(f"[recalc] Found {len(rows)} evaluations")

    updated = 0
    for eid, agent_name, response, _old_total, _old_scores, _old_exp, expected, rubric in rows:
        if expected is None or rubric is None:
            print(f"  [skip] No prompt match for eval {eid} (agent={agent_name})")
            continue

        ev = evaluate_response(response, expected, rubric)
        cur.execute('''UPDATE evaluations
                       SET total_score = ?, scores = ?, explanation = ?
                       WHERE id = ?''',
                    (ev['total'], json.dumps(ev['scores']), ev['explanation'], eid))
        updated += 1
    return updated


def _recompute_fit_scores(cur):
    """Store the best-scoring model per agent in ``fit_scores``; return the agent count."""
    cur.execute('''SELECT agent_name, model, AVG(total_score) as avg_score
                   FROM evaluations GROUP BY agent_name, model''')
    best = {}
    for a, m, s in cur.fetchall():
        if a not in best or s > best[a][1]:
            best[a] = (m, s)

    for a, (m, s) in best.items():
        cur.execute('SELECT scores FROM evaluations WHERE agent_name = ? AND model = ?', (a, m))
        dims = cur.fetchall()
        dim_avg = {}
        for (score_json,) in dims:
            for k, v in json.loads(score_json).items():
                dim_avg[k] = dim_avg.get(k, 0) + v
        dim_avg = {k: round(v / len(dims), 1) for k, v in dim_avg.items()}

        explanation = f"Best model for {a} is {m} with avg score {round(s,1)}. "
        explanation += f"Strongest dimension: {max(dim_avg, key=dim_avg.get)}."

        cur.execute('''INSERT OR REPLACE INTO fit_scores
            (agent_name, model, fit_score, dimension_scores, explanation, evaluated_at)
            VALUES (?, ?, ?, ?, ?, ?)''',
            (a, m, round(s, 1), json.dumps(dim_avg), explanation,
             datetime.now(timezone.utc).isoformat()))
    return len(best)


def _build_report(cur):
    """Assemble the report dict from the current database contents."""
    cur.execute('''SELECT agent_name, model, AVG(total_score) as avg_score, COUNT(*) as cnt
                   FROM evaluations GROUP BY agent_name, model''')
    rows = cur.fetchall()

    agents = {}
    for a, m, s, _cnt in rows:
        if a not in agents:
            cur.execute('SELECT description, category, current_model FROM agents WHERE name = ?', (a,))
            info = cur.fetchone()
            agents[a] = {'name': a, 'evaluations': {}, 'info': info or ()}
        agents[a]['evaluations'][m] = round(s, 1)

    for entry in agents.values():
        evs = entry['evaluations']
        best_m = max(evs, key=evs.get)
        entry['best_model'] = best_m
        entry['best_score'] = evs[best_m]

    cur.execute('SELECT agent_name, model, fit_score, explanation FROM fit_scores')
    fit_scores = {a: {'model': m, 'fit': s, 'explanation': e} for a, m, s, e in cur.fetchall()}

    return {
        'generated': datetime.now(timezone.utc).isoformat(),
        'source': 'real-fit-engine',
        # Number of evaluation rows; this used to be the number of
        # (agent, model) groups.
        'total_evaluations': sum(cnt for _a, _m, _s, cnt in rows),
        'agents': agents,
        'fit_scores': fit_scores
    }


def recalc(db_path=DB_PATH, report_path=REPORT_PATH):
    """Re-score stored responses, refresh ``fit_scores`` and write the report.

    Args:
        db_path: SQLite database to update (defaults to DB_PATH).
        report_path: JSON report destination (defaults to REPORT_PATH).

    Exits the process with status 1 when the database file does not exist.
    """
    if not os.path.exists(db_path):
        print(f"[error] Database not found: {db_path}")
        sys.exit(1)

    # closing() guarantees the connection is released even if a step raises.
    with closing(sqlite3.connect(db_path)) as conn:
        cur = conn.cursor()

        updated = _rescore_evaluations(cur)
        conn.commit()
        print(f"[recalc] Updated {updated} evaluations")

        # Compute aggregates
        agent_count = _recompute_fit_scores(cur)
        conn.commit()
        print(f"[recalc] Computed fit scores for {agent_count} agents")

        # Generate report
        report = _build_report(cur)

    report_dir = os.path.dirname(report_path)
    if report_dir:
        os.makedirs(report_dir, exist_ok=True)
    # ensure_ascii=False emits raw non-ASCII text, so pin the file encoding.
    with open(report_path, 'w', encoding='utf-8') as f:
        json.dump(report, f, ensure_ascii=False, indent=2)

    print(f"[recalc] Written {report_path}: {len(report['agents'])} agents, "
          f"{report['total_evaluations']} evaluations")


if __name__ == '__main__':
    recalc()

# ---------------------------------------------------------------------------
# Patch scaffolding of the next file, which shared the last replaced line:
#
# diff --git a/scripts/sync-dashboard-data.py b/scripts/sync-dashboard-data.py
# new file mode 100644
# index 0000000..77f218b
# --- /dev/null
# +++ b/scripts/sync-dashboard-data.py
# @@ -0,0 +1,122 @@
# #!/usr/bin/env python3
# """Sync dashboard data — generates dashboard-data.json
# ---------------------------------------------------------------------------
#!/usr/bin/env python3
"""Sync dashboard data — generates dashboard-data.json from evolution.json (real data)."""
import json
import os
from collections import Counter
from datetime import datetime, timezone

EVOLUTION_PATH = 'agent-evolution/data/evolution.json'
BENCHMARKS_PATH = 'agent-evolution/data/model-benchmarks-verified.json'
OUTPUT_PATH = 'agent-evolution/data/dashboard-data.json'


def _load_benchmark_lookup(benchmarks_path):
    """Return ``{'ollama-cloud/<id>': info}`` from the optional benchmarks file."""
    try:
        with open(benchmarks_path, encoding='utf-8') as f:
            bm = json.load(f)
    except (OSError, ValueError):
        # The benchmarks file is supplemental: a missing or malformed file
        # (JSONDecodeError is a ValueError) just means no extra model info.
        bm = {'models': []}

    bench_map = {}
    for m in bm.get('models', []):
        mid = m.get('id', '')
        if mid:
            bench_map[f'ollama-cloud/{mid}'] = {
                'swe': m.get('swe_bench'),
                'name': m.get('name', mid),
                'params': m.get('parameters', ''),
                'ctx': m.get('context_window', '')
            }
    return bench_map


def _latest_model_change(history):
    """Return the last history entry of type ``model_change``, or ``None``."""
    for h in reversed(history):
        if h.get('type') == 'model_change':
            return h
    return None


def build_dashboard_data(evolution_path=EVOLUTION_PATH, benchmarks_path=BENCHMARKS_PATH):
    """Build the dashboard payload from the evolution data.

    Args:
        evolution_path: JSON file with the ``agents`` mapping (required).
        benchmarks_path: optional JSON file with per-model benchmark info.

    Returns a dict with the agent list, a per-model summary (agent count and
    average/min/max fit score) and the model distribution. Agents without a
    ``current.model`` are skipped.
    """
    # Load real evolution data
    with open(evolution_path, encoding='utf-8') as f:
        evo = json.load(f)

    # Load model benchmarks for supplemental info
    bench_map = _load_benchmark_lookup(benchmarks_path)

    agents = []
    for name, a in evo.get('agents', {}).items():
        c = a.get('current', {})
        if not c.get('model'):
            continue
        model = c['model']
        b = c.get('benchmark', {})
        model_short = model.replace('ollama-cloud/', '')
        b_info = bench_map.get(model, {})
        history = a.get('history', [])

        agents.append({
            'name': name,
            'model': model,
            'mode': c.get('mode', 'subagent'),
            'description': c.get('description', ''),
            'category': c.get('category', 'General'),
            'color': c.get('color', '#8B5CF6'),
            'provider': c.get('provider', 'Ollama'),
            'variant': c.get('variant', ''),
            'fit_score': b.get('fit_score', 0),
            'instruction_following': b.get('instruction_following', 0),
            'swe_bench': b_info.get('swe'),
            'model_short': model_short,
            'model_name': b_info.get('name', model_short),
            'model_params': b_info.get('params', ''),
            'model_ctx': b_info.get('ctx', ''),
            'recommendations': len(c.get('recommendations', [])),
            'history_count': len(history),
            'latest_change': _latest_model_change(history)
        })

    # Build model summary from REAL agent assignments
    models = {}
    for a in agents:
        ms = a['model_short']
        if ms not in models:
            models[ms] = {
                'fit_agents': [],  # fit scores of agents on this model
                'name': a['model_name'],
                'params': a['model_params'],
                'ctx': a['model_ctx'],
                'agents': 0,
                'commands': 0,
                'avg_fit': 0
            }
        models[ms]['agents'] += 1
        models[ms]['fit_agents'].append(a['fit_score'])

    # Calculate averages, then drop the temporary score list
    for m in models.values():
        fits = m.pop('fit_agents')
        m['avg_fit'] = round(sum(fits) / len(fits), 1) if fits else 0
        m['min_fit'] = min(fits) if fits else 0
        m['max_fit'] = max(fits) if fits else 0

    # Model distribution
    model_dist = Counter(a['model'] for a in agents)

    return {
        'generated': datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'),
        'source': 'evolution.json',
        'total_agents': len(agents),
        'total_models': len(models),
        'agents': agents,
        'models': models,
        'model_distribution': dict(model_dist)
    }


def format_agent_line(a):
    """Format one agent entry for the console summary.

    Scores are converted with ``!s`` before padding: the former ``:3d`` spec
    raised ValueError for float scores. Integer scores print exactly as before.
    """
    return (f'  {a["name"]:25} fit={a["fit_score"]!s:>3} '
            f'if={a["instruction_following"]!s:>3} {a["model_short"]:25}')


def main():
    """Generate the dashboard data file and print a short summary."""
    data = build_dashboard_data()
    out = OUTPUT_PATH
    os.makedirs(os.path.dirname(out), exist_ok=True)
    # ensure_ascii=False emits raw non-ASCII text, so pin the file encoding.
    with open(out, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    print(f'Written {out}: {len(data["agents"])} agents, {len(data["models"])} models')
    print('Sample agents:')
    for a in data['agents'][:5]:
        print(format_agent_line(a))
    print('Models:')
    for ms, m in sorted(data['models'].items(), key=lambda x: -x[1]['agents']):
        print(f'  {ms:20} avg_fit={m["avg_fit"]:5.1f} agents={m["agents"]}')


if __name__ == '__main__':
    main()