feat: bidirectional research dashboard + agent config fixes

- Integrate apaw_agent_model_research_v3.html as standalone dashboard
- Add model-benchmarks.json with 32 agents, 11 scored models, 11 recommendations
- Add build-research-dashboard.ts: inject live data into template → standalone HTML
- Add rebuild-template.cjs: regenerate template from v3.html source
- Add sync-benchmarks-from-yaml.cjs: sync YAML → JSON round-trip
- Add sync-model-research.ts: apply recommendation matrix to config files
- Add model-benchmarks.schema.json and model-research.schema.json for validation
- Add bidirectional-data-flow.md architecture documentation
- Add log-execution.cjs pipeline hook
- Update capability-index.yaml: add fallback_models, failover_strategy
- Update kilo-meta.json, kilo.jsonc, KILO_SPEC.md with synced models
- Update evolution.md / research.md / self-evolution.md / evolutionary-sync.md docs
- Fix security-auditor.md: quote YAML color (#DC2626)
- Fix orchestrator.md: remove duplicate devops-engineer key
- Build research-dashboard.html (106KB standalone) + dated archive
2026-04-29 21:04:22 +01:00
parent 2ae7789802
commit 3badb259cc
29 changed files with 13779 additions and 992 deletions
--- a/agent-evolution/data/agent-versions.json
+++ b/agent-evolution/data/agent-versions.json
@@ -1,12 +1,12 @@
 {
  "version": "1.0.0",
-  "lastUpdated": "2026-04-23T06:24:32.543Z",
+  "lastUpdated": "2026-04-27T20:28:58.592Z",
  "agents": {
    "lead-developer": {
      "current": {
        "description": "Primary code writer for backend and core logic. Writes implementation to pass tests",
        "mode": "subagent",
-        "model": "ollama-cloud/qwen3-coder:480b",
+        "model": "ollama-cloud/nemotron-3-super",
        "provider": "Ollama",
        "variant": "thinking",
        "color": "\"#DC2626\"",
@@ -27,6 +27,24 @@
          "to": "ollama-cloud/qwen3-coder:480b",
          "reason": "Initial configuration from capability-index.yaml",
          "source": "git"
+        },
+        {
+          "date": "2026-04-27T16:56:09.013Z",
+          "commit": "model-research-sync",
+          "type": "model_change",
+          "from": "ollama-cloud/qwen3-coder:480b",
+          "to": "ollama-cloud/nemotron-3-super",
+          "reason": "Nemotron 3 Super has better reasoning for core development tasks and RULER@1M context window. SWE-bench 68% vs Qwen's 66.5%.",
+          "source": "research"
+        },
+        {
+          "date": "2026-04-27T20:28:58.592Z",
+          "commit": "model-research-sync",
+          "type": "model_change",
+          "from": "ollama-cloud/qwen3-coder:480b",
+          "to": "ollama-cloud/nemotron-3-super",
+          "reason": "Nemotron 3 Super has better reasoning for core development tasks and RULER@1M context window. SWE-bench 68% vs Qwen's 66.5%.",
+          "source": "research"
        }
      ],
      "performance_log": []
@@ -255,7 +273,7 @@
      "current": {
        "description": "Designs technical specifications, data schemas, and API contracts before implementation",
        "mode": "subagent",
-        "model": "ollama-cloud/glm-5.1",
+        "model": "ollama-cloud/nemotron-3-super",
        "provider": "Ollama",
        "variant": "thinking",
        "color": "\"#0891B2\"",
@@ -285,6 +303,15 @@
          "to": "ollama-cloud/glm-5.1",
          "reason": "Model update from sync",
          "source": "git"
+        },
+        {
+          "date": "2026-04-27T16:59:52.825Z",
+          "commit": "model-research-sync",
+          "type": "model_change",
+          "from": "ollama-cloud/glm-5.1",
+          "to": "ollama-cloud/nemotron-3-super",
+          "reason": "Test recommendation for model research sync script",
+          "source": "research"
        }
      ],
      "performance_log": []