Files
APAW/agent-evolution/data/model-research-latest.json
3badb259cc feat: bidirectional research dashboard + agent config fixes
- Integrate apaw_agent_model_research_v3.html as standalone dashboard
- Add model-benchmarks.json with 32 agents, 11 scored models, 11 recommendations
- Add build-research-dashboard.ts: inject live data into template → standalone HTML
- Add rebuild-template.cjs: regenerate template from v3.html source
- Add sync-benchmarks-from-yaml.cjs: sync YAML → JSON round-trip
- Add sync-model-research.ts: apply recommendation matrix to config files
- Add model-benchmarks.schema.json and model-research.schema.json for validation
- Add bidirectional-data-flow.md architecture documentation
- Add log-execution.cjs pipeline hook
- Update capability-index.yaml: add fallback_models, failover_strategy
- Update kilo-meta.json, kilo.jsonc, KILO_SPEC.md with synced models
- Update evolution.md / research.md / self-evolution.md / evolutionary-sync.md docs
- Fix security-auditor.md: quote YAML color (#DC2626)
- Fix orchestrator.md: remove duplicate devops-engineer key
- Build research-dashboard.html (106KB standalone) + dated archive
2026-04-29 21:04:22 +01:00

59 lines
1.8 KiB
JSON

{
"version": "1.0.0",
"generated": "2026-04-27T17:51:36.000Z",
"source": "/research model-optimization",
"models": [],
"recommendations": [
{
"agent": "lead-developer",
"action": "update_model",
"current_model": "ollama-cloud/qwen3-coder:480b",
"current_provider": "ollama-cloud",
"recommended_model": "ollama-cloud/nemotron-3-super",
"recommended_provider": "ollama-cloud",
"impact": "high",
"expected_improvement": {
"quality": "+15%",
"speed": "+20%",
"context_window": "1M→1M"
},
"score_before": 85,
"score_after": 92,
"score_delta": 7,
"rationale": "Nemotron 3 Super has better reasoning for core development tasks and a RULER@1M context window. SWE-bench: 68% vs. 66.5% for Qwen.",
"applied": false,
"applied_date": null
},
{
"agent": "devops-engineer",
"action": "confirm_model",
"current_model": "ollama-cloud/nemotron-3-super",
"current_provider": "ollama-cloud",
"recommended_model": "ollama-cloud/nemotron-3-super",
"recommended_provider": "ollama-cloud",
"impact": "low",
"expected_improvement": {
"quality": "0%",
"speed": "0%",
"context_window": "1M→1M"
},
"score_before": 88,
"score_after": 88,
"score_delta": 0,
"rationale": "The current model is already optimal for DevOps tasks. Nemotron 3 Super's RULER@1M context window is critical for parsing complex Docker/Compose configs.",
"applied": false,
"applied_date": null
}
],
"heatmap": {},
"closed_source_comparison": {},
"capability_index_patch": [],
"summary": {
"avg_quality_improvement": "+7.5%",
"providers_used": ["ollama-cloud"],
"key_models": ["nemotron-3-super"],
"total_recommendations": 2,
"applied_count": 0,
"pending_count": 2
}
}