- Integrate apaw_agent_model_research_v3.html as standalone dashboard
- Add model-benchmarks.json with 32 agents, 11 scored models, 11 recommendations
- Add build-research-dashboard.ts: inject live data into template → standalone HTML
- Add rebuild-template.cjs: regenerate template from v3.html source
- Add sync-benchmarks-from-yaml.cjs: sync YAML → JSON round-trip
- Add sync-model-research.ts: apply recommendation matrix to config files
- Add model-benchmarks.schema.json and model-research.schema.json for validation
- Add bidirectional-data-flow.md architecture documentation
- Add log-execution.cjs pipeline hook
- Update capability-index.yaml: add fallback_models, failover_strategy
- Update kilo-meta.json, kilo.jsonc, KILO_SPEC.md with synced models
- Update evolution.md / research.md / self-evolution.md / evolutionary-sync.md docs
- Fix security-auditor.md: quote YAML color (#DC2626)
- Fix orchestrator.md: remove duplicate devops-engineer key
- Build research-dashboard.html (106KB standalone) + dated archive
59 lines
1.8 KiB
JSON
59 lines
1.8 KiB
JSON
{
  "version": "1.0.0",
  "generated": "2026-04-27T17:51:36.000Z",
  "source": "/research model-optimization",
  "models": [],
  "recommendations": [
    {
      "agent": "lead-developer",
      "action": "update_model",
      "current_model": "ollama-cloud/qwen3-coder:480b",
      "current_provider": "ollama-cloud",
      "recommended_model": "ollama-cloud/nemotron-3-super",
      "recommended_provider": "ollama-cloud",
      "impact": "high",
      "expected_improvement": {
        "quality": "+15%",
        "speed": "+20%",
        "context_window": "1M→1M"
      },
      "score_before": 85,
      "score_after": 92,
      "score_delta": 7,
      "rationale": "Nemotron 3 Super has better reasoning for core development tasks and RULER@1M context window. SWE-bench 68% vs Qwen's 66.5%.",
      "applied": false,
      "applied_date": null
    },
    {
      "agent": "devops-engineer",
      "action": "confirm_model",
      "current_model": "ollama-cloud/nemotron-3-super",
      "current_provider": "ollama-cloud",
      "recommended_model": "ollama-cloud/nemotron-3-super",
      "recommended_provider": "ollama-cloud",
      "impact": "low",
      "expected_improvement": {
        "quality": "0%",
        "speed": "0%",
        "context_window": "1M→1M"
      },
      "score_before": 88,
      "score_after": 88,
      "score_delta": 0,
      "rationale": "Current model already optimal for DevOps tasks. Nemotron 3 Super's RULER@1M is critical for parsing complex Docker/Compose configs.",
      "applied": false,
      "applied_date": null
    }
  ],
  "heatmap": {},
  "closed_source_comparison": {},
  "capability_index_patch": [],
  "summary": {
    "avg_quality_improvement": "+7.5%",
    "providers_used": ["ollama-cloud"],
    "key_models": ["nemotron-3-super"],
    "total_recommendations": 2,
    "applied_count": 0,
    "pending_count": 2
  }
}