- Integrate apaw_agent_model_research_v3.html as standalone dashboard
- Add model-benchmarks.json with 32 agents, 11 scored models, 11 recommendations
- Add build-research-dashboard.ts: inject live data into template → standalone HTML
- Add rebuild-template.cjs: regenerate template from v3.html source
- Add sync-benchmarks-from-yaml.cjs: sync YAML → JSON round-trip
- Add sync-model-research.ts: apply recommendation matrix to config files
- Add model-benchmarks.schema.json and model-research.schema.json for validation
- Add bidirectional-data-flow.md architecture documentation
- Add log-execution.cjs pipeline hook
- Update capability-index.yaml: add fallback_models, failover_strategy
- Update kilo-meta.json, kilo.jsonc, KILO_SPEC.md with synced models
- Update evolution.md / research.md / self-evolution.md / evolutionary-sync.md docs
- Fix security-auditor.md: quote YAML color (#DC2626)
- Fix orchestrator.md: remove duplicate devops-engineer key
- Build research-dashboard.html (106KB standalone) + dated archive
331 lines
8.3 KiB
JSON
331 lines
8.3 KiB
JSON
{
|
||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||
"$id": "https://app.kilo.ai/model-research.schema.json",
|
||
"title": "APAW Model Research Output",
|
||
"description": "Schema for automated model research and recommendation output",
|
||
"type": "object",
|
||
"required": ["version", "generated", "source", "models", "recommendations", "heatmap"],
|
||
"properties": {
|
||
"version": {
|
||
"type": "string",
|
||
"const": "1.0.0"
|
||
},
|
||
"generated": {
|
||
"type": "string",
|
||
"format": "date-time"
|
||
},
|
||
"source": {
|
||
"type": "string",
|
||
"description": "What triggered this research (e.g. /evolution, /research, manual)"
|
||
},
|
||
"trigger": {
|
||
"type": "object",
|
||
"properties": {
|
||
"type": {
|
||
"type": "string",
|
||
"enum": ["evolution_cycle", "manual_research", "fitness_below_threshold", "scheduled"]
|
||
},
|
||
"issue": {
|
||
"type": "integer"
|
||
},
|
||
"fitness_score": {
|
||
"type": "number"
|
||
},
|
||
"reason": {
|
||
"type": "string"
|
||
}
|
||
}
|
||
},
|
||
"models": {
|
||
"type": "array",
|
||
"items": {
|
||
"type": "object",
|
||
"required": ["id", "name", "organization", "if_score", "provider"],
|
||
"properties": {
|
||
"id": {
|
||
"type": "string",
|
||
"description": "Full model ID like ollama-cloud/qwen3-coder:480b"
|
||
},
|
||
"name": {
|
||
"type": "string"
|
||
},
|
||
"organization": {
|
||
"type": "string"
|
||
},
|
||
"parameters": {
|
||
"type": "string"
|
||
},
|
||
"context_window": {
|
||
"type": "string"
|
||
},
|
||
"swe_bench": {
|
||
"type": ["number", "null"]
|
||
},
|
||
"swe_bench_pro": {
|
||
"type": ["number", "null"]
|
||
},
|
||
"terminal_bench": {
|
||
"type": ["number", "null"]
|
||
},
|
||
"live_codebench": {
|
||
"type": ["number", "null"]
|
||
},
|
||
"gpqa": {
|
||
"type": ["number", "null"]
|
||
},
|
||
"hle": {
|
||
"type": ["number", "null"]
|
||
},
|
||
"browse_comp": {
|
||
"type": ["number", "null"]
|
||
},
|
||
"if_score": {
|
||
"type": "number",
|
||
"minimum": 0,
|
||
"maximum": 100,
|
||
"description": "Instruction Following composite score (IFEval + IFBench)"
|
||
},
|
||
"categories": {
|
||
"type": "array",
|
||
"items": {
|
||
"type": "string"
|
||
}
|
||
},
|
||
"tags": {
|
||
"type": "array",
|
||
"items": {
|
||
"type": "string"
|
||
}
|
||
},
|
||
"provider": {
|
||
"type": "string",
|
||
"enum": ["ollama", "ollama-cloud", "openrouter", "groq", "hybrid"]
|
||
},
|
||
"free": {
|
||
"type": "boolean"
|
||
},
|
||
"cost_per_1m_input": {
|
||
"type": ["number", "string", "null"]
|
||
},
|
||
"description": {
|
||
"type": "string"
|
||
},
|
||
"availability": {
|
||
"type": "object",
|
||
"properties": {
|
||
"rpm": {
|
||
"type": ["integer", "null"]
|
||
},
|
||
"rpd": {
|
||
"type": ["integer", "string", "null"]
|
||
},
|
||
"tpm": {
|
||
"type": ["integer", "string", "null"]
|
||
},
|
||
"tpd": {
|
||
"type": ["integer", "string", "null"]
|
||
}
|
||
}
|
||
},
|
||
"speed_tps": {
|
||
"type": ["number", "string", "null"]
|
||
}
|
||
}
|
||
}
|
||
},
|
||
"recommendations": {
|
||
"type": "array",
|
||
"items": {
|
||
"type": "object",
|
||
"required": ["agent", "action", "current_model", "recommended_model", "impact", "rationale"],
|
||
"properties": {
|
||
"agent": {
|
||
"type": "string"
|
||
},
|
||
"action": {
|
||
"type": "string",
|
||
"enum": ["update_model", "confirm_model", "add_fallback", "redesign_agent"]
|
||
},
|
||
"current_model": {
|
||
"type": "string"
|
||
},
|
||
"current_provider": {
|
||
"type": "string"
|
||
},
|
||
"recommended_model": {
|
||
"type": "string"
|
||
},
|
||
"recommended_provider": {
|
||
"type": "string"
|
||
},
|
||
"fallback_model": {
|
||
"type": "string"
|
||
},
|
||
"fallback_strategy": {
|
||
"type": "string"
|
||
},
|
||
"impact": {
|
||
"type": "string",
|
||
"enum": ["critical", "high", "medium", "low"]
|
||
},
|
||
"expected_improvement": {
|
||
"type": "object",
|
||
"properties": {
|
||
"quality": {
|
||
"type": "string"
|
||
},
|
||
"speed": {
|
||
"type": "string"
|
||
},
|
||
"context_window": {
|
||
"type": "string"
|
||
}
|
||
}
|
||
},
|
||
"score_before": {
|
||
"type": "number"
|
||
},
|
||
"score_after": {
|
||
"type": "number"
|
||
},
|
||
"score_delta": {
|
||
"type": "number"
|
||
},
|
||
"rationale": {
|
||
"type": "string"
|
||
},
|
||
"applied": {
|
||
"type": "boolean",
|
||
"default": false
|
||
},
|
||
"applied_date": {
|
||
"type": ["string", "null"],
|
||
"format": "date-time"
|
||
}
|
||
}
|
||
}
|
||
},
|
||
"heatmap": {
|
||
"type": "object",
|
||
"description": "Agent × Model compatibility matrix with IF adjustment",
|
||
"required": ["models", "agents"],
|
||
"properties": {
|
||
"models": {
|
||
"type": "array",
|
||
"items": {
|
||
"type": "object",
|
||
"required": ["id", "if_score"],
|
||
"properties": {
|
||
"id": {
|
||
"type": "string"
|
||
},
|
||
"display_name": {
|
||
"type": "string"
|
||
},
|
||
"provider": {
|
||
"type": "string"
|
||
},
|
||
"if_score": {
|
||
"type": "number"
|
||
}
|
||
}
|
||
}
|
||
},
|
||
"agents": {
|
||
"type": "array",
|
||
"items": {
|
||
"type": "object",
|
||
"required": ["agent", "reasoning_effort", "scores"],
|
||
"properties": {
|
||
"agent": {
|
||
"type": "string"
|
||
},
|
||
"current_model": {
|
||
"type": "string"
|
||
},
|
||
"reasoning_effort": {
|
||
"type": "string",
|
||
"enum": ["L", "M", "H"]
|
||
},
|
||
"scores": {
|
||
"type": "object",
|
||
"additionalProperties": {
|
||
"type": "number"
|
||
},
|
||
"description": "Model ID → compatibility score (0-100, IF-adjusted)"
|
||
}
|
||
}
|
||
}
|
||
},
|
||
"if_adjustment_formula": {
|
||
"type": "string",
|
||
"default": "score * (0.7 + 0.3 * IF/100)"
|
||
}
|
||
}
|
||
},
|
||
"closed_source_comparison": {
|
||
"type": "object",
|
||
"description": "APAW pipeline models vs top closed-source models",
|
||
"properties": {
|
||
"benchmarks": {
|
||
"type": "array"
|
||
},
|
||
"models": {
|
||
"type": "array"
|
||
},
|
||
"apaw_best_per_benchmark": {
|
||
"type": "object"
|
||
},
|
||
"closed_best_per_benchmark": {
|
||
"type": "object"
|
||
}
|
||
}
|
||
},
|
||
"capability_index_patch": {
|
||
"type": "array",
|
||
"description": "Ready-to-apply patches to capability-index.yaml",
|
||
"items": {
|
||
"type": "object",
|
||
"required": ["agent", "set"],
|
||
"properties": {
|
||
"agent": {
|
||
"type": "string"
|
||
},
|
||
"set": {
|
||
"type": "object",
|
||
"additionalProperties": true
|
||
}
|
||
}
|
||
}
|
||
},
|
||
"summary": {
|
||
"type": "object",
|
||
"properties": {
|
||
"avg_quality_improvement": {
|
||
"type": "string"
|
||
},
|
||
"providers_used": {
|
||
"type": "array",
|
||
"items": {
|
||
"type": "string"
|
||
}
|
||
},
|
||
"key_models": {
|
||
"type": "array",
|
||
"items": {
|
||
"type": "string"
|
||
}
|
||
},
|
||
"total_recommendations": {
|
||
"type": "integer"
|
||
},
|
||
"applied_count": {
|
||
"type": "integer"
|
||
},
|
||
"pending_count": {
|
||
"type": "integer"
|
||
}
|
||
}
|
||
}
|
||
}
|
||
} |