// Commit summary:
// - Integrate apaw_agent_model_research_v3.html as standalone dashboard
// - Add model-benchmarks.json with 32 agents, 11 scored models, 11 recommendations
// - Add build-research-dashboard.ts: inject live data into template → standalone HTML
// - Add rebuild-template.cjs: regenerate template from v3.html source
// - Add sync-benchmarks-from-yaml.cjs: sync YAML → JSON round-trip
// - Add sync-model-research.ts: apply recommendation matrix to config files
// - Add model-benchmarks.schema.json and model-research.schema.json for validation
// - Add bidirectional-data-flow.md architecture documentation
// - Add log-execution.cjs pipeline hook
// - Update capability-index.yaml: add fallback_models, failover_strategy
// - Update kilo-meta.json, kilo.jsonc, KILO_SPEC.md with synced models
// - Update evolution.md / research.md / self-evolution.md / evolutionary-sync.md docs
// - Fix security-auditor.md: quote YAML color (#DC2626)
// - Fix orchestrator.md: remove duplicate devops-engineer key
// - Build research-dashboard.html (106KB standalone) + dated archive
//
// File: 137 lines, 4.5 KiB, JavaScript
const fs = require('fs');
|
|
|
|
/**
 * Parse agent records out of a capability index written in a restricted YAML
 * subset that uses 2-space indentation.
 *
 * Recognised line shapes:
 *   - indent 0 (any content)   -> a new top-level section; ends the current agent
 *   - indent 2, `name:`        -> starts a new agent record
 *   - indent 4, `key:`         -> starts a list property on the current agent
 *   - indent 4, `key: value`   -> scalar property (value is kept as a string)
 *   - indent >= 6, `- item`    -> appended to the most recently started list
 *
 * Blank lines and comment-only lines (`# ...`) are ignored. A matching pair of
 * surrounding single or double quotes is removed from scalar values and list
 * items. This is not a general YAML parser: nested maps, flow collections,
 * multi-line scalars and inline trailing comments are not interpreted.
 *
 * @param {string} text - Raw YAML text.
 * @returns {Object<string, Object<string, (string|string[])>>} Map of agent
 *   name to its properties. Indent-2 entries that carry none of the known
 *   agent properties (e.g. flat routing sections) are left out.
 */
function parseCapabilityIndex(text) {
  // An indent-2 entry counts as an agent only if it has at least one of these.
  const agentKeys = [
    'capabilities',
    'receives',
    'produces',
    'forbidden',
    'delegates_to',
    'fallback_models',
    'model',
  ];

  // Drop one matching pair of surrounding quotes: "x" -> x, 'x' -> x.
  const stripQuotes = (raw) => {
    const first = raw[0];
    const isQuoted =
      raw.length >= 2 &&
      (first === '"' || first === "'") &&
      raw[raw.length - 1] === first;
    return isQuoted ? raw.slice(1, -1) : raw;
  };

  const agents = {};
  let current = null; // property bag of the agent being filled, if any
  let currentList = ''; // key of the list that `- item` lines append to

  for (const line of text.split(/\r?\n/)) {
    const trimmed = line.trim();

    // Blank and comment-only lines carry no structure. Skipping them keeps a
    // list open across blank lines and stops `# foo:` from being read as a key.
    if (trimmed === '' || trimmed.startsWith('#')) continue;

    const indent = line.length - line.trimStart().length;
    const isDashLine = trimmed.startsWith('-');

    if (indent === 0) {
      // A new top-level section: nothing below it belongs to the last agent.
      current = null;
      currentList = '';
      continue;
    }

    if (indent === 2 && !isDashLine && trimmed.endsWith(':')) {
      current = {};
      agents[trimmed.slice(0, -1)] = current;
      currentList = '';
      continue;
    }

    if (indent === 4 && !isDashLine && trimmed.includes(':')) {
      // A property with no owning agent is ignored instead of throwing.
      if (current === null) continue;

      if (trimmed.endsWith(':')) {
        // `key:` opens a list; an existing list under the same key is reused.
        const key = trimmed.slice(0, -1);
        currentList = key;
        if (!Array.isArray(current[key])) current[key] = [];
      } else {
        // Split on the first colon only, so values such as `name:tag` survive.
        const sep = trimmed.indexOf(':');
        const key = trimmed.slice(0, sep).trim();
        current[key] = stripQuotes(trimmed.slice(sep + 1).trim());
        currentList = '';
      }
      continue;
    }

    if (indent >= 6 && trimmed.startsWith('- ')) {
      if (current !== null && currentList) {
        current[currentList].push(stripQuotes(trimmed.slice(2).trim()));
      }
      continue;
    }

    // Any other shallow line ends the open list.
    if (indent < 4) currentList = '';
  }

  // Keep only entries that look like agents (drops flat sections such as
  // routing tables that also happen to sit at indent 2).
  const result = {};
  for (const [name, data] of Object.entries(agents)) {
    if (agentKeys.some((key) => key in data)) result[name] = data;
  }
  return result;
}
|
|
|
|
// ---------------------------------------------------------------------------
// Sync script: copy the agent -> model assignment from the capability index
// into model-benchmarks.json. Both paths are relative to the current working
// directory.
// ---------------------------------------------------------------------------

const CLOUD_PREFIX = 'ollama-cloud/';

// Ordered [marker, badge] pairs: the first marker found in the model id wins.
const BADGE_MARKERS = [
  ['qwen3', 'qwen'],
  ['minimax', 'minimax'],
  ['nemotron', 'nemotron'],
  ['glm', 'glm'],
  ['kimi', 'kimi'],
  ['deepseek', 'deepseek'],
];

/** Remove a leading `ollama-cloud/` provider prefix from a model name. */
function toModelId(rawModel) {
  return rawModel.startsWith(CLOUD_PREFIX)
    ? rawModel.slice(CLOUD_PREFIX.length)
    : rawModel;
}

/** Pick the badge for a model id; ids matching no marker get 'groq'. */
function badgeFor(modelId) {
  const hit = BADGE_MARKERS.find(([marker]) => modelId.includes(marker));
  return hit ? hit[1] : 'groq';
}

/** Return the agent's model as a string ('' when absent or not a scalar). */
function modelOf(data) {
  return typeof data.model === 'string' ? data.model : '';
}

const yaml = fs.readFileSync('.kilo/capability-index.yaml', 'utf8');
const parsed = parseCapabilityIndex(yaml);
const agentEntries = Object.entries(parsed);
console.log('Parsed agents:', agentEntries.length);

// Read existing benchmarks
const bench = JSON.parse(fs.readFileSync('agent-evolution/data/model-benchmarks.json', 'utf8'));
const knownModels = Array.isArray(bench.models) ? bench.models : [];

// Index the previous state by agent so curated values survive the sync.
const priorConfig = new Map(
  (bench.agent_current_config || []).map((entry) => [entry.agent, entry]),
);
const priorScores = new Map(
  (bench.agent_model_scores || []).map((entry) => [entry.agent, entry.scores || {}]),
);

// Update agent_current_config
bench.agent_current_config = agentEntries.map(([agent, data]) => {
  const rawModel = modelOf(data);
  const prior = priorConfig.get(agent);
  const modelChanged = prior !== undefined && prior.model !== rawModel;
  // fit_score / status describe the agent's pairing with a specific model, so
  // they are carried over only while that model is unchanged.
  const keepRatings = prior !== undefined && !modelChanged;

  let previousModel = null;
  if (modelChanged) {
    // NOTE(review): assumes previous_model is meant to hold the model that was
    // assigned before the latest change — confirm against the dashboard.
    previousModel = prior.model || null;
  } else if (prior !== undefined && prior.previous_model !== undefined) {
    previousModel = prior.previous_model;
  }

  return {
    agent,
    model: rawModel,
    provider:
      data.mode === 'all' || rawModel.startsWith(CLOUD_PREFIX) ? 'Ollama Cloud' : 'Ollama',
    category: prior !== undefined && prior.category ? prior.category : 'Process',
    badge_type: badgeFor(toModelId(rawModel)),
    fit_score: keepRatings && typeof prior.fit_score === 'number' ? prior.fit_score : 0,
    status: keepRatings && prior.status ? prior.status : 'good',
    previous_model: previousModel,
  };
});

// Update agent_model_scores — preserve existing scores, fix current_model_id
bench.agent_model_scores = agentEntries.map(([agent, data]) => {
  const modelId = toModelId(modelOf(data));
  return {
    agent,
    // -1 when the model is not listed in bench.models.
    current_model_index: knownModels.findIndex((m) => Boolean(m) && m.id === modelId),
    current_model_id: modelId,
    reasoning_effort: data.variant === 'thinking' ? 'H' : 'M',
    scores: priorScores.get(agent) || {},
  };
});

// Update metadata
bench.generated = new Date().toISOString();
bench.source = '.kilo/capability-index.yaml (synced v2)';
bench.total_agents = bench.agent_current_config.length;

fs.writeFileSync('agent-evolution/data/model-benchmarks.json', JSON.stringify(bench, null, 2));
console.log('Synced', bench.agent_current_config.length, 'agents');
console.log('Generated:', bench.generated);

// Verify: both sections must agree on each agent's model id.
const scoresByAgent = new Map(bench.agent_model_scores.map((entry) => [entry.agent, entry]));
let mismatches = 0;
for (const config of bench.agent_current_config) {
  const entry = scoresByAgent.get(config.agent);
  if (entry && entry.current_model_id !== toModelId(config.model)) {
    console.log(' MISMATCH:', config.agent, entry.current_model_id, '->', config.model);
    mismatches++;
  }
}
console.log('Mismatches:', mismatches);
|