Files
APAW/agent-evolution/scripts/sync-benchmarks-from-yaml.cjs
¨NW¨ 3badb259cc feat: bidirectional research dashboard + agent config fixes
- Integrate apaw_agent_model_research_v3.html as standalone dashboard
- Add model-benchmarks.json with 32 agents, 11 scored models, 11 recommendations
- Add build-research-dashboard.ts: inject live data into template → standalone HTML
- Add rebuild-template.cjs: regenerate template from v3.html source
- Add sync-benchmarks-from-yaml.cjs: sync YAML → JSON round-trip
- Add sync-model-research.ts: apply recommendation matrix to config files
- Add model-benchmarks.schema.json and model-research.schema.json for validation
- Add bidirectional-data-flow.md architecture documentation
- Add log-execution.cjs pipeline hook
- Update capability-index.yaml: add fallback_models, failover_strategy
- Update kilo-meta.json, kilo.jsonc, KILO_SPEC.md with synced models
- Update evolution.md / research.md / self-evolution.md / evolutionary-sync.md docs
- Fix security-auditor.md: quote YAML color (#DC2626)
- Fix orchestrator.md: remove duplicate devops-engineer key
- Build research-dashboard.html (106KB standalone) + dated archive
2026-04-29 21:04:22 +01:00

137 lines
4.5 KiB
JavaScript

const fs = require('fs');
/**
 * Normalise a raw YAML scalar taken from the right-hand side of `key: value`
 * or from a `- item` line.
 *
 * A value that starts with a single or double quote is reduced to the text
 * between that quote and the next matching one (escape sequences are not
 * interpreted). Any other value has a trailing ` # comment` removed; a `#`
 * that is not preceded by whitespace is kept as part of the value.
 *
 * @param {string} raw - Text following the `:` or the `- ` marker.
 * @returns {string} The cleaned scalar.
 */
function cleanYamlScalar(raw) {
  const text = raw.trim();
  const quote = text[0];
  if (quote === '"' || quote === "'") {
    const closing = text.indexOf(quote, 1);
    if (closing > 0) {
      return text.slice(1, closing);
    }
  }
  return text.replace(/\s+#.*$/, '').trim();
}

/**
 * Parse the subset of YAML used by the capability index: 2-space indentation,
 * agent names at indent 2, agent properties at indent 4 and list items at
 * indent 6 or deeper. This is not a general YAML parser.
 *
 * Blank lines and full-line comments are ignored, so they neither create
 * bogus agents nor interrupt a list. Property lines that appear before any
 * agent header are skipped instead of throwing.
 *
 * @param {string} text - Raw contents of the YAML file.
 * @returns {Object<string, Object<string, (string|string[])>>} Map from agent
 *   name to its properties. Only entries that have a `model` property or at
 *   least one of the known list properties are returned.
 */
function parseCapabilityIndex(text) {
  const agents = {};
  let currentAgent = null;
  let currentList = '';

  for (const line of text.split(/\r?\n/)) {
    const trimmed = line.trim();
    // Blank lines and comments carry no structure; skipping them keeps the
    // current agent/list context intact.
    if (trimmed === '' || trimmed.startsWith('#')) {
      continue;
    }

    const indent = line.length - line.trimStart().length;
    const isKeyLine = !trimmed.startsWith('-');

    if (indent === 2 && isKeyLine && trimmed.endsWith(':')) {
      // Agent (or section) header: start a fresh property bag.
      currentAgent = trimmed.slice(0, -1);
      agents[currentAgent] = {};
      currentList = '';
      continue;
    }

    if (indent === 4 && isKeyLine && trimmed.includes(':')) {
      // A property with no owning agent cannot be stored anywhere.
      if (currentAgent === null) {
        continue;
      }
      const props = agents[currentAgent];
      if (trimmed.endsWith(':')) {
        // `key:` with no inline value opens a list.
        const key = trimmed.slice(0, -1);
        currentList = key;
        if (!Array.isArray(props[key])) {
          props[key] = [];
        }
      } else {
        // `key: value` - split on the first colon only, because values such
        // as model ids may themselves contain colons.
        const separator = trimmed.indexOf(':');
        const key = trimmed.slice(0, separator).trim();
        props[key] = cleanYamlScalar(trimmed.slice(separator + 1));
        currentList = '';
      }
      continue;
    }

    if (indent >= 6 && trimmed.startsWith('- ')) {
      // List item; ignored unless a list was opened for the current agent.
      if (currentList) {
        agents[currentAgent][currentList].push(cleanYamlScalar(trimmed.slice(2)));
      }
      continue;
    }

    // Any other shallow line ends the current list.
    if (indent < 4) {
      currentList = '';
    }
  }

  // Drop indent-2 sections that are not agents (flat sections such as
  // capability_routing): an agent has a model or at least one known list.
  const agentListKeys = ['capabilities', 'receives', 'produces', 'forbidden', 'delegates_to', 'fallback_models'];
  const result = {};
  for (const [name, data] of Object.entries(agents)) {
    const looksLikeAgent = 'model' in data || agentListKeys.some((key) => key in data);
    if (looksLikeAgent) {
      result[name] = data;
    }
  }
  return result;
}
// ---------------------------------------------------------------------------
// Sync script: rebuilds `agent_current_config` and `agent_model_scores` in the
// benchmarks JSON from the agents declared in the capability index YAML, then
// rewrites the JSON file in place.
// Both paths are relative, so they resolve against the current working
// directory of the process.
// ---------------------------------------------------------------------------
const CAPABILITY_INDEX_PATH = '.kilo/capability-index.yaml';
const BENCHMARKS_PATH = 'agent-evolution/data/model-benchmarks.json';
const OLLAMA_CLOUD_PREFIX = 'ollama-cloud/';

// Ordered [substring, badge] pairs: the first substring found in the model id
// decides the badge; DEFAULT_BADGE is used when none match.
const BADGE_RULES = [
  ['qwen3', 'qwen'],
  ['minimax', 'minimax'],
  ['nemotron', 'nemotron'],
  ['glm', 'glm'],
  ['kimi', 'kimi'],
  ['deepseek', 'deepseek'],
];
const DEFAULT_BADGE = 'groq';

/**
 * Remove the provider prefix from a model reference. Only a leading prefix is
 * stripped; the same text occurring later in the string is left alone.
 *
 * @param {string} rawModel - Model reference as written in the YAML.
 * @returns {string} Model id without the `ollama-cloud/` prefix.
 */
function toModelId(rawModel) {
  return rawModel.startsWith(OLLAMA_CLOUD_PREFIX)
    ? rawModel.slice(OLLAMA_CLOUD_PREFIX.length)
    : rawModel;
}

const yaml = fs.readFileSync(CAPABILITY_INDEX_PATH, 'utf8');
const parsed = parseCapabilityIndex(yaml);

// Resolve each agent's model once so both output sections agree by construction.
const agentEntries = Object.entries(parsed).map(([agent, data]) => {
  // `model` can be a non-string (e.g. an array when the YAML has a bare
  // `model:` header); treat that as "no model" rather than crashing.
  const rawModel = typeof data.model === 'string' ? data.model : '';
  return { agent, data, rawModel, modelId: toModelId(rawModel) };
});
console.log('Parsed agents:', agentEntries.length);

// Read existing benchmarks
const bench = JSON.parse(fs.readFileSync(BENCHMARKS_PATH, 'utf8'));
// A benchmarks file without a `models` array yields index -1 for every agent
// instead of a TypeError.
const knownModels = Array.isArray(bench.models) ? bench.models : [];

// Update agent_current_config
// NOTE(review): category, fit_score, status and previous_model are written as
// fixed literals, so any values already stored for these fields are replaced
// on every run - confirm this reset is intended.
bench.agent_current_config = agentEntries.map(({ agent, data, rawModel, modelId }) => {
  const badgeRule = BADGE_RULES.find(([needle]) => modelId.includes(needle));
  const isCloud = data.mode === 'all' || rawModel.startsWith(OLLAMA_CLOUD_PREFIX);
  return {
    agent,
    model: rawModel,
    provider: isCloud ? 'Ollama Cloud' : 'Ollama',
    category: 'Process',
    badge_type: badgeRule ? badgeRule[1] : DEFAULT_BADGE,
    fit_score: 0,
    status: 'good',
    previous_model: null,
  };
});

// Update agent_model_scores - keep each agent's existing scores, refresh the
// model reference. A Map avoids accidental hits on Object.prototype keys.
const previousScores = new Map(
  (bench.agent_model_scores || []).map((entry) => [entry.agent, entry.scores || {}]),
);
bench.agent_model_scores = agentEntries.map(({ agent, data, modelId }) => ({
  agent,
  // findIndex already returns -1 when the model is not in the list.
  current_model_index: knownModels.findIndex((model) => model.id === modelId),
  current_model_id: modelId,
  reasoning_effort: data.variant === 'thinking' ? 'H' : 'M',
  scores: previousScores.get(agent) || {},
}));

// Update metadata
bench.generated = new Date().toISOString();
bench.source = '.kilo/capability-index.yaml (synced v2)';
bench.total_agents = bench.agent_current_config.length;
fs.writeFileSync(BENCHMARKS_PATH, JSON.stringify(bench, null, 2));
console.log('Synced', bench.agent_current_config.length, 'agents');
console.log('Generated:', bench.generated);

// Verify: every config entry must reference the same model id as its score
// entry. Indexed by agent so the check is a single pass.
const scoresByAgent = new Map(bench.agent_model_scores.map((entry) => [entry.agent, entry]));
let mismatches = 0;
for (const config of bench.agent_current_config) {
  const scoreEntry = scoresByAgent.get(config.agent);
  if (scoreEntry && scoreEntry.current_model_id !== toModelId(config.model)) {
    console.log(' MISMATCH:', config.agent, scoreEntry.current_model_id, '->', config.model);
    mismatches++;
  }
}
console.log('Mismatches:', mismatches);