// APAW/agent-evolution/scripts/sync-benchmarks-from-yaml.cjs

const fs = require('fs');
const { runGate } = require('./lib/fitness-gate.cjs');

/**
 * Parse the agent entries of a capability index written in a restricted YAML
 * subset that uses 2-space indentation.
 *
 * Recognized line shapes (anything else is ignored):
 *   - indent 2,    `name:`       starts a new agent entry
 *   - indent 4,    `key:`        starts a list property on the current agent
 *   - indent 4,    `key: value`  sets a string property on the current agent
 *   - indent >= 6, `- value`     appends to the most recently opened list
 *
 * Values are stored as raw trimmed strings; quotes and inline comments are not
 * interpreted. Blank lines and full-line `#` comments are skipped entirely.
 *
 * @param {string} text - Raw YAML text.
 * @returns {Object<string, Object>} Agent name -> parsed properties, limited to
 *   entries that have a `model` key or at least one of the known list keys.
 */
function parseCapabilityIndex(text) {
  const agents = {};
  let currentAgent = '';
  let currentList = '';

  for (const line of text.split(/\r?\n/)) {
    const trimmed = line.trim();

    // Blank lines and comments carry no structure. Skipping them up front keeps
    // a blank line inside a list from closing it and keeps a comment that ends
    // in ':' from being mistaken for an agent header or a property key.
    if (trimmed === '' || trimmed.startsWith('#')) {
      continue;
    }

    const indent = line.length - line.trimStart().length;
    const isMappingLine = !trimmed.startsWith('-');

    if (indent === 2 && isMappingLine && trimmed.endsWith(':')) {
      currentAgent = trimmed.slice(0, -1);
      agents[currentAgent] = {};
      currentList = '';
      continue;
    }

    if (indent === 4 && isMappingLine && trimmed.includes(':')) {
      const agent = agents[currentAgent];
      if (!agent) {
        // Property seen before any agent header: nothing to attach it to.
        continue;
      }

      if (trimmed.endsWith(':')) {
        // `key:` with no inline value opens a list property.
        const key = trimmed.slice(0, -1);
        currentList = key;
        if (!Array.isArray(agent[key])) {
          agent[key] = [];
        }
      } else {
        // `key: value` — split on the first colon only so values may contain ':'.
        const separator = trimmed.indexOf(':');
        const key = trimmed.slice(0, separator).trim();
        agent[key] = trimmed.slice(separator + 1).trim();
        currentList = '';
      }
      continue;
    }

    if (indent >= 6 && trimmed.startsWith('- ')) {
      if (currentList) {
        const agent = agents[currentAgent];
        if (!agent[currentList]) {
          agent[currentList] = [];
        }
        agent[currentList].push(trimmed.slice(2).trim());
      }
      continue;
    }

    // Any other content shallower than a property closes the open list.
    if (indent < 4) {
      currentList = '';
    }
  }

  // Keep only entries that look like agents; other indent-2 sections are dropped.
  const listKeys = ['capabilities', 'receives', 'produces', 'forbidden', 'delegates_to', 'fallback_models'];
  const result = {};
  for (const [name, data] of Object.entries(agents)) {
    const hasAgentProps = 'model' in data || listKeys.some((key) => key in data);
    if (hasAgentProps) {
      result[name] = data;
    }
  }

  return result;
}

// Load the capability index (path is relative to the working directory) and
// parse its agent entries.
const yaml = fs.readFileSync('.kilo/capability-index.yaml', { encoding: 'utf8' });
const parsed = parseCapabilityIndex(yaml);
const parsedAgentCount = Object.keys(parsed).length;
console.log('Parsed agents:', parsedAgentCount);

// Load the existing benchmarks document.
const benchJson = fs.readFileSync('agent-evolution/data/model-benchmarks.json', { encoding: 'utf8' });
const bench = JSON.parse(benchJson);

// === FITNESS GATE: validate model changes ===
// Index the previously recorded model per agent. The lookup has no prototype so
// an agent name such as "constructor" cannot resolve to an inherited
// Object.prototype member and be mistaken for a recorded model.
const oldConfig = Object.create(null);
for (const entry of bench.agent_current_config || []) {
  oldConfig[entry.agent] = entry.model;
}

// Collect agents whose model differs from the recorded one. Agents without a
// recorded model are not reported as changes.
const changes = [];
for (const [agent, data] of Object.entries(parsed)) {
  const newModel = data.model || '';
  const oldModel = oldConfig[agent];
  if (oldModel && oldModel !== newModel) {
    // The gate receives model ids without the provider prefix.
    changes.push({
      agent,
      from: oldModel.replace('ollama-cloud/', ''),
      to: newModel.replace('ollama-cloud/', '')
    });
  }
}

if (changes.length > 0) {
  console.log('\nDetected model changes:', changes.length);
  const report = runGate(changes);

  if (!report.passed) {
    // Abort before anything is written to disk.
    console.error('\n⛔ FITNESS GATE REJECTED the sync. No files modified.');
    console.error('If you intend to downgrade, update the source scores in model-benchmarks.json first.');
    process.exit(1);
  }

  console.log('\n✅ All model changes passed fitness gate. Proceeding...');
}

// Rebuild agent_current_config from the parsed YAML, one entry per agent.
// agent, model, provider and badge_type are derived from the YAML data; the
// remaining fields are written as fixed values.
bench.agent_current_config = Object.entries(parsed).map(([agent, data]) => {
  const model = data.model || '';
  const bareModelId = model.replace('ollama-cloud/', '');

  // Ordered rules: the first substring found in the model id decides the badge.
  const badgeRules = [
    ['qwen3', 'qwen'],
    ['minimax', 'minimax'],
    ['nemotron', 'nemotron'],
    ['glm', 'glm'],
    ['kimi', 'kimi'],
    ['deepseek', 'deepseek']
  ];
  const matchedRule = badgeRules.find(([needle]) => bareModelId.includes(needle));
  const badge = matchedRule ? matchedRule[1] : 'groq';

  let provider = 'Ollama';
  if (data.mode === 'all' || model.startsWith('ollama-cloud/')) {
    provider = 'Ollama Cloud';
  }

  return {
    agent,
    model,
    provider,
    category: 'Process',
    badge_type: badge,
    fit_score: 0,
    status: 'good',
    previous_model: null
  };
});

// Update agent_model_scores — preserve existing scores, fix current_model_id
// Existing scores are indexed by agent in a prototype-less lookup so an agent
// name that matches an Object.prototype member cannot pick up an inherited value.
const existingScores = Object.create(null);
for (const entry of bench.agent_model_scores || []) {
  existingScores[entry.agent] = entry.scores || {};
}

// Tolerate a benchmarks document without a `models` array, in line with the
// `|| []` guards used for the other sections of the document.
const knownModels = Array.isArray(bench.models) ? bench.models : [];

bench.agent_model_scores = Object.entries(parsed).map(([agent, data]) => {
  const modelId = (data.model || '').replace('ollama-cloud/', '');
  return {
    agent,
    // findIndex yields -1 when the model id is not listed.
    current_model_index: knownModels.findIndex((m) => m.id === modelId),
    current_model_id: modelId,
    reasoning_effort: data.variant === 'thinking' ? 'H' : 'M',
    scores: existingScores[agent] || {}
  };
});

// Stamp the document with generation metadata, then persist it.
bench.generated = new Date().toISOString();
bench.source = '.kilo/capability-index.yaml (synced v3 + fitness-gate)';
bench.total_agents = bench.agent_current_config.length;

const serializedBench = JSON.stringify(bench, null, 2);
fs.writeFileSync('agent-evolution/data/model-benchmarks.json', serializedBench);
console.log('\nSynced', bench.agent_current_config.length, 'agents');
console.log('Generated:', bench.generated);

// Verify: each config entry's model (minus the provider prefix) should equal
// the current_model_id of the score entry for the same agent. Differences are
// only reported; the file has already been written at this point.
let mismatches = 0;
for (const config of bench.agent_current_config) {
  const scoreEntry = bench.agent_model_scores.find((s) => s.agent === config.agent);
  if (!scoreEntry) {
    continue;
  }
  const expectedModelId = config.model.replace('ollama-cloud/', '');
  if (scoreEntry.current_model_id !== expectedModelId) {
    console.log('  MISMATCH:', config.agent, scoreEntry.current_model_id, '->', config.model);
    mismatches += 1;
  }
}
console.log('Mismatches:', mismatches);
console.log('\n💡 Tip: If fitness gate rejected changes, verify that model-benchmarks.json has correct heatmap scores before syncing from YAML.');