feat: bidirectional research dashboard + agent config fixes

- Integrate apaw_agent_model_research_v3.html as standalone dashboard
- Add model-benchmarks.json with 32 agents, 11 scored models, 11 recommendations
- Add build-research-dashboard.ts: inject live data into template → standalone HTML
- Add rebuild-template.cjs: regenerate template from v3.html source
- Add sync-benchmarks-from-yaml.cjs: sync YAML → JSON round-trip
- Add sync-model-research.ts: apply recommendation matrix to config files
- Add model-benchmarks.schema.json and model-research.schema.json for validation
- Add bidirectional-data-flow.md architecture documentation
- Add log-execution.cjs pipeline hook
- Update capability-index.yaml: add fallback_models, failover_strategy
- Update kilo-meta.json, kilo.jsonc, KILO_SPEC.md with synced models
- Update evolution.md / research.md / self-evolution.md / evolutionary-sync.md docs
- Fix security-auditor.md: quote YAML color (#DC2626)
- Fix orchestrator.md: remove duplicate devops-engineer key
- Build research-dashboard.html (106KB standalone) + dated archive
This commit is contained in:
¨NW¨
2026-04-29 21:04:22 +01:00
parent 2ae7789802
commit 3badb259cc
29 changed files with 13779 additions and 992 deletions

View File

@@ -0,0 +1,651 @@
#!/usr/bin/env bun
/**
* Model Research Synchronization Script
* Applies model recommendations from research output to agent configuration files.
*
* Usage:
* bun run agent-evolution/scripts/sync-model-research.ts # apply latest
* bun run agent-evolution/scripts/sync-model-research.ts --dry-run # preview only
* bun run agent-evolution/scripts/sync-model-research.ts --input path/to.json # custom input
* bun run agent-evolution/scripts/sync-model-research.ts --agent planner # single agent
*/
import * as fs from "fs";
import * as path from "path";
import { spawnSync } from "child_process";
// Types based on model-research.schema.json
/**
 * One model recommendation for a single agent, as read from the research JSON.
 */
interface Recommendation {
/** Agent name; used as the lookup key in the configuration files this script edits. */
agent: string;
/**
 * What to do for the agent. This script only acts on "update_model" and
 * "confirm_model"; the other values are logged as unsupported and skipped.
 */
action: "update_model" | "confirm_model" | "add_fallback" | "redesign_agent";
/** Model the research believes the agent uses now; recorded as `from` in the history entry. */
current_model: string;
/** Model to write into the configuration files for an "update_model" action. */
recommended_model: string;
/** Impact rating; only printed by this script. */
impact: "critical" | "high" | "medium" | "low";
/** Free-text justification; printed and stored as the history entry's reason. */
rationale: string;
/** When true the recommendation is treated as already applied and skipped. */
applied: boolean;
/** Not read by the code in this script. */
applied_date?: string | null;
/** Not read by the code in this script. */
score_delta?: number;
}
/**
 * Top-level shape of the research JSON file consumed by this script.
 */
interface ModelResearchData {
/** Required: the loader rejects the file when this is missing or empty. */
version: string;
/** Required: the loader rejects the file when this is missing or empty; printed on load. */
generated: string;
/** Printed on load; its presence is checked by validateSchema. */
source: string;
/** Required: must be an array; each entry is applied in order. */
recommendations: Recommendation[];
/** Not read by the code in this script. */
capability_index_patch?: Array<{
agent: string;
set: Record<string, unknown>;
}>;
/** Not read by the code in this script. */
summary?: {
total_recommendations: number;
applied_count: number;
pending_count: number;
};
}
/**
 * Running totals collected by main() and printed by printSummary().
 */
interface ChangeSummary {
/** Number of recommendations considered (after the optional single-agent filter). */
total_recommendations: number;
/** Count of "update_model" recommendations reported as applied. */
applied: number;
/** Count of "confirm_model" recommendations reported as applied. */
confirmed: number;
/** Count of recommendations that were not applied and produced no error. */
skipped: number;
/** Error messages; any entry makes the script exit with status 1. */
errors: string[];
/** De-duplicated list of file paths reported as modified. */
files_modified: string[];
/** Names of agents whose model was updated. */
agents_updated: string[];
/** True once the dashboard build step reported success. */
dashboard_rebuilt: boolean;
}
// Default paths
// Files under ../data are resolved relative to this script (__dirname);
// project configuration files are resolved against the current working directory,
// so the script is expected to be launched from the directory that holds them.
const DEFAULT_RESEARCH_FILE = path.join(__dirname, "../data/model-research-latest.json");
// Declared here but not read by any function shown in this file.
const SCHEMA_FILE = path.join(__dirname, "../data/model-research.schema.json");
const CAPABILITY_INDEX = path.join(process.cwd(), ".kilo/capability-index.yaml");
const AGENT_VERSIONS = path.join(__dirname, "../data/agent-versions.json");
const KILO_META = path.join(process.cwd(), "kilo-meta.json");
// Script invoked with --fix / --check after model changes have been written.
const SYNC_SCRIPT = path.join(process.cwd(), "scripts/sync-agents.cjs");
/**
 * Read the command line (everything after the script path) into an options record.
 *
 * Recognised flags:
 *  - `--dry-run` / `-n`          preview only
 *  - `--input <file>` / `-i`     research file (falls back to the default when the value is missing)
 *  - `--agent <name>` / `-a`     restrict the run to one agent
 *  - any token not starting with `-` is taken as the input file
 * Unknown dash-prefixed tokens are ignored.
 *
 * @returns The parsed options; `inputFile` defaults to DEFAULT_RESEARCH_FILE.
 */
function parseArgs(): {
  dryRun: boolean;
  inputFile: string;
  singleAgent?: string;
} {
  const argv = process.argv.slice(2);
  const parsed: { dryRun: boolean; inputFile: string; singleAgent?: string } = {
    dryRun: false,
    inputFile: DEFAULT_RESEARCH_FILE,
  };

  let cursor = 0;
  while (cursor < argv.length) {
    const token = argv[cursor];
    switch (token) {
      case "--dry-run":
      case "-n":
        parsed.dryRun = true;
        break;
      case "--input":
      case "-i":
        // The flag consumes the following token as its value.
        cursor += 1;
        parsed.inputFile = argv[cursor] || DEFAULT_RESEARCH_FILE;
        break;
      case "--agent":
      case "-a":
        cursor += 1;
        parsed.singleAgent = argv[cursor];
        break;
      default:
        if (!token.startsWith("-")) {
          // Bare positional argument: treat it as the input file.
          parsed.inputFile = token;
        }
    }
    cursor += 1;
  }

  return parsed;
}
/**
 * Read and minimally validate the research JSON file.
 *
 * Only the presence of `version`, `generated` and an array-valued
 * `recommendations` is verified; this is not full schema validation.
 *
 * @param filePath - Path of the research JSON file.
 * @returns The parsed research data.
 * @throws Error when the file does not exist or the parsed value does not have
 *   the expected structure. A JSON syntax error from `JSON.parse` propagates unchanged.
 */
function loadResearchData(filePath: string): ModelResearchData {
  console.log(`📖 Loading research data from: ${filePath}`);
  if (!fs.existsSync(filePath)) {
    throw new Error(`Research file not found: ${filePath}`);
  }

  const content = fs.readFileSync(filePath, "utf-8");
  const parsed: unknown = JSON.parse(content);

  // Valid JSON can still be `null`, a number, a string, ... — reject those with
  // the same structured error instead of crashing on a property access.
  if (typeof parsed !== "object" || parsed === null) {
    throw new Error("Invalid research data structure");
  }

  // Basic validation (we don't implement full schema validation for simplicity)
  const data = parsed as Partial<ModelResearchData>;
  if (!data.version || !data.generated || !Array.isArray(data.recommendations)) {
    throw new Error("Invalid research data structure");
  }

  console.log(` Found ${data.recommendations.length} recommendations`);
  console.log(` Generated: ${data.generated}`);
  console.log(` Source: ${data.source}`);
  return data as ModelResearchData;
}
/**
 * Lightweight presence check for the top-level keys of the research data.
 *
 * Stops at the first missing key, emits one warning naming it, and reports failure.
 *
 * @param data - Parsed research data.
 * @returns `true` when every required key is present, otherwise `false`.
 */
function validateSchema(data: ModelResearchData): boolean {
  const mandatoryKeys = ["version", "generated", "source", "recommendations"];
  const firstAbsent = mandatoryKeys.find((key) => !(key in data));

  if (firstAbsent !== undefined) {
    console.warn(`⚠️ Missing required field: ${firstAbsent}`);
    return false;
  }
  return true;
}
/**
 * Read capability-index.yaml as raw UTF-8 text (no YAML parsing is done here).
 *
 * @returns The file's full text.
 */
function loadCapabilityIndex(): string {
  const yamlText = fs.readFileSync(CAPABILITY_INDEX, "utf-8");
  return yamlText;
}
/**
 * Replace the `model:` value of one agent inside the text of capability-index.yaml.
 *
 * The YAML is edited as text so comments and formatting are preserved:
 *  1. the agent's key line (`<indent><agentName>:`) is located;
 *  2. the agent block runs until the next line that starts with a word character
 *     at the same or a shallower indentation (or to the end of the text);
 *  3. only the FIRST `model:` line inside that block is rewritten, so nested
 *     `model:` keys deeper in the block are left alone.
 *
 * @param content - Full text of the YAML file.
 * @param agentName - Agent key to look for (matched literally).
 * @param newModel - Model identifier to write (inserted literally).
 * @returns The resulting text and whether anything changed.
 * @throws Error when the agent key or its `model:` line cannot be found.
 */
function replaceModelInYaml(content: string, agentName: string, newModel: string): { content: string; changed: boolean } {
  // Escape regex metacharacters so the agent name is matched literally.
  const escapedName = agentName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  // Find the agent's key line and remember how far it is indented.
  const agentLine = new RegExp(`^([ \\t]+)${escapedName}:`, "m").exec(content);
  if (!agentLine) {
    throw new Error(`Agent ${agentName} not found in capability-index.yaml`);
  }
  const agentStart = agentLine.index;
  const indentWidth = agentLine[1].length;

  // The block ends at the next key that is not nested deeper than the agent itself.
  const remaining = content.substring(agentStart);
  const nextSibling = new RegExp(`\\n[ \\t]{0,${indentWidth}}\\w`).exec(remaining);
  const agentEnd = nextSibling ? agentStart + nextSibling.index : content.length;
  const agentBlock = content.substring(agentStart, agentEnd);

  // First `model:` line of the block; the value must sit on the same line.
  const modelLine = /^([ \t]+model:[ \t]+)(.+)$/m.exec(agentBlock);
  if (!modelLine) {
    throw new Error(`Model line not found in agent ${agentName} block`);
  }
  const currentModel = modelLine[2].trim();

  // Check if model already matches
  if (currentModel === newModel) {
    console.log(` ⏭️ Model already set to ${newModel}, skipping`);
    return { content, changed: false }; // No change needed
  }

  console.log(` 🔄 Updating model: ${currentModel} → ${newModel}`);

  // Splice by position rather than String.replace so that `$&`, `$1`, ... inside
  // the model name are never interpreted as replacement patterns.
  const lineStart = agentStart + modelLine.index;
  const lineEnd = lineStart + modelLine[0].length;
  const newContent = content.substring(0, lineStart) + modelLine[1] + newModel + content.substring(lineEnd);
  return { content: newContent, changed: true };
}
/**
 * Set an agent's model in kilo-meta.json and stamp the file's `lastSync`
 * field with the current time. The file is rewritten with 2-space indentation.
 *
 * @param agentName - Key under `agents` in kilo-meta.json.
 * @param newModel - Model identifier to store.
 * @throws Error when the agent has no entry in kilo-meta.json.
 */
function updateKiloMeta(agentName: string, newModel: string): void {
  const meta = JSON.parse(fs.readFileSync(KILO_META, "utf-8"));

  const entry = meta.agents[agentName];
  if (!entry) {
    throw new Error(`Agent ${agentName} not found in kilo-meta.json`);
  }

  entry.model = newModel;
  meta.lastSync = new Date().toISOString();

  const serialized = JSON.stringify(meta, null, 2);
  fs.writeFileSync(KILO_META, serialized);
}
/**
 * Best-effort, text-level update of an agent's `"model"` value in `kilo.jsonc`
 * (resolved against the current working directory). Manual verification is
 * still required per the evolutionary-sync.md rules.
 *
 * The file is edited as text (it may contain comments), so the match is a
 * heuristic: the first `"model": "..."` that follows `"<agentName>": {`.
 * NOTE(review): if the agent block itself has no "model" key, the lazy match can
 * run on into a later block — confirm every agent entry carries a model.
 *
 * When the agent cannot be located a warning is printed and the file is left untouched.
 *
 * @param agentName - Agent key to look for (matched literally).
 * @param newModel - Model identifier to write (JSON-encoded on insertion).
 */
function updateKiloJsonc(agentName: string, newModel: string): void {
  const jsoncPath = path.join(process.cwd(), "kilo.jsonc");
  const content = fs.readFileSync(jsoncPath, "utf-8");

  // Escape regex metacharacters so names such as "c++" cannot break the pattern.
  const escapedName = agentName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  // Group 1 captures everything up to (not including) the quoted model value.
  const agentRegex = new RegExp(`("${escapedName}":\\s*{[\\s\\S]*?"model":\\s*)"[^"]*"`, "m");
  const match = agentRegex.exec(content);
  if (!match) {
    console.warn(`⚠️ Could not find agent ${agentName} in kilo.jsonc - manual update required`);
    return;
  }

  // Splice by position: the matched span may contain arbitrary text (descriptions,
  // prompts) and must never be interpreted as a `$&`-style replacement pattern.
  const spanStart = match.index;
  const spanEnd = spanStart + match[0].length;
  const updatedContent =
    content.substring(0, spanStart) + match[1] + JSON.stringify(newModel) + content.substring(spanEnd);

  fs.writeFileSync(jsoncPath, updatedContent);
}
/**
 * Read and parse agent-versions.json.
 *
 * @returns The parsed JSON value (untyped).
 */
function loadAgentVersions(): any {
  return JSON.parse(fs.readFileSync(AGENT_VERSIONS, "utf-8"));
}
/**
 * Record a model change for one agent in the agent-versions structure.
 *
 * The object is mutated in place and also returned. A missing agent entry is
 * created; an existing entry that lacks `history` or `current` is repaired
 * rather than causing a crash.
 *
 * @param agentVersions - Parsed agent-versions.json content (must have an `agents` map).
 * @param agentName - Agent whose model changed.
 * @param fromModel - Previous model identifier (stored as `from`).
 * @param toModel - New model identifier (stored as `to` and as the current model).
 * @param reason - Why the change was made.
 * @returns The same `agentVersions` object, updated.
 */
function updateAgentVersions(
  agentVersions: any,
  agentName: string,
  fromModel: string,
  toModel: string,
  reason: string
): any {
  const now = new Date().toISOString();

  if (!agentVersions.agents[agentName]) {
    agentVersions.agents[agentName] = {
      current: {},
      history: [],
      performance_log: [],
    };
  }
  const agent = agentVersions.agents[agentName];

  // An existing entry may predate the history log; treat that like a missing
  // `current` instead of failing on push.
  if (!Array.isArray(agent.history)) {
    agent.history = [];
  }

  // Add history entry
  agent.history.push({
    date: now,
    commit: "model-research-sync",
    type: "model_change",
    from: fromModel,
    to: toModel,
    reason,
    source: "research",
  });

  // Update current model
  if (!agent.current) agent.current = {};
  agent.current.model = toModel;
  agent.current.provider = detectProvider(toModel);

  // Update lastUpdated
  agentVersions.lastUpdated = now;
  return agentVersions;
}
/**
 * Derive a provider label from a model identifier.
 *
 * Checks run in this order: Ollama prefixes, then any mention of "openrouter",
 * then the Groq prefix.
 *
 * @param model - Model identifier, e.g. "groq/some-model".
 * @returns "Ollama", "OpenRouter", "Groq" or "Unknown".
 */
function detectProvider(model: string): string {
  const ollamaPrefixes = ["ollama-cloud/", "ollama/"];
  if (ollamaPrefixes.some((prefix) => model.startsWith(prefix))) {
    return "Ollama";
  }
  // A substring test also covers identifiers that start with "openrouter/".
  if (model.includes("openrouter")) {
    return "OpenRouter";
  }
  return model.startsWith("groq/") ? "Groq" : "Unknown";
}
/**
 * Apply a single recommendation to the agent configuration files.
 *
 * Only the "update_model" and "confirm_model" actions are handled; any other
 * action is logged as unsupported and reported as not applied.
 *
 * @param rec - Recommendation to apply.
 * @param dryRun - When true no file is written; the steps that would run are logged instead.
 * @param singleAgent - When set, recommendations for any other agent are ignored without logging.
 * @returns `applied` is true when the recommendation took effect (or would have, in a
 *   dry run); `error` carries the message of a caught exception or failed lookup;
 *   `filesModified` lists the paths associated with a successful action
 *   (kilo.jsonc is not included in that list).
 */
function applyRecommendation(
rec: Recommendation,
dryRun: boolean,
singleAgent?: string
): { applied: boolean; error?: string; filesModified?: string[] } {
if (singleAgent && rec.agent !== singleAgent) {
return { applied: false };
}
console.log(`\n🔧 Applying recommendation for ${rec.agent}`);
console.log(` Action: ${rec.action}`);
console.log(` Current: ${rec.current_model}`);
console.log(` Recommended: ${rec.recommended_model}`);
console.log(` Impact: ${rec.impact}`);
console.log(` Rationale: ${rec.rationale}`);
// Skip if already applied
if (rec.applied) {
console.log(` ⏭️ Already applied, skipping`);
return { applied: false };
}
if (rec.action === "update_model") {
try {
// 1. Update capability-index.yaml
// The replacement is computed even in a dry run, so a missing agent or
// model line is still reported as an error.
const capIndexContent = loadCapabilityIndex();
const { content: updatedContent, changed: yamlChanged } = replaceModelInYaml(capIndexContent, rec.agent, rec.recommended_model);
if (!dryRun && yamlChanged) {
fs.writeFileSync(CAPABILITY_INDEX, updatedContent);
console.log(` ✅ Updated capability-index.yaml`);
} else if (!dryRun) {
console.log(` ⏭️ Skipping capability-index.yaml (no change needed)`);
} else {
console.log(` 📋 Would update capability-index.yaml`);
}
// Only update other files if YAML was actually changed
// (so when the YAML already holds the recommended model, kilo-meta.json,
// agent-versions.json and kilo.jsonc are not touched either).
if (!yamlChanged) {
return {
applied: false,
filesModified: [],
};
}
// 2. Update kilo-meta.json (source of truth)
if (!dryRun) {
updateKiloMeta(rec.agent, rec.recommended_model);
console.log(` ✅ Updated kilo-meta.json`);
} else {
console.log(` 📋 Would update kilo-meta.json`);
}
// 3. Update agent-versions.json
// updateAgentVersions changes the loaded object in memory; it is only
// persisted below when this is not a dry run.
const agentVersions = loadAgentVersions();
const updatedVersions = updateAgentVersions(
agentVersions,
rec.agent,
rec.current_model,
rec.recommended_model,
rec.rationale
);
if (!dryRun) {
fs.writeFileSync(AGENT_VERSIONS, JSON.stringify(updatedVersions, null, 2));
console.log(` ✅ Updated agent-versions.json`);
} else {
console.log(` 📋 Would update agent-versions.json`);
}
// 4. Attempt to update kilo.jsonc (manual verification still required)
// A failure here is downgraded to a warning and does not fail the recommendation.
if (!dryRun) {
try {
updateKiloJsonc(rec.agent, rec.recommended_model);
console.log(` ✅ Updated kilo.jsonc`);
} catch (error: any) {
console.warn(` ⚠️ Could not update kilo.jsonc: ${error.message}`);
console.log(` ⚠️ Manual update required per evolutionary-sync.md rules`);
}
} else {
console.log(` 📋 Would update kilo.jsonc`);
}
// Returned in dry-run mode as well, where nothing has actually been written.
return {
applied: true,
filesModified: [CAPABILITY_INDEX, KILO_META, AGENT_VERSIONS],
};
} catch (error: any) {
return {
applied: false,
error: error.message,
};
}
} else if (rec.action === "confirm_model") {
// Mark as confirmed in agent-versions.json
try {
const agentVersions = loadAgentVersions();
if (agentVersions.agents[rec.agent]) {
// Add confirmation history entry
// (pushed onto the in-memory copy even in a dry run; written to disk only
// when dryRun is false).
agentVersions.agents[rec.agent].history.push({
date: new Date().toISOString(),
commit: "model-research-confirm",
type: "model_change",
from: rec.current_model,
to: rec.current_model, // same model
reason: `Confirmed: ${rec.rationale}`,
source: "research",
});
if (!dryRun) {
fs.writeFileSync(AGENT_VERSIONS, JSON.stringify(agentVersions, null, 2));
console.log(` ✅ Confirmed current model in agent-versions.json`);
} else {
console.log(` 📋 Would confirm current model`);
}
return {
applied: true,
filesModified: [AGENT_VERSIONS],
};
} else {
return {
applied: false,
error: `Agent ${rec.agent} not found in agent-versions.json`,
};
}
} catch (error: any) {
return {
applied: false,
error: error.message,
};
}
}
// Unsupported action
console.log(` ⏭️ Unsupported action: ${rec.action}`);
return { applied: false };
}
/**
 * Run the sync script with `--fix` via `node`, streaming its output to this
 * process's terminal.
 *
 * @returns `true` when the child exited with status 0, otherwise `false`.
 */
function runSyncAgentsFix(): boolean {
  console.log(`\n🔄 Running sync-agents.js --fix...`);

  const { status } = spawnSync("node", [SYNC_SCRIPT, "--fix"], {
    cwd: process.cwd(),
    encoding: "utf-8",
    stdio: "inherit",
  });

  const succeeded = status === 0;
  if (succeeded) {
    console.log(`✅ Sync script completed`);
  } else {
    console.error(`❌ Sync script failed with exit code ${status}`);
  }
  return succeeded;
}
/**
 * Run the sync script with `--check` via `node`, streaming its output to this
 * process's terminal.
 *
 * @returns `true` when the child exited with status 0, otherwise `false`.
 */
function runSyncAgentsCheck(): boolean {
  console.log(`\n✅ Running sync-agents.js --check...`);

  const { status } = spawnSync("node", [SYNC_SCRIPT, "--check"], {
    cwd: process.cwd(),
    encoding: "utf-8",
    stdio: "inherit",
  });

  const passed = status === 0;
  if (passed) {
    console.log(`✅ Sync check passed`);
  } else {
    console.error(`❌ Sync check failed with exit code ${status}`);
  }
  return passed;
}
/**
 * Rebuild the research dashboard by running whichever build script exists next
 * to this file: `build-research-dashboard.ts` (through `bun run`) is preferred,
 * `build-standalone.cjs` (through `node`) is the fallback.
 *
 * The child inherits this process's stdio, so its output is streamed directly
 * and is NOT available on the spawn result; failures are therefore described
 * from the spawn error, terminating signal or exit status.
 *
 * @returns `{ success: true }` on a zero exit status, otherwise
 *   `{ success: false, error }`. Never throws.
 */
function runBuildDashboard(): { success: boolean; error?: string } {
  console.log("\n📊 Rebuilding research dashboard...");
  try {
    // Candidate build scripts in order of preference.
    const candidates: Array<{ command: string; leadingArgs: string[]; script: string }> = [
      { command: "bun", leadingArgs: ["run"], script: path.join(__dirname, "build-research-dashboard.ts") },
      { command: "node", leadingArgs: [], script: path.join(__dirname, "build-standalone.cjs") },
    ];
    const chosen = candidates.find((candidate) => fs.existsSync(candidate.script));
    if (!chosen) {
      return {
        success: false,
        error: "No dashboard build script found (build-research-dashboard.ts or build-standalone.cjs)"
      };
    }

    const result = spawnSync(chosen.command, [...chosen.leadingArgs, chosen.script], {
      cwd: process.cwd(),
      encoding: "utf-8",
      stdio: "inherit",
      timeout: 30000
    });

    // Set when the child could not be started or was stopped by the timeout.
    if (result.error) {
      return {
        success: false,
        error: `Build script could not be completed: ${result.error.message}`
      };
    }
    if (result.status !== 0) {
      return {
        success: false,
        error: result.signal
          ? `Build script was terminated by signal ${result.signal}`
          : `Build script failed with exit code ${result.status}`
      };
    }

    console.log("✅ Dashboard rebuilt: agent-evolution/index.standalone.html");
    return { success: true };
  } catch (error: unknown) {
    return {
      success: false,
      error: error instanceof Error ? error.message : String(error)
    };
  }
}
/**
 * Print the end-of-run report: counters first, then the optional
 * dashboard line and the agent / file / error lists (each list is omitted when empty).
 *
 * @param summary - Totals collected during the run.
 */
function printSummary(summary: ChangeSummary): void {
  const rule = "=".repeat(60);
  const emit = (line: string): void => {
    console.log(line);
  };
  // Prints a heading followed by one bullet per entry; silent for an empty list.
  const emitList = (heading: string, entries: string[]): void => {
    if (entries.length === 0) return;
    emit(heading);
    for (const entry of entries) {
      emit(` - ${entry}`);
    }
  };

  emit("\n" + rule);
  emit("📊 SYNC SUMMARY");
  emit(rule);
  emit(`Total recommendations: ${summary.total_recommendations}`);
  emit(`Applied: ${summary.applied}`);
  emit(`Confirmed: ${summary.confirmed}`);
  emit(`Skipped: ${summary.skipped}`);
  if (summary.dashboard_rebuilt) {
    emit(`Dashboard rebuilt: ✅ Yes`);
  }

  emitList(`\nAgents updated:`, summary.agents_updated);
  emitList(`\nFiles modified:`, summary.files_modified);
  emitList(`\nErrors:`, summary.errors);

  emit(rule);
}
/**
 * Script entry point: load the research file, apply each recommendation,
 * then (outside dry-run mode and only when at least one model was updated)
 * run the sync fix, the sync check and the dashboard rebuild, in that order,
 * stopping the chain at the first failing step. Finishes by printing the
 * summary and exiting with status 1 when any error was recorded.
 */
async function main() {
  const { dryRun, inputFile, singleAgent } = parseArgs();

  console.log("🧬 Model Research Synchronization");
  console.log(` Dry run: ${dryRun ? "YES" : "NO"}`);
  console.log(` Input: ${inputFile}`);
  if (singleAgent) {
    console.log(` Single agent: ${singleAgent}`);
  }
  console.log("");

  // Load research data
  const researchData = loadResearchData(inputFile);
  const schemaLooksValid = validateSchema(researchData);
  if (!schemaLooksValid) {
    console.warn("⚠️ Schema validation issues detected, but continuing...");
  }

  // Narrow the work list when a single agent was requested.
  let pending = researchData.recommendations;
  if (singleAgent) {
    pending = pending.filter((candidate) => candidate.agent === singleAgent);
    console.log(`Filtered to ${pending.length} recommendations for ${singleAgent}`);
  }

  const summary: ChangeSummary = {
    total_recommendations: pending.length,
    applied: 0,
    confirmed: 0,
    skipped: 0,
    errors: [],
    files_modified: [],
    agents_updated: [],
    dashboard_rebuilt: false,
  };

  // Apply recommendations and tally the outcomes.
  for (const rec of pending) {
    const outcome = applyRecommendation(rec, dryRun, singleAgent);

    if (!outcome.applied) {
      if (outcome.error) {
        summary.errors.push(`${rec.agent}: ${outcome.error}`);
      } else {
        summary.skipped += 1;
      }
      continue;
    }

    switch (rec.action) {
      case "update_model":
        summary.applied += 1;
        summary.agents_updated.push(rec.agent);
        if (outcome.filesModified) {
          summary.files_modified.push(...outcome.filesModified);
        }
        break;
      case "confirm_model":
        summary.confirmed += 1;
        break;
    }
  }

  // The same file is reported once per updated agent; keep each path only once.
  summary.files_modified = Array.from(new Set(summary.files_modified));

  // Propagate, validate and rebuild only when something was really written.
  const shouldPropagate = !dryRun && summary.applied > 0;
  if (shouldPropagate) {
    console.log(`\n📦 Propagating changes to all agent files...`);
    if (!runSyncAgentsFix()) {
      summary.errors.push("Sync fix script failed");
    } else {
      console.log(`\n✅ Validating changes...`);
      if (!runSyncAgentsCheck()) {
        summary.errors.push("Sync check failed after applying changes");
      } else {
        // Rebuild research dashboard
        const buildResult = runBuildDashboard();
        if (buildResult.success) {
          console.log("✅ Dashboard rebuilt: agent-evolution/index.standalone.html");
          summary.dashboard_rebuilt = true;
        } else {
          console.warn(`⚠️ Dashboard rebuild failed: ${buildResult.error}`);
          summary.errors.push(`Dashboard rebuild failed: ${buildResult.error}`);
        }
      }
    }
  }

  printSummary(summary);

  // Exit with error if any errors occurred
  const errorCount = summary.errors.length;
  if (errorCount > 0) {
    console.error(`\n❌ Sync completed with ${errorCount} errors`);
    process.exit(1);
  } else if (summary.applied === 0 && summary.confirmed === 0) {
    console.warn(`\n⚠ No changes applied`);
  } else {
    console.log(`\n🎉 Sync completed successfully!`);
  }
}
// Kick off the run; anything that escapes main() is reported and turns into exit status 1.
main().catch((fatal) => {
  console.error("Fatal error:", fatal);
  process.exit(1);
});