feat(evolution): add real-fit dashboard, API, report builder, and docker compose
- real-fit.html: API-driven research dashboard with agent/model heatmap, detail modal with score breakdown and evaluator commentary
- api.py: FastAPI backend serving /api/real-fit-report (dynamic from SQLite), /api/research, /api/evolve-agent/start
- rebuild-report.py: generates real-fit-report.json from SQLite DB for static fallback
- docker-compose.yml: add evolution-api service (Python 3.12, uvicorn) for research endpoints
- index.standalone.html: sync with dashboard data updates
- archive/index.html: standalone dashboard snapshot (263KB)
- .gitignore: exclude *.db, research-jobs.json from tracking
This commit is contained in:
138
agent-evolution/scripts/audit-system.cjs
Normal file
138
agent-evolution/scripts/audit-system.cjs
Normal file
@@ -0,0 +1,138 @@
|
||||
const fs = require('fs');
|
||||
|
||||
/**
 * Parse the simple `key: value` frontmatter at the top of a markdown file.
 *
 * Only flat, single-line scalar entries are understood; nested YAML, lists
 * and multi-line values are ignored.
 *
 * @param {string} content - Full text of the markdown file.
 * @returns {Object<string, string>|null} Map of frontmatter keys to values,
 *   or `null` when the file has no opening or no closing `---` delimiter.
 */
function parseFrontmatter(content) {
  if (!content.startsWith('---')) return null;

  // The closing delimiter must start a line; a bare indexOf('---') would
  // also stop at a "---" that merely appears inside a value.
  const rest = content.slice(3);
  const closing = /\r?\n---/.exec(rest);
  if (closing === null) return null;

  const data = {};
  // Split on LF or CRLF: with a plain '\n' split the trailing '\r' keeps the
  // line regex from matching, silently dropping keys in CRLF files.
  for (const line of rest.slice(0, closing.index).split(/\r?\n/)) {
    const m = line.match(/^([\w-]+):\s*(.+)$/);
    if (!m) continue;

    let value = m[2].trim();
    // YAML quotes are syntax, not part of the value.
    const quote = value[0];
    if (value.length >= 2 && (quote === '"' || quote === "'") && value.endsWith(quote)) {
      value = value.slice(1, -1);
    }
    data[m[1]] = value;
  }
  return data;
}
|
||||
|
||||
/**
 * Remove `//` line comments and block comments from JSONC text so that it
 * can be handed to `JSON.parse`.
 *
 * Double-quoted string literals are copied through untouched, so values such
 * as "https://example.com" are not mistaken for comments.
 *
 * @param {string} str - Raw JSONC text.
 * @returns {string} The same text with comments removed; line breaks are kept.
 */
function stripComments(str) {
  let out = '';
  let i = 0;
  while (i < str.length) {
    const ch = str[i];
    const next = str[i + 1];
    if (ch === '"') {
      // Copy the whole string literal verbatim, honouring backslash escapes.
      const start = i;
      i++;
      while (i < str.length && str[i] !== '"') {
        i += str[i] === '\\' ? 2 : 1;
      }
      i++; // step past the closing quote
      out += str.slice(start, i);
    } else if (ch === '/' && next === '/') {
      // Line comment: drop everything up to (not including) the line break.
      while (i < str.length && str[i] !== '\n' && str[i] !== '\r') i++;
    } else if (ch === '/' && next === '*') {
      // Block comment: drop through the terminator, or to EOF if unterminated.
      const close = str.indexOf('*/', i + 2);
      i = close === -1 ? str.length : close + 2;
    } else {
      out += ch;
      i++;
    }
  }
  return out;
}
|
||||
|
||||
const agents = [];
const commands = [];
const issues = [];

/**
 * Collect every `.md` definition in `dir` whose frontmatter declares a model.
 *
 * @param {string} dir - Directory to scan (not recursive).
 * @param {string} defaultMode - Mode recorded when the frontmatter has none.
 * @returns {Array<{name: string, model: string, mode: string, source: string, description: string}>}
 *   One entry per file, in directory-listing order. Files without
 *   frontmatter or without a `model` key are skipped.
 */
function loadDefinitions(dir, defaultMode) {
  const entries = [];
  for (const file of fs.readdirSync(dir)) {
    if (!file.endsWith('.md')) continue;

    const source = dir + '/' + file;
    const fm = parseFrontmatter(fs.readFileSync(source, 'utf8'));
    if (!fm || !fm.model) continue;

    entries.push({
      // Strip only the trailing extension; String.replace('.md', '') would
      // remove the first ".md" it finds anywhere in the file name.
      name: file.slice(0, -'.md'.length),
      model: fm.model,
      mode: fm.mode || defaultMode,
      source,
      description: fm.description || ''
    });
  }
  return entries;
}

// 1. Parse agent .md files
agents.push(...loadDefinitions('.kilo/agents', 'subagent'));

// 2. Parse command .md files
commands.push(...loadDefinitions('.kilo/commands', 'command'));
|
||||
|
||||
// 3. Parse kilo-meta.json
// Record the model kilo-meta.json declares for each known agent/command and
// flag every disagreement with the model named in the .md frontmatter.
const meta = JSON.parse(fs.readFileSync('kilo-meta.json', 'utf8'));
agents.forEach((entry) => {
  const declared = meta.agents?.[entry.name];
  if (!declared) return;
  entry.metaModel = declared.model;
  if (entry.model !== declared.model) {
    issues.push(`AGENT ${entry.name}: .md=${entry.model} vs meta=${declared.model}`);
  }
});
commands.forEach((entry) => {
  const declared = meta.commands?.[entry.name];
  if (!declared) return;
  entry.metaModel = declared.model;
  if (entry.model !== declared.model) {
    issues.push(`CMD ${entry.name}: .md=${entry.model} vs meta=${declared.model}`);
  }
});

// 4. Parse .kilo/kilo.jsonc
// Entries without a model, or naming an agent that has no .md file, are ignored.
const dotKiloRaw = stripComments(fs.readFileSync('.kilo/kilo.jsonc', 'utf8'));
const dotKilo = JSON.parse(dotKiloRaw);
Object.entries(dotKilo.agent || {}).forEach(([name, cfg]) => {
  if (!cfg.model) return;
  const match = agents.find((entry) => entry.name === name);
  if (!match) return;
  match.kiloModel = cfg.model;
  if (match.model !== cfg.model) {
    issues.push(`AGENT ${name}: .md=${match.model} vs .kilo/kilo.jsonc=${cfg.model}`);
  }
});

// 5. Parse root kilo.jsonc
// NOTE(review): this reads the `agent` section but matches the names against
// commands — confirm that is intended rather than a `command` section.
const rootKiloRaw = stripComments(fs.readFileSync('kilo.jsonc', 'utf8'));
const rootKilo = JSON.parse(rootKiloRaw);
Object.entries(rootKilo.agent || {}).forEach(([name, cfg]) => {
  if (!cfg.model) return;
  const match = commands.find((entry) => entry.name === name);
  if (!match) return;
  match.rootModel = cfg.model;
  if (match.model !== cfg.model) {
    issues.push(`CMD ${name}: .md=${match.model} vs kilo.jsonc=${cfg.model}`);
  }
});
|
||||
|
||||
// 6. Check non-ollama
// Anything whose model id lacks the "ollama-cloud/" prefix is reported,
// agents first, then commands.
const nonOllama = [];
[['agent', agents], ['command', commands]].forEach(([type, list]) => {
  list.forEach(({ name, model }) => {
    if (model.startsWith('ollama-cloud/')) return;
    nonOllama.push({ type: type, name: name, model: model });
  });
});

// 7. Summary by model
// Count how many agents and commands use each model id.
const modelStats = {};
[...agents, ...commands].forEach(({ model }) => {
  modelStats[model] = (modelStats[model] || 0) + 1;
});

// Array.prototype.sort works in place, so `agents` and `commands` themselves
// end up ordered by name once the state object has been built.
const byName = (left, right) => left.name.localeCompare(right.name);
const state = {
  generated: new Date().toISOString(),
  totalAgents: agents.length,
  totalCommands: commands.length,
  allOllama: nonOllama.length === 0,
  modelDistribution: modelStats,
  agents: agents.sort(byName),
  commands: commands.sort(byName),
  issues: issues,
  nonOllama: nonOllama
};

const serializedState = JSON.stringify(state, null, 2) + '\n';
fs.writeFileSync('agent-evolution/data/real-state.json', serializedState);
|
||||
|
||||
// Console report
// Human-readable summary of the state that was just written to disk.
console.log('=== REAL SYSTEM STATE ===');
console.log('Generated:', state.generated);
console.log('Agents:', state.totalAgents);
console.log('Commands:', state.totalCommands);
console.log('All ollama-cloud/:', state.allOllama ? 'YES' : 'NO (' + nonOllama.length + ' exceptions)');

// Most-used models first.
console.log('\n=== MODEL DISTRIBUTION ===');
for (const [m, c] of Object.entries(modelStats).sort((a, b) => b[1] - a[1])) {
  console.log(` ${m}: ${c}`);
}

if (issues.length > 0) {
  console.log('\n=== ISSUES ===');
  issues.forEach(i => console.log(' ⚠️', i));
}

if (nonOllama.length > 0) {
  // Heading previously read "NON-OLLOMA" (typo).
  console.log('\n=== NON-OLLAMA ===');
  nonOllama.forEach(n => console.log(' ❌', n.type, n.name, n.model));
}

console.log('\n✅ State written to agent-evolution/data/real-state.json');
|
||||
29
agent-evolution/scripts/merge-real-fit.cjs
Normal file
29
agent-evolution/scripts/merge-real-fit.cjs
Normal file
@@ -0,0 +1,29 @@
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
// The dashboard file is both the input and the output: it is rewritten in
// place with the real-fit evaluation data merged in.
const DASH = path.join(__dirname, '..', 'data', 'dashboard-data.json');
const REAL = path.join(__dirname, '..', 'data', 'real-fit-report.json');
const OUT = path.join(__dirname, '..', 'data', 'dashboard-data.json');

const dash = JSON.parse(fs.readFileSync(DASH, 'utf-8'));
const real = JSON.parse(fs.readFileSync(REAL, 'utf-8'));

// Inject real_evaluations into each agent
for (const agent of dash.agents) {
  const report = real.agents?.[agent.name];
  if (!report?.evaluations) {
    // No report (or no evaluations) for this agent: store an empty map.
    agent.real_evaluations = {};
    continue;
  }
  agent.real_evaluations = report.evaluations;
  agent.real_best_model = report.best_model;
  agent.real_best_score = report.best_score;
}

// Add metadata
dash.real_fit_generated = real.generated;
dash.real_fit_source = real.source;

fs.writeFileSync(OUT, JSON.stringify(dash, null, 2));
console.log(`Merged real-fit data into ${OUT}`);

let evaluatedAgents = 0;
for (const agent of dash.agents) {
  if (Object.keys(agent.real_evaluations || {}).length > 0) evaluatedAgents += 1;
}
console.log('Agents with real evals:', evaluatedAgents);
|
||||
98
agent-evolution/scripts/patch-heatmap.js
Normal file
98
agent-evolution/scripts/patch-heatmap.js
Normal file
@@ -0,0 +1,98 @@
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
const INDEX = path.join(__dirname, '../index.standalone.html');
|
||||
|
||||
// 1. New renderHeatmap that reads real-fit data
|
||||
// Source text of the replacement renderHeatmap() that gets spliced into
// index.standalone.html. It runs in the browser, not in Node: it reads
// window.dashboardData and (optionally) window.realFitData and writes the
// table markup into the #hmTable element.
//
// The `esc` map must emit HTML entities (&amp; &lt; &gt; &quot; &#39;).
// A map of each character to itself escapes nothing, and a bare ''' is a
// syntax error in the injected script.
const newRenderHeatmap = `function renderHeatmap() {
  const esc = str => (str || '').replace(/[&<>"']/g, m => ({'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'}[m]));
  const dd = window.dashboardData;

  // Merge real-fit if loaded
  const rf = window.realFitData || {};
  const realAgents = rf.agents || {};

  if (!dd || !dd.agents) {
    document.getElementById('hmTable').innerHTML = '<tr><td style="color:var(--text-secondary);padding:20px;text-align:center;">⚠️ No data. Run analysis.</td></tr>';
    return;
  }

  // Build model list from real-fit (cross-model) + current dashboard data
  const modelsSeen = new Set();
  dd.agents.forEach(a => { modelsSeen.add(a.model_short); });
  Object.values(realAgents).forEach(a => { Object.keys(a.evaluations || {}).forEach(m => modelsSeen.add(m)); });
  const modelList = Array.from(modelsSeen).filter(m => m && m !== 'code-skeptic');

  const t = document.getElementById('hmTable');
  let h = '<thead><tr><th class="hm-role">Agent</th>';
  modelList.forEach(m => {
    h += '<th style="writing-mode:vertical-lr;transform:rotate(180deg);max-width:32px;font-size:.56em;padding:3px 1px;">' + esc(m) + '</th>';
  });
  h += '<th>Best</th><th>Score</th></tr></thead><tbody>';

  dd.agents.forEach(a => {
    const realAgent = realAgents[a.name];
    h += '<tr><td class="hm-r">' + esc(a.name) + '</td>';
    modelList.forEach(m => {
      let score = 0;
      if (realAgent && realAgent.evaluations && realAgent.evaluations[m] > 0) {
        score = Math.round(realAgent.evaluations[m]);
      }
      const isCurrent = a.model_short === m;
      let cls = 'na';
      if (score >= 90) cls = 'high';
      else if (score >= 75) cls = 'good';
      else if (score >= 50) cls = 'med';
      else if (score > 0) cls = 'low';
      const display = score > 0 ? score : (isCurrent ? Math.round(a.fit_score || 0) : '·');
      const curStyle = isCurrent ? 'box-shadow:inset 0 0 0 2px var(--accent-cyan)' : '';
      h += '<td class="score ' + cls + '" style="' + curStyle + '">' + display + '</td>';
    });
    const bestModel = realAgent ? (realAgent.best_model || a.model_short) : a.model_short;
    const bestScore = realAgent ? Math.round(realAgent.best_score || 0) : Math.round(a.fit_score || 0);
    h += '<td>' + esc(bestModel) + '</td><td style="font-weight:700">' + bestScore + '</td></tr>';
  });
  t.innerHTML = h + '</tbody>';
}`;
|
||||
|
||||
// 2. Add loadRealFitData script after dashboard load
|
||||
// Snippet spliced in right after the dashboard JSON has been loaded. It is
// best-effort: a missing or unreadable report only produces a console warning.
const loadRealFitData = `
// Load real-fit report for cross-model evaluation
try {
  const rfRes = await fetch('data/real-fit-report.json');
  if (rfRes.ok) window.realFitData = await rfRes.json();
} catch(e) { console.warn('real-fit-report.json not loaded:', e.message); }
`;

/**
 * Find the end of the brace-delimited block that opens at or after `start`.
 *
 * Braces are counted textually; braces inside string or regex literals are
 * not special-cased, so the block must be textually balanced.
 *
 * @param {string} text - Text to scan.
 * @param {number} start - Index at or before the block's opening brace.
 * @returns {number} Index just past the matching closing brace, or -1 when
 *   the block never closes.
 */
function findBlockEnd(text, start) {
  let depth = 0;
  for (let i = start; i < text.length; i++) {
    if (text[i] === '{') {
      depth++;
    } else if (text[i] === '}') {
      depth--;
      if (depth === 0) return i + 1;
    }
  }
  return -1;
}

let html = fs.readFileSync(INDEX, 'utf-8');

// Patch A: replace renderHeatmap function
// The function body is located by brace counting. A regex cannot match
// balanced braces: the previous pattern stopped at the second '}' after the
// signature, which cut any function with more than one nested block in half
// and left its tail behind, notably when the script was run a second time.
const heatmapStart = html.search(/function renderHeatmap\(\)\s*\{/);
const heatmapEnd = heatmapStart === -1 ? -1 : findBlockEnd(html, heatmapStart);
if (heatmapEnd !== -1) {
  html = html.substring(0, heatmapStart) + newRenderHeatmap + '\n' + html.substring(heatmapEnd);
  console.log('Patched renderHeatmap');
} else {
  console.warn('renderHeatmap() not found or unbalanced; heatmap left unchanged');
}

// Patch B: insert real-fit loading after dashboard load
const dashLoadPattern = /window\.dashboardData = await dashRes\.json\(\);/;
if (html.includes("fetch('data/real-fit-report.json')")) {
  // Already patched by an earlier run; inserting again would duplicate the fetch.
  console.log('init() already loads real-fit data');
} else if (dashLoadPattern.test(html)) {
  // A replacer function keeps "$" sequences in the snippet from being
  // interpreted as replacement patterns.
  html = html.replace(dashLoadPattern, (matched) => matched + '\n' + loadRealFitData.trim());
  console.log('Patched init() to load real-fit data');
}

fs.writeFileSync(INDEX, html);
console.log('Done — ' + (fs.statSync(INDEX).size / 1024).toFixed(1) + ' KB');
|
||||
173
agent-evolution/scripts/rebuild-report.py
Normal file
173
agent-evolution/scripts/rebuild-report.py
Normal file
@@ -0,0 +1,173 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Rebuild real-fit-report.json from SQLite DB.
|
||||
|
||||
Usage:
|
||||
python3 rebuild-report.py
|
||||
python3 rebuild-report.py --db /path/to/real-fit.db --report /path/to/real-fit-report.json
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sqlite3
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _sync_agents_from_meta(db_path: Path) -> None:
    """Import any missing agents from kilo-meta.json into the DB agents table.

    The meta file is looked up three directory levels above the database file
    (``db_path.parent.parent.parent / "kilo-meta.json"``). If it does not
    exist the function returns without touching the database.

    Agents already present in the table (matched by name) are left untouched;
    only new names are inserted.

    Args:
        db_path: Path to the SQLite database containing the ``agents`` table.

    Raises:
        sqlite3.Error: If the ``agents`` table is missing or the insert fails.
        json.JSONDecodeError: If kilo-meta.json is not valid JSON.
    """
    meta_path = db_path.parent.parent.parent / "kilo-meta.json"
    if not meta_path.exists():
        return
    # Explicit encoding: the platform default is not UTF-8 everywhere.
    with open(meta_path, encoding="utf-8") as f:
        meta = json.load(f)

    conn = sqlite3.connect(str(db_path))
    try:
        existing = {row[0] for row in conn.execute("SELECT name FROM agents")}
        stamp = datetime.now(timezone.utc).isoformat()
        new_rows = [
            (
                name,
                info.get("description", ""),
                info.get("category", "meta"),
                info.get("model", ""),
                info.get("color", "#6B7280"),
                stamp,
            )
            for name, info in meta.get("agents", {}).items()
            if name not in existing
        ]
        conn.executemany(
            "INSERT OR IGNORE INTO agents (name, description, category, current_model, color, updated) VALUES (?, ?, ?, ?, ?, ?)",
            new_rows,
        )
        conn.commit()
    finally:
        # Release the connection even when a query raises.
        conn.close()
|
||||
|
||||
|
||||
def build_report(db_path: Path) -> dict:
    """Build the real-fit report dictionary from the SQLite database.

    For every (agent, model) pair a single evaluation is selected: rows with a
    non-positive score, a ``rubric_v1`` evaluator, an empty response or an
    ``[HTTP ...`` error response are discarded; among the rest the evaluator
    ``evolution-skeptic`` is preferred, then ``rubric_v2``, then any other,
    with the highest score breaking ties.

    Args:
        db_path: Path to the SQLite database with ``agents`` and
            ``evaluations`` tables.

    Returns:
        A dict with the keys ``generated`` (UTC timestamp string), ``source``,
        ``total_evaluations`` (number of selected agent/model pairs),
        ``agents`` (per-agent selected scores plus best model/score, for every
        row of the ``agents`` table) and ``fit_scores`` (best model per agent
        that has at least one selected evaluation).

    Raises:
        sqlite3.Error: If a required table is missing or a query fails.
    """
    _sync_agents_from_meta(db_path)

    conn = sqlite3.connect(str(db_path))
    conn.row_factory = sqlite3.Row
    try:
        agents_meta = {
            row["name"]: dict(row)
            for row in conn.execute(
                "SELECT name, description, category, current_model FROM agents"
            )
        }

        # Only take evaluations that are NOT HTTP error responses; rubric_v1
        # rows are excluded outright. The ORDER BY puts the preferred row for
        # each agent/model pair first.
        eval_rows = conn.execute("""
            SELECT agent_name, model, total_score
            FROM evaluations
            WHERE total_score > 0
              AND evaluator NOT LIKE '%rubric_v1%'
              AND (response IS NULL
                   OR (response NOT LIKE '%[HTTP %' AND response != ''))
            ORDER BY agent_name, model,
                CASE evaluator
                    WHEN 'evolution-skeptic' THEN 0
                    WHEN 'rubric_v2' THEN 1
                    ELSE 2
                END,
                total_score DESC
        """).fetchall()
    finally:
        # Release the connection even when a query raises.
        conn.close()

    # Take the first (best preferred evaluator, highest score) per agent-model
    best_evals = {}
    for row in eval_rows:
        best_evals.setdefault(row["agent_name"], {}).setdefault(
            row["model"], row["total_score"]
        )

    # Best (model, score) per agent among the selected evaluations; on a tie
    # the model that sorts first wins.
    best_by_agent = {
        agent: max(evals.items(), key=lambda pair: pair[1])
        for agent, evals in best_evals.items()
    }

    # fit_scores is derived from the same selected evaluations. It used to be
    # filled from one row per (agent, model) while being keyed by agent only,
    # so whichever model came last overwrote the others, and it ignored the
    # evaluator preference applied above.
    fit_scores = {
        agent: {
            "model": model,
            "fit": score,
            "explanation": (
                f"Best model for {agent} is {model} "
                f"with score {score:.1f}. "
                "Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)."
            ),
        }
        for agent, (model, score) in best_by_agent.items()
    }

    agents_report = {}
    for agent_name, meta in agents_meta.items():
        best_model, best_score = best_by_agent.get(agent_name, ("", 0.0))
        agents_report[agent_name] = {
            "name": agent_name,
            "evaluations": best_evals.get(agent_name, {}),
            "info": [
                meta.get("description") or "",
                meta.get("category") or "",
                meta.get("current_model") or "",
            ],
            "best_model": best_model,
            "best_score": best_score,
        }

    total_evals = sum(len(evals) for evals in best_evals.values())
    generated = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

    return {
        "generated": generated,
        "source": "real-fit-engine-db-filtered",
        "total_evaluations": total_evals,
        "agents": agents_report,
        "fit_scores": fit_scores,
    }
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: rebuild the report JSON from the database.

    Both ``--db`` and ``--report`` default to files in the ``data`` directory
    that sits next to this script's own directory. The report's parent
    directory is created when missing.
    """
    data_dir = Path(__file__).parent.parent / "data"

    cli = argparse.ArgumentParser(description="Rebuild real-fit-report.json from DB")
    cli.add_argument("--db", type=Path, default=data_dir / "real-fit.db", help="Path to SQLite DB")
    cli.add_argument(
        "--report",
        type=Path,
        default=data_dir / "real-fit-report.json",
        help="Path to report JSON output",
    )
    options = cli.parse_args()

    report = build_report(options.db)

    target = options.report
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(json.dumps(report, indent=2), encoding="utf-8")

    agent_count = len(report["agents"])
    print(f"Report rebuilt: {target}")
    print(f"Agents: {agent_count}, Evaluations: {report['total_evaluations']}")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user