feat(evolution): add real-fit dashboard, API, report builder, and docker compose

- real-fit.html: API-driven research dashboard with agent/model heatmap, detail modal with score breakdown and evaluator commentary
- api.py: FastAPI backend serving /api/real-fit-report (dynamic from SQLite), /api/research, /api/evolve-agent/start
- rebuild-report.py: generates real-fit-report.json from SQLite DB for static fallback
- docker-compose.yml: add evolution-api service (Python 3.12, uvicorn) for research endpoints
- index.standalone.html: sync with dashboard data updates
- archive/index.html: standalone dashboard snapshot (263KB)
- .gitignore: exclude *.db, research-jobs.json from tracking
This commit is contained in:
Deploy Bot
2026-05-28 11:55:49 +01:00
parent dbbf4c32e1
commit b95fd41587
13 changed files with 8886 additions and 353 deletions

View File

@@ -0,0 +1,138 @@
const fs = require('fs');
/**
 * Extract the flat `key: value` pairs from a leading `---` frontmatter block.
 *
 * Only single-line entries whose key consists of word characters and starts
 * at the beginning of the line are picked up; everything else inside the
 * block (indented keys, list items, blank lines) is ignored. Values are
 * returned as trimmed raw strings - no YAML typing or quote handling.
 *
 * @param {string} content - Full text of a markdown file.
 * @returns {Object<string, string>|null} Parsed pairs, or `null` when the
 *   text does not start with `---` or has no closing `---` line.
 */
function parseFrontmatter(content) {
  if (!content.startsWith('---')) return null;
  // The closing delimiter has to begin a line; searching for a bare '---'
  // would end the block early on a value that merely contains "---".
  const end = content.indexOf('\n---', 3);
  if (end === -1) return null;
  const data = {};
  // Split on LF or CRLF: `.` never matches '\r', so a trailing carriage
  // return would make every line of a CRLF file fail the pattern below.
  for (const line of content.slice(3, end).trim().split(/\r?\n/)) {
    const m = line.match(/^(\w+):\s*(.+)$/);
    if (m) data[m[1]] = m[2].trim();
  }
  return data;
}
/**
 * Remove comments from JSONC text so that it can be fed to JSON.parse.
 *
 * Handles `//` line comments (up to, but not including, the line break) and
 * slash-star block comments. Double-quoted string literals are copied
 * through untouched, so a value such as "https://host/path" survives.
 *
 * @param {string} str - JSONC source text.
 * @returns {string} The same text with comments removed.
 */
function stripComments(str) {
  // Alternation order matters: a string literal is consumed first and put
  // back verbatim, so comment markers inside it are never seen as comments.
  const tokens = /("(?:\\.|[^"\\])*")|\/\/.*$|\/\*[\s\S]*?\*\//gm;
  return str.replace(tokens, (match, literal) => literal ?? '');
}
/**
 * Load every `*.md` definition in `dir` whose frontmatter declares a `model`.
 *
 * Files without frontmatter, or without a `model` key, are skipped.
 *
 * @param {string} dir - Directory to scan (not recursive).
 * @param {string} defaultMode - Value for `mode` when the frontmatter has none.
 * @returns {Array<{name: string, model: string, mode: string, source: string, description: string}>}
 */
function loadDefinitions(dir, defaultMode) {
  const suffix = '.md';
  const found = [];
  for (const file of fs.readdirSync(dir)) {
    if (!file.endsWith(suffix)) continue;
    const source = `${dir}/${file}`;
    const fm = parseFrontmatter(fs.readFileSync(source, 'utf8'));
    if (!fm || !fm.model) continue;
    found.push({
      // Cut only the trailing extension: String.replace('.md', '') removes
      // the FIRST occurrence and would mangle a name like "a.mdx-b.md".
      name: file.slice(0, -suffix.length),
      model: fm.model,
      mode: fm.mode || defaultMode,
      source,
      description: fm.description || ''
    });
  }
  return found;
}
// 1. Parse agent .md files
const agents = loadDefinitions('.kilo/agents', 'subagent');
// 2. Parse command .md files
const commands = loadDefinitions('.kilo/commands', 'command');
// Human-readable model mismatches, appended to by the cross-checks below.
const issues = [];
// 3. Compare each frontmatter model with its kilo-meta.json entry (if any);
//    the meta value is recorded on the entry as `metaModel`.
const meta = JSON.parse(fs.readFileSync('kilo-meta.json', 'utf8'));
agents.forEach((entry) => {
  const metaEntry = meta.agents?.[entry.name];
  if (!metaEntry) return;
  entry.metaModel = metaEntry.model;
  if (entry.model !== metaEntry.model) {
    issues.push(`AGENT ${entry.name}: .md=${entry.model} vs meta=${metaEntry.model}`);
  }
});
commands.forEach((entry) => {
  const metaEntry = meta.commands?.[entry.name];
  if (!metaEntry) return;
  entry.metaModel = metaEntry.model;
  if (entry.model !== metaEntry.model) {
    issues.push(`CMD ${entry.name}: .md=${entry.model} vs meta=${metaEntry.model}`);
  }
});
// 4. Compare agents with the `agent` section of .kilo/kilo.jsonc; entries
//    without a model, or without a matching agent file, are ignored.
const dotKiloRaw = stripComments(fs.readFileSync('.kilo/kilo.jsonc', 'utf8'));
const dotKilo = JSON.parse(dotKiloRaw);
Object.entries(dotKilo.agent || {}).forEach(([name, cfg]) => {
  if (!cfg.model) return;
  const known = agents.find((candidate) => candidate.name === name);
  if (!known) return;
  known.kiloModel = cfg.model;
  if (known.model !== cfg.model) {
    issues.push(`AGENT ${name}: .md=${known.model} vs .kilo/kilo.jsonc=${cfg.model}`);
  }
});
// 5. Compare commands with the root kilo.jsonc.
// NOTE(review): the entries come from the `agent` section but are matched
// against command names - confirm this is intended.
const rootKiloRaw = stripComments(fs.readFileSync('kilo.jsonc', 'utf8'));
const rootKilo = JSON.parse(rootKiloRaw);
Object.entries(rootKilo.agent || {}).forEach(([name, cfg]) => {
  if (!cfg.model) return;
  const known = commands.find((candidate) => candidate.name === name);
  if (!known) return;
  known.rootModel = cfg.model;
  if (known.model !== cfg.model) {
    issues.push(`CMD ${name}: .md=${known.model} vs kilo.jsonc=${cfg.model}`);
  }
});
// 6. List every agent/command whose model id lacks the ollama-cloud/ prefix
//    (agents first, then commands).
const OLLAMA_PREFIX = 'ollama-cloud/';
const nonOllama = [];
[['agent', agents], ['command', commands]].forEach(([type, list]) => {
  list.forEach(({ name, model }) => {
    if (!model.startsWith(OLLAMA_PREFIX)) nonOllama.push({ type, name, model });
  });
});
// 7. Count how many definitions use each model.
const modelStats = {};
[...agents, ...commands].forEach(({ model }) => {
  modelStats[model] = (modelStats[model] || 0) + 1;
});
// Array.prototype.sort works in place, so `agents` / `commands` themselves
// end up ordered by name as well.
const byName = (left, right) => left.name.localeCompare(right.name);
const state = {
  generated: new Date().toISOString(),
  totalAgents: agents.length,
  totalCommands: commands.length,
  allOllama: nonOllama.length === 0,
  modelDistribution: modelStats,
  agents: agents.sort(byName),
  commands: commands.sort(byName),
  issues,
  nonOllama
};
const stateJson = JSON.stringify(state, null, 2);
fs.writeFileSync('agent-evolution/data/real-state.json', `${stateJson}\n`);
// Console report: prints the summary fields of `state`, the per-model counts
// (highest count first) and, when present, the mismatch list and the
// definitions that do not use an ollama-cloud/ model.
console.log('=== REAL SYSTEM STATE ===');
console.log('Generated:', state.generated);
console.log('Agents:', state.totalAgents);
console.log('Commands:', state.totalCommands);
console.log('All ollama-cloud/:', state.allOllama ? 'YES' : 'NO (' + nonOllama.length + ' exceptions)');
console.log('\n=== MODEL DISTRIBUTION ===');
for (const [m, c] of Object.entries(modelStats).sort((a,b) => b[1]-a[1])) {
  console.log(` ${m}: ${c}`);
}
if (issues.length > 0) {
  console.log('\n=== ISSUES ===');
  issues.forEach(i => console.log(' ⚠️', i));
}
if (nonOllama.length > 0) {
  console.log('\n=== NON-OLLAMA ===');
  nonOllama.forEach(n => console.log(' ❌', n.type, n.name, n.model));
}
console.log('\n✅ State written to agent-evolution/data/real-state.json');

View File

@@ -0,0 +1,29 @@
const fs = require('fs');
const path = require('path');
// Merge the per-agent results of real-fit-report.json into
// dashboard-data.json. The dashboard file is rewritten in place: DASH and
// OUT resolve to the same path.
const DASH = path.join(__dirname, '../data/dashboard-data.json');
const REAL = path.join(__dirname, '../data/real-fit-report.json');
const OUT = path.join(__dirname, '../data/dashboard-data.json');
const dash = JSON.parse(fs.readFileSync(DASH, 'utf-8'));
const real = JSON.parse(fs.readFileSync(REAL, 'utf-8'));
// Inject real_evaluations into each agent
dash.agents.forEach(a => {
  const r = real.agents?.[a.name];
  if (r && r.evaluations) {
    a.real_evaluations = r.evaluations;
    a.real_best_model = r.best_model;
    a.real_best_score = r.best_score;
  } else {
    a.real_evaluations = {};
    // The input is a previous output of this script, so an agent that no
    // longer has evaluations may still carry best-model fields from an
    // earlier merge; drop them so they cannot contradict the empty map.
    delete a.real_best_model;
    delete a.real_best_score;
  }
});
// Add metadata
dash.real_fit_generated = real.generated;
dash.real_fit_source = real.source;
fs.writeFileSync(OUT, JSON.stringify(dash, null, 2));
console.log('Merged real-fit data into ' + OUT);
console.log('Agents with real evals:', dash.agents.filter(a => Object.keys(a.real_evaluations||{}).length > 0).length);

View File

@@ -0,0 +1,98 @@
const fs = require('fs');
const path = require('path');
// Standalone dashboard page that this script patches in place.
const INDEX = path.join(__dirname, '../index.standalone.html');
// 1. New renderHeatmap that reads real-fit data
// Everything between the backticks is a runtime string: it is spliced into
// the HTML page as source text, so editing a character inside it (comments
// included) changes the patched page.
// What the embedded function does:
//   - columns: every model_short in window.dashboardData.agents plus every
//     evaluation key in window.realFitData.agents, minus empty values and the
//     literal 'code-skeptic';
//   - cell: the rounded real-fit score when it is > 0; otherwise the cell of
//     the agent's current model shows the rounded fit_score and any other
//     cell shows '·';
//   - the current model's cell gets an inset outline;
//   - last two columns: best model / best score, taken from real-fit data when
//     the agent has an entry there, else from the dashboard data.
const newRenderHeatmap = `function renderHeatmap() {
const esc = str => (str || '').replace(/[&<>"']/g, m => ({'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'}[m]));
const dd = window.dashboardData;
// Merge real-fit if loaded
const rf = window.realFitData || {};
const realAgents = rf.agents || {};
if (!dd || !dd.agents) {
document.getElementById('hmTable').innerHTML = '<tr><td style="color:var(--text-secondary);padding:20px;text-align:center;">⚠️ No data. Run analysis.</td></tr>';
return;
}
// Build model list from real-fit (cross-model) + current dashboard data
const modelsSeen = new Set();
dd.agents.forEach(a => { modelsSeen.add(a.model_short); });
Object.values(realAgents).forEach(a => { Object.keys(a.evaluations || {}).forEach(m => modelsSeen.add(m)); });
const modelList = Array.from(modelsSeen).filter(m => m && m !== 'code-skeptic');
const t = document.getElementById('hmTable');
let h = '<thead><tr><th class="hm-role">Agent</th>';
modelList.forEach(m => {
h += '<th style="writing-mode:vertical-lr;transform:rotate(180deg);max-width:32px;font-size:.56em;padding:3px 1px;">' + esc(m) + '</th>';
});
h += '<th>Best</th><th>Score</th></tr></thead><tbody>';
dd.agents.forEach(a => {
const realAgent = realAgents[a.name];
h += '<tr><td class="hm-r">' + esc(a.name) + '</td>';
modelList.forEach(m => {
let score = 0;
if (realAgent && realAgent.evaluations && realAgent.evaluations[m] > 0) {
score = Math.round(realAgent.evaluations[m]);
}
const isCurrent = a.model_short === m;
let cls = 'na';
if (score >= 90) cls = 'high';
else if (score >= 75) cls = 'good';
else if (score >= 50) cls = 'med';
else if (score > 0) cls = 'low';
const display = score > 0 ? score : (isCurrent ? Math.round(a.fit_score || 0) : '·');
const curStyle = isCurrent ? 'box-shadow:inset 0 0 0 2px var(--accent-cyan)' : '';
h += '<td class="score ' + cls + '" style="' + curStyle + '">' + display + '</td>';
});
const bestModel = realAgent ? (realAgent.best_model || a.model_short) : a.model_short;
const bestScore = realAgent ? Math.round(realAgent.best_score || 0) : Math.round(a.fit_score || 0);
h += '<td>' + esc(bestModel) + '</td><td style="font-weight:700">' + bestScore + '</td></tr>';
});
t.innerHTML = h + '</tbody>';
}`;
// 2. Add loadRealFitData script after dashboard load
// Source text (a runtime string, not code executed by this script) that is
// inserted into the page right after the dashboard JSON is assigned. It
// fetches data/real-fit-report.json into window.realFitData and only warns
// on the console if that fails. The snippet uses `await`, so it relies on the
// insertion point being inside an async context.
const loadRealFitData = `
// Load real-fit report for cross-model evaluation
try {
const rfRes = await fetch('data/real-fit-report.json');
if (rfRes.ok) window.realFitData = await rfRes.json();
} catch(e) { console.warn('real-fit-report.json not loaded:', e.message); }
`;
// Apply both patches to the standalone page and write it back. Each patch is
// skipped when its exact text is already present, so running the script
// again leaves an already-patched page unchanged.
let html = fs.readFileSync(INDEX, 'utf-8');
// Patch A: replace renderHeatmap function
if (html.includes(newRenderHeatmap)) {
  // Re-applying the pattern below to the new, more deeply nested function
  // would match only its first part and leave the remainder behind.
  console.log('renderHeatmap already patched, skipping');
} else {
  const oldPattern = /\/\/ Render Heatmap[\s\S]*?function renderHeatmap\(\)\s*\{[^}]*\{[^}]*\}[^}]*\}/;
  const oldMatch = html.match(oldPattern);
  if (oldMatch) {
    html = html.substring(0, oldMatch.index) + '// Render Heatmap (real-fit enabled)\n' + newRenderHeatmap + html.substring(oldMatch.index + oldMatch[0].length);
    console.log('Patched renderHeatmap');
  } else {
    console.log('Pattern A not found, trying fallback...');
    // Fallback: locate the function header and walk forward to the brace
    // that closes it. Braces are counted naively (string/regex contents are
    // not skipped).
    const start = html.indexOf('function renderHeatmap() {');
    let end = -1;
    if (start !== -1) {
      let brace = 0;
      for (let i = start; i < html.length; i++) {
        if (html[i] === '{') {
          brace++;
        } else if (html[i] === '}') {
          brace--;
          if (brace === 0) { end = i + 1; break; }
        }
      }
    }
    if (end !== -1) {
      html = html.substring(0, start) + newRenderHeatmap + '\n' + html.substring(end);
      console.log('Patched renderHeatmap (fallback)');
    } else {
      // Either the header is missing or its braces never balance; inserting
      // the new function anyway would leave two definitions in the page.
      console.warn('renderHeatmap not found or not terminated; heatmap left unchanged');
    }
  }
}
// Patch B: insert real-fit loading after dashboard load
const realFitLoader = loadRealFitData.trim();
const dashLoadPattern = /window\.dashboardData = await dashRes\.json\(\);/;
if (html.includes(realFitLoader)) {
  console.log('real-fit loader already present, skipping');
} else if (dashLoadPattern.test(html)) {
  // A replacer function keeps `$` sequences in the snippet from being read
  // as replacement patterns.
  html = html.replace(dashLoadPattern, () => 'window.dashboardData = await dashRes.json();\n' + realFitLoader);
  console.log('Patched init() to load real-fit data');
} else {
  console.warn('dashboard load statement not found; real-fit loader not inserted');
}
fs.writeFileSync(INDEX, html);
console.log('Done — ' + (fs.statSync(INDEX).size / 1024).toFixed(1) + ' KB');

View File

@@ -0,0 +1,173 @@
#!/usr/bin/env python3
"""
Rebuild real-fit-report.json from SQLite DB.
Usage:
python3 rebuild-report.py
python3 rebuild-report.py --db /path/to/real-fit.db --report /path/to/real-fit-report.json
"""
import argparse
import json
import sqlite3
import time
from datetime import datetime, timezone
from pathlib import Path
def _sync_agents_from_meta(db_path: Path) -> None:
    """Insert agents listed in kilo-meta.json that the DB does not have yet.

    The meta file is looked up three directory levels above ``db_path``.
    When it does not exist the function returns without touching the DB.
    Agents already present in the ``agents`` table are left unchanged; new
    ones are inserted with defaults for any field missing from the meta entry.

    Args:
        db_path: Path to the SQLite database containing the ``agents`` table.

    Raises:
        sqlite3.Error: If the ``agents`` table is missing or the insert fails.
        json.JSONDecodeError: If kilo-meta.json is not valid JSON.
    """
    meta_path = db_path.parent.parent.parent / "kilo-meta.json"
    if not meta_path.exists():
        return
    # Explicit encoding: JSON is UTF-8, the platform default may not be.
    with open(meta_path, encoding="utf-8") as f:
        meta = json.load(f)
    # `or {}` also covers an explicit `"agents": null`.
    meta_agents = meta.get("agents") or {}

    conn = sqlite3.connect(str(db_path))
    try:
        existing = {row[0] for row in conn.execute("SELECT name FROM agents")}
        stamp = datetime.now(timezone.utc).isoformat()
        new_rows = [
            (
                name,
                info.get("description", ""),
                info.get("category", "meta"),
                info.get("model", ""),
                info.get("color", "#6B7280"),
                stamp,
            )
            for name, info in meta_agents.items()
            if name not in existing
        ]
        conn.executemany(
            "INSERT OR IGNORE INTO agents (name, description, category, current_model, color, updated) VALUES (?, ?, ?, ?, ?, ?)",
            new_rows,
        )
        conn.commit()
    finally:
        # Release the DB handle even when a query raises.
        conn.close()
def build_report(db_path: Path) -> dict:
    """Build the real-fit report from the evaluations stored in the DB.

    Missing agents are first imported from kilo-meta.json. For every
    (agent, model) pair one evaluation is selected: rows with a non-positive
    score, a ``rubric_v1`` evaluator, an empty response or a response
    containing ``[HTTP `` are ignored; among the rest ``evolution-skeptic``
    wins over ``rubric_v2`` over any other evaluator, and the highest score
    breaks ties.

    Args:
        db_path: Path to the SQLite database.

    Returns:
        A dict with the keys ``generated`` (UTC timestamp), ``source``,
        ``total_evaluations`` (number of selected agent/model pairs),
        ``agents`` (one entry per row of the ``agents`` table, including
        agents without evaluations) and ``fit_scores`` (best model per agent
        that has at least one selected evaluation).
    """
    _sync_agents_from_meta(db_path)
    conn = sqlite3.connect(str(db_path))
    try:
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()
        cursor.execute("""
            SELECT name, description, category, current_model
            FROM agents
        """)
        agents_meta = {row["name"]: dict(row) for row in cursor.fetchall()}
        # Rows arrive grouped by agent/model with the preferred evaluator and
        # the highest score first, so the first row seen per pair is the one
        # to keep.
        cursor.execute("""
            SELECT agent_name, model, total_score
            FROM evaluations
            WHERE total_score > 0
              AND evaluator NOT LIKE '%rubric_v1%'
              AND (response IS NULL
                   OR (response NOT LIKE '%[HTTP %' AND response != ''))
            ORDER BY agent_name, model,
                     CASE evaluator
                         WHEN 'evolution-skeptic' THEN 0
                         WHEN 'rubric_v2' THEN 1
                         ELSE 2
                     END,
                     total_score DESC
        """)
        best_evals = {}
        for row in cursor.fetchall():
            per_model = best_evals.setdefault(row["agent_name"], {})
            per_model.setdefault(row["model"], row["total_score"])
    finally:
        # Release the DB handle even when a query raises.
        conn.close()

    # fit_scores holds ONE entry per agent, so it must describe that agent's
    # best model. Assigning it once per (agent, model) row would leave
    # whichever model happened to come last. Deriving it from the selected
    # evaluations also keeps it in step with best_model / best_score below.
    fit_scores = {}
    for agent_name, evals in best_evals.items():
        top_model = max(evals, key=evals.get)
        top_score = evals[top_model]
        fit_scores[agent_name] = {
            "model": top_model,
            "fit": top_score,
            # Wording kept as-is; the number is the selected score, not an
            # average.
            "explanation": (
                f"Best model for {agent_name} is {top_model} "
                f"with avg score {top_score:.1f}. "
                "Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)."
            ),
        }

    agents_report = {}
    for agent_name, meta in agents_meta.items():
        evals = best_evals.get(agent_name, {})
        if evals:
            best_model = max(evals, key=evals.get)
            best_score = evals[best_model]
        else:
            best_model = ""
            best_score = 0.0
        agents_report[agent_name] = {
            "name": agent_name,
            "evaluations": evals,
            "info": [
                meta.get("description") or "",
                meta.get("category") or "",
                meta.get("current_model") or "",
            ],
            "best_model": best_model,
            "best_score": best_score,
        }

    total_evals = sum(len(evals) for evals in best_evals.values())
    generated = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    return {
        "generated": generated,
        "source": "real-fit-engine-db-filtered",
        "total_evaluations": total_evals,
        "agents": agents_report,
        "fit_scores": fit_scores,
    }
def main():
    """Command-line entry point: rebuild the report and write it as JSON.

    ``--db`` and ``--report`` both default to files in the ``data`` directory
    that sits next to this script's parent directory. The report's directory
    is created when missing.
    """
    data_dir = Path(__file__).parent.parent / "data"

    cli = argparse.ArgumentParser(description="Rebuild real-fit-report.json from DB")
    cli.add_argument("--db", type=Path, default=data_dir / "real-fit.db", help="Path to SQLite DB")
    cli.add_argument(
        "--report",
        type=Path,
        default=data_dir / "real-fit-report.json",
        help="Path to report JSON output",
    )
    options = cli.parse_args()

    report = build_report(options.db)

    target = options.report
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("w", encoding="utf-8") as handle:
        json.dump(report, handle, indent=2)

    agent_count = len(report["agents"])
    print(f"Report rebuilt: {target}")
    print(f"Agents: {agent_count}, Evaluations: {report['total_evaluations']}")


if __name__ == "__main__":
    main()