feat(evolution): add real-fit dashboard, API, report builder, and docker compose

- real-fit.html: API-driven research dashboard with agent/model heatmap, detail modal with score breakdown and evaluator commentary
- api.py: FastAPI backend serving /api/real-fit-report (dynamic from SQLite), /api/research, /api/evolve-agent/start
- rebuild-report.py: generates real-fit-report.json from SQLite DB for static fallback
- docker-compose.yml: add evolution-api service (Python 3.12, uvicorn) for research endpoints
- index.standalone.html: sync with dashboard data updates
- archive/index.html: standalone dashboard snapshot (263KB)
- .gitignore: exclude *.db, research-jobs.json from tracking
This commit is contained in:
Deploy Bot
2026-05-28 11:55:49 +01:00
parent dbbf4c32e1
commit b95fd41587
13 changed files with 8886 additions and 353 deletions

View File

@@ -5083,7 +5083,7 @@ async function init() {
try {
// Load real dashboard data FIRST (overrides stale agent-versions)
try {
const dashRes = await fetch('data/dashboard-data.json');
const dashRes = await fetch('data/dashboard-data.json', { cache: 'no-cache' });
if (dashRes.ok) {
window.dashboardData = await dashRes.json();
// Sync agentData from dashboard data for all other tabs
@@ -5439,64 +5439,63 @@ function renderRecCard(r, index) {
`;
}
/**
 * Render the agent × model heatmap into the `#hmTable` element.
 *
 * Reads `window.dashboardData.agents`. For every agent the following fields
 * are consulted: `name`, `model_short` (currently assigned model),
 * `fit_score`, `instruction_following`, `real_evaluations` (map of model
 * name to score), `real_best_model` and `real_best_score`.
 *
 * Layout: one row per agent, one column per model (the union of assigned
 * models and models that appear in `real_evaluations`, sorted by name),
 * followed by "Best" and "Score" columns. Each model column header shows the
 * average of the positive real-evaluation scores for that model.
 *
 * Relies on `hmColor` and `openHmModal`, which are defined elsewhere in the
 * file. When no agent data is available a single "no data" row is rendered.
 *
 * @returns {void}
 */
function renderHeatmap() {
  const HTML_ESCAPES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };

  // Escape a value for use in HTML text or a double-quoted attribute.
  // Non-string values are stringified instead of throwing on `.replace`.
  const esc = (value) => (value == null ? '' : String(value)).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);

  // Encode a value as a JavaScript string literal that is safe inside a
  // double-quoted inline event-handler attribute. The HTML parser decodes
  // entities before the handler is compiled, so wrapping esc() output in
  // '...' is not enough: an apostrophe would terminate the literal.
  const jsArg = (value) => esc(JSON.stringify(value == null ? '' : String(value)));

  // Positive real-evaluation score of `agent` for `model`, or 0 when absent.
  // Number() keeps string scores from being concatenated when summed.
  const realScore = (agent, model) => {
    const value = Number((agent.real_evaluations || {})[model]);
    return value > 0 ? value : 0;
  };

  const table = document.getElementById('hmTable');
  const dd = window.dashboardData;
  if (!dd || !Array.isArray(dd.agents)) {
    table.innerHTML = '<tr><td style="color:var(--text-secondary);padding:20px;text-align:center;">⚠️ No data. Run analysis.</td></tr>';
    return;
  }
  const agents = dd.agents;

  // Collect every model that is either currently assigned or has a real evaluation.
  const modelsSeen = new Set();
  agents.forEach((a) => {
    if (a.model_short) modelsSeen.add(a.model_short);
    Object.keys(a.real_evaluations || {}).forEach((m) => {
      // NOTE(review): 'code-skeptic' is excluded by the original code; presumably it is
      // an agent name that leaks into the evaluation keys — confirm against the API.
      if (m && m !== 'code-skeptic') modelsSeen.add(m);
    });
  });
  const modelList = Array.from(modelsSeen).sort();

  // Header row: agent label, one column per model with its average, then Best/Score.
  let h = '<thead><tr><th class="hm-role">Agent</th>';
  modelList.forEach((m) => {
    let sum = 0;
    let cnt = 0;
    agents.forEach((a) => {
      const v = realScore(a, m);
      if (v > 0) {
        sum += v;
        cnt += 1;
      }
    });
    const avg = cnt > 0 ? Math.round(sum / cnt) : 0;
    const color = avg >= 85 ? '#00ff94' : avg >= 70 ? '#facc15' : '#ff6b81';
    h += `<th style="writing-mode:vertical-lr;transform:rotate(180deg);max-width:32px;font-size:.56em;padding:3px 1px;">${esc(m)}<br><span style="color:${color};font-size:.9em;font-weight:700">avg:${avg}</span></th>`;
  });
  h += '<th>Best</th><th>Score</th></tr></thead><tbody>';

  // Body rows.
  agents.forEach((a) => {
    const fitScore = Math.round(Number(a.fit_score) || 0);
    const ifScore = Number(a.instruction_following) || 0;
    h += `<tr><td class="hm-r">${esc(a.name)}</td>`;

    modelList.forEach((m) => {
      const isCurrent = a.model_short === m;
      const real = realScore(a, m);
      const hasReal = real > 0;
      // Prefer the real-fit score; fall back to fit_score for the assigned model.
      const score = hasReal ? Math.round(real) : (isCurrent ? fitScore : 0);

      let cls = 'na';
      if (score >= 90) cls = 'high';
      else if (score >= 75) cls = 'good';
      else if (score >= 50) cls = 'med';
      else if (score > 0) cls = 'low';

      const curMark = isCurrent ? ' ●' : '';
      const curStyle = isCurrent ? 'box-shadow:inset 0 0 0 2px var(--accent-cyan);' : '';
      const bg = score > 0 ? hmColor(score) : 'transparent';
      const txt = score >= 75 ? '#0e1219' : 'var(--text-primary)';
      const display = score > 0 ? score : (isCurrent ? fitScore : '·');
      // Label the tooltip by where the number came from, so a fallback
      // fit_score is not presented as a real evaluation.
      const detail = hasReal ? `real fit=${score}` : (isCurrent ? `fit=${fitScore}` : 'no data');

      h += `<td class="score ${cls}" style="background:${bg};color:${txt};${curStyle}cursor:pointer" title="${esc(a.name)} × ${esc(m)}: ${detail}" onclick="openHmModal(event,${jsArg(a.name)},${jsArg(m)},${score},${ifScore})">${display}${curMark}</td>`;
    });

    const bestModel = a.real_best_model || a.model_short;
    const realBest = Number(a.real_best_score);
    const bestScore = realBest ? Math.round(realBest) : fitScore;
    h += `<td>${esc(bestModel)}</td><td style="font-weight:700">${bestScore}</td></tr>`;
  });

  table.innerHTML = h + '</tbody>';
}
@@ -5511,29 +5510,6 @@ function hmColor(v) {
return 'rgba(90,104,128,.2)';
}
/**
 * Pick the text colour for a heatmap cell.
 *
 * @param {number} v - Cell score.
 * @returns {string} Dark text for scores of 75 and above, light text otherwise.
 */
function hmText(v) {
  if (v >= 75) {
    return '#0e1219';
  }
  return '#e8edf5';
}
/**
 * Show the heatmap tooltip next to the hovered cell.
 *
 * Fills `#ttBox` with the agent, model, score and instruction-following
 * details, positions it just below the event target, and adds the `show`
 * class to `#ttOverlay`.
 *
 * `agent` and `model` are HTML-escaped before being written to `innerHTML`:
 * when this function is called from an inline event handler, the HTML parser
 * has already decoded any entities in the arguments, so they arrive as raw text.
 *
 * @param {Event} e - Mouse event; its target's bounding rect anchors the tooltip.
 * @param {string} agent - Agent name.
 * @param {string} model - Model name.
 * @param {number} score - Fit score shown as "score/100".
 * @param {boolean} best - Whether to show the "Best fit" marker.
 * @param {boolean} cur - Whether to show the "Current" marker.
 * @param {number} ifScore - Instruction-following score shown as "ifScore/100".
 * @returns {void}
 */
function showTT(e, agent, model, score, best, cur, ifScore) {
  const HTML_ESCAPES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  const esc = (value) => (value == null ? '' : String(value)).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);

  const b = document.getElementById('ttBox');
  const o = document.getElementById('ttOverlay');
  const ifColor = ifScore >= 85 ? '#00ff94' : ifScore >= 75 ? '#facc15' : '#ff6b81';
  const ifLabel = ifScore >= 85 ? 'Excellent' : ifScore >= 75 ? 'Average' : 'Weak';

  b.innerHTML = `<h4>${esc(model)}</h4><p><strong>Agent:</strong> ${esc(agent)}<br><strong>Score:</strong> ${esc(score)}/100<br>
<strong>Instruction Following:</strong> <span style="color:${ifColor};font-weight:700">${esc(ifScore)}/100 (${ifLabel})</span><br>
<span style="font-size:.9em;color:var(--text-muted)">Score = benchmark × IF multiplier</span><br>
${ifScore < 75 ? '<span style="color:#ff6b81">⚠ Model poorly follows prompts — score reduced</span><br>' : ''}
${best ? '★ <strong>Best fit</strong><br>' : ''}${cur ? '📌 <strong>Current</strong>' : ''}</p>`;

  // Anchor below the hovered cell; clamp left so the box stays within the viewport.
  const r = e.target.getBoundingClientRect();
  b.style.left = Math.min(r.left, window.innerWidth - 320) + 'px';
  b.style.top = (r.bottom + 6) + 'px';
  o.classList.add('show');
}
/**
 * Hide the heatmap tooltip by removing the `show` class from `#ttOverlay`.
 *
 * @returns {void}
 */
function hideTT() {
  const overlay = document.getElementById('ttOverlay');
  overlay.classList.remove('show');
}
// Current modal state: the agent and model the heatmap detail modal refers to.
// Both start as null. NOTE(review): presumably assigned when the modal is
// opened — confirm against the modal code.
let hmCurrentAgent = null;
let hmCurrentModel = null;