feat(evolution): add real-fit dashboard, API, report builder, and docker compose

- real-fit.html: API-driven research dashboard with agent/model heatmap, detail modal with score breakdown and evaluator commentary
- api.py: FastAPI backend serving /api/real-fit-report (dynamic from SQLite), /api/research, /api/evolve-agent/start
- rebuild-report.py: generates real-fit-report.json from SQLite DB for static fallback
- docker-compose.yml: add evolution-api service (Python 3.12, uvicorn) for research endpoints
- index.standalone.html: sync with dashboard data updates
- archive/index.html: standalone dashboard snapshot (263KB)
- .gitignore: exclude *.db, research-jobs.json from tracking
2026-05-28 11:55:49 +01:00
parent dbbf4c32e1
commit b95fd41587
13 changed files with 8886 additions and 353 deletions
--- a/agent-evolution/index.standalone.html
+++ b/agent-evolution/index.standalone.html
@@ -5083,7 +5083,7 @@ async function init() {
    try {
        // Load real dashboard data FIRST (overrides stale agent-versions)
        try {
-            const dashRes = await fetch('data/dashboard-data.json');
+            const dashRes = await fetch('data/dashboard-data.json', { cache: 'no-cache' });
            if (dashRes.ok) {
                window.dashboardData = await dashRes.json();
                // Sync agentData from dashboard data for all other tabs
@@ -5439,64 +5439,63 @@ function renderRecCard(r, index) {
    `;
 }

-// Render Heatmap — REAL DATA: Agent × Current Model × Real Fit Score
+// Render Heatmap — REAL DATA: Agent × Model × Live Ollama Evaluations
 function renderHeatmap() {
    const esc = str => (str || '').replace(/[&<>"']/g, m => ({'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'}[m]));
    const dd = window.dashboardData;

    if (!dd || !dd.agents) {
-        document.getElementById('hmTable').innerHTML = '<tr><td style="color:var(--text-secondary);padding:20px;text-align:center;">⚠️ Нет данных. Запустите анализ.</td></tr>';
+        document.getElementById('hmTable').innerHTML = '<tr><td style="color:var(--text-secondary);padding:20px;text-align:center;">⚠️ No data. Run analysis.</td></tr>';
        return;
    }

-    const agents = dd.agents;
-    // Get unique models sorted by count of agents
-    const modelCounts = {};
-    agents.forEach(a => { modelCounts[a.model_short] = (modelCounts[a.model_short] || 0) + 1; });
-    const modelList = Object.entries(modelCounts)
-        .sort((a, b) => b[1] - a[1])
-        .map(([short]) => {
-            const m = dd.models[short] || {};
-            return {
-                short,
-                full: 'ollama-cloud/' + short,
-                name: m.name || short,
-                avg_fit: m.avg_fit || 0,
-                agents: m.agents || 0
-            };
-        });
+    // Collect all models from current assignments + realfit evaluations
+    const modelsSeen = new Set();
+    dd.agents.forEach(a => { if (a.model_short) modelsSeen.add(a.model_short); });
+    dd.agents.forEach(a => {
+        if (a.real_evaluations) Object.keys(a.real_evaluations).forEach(m => { if (m && m !== 'code-skeptic') modelsSeen.add(m); });
+    });
+    // Ensure real-fit evaluated models are included even if not current
+    const modelList = Array.from(modelsSeen).sort();

-    // Render table: rows=agents, cols=models
    const t = document.getElementById('hmTable');
    let h = '<thead><tr><th class="hm-role">Agent</th>';
    modelList.forEach(m => {
-        const color = m.avg_fit >= 85 ? '#00ff94' : m.avg_fit >= 70 ? '#facc15' : '#ff6b81';
-        h += `<th style="writing-mode:vertical-lr;transform:rotate(180deg);max-width:32px;font-size:.56em;padding:3px 1px;">
-            ${esc(m.name)}<br>
-            <span style="color:${color};font-size:.9em;font-weight:700">avg:${m.avg_fit}</span><br>
-            <span style="color:var(--text-muted);font-size:.8em">${m.agents}</span>
-        </th>`;
+        // Compute avg from dd.agents real_evaluations
+        let sum = 0, cnt = 0;
+        dd.agents.forEach(a => { const v = (a.real_evaluations || {})[m]; if (v > 0) { sum += v; cnt++; } });
+        const avg = cnt > 0 ? Math.round(sum / cnt) : 0;
+        const color = avg >= 85 ? '#00ff94' : avg >= 70 ? '#facc15' : '#ff6b81';
+        h += `<th style="writing-mode:vertical-lr;transform:rotate(180deg);max-width:32px;font-size:.56em;padding:3px 1px;">${esc(m)}<br><span style="color:${color};font-size:.9em;font-weight:700">avg:${avg}</span></th>`;
    });
-    h += '</tr></thead><tbody>';
+    h += '<th>Best</th><th>Score</th></tr></thead><tbody>';

-    agents.forEach(a => {
+    dd.agents.forEach(a => {
        h += `<tr><td class="hm-r">${esc(a.name)}</td>`;
-        modelList.forEach((m, j) => {
-            const isCurrent = a.model_short === m.short;
-            const score = isCurrent ? a.fit_score : 0; // Only show score for CURRENT model
-            const cur = isCurrent;
-            let marks = '';
-            if (cur) marks += '<span style="border:1px solid var(--accent-cyan);border-radius:50%;padding:1px 3px;font-size:8px">●</span>';
-            const bg = cur ? hmColor(score) : 'transparent';
-            const txt = cur ? hmText(score) : 'var(--text-muted)';
-            h += `<td style="background:${bg};color:${txt};cursor:pointer${cur ? ';box-shadow:inset 0 0 0 2px var(--accent-cyan)' : ''}" class="${cur ? 'hm-cur' : ''}"
-                title="${esc(a.name)} → ${esc(m.name)}: ${isCurrent ? 'fit=' + a.fit_score + ', if=' + a.instruction_following : 'не использует этот модель'}"
-                onmouseover="showTT(event,'${esc(a.name)}','${esc(m.name)}',${isCurrent ? a.fit_score : 0},${isCurrent},${cur},${isCurrent ? a.instruction_following : 0})"
-                onmouseout="hideTT()"
-                onclick="openHmModal(event, '${esc(a.name)}', '${esc(m.name)}', ${isCurrent ? a.fit_score : 0}, ${isCurrent ? a.instruction_following : 0})"
-            >${isCurrent ? a.fit_score : '·'}${marks}</td>`;
+        modelList.forEach(m => {
+            const isCurrent = a.model_short === m;
+            let score = 0;
+            // Prefer real-fit score, fallback to current fit_score
+            if (a.real_evaluations && a.real_evaluations[m] > 0) score = Math.round(a.real_evaluations[m]);
+            else if (isCurrent) score = Math.round(a.fit_score || 0);
+
+            let cls = 'na';
+            if (score >= 90) cls = 'high';
+            else if (score >= 75) cls = 'good';
+            else if (score >= 50) cls = 'med';
+            else if (score > 0) cls = 'low';
+
+            const curMark = isCurrent ? ' ●' : '';
+            const curStyle = isCurrent ? 'box-shadow:inset 0 0 0 2px var(--accent-cyan);' : '';
+            const bg = score > 0 ? hmColor(score) : 'transparent';
+            const txt = score >= 75 ? '#0e1219' : 'var(--text-primary)';
+            const display = score > 0 ? score : (isCurrent ? Math.round(a.fit_score || 0) : '·');
+
+            h += `<td class="score ${cls}" style="background:${bg};color:${txt};${curStyle}cursor:pointer" title="${esc(a.name)} → ${esc(m)}: ${score > 0 ? 'real fit=' + score : (isCurrent ? 'fit=' + a.fit_score : 'no data')}" onclick="openHmModal(event,'${esc(a.name)}','${esc(m)}',${score},${a.instruction_following || 0})">${display}${curMark}</td>`;
        });
-        h += '</tr>';
+        const bestModel = a.real_best_model || a.model_short;
+        const bestScore = a.real_best_score ? Math.round(a.real_best_score) : Math.round(a.fit_score || 0);
+        h += `<td>${esc(bestModel)}</td><td style="font-weight:700">${bestScore}</td></tr>`;
    });
    t.innerHTML = h + '</tbody>';
 }
@@ -5511,29 +5510,6 @@ function hmColor(v) {
    return 'rgba(90,104,128,.2)';
 }

-function hmText(v) {
-    return v >= 75 ? '#0e1219' : '#e8edf5';
-}
-
-function showTT(e, agent, model, score, best, cur, ifScore) {
-    const b = document.getElementById('ttBox'), o = document.getElementById('ttOverlay');
-    const ifColor = ifScore >= 85 ? '#00ff94' : ifScore >= 75 ? '#facc15' : '#ff6b81';
-    const ifLabel = ifScore >= 85 ? 'Excellent' : ifScore >= 75 ? 'Average' : 'Weak';
-    b.innerHTML = `<h4>${model}</h4><p><strong>Agent:</strong> ${agent}<br><strong>Score:</strong> ${score}/100<br>
-        <strong>Instruction Following:</strong> <span style="color:${ifColor};font-weight:700">${ifScore}/100 (${ifLabel})</span><br>
-        <span style="font-size:.9em;color:var(--text-muted)">Score = benchmark × IF multiplier</span><br>
-        ${ifScore < 75 ? '<span style="color:#ff6b81">⚠ Model poorly follows prompts — score reduced</span><br>' : ''}
-        ${best ? '★ <strong>Best fit</strong><br>' : ''}${cur ? '📌 <strong>Current</strong>' : ''}</p>`;
-    const r = e.target.getBoundingClientRect();
-    b.style.left = Math.min(r.left, window.innerWidth - 320) + 'px';
-    b.style.top = (r.bottom + 6) + 'px';
-    o.classList.add('show');
-}
-
-function hideTT() {
-    document.getElementById('ttOverlay').classList.remove('show');
-}
-
 // Current modal state
 let hmCurrentAgent = null;
 let hmCurrentModel = null;