feat(scripts): add real-fit evaluation engine and supporting test scripts
- real-fit-engine.py: refactored to support --from-report, improved Ollama v1/chat/completions compatibility, agent name normalization
- run-focused-eval.py: run evaluations for specific agent/model pairs from CLI
- test_ollama_minimal.py / test_real_api.py: Ollama API connectivity tests
- real-fit-architecture.md: architecture overview document
- tests/scripts/: E2E landing test, analytics capture, evolution heatmap verification
- Remove real-fit-recalc.py (superseded by --from-report flag)
This commit is contained in:
93
agent-evolution/data/real-fit.html
Normal file
93
agent-evolution/data/real-fit.html
Normal file
@@ -0,0 +1,93 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="ru">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Real-Fit Matrix — Agent × Model Performance</title>
|
||||
<style>
|
||||
:root{--bg:#0a0f1a;--bg2:#0f1525;--bg3:#141c2e;--bdr:#1e2d45;--txt:#e8f1ff;--txt2:#8ba3c0;--cyan:#00d4ff;--green:#00ff94;--red:#ff4757;--orange:#ff9f43;--purple:#a855f7;}
|
||||
*{margin:0;padding:0;box-sizing:border-box}
|
||||
body{font-family:system-ui,-apple-system,sans-serif;background:var(--bg);color:var(--txt);min-height:100vh;padding:24px}
|
||||
h1{font-size:1.6rem;background:linear-gradient(90deg,var(--cyan),var(--green));-webkit-background-clip:text;-webkit-text-fill-color:transparent;margin-bottom:8px}
|
||||
.sub{color:var(--txt2);font-size:.85rem;margin-bottom:20px}
|
||||
table{width:100%;border-collapse:collapse;font-size:.82rem}
|
||||
th,td{padding:8px 10px;border:1px solid var(--bdr);text-align:center}
|
||||
th{background:var(--bg2);color:var(--txt2);font-size:.72rem;text-transform:uppercase;letter-spacing:.5px;position:sticky;top:0}
|
||||
td:first-child{text-align:left;font-weight:700;white-space:nowrap}
|
||||
td.score{font-weight:700;font-family:monospace}
|
||||
.hm-cur{box-shadow:inset 0 0 0 2px var(--cyan)}
|
||||
.high{background:rgba(0,255,148,.18);color:var(--green)}
|
||||
.good{background:rgba(0,212,255,.14);color:var(--cyan)}
|
||||
.med{background:rgba(168,85,247,.15);color:var(--purple)}
|
||||
.low{background:rgba(255,71,87,.1);color:var(--red)}
|
||||
.na{background:transparent;color:var(--txt2);font-size:.9rem}
|
||||
.legend{display:flex;gap:12px;flex-wrap:wrap;margin-top:16px;font-size:.78rem;color:var(--txt2)}
|
||||
.legend span{display:flex;align-items:center;gap:4px}
|
||||
.dot{width:14px;height:14px;border-radius:3px}
|
||||
.meta{font-size:.72rem;color:var(--txt2);margin-top:12px}
|
||||
a{color:var(--cyan);text-decoration:none}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Real-Fit Matrix</h1>
|
||||
<div class="sub">Real agent × model evaluation scores via live Ollama API (28 calls, 4 models, 7 agents)</div>
|
||||
|
||||
<div id="matrix"></div>
|
||||
<div class="legend">
|
||||
<span><span class="dot high"></span> 90+ Excellent</span>
|
||||
<span><span class="dot good"></span> 75–89 Good</span>
|
||||
<span><span class="dot med"></span> 50–74 Average</span>
|
||||
<span><span class="dot low"></span> &lt;50 Weak</span>
|
||||
<span style="margin-left:auto">● = assigned model</span>
|
||||
</div>
|
||||
<div class="meta">Data source: <a href="data/real-fit-report.json" target="_blank">real-fit-report.json</a> | Updated: <span id="updated"></span></div>
|
||||
|
||||
<script>
|
||||
/**
 * Escape a value for safe interpolation into HTML text or attribute content.
 *
 * @param {*} value - Any value; it is stringified before escaping.
 * @returns {string} The value with &, <, >, " and ' replaced by entities.
 */
function escapeHtml(value) {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Map a positive score to the CSS classes used by the heatmap legend.
 *
 * @param {number} score - Evaluation score (expected to be > 0).
 * @returns {string} 'score high' (>= 90), 'score good' (>= 75),
 *   'score med' (>= 50) or 'score low' (anything below).
 */
function scoreClass(score) {
  if (score >= 90) return 'score high';
  if (score >= 75) return 'score good';
  if (score >= 50) return 'score med';
  return 'score low';
}

/**
 * Fetch the real-fit report and render the agent × model score matrix.
 *
 * Reads `data/real-fit-report.json`, writes the report timestamp into the
 * `#updated` element and replaces the content of `#matrix` with a table that
 * has one row per agent and one column per model, followed by the agent's
 * best model and best score.
 *
 * @returns {Promise<void>} Resolves once the table has been rendered.
 * @throws {Error} If the HTTP response is not OK; JSON parse failures and
 *   malformed report structures propagate as rejections as well.
 */
async function load() {
  const res = await fetch('data/real-fit-report.json');
  // fetch() only rejects on network failure; a 404/500 must be checked
  // explicitly, otherwise the error page would be fed to the JSON parser.
  if (!res.ok) {
    throw new Error(`Failed to load real-fit-report.json: HTTP ${res.status}`);
  }
  const data = await res.json();

  const generated = new Date(data.generated);
  document.getElementById('updated').textContent = Number.isNaN(generated.getTime())
    ? '—'
    : generated.toLocaleString('ru-RU');

  // Keep only agents that have at least one positive evaluation score.
  const agents = Object.values(data.agents).filter((a) =>
    Object.values(a.evaluations || {}).some((s) => s > 0)
  );

  // Collect the union of model names across the remaining agents.
  const models = new Set();
  agents.forEach((a) => Object.keys(a.evaluations).forEach((m) => models.add(m)));
  const modelList = Array.from(models).sort();

  // Build the table. Every value that originates from the report is escaped,
  // because the result is assigned to innerHTML below.
  const parts = ['<table><thead><tr><th>Agent</th>'];
  modelList.forEach((m) => parts.push(`<th>${escapeHtml(m)}</th>`));
  parts.push('<th>Best</th><th>Score</th></tr></thead><tbody>');

  agents.forEach((a) => {
    parts.push(`<tr><td>${escapeHtml(a.name)}</td>`);

    modelList.forEach((m) => {
      const score = a.evaluations[m];
      // NOTE(review): info[2] is presumably the assigned model(s). If it is a
      // string this is a substring match, if an array an element match —
      // confirm against the report schema.
      const isCur = Boolean(a.info && a.info[2] && a.info[2].includes(m));
      const hasScore = score !== undefined && score > 0;
      const cls = hasScore ? scoreClass(score) : 'na';
      const text = hasScore ? Math.round(score) : '—';
      const curCls = isCur ? ' hm-cur' : '';
      parts.push(`<td class="${cls}${curCls}">${text}${isCur ? ' ●' : ''}</td>`);
    });

    // Fall back to a dash instead of rendering "undefined" / "NaN".
    const bestModel = a.best_model == null ? '—' : escapeHtml(a.best_model);
    const bestScoreNum = Number(a.best_score);
    const bestScore = Number.isFinite(bestScoreNum) ? Math.round(bestScoreNum) : '—';
    parts.push(`<td>${bestModel}</td><td style="font-weight:700">${bestScore}</td></tr>`);
  });

  parts.push('</tbody></table>');
  document.getElementById('matrix').innerHTML = parts.join('');
}
|
||||
// Start rendering and surface any failure inside the matrix container.
// textContent (not innerHTML) is used because error messages may contain
// markup — e.g. a JSON parse error quoting the start of an HTML error page —
// which must be shown literally rather than parsed.
load().catch((e) => {
  document.getElementById('matrix').textContent = `Error: ${e}`;
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
Reference in New Issue
Block a user