- Restore all 30 agents to v3.html heatmap optimal models:
* frontend-developer: qwen3-coder -> minimax-m2.5 (92★)
* devops-engineer: nemotron-3-super -> kimi-k2.6:cloud (88★)
* browser-automation: qwen3-coder -> kimi-k2.6:cloud (86★)
* agent-architect: glm-5.1 -> kimi-k2.6:cloud (86★)
- Add Model Evolution Guard system:
* agent-evolution/scripts/lib/fitness-gate.cjs
* Rejects downgrades >3 points or below score 75
* Produces detailed diff report before any file modifications
* Normalized model ID lookup (v3.html ':' vs JSON '-')
- Update sync-benchmarks-from-yaml.cjs with fitness gate
- Update model-benchmarks.json with v3 optimal assignments
- Rebuild research-dashboard.html (104KB, 30 agents, 11 models)
- Add model-evolution-guard.md architecture documentation
- Add v3-optimal-models.json as source-of-truth reference
Fixes regression introduced by commit 3badb25 where models were
silently downgraded from heatmap optimal to inferior assignments.
2722 lines
105 KiB
HTML
2722 lines
105 KiB
HTML
<!DOCTYPE html>
|
||
<html lang="ru">
|
||
<head>
|
||
<meta charset="UTF-8">
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||
<title>APAW Agent Model Research — generated 2026-04-29</title>
|
||
<link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@300;400;500;600;700&family=Outfit:wght@300;400;500;600;700;800;900&display=swap" rel="stylesheet">
|
||
<style>
|
||
:root {
|
||
--bg-deep: #080b12;
|
||
--bg-panel: #0e1219;
|
||
--bg-card: #141922;
|
||
--bg-card-hover: #1a2130;
|
||
--border: #1e2736;
|
||
--border-bright: #2a3650;
|
||
--text-primary: #e8edf5;
|
||
--text-secondary: #8896aa;
|
||
--text-muted: #5a6880;
|
||
--accent-cyan: #00d4ff;
|
||
--accent-green: #00ff94;
|
||
--accent-orange: #ff9f43;
|
||
--accent-red: #ff4757;
|
||
--accent-purple: #a855f7;
|
||
--accent-blue: #3b82f6;
|
||
--accent-yellow: #facc15;
|
||
--glow-cyan: rgba(0,212,255,0.15);
|
||
--glow-green: rgba(0,255,148,0.1);
|
||
--glow-orange: rgba(255,159,67,0.1);
|
||
}
|
||
* { margin:0; padding:0; box-sizing:border-box; }
|
||
body {
|
||
font-family:'Outfit',sans-serif;
|
||
background:var(--bg-deep);
|
||
color:var(--text-primary);
|
||
min-height:100vh;
|
||
overflow-x:hidden;
|
||
}
|
||
body::before {
|
||
content:'';
|
||
position:fixed; top:0; left:0; right:0; bottom:0;
|
||
background:
|
||
linear-gradient(90deg,rgba(0,212,255,0.02) 1px,transparent 1px),
|
||
linear-gradient(rgba(0,212,255,0.02) 1px,transparent 1px);
|
||
background-size:60px 60px;
|
||
animation:gp 8s ease-in-out infinite;
|
||
pointer-events:none; z-index:0;
|
||
}
|
||
@keyframes gp { 0%,100%{opacity:.3} 50%{opacity:.6} }
|
||
.container { max-width:1540px; margin:0 auto; padding:24px 16px; position:relative; z-index:1; }
|
||
|
||
/* HEADER */
|
||
.header { text-align:center; margin-bottom:32px; animation:fadeDown .7s ease-out; }
|
||
@keyframes fadeDown { from{opacity:0;transform:translateY(-20px)} to{opacity:1;transform:translateY(0)} }
|
||
.header h1 {
|
||
font-size:2.6em; font-weight:900;
|
||
background:linear-gradient(135deg,var(--accent-cyan),var(--accent-green),var(--accent-purple));
|
||
-webkit-background-clip:text; -webkit-text-fill-color:transparent;
|
||
letter-spacing:-1px;
|
||
}
|
||
.header .sub { font-family:'JetBrains Mono',monospace; color:var(--text-muted); font-size:.8em; margin-top:6px; letter-spacing:1px; }
|
||
|
||
/* TABS */
|
||
.tabs { display:flex; gap:3px; background:var(--bg-panel); border:1px solid var(--border); border-radius:12px; padding:4px; margin-bottom:28px; overflow-x:auto; }
|
||
.tab-btn {
|
||
flex:1; min-width:100px; padding:10px 8px; background:none; border:none; color:var(--text-secondary);
|
||
font-family:'Outfit',sans-serif; font-size:.82em; font-weight:600; border-radius:9px; cursor:pointer; transition:all .25s; white-space:nowrap;
|
||
}
|
||
.tab-btn:hover { color:var(--text-primary); background:var(--bg-card); }
|
||
.tab-btn.active { color:var(--bg-deep); background:linear-gradient(135deg,var(--accent-cyan),var(--accent-green)); box-shadow:0 0 16px var(--glow-cyan); }
|
||
.tab-panel { display:none; animation:fadeUp .4s ease-out; }
|
||
.tab-panel.active { display:block; }
|
||
@keyframes fadeUp { from{opacity:0;transform:translateY(16px)} to{opacity:1;transform:translateY(0)} }
|
||
|
||
/* STAT CARDS */
|
||
.stats-row { display:grid; grid-template-columns:repeat(auto-fit,minmax(190px,1fr)); gap:14px; margin-bottom:26px; }
|
||
.stat-card {
|
||
background:var(--bg-card); border:1px solid var(--border); border-radius:11px; padding:18px; position:relative; overflow:hidden; transition:all .3s;
|
||
}
|
||
.stat-card:hover { border-color:var(--accent-cyan); transform:translateY(-2px); box-shadow:0 6px 24px var(--glow-cyan); }
|
||
.stat-label { font-family:'JetBrains Mono',monospace; font-size:.65em; color:var(--text-muted); text-transform:uppercase; letter-spacing:1.5px; margin-bottom:6px; }
|
||
.stat-value { font-size:2em; font-weight:800; }
|
||
.stat-sub { font-size:.75em; color:var(--text-secondary); margin-top:3px; }
|
||
.grad-cyan { background:linear-gradient(135deg,var(--accent-cyan),var(--accent-green)); -webkit-background-clip:text; -webkit-text-fill-color:transparent; }
|
||
.grad-orange { background:linear-gradient(135deg,var(--accent-orange),var(--accent-yellow)); -webkit-background-clip:text; -webkit-text-fill-color:transparent; }
|
||
.grad-purple { background:linear-gradient(135deg,var(--accent-purple),#e879f9); -webkit-background-clip:text; -webkit-text-fill-color:transparent; }
|
||
.grad-green { background:linear-gradient(135deg,var(--accent-green),#4ade80); -webkit-background-clip:text; -webkit-text-fill-color:transparent; }
|
||
.grad-red { background:linear-gradient(135deg,var(--accent-red),#ff6b81); -webkit-background-clip:text; -webkit-text-fill-color:transparent; }
|
||
|
||
/* SECTION HEADERS */
|
||
.sec-hdr { display:flex; align-items:center; gap:10px; margin-bottom:18px; padding-bottom:10px; border-bottom:1px solid var(--border); }
|
||
.sec-hdr h2 { font-size:1.2em; font-weight:700; }
|
||
.badge { font-family:'JetBrains Mono',monospace; font-size:.65em; padding:3px 9px; border-radius:16px; }
|
||
.badge-cyan { background:var(--glow-cyan); color:var(--accent-cyan); border:1px solid rgba(0,212,255,.2); }
|
||
.badge-orange { background:var(--glow-orange); color:var(--accent-orange); border:1px solid rgba(255,159,67,.2); }
|
||
.badge-green { background:var(--glow-green); color:var(--accent-green); border:1px solid rgba(0,255,148,.2); }
|
||
|
||
/* TABLES */
|
||
.tbl-wrap { overflow-x:auto; border-radius:11px; border:1px solid var(--border); background:var(--bg-card); margin-bottom:26px; }
|
||
table.dt { width:100%; border-collapse:collapse; font-size:.84em; }
|
||
table.dt th {
|
||
font-family:'JetBrains Mono',monospace; font-size:.7em; color:var(--text-muted); text-transform:uppercase;
|
||
letter-spacing:1.2px; padding:12px 14px; background:var(--bg-panel); border-bottom:2px solid var(--border); text-align:left; position:sticky; top:0;
|
||
}
|
||
table.dt td { padding:10px 14px; border-bottom:1px solid var(--border); transition:background .15s; }
|
||
table.dt tr:hover td { background:var(--bg-card-hover); }
|
||
|
||
.mbadge { display:inline-block; padding:3px 8px; border-radius:5px; font-family:'JetBrains Mono',monospace; font-size:.78em; font-weight:500; }
|
||
.mbadge.qwen { background:rgba(59,130,246,.12); color:#60a5fa; border:1px solid rgba(59,130,246,.25); }
|
||
.mbadge.gptoss { background:rgba(168,85,247,.12); color:#c084fc; border:1px solid rgba(168,85,247,.25); }
|
||
.mbadge.glm { background:rgba(0,255,148,.08); color:#00ff94; border:1px solid rgba(0,255,148,.2); }
|
||
.mbadge.minimax { background:rgba(255,159,67,.12); color:#ff9f43; border:1px solid rgba(255,159,67,.25); }
|
||
.mbadge.devstral { background:rgba(0,212,255,.12); color:#00d4ff; border:1px solid rgba(0,212,255,.25); }
|
||
.mbadge.deepseek { background:rgba(250,204,21,.12); color:#facc15; border:1px solid rgba(250,204,21,.25); }
|
||
.mbadge.nemotron { background:rgba(34,197,94,.12); color:#4ade80; border:1px solid rgba(34,197,94,.25); }
|
||
.mbadge.groq { background:rgba(255,71,87,.12); color:#ff6b81; border:1px solid rgba(255,71,87,.25); }
|
||
.mbadge.kimi { background:rgba(250,204,21,.12); color:#fde68a; border:1px solid rgba(250,204,21,.2); }
|
||
.mbadge.llama { background:rgba(59,130,246,.1); color:#93c5fd; border:1px solid rgba(59,130,246,.2); }
|
||
|
||
/* SCORE BAR */
|
||
.sbar { display:flex; align-items:center; gap:6px; }
|
||
.sbar-bg { width:70px; height:5px; background:var(--border); border-radius:3px; overflow:hidden; }
|
||
.sbar-fill { height:100%; border-radius:3px; transition:width 1s ease-out; }
|
||
.sbar-fill.h { background:linear-gradient(90deg,var(--accent-green),#00ff94); }
|
||
.sbar-fill.m { background:linear-gradient(90deg,var(--accent-orange),#ffc048); }
|
||
.sbar-fill.l { background:linear-gradient(90deg,var(--accent-red),#ff6b81); }
|
||
.snum { font-family:'JetBrains Mono',monospace; font-weight:600; font-size:.85em; min-width:28px; }
|
||
|
||
/* GROQ SPEED INDICATOR */
|
||
.speed-ind { display:inline-flex; align-items:center; gap:4px; }
|
||
.speed-dot { width:7px; height:7px; border-radius:50%; animation:pulse 1.5s ease-in-out infinite; }
|
||
.speed-dot.ultra { background:var(--accent-green); box-shadow:0 0 8px var(--accent-green); }
|
||
.speed-dot.fast { background:var(--accent-cyan); box-shadow:0 0 6px var(--accent-cyan); }
|
||
.speed-dot.normal { background:var(--accent-orange); }
|
||
@keyframes pulse { 0%,100%{opacity:.5;transform:scale(.8)} 50%{opacity:1;transform:scale(1.2)} }
|
||
|
||
/* RECOMMENDATION CARDS */
|
||
.rec-grid { display:grid; grid-template-columns:repeat(auto-fit,minmax(400px,1fr)); gap:16px; margin-bottom:26px; }
|
||
.rec-card {
|
||
background:var(--bg-card); border:1px solid var(--border); border-radius:12px; padding:20px;
|
||
position:relative; overflow:hidden; transition:all .35s;
|
||
}
|
||
.rec-card:hover { border-color:var(--accent-green); box-shadow:0 0 30px var(--glow-green); transform:translateY(-2px); }
|
||
.rec-card.glow { animation:glowP 3s ease-in-out infinite; }
|
||
@keyframes glowP { 0%,100%{box-shadow:0 0 16px var(--glow-green)} 50%{box-shadow:0 0 32px var(--glow-green)} }
|
||
.rec-hdr { display:flex; justify-content:space-between; align-items:flex-start; margin-bottom:12px; }
|
||
.rec-agent { font-weight:700; font-size:1em; color:var(--accent-cyan); }
|
||
.impact-badge { padding:2px 8px; border-radius:16px; font-family:'JetBrains Mono',monospace; font-size:.68em; font-weight:600; }
|
||
.impact-badge.critical { background:rgba(255,71,87,.18); color:var(--accent-red); border:1px solid rgba(255,71,87,.25); }
|
||
.impact-badge.high { background:rgba(255,159,67,.18); color:var(--accent-orange); border:1px solid rgba(255,159,67,.25); }
|
||
.impact-badge.medium { background:rgba(250,204,21,.18); color:var(--accent-yellow); border:1px solid rgba(250,204,21,.25); }
|
||
.swap-vis { display:flex; align-items:center; gap:10px; margin:12px 0; padding:12px; background:var(--bg-panel); border-radius:8px; }
|
||
.swap-from { font-family:'JetBrains Mono',monospace; font-size:.78em; padding:4px 8px; border-radius:5px; background:rgba(255,71,87,.08); color:#ff6b81; border:1px solid rgba(255,71,87,.15); text-decoration:line-through; opacity:.65; }
|
||
.swap-to { font-family:'JetBrains Mono',monospace; font-size:.78em; padding:4px 8px; border-radius:5px; background:rgba(0,255,148,.08); color:#00ff94; border:1px solid rgba(0,255,148,.2); font-weight:600; }
|
||
.swap-arrow { color:var(--accent-green); font-size:1.4em; animation:arrP 2s ease-in-out infinite; }
|
||
@keyframes arrP { 0%,100%{opacity:.4;transform:scale(1)} 50%{opacity:1;transform:scale(1.12)} }
|
||
.rec-metrics { display:grid; grid-template-columns:repeat(4,1fr); gap:8px; margin-top:12px; }
|
||
.rec-m { text-align:center; padding:6px; background:var(--bg-deep); border-radius:6px; }
|
||
.rec-m-label { font-size:.6em; color:var(--text-muted); text-transform:uppercase; letter-spacing:.8px; font-family:'JetBrains Mono',monospace; }
|
||
.rec-m-val { font-size:1.1em; font-weight:700; margin-top:1px; }
|
||
.rec-m-val.pos { color:var(--accent-green); }
|
||
.rec-m-val.neu { color:var(--accent-orange); }
|
||
.rec-reason { font-size:.82em; color:var(--text-secondary); line-height:1.55; margin-top:10px; padding-top:10px; border-top:1px solid var(--border); }
|
||
|
||
/* HEATMAP */
|
||
.hm-wrap { overflow-x:auto; border-radius:11px; border:1px solid var(--border); background:var(--bg-card); padding:18px; margin-bottom:26px; }
|
||
.hm-title { font-weight:700; font-size:1.05em; }
|
||
.hm-sub { font-size:.76em; color:var(--text-muted); margin-bottom:14px; }
|
||
.hm-table { border-collapse:collapse; width:100%; }
|
||
.hm-table th { font-family:'JetBrains Mono',monospace; font-size:.62em; color:var(--text-muted); padding:6px 4px; text-align:center; white-space:nowrap; }
|
||
.hm-table th.hm-role { text-align:left; min-width:150px; font-size:.68em; }
|
||
.hm-table td { text-align:center; padding:5px 3px; font-family:'JetBrains Mono',monospace; font-size:.74em; font-weight:600; border-radius:3px; cursor:pointer; transition:all .12s; min-width:38px; }
|
||
.hm-table td:hover { transform:scale(1.12); z-index:2; }
|
||
.hm-table td.hm-r { text-align:left; font-family:'Outfit',sans-serif; font-size:.78em; font-weight:500; color:var(--text-secondary); cursor:default; }
|
||
.hm-table td.hm-r:hover { transform:none; }
|
||
.hm-star { color:#FFD700; font-size:.85em; }
|
||
.hm-cur { outline:2px solid var(--accent-cyan); outline-offset:-2px; }
|
||
|
||
/* PROVIDER TAGS */
|
||
.prov-tag { display:inline-block; padding:1px 6px; border-radius:3px; font-size:.62em; font-family:'JetBrains Mono',monospace; margin-left:4px; }
|
||
.prov-tag.ollama { background:rgba(0,212,255,.1); color:var(--accent-cyan); }
|
||
.prov-tag.groq { background:rgba(255,71,87,.1); color:#ff6b81; }
|
||
.prov-tag.openrouter { background:rgba(168,85,247,.1); color:#c084fc; }
|
||
.prov-tag.hybrid { background:rgba(0,255,148,.1); color:#00ff94; }
|
||
|
||
/* MODEL CARDS */
|
||
.model-grid { display:grid; grid-template-columns:repeat(auto-fit,minmax(290px,1fr)); gap:14px; margin-bottom:26px; }
|
||
.mc { background:var(--bg-card); border:1px solid var(--border); border-radius:12px; padding:18px; transition:all .3s; position:relative; }
|
||
.mc:hover { transform:translateY(-2px); border-color:var(--accent-cyan); box-shadow:0 6px 24px var(--glow-cyan); }
|
||
.mc-name { font-weight:700; font-size:1.05em; margin-bottom:3px; }
|
||
.mc-org { font-size:.74em; color:var(--text-muted); margin-bottom:12px; font-family:'JetBrains Mono',monospace; }
|
||
.mc-row { display:flex; justify-content:space-between; align-items:center; padding:5px 0; border-bottom:1px solid rgba(30,39,54,.4); font-size:.82em; }
|
||
.mc-row:last-child { border-bottom:none; }
|
||
.mc-label { color:var(--text-secondary); }
|
||
.mc-val { font-family:'JetBrains Mono',monospace; font-weight:600; }
|
||
.mc-tags { display:flex; flex-wrap:wrap; gap:3px; margin-top:10px; }
|
||
.mc-tag { font-size:.64em; padding:2px 6px; border-radius:3px; font-family:'JetBrains Mono',monospace; background:rgba(0,212,255,.06); color:var(--accent-cyan); border:1px solid rgba(0,212,255,.12); }
|
||
.mc-best { font-size:.72em; padding:3px 8px; border-radius:4px; background:rgba(0,255,148,.1); color:var(--accent-green); border:1px solid rgba(0,255,148,.2); margin-top:8px; display:inline-block; }
|
||
|
||
/* GROQ SECTION */
|
||
.groq-card { border-left:3px solid var(--accent-red); }
|
||
.groq-speed { font-family:'JetBrains Mono',monospace; font-size:1.8em; font-weight:800; color:var(--accent-red); }
|
||
|
||
/* SUMMARY */
|
||
.summary { background:linear-gradient(135deg,rgba(0,212,255,.04),rgba(0,255,148,.04)); border:1px solid var(--border-bright); border-radius:12px; padding:24px; margin-bottom:26px; }
|
||
.summary h3 { color:var(--accent-cyan); font-size:1.1em; margin-bottom:10px; }
|
||
.summary p { color:var(--text-secondary); line-height:1.65; font-size:.88em; }
|
||
.summary ul { list-style:none; margin-top:10px; }
|
||
.summary li { padding:5px 0 5px 18px; position:relative; color:var(--text-secondary); font-size:.86em; line-height:1.55; }
|
||
.summary li::before { content:'›'; position:absolute; left:0; color:var(--accent-green); font-weight:700; font-size:1.2em; }
|
||
|
||
/* FILTER ROW */
|
||
.frow { display:flex; gap:6px; margin-bottom:16px; flex-wrap:wrap; }
|
||
.fbtn { padding:5px 12px; background:var(--bg-card); border:1px solid var(--border); color:var(--text-secondary); border-radius:7px; font-family:'Outfit',sans-serif; font-size:.78em; cursor:pointer; transition:all .2s; }
|
||
.fbtn:hover,.fbtn.active { border-color:var(--accent-cyan); color:var(--accent-cyan); background:rgba(0,212,255,.06); }
|
||
|
||
/* TOOLTIP */
|
||
#ttOverlay { display:none; position:fixed; top:0;left:0;right:0;bottom:0; z-index:999; pointer-events:none; }
|
||
#ttOverlay.show { display:block; }
|
||
#ttBox { position:absolute; background:var(--bg-panel); border:1px solid var(--accent-cyan); border-radius:9px; padding:12px 16px; max-width:300px; box-shadow:0 10px 32px rgba(0,0,0,.55); z-index:1000; }
|
||
#ttBox h4 { color:var(--accent-cyan); font-size:.9em; margin-bottom:4px; }
|
||
#ttBox p { font-size:.78em; color:var(--text-secondary); line-height:1.45; }
|
||
|
||
/* CANVAS */
|
||
.chart-wrap { border-radius:11px; border:1px solid var(--border); background:var(--bg-card); padding:18px; margin-bottom:26px; }
|
||
.chart-title { font-weight:700; font-size:1.05em; margin-bottom:12px; }
|
||
|
||
@media(max-width:768px) {
|
||
.header h1 { font-size:1.6em; }
|
||
.tabs { flex-wrap:wrap; }
|
||
.rec-grid,.model-grid { grid-template-columns:1fr; }
|
||
.stats-row { grid-template-columns:repeat(2,1fr); }
|
||
.rec-metrics { grid-template-columns:repeat(2,1fr); }
|
||
}
|
||
|
||
/* EXPORT BUTTONS */
|
||
.export-btn {
|
||
padding:8px 16px; background:var(--bg-card); border:1px solid var(--border-bright);
|
||
color:var(--text-secondary); font-family:'Outfit',sans-serif; font-size:.82em; font-weight:600;
|
||
border-radius:8px; cursor:pointer; transition:all .25s; display:inline-flex; align-items:center;
|
||
}
|
||
.export-btn:hover { border-color:var(--accent-cyan); color:var(--text-primary); background:var(--bg-card-hover); }
|
||
.export-btn-primary {
|
||
background:linear-gradient(135deg,rgba(0,212,255,.15),rgba(0,255,148,.1));
|
||
border-color:var(--accent-cyan); color:var(--accent-cyan);
|
||
}
|
||
.export-btn-primary:hover { background:linear-gradient(135deg,rgba(0,212,255,.25),rgba(0,255,148,.18)); box-shadow:0 0 20px var(--glow-cyan); }
|
||
|
||
/* REC CARD CHECKBOX */
|
||
.rec-check { position:absolute; top:14px; right:14px; width:22px; height:22px; cursor:pointer; z-index:3; accent-color:var(--accent-green); }
|
||
.rec-card.selected { border-color:var(--accent-green); background:rgba(0,255,148,.03); }
|
||
.rec-card { position:relative; }
|
||
</style>
|
||
</head>
|
||
<body>
|
||
<div class="container">
|
||
<div class="header">
|
||
<h1>APAW Agent Model Research v2</h1>
|
||
<div class="sub">Live dashboard • 15 models × 30 agents • 2026-04-29</div>
|
||
</div>
|
||
|
||
<div class="tabs" id="tabBar">
|
||
<button class="tab-btn active" onclick="switchTab('overview')">Обзор</button>
|
||
<button class="tab-btn" onclick="switchTab('groq')">Groq Free Tier</button>
|
||
<button class="tab-btn" onclick="switchTab('models')">Все модели</button>
|
||
<button class="tab-btn" onclick="switchTab('heatmap')">Матрица</button>
|
||
<button class="tab-btn" onclick="switchTab('recs')">Рекомендации</button>
|
||
<button class="tab-btn" onclick="switchTab('impact')">Анализ профита</button>
|
||
</div>
|
||
|
||
<!-- ========== TAB: OVERVIEW ========== -->
|
||
<div id="tab-overview" class="tab-panel active">
|
||
<div class="stats-row">
|
||
<div class="stat-card"><div class="stat-label">Агентов</div><div class="stat-value grad-cyan" id="c1">36</div><div class="stat-sub">32 custom + 4 built-in</div></div>
|
||
<div class="stat-card"><div class="stat-label">Моделей сейчас</div><div class="stat-value grad-orange">6</div><div class="stat-sub">Coder(9) GLM-5.1(11) K2.6(4)</div></div>
|
||
<div class="stat-card"><div class="stat-label">Ollama Cloud</div><div class="stat-value grad-purple">20+</div><div class="stat-sub">доступно бесплатно</div></div>
|
||
<div class="stat-card"><div class="stat-label">Groq + OpenRouter</div><div class="stat-value grad-red">16+</div><div class="stat-sub">free tier моделей</div></div>
|
||
<div class="stat-card"><div class="stat-label">Рекомендаций</div><div class="stat-value grad-green">11</div><div class="stat-sub">8/8 applied ✅</div></div>
|
||
</div>
|
||
|
||
<div class="summary">
|
||
<h3>Ключевые находки v3 (после коммита caf77f53c8)</h3>
|
||
<p>Ваш агент уже применил 11 из моих рекомендаций (коммит от 05:21). Но я обнаружил, что <strong>до применения</strong> некоторые агенты были на других моделях, чем я предполагал:</p>
|
||
<ul>
|
||
<li><strong style="color:var(--accent-red)">⚠ Откат Qwen 3.6 Plus</strong> — security-auditor, prompt-optimizer, product-owner и markdown-validator <em>до коммита</em> были на <code>openrouter/qwen3.6-plus:free</code> и <code>deepseek-v3.2</code>, но мои рекомендации их заменили на Ollama-модели. Это снижает разнообразие провайдеров!</li>
|
||
<li><strong style="color:var(--accent-green)">✅ 11 замен уже применены</strong> — Nemotron 3 Super теперь на 7 ролях, GLM-5 расширен, Qwen3-Coder на Go, markdown-validator</li>
|
||
<li><strong style="color:var(--accent-orange)">🔴 Осталось 3 агента на gpt-oss:120b</strong> — requirement-refiner, capability-analyst, agent-architect. Всем им нужен Nemotron 3 Super</li>
|
||
<li><strong>Новая стратегия: гибридный мультипровайдер</strong> — OpenRouter (Qwen 3.6 Plus FREE, 1M ctx) + Groq (gpt-oss 500 t/s) + Ollama (основной). Диверсификация снижает зависимость</li>
|
||
<li><strong style="color:#00ff94">Qwen 3.6 Plus стоит вернуть</strong> для prompt-optimizer (Terminal-Bench 61.6% > Claude!) и product-owner (1M контекст для backlog)</li>
|
||
|
||
<li><strong>History-miner → Nemotron 3 Super</strong> — самый большой оставшийся прирост: 88 vs 78 (GLM-5). RULER@1M критичен для git history</li>
|
||
<li><strong style="color:var(--accent-red)">⚠ Prompt Adherence (IF) — новый фактор!</strong> Nemotron 3 Super имеет IF=78 (ниже GLM-5=90, Qwen3.5=92, Qwen3.6+=91). Для ролей с жёстким промптом (evaluator, security-auditor, orchestrator) это снижает эффективность. Qwen 3.6 Plus и GLM-5 лучше следуют инструкциям</li>
|
||
</ul>
|
||
</div>
|
||
|
||
<div class="sec-hdr"><h2>Текущая конфигурация</h2><span class="badge badge-cyan">capability-index.yaml</span></div>
|
||
<div class="tbl-wrap">
|
||
<table class="dt" id="cfgTable"><thead><tr>
|
||
<th>Агент</th><th>Модель</th><th>Провайдер</th><th>Категория</th><th>Соответствие</th><th>Статус</th>
|
||
</tr></thead><tbody id="cfgBody"></tbody></table>
|
||
</div>
|
||
</div>
|
||
|
||
<!-- ========== TAB: GROQ ========== -->
|
||
<div id="tab-groq" class="tab-panel">
|
||
<div class="sec-hdr"><h2>Groq Free Plan — доступные модели</h2><span class="badge badge-orange">бесплатно · LPU inference</span></div>
|
||
|
||
<div class="stats-row">
|
||
<div class="stat-card groq-card"><div class="stat-label">gpt-oss-20b</div><div class="groq-speed">1200 <span style="font-size:.4em;color:var(--text-muted)">t/s</span></div><div class="stat-sub">30 RPM · 1K RPD · 200K TPD</div></div>
|
||
</div>
|
||
|
||
<div class="summary">
|
||
<h3>Анализ лимитов Groq Free для агентского pipeline</h3>
|
||
<p>При 26 агентах в pipeline каждый агент делает 5–20 вызовов на задачу. Типичный issue проходит через 8–12 агентов = <strong>~100–200 вызовов</strong>. С лимитом 1K RPD на модель:</p>
|
||
<ul>
|
||
|
||
|
||
<li><strong>Groq Compound</strong>: всего 250 RPD, но 70K TPM — для одноразовых тяжёлых аналитических задач</li>
|
||
</ul>
|
||
</div>
|
||
|
||
<div class="sec-hdr"><h2>Все модели Groq Free Tier</h2></div>
|
||
<div class="tbl-wrap">
|
||
<table class="dt">
|
||
<thead><tr><th>Model ID</th><th>RPM</th><th>RPD</th><th>TPM</th><th>TPD</th><th>Скорость</th><th>Применение в APAW</th></tr></thead>
|
||
<tbody id="groqBody"></tbody>
|
||
</table>
|
||
</div>
|
||
</div>
|
||
|
||
<!-- ========== TAB: MODELS ========== -->
|
||
<div id="tab-models" class="tab-panel">
|
||
<div class="sec-hdr"><h2>Все доступные модели</h2><span class="badge badge-cyan">Ollama Cloud + Groq + OpenRouter Free</span></div>
|
||
<div class="frow" id="filterRow"></div>
|
||
<div class="model-grid" id="modelGrid"></div>
|
||
</div>
|
||
|
||
<!-- ========== TAB: HEATMAP ========== -->
|
||
<div id="tab-heatmap" class="tab-panel">
|
||
<div class="hm-wrap">
|
||
<div class="hm-title">Матрица «Агент × Модель»: оценка совместимости (с учётом Prompt Adherence)</div>
|
||
<div class="hm-sub">0–100 · Взвешенная оценка = 60% бенчмарк роли + 25% Instruction Following + 15% скорость/контекст · ★ = лучший · <span style="outline:2px solid var(--accent-cyan);outline-offset:-2px;padding:0 3px;border-radius:2px">обведено</span> = текущий · <strong style="color:var(--accent-yellow)">← 11 моделей · 🟢L 🟡M 🔴H = Reasoning Effort →</strong></div>
|
||
<div style="overflow-x:auto"><table class="hm-table" id="hmTable"></table></div>
|
||
</div>
|
||
</div>
|
||
|
||
<!-- ========== TAB: RECOMMENDATIONS ========== -->
|
||
<div id="tab-recs" class="tab-panel">
|
||
<div class="sec-hdr"><h2>Рекомендации</h2><span class="badge badge-green">4 замены (2 BROKEN) + 7 подтверждений 06.04.2026</span></div>
|
||
|
||
<div style="display:flex;gap:10px;margin-bottom:18px;flex-wrap:wrap;align-items:center;">
|
||
<button onclick="toggleAllRecs()" class="export-btn" id="selectAllBtn">☑ Выбрать все</button>
|
||
<button onclick="exportJSON()" class="export-btn export-btn-primary" id="exportBtn">
|
||
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" style="vertical-align:-2px;margin-right:4px"><path d="M21 15v4a2 2 0 01-2 2H5a2 2 0 01-2-2v-4"/><polyline points="7 10 12 15 17 10"/><line x1="12" y1="15" x2="12" y2="3"/></svg>
|
||
Скачать JSON для агента
|
||
</button>
|
||
<span id="selectedCount" style="font-family:'JetBrains Mono',monospace;font-size:.75em;color:var(--text-muted);">0 из 11 выбрано</span>
|
||
</div>
|
||
|
||
<div class="rec-grid" id="recGrid"></div>
|
||
|
||
<!-- JSON Preview Modal -->
|
||
<div id="jsonModal" style="display:none;position:fixed;top:0;left:0;right:0;bottom:0;background:rgba(0,0,0,.7);z-index:9999;justify-content:center;align-items:center;padding:20px;">
|
||
<div style="background:var(--bg-panel);border:1px solid var(--accent-cyan);border-radius:14px;max-width:800px;width:100%;max-height:85vh;display:flex;flex-direction:column;box-shadow:0 20px 60px rgba(0,0,0,.5);">
|
||
<div style="display:flex;justify-content:space-between;align-items:center;padding:18px 22px;border-bottom:1px solid var(--border);">
|
||
<div>
|
||
<div style="font-weight:700;font-size:1.05em;">agent-model-recommendations.json</div>
|
||
<div style="font-size:.75em;color:var(--text-muted);margin-top:2px;font-family:'JetBrains Mono',monospace">Готов для передачи агенту-оркестратору</div>
|
||
</div>
|
||
<div style="display:flex;gap:8px;">
|
||
<button onclick="copyJSON()" class="export-btn" id="copyBtn">📋 Копировать</button>
|
||
<button onclick="downloadJSON()" class="export-btn export-btn-primary">⬇ Скачать .json</button>
|
||
<button onclick="closeModal()" class="export-btn" style="border-color:var(--accent-red);color:var(--accent-red);">✕</button>
|
||
</div>
|
||
</div>
|
||
<pre id="jsonPreview" style="flex:1;overflow:auto;padding:18px 22px;margin:0;font-family:'JetBrains Mono',monospace;font-size:.78em;line-height:1.6;color:var(--accent-green);background:var(--bg-deep);border-radius:0 0 14px 14px;"></pre>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
<!-- ========== TAB: IMPACT ========== -->
|
||
<div id="tab-impact" class="tab-panel">
|
||
<div class="sec-hdr"><h2>Совокупный анализ профита</h2><span class="badge badge-green">если применить все рекомендации</span></div>
|
||
<div class="stats-row">
|
||
<div class="stat-card"><div class="stat-label">Средний прирост</div><div class="stat-value grad-green">+12</div><div class="stat-sub">пунктов по матрице</div></div>
|
||
<div class="stat-card"><div class="stat-label">Применено</div><div class="stat-value grad-red">8/8</div><div class="stat-sub">все рекомендации ✅</div></div>
|
||
<div class="stat-card"><div class="stat-label">Qwen 3.6+</div><div class="stat-value grad-purple">0</div><div class="stat-sub">полностью на Ollama!</div></div>
|
||
<div class="stat-card"><div class="stat-label">GLM-5.1</div><div class="stat-value grad-orange">12</div><div class="stat-sub">10 custom + 2 built-in</div></div>
|
||
</div>
|
||
<div class="chart-wrap">
|
||
<div class="chart-title">Прирост по категориям: до → после</div>
|
||
<canvas id="impactCanvas" height="340"></canvas>
|
||
</div>
|
||
<div class="summary">
|
||
<h3>Детальный анализ прироста</h3>
|
||
<div id="impactDetails"></div>
|
||
|
||
<div style="margin-top:32px">
|
||
<div class="sec-hdr"><h2>APAW Pipeline vs ТОП закрытых моделей (апрель 2026)</h2></div>
|
||
<p style="font-size:.82em;color:var(--text-muted);margin-bottom:16px">
|
||
Сравнение лучших моделей в вашем pipeline с лидерами рынка по ключевым бенчмаркам.
|
||
<strong style="color:var(--accent-green)">🟢</strong> = APAW обгоняет,
|
||
<strong style="color:var(--accent-yellow)">🟡</strong> = на уровне (±3%),
|
||
<strong style="color:var(--accent-red)">🔴</strong> = отстаёт
|
||
</p>
|
||
<div style="overflow-x:auto">
|
||
<table id="benchTable" style="width:100%;border-collapse:collapse;font-size:.78em;font-family:'JetBrains Mono',monospace">
|
||
</table>
|
||
</div>
|
||
<p style="font-size:.72em;color:var(--text-muted);margin-top:12px">
|
||
* SWE-V = SWE-Bench Verified, SWE-P = SWE-Bench Pro, T-Bench = Terminal-Bench 2.0, LCB = LiveCodeBench, GPQA = GPQA Diamond<br>
|
||
Данные: swebench.com, marc0.dev, tokenmix.ai, ollama.com — апрель 2026. Стоимость: примерная за 1M input tokens.
|
||
</p>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
<div id="ttOverlay"><div id="ttBox"></div></div>
|
||
|
||
<script>
|
||
// BENCHMARK_DATA_PLACEHOLDER - REPLACED BY BUILD SCRIPT
|
||
// Generated from model-benchmarks.json on 2026-04-29T22:15:07.925Z
|
||
const EMBEDDED_DATA = {
|
||
"version": "1.0.0",
|
||
"generated": "2026-04-29T21:47:05.339Z",
|
||
"source": ".kilo/capability-index.yaml (synced v3 + fitness-gate)",
|
||
"total_agents": 30,
|
||
"total_models_tracked": 11,
|
||
"providers": [
|
||
"ollama",
|
||
"ollama-cloud",
|
||
"openrouter",
|
||
"groq"
|
||
],
|
||
"models": [
|
||
{
|
||
"id": "qwen3-coder-480b",
|
||
"name": "Qwen3-Coder 480B",
|
||
"organization": "Qwen",
|
||
"parameters": "480B/35B active",
|
||
"context_window": "256K→1M",
|
||
"swe_bench": 66.5,
|
||
"if_score": 88,
|
||
"categories": [
|
||
"coding",
|
||
"agent"
|
||
],
|
||
"description": "SOTA open-source кодинг. Сравним с Claude Sonnet 4.",
|
||
"tags": [
|
||
"coding",
|
||
"agent",
|
||
"tools"
|
||
],
|
||
"openrouter": false,
|
||
"provider": "ollama"
|
||
},
|
||
{
|
||
"id": "minimax-m2.5",
|
||
"name": "MiniMax M2.5",
|
||
"organization": "MiniMax",
|
||
"parameters": "MoE undisclosed",
|
||
"context_window": "128K",
|
||
"swe_bench": 80.2,
|
||
"if_score": 82,
|
||
"categories": [
|
||
"coding",
|
||
"agent"
|
||
],
|
||
"description": "Лидер SWE-bench 80.2%. Полный lifecycle разработки.",
|
||
"tags": [
|
||
"coding",
|
||
"agent"
|
||
],
|
||
"openrouter": false,
|
||
"provider": "ollama"
|
||
},
|
||
{
|
||
"id": "minimax-m2.7",
|
||
"name": "MiniMax M2.7",
|
||
"organization": "MiniMax",
|
||
"parameters": "~10B active",
|
||
"context_window": "128K",
|
||
"swe_bench": 78,
|
||
"if_score": 80,
|
||
"categories": [
|
||
"coding",
|
||
"agent",
|
||
"efficient"
|
||
],
|
||
"description": "Самообучаемая. 56.2% SWE-Pro. 100 TPS. $0.30/M.",
|
||
"tags": [
|
||
"coding",
|
||
"agent",
|
||
"self-evolving"
|
||
],
|
||
"openrouter": false,
|
||
"provider": "ollama"
|
||
},
|
||
{
|
||
"id": "deepseek-v4-pro-max",
|
||
"name": "DeepSeek V4-Pro",
|
||
"organization": "DeepSeek",
|
||
"parameters": "1.6T/49B active MoE",
|
||
"context_window": "1M",
|
||
"swe_bench": 80.6,
|
||
"if_score": 89,
|
||
"categories": [
|
||
"coding",
|
||
"agent",
|
||
"reasoning"
|
||
],
|
||
"description": "SWE-V 80.6, LiveCodeBench 93.5(#1!), Terminal-Bench 67.9, Codeforces 3206, 1M ctx, 27% FLOPs vs V3.2. MIT.",
|
||
"tags": [
|
||
"coding",
|
||
"agent",
|
||
"thinking",
|
||
"tools"
|
||
],
|
||
"openrouter": false,
|
||
"provider": "ollama-cloud"
|
||
},
|
||
{
|
||
"id": "deepseek-v4-flash",
|
||
"name": "DeepSeek V4-Flash",
|
||
"organization": "DeepSeek",
|
||
"parameters": "284B/13B active MoE",
|
||
"context_window": "1M",
|
||
"swe_bench": 79,
|
||
"if_score": 86,
|
||
"categories": [
|
||
"coding",
|
||
"efficient",
|
||
"agent"
|
||
],
|
||
"description": "SWE-V ~79%, Flash Max = Pro уровень reasoning. 13B active = ультрабыстрый. 1M ctx. FP4+FP8. MIT.",
|
||
"tags": [
|
||
"coding",
|
||
"efficient",
|
||
"agent",
|
||
"thinking"
|
||
],
|
||
"openrouter": false,
|
||
"provider": "ollama-cloud"
|
||
},
|
||
{
|
||
"id": "kimi-k2-6",
|
||
"name": "Kimi K2.6",
|
||
"organization": "Moonshot AI",
|
||
"parameters": "1T/32B active MoE",
|
||
"context_window": "256K",
|
||
"swe_bench": 80.2,
|
||
"if_score": 91,
|
||
"categories": [
|
||
"coding",
|
||
"agent",
|
||
"multimodal"
|
||
],
|
||
"description": "SWE-Pro 58.6(#1!), SWE-V 80.2, Terminal-Bench 66.7, HLE 54.0(#1!), BrowseComp 83.2. 13h autonomous. 300 sub-agent swarm. Modified MIT.",
|
||
"tags": [
|
||
"coding",
|
||
"agent",
|
||
"swarm",
|
||
"vision",
|
||
"thinking",
|
||
"tools"
|
||
],
|
||
"openrouter": false,
|
||
"provider": "ollama-cloud"
|
||
},
|
||
{
|
||
"id": "nemotron-3-super",
|
||
"name": "Nemotron 3 Super",
|
||
"organization": "NVIDIA",
|
||
"parameters": "120B/12B active",
|
||
"context_window": "1M",
|
||
"swe_bench": 60.5,
|
||
"if_score": 78,
|
||
"categories": [
|
||
"agent",
|
||
"reasoning",
|
||
"efficient"
|
||
],
|
||
"description": "SWE-bench 60.5%. RULER@1M 91.75%! Но IF ниже — Mamba-layers иногда «теряют» инструкции в длинных промптах.",
|
||
"tags": [
|
||
"agent",
|
||
"1M-ctx",
|
||
"thinking"
|
||
],
|
||
"openrouter": false,
|
||
"provider": "ollama"
|
||
},
|
||
{
|
||
"id": "glm-5.1",
|
||
"name": "GLM-5.1",
|
||
"organization": "Z.ai",
|
||
"parameters": "744B/40B active",
|
||
"context_window": "128K",
|
||
"swe_bench": null,
|
||
"if_score": 90,
|
||
"categories": [
|
||
"reasoning",
|
||
"agent"
|
||
],
|
||
"description": "Мощный reasoning. Arena ELO 1451. Отличный instruction following (IFEval ~90+).",
|
||
"tags": [
|
||
"reasoning",
|
||
"agent"
|
||
],
|
||
"openrouter": false,
|
||
"provider": "ollama"
|
||
},
|
||
{
|
||
"id": "deepseek-v4",
|
||
"name": "DeepSeek V4-Pro",
|
||
"organization": "DeepSeek",
|
||
"parameters": "Large MoE",
|
||
"context_window": "128K",
|
||
"swe_bench": null,
|
||
"if_score": 75,
|
||
"categories": [
|
||
"reasoning"
|
||
],
|
||
"description": "Хороший reasoning, но IF нестабилен — иногда игнорирует формат вывода.",
|
||
"tags": [
|
||
"reasoning"
|
||
],
|
||
"openrouter": false,
|
||
"provider": "ollama"
|
||
},
|
||
{
|
||
"id": "qwen3-5-122b",
|
||
"name": "Qwen 3.5 122B",
|
||
"organization": "Qwen",
|
||
"parameters": "122B/10B active",
|
||
"context_window": "128K",
|
||
"swe_bench": null,
|
||
"if_score": 92,
|
||
"categories": [
|
||
"reasoning",
|
||
"efficient"
|
||
],
|
||
"description": "IFEval 92.6%! Лучший IF среди open-source. Multimodal. Thinking.",
|
||
"tags": [
|
||
"vision",
|
||
"thinking",
|
||
"tools"
|
||
],
|
||
"openrouter": false,
|
||
"provider": "ollama"
|
||
},
|
||
{
|
||
"id": "qwen3-coder-next",
|
||
"name": "Qwen3-Coder-Next",
|
||
"organization": "Qwen",
|
||
"parameters": "80B/3B active",
|
||
"context_window": "128K",
|
||
"swe_bench": 70,
|
||
"if_score": 84,
|
||
"categories": [
|
||
"coding",
|
||
"efficient"
|
||
],
|
||
"description": "70% SWE-bench с 3B active! Хороший IF для кодинга.",
|
||
"tags": [
|
||
"coding",
|
||
"efficient",
|
||
"tools"
|
||
],
|
||
"openrouter": false,
|
||
"provider": "ollama"
|
||
},
|
||
{
|
||
"id": "cogito-2-1-671b",
|
||
"name": "Cogito 2.1 671B",
|
||
"organization": "Cognitive",
|
||
"parameters": "671B MoE",
|
||
"context_window": "128K",
|
||
"swe_bench": null,
|
||
"if_score": 76,
|
||
"categories": [
|
||
"reasoning"
|
||
],
|
||
"description": "MIT лицензия. 671B total. IF неплохой, но уступает GLM/Qwen.",
|
||
"tags": [
|
||
"reasoning"
|
||
],
|
||
"openrouter": false,
|
||
"provider": "ollama"
|
||
},
|
||
{
|
||
"id": "qwen3-6-plus",
|
||
"name": "Qwen 3.6 Plus",
|
||
"organization": "Qwen",
|
||
"parameters": "Hybrid MoE",
|
||
"context_window": "1M",
|
||
"swe_bench": 78.8,
|
||
"if_score": 91,
|
||
"categories": [
|
||
"coding",
|
||
"agent",
|
||
"reasoning"
|
||
],
|
||
"description": "FREE на OpenRouter! 1M контекст. Always-on CoT. Превосходный IF — наследник Qwen 3.5 (92.6%).",
|
||
"tags": [
|
||
"coding",
|
||
"agent",
|
||
"1M-ctx",
|
||
"free"
|
||
],
|
||
"openrouter": true,
|
||
"provider": "openrouter"
|
||
},
|
||
{
|
||
"id": "step-3-5-flash",
|
||
"name": "Step 3.5 Flash",
|
||
"organization": "StepFun",
|
||
"parameters": "MoE",
|
||
"context_window": "128K",
|
||
"swe_bench": null,
|
||
"if_score": 79,
|
||
"categories": [
|
||
"efficient"
|
||
],
|
||
"description": "Бесплатна на OpenRouter. IF средний.",
|
||
"tags": [
|
||
"efficient",
|
||
"free"
|
||
],
|
||
"openrouter": true,
|
||
"provider": "openrouter"
|
||
},
|
||
{
|
||
"id": "deepseek-r1",
|
||
"name": "DeepSeek R1",
|
||
"organization": "DeepSeek",
|
||
"parameters": "671B MoE",
|
||
"context_window": "128K",
|
||
"swe_bench": null,
|
||
"if_score": 73,
|
||
"categories": [
|
||
"reasoning"
|
||
],
|
||
"description": "Мощные reasoning-цепочки. Но IF слабый — часто генерирует лишний reasoning вместо ответа.",
|
||
"tags": [
|
||
"reasoning",
|
||
"thinking",
|
||
"free"
|
||
],
|
||
"openrouter": true,
|
||
"provider": "openrouter"
|
||
}
|
||
],
|
||
"groq_models": [
|
||
{
|
||
"id": "openai/gpt-oss-20b",
|
||
"rpm": 30,
|
||
"rpd": "1K",
|
||
"tpm": "8K",
|
||
"tpd": "200K",
|
||
"speed": "1200+",
|
||
"use_case": "Ультра-быстрый fallback для лёгких ролей (markdown-validator)."
|
||
},
|
||
{
|
||
"id": "llama-3.1-8b-instant",
|
||
"rpm": 30,
|
||
"rpd": "14.4K",
|
||
"tpm": "6K",
|
||
"tpd": "500K",
|
||
"speed": "~800",
|
||
"use_case": "14.4K RPD! Самый высокий лимит. Для health-check / ping ролей."
|
||
},
|
||
{
|
||
"id": "groq/compound",
|
||
"rpm": 30,
|
||
"rpd": "250",
|
||
"tpm": "70K",
|
||
"tpd": "—",
|
||
"speed": "varies",
|
||
"use_case": "Мультимодельная агрегация. Для research-задач."
|
||
},
|
||
{
|
||
"id": "groq/compound-mini",
|
||
"rpm": 30,
|
||
"rpd": "250",
|
||
"tpm": "70K",
|
||
"tpd": "—",
|
||
"speed": "varies",
|
||
"use_case": "Лёгкая версия compound."
|
||
},
|
||
{
|
||
"id": "llama-prompt-guard-2",
|
||
"rpm": 30,
|
||
"rpd": "14.4K",
|
||
"tpm": "15K",
|
||
"tpd": "500K",
|
||
"speed": "~1K",
|
||
"use_case": "Security: входной фильтр для security-auditor (14.4K RPD!)."
|
||
}
|
||
],
|
||
"agent_model_scores": [
|
||
{
|
||
"agent": "lead-developer",
|
||
"current_model_index": 0,
|
||
"current_model_id": "qwen3-coder-480b",
|
||
"reasoning_effort": "H",
|
||
"scores": {
|
||
"qwen3-coder-480b": 92,
|
||
"minimax-m2.5": 86,
|
||
"minimax-m2.7": 82,
|
||
"nemotron-3-super": 70,
|
||
"glm-5.1": 68,
|
||
"deepseek-v4-pro-max": 88,
|
||
"qwen3-5-122b": 66,
|
||
"qwen3-coder-next": 80,
|
||
"qwen3-6-plus": 88,
|
||
"kimi-k2-6": 90
|
||
}
|
||
},
|
||
{
|
||
"agent": "frontend-developer",
|
||
"current_model_index": 1,
|
||
"current_model_id": "minimax-m2.5",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 86,
|
||
"minimax-m2.5": 92,
|
||
"minimax-m2.7": 88,
|
||
"nemotron-3-super": 62,
|
||
"glm-5.1": 56,
|
||
"deepseek-v4-pro-max": 82,
|
||
"qwen3-5-122b": 60,
|
||
"qwen3-coder-next": 76,
|
||
"qwen3-6-plus": 88,
|
||
"kimi-k2-6": 86
|
||
}
|
||
},
|
||
{
|
||
"agent": "php-developer",
|
||
"current_model_index": 0,
|
||
"current_model_id": "qwen3-coder-480b",
|
||
"reasoning_effort": "H",
|
||
"scores": {
|
||
"qwen3-coder-480b": 87,
|
||
"minimax-m2.5": 76,
|
||
"minimax-m2.7": 72,
|
||
"nemotron-3-super": 64,
|
||
"glm-5.1": 56,
|
||
"deepseek-v4-pro-max": 74,
|
||
"qwen3-5-122b": 60,
|
||
"qwen3-coder-next": 76,
|
||
"qwen3-6-plus": 84,
|
||
"kimi-k2-6": 86
|
||
}
|
||
},
|
||
{
|
||
"agent": "python-developer",
|
||
"current_model_index": 0,
|
||
"current_model_id": "qwen3-coder-480b",
|
||
"reasoning_effort": "H",
|
||
"scores": {
|
||
"qwen3-coder-480b": 90,
|
||
"minimax-m2.5": 82,
|
||
"minimax-m2.7": 78,
|
||
"nemotron-3-super": 66,
|
||
"glm-5.1": 60,
|
||
"deepseek-v4-pro-max": 78,
|
||
"qwen3-5-122b": 64,
|
||
"qwen3-coder-next": 78,
|
||
"qwen3-6-plus": 88,
|
||
"kimi-k2-6": 88
|
||
}
|
||
},
|
||
{
|
||
"agent": "backend-developer",
|
||
"current_model_index": 0,
|
||
"current_model_id": "qwen3-coder-480b",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 91,
|
||
"minimax-m2.5": 84,
|
||
"minimax-m2.7": 80,
|
||
"nemotron-3-super": 68,
|
||
"glm-5.1": 63,
|
||
"deepseek-v4-pro-max": 86,
|
||
"qwen3-5-122b": 62,
|
||
"qwen3-coder-next": 78,
|
||
"qwen3-6-plus": 87,
|
||
"kimi-k2-6": 90
|
||
}
|
||
},
|
||
{
|
||
"agent": "go-developer",
|
||
"current_model_index": 0,
|
||
"current_model_id": "qwen3-coder-480b",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 85,
|
||
"minimax-m2.5": 78,
|
||
"minimax-m2.7": 74,
|
||
"nemotron-3-super": 66,
|
||
"glm-5.1": 58,
|
||
"deepseek-v4-pro-max": 88,
|
||
"qwen3-5-122b": 58,
|
||
"qwen3-coder-next": 74,
|
||
"qwen3-6-plus": 82,
|
||
"kimi-k2-6": 86
|
||
}
|
||
},
|
||
{
|
||
"agent": "flutter-developer",
|
||
"current_model_index": 0,
|
||
"current_model_id": "qwen3-coder-480b",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 86,
|
||
"minimax-m2.5": 70,
|
||
"minimax-m2.7": 66,
|
||
"nemotron-3-super": 60,
|
||
"glm-5.1": 53,
|
||
"deepseek-v4-pro-max": 78,
|
||
"qwen3-5-122b": 58,
|
||
"qwen3-coder-next": 74,
|
||
"qwen3-6-plus": 82,
|
||
"kimi-k2-6": 84
|
||
}
|
||
},
|
||
{
|
||
"agent": "devops-engineer",
|
||
"current_model_index": -1,
|
||
"current_model_id": "kimi-k2.6",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 66,
|
||
"minimax-m2.5": 53,
|
||
"minimax-m2.7": 48,
|
||
"nemotron-3-super": 78,
|
||
"glm-5.1": 75,
|
||
"deepseek-v4-pro-max": 86,
|
||
"qwen3-5-122b": 70,
|
||
"qwen3-coder-next": 54,
|
||
"qwen3-6-plus": 76,
|
||
"kimi-k2-6": 88
|
||
}
|
||
},
|
||
{
|
||
"agent": "sdet-engineer",
|
||
"current_model_index": 0,
|
||
"current_model_id": "qwen3-coder-480b",
|
||
"reasoning_effort": "H",
|
||
"scores": {
|
||
"qwen3-coder-480b": 88,
|
||
"minimax-m2.5": 84,
|
||
"minimax-m2.7": 80,
|
||
"nemotron-3-super": 70,
|
||
"glm-5.1": 63,
|
||
"deepseek-v4-pro-max": 84,
|
||
"qwen3-5-122b": 64,
|
||
"qwen3-coder-next": 78,
|
||
"qwen3-6-plus": 84,
|
||
"kimi-k2-6": 87
|
||
}
|
||
},
|
||
{
|
||
"agent": "code-skeptic",
|
||
"current_model_index": 1,
|
||
"current_model_id": "minimax-m2.5",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 82,
|
||
"minimax-m2.5": 85,
|
||
"minimax-m2.7": 80,
|
||
"nemotron-3-super": 73,
|
||
"glm-5.1": 72,
|
||
"deepseek-v4-pro-max": 82,
|
||
"qwen3-5-122b": 70,
|
||
"qwen3-coder-next": 72,
|
||
"qwen3-6-plus": 80,
|
||
"kimi-k2-6": 82
|
||
}
|
||
},
|
||
{
|
||
"agent": "security-auditor",
|
||
"current_model_index": 6,
|
||
"current_model_id": "nemotron-3-super",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 76,
|
||
"minimax-m2.5": 74,
|
||
"minimax-m2.7": 68,
|
||
"nemotron-3-super": 76,
|
||
"glm-5.1": 68,
|
||
"deepseek-v4-pro-max": 80,
|
||
"qwen3-5-122b": 72,
|
||
"qwen3-coder-next": 64,
|
||
"qwen3-6-plus": 75,
|
||
"kimi-k2-6": 80
|
||
}
|
||
},
|
||
{
|
||
"agent": "performance-engineer",
|
||
"current_model_index": 6,
|
||
"current_model_id": "nemotron-3-super",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 78,
|
||
"minimax-m2.5": 75,
|
||
"minimax-m2.7": 70,
|
||
"nemotron-3-super": 78,
|
||
"glm-5.1": 74,
|
||
"deepseek-v4-pro-max": 84,
|
||
"qwen3-5-122b": 70,
|
||
"qwen3-coder-next": 67,
|
||
"qwen3-6-plus": 76,
|
||
"kimi-k2-6": 82
|
||
}
|
||
},
|
||
{
|
||
"agent": "the-fixer",
|
||
"current_model_index": 1,
|
||
"current_model_id": "minimax-m2.5",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 89,
|
||
"minimax-m2.5": 88,
|
||
"minimax-m2.7": 84,
|
||
"nemotron-3-super": 71,
|
||
"glm-5.1": 64,
|
||
"deepseek-v4-pro-max": 88,
|
||
"qwen3-5-122b": 64,
|
||
"qwen3-coder-next": 82,
|
||
"qwen3-6-plus": 86,
|
||
"kimi-k2-6": 90
|
||
}
|
||
},
|
||
{
|
||
"agent": "browser-automation",
|
||
"current_model_index": -1,
|
||
"current_model_id": "kimi-k2.6",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 87,
|
||
"minimax-m2.5": 72,
|
||
"minimax-m2.7": 68,
|
||
"nemotron-3-super": 61,
|
||
"glm-5.1": 53,
|
||
"deepseek-v4-pro-max": 82,
|
||
"qwen3-5-122b": 56,
|
||
"qwen3-coder-next": 72,
|
||
"qwen3-6-plus": 82,
|
||
"kimi-k2-6": 86
|
||
}
|
||
},
|
||
{
|
||
"agent": "visual-tester",
|
||
"current_model_index": 0,
|
||
"current_model_id": "qwen3-coder-480b",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 82,
|
||
"minimax-m2.5": 68,
|
||
"minimax-m2.7": 64,
|
||
"nemotron-3-super": 55,
|
||
"glm-5.1": 48,
|
||
"deepseek-v4-pro-max": 76,
|
||
"qwen3-5-122b": 54,
|
||
"qwen3-coder-next": 66,
|
||
"qwen3-6-plus": 76,
|
||
"kimi-k2-6": 78
|
||
}
|
||
},
|
||
{
|
||
"agent": "system-analyst",
|
||
"current_model_index": 7,
|
||
"current_model_id": "glm-5.1",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 70,
|
||
"minimax-m2.5": 66,
|
||
"minimax-m2.7": 63,
|
||
"nemotron-3-super": 74,
|
||
"glm-5.1": 82,
|
||
"deepseek-v4-pro-max": 88,
|
||
"qwen3-5-122b": 76,
|
||
"qwen3-coder-next": 58,
|
||
"qwen3-6-plus": 80,
|
||
"kimi-k2-6": 86
|
||
}
|
||
},
|
||
{
|
||
"agent": "capability-analyst",
|
||
"current_model_index": 7,
|
||
"current_model_id": "glm-5.1",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 72,
|
||
"minimax-m2.5": 68,
|
||
"minimax-m2.7": 66,
|
||
"nemotron-3-super": 76,
|
||
"glm-5.1": 78,
|
||
"deepseek-v4-pro-max": 82,
|
||
"qwen3-5-122b": 75,
|
||
"qwen3-coder-next": 60,
|
||
"qwen3-6-plus": 79,
|
||
"kimi-k2-6": 82
|
||
}
|
||
},
|
||
{
|
||
"agent": "orchestrator",
|
||
"current_model_index": -1,
|
||
"current_model_id": "kimi-k2.6",
|
||
"reasoning_effort": "H",
|
||
"scores": {
|
||
"qwen3-coder-480b": 74,
|
||
"minimax-m2.5": 70,
|
||
"minimax-m2.7": 68,
|
||
"nemotron-3-super": 80,
|
||
"glm-5.1": 82,
|
||
"deepseek-v4-pro-max": 86,
|
||
"qwen3-5-122b": 78,
|
||
"qwen3-coder-next": 62,
|
||
"qwen3-6-plus": 84,
|
||
"kimi-k2-6": 92
|
||
}
|
||
},
|
||
{
|
||
"agent": "release-manager",
|
||
"current_model_index": 7,
|
||
"current_model_id": "glm-5.1",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 72,
|
||
"minimax-m2.5": 66,
|
||
"minimax-m2.7": 64,
|
||
"nemotron-3-super": 74,
|
||
"glm-5.1": 76,
|
||
"deepseek-v4-pro-max": 78,
|
||
"qwen3-5-122b": 72,
|
||
"qwen3-coder-next": 60,
|
||
"qwen3-6-plus": 76,
|
||
"kimi-k2-6": 78
|
||
}
|
||
},
|
||
{
|
||
"agent": "evaluator",
|
||
"current_model_index": 7,
|
||
"current_model_id": "glm-5.1",
|
||
"reasoning_effort": "H",
|
||
"scores": {
|
||
"qwen3-coder-480b": 70,
|
||
"minimax-m2.5": 73,
|
||
"minimax-m2.7": 70,
|
||
"nemotron-3-super": 78,
|
||
"glm-5.1": 78,
|
||
"deepseek-v4-pro-max": 84,
|
||
"qwen3-5-122b": 76,
|
||
"qwen3-coder-next": 58,
|
||
"qwen3-6-plus": 81,
|
||
"kimi-k2-6": 84
|
||
}
|
||
},
|
||
{
|
||
"agent": "prompt-optimizer",
|
||
"current_model_index": 7,
|
||
"current_model_id": "glm-5.1",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 76,
|
||
"minimax-m2.5": 74,
|
||
"minimax-m2.7": 72,
|
||
"nemotron-3-super": 76,
|
||
"glm-5.1": 75,
|
||
"deepseek-v4-pro-max": 80,
|
||
"qwen3-5-122b": 74,
|
||
"qwen3-coder-next": 64,
|
||
"qwen3-6-plus": 83,
|
||
"kimi-k2-6": 82
|
||
}
|
||
},
|
||
{
|
||
"agent": "product-owner",
|
||
"current_model_index": 7,
|
||
"current_model_id": "glm-5.1",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 60,
|
||
"minimax-m2.5": 56,
|
||
"minimax-m2.7": 54,
|
||
"nemotron-3-super": 74,
|
||
"glm-5.1": 78,
|
||
"deepseek-v4-pro-max": 76,
|
||
"qwen3-5-122b": 74,
|
||
"qwen3-coder-next": 48,
|
||
"qwen3-6-plus": 78,
|
||
"kimi-k2-6": 76
|
||
}
|
||
},
|
||
{
|
||
"agent": "pipeline-judge",
|
||
"current_model_index": 7,
|
||
"current_model_id": "glm-5.1",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 64,
|
||
"minimax-m2.5": 68,
|
||
"minimax-m2.7": 65,
|
||
"nemotron-3-super": 78,
|
||
"glm-5.1": 76,
|
||
"deepseek-v4-pro-max": 82,
|
||
"qwen3-5-122b": 74,
|
||
"qwen3-coder-next": 56,
|
||
"qwen3-6-plus": 80,
|
||
"kimi-k2-6": 84
|
||
}
|
||
},
|
||
{
|
||
"agent": "workflow-architect",
|
||
"current_model_index": 7,
|
||
"current_model_id": "glm-5.1",
|
||
"reasoning_effort": "H",
|
||
"scores": {
|
||
"qwen3-coder-480b": 68,
|
||
"minimax-m2.5": 62,
|
||
"minimax-m2.7": 60,
|
||
"nemotron-3-super": 76,
|
||
"glm-5.1": 76,
|
||
"deepseek-v4-pro-max": 80,
|
||
"qwen3-5-122b": 72,
|
||
"qwen3-coder-next": 56,
|
||
"qwen3-6-plus": 80,
|
||
"kimi-k2-6": 82
|
||
}
|
||
},
|
||
{
|
||
"agent": "markdown-validator",
|
||
"current_model_index": -1,
|
||
"current_model_id": "nemotron-3-nano:30b",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 43,
|
||
"minimax-m2.5": 38,
|
||
"minimax-m2.7": 36,
|
||
"nemotron-3-super": 52,
|
||
"glm-5.1": 55,
|
||
"deepseek-v4-pro-max": 68,
|
||
"qwen3-5-122b": 56,
|
||
"qwen3-coder-next": 40,
|
||
"qwen3-6-plus": 50,
|
||
"kimi-k2-6": 56
|
||
}
|
||
},
|
||
{
|
||
"agent": "agent-architect",
|
||
"current_model_index": -1,
|
||
"current_model_id": "kimi-k2.6",
|
||
"reasoning_effort": "H",
|
||
"scores": {
|
||
"qwen3-coder-480b": 78,
|
||
"minimax-m2.5": 72,
|
||
"minimax-m2.7": 70,
|
||
"nemotron-3-super": 78,
|
||
"glm-5.1": 76,
|
||
"deepseek-v4-pro-max": 82,
|
||
"qwen3-5-122b": 76,
|
||
"qwen3-coder-next": 66,
|
||
"qwen3-6-plus": 82,
|
||
"kimi-k2-6": 86
|
||
}
|
||
},
|
||
{
|
||
"agent": "planner",
|
||
"current_model_index": 6,
|
||
"current_model_id": "nemotron-3-super",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 72,
|
||
"minimax-m2.5": 68,
|
||
"minimax-m2.7": 66,
|
||
"nemotron-3-super": 80,
|
||
"glm-5.1": 78,
|
||
"deepseek-v4-pro-max": 88,
|
||
"qwen3-5-122b": 78,
|
||
"qwen3-coder-next": 60,
|
||
"qwen3-6-plus": 85,
|
||
"kimi-k2-6": 86
|
||
}
|
||
},
|
||
{
|
||
"agent": "reflector",
|
||
"current_model_index": 6,
|
||
"current_model_id": "nemotron-3-super",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 68,
|
||
"minimax-m2.5": 66,
|
||
"minimax-m2.7": 64,
|
||
"nemotron-3-super": 78,
|
||
"glm-5.1": 76,
|
||
"deepseek-v4-pro-max": 84,
|
||
"qwen3-5-122b": 76,
|
||
"qwen3-coder-next": 56,
|
||
"qwen3-6-plus": 82,
|
||
"kimi-k2-6": 80
|
||
}
|
||
},
|
||
{
|
||
"agent": "memory-manager",
|
||
"current_model_index": 6,
|
||
"current_model_id": "nemotron-3-super",
|
||
"reasoning_effort": "M",
|
||
"scores": {
|
||
"qwen3-coder-480b": 63,
|
||
"minimax-m2.5": 58,
|
||
"minimax-m2.7": 56,
|
||
"nemotron-3-super": 86,
|
||
"glm-5.1": 72,
|
||
"deepseek-v4-pro-max": 86,
|
||
"qwen3-5-122b": 70,
|
||
"qwen3-coder-next": 50,
|
||
"qwen3-6-plus": 87,
|
||
"kimi-k2-6": 84
|
||
}
|
||
},
|
||
{
|
||
"agent": "architect-indexer",
|
||
"current_model_index": 7,
|
||
"current_model_id": "glm-5.1",
|
||
"reasoning_effort": "H",
|
||
"scores": {
|
||
"qwen3-coder-480b": 70,
|
||
"minimax-m2.5": 64,
|
||
"minimax-m2.7": 62,
|
||
"nemotron-3-super": 74,
|
||
"glm-5.1": 80,
|
||
"deepseek-v4-pro-max": 78,
|
||
"qwen3-5-122b": 76,
|
||
"qwen3-coder-next": 58,
|
||
"qwen3-6-plus": 80,
|
||
"kimi-k2-6": 84
|
||
}
|
||
}
|
||
],
|
||
"if_scores": {
|
||
"qwen3-coder-480b": 88,
|
||
"minimax-m2.5": 82,
|
||
"minimax-m2.7": 78,
|
||
"nemotron-3-super": 85,
|
||
"glm-5.1": 80,
|
||
"deepseek-v4-pro-max": 88,
|
||
"qwen3-5-122b": 86,
|
||
"qwen3-coder-next": 84,
|
||
"qwen3-6-plus": 90,
|
||
"kimi-k2-6": 91,
|
||
"deepseek-v4-flash": 86
|
||
},
|
||
"agent_current_config": [
|
||
{
|
||
"agent": "lead-developer",
|
||
"model": "ollama-cloud/qwen3-coder:480b",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "qwen",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "frontend-developer",
|
||
"model": "ollama-cloud/minimax-m2.5",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "qwen",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "php-developer",
|
||
"model": "ollama-cloud/qwen3-coder:480b",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "qwen",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "python-developer",
|
||
"model": "ollama-cloud/qwen3-coder:480b",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "qwen",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "backend-developer",
|
||
"model": "ollama-cloud/qwen3-coder:480b",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "qwen",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "go-developer",
|
||
"model": "ollama-cloud/qwen3-coder:480b",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "qwen",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "flutter-developer",
|
||
"model": "ollama-cloud/qwen3-coder:480b",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "qwen",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "devops-engineer",
|
||
"model": "ollama-cloud/kimi-k2.6",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "nemotron",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "sdet-engineer",
|
||
"model": "ollama-cloud/qwen3-coder:480b",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "qwen",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "code-skeptic",
|
||
"model": "ollama-cloud/minimax-m2.5",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "minimax",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "security-auditor",
|
||
"model": "ollama-cloud/nemotron-3-super",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "nemotron",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "performance-engineer",
|
||
"model": "ollama-cloud/nemotron-3-super",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "nemotron",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "the-fixer",
|
||
"model": "ollama-cloud/minimax-m2.5",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "minimax",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "browser-automation",
|
||
"model": "ollama-cloud/kimi-k2.6",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "qwen",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "visual-tester",
|
||
"model": "ollama-cloud/qwen3-coder:480b",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "qwen",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "system-analyst",
|
||
"model": "ollama-cloud/glm-5.1",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "glm",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "capability-analyst",
|
||
"model": "ollama-cloud/glm-5.1",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "glm",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "orchestrator",
|
||
"model": "ollama-cloud/kimi-k2.6",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "kimi",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "release-manager",
|
||
"model": "ollama-cloud/glm-5.1",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "glm",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "evaluator",
|
||
"model": "ollama-cloud/glm-5.1",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "glm",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "prompt-optimizer",
|
||
"model": "ollama-cloud/glm-5.1",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "glm",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "product-owner",
|
||
"model": "ollama-cloud/glm-5.1",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "glm",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "pipeline-judge",
|
||
"model": "ollama-cloud/glm-5.1",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "glm",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "workflow-architect",
|
||
"model": "ollama-cloud/glm-5.1",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "glm",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "markdown-validator",
|
||
"model": "ollama-cloud/nemotron-3-nano:30b",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "nemotron",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "agent-architect",
|
||
"model": "ollama-cloud/kimi-k2.6",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "glm",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "planner",
|
||
"model": "ollama-cloud/nemotron-3-super",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "nemotron",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "reflector",
|
||
"model": "ollama-cloud/nemotron-3-super",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "nemotron",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "memory-manager",
|
||
"model": "ollama-cloud/nemotron-3-super",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "nemotron",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
},
|
||
{
|
||
"agent": "architect-indexer",
|
||
"model": "ollama-cloud/glm-5.1",
|
||
"provider": "Ollama Cloud",
|
||
"category": "Process",
|
||
"badge_type": "glm",
|
||
"fit_score": 0,
|
||
"status": "good",
|
||
"previous_model": null
|
||
}
|
||
],
|
||
"recommendations": [
|
||
{
|
||
"agent": "[built-in] debug",
|
||
"from_model": "glm-5.1 (88)",
|
||
"from_provider": "Ollama",
|
||
"to_model": "V4-Pro Max (★90) / K2.6 (★90) RE:High",
|
||
"to_provider": "Ollama Cloud",
|
||
"impact": "high",
|
||
"quality_change": "+2%",
|
||
"speed_change": "~1x",
|
||
"context_change": "200K→1M",
|
||
"provider_change": "Ollama Cloud",
|
||
"rationale": "★ матрицы: V4-Pro=90 и K2.6=90 (TIE!), GLM-5.1=88. V4-Pro: LiveCodeBench 93.5(#1!), Terminal 67.9, 1M ctx для полного проекта. K2.6: 13h auto sessions. Оба лучше GLM-5.1. RE:High для debug."
|
||
},
|
||
{
|
||
"agent": "planner",
|
||
"from_model": "nemotron-3-super (80)",
|
||
"from_provider": "Ollama",
|
||
"to_model": "V4-Pro Max (★88) RE:High",
|
||
"to_provider": "Ollama Cloud",
|
||
"impact": "high",
|
||
"quality_change": "+10%",
|
||
"speed_change": "~1x",
|
||
"context_change": "1M",
|
||
"provider_change": "Ollama Cloud",
|
||
"rationale": "★ матрицы: V4-Pro=88(лучший!), K2.6=86, GLM-5.1=85, Nem=80. V4-Pro: GPQA 90.1 (reasoning), 1M ctx сохраняется (vs потеря при K2.6). RE:High для chain-of-thought planning."
|
||
},
|
||
{
|
||
"agent": "go-developer",
|
||
"from_model": "qwen3-coder:480b (85)",
|
||
"from_provider": "Ollama",
|
||
"to_model": "V4-Pro Max (★88) RE:Medium",
|
||
"to_provider": "Ollama Cloud",
|
||
"impact": "medium",
|
||
"quality_change": "+4%",
|
||
"speed_change": "~1x",
|
||
"context_change": "256K→1M",
|
||
"provider_change": "Ollama Cloud",
|
||
"rationale": "★ матрицы: V4-Pro=88(лучший для Go!), K2.6=86, Qwen3Coder=85. DeepSeek модели традиционно сильны в Go/Rust. 1M ctx для крупных Go-проектов."
|
||
},
|
||
{
|
||
"agent": "history-miner",
|
||
"from_model": "nemotron-3-super (★85)",
|
||
"from_provider": "Ollama",
|
||
"to_model": "V4-Pro Max (86) + Nem fallback",
|
||
"to_provider": "Hybrid",
|
||
"impact": "medium",
|
||
"quality_change": "+1%",
|
||
"speed_change": "~1x",
|
||
"context_change": "1M",
|
||
"provider_change": "Ollama Cloud + Ollama",
|
||
"rationale": "V4-Pro=86 чуть лучше Nemotron=85. 1M ctx у обоих. MRCR 83.5 у V4-Pro — лучшее long-context retrieval. Nemotron как fallback (RULER 91.75%)."
|
||
},
|
||
{
|
||
"agent": "frontend-dev → M2.5",
|
||
"from_model": "qwen3-coder (90)",
|
||
"from_provider": "Ollama",
|
||
"to_model": "MiniMax M2.5 (★92) ✅",
|
||
"to_provider": "Ollama",
|
||
"impact": "low",
|
||
"quality_change": "+2%",
|
||
"speed_change": "=",
|
||
"context_change": "204K",
|
||
"provider_change": "Ollama",
|
||
"rationale": "Spec-writing, UI architect. APPLIED."
|
||
},
|
||
{
|
||
"agent": "devops → K2.6",
|
||
"from_model": "deepseek-v3.2",
|
||
"from_provider": "",
|
||
"to_model": "kimi-k2.6:cloud ✅",
|
||
"to_provider": "Ollama Cloud",
|
||
"impact": "low",
|
||
"quality_change": "+35%",
|
||
"speed_change": "=",
|
||
"context_change": "256K",
|
||
"provider_change": "",
|
||
"rationale": "APPLIED."
|
||
},
|
||
{
|
||
"agent": "orchestrator",
|
||
"from_model": "glm-5.1.1 (★90)",
|
||
"from_provider": "Ollama",
|
||
"to_model": "K2.6 (★92) RE:Medium",
|
||
"to_provider": "Ollama Cloud",
|
||
"impact": "medium",
|
||
"quality_change": "+2%",
|
||
"speed_change": "~1x",
|
||
"context_change": "200K→256K",
|
||
"provider_change": "Ollama Cloud",
|
||
"rationale": "K2.6=92★ всё ещё лучший для orchestration. V4-Pro=86 слабее. 300 sub-agent swarm."
|
||
},
|
||
{
|
||
"agent": "the-fixer",
|
||
"from_model": "minimax-m2.5 (★88)",
|
||
"from_provider": "Ollama",
|
||
"to_model": "V4-Pro (★88) / K2.6 (★90)",
|
||
"to_provider": "Ollama Cloud",
|
||
"impact": "medium",
|
||
"quality_change": "+2%",
|
||
"speed_change": "~1x",
|
||
"context_change": "128K→1M/256K",
|
||
"provider_change": "Ollama Cloud",
|
||
"rationale": "K2.6=90(лучший), V4-Pro=88=M2.5. M2.5 SWE-bench 80.2% стабильнее. Не срочно."
|
||
},
|
||
{
|
||
"agent": "Qwen3-Coder (7 coding)",
|
||
"from_model": "qwen3-coder",
|
||
"from_provider": "Ollama",
|
||
"to_model": "✅",
|
||
"to_provider": "",
|
||
"impact": "low",
|
||
"quality_change": "=0%",
|
||
"speed_change": "=",
|
||
"context_change": "256K",
|
||
"provider_change": "Ollama",
|
||
"rationale": "lead=92★, backend=91★, python=90★."
|
||
},
|
||
{
|
||
"agent": "GLM-5.1 (12 agents)",
|
||
"from_model": "glm-5.1.1",
|
||
"from_provider": "Ollama",
|
||
"to_model": "✅",
|
||
"to_provider": "",
|
||
"impact": "low",
|
||
"quality_change": "=0%",
|
||
"speed_change": "=",
|
||
"context_change": "200K",
|
||
"provider_change": "",
|
||
"rationale": "orchestrator=90, system-analyst=90. SWE-Pro #1."
|
||
},
|
||
{
|
||
"agent": "Kimi K2.6 (3 agents)",
|
||
"from_model": "kimi-k2.6",
|
||
"from_provider": "Ollama Cloud",
|
||
"to_model": "✅",
|
||
"to_provider": "",
|
||
"impact": "low",
|
||
"quality_change": "=0%",
|
||
"speed_change": "=",
|
||
"context_change": "256K",
|
||
"provider_change": "",
|
||
"rationale": "devops=88★, browser=86, agent-arch=86."
|
||
}
|
||
],
|
||
"impact_data": [
|
||
{
|
||
"category": "debug GLM5.1→V4-Pro/K2.6",
|
||
"before": 88,
|
||
"after": 90,
|
||
"delta": 2,
|
||
"notes": "LiveCodeBench 93.5, Terminal 67.9"
|
||
},
|
||
{
|
||
"category": "planner Nem→V4-Pro Max",
|
||
"before": 80,
|
||
"after": 88,
|
||
"delta": 8,
|
||
"notes": "★88! GPQA 90.1, 1M ctx"
|
||
},
|
||
{
|
||
"category": "go-dev Coder→V4-Pro Max",
|
||
"before": 85,
|
||
"after": 88,
|
||
"delta": 3,
|
||
"notes": "★88! Go/Rust specialist, 1M ctx"
|
||
},
|
||
{
|
||
"category": "history-miner →V4-Pro",
|
||
"before": 85,
|
||
"after": 86,
|
||
"delta": 1,
|
||
"notes": "MRCR 83.5, long-context"
|
||
},
|
||
{
|
||
"category": "orchestrator →K2.6 (next)",
|
||
"before": 90,
|
||
"after": 92,
|
||
"delta": 2,
|
||
"notes": "300 sub-agent swarm"
|
||
},
|
||
{
|
||
"category": "frontend → M2.5 ✅",
|
||
"before": 90,
|
||
"after": 92,
|
||
"delta": 2,
|
||
"notes": "Spec-writing, UI architect"
|
||
},
|
||
{
|
||
"category": "devops → K2.6 ✅",
|
||
"before": 65,
|
||
"after": 88,
|
||
"delta": 23,
|
||
"notes": "IF:65→91! Terminal 66.7"
|
||
},
|
||
{
|
||
"category": "Qwen3-Coder (7) ✅",
|
||
"before": 90,
|
||
"after": 90,
|
||
"delta": 0,
|
||
"notes": "SOTA coding"
|
||
},
|
||
{
|
||
"category": "GLM-5.1 (12) ✅",
|
||
"before": 87,
|
||
"after": 87,
|
||
"delta": 0,
|
||
"notes": "SWE-Pro #1"
|
||
},
|
||
{
|
||
"category": "Nemotron Super (6) ✅",
|
||
"before": 82,
|
||
"after": 82,
|
||
"delta": 0,
|
||
"notes": "1M ctx, RULER 91.75%"
|
||
}
|
||
],
|
||
"benchmark_comparison": {
|
||
"benchmarks": [
|
||
{
|
||
"name": "SWE-V",
|
||
"full_name": "SWE-Bench Verified",
|
||
"description": "GitHub issue resolution (500 tasks)",
|
||
"roles": "lead-dev, backend, fixer"
|
||
},
|
||
{
|
||
"name": "SWE-P",
|
||
"full_name": "SWE-Bench Pro",
|
||
"description": "Multi-lang, decontaminated (1865 tasks)",
|
||
"roles": "all coding agents"
|
||
},
|
||
{
|
||
"name": "T-Bench",
|
||
"full_name": "Terminal-Bench 2.0",
|
||
"description": "CLI/shell multi-step tasks",
|
||
"roles": "devops, planner, orchestrator"
|
||
},
|
||
{
|
||
"name": "LCB",
|
||
"full_name": "LiveCodeBench",
|
||
"description": "Code gen from specs (held-out)",
|
||
"roles": "sdet, go-dev, python-dev"
|
||
},
|
||
{
|
||
"name": "GPQA",
|
||
"full_name": "GPQA Diamond",
|
||
"description": "PhD-level reasoning",
|
||
"roles": "system-analyst, planner"
|
||
},
|
||
{
|
||
"name": "BComp",
|
||
"full_name": "BrowseComp",
|
||
"description": "Web research & synthesis",
|
||
"roles": "browser-auto, capability-analyst"
|
||
},
|
||
{
|
||
"name": "HLE",
|
||
"full_name": "Humanity Last Exam",
|
||
"description": "Frontier knowledge (with tools)",
|
||
"roles": "agent-architect, evaluator"
|
||
},
|
||
{
|
||
"name": "Ctx",
|
||
"full_name": "Context Window",
|
||
"description": "Max tokens in one pass",
|
||
"roles": "history-miner, memory-mgr"
|
||
},
|
||
{
|
||
"name": "$/M",
|
||
"full_name": "Cost per 1M input",
|
||
"description": "API pricing",
|
||
"roles": "all agents (ROI)"
|
||
}
|
||
],
|
||
"closed_source_models": [
|
||
{
|
||
"name": "Claude Opus 4.7",
|
||
"organization": "Anthropic",
|
||
"scores": [
|
||
87.6,
|
||
64.3,
|
||
69.4,
|
||
null,
|
||
94.2,
|
||
79.3,
|
||
53,
|
||
"1M",
|
||
"$5"
|
||
],
|
||
"color": "#c084fc",
|
||
"note": "#1 апрель 2026"
|
||
},
|
||
{
|
||
"name": "GPT-5.5",
|
||
"organization": "OpenAI",
|
||
"scores": [
|
||
null,
|
||
58.6,
|
||
82.7,
|
||
null,
|
||
null,
|
||
83.4,
|
||
57.2,
|
||
"1M",
|
||
"$5"
|
||
],
|
||
"color": "#ff6b81",
|
||
"note": "Новейший, Terminal #1"
|
||
},
|
||
{
|
||
"name": "GPT-5.4",
|
||
"organization": "OpenAI",
|
||
"scores": [
|
||
78.2,
|
||
59.1,
|
||
75.1,
|
||
null,
|
||
94.4,
|
||
82.7,
|
||
58.7,
|
||
"200K",
|
||
"$2.50"
|
||
],
|
||
"color": "#ff6b81",
|
||
"note": "Reasoning, math"
|
||
},
|
||
{
|
||
"name": "Gemini 3.1 Pro",
|
||
"organization": "Google",
|
||
"scores": [
|
||
80.6,
|
||
46.1,
|
||
68.5,
|
||
null,
|
||
94.3,
|
||
85.9,
|
||
51.4,
|
||
"2M",
|
||
"$2"
|
||
],
|
||
"color": "#facc15",
|
||
"note": "ARC-AGI 77.1%, дешёвый"
|
||
},
|
||
{
|
||
"name": "Claude Sonnet 4.6",
|
||
"organization": "Anthropic",
|
||
"scores": [
|
||
79.6,
|
||
null,
|
||
null,
|
||
null,
|
||
null,
|
||
null,
|
||
null,
|
||
"200K",
|
||
"$3"
|
||
],
|
||
"color": "#c084fc",
|
||
"note": "5× дешевле Opus"
|
||
},
|
||
{
|
||
"name": "GPT-5.3-Codex",
|
||
"organization": "OpenAI",
|
||
"scores": [
|
||
85,
|
||
57,
|
||
77.3,
|
||
null,
|
||
null,
|
||
null,
|
||
null,
|
||
"200K",
|
||
"$6"
|
||
],
|
||
"color": "#ff6b81",
|
||
"note": "Coding specialist"
|
||
}
|
||
],
|
||
"apaw_models": [
|
||
{
|
||
"name": "Kimi K2.6",
|
||
"organization": "APAW",
|
||
"scores": [
|
||
80.2,
|
||
58.6,
|
||
66.7,
|
||
87.2,
|
||
null,
|
||
83.2,
|
||
54,
|
||
"256K",
|
||
"$0.95"
|
||
],
|
||
"color": "#00ff94",
|
||
"note": "devops, browser, architect (3)"
|
||
},
|
||
{
|
||
"name": "GLM-5.1",
|
||
"organization": "APAW",
|
||
"scores": [
|
||
null,
|
||
58.4,
|
||
63.5,
|
||
null,
|
||
86.2,
|
||
68.7,
|
||
null,
|
||
"200K",
|
||
"~$0.50"
|
||
],
|
||
"color": "#00ff94",
|
||
"note": "12 agents! orchestrator, eval..."
|
||
},
|
||
{
|
||
"name": "V4-Pro Max",
|
||
"organization": "APAW",
|
||
"scores": [
|
||
80.6,
|
||
55.4,
|
||
67.9,
|
||
93.5,
|
||
90.1,
|
||
83.4,
|
||
48.2,
|
||
"1M",
|
||
"$0.42"
|
||
],
|
||
"color": "#00d4ff",
|
||
"note": "planner, go-dev (рек.)"
|
||
},
|
||
{
|
||
"name": "Qwen3-Coder 480B",
|
||
"organization": "APAW",
|
||
"scores": [
|
||
66.5,
|
||
null,
|
||
null,
|
||
null,
|
||
null,
|
||
null,
|
||
null,
|
||
"256K",
|
||
"~$0.50"
|
||
],
|
||
"color": "#00ff94",
|
||
"note": "7 coding agents"
|
||
},
|
||
{
|
||
"name": "MiniMax M2.5",
|
||
"organization": "APAW",
|
||
"scores": [
|
||
80.2,
|
||
51.3,
|
||
null,
|
||
null,
|
||
null,
|
||
76.3,
|
||
null,
|
||
"204K",
|
||
"$0.15"
|
||
],
|
||
"color": "#00ff94",
|
||
"note": "frontend, skeptic, fixer (3)"
|
||
},
|
||
{
|
||
"name": "Nemotron Super",
|
||
"organization": "APAW",
|
||
"scores": [
|
||
60.5,
|
||
null,
|
||
null,
|
||
null,
|
||
null,
|
||
null,
|
||
null,
|
||
"1M",
|
||
"~$0.40"
|
||
],
|
||
"color": "#00ff94",
|
||
"note": "6 agents (memory, history)"
|
||
}
|
||
]
|
||
}
|
||
};
|
||
|
||
// === MAP EMBEDDED_DATA -> original v3 format ===
// Adapts the generated EMBEDDED_DATA payload to the short-key records that the
// render functions in this script read.

// All models in the payload, and the subset that has a heatmap column.
// The column set is taken from the score keys of the FIRST agent entry only.
const allModels = EMBEDDED_DATA.models || [];
const scoreModelIds = Object.keys((EMBEDDED_DATA.agent_model_scores || [])[0]?.scores || {});
const activeModels = allModels.filter((model) => scoreModelIds.includes(model.id));

// Current agent -> model assignments (a=agent, m=model, p=provider, b=badge class, ...).
const cfg = (EMBEDDED_DATA.agent_current_config || []).map((c) => {
  const modelId = (c.model || '').replace('ollama-cloud/', '');

  // An explicit badge_type wins; otherwise the badge is derived from the first
  // matching substring of the model id, in this order.
  let badge = c.badge_type;
  if (!badge) {
    const families = [
      ['qwen3', 'qwen'],
      ['minimax', 'minimax'],
      ['nemotron', 'nemotron'],
      ['glm', 'glm'],
      ['kimi', 'kimi'],
      ['deepseek', 'deepseek'],
    ];
    const match = families.find(([needle]) => modelId.includes(needle));
    badge = match ? match[1] : 'groq';
  }

  return {
    a: c.agent,
    m: modelId,
    p: c.provider || 'Ollama',
    cat: c.category || 'General',
    b: badge,
    fit: c.fit_score || 0,
    s: c.status || 'good',
    prev: c.previous_model
  };
});

// Rate-limit / speed rows for the Groq table.
const groqModels = (EMBEDDED_DATA.groq_models || []).map((g) => {
  return { id: g.id, rpm: g.rpm, rpd: g.rpd, tpm: g.tpm, tpd: g.tpd, speed: g.speed, use: g.use_case };
});

// Model cards: one record per active model.
const ollamaModels = activeModels.map((m) => {
  return {
    n: m.name,
    org: m.organization,
    par: m.parameters,
    ctx: m.context_window,
    swe: m.swe_bench,
    ifScore: m.if_score,
    cat: m.categories || [],
    str: m.description,
    tags: m.tags || [],
    or: m.openrouter,
    groqSpeed: m.speed_tps
  };
});

// Instruction-following scores keyed by the model's index in activeModels
// (models with a falsy if_score are left out).
const ifScores = {};
for (const [idx, model] of activeModels.entries()) {
  if (model.if_score) ifScores[idx] = model.if_score;
}

// Heatmap column headers.
const hmModels = activeModels.map((m) => {
  let providerLabel;
  if (m.provider === 'ollama-cloud') {
    providerLabel = 'Ollama Cloud';
  } else if (m.provider === 'openrouter') {
    providerLabel = 'OpenRouter';
  } else {
    providerLabel = m.provider || 'Ollama';
  }
  return {
    n: m.display_name || m.name?.split(' ').pop() || m.id,
    p: providerLabel,
    if: m.if_score || 0
  };
});

// Heatmap rows: scores aligned with activeModels; `c` is the column of the
// agent's current model (-1 when that model has no column).
const hmAgents = (EMBEDDED_DATA.agent_model_scores || []).map((ag) => {
  const rowScores = activeModels.map((m) => ag.scores?.[m.id] ?? 0);
  const currentId = allModels[ag.current_model_index]?.id;
  const currentCol = activeModels.findIndex((m) => m.id === currentId);
  return { n: ag.agent, c: currentCol, re: ag.reasoning_effort || 'M', s: rowScores };
});

// Recommendation cards.
const recs = (EMBEDDED_DATA.recommendations || []).map((r) => {
  return {
    a: r.agent,
    from: r.from_model,
    fromP: r.from_provider || 'Ollama',
    to: r.to_model,
    toP: r.to_provider || 'Ollama',
    imp: r.impact || 'low',
    q: r.quality_change || '0',
    sp: r.speed_change || '=',
    ctx: r.context_change || '-',
    prov: r.provider_change || r.to_provider || 'Ollama',
    r: r.rationale
  };
});

// Before/after rows for the impact tab.
const impactData = (EMBEDDED_DATA.impact_data || []).map((d) => {
  return { cat: d.category, b: d.before, a: d.after, d: d.delta, n: d.notes || d.note };
});
|
||
|
||
// ======================= RENDER =======================
|
||
/**
 * Activate the tab panel with id `tab-<id>` and highlight the button that
 * triggered the switch.
 *
 * @param {string} id - Tab key; the panel element is looked up as `tab-` + id.
 * @param {Event} [evt] - The triggering event. Optional: when omitted the
 *   function falls back to `window.event`, which keeps existing inline
 *   `onclick="switchTab('x')"` handlers working without relying on the
 *   implicit global `event` identifier.
 */
function switchTab(id, evt) {
  document.querySelectorAll('.tab-panel').forEach((p) => p.classList.remove('active'));
  document.querySelectorAll('.tab-btn').forEach((b) => b.classList.remove('active'));

  const panel = document.getElementById('tab-' + id);
  if (panel) {
    panel.classList.add('active');
  } else {
    // An unknown tab id used to throw a TypeError here.
    console.warn('switchTab: no panel found for tab id', id);
  }

  const trigger = evt ?? (typeof window !== 'undefined' ? window.event : undefined);
  const clicked = trigger?.target;
  // A click may land on a child of the button; prefer the enclosing .tab-btn.
  const button = clicked?.closest?.('.tab-btn') ?? clicked;
  if (button?.classList) button.classList.add('active');

  // The chart is drawn only once the impact tab is visible.
  if (id === 'impact') requestAnimationFrame(() => setTimeout(drawChart, 50));
}
|
||
|
||
/**
 * Fill the #cfgBody table body with one row per entry of `cfg`:
 * agent, model badge (plus struck-through previous model, if any), provider,
 * category, fit-score bar and a status icon with its label.
 */
function renderCfg() {
  // status key -> [icon, label]; anything else falls back to "needs improvement".
  const statusView = new Map([
    ['optimal', ['✅', 'Оптимально']],
    ['good', ['🟡', 'Хорошо']],
    ['overspec', ['🔵', 'Overspec']],
    ['new', ['🆕', 'Не назначена']],
    ['broken', ['💀', 'НЕ РАБОТАЕТ']],
  ]);
  const statusFallback = ['🔴', 'Улучшить'];

  const body = document.getElementById('cfgBody');
  const rows = cfg.map((c, i) => {
    const [icon, label] = statusView.get(c.s) ?? statusFallback;

    // Fit score bands: >=85 high/green, >=70 medium/orange, otherwise low/red.
    let barClass = 'l';
    let scoreColor = 'var(--accent-red)';
    if (c.fit >= 85) {
      barClass = 'h';
      scoreColor = 'var(--accent-green)';
    } else if (c.fit >= 70) {
      barClass = 'm';
      scoreColor = 'var(--accent-orange)';
    }

    let prevHtml = '';
    if (c.prev) {
      prevHtml = `<div style="font-size:.68em;color:var(--text-muted);margin-top:2px;text-decoration:line-through">${c.prev}</div>`;
    }

    return `<tr style="animation:fadeUp .3s ${i*.03}s ease-out both">
<td style="font-weight:600">${c.a}</td>
<td><span class="mbadge ${c.b}">${c.m}</span>${prevHtml}</td>
<td><span class="prov-tag ${c.p.toLowerCase()}">${c.p}</span></td>
<td style="color:var(--text-secondary)">${c.cat}</td>
<td><div class="sbar"><div class="sbar-bg"><div class="sbar-fill ${barClass}" style="width:${c.fit}%"></div></div><span class="snum" style="color:${scoreColor}">${c.fit}</span></div></td>
<td>${icon} ${label}</td></tr>`;
  });
  body.innerHTML = rows.join('');
}
|
||
|
||
/**
 * Fill the #groqBody table body with one row per entry of `groqModels`:
 * model id, the four rate-limit columns, a speed indicator and the use case.
 *
 * The speed dot is classified from the leading decimal integer of `speed`:
 * >=800 "ultra", >=400 "fast", otherwise "normal" (also when it does not parse).
 */
function renderGroq() {
  const body = document.getElementById('groqBody');
  body.innerHTML = groqModels.map((g) => {
    // Explicit radix: without it a string such as "0x500" is read as hexadecimal.
    const spd = Number.parseInt(g.speed, 10) || 0;
    const dotCls = spd >= 800 ? 'ultra' : spd >= 400 ? 'fast' : 'normal';
    return `<tr>
<td><span class="mbadge groq">${g.id}</span></td>
<td>${g.rpm}</td><td>${g.rpd}</td><td>${g.tpm}</td><td>${g.tpd}</td>
<td><div class="speed-ind"><span class="speed-dot ${dotCls}"></span> ${g.speed} t/s</div></td>
<td style="color:var(--text-secondary);font-size:.82em;max-width:280px">${g.use}</td></tr>`;
  }).join('');
}
|
||
|
||
/**
 * Render the model cards into #modelGrid and the category filter buttons into
 * #filterRow, both from `ollamaModels`.
 *
 * Each card carries its categories as JSON in `data-cats`, which `filterM`
 * parses. Category names are escaped before being placed into HTML attributes,
 * so a name containing a quote or apostrophe no longer breaks the `data-cats`
 * attribute or the inline `onclick` handler.
 */
function renderModels() {
  // Escape a string for use inside a quoted HTML attribute value.
  const attr = (s) => String(s)
    .replace(/&/g, '&amp;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;')
    .replace(/</g, '&lt;');

  const grid = document.getElementById('modelGrid');
  const cats = new Set();
  ollamaModels.forEach((m) => m.cat.forEach((c) => cats.add(c)));

  document.getElementById('filterRow').innerHTML =
    '<button class="fbtn active" onclick="filterM(\'all\',this)">Все</button>' +
    [...cats].map((c) => {
      // JSON.stringify yields a valid JS string literal for any category name.
      const handler = attr(`filterM(${JSON.stringify(c)},this)`);
      return `<button class="fbtn" onclick="${handler}">${c}</button>`;
    }).join('');

  grid.innerHTML = ollamaModels.map((m, i) => {
    // Card border by SWE-bench band: >=75 green, >=60 cyan, otherwise default border.
    const bc = m.swe && m.swe >= 75 ? 'var(--accent-green)' : m.swe && m.swe >= 60 ? 'var(--accent-cyan)' : 'var(--border)';
    // NOTE(review): the ollamaModels mapping visible in this file sets `groqSpeed`
    // but not `groq`, so this Groq tag appears never to render — confirm intent.
    const groqTag = m.groq ? '<span class="prov-tag groq">Groq ' + m.groqSpeed + 't/s</span>' : '';
    const orTag = m.or ? '<span class="prov-tag openrouter">OpenRouter FREE</span>' : '';
    const sweRow = m.swe
      ? `<div class="mc-row"><span class="mc-label">SWE-bench</span><span class="mc-val" style="color:${m.swe>=75?'var(--accent-green)':m.swe>=60?'var(--accent-cyan)':'var(--accent-orange)'}">${m.swe}%</span></div>`
      : '';
    const ifRow = m.ifScore
      ? `<div class="mc-row"><span class="mc-label">Prompt Adherence (IF)</span><span class="mc-val" style="color:${m.ifScore>=88?'var(--accent-green)':m.ifScore>=80?'var(--accent-cyan)':m.ifScore>=72?'var(--accent-orange)':'var(--accent-red)'}">${m.ifScore}<small>/100</small> ${m.ifScore>=88?'🎯':''}${m.ifScore<75?'⚠️':''}</span></div>`
      : '';
    return `<div class="mc" style="animation:fadeUp .35s ${i*.05}s ease-out both;border-color:${bc}" data-cats='${attr(JSON.stringify(m.cat))}'>
<div class="mc-name">${m.n} ${groqTag}${orTag}</div>
<div class="mc-org">${m.org} · ${m.par} · ctx ${m.ctx}</div>
${sweRow}
${ifRow}
<div style="font-size:.78em;color:var(--text-secondary);line-height:1.45;margin-top:6px">${m.str}</div>
<div class="mc-tags">${m.tags.map((t) => `<span class="mc-tag">${t}</span>`).join('')}</div>
</div>`;
  }).join('');
}
|
||
|
||
/**
 * Show only the model cards that belong to `cat` and mark `btn` as the active
 * filter button.
 *
 * @param {string} cat - Category to keep visible, or 'all' to show every card.
 * @param {Element} btn - The filter button that was clicked.
 */
function filterM(cat, btn) {
  for (const other of document.querySelectorAll('.fbtn')) {
    other.classList.remove('active');
  }
  btn.classList.add('active');

  const showEverything = cat === 'all';
  for (const card of document.querySelectorAll('.mc')) {
    // data-cats holds the card's categories as a JSON array; it is not parsed for 'all'.
    const visible = showEverything || JSON.parse(card.dataset.cats).includes(cat);
    card.style.display = visible ? '' : 'none';
  }
}
|
||
|
||
/**
 * Background colour for a heatmap cell.
 *
 * @param {number} v - Cell score.
 * @returns {string} CSS rgba() colour of the highest band whose threshold `v`
 *   reaches; the grey fallback for anything below 50.
 */
function hmColor(v) {
  // [minimum score, colour], highest band first.
  const bands = [
    [88, 'rgba(0,255,148,.8)'],
    [82, 'rgba(0,212,255,.7)'],
    [75, 'rgba(59,130,246,.6)'],
    [68, 'rgba(168,85,247,.45)'],
    [60, 'rgba(255,159,67,.4)'],
    [50, 'rgba(255,71,87,.3)'],
  ];
  for (const [floor, colour] of bands) {
    if (v >= floor) return colour;
  }
  return 'rgba(90,104,128,.2)';
}
|
||
/**
 * Text colour for a heatmap cell: dark text for scores of 75 and above,
 * light text otherwise.
 *
 * @param {number} v - Cell score.
 * @returns {string} Hex colour.
 */
function hmText(v) {
  if (v >= 75) {
    return '#0e1219';
  }
  return '#e8edf5';
}
|
||
|
||
/**
 * Render the agent × model heatmap into #hmTable.
 *
 * The header has one rotated column per entry of `hmModels` (name, provider,
 * IF score). Each row of `hmAgents` gets a reasoning-effort icon and one cell
 * per score: cells equal to the row maximum get a star, the agent's current
 * model gets the `hm-cur` class, and models with IF below 75 get a warning mark.
 * Hovering a cell calls `showTT`.
 */
function renderHeatmap() {
  const reIcons = {"L":"🟢","M":"🟡","H":"🔴"};

  // Provider label colour in the column header.
  const providerColor = (p) => {
    if (p === 'Groq') return '#ff6b81';
    if (p === 'Both') return '#c084fc';
    if (p.includes('Open') || p.includes('OR')) return '#e879f9';
    return 'var(--accent-cyan)';
  };

  const headerCells = hmModels.map((m) => {
    const ifColor = m.if >= 85 ? '#00ff94' : m.if >= 75 ? '#facc15' : '#ff6b81';
    return `<th style="writing-mode:vertical-lr;transform:rotate(180deg);max-width:32px;font-size:.56em;padding:3px 1px;">
${m.n}<br>
<span style="color:${providerColor(m.p)};font-size:.85em">${m.p}</span><br>
<span style="color:${ifColor};font-size:.9em;font-weight:700" title="Instruction Following score">IF:${m.if}</span>
</th>`;
  }).join('');

  const bodyRows = hmAgents.map((ag) => {
    const rowMax = Math.max(...ag.s);
    const reIcon = reIcons[ag.re] || '🟡';
    const cells = ag.s.map((s, j) => {
      const col = hmModels[j];
      const best = s === rowMax;
      const cur = j === ag.c;
      const ifNote = col.if < 75 ? ' ⚠' : '';
      const star = best ? '<span class="hm-star">★</span>' : '';
      return `<td style="background:${hmColor(s)};color:${hmText(s)}" class="${cur?'hm-cur':''}"
onmouseover="showTT(event,'${ag.n}','${col.n} (${col.p})',${s},${best},${cur},${col.if})"
onmouseout="hideTT()">${s}${star}${ifNote}</td>`;
    }).join('');
    return `<tr><td class="hm-r">${reIcon} ${ag.n}</td>` + cells + '</tr>';
  }).join('');

  const table = document.getElementById('hmTable');
  table.innerHTML = '<thead><tr><th class="hm-role">Агент</th>' + headerCells +
    '</tr></thead><tbody>' + bodyRows + '</tbody>';
}
|
||
|
||
/**
 * Fill and show the heatmap tooltip just below the hovered cell.
 *
 * @param {Event} e - Mouse event; its target's bounding rect positions the box.
 * @param {string} agent - Agent name.
 * @param {string} model - Model label shown as the tooltip heading.
 * @param {number} score - Final score of this agent/model pair.
 * @param {boolean} best - Whether this cell is the best score in its row.
 * @param {boolean} cur - Whether this model is the agent's current choice.
 * @param {number} ifScore - Instruction Following score of the model.
 */
function showTT(e,agent,model,score,best,cur,ifScore) {
  const box = document.getElementById('ttBox');
  const overlay = document.getElementById('ttOverlay');

  // IF bands: >=85 excellent, >=75 average, otherwise weak.
  let ifColor = '#ff6b81';
  let ifLabel = 'Слабо';
  if (ifScore >= 85) {
    ifColor = '#00ff94';
    ifLabel = 'Отлично';
  } else if (ifScore >= 75) {
    ifColor = '#facc15';
    ifLabel = 'Средне';
  }

  const lowIfWarning = ifScore < 75
    ? '<span style="color:#ff6b81">⚠ Модель плохо следует промпту и роли — оценка снижена</span><br>'
    : '';
  const bestLine = best ? '★ <strong>Лучший выбор</strong><br>' : '';
  const currentLine = cur ? '📌 <strong>Текущий выбор</strong>' : '';

  box.innerHTML = `<h4>${model}</h4><p><strong>Агент:</strong> ${agent}<br><strong>Итоговая оценка:</strong> ${score}/100<br>
<strong>Instruction Following:</strong> <span style="color:${ifColor};font-weight:700">${ifScore}/100 (${ifLabel})</span><br>
<span style="font-size:.9em;color:var(--text-muted)">Оценка = бенчмарк × IF-множитель</span><br>
${lowIfWarning}
${bestLine}${currentLine}</p>`;

  // Keep the box inside the viewport: clamp its left edge to innerWidth - 320.
  const rect = e.target.getBoundingClientRect();
  box.style.left = Math.min(rect.left, window.innerWidth - 320) + 'px';
  box.style.top = (rect.bottom + 6) + 'px';
  overlay.classList.add('show');
}
|
||
/** Hide the heatmap tooltip by removing the `show` class from #ttOverlay. */
function hideTT() {
  const overlay = document.getElementById('ttOverlay');
  overlay.classList.remove('show');
}
|
||
|
||
/**
 * Render one selectable card per entry of `recs` into #recGrid, then refresh
 * the selection counter via `updateRecSelection`.
 *
 * Every card starts checked. Cards with impact 'critical' get the `glow`
 * class. The speed metric is styled positive when its text contains '10',
 * the context metric when its text contains '→'.
 */
function renderRecs() {
  const cards = recs.map((r, i) => {
    const glowClass = r.imp === 'critical' ? 'glow' : '';
    const speedClass = r.sp.includes('10') ? 'pos' : 'neu';
    const ctxClass = r.ctx.includes('→') ? 'pos' : 'neu';
    return `
<div class="rec-card ${glowClass} selected" data-idx="${i}" style="animation:fadeUp .4s ${i*.06}s ease-out both">
<input type="checkbox" class="rec-check" checked onchange="updateRecSelection()" data-idx="${i}">
<div class="rec-hdr">
<div class="rec-agent">${r.a}</div>
<span class="impact-badge ${r.imp}">${r.imp.toUpperCase()}</span>
</div>
<div class="swap-vis">
<span class="swap-from">${r.from} <span class="prov-tag ${r.fromP.toLowerCase()}">${r.fromP}</span></span>
<span class="swap-arrow">→</span>
<span class="swap-to">${r.to} <span class="prov-tag ${r.toP.toLowerCase()}">${r.toP}</span></span>
</div>
<div class="rec-metrics">
<div class="rec-m"><div class="rec-m-label">Качество</div><div class="rec-m-val pos">${r.q}</div></div>
<div class="rec-m"><div class="rec-m-label">Скорость</div><div class="rec-m-val ${speedClass}">${r.sp}</div></div>
<div class="rec-m"><div class="rec-m-label">Контекст</div><div class="rec-m-val ${ctxClass}">${r.ctx}</div></div>
<div class="rec-m"><div class="rec-m-label">Провайдер</div><div class="rec-m-val" style="font-size:.7em;color:var(--text-secondary)">${r.prov}</div></div>
</div>
<div class="rec-reason">${r.r}</div>
</div>`;
  });

  const grid = document.getElementById('recGrid');
  grid.innerHTML = cards.join('');
  updateRecSelection();
}
|
||
|
||
// ===== EXPORT SYSTEM =====
|
||
|
||
/**
 * Sync the recommendation cards with their checkboxes: toggle the `selected`
 * class on each card, update the "N of M selected" counter and dim the export
 * button when nothing is selected.
 */
function updateRecSelection() {
  let picked = 0;
  for (const box of document.querySelectorAll('.rec-check')) {
    const card = box.closest('.rec-card');
    if (box.checked) {
      card.classList.add('selected');
      picked += 1;
    } else {
      card.classList.remove('selected');
    }
  }

  const counter = document.getElementById('selectedCount');
  counter.textContent = picked + ' из ' + recs.length + ' выбрано';

  const exportButton = document.getElementById('exportBtn');
  exportButton.style.opacity = picked > 0 ? '1' : '.4';
}
|
||
|
||
/**
 * Select-all / clear-all toggle for the recommendation checkboxes.
 * If every box is currently checked they are all cleared; otherwise they are
 * all checked. The button label is updated to describe the next action.
 */
function toggleAllRecs() {
  const boxes = Array.from(document.querySelectorAll('.rec-check'));
  const allChecked = boxes.every((box) => box.checked);
  const nextState = !allChecked;
  for (const box of boxes) {
    box.checked = nextState;
  }

  const toggleButton = document.getElementById('selectAllBtn');
  if (allChecked) {
    toggleButton.textContent = '☑ Выбрать все';
  } else {
    toggleButton.textContent = '☐ Снять все';
  }
  updateRecSelection();
}
|
||
|
||
/**
 * Build the export document for the recommendations whose checkbox is ticked.
 *
 * Each selected recommendation becomes an `update_model` entry. The
 * recommended model id is taken from a fixed alias table when the target
 * label is known there; otherwise it is 'ollama-cloud/' + the target label.
 * Markup is stripped from the rationale text.
 *
 * Checkboxes whose `data-idx` does not resolve to an entry of `recs` are
 * skipped, and a missing rationale is exported as an empty string.
 *
 * @returns {object} Document with metadata, a static summary, the selected
 *   `recommendations` and a matching `capability_index_patch` list.
 */
function buildExportJSON() {
  // Display label -> model string for capability-index.yaml.
  // A Map (not a plain object) so labels like 'constructor' cannot hit
  // inherited properties; built once instead of once per checkbox.
  const modelMap = new Map([
    ['nemotron-3-super', 'ollama-cloud/nemotron-3-super-120b-a12b'],
    ['nemotron-3-super + Groq burst', 'ollama-cloud/nemotron-3-super-120b-a12b'],
    ['qwen3-coder:480b', 'ollama-cloud/qwen3-coder:480b'],
    ['glm-5 (перенастроить)', 'ollama-cloud/glm-5'],
  ]);
  // Display label -> fallback model; currently empty.
  const fallbackMap = new Map();

  const selected = [];
  document.querySelectorAll('.rec-check').forEach((ch) => {
    if (!ch.checked) return;

    const idx = Number.parseInt(ch.dataset.idx, 10);
    const r = recs[idx];
    if (!r) return; // stale or malformed data-idx

    const entry = {
      agent: r.a,
      action: 'update_model',
      current_model: r.from,
      current_provider: r.fromP.toLowerCase(),
      recommended_model: modelMap.get(r.to) || 'ollama-cloud/' + r.to,
      recommended_provider: r.toP.toLowerCase(),
      impact: r.imp,
      expected_improvement: {
        quality: r.q,
        speed: r.sp,
        context_window: r.ctx
      },
      // Rationale may contain markup and may be absent altogether.
      rationale: String(r.r ?? '').replace(/<[^>]*>/g, ''),
    };

    const fallback = fallbackMap.get(r.to);
    if (fallback) {
      entry.fallback_model = fallback;
      entry.fallback_strategy = 'speed-burst';
      entry.fallback_note = 'Use Groq for low-volume speed-critical calls; primary on Ollama Cloud';
    }

    selected.push(entry);
  });

  return {
    "$schema": "https://app.kilo.ai/agent-recommendations.json",
    "generated": new Date().toISOString(),
    "source": "APAW Agent Model Research v3",
    "target_file": ".kilo/capability-index.yaml",
    "total_recommendations": selected.length,
    // NOTE(review): the summary is static text and does not reflect the selection.
    "summary": {
      "avg_quality_improvement": "+18%",
      "providers_used": ["ollama-cloud", "groq", "openrouter"],
      "key_models": [
        "nemotron-3-super-120b-a12b (1M ctx, SWE-bench 60.5%, RULER@1M 91.75%)",
        "qwen3-coder:480b (SWE-bench 66.5%, best open-source coding)",
        "qwen3.6-plus (FREE OpenRouter, 1M ctx, SWE-bench 78.8%)",
        "gemma4:31b (Intelligence Index 39, thinking, vision)",
        "minimax-m2.5 (SWE-bench 80.2%, best coding overall)"
      ]
    },
    "recommendations": selected,
    "capability_index_patch": selected.map((s) => ({
      agent: s.agent,
      set: { model: s.recommended_model }
    }))
  };
}
|
||
|
||
/**
 * Open the export modal with a pretty-printed preview of the selected
 * recommendations. Does nothing when no recommendation is checked.
 */
function exportJSON() {
  const tickedCount = document.querySelectorAll('.rec-check:checked').length;
  if (tickedCount === 0) {
    return;
  }

  const payload = buildExportJSON();
  const preview = document.getElementById('jsonPreview');
  preview.textContent = JSON.stringify(payload, null, 2);

  const modal = document.getElementById('jsonModal');
  modal.style.display = 'flex';
}
|
||
|
||
/** Hide the JSON export modal (#jsonModal). */
function closeModal() {
  const modal = document.getElementById('jsonModal');
  modal.style.display = 'none';
}
|
||
|
||
/**
 * Copy the JSON preview text to the clipboard and flash the result on the
 * copy button for two seconds.
 *
 * A rejected clipboard write (e.g. permission denied) or an unavailable
 * Clipboard API is now reported on the button instead of surfacing as an
 * unhandled promise rejection or a TypeError.
 */
function copyJSON() {
  const text = document.getElementById('jsonPreview').textContent;
  const btn = document.getElementById('copyBtn');

  // Show a temporary label, then restore the default caption.
  const flash = (label) => {
    if (!btn) return;
    btn.textContent = label;
    setTimeout(() => { btn.textContent = '📋 Копировать'; }, 2000);
  };

  const clipboard = typeof navigator !== 'undefined' ? navigator.clipboard : undefined;
  if (!clipboard?.writeText) {
    flash('⚠ Не удалось скопировать');
    return;
  }

  clipboard.writeText(text)
    .then(() => flash('✅ Скопировано!'))
    .catch((err) => {
      console.error('copyJSON: clipboard write failed', err);
      flash('⚠ Не удалось скопировать');
    });
}
|
||
|
||
/**
 * Download the selected recommendations as `agent-model-recommendations.json`.
 * The export is serialised into a Blob and saved by clicking a temporary
 * anchor element; the object URL is revoked right after the click.
 */
function downloadJSON() {
  const serialized = JSON.stringify(buildExportJSON(), null, 2);
  const objectUrl = URL.createObjectURL(new Blob([serialized], { type: 'application/json' }));

  const link = document.createElement('a');
  link.href = objectUrl;
  link.download = 'agent-model-recommendations.json';

  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);

  URL.revokeObjectURL(objectUrl);
}
|
||
|
||
/**
 * Render the before → after list of `impactData` into #impactDetails, then
 * render the benchmark comparison table.
 *
 * Each row shows the category, its notes, the score before and after, and a
 * delta chip: positive deltas are green with a leading '+', zero is neutral
 * '=0', and negative deltas are shown in red with their real value. Negative
 * deltas were previously rendered as '=0', which hid regressions.
 */
function renderImpact() {
  const d = document.getElementById('impactDetails');
  d.innerHTML = impactData.map((x) => {
    const up = x.d > 0;
    const down = x.d < 0;
    const arrowColor = up ? 'var(--accent-green)' : down ? 'var(--accent-red)' : 'var(--text-muted)';
    const afterColor = up ? 'var(--accent-green)' : down ? 'var(--accent-red)' : 'var(--text-secondary)';
    const chipBg = up ? 'rgba(0,255,148,.08)' : down ? 'rgba(255,71,87,.08)' : 'rgba(90,104,128,.08)';
    // A negative number already carries its '-' sign.
    const chipText = up ? '+' + x.d : down ? String(x.d) : '=0';
    return `
<div style="display:flex;align-items:center;gap:14px;padding:10px 0;border-bottom:1px solid var(--border)">
<div style="flex:1"><div style="font-weight:600;font-size:.88em">${x.cat}</div><div style="font-size:.74em;color:var(--text-muted);margin-top:2px">${x.n}</div></div>
<div style="display:flex;align-items:center;gap:10px">
<span style="font-family:'JetBrains Mono',monospace;color:var(--text-muted);font-size:.82em">${x.b}</span>
<span style="color:${arrowColor};font-size:1.1em">→</span>
<span style="font-family:'JetBrains Mono',monospace;color:${afterColor};font-size:.9em;font-weight:700">${x.a}</span>
<span style="font-family:'JetBrains Mono',monospace;font-size:.78em;padding:2px 6px;border-radius:3px;
background:${chipBg};color:${arrowColor}">
${chipText}</span>
</div>
</div>`;
  }).join('');

  // Render APAW vs TOP benchmark table
  renderBenchmarkComparison();
}
|
||
|
||
function renderBenchmarkComparison() {
|
||
// Expanded benchmarks: 9 dimensions covering all APAW roles
|
||
const benchmarks = [
|
||
{name:'SWE-V',full:'SWE-Bench Verified',desc:'GitHub issue resolution (500 tasks)',role:'lead-dev, backend, fixer'},
|
||
{name:'SWE-P',full:'SWE-Bench Pro',desc:'Multi-lang, decontaminated (1865 tasks)',role:'all coding agents'},
|
||
{name:'T-Bench',full:'Terminal-Bench 2.0',desc:'CLI/shell multi-step tasks',role:'devops, planner, orchestrator'},
|
||
{name:'LCB',full:'LiveCodeBench',desc:'Code gen from specs (held-out)',role:'sdet, go-dev, python-dev'},
|
||
{name:'GPQA',full:'GPQA Diamond',desc:'PhD-level reasoning',role:'system-analyst, planner'},
|
||
{name:'BComp',full:'BrowseComp',desc:'Web research & synthesis',role:'browser-auto, capability-analyst'},
|
||
{name:'HLE',full:'Humanity Last Exam',desc:'Frontier knowledge (with tools)',role:'agent-architect, evaluator'},
|
||
{name:'Ctx',full:'Context Window',desc:'Max tokens in one pass',role:'history-miner, memory-mgr'},
|
||
{name:'$/M',full:'Cost per 1M input',desc:'API pricing',role:'all agents (ROI)'},
|
||
];
|
||
|
||
const models = [
|
||
// === TOP CLOSED-SOURCE (April 2026 leaders) ===
|
||
{name:'Claude Opus 4.7',type:'closed',org:'Anthropic',
|
||
scores:[87.6,64.3,69.4,null,94.2,79.3,53.0,'1M','$5'],color:'#c084fc',
|
||
note:'#1 апрель 2026'},
|
||
{name:'GPT-5.5',type:'closed',org:'OpenAI',
|
||
scores:[null,58.6,82.7,null,null,83.4,57.2,'1M','$5'],color:'#ff6b81',
|
||
note:'Новейший, Terminal #1'},
|
||
{name:'GPT-5.4',type:'closed',org:'OpenAI',
|
||
scores:[78.2,59.1,75.1,null,94.4,82.7,58.7,'200K','$2.50'],color:'#ff6b81',
|
||
note:'Reasoning, math'},
|
||
{name:'Gemini 3.1 Pro',type:'closed',org:'Google',
|
||
scores:[80.6,46.1,68.5,null,94.3,85.9,51.4,'2M','$2'],color:'#facc15',
|
||
note:'ARC-AGI 77.1%, дешёвый'},
|
||
{name:'Claude Sonnet 4.6',type:'closed',org:'Anthropic',
|
||
scores:[79.6,null,null,null,null,null,null,'200K','$3'],color:'#c084fc',
|
||
note:'5× дешевле Opus'},
|
||
{name:'GPT-5.3-Codex',type:'closed',org:'OpenAI',
|
||
scores:[85.0,57.0,77.3,null,null,null,null,'200K','$6'],color:'#ff6b81',
|
||
note:'Coding specialist'},
|
||
|
||
// === APAW PIPELINE MODELS ===
|
||
{name:'Kimi K2.6',type:'apaw',org:'APAW',
|
||
scores:[80.2,58.6,66.7,87.2,null,83.2,54.0,'256K','$0.95'],color:'#00ff94',
|
||
note:'devops, browser, architect (3)'},
|
||
{name:'GLM-5.1',type:'apaw',org:'APAW',
|
||
scores:[null,58.4,63.5,null,86.2,68.7,null,'200K','~$0.50'],color:'#00ff94',
|
||
note:'12 agents! orchestrator, eval...'},
|
||
{name:'V4-Pro Max',type:'apaw',org:'APAW',
|
||
scores:[80.6,55.4,67.9,93.5,90.1,83.4,48.2,'1M','$0.42'],color:'#00d4ff',
|
||
note:'planner, go-dev (рек.)'},
|
||
{name:'Qwen3-Coder 480B',type:'apaw',org:'APAW',
|
||
scores:[66.5,null,null,null,null,null,null,'256K','~$0.50'],color:'#00ff94',
|
||
note:'7 coding agents'},
|
||
{name:'MiniMax M2.5',type:'apaw',org:'APAW',
|
||
scores:[80.2,51.3,null,null,null,76.3,null,'204K','$0.15'],color:'#00ff94',
|
||
note:'frontend, skeptic, fixer (3)'},
|
||
{name:'Nemotron Super',type:'apaw',org:'APAW',
|
||
scores:[60.5,null,null,null,null,null,null,'1M','~$0.40'],color:'#00ff94',
|
||
note:'6 agents (memory, history)'},
|
||
];
|
||
|
||
const t = document.getElementById('benchTable');
|
||
let h = '<thead><tr><th style="text-align:left;padding:8px 6px;border-bottom:2px solid var(--border);font-size:.85em">Модель</th>';
|
||
benchmarks.forEach(b => {
|
||
h += '<th style="padding:8px 3px;border-bottom:2px solid var(--border);font-size:.68em;max-width:60px" title="'+b.full+': '+b.desc+'\nРоли: '+b.role+'">'+b.name+'</th>';
|
||
});
|
||
h += '</tr></thead><tbody>';
|
||
|
||
// Calculate APAW best per benchmark
|
||
const apawBest = benchmarks.map((b,i) => {
|
||
let best = 0;
|
||
models.filter(m=>m.type==='apaw').forEach(m => {
|
||
const v = m.scores[i];
|
||
if(typeof v === 'number' && v > best) best = v;
|
||
});
|
||
return best;
|
||
});
|
||
|
||
// Calculate closed best per benchmark
|
||
const closedBest = benchmarks.map((b,i) => {
|
||
let best = 0;
|
||
models.filter(m=>m.type==='closed').forEach(m => {
|
||
const v = m.scores[i];
|
||
if(typeof v === 'number' && v > best) best = v;
|
||
});
|
||
return best;
|
||
});
|
||
|
||
models.forEach((m,mi) => {
|
||
if(mi === 6) h += '<tr><td colspan="'+(benchmarks.length+1)+'" style="padding:5px;background:rgba(0,212,255,.06);font-weight:700;font-size:.8em;color:var(--accent-cyan);text-align:center">— APAW Pipeline (open-source, $0.15–$0.95/M) —</td></tr>';
|
||
|
||
h += '<tr style="'+(m.type==='apaw'?'background:rgba(0,255,148,.02)':'')+'">';
|
||
h += '<td style="padding:6px;border-bottom:1px solid var(--border);white-space:nowrap"><span style="font-weight:600;color:'+m.color+';font-size:.88em">'+m.name+'</span>';
|
||
h += '<div style="font-size:.65em;color:var(--text-muted)">'+m.note+'</div></td>';
|
||
|
||
m.scores.forEach((s,si) => {
|
||
let val, cellColor = 'var(--text-secondary)', bg = 'transparent';
|
||
|
||
if(s === null) { val = '—'; cellColor = 'rgba(90,104,128,.4)'; }
|
||
else if(typeof s === 'string' && s.startsWith('$')) {
|
||
val = s;
|
||
if(m.type === 'apaw') { cellColor = '#00ff94'; bg = 'rgba(0,255,148,.06)'; }
|
||
}
|
||
else if(typeof s === 'string') { val = s; }
|
||
else {
|
||
val = s.toFixed(1);
|
||
if(m.type === 'apaw' && si < 7) {
|
||
const cb = closedBest[si];
|
||
if(cb > 0) {
|
||
const diff = s - cb;
|
||
if(diff >= 0) { cellColor='#00ff94'; bg='rgba(0,255,148,.1)'; val+=' 🟢'; }
|
||
else if(diff > -5) { cellColor='#facc15'; bg='rgba(250,204,21,.06)'; val+=' 🟡'; }
|
||
else { cellColor='#ff6b81'; bg='rgba(255,107,129,.06)'; val+=' 🔴'; }
|
||
}
|
||
}
|
||
if(m.type === 'closed' && si < 7) {
|
||
// Highlight if APAW beats this closed model
|
||
const ab = apawBest[si];
|
||
if(ab > 0 && s < ab) { bg='rgba(255,107,129,.04)'; }
|
||
}
|
||
}
|
||
h += '<td style="padding:5px 3px;border-bottom:1px solid var(--border);text-align:center;color:'+cellColor+';background:'+bg+';font-size:.78em">'+val+'</td>';
|
||
});
|
||
h += '</tr>';
|
||
});
|
||
|
||
// === Summary row: APAW best vs Closed best ===
|
||
h += '<tr style="background:rgba(0,212,255,.05)"><td style="padding:8px;font-weight:700;color:var(--accent-cyan);font-size:.85em">APAW лучший</td>';
|
||
benchmarks.forEach((b,i) => {
|
||
if(i < 7) {
|
||
const ab = apawBest[i], cb = closedBest[i];
|
||
if(ab === 0) { h += '<td style="padding:8px 3px;text-align:center;font-size:.78em;color:var(--text-muted)">—</td>'; return; }
|
||
const diff = ab - cb;
|
||
const icon = diff >= 0 ? '🟢' : diff > -5 ? '🟡' : '🔴';
|
||
const pct = cb > 0 ? ((ab/cb)*100-100).toFixed(1) : '?';
|
||
const sign = diff >= 0 ? '+' : '';
|
||
h += '<td style="padding:6px 3px;text-align:center;font-weight:700;font-size:.78em"><span style="color:'+(diff>=0?'#00ff94':diff>-5?'#facc15':'#ff6b81')+'">'+ab.toFixed(1)+'</span><div style="font-size:.7em;color:var(--text-muted)">'+sign+diff.toFixed(1)+' '+icon+'</div></td>';
|
||
} else if(i === 7) {
|
||
h += '<td style="padding:8px 3px;text-align:center;font-size:.78em;color:var(--accent-green)">1M ✅</td>';
|
||
} else {
|
||
h += '<td style="padding:8px 3px;text-align:center;font-weight:700;font-size:.82em;color:var(--accent-green)">10-33× 🟢</td>';
|
||
}
|
||
});
|
||
h += '</tr>';
|
||
|
||
// === Role-based average row ===
|
||
h += '<tr style="background:rgba(0,255,148,.04)"><td style="padding:8px;font-weight:700;color:var(--accent-green);font-size:.82em">Средняя по ролям APAW*</td>';
|
||
// Calculate weighted average across all roles
|
||
const roleAvg = [78.2, 55.8, 65.7, 90.4, 88.2, 78.4, 51.1]; // pre-calculated across all 36 agents
|
||
const closedAvg = [82.2, 57.8, 74.6, null, 94.3, 83.4, 54.8];
|
||
roleAvg.forEach((ra,i) => {
|
||
if(i < 7 && ra > 0) {
|
||
const ca = closedAvg[i];
|
||
if(!ca) { h += '<td style="padding:6px 3px;text-align:center;font-size:.82em;color:var(--accent-green);font-weight:700">'+ra.toFixed(1)+'</td>'; return; }
|
||
const diff = ra - ca;
|
||
const col = diff >= 0 ? '#00ff94' : diff > -8 ? '#facc15' : '#ff6b81';
|
||
h += '<td style="padding:6px 3px;text-align:center;font-weight:700;font-size:.82em"><span style="color:'+col+'">'+ra.toFixed(1)+'</span><div style="font-size:.65em;color:var(--text-muted)">vs '+ca.toFixed(1)+'</div></td>';
|
||
} else if(i === 7) {
|
||
h += '<td style="padding:8px 3px;text-align:center;font-size:.78em;color:var(--accent-green)">573K avg</td>';
|
||
} else {
|
||
h += '<td style="padding:8px 3px;text-align:center;font-weight:700;font-size:.82em;color:var(--accent-green)">$0.49 avg</td>';
|
||
}
|
||
});
|
||
h += '</tr>';
|
||
|
||
t.innerHTML = h + '</tbody>';
|
||
}
|
||
|
||
|
||
/**
 * Draws the grouped "impact" bar chart on the #impactCanvas element.
 *
 * For every entry of the global `impactData` array two bars are painted on a
 * fixed 0-100 scale: `d.b` (translucent red, legend "Текущий score") and
 * `d.a` (green when `d.d > 0`, grey otherwise, legend "После замены").
 * A positive `d.d` is printed above the pair as "+<d.d>", and `d.cat`
 * (parenthesised parts removed, cut to 22 characters) is written as a
 * slanted label under the axis.
 *
 * Returns early without drawing when the canvas is missing or has no
 * offsetParent (hidden), and after clearing when `impactData` is empty.
 */
function drawChart() {
  const canvas = document.getElementById('impactCanvas');
  if (!canvas || !canvas.offsetParent) return; // skip if missing or hidden

  const ctx = canvas.getContext('2d');
  const dpr = window.devicePixelRatio || 1;
  // Clamp so a very narrow parent cannot produce a negative canvas size.
  const cssW = Math.max(0, canvas.parentElement.clientWidth - 36);
  const cssH = 340;

  // Backing store is sized in device pixels, CSS box in CSS pixels.
  // Assigning canvas.width/height resets the 2D context state, so the
  // scale() below does not accumulate across redraws.
  canvas.width = cssW * dpr;
  canvas.height = cssH * dpr;
  canvas.style.width = cssW + 'px';
  canvas.style.height = cssH + 'px';
  ctx.scale(dpr, dpr);
  ctx.clearRect(0, 0, cssW, cssH);

  const data = impactData;
  if (!data.length) return;

  const TEXT_COLOR = '#8896aa';
  const BEFORE_COLOR = 'rgba(255,71,87,.4)';
  const AFTER_COLOR = 'rgba(0,255,148,.55)';

  // Never let the bar width go negative: fillRect with a negative width
  // would paint the bar backwards (to the left of its x position).
  const barW = Math.max(0, Math.min(38, (cssW - 180) / data.length / 2 - 4));
  const cL = 48;          // left edge of the plot area
  const cB = cssH - 60;   // y of the baseline (room below for labels)
  const cH = cB - 20;     // usable plot height
  const mx = 100;         // value that maps to the full plot height

  // Grid lines and axis ticks at 0, 20, ..., 100
  ctx.strokeStyle = 'rgba(30,39,54,.7)';
  ctx.lineWidth = 1;
  for (let i = 0; i <= 5; i++) {
    const y = cB - (cH * (i * 20)) / mx;
    ctx.beginPath();
    ctx.moveTo(cL, y);
    ctx.lineTo(cssW - 16, y);
    ctx.stroke();
    ctx.fillStyle = '#5a6880';
    ctx.font = '10px JetBrains Mono,monospace';
    ctx.textAlign = 'right';
    ctx.fillText(i * 20, cL - 6, y + 3);
  }

  // Bars
  const slotW = (cssW - cL - 40) / data.length;
  data.forEach((d, i) => {
    const x = cL + 28 + i * slotW;

    // "Before" bar (red)
    const h1 = (d.b / mx) * cH;
    ctx.fillStyle = BEFORE_COLOR;
    ctx.fillRect(x, cB - h1, barW, h1);

    // "After" bar (green when there is a gain, grey otherwise)
    const h2 = (d.a / mx) * cH;
    ctx.fillStyle = d.d > 0 ? AFTER_COLOR : 'rgba(136,150,170,.35)';
    ctx.fillRect(x + barW + 3, cB - h2, barW, h2);

    // Delta label, only for positive deltas, centred above the taller bar
    if (d.d > 0) {
      ctx.fillStyle = '#00ff94';
      ctx.font = 'bold 10px JetBrains Mono,monospace';
      ctx.textAlign = 'center';
      ctx.fillText('+' + d.d, x + barW + 1, cB - Math.max(h1, h2) - 6);
    }

    // Category label (rotated); save/restore keeps the rotation local
    ctx.save();
    ctx.translate(x + barW, cB + 10);
    ctx.rotate(-0.4);
    ctx.fillStyle = TEXT_COLOR;
    ctx.font = '8px Outfit,sans-serif';
    ctx.textAlign = 'left';
    const label = d.cat.replace(/\s*\(.*?\)/g, '').substring(0, 22);
    ctx.fillText(label, 0, 0);
    ctx.restore();
  });

  // Legend
  ctx.font = '11px Outfit,sans-serif';
  ctx.textAlign = 'left';

  ctx.fillStyle = BEFORE_COLOR;
  ctx.fillRect(cssW - 180, 8, 12, 12);
  ctx.fillStyle = TEXT_COLOR;
  ctx.fillText('Текущий score', cssW - 162, 18);

  ctx.fillStyle = AFTER_COLOR;
  ctx.fillRect(cssW - 180, 26, 12, 12);
  // Reset to the text colour: otherwise this label inherits the swatch's
  // translucent green and does not match the first legend entry.
  ctx.fillStyle = TEXT_COLOR;
  ctx.fillText('После замены', cssW - 162, 36);
}
|
||
|
||
// ======================= INIT =======================
|
||
// Render every dashboard section once the DOM has been parsed.
document.addEventListener('DOMContentLoaded', function onReady() {
  renderCfg();
  renderGroq();
  renderModels();
  renderHeatmap();
  renderRecs();
  renderImpact();
});

// Redraw the impact chart on window resize, but only while its tab is active.
window.addEventListener('resize', function onResize() {
  const impactTab = document.getElementById('tab-impact');
  if (!impactTab.classList.contains('active')) return;
  drawChart();
});
|
||
</script>
|
||
</body>
|
||
</html>
|