Files
APAW/agent-evolution/ideas/apaw_agent_model_research_v3.html
¨NW¨ 3badb259cc feat: bidirectional research dashboard + agent config fixes
- Integrate apaw_agent_model_research_v3.html as standalone dashboard
- Add model-benchmarks.json with 32 agents, 11 scored models, 11 recommendations
- Add build-research-dashboard.ts: inject live data into template → standalone HTML
- Add rebuild-template.cjs: regenerate template from v3.html source
- Add sync-benchmarks-from-yaml.cjs: sync YAML → JSON round-trip
- Add sync-model-research.ts: apply recommendation matrix to config files
- Add model-benchmarks.schema.json and model-research.schema.json for validation
- Add bidirectional-data-flow.md architecture documentation
- Add log-execution.cjs pipeline hook
- Update capability-index.yaml: add fallback_models, failover_strategy
- Update kilo-meta.json, kilo.jsonc, KILO_SPEC.md with synced models
- Update evolution.md / research.md / self-evolution.md / evolutionary-sync.md docs
- Fix security-auditor.md: quote YAML color (#DC2626)
- Fix orchestrator.md: remove duplicate devops-engineer key
- Build research-dashboard.html (106KB standalone) + dated archive
2026-04-29 21:04:22 +01:00

1169 lines
77 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<!DOCTYPE html>
<html lang="ru">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>APAW KiloCode — Agent Model Research v3 (Ollama + Groq + OpenRouter)</title>
<link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@300;400;500;600;700&family=Outfit:wght@300;400;500;600;700;800;900&display=swap" rel="stylesheet">
<style>
:root {
--bg-deep: #080b12;
--bg-panel: #0e1219;
--bg-card: #141922;
--bg-card-hover: #1a2130;
--border: #1e2736;
--border-bright: #2a3650;
--text-primary: #e8edf5;
--text-secondary: #8896aa;
--text-muted: #5a6880;
--accent-cyan: #00d4ff;
--accent-green: #00ff94;
--accent-orange: #ff9f43;
--accent-red: #ff4757;
--accent-purple: #a855f7;
--accent-blue: #3b82f6;
--accent-yellow: #facc15;
--glow-cyan: rgba(0,212,255,0.15);
--glow-green: rgba(0,255,148,0.1);
--glow-orange: rgba(255,159,67,0.1);
}
* { margin:0; padding:0; box-sizing:border-box; }
body {
font-family:'Outfit',sans-serif;
background:var(--bg-deep);
color:var(--text-primary);
min-height:100vh;
overflow-x:hidden;
}
body::before {
content:'';
position:fixed; top:0; left:0; right:0; bottom:0;
background:
linear-gradient(90deg,rgba(0,212,255,0.02) 1px,transparent 1px),
linear-gradient(rgba(0,212,255,0.02) 1px,transparent 1px);
background-size:60px 60px;
animation:gp 8s ease-in-out infinite;
pointer-events:none; z-index:0;
}
@keyframes gp { 0%,100%{opacity:.3} 50%{opacity:.6} }
.container { max-width:1540px; margin:0 auto; padding:24px 16px; position:relative; z-index:1; }
/* HEADER */
.header { text-align:center; margin-bottom:32px; animation:fadeDown .7s ease-out; }
@keyframes fadeDown { from{opacity:0;transform:translateY(-20px)} to{opacity:1;transform:translateY(0)} }
.header h1 {
font-size:2.6em; font-weight:900;
background:linear-gradient(135deg,var(--accent-cyan),var(--accent-green),var(--accent-purple));
-webkit-background-clip:text; -webkit-text-fill-color:transparent;
letter-spacing:-1px;
}
.header .sub { font-family:'JetBrains Mono',monospace; color:var(--text-muted); font-size:.8em; margin-top:6px; letter-spacing:1px; }
/* TABS */
.tabs { display:flex; gap:3px; background:var(--bg-panel); border:1px solid var(--border); border-radius:12px; padding:4px; margin-bottom:28px; overflow-x:auto; }
.tab-btn {
flex:1; min-width:100px; padding:10px 8px; background:none; border:none; color:var(--text-secondary);
font-family:'Outfit',sans-serif; font-size:.82em; font-weight:600; border-radius:9px; cursor:pointer; transition:all .25s; white-space:nowrap;
}
.tab-btn:hover { color:var(--text-primary); background:var(--bg-card); }
.tab-btn.active { color:var(--bg-deep); background:linear-gradient(135deg,var(--accent-cyan),var(--accent-green)); box-shadow:0 0 16px var(--glow-cyan); }
.tab-panel { display:none; animation:fadeUp .4s ease-out; }
.tab-panel.active { display:block; }
@keyframes fadeUp { from{opacity:0;transform:translateY(16px)} to{opacity:1;transform:translateY(0)} }
/* STAT CARDS */
.stats-row { display:grid; grid-template-columns:repeat(auto-fit,minmax(190px,1fr)); gap:14px; margin-bottom:26px; }
.stat-card {
background:var(--bg-card); border:1px solid var(--border); border-radius:11px; padding:18px; position:relative; overflow:hidden; transition:all .3s;
}
.stat-card:hover { border-color:var(--accent-cyan); transform:translateY(-2px); box-shadow:0 6px 24px var(--glow-cyan); }
.stat-label { font-family:'JetBrains Mono',monospace; font-size:.65em; color:var(--text-muted); text-transform:uppercase; letter-spacing:1.5px; margin-bottom:6px; }
.stat-value { font-size:2em; font-weight:800; }
.stat-sub { font-size:.75em; color:var(--text-secondary); margin-top:3px; }
.grad-cyan { background:linear-gradient(135deg,var(--accent-cyan),var(--accent-green)); -webkit-background-clip:text; -webkit-text-fill-color:transparent; }
.grad-orange { background:linear-gradient(135deg,var(--accent-orange),var(--accent-yellow)); -webkit-background-clip:text; -webkit-text-fill-color:transparent; }
.grad-purple { background:linear-gradient(135deg,var(--accent-purple),#e879f9); -webkit-background-clip:text; -webkit-text-fill-color:transparent; }
.grad-green { background:linear-gradient(135deg,var(--accent-green),#4ade80); -webkit-background-clip:text; -webkit-text-fill-color:transparent; }
.grad-red { background:linear-gradient(135deg,var(--accent-red),#ff6b81); -webkit-background-clip:text; -webkit-text-fill-color:transparent; }
/* SECTION HEADERS */
.sec-hdr { display:flex; align-items:center; gap:10px; margin-bottom:18px; padding-bottom:10px; border-bottom:1px solid var(--border); }
.sec-hdr h2 { font-size:1.2em; font-weight:700; }
.badge { font-family:'JetBrains Mono',monospace; font-size:.65em; padding:3px 9px; border-radius:16px; }
.badge-cyan { background:var(--glow-cyan); color:var(--accent-cyan); border:1px solid rgba(0,212,255,.2); }
.badge-orange { background:var(--glow-orange); color:var(--accent-orange); border:1px solid rgba(255,159,67,.2); }
.badge-green { background:var(--glow-green); color:var(--accent-green); border:1px solid rgba(0,255,148,.2); }
/* TABLES */
.tbl-wrap { overflow-x:auto; border-radius:11px; border:1px solid var(--border); background:var(--bg-card); margin-bottom:26px; }
table.dt { width:100%; border-collapse:collapse; font-size:.84em; }
table.dt th {
font-family:'JetBrains Mono',monospace; font-size:.7em; color:var(--text-muted); text-transform:uppercase;
letter-spacing:1.2px; padding:12px 14px; background:var(--bg-panel); border-bottom:2px solid var(--border); text-align:left; position:sticky; top:0;
}
table.dt td { padding:10px 14px; border-bottom:1px solid var(--border); transition:background .15s; }
table.dt tr:hover td { background:var(--bg-card-hover); }
.mbadge { display:inline-block; padding:3px 8px; border-radius:5px; font-family:'JetBrains Mono',monospace; font-size:.78em; font-weight:500; }
.mbadge.qwen { background:rgba(59,130,246,.12); color:#60a5fa; border:1px solid rgba(59,130,246,.25); }
.mbadge.gptoss { background:rgba(168,85,247,.12); color:#c084fc; border:1px solid rgba(168,85,247,.25); }
.mbadge.glm { background:rgba(0,255,148,.08); color:#00ff94; border:1px solid rgba(0,255,148,.2); }
.mbadge.minimax { background:rgba(255,159,67,.12); color:#ff9f43; border:1px solid rgba(255,159,67,.25); }
.mbadge.devstral { background:rgba(0,212,255,.12); color:#00d4ff; border:1px solid rgba(0,212,255,.25); }
.mbadge.deepseek { background:rgba(250,204,21,.12); color:#facc15; border:1px solid rgba(250,204,21,.25); }
.mbadge.nemotron { background:rgba(34,197,94,.12); color:#4ade80; border:1px solid rgba(34,197,94,.25); }
.mbadge.groq { background:rgba(255,71,87,.12); color:#ff6b81; border:1px solid rgba(255,71,87,.25); }
.mbadge.kimi { background:rgba(250,204,21,.12); color:#fde68a; border:1px solid rgba(250,204,21,.2); }
.mbadge.llama { background:rgba(59,130,246,.1); color:#93c5fd; border:1px solid rgba(59,130,246,.2); }
/* SCORE BAR */
.sbar { display:flex; align-items:center; gap:6px; }
.sbar-bg { width:70px; height:5px; background:var(--border); border-radius:3px; overflow:hidden; }
.sbar-fill { height:100%; border-radius:3px; transition:width 1s ease-out; }
.sbar-fill.h { background:linear-gradient(90deg,var(--accent-green),#00ff94); }
.sbar-fill.m { background:linear-gradient(90deg,var(--accent-orange),#ffc048); }
.sbar-fill.l { background:linear-gradient(90deg,var(--accent-red),#ff6b81); }
.snum { font-family:'JetBrains Mono',monospace; font-weight:600; font-size:.85em; min-width:28px; }
/* GROQ SPEED INDICATOR */
.speed-ind { display:inline-flex; align-items:center; gap:4px; }
.speed-dot { width:7px; height:7px; border-radius:50%; animation:pulse 1.5s ease-in-out infinite; }
.speed-dot.ultra { background:var(--accent-green); box-shadow:0 0 8px var(--accent-green); }
.speed-dot.fast { background:var(--accent-cyan); box-shadow:0 0 6px var(--accent-cyan); }
.speed-dot.normal { background:var(--accent-orange); }
@keyframes pulse { 0%,100%{opacity:.5;transform:scale(.8)} 50%{opacity:1;transform:scale(1.2)} }
/* RECOMMENDATION CARDS */
.rec-grid { display:grid; grid-template-columns:repeat(auto-fit,minmax(400px,1fr)); gap:16px; margin-bottom:26px; }
.rec-card {
background:var(--bg-card); border:1px solid var(--border); border-radius:12px; padding:20px;
position:relative; overflow:hidden; transition:all .35s;
}
.rec-card:hover { border-color:var(--accent-green); box-shadow:0 0 30px var(--glow-green); transform:translateY(-2px); }
.rec-card.glow { animation:glowP 3s ease-in-out infinite; }
@keyframes glowP { 0%,100%{box-shadow:0 0 16px var(--glow-green)} 50%{box-shadow:0 0 32px var(--glow-green)} }
.rec-hdr { display:flex; justify-content:space-between; align-items:flex-start; margin-bottom:12px; }
.rec-agent { font-weight:700; font-size:1em; color:var(--accent-cyan); }
.impact-badge { padding:2px 8px; border-radius:16px; font-family:'JetBrains Mono',monospace; font-size:.68em; font-weight:600; }
.impact-badge.critical { background:rgba(255,71,87,.18); color:var(--accent-red); border:1px solid rgba(255,71,87,.25); }
.impact-badge.high { background:rgba(255,159,67,.18); color:var(--accent-orange); border:1px solid rgba(255,159,67,.25); }
.impact-badge.medium { background:rgba(250,204,21,.18); color:var(--accent-yellow); border:1px solid rgba(250,204,21,.25); }
.swap-vis { display:flex; align-items:center; gap:10px; margin:12px 0; padding:12px; background:var(--bg-panel); border-radius:8px; }
.swap-from { font-family:'JetBrains Mono',monospace; font-size:.78em; padding:4px 8px; border-radius:5px; background:rgba(255,71,87,.08); color:#ff6b81; border:1px solid rgba(255,71,87,.15); text-decoration:line-through; opacity:.65; }
.swap-to { font-family:'JetBrains Mono',monospace; font-size:.78em; padding:4px 8px; border-radius:5px; background:rgba(0,255,148,.08); color:#00ff94; border:1px solid rgba(0,255,148,.2); font-weight:600; }
.swap-arrow { color:var(--accent-green); font-size:1.4em; animation:arrP 2s ease-in-out infinite; }
@keyframes arrP { 0%,100%{opacity:.4;transform:scale(1)} 50%{opacity:1;transform:scale(1.12)} }
.rec-metrics { display:grid; grid-template-columns:repeat(4,1fr); gap:8px; margin-top:12px; }
.rec-m { text-align:center; padding:6px; background:var(--bg-deep); border-radius:6px; }
.rec-m-label { font-size:.6em; color:var(--text-muted); text-transform:uppercase; letter-spacing:.8px; font-family:'JetBrains Mono',monospace; }
.rec-m-val { font-size:1.1em; font-weight:700; margin-top:1px; }
.rec-m-val.pos { color:var(--accent-green); }
.rec-m-val.neu { color:var(--accent-orange); }
.rec-reason { font-size:.82em; color:var(--text-secondary); line-height:1.55; margin-top:10px; padding-top:10px; border-top:1px solid var(--border); }
/* HEATMAP */
.hm-wrap { overflow-x:auto; border-radius:11px; border:1px solid var(--border); background:var(--bg-card); padding:18px; margin-bottom:26px; }
.hm-title { font-weight:700; font-size:1.05em; }
.hm-sub { font-size:.76em; color:var(--text-muted); margin-bottom:14px; }
.hm-table { border-collapse:collapse; width:100%; }
.hm-table th { font-family:'JetBrains Mono',monospace; font-size:.62em; color:var(--text-muted); padding:6px 4px; text-align:center; white-space:nowrap; }
.hm-table th.hm-role { text-align:left; min-width:150px; font-size:.68em; }
.hm-table td { text-align:center; padding:5px 3px; font-family:'JetBrains Mono',monospace; font-size:.74em; font-weight:600; border-radius:3px; cursor:pointer; transition:all .12s; min-width:38px; }
.hm-table td:hover { transform:scale(1.12); z-index:2; }
.hm-table td.hm-r { text-align:left; font-family:'Outfit',sans-serif; font-size:.78em; font-weight:500; color:var(--text-secondary); cursor:default; }
.hm-table td.hm-r:hover { transform:none; }
.hm-star { color:#FFD700; font-size:.85em; }
.hm-cur { outline:2px solid var(--accent-cyan); outline-offset:-2px; }
/* PROVIDER TAGS */
.prov-tag { display:inline-block; padding:1px 6px; border-radius:3px; font-size:.62em; font-family:'JetBrains Mono',monospace; margin-left:4px; }
.prov-tag.ollama { background:rgba(0,212,255,.1); color:var(--accent-cyan); }
.prov-tag.groq { background:rgba(255,71,87,.1); color:#ff6b81; }
.prov-tag.openrouter { background:rgba(168,85,247,.1); color:#c084fc; }
.prov-tag.hybrid { background:rgba(0,255,148,.1); color:#00ff94; }
/* MODEL CARDS */
.model-grid { display:grid; grid-template-columns:repeat(auto-fit,minmax(290px,1fr)); gap:14px; margin-bottom:26px; }
.mc { background:var(--bg-card); border:1px solid var(--border); border-radius:12px; padding:18px; transition:all .3s; position:relative; }
.mc:hover { transform:translateY(-2px); border-color:var(--accent-cyan); box-shadow:0 6px 24px var(--glow-cyan); }
.mc-name { font-weight:700; font-size:1.05em; margin-bottom:3px; }
.mc-org { font-size:.74em; color:var(--text-muted); margin-bottom:12px; font-family:'JetBrains Mono',monospace; }
.mc-row { display:flex; justify-content:space-between; align-items:center; padding:5px 0; border-bottom:1px solid rgba(30,39,54,.4); font-size:.82em; }
.mc-row:last-child { border-bottom:none; }
.mc-label { color:var(--text-secondary); }
.mc-val { font-family:'JetBrains Mono',monospace; font-weight:600; }
.mc-tags { display:flex; flex-wrap:wrap; gap:3px; margin-top:10px; }
.mc-tag { font-size:.64em; padding:2px 6px; border-radius:3px; font-family:'JetBrains Mono',monospace; background:rgba(0,212,255,.06); color:var(--accent-cyan); border:1px solid rgba(0,212,255,.12); }
.mc-best { font-size:.72em; padding:3px 8px; border-radius:4px; background:rgba(0,255,148,.1); color:var(--accent-green); border:1px solid rgba(0,255,148,.2); margin-top:8px; display:inline-block; }
/* GROQ SECTION */
.groq-card { border-left:3px solid var(--accent-red); }
.groq-speed { font-family:'JetBrains Mono',monospace; font-size:1.8em; font-weight:800; color:var(--accent-red); }
/* SUMMARY */
.summary { background:linear-gradient(135deg,rgba(0,212,255,.04),rgba(0,255,148,.04)); border:1px solid var(--border-bright); border-radius:12px; padding:24px; margin-bottom:26px; }
.summary h3 { color:var(--accent-cyan); font-size:1.1em; margin-bottom:10px; }
.summary p { color:var(--text-secondary); line-height:1.65; font-size:.88em; }
.summary ul { list-style:none; margin-top:10px; }
.summary li { padding:5px 0 5px 18px; position:relative; color:var(--text-secondary); font-size:.86em; line-height:1.55; }
.summary li::before { content:'›'; position:absolute; left:0; color:var(--accent-green); font-weight:700; font-size:1.2em; }
/* FILTER ROW */
.frow { display:flex; gap:6px; margin-bottom:16px; flex-wrap:wrap; }
.fbtn { padding:5px 12px; background:var(--bg-card); border:1px solid var(--border); color:var(--text-secondary); border-radius:7px; font-family:'Outfit',sans-serif; font-size:.78em; cursor:pointer; transition:all .2s; }
.fbtn:hover,.fbtn.active { border-color:var(--accent-cyan); color:var(--accent-cyan); background:rgba(0,212,255,.06); }
/* TOOLTIP */
#ttOverlay { display:none; position:fixed; top:0;left:0;right:0;bottom:0; z-index:999; pointer-events:none; }
#ttOverlay.show { display:block; }
#ttBox { position:absolute; background:var(--bg-panel); border:1px solid var(--accent-cyan); border-radius:9px; padding:12px 16px; max-width:300px; box-shadow:0 10px 32px rgba(0,0,0,.55); z-index:1000; }
#ttBox h4 { color:var(--accent-cyan); font-size:.9em; margin-bottom:4px; }
#ttBox p { font-size:.78em; color:var(--text-secondary); line-height:1.45; }
/* CANVAS */
.chart-wrap { border-radius:11px; border:1px solid var(--border); background:var(--bg-card); padding:18px; margin-bottom:26px; }
.chart-title { font-weight:700; font-size:1.05em; margin-bottom:12px; }
@media(max-width:768px) {
.header h1 { font-size:1.6em; }
.tabs { flex-wrap:wrap; }
.rec-grid,.model-grid { grid-template-columns:1fr; }
.stats-row { grid-template-columns:repeat(2,1fr); }
.rec-metrics { grid-template-columns:repeat(2,1fr); }
}
/* EXPORT BUTTONS */
.export-btn {
padding:8px 16px; background:var(--bg-card); border:1px solid var(--border-bright);
color:var(--text-secondary); font-family:'Outfit',sans-serif; font-size:.82em; font-weight:600;
border-radius:8px; cursor:pointer; transition:all .25s; display:inline-flex; align-items:center;
}
.export-btn:hover { border-color:var(--accent-cyan); color:var(--text-primary); background:var(--bg-card-hover); }
.export-btn-primary {
background:linear-gradient(135deg,rgba(0,212,255,.15),rgba(0,255,148,.1));
border-color:var(--accent-cyan); color:var(--accent-cyan);
}
.export-btn-primary:hover { background:linear-gradient(135deg,rgba(0,212,255,.25),rgba(0,255,148,.18)); box-shadow:0 0 20px var(--glow-cyan); }
/* REC CARD CHECKBOX */
.rec-check { position:absolute; top:14px; right:14px; width:22px; height:22px; cursor:pointer; z-index:3; accent-color:var(--accent-green); }
.rec-card.selected { border-color:var(--accent-green); background:rgba(0,255,148,.03); }
.rec-card { position:relative; }
</style>
</head>
<body>
<div class="container">
<div class="header">
<h1>APAW Agent Model Research v3</h1>
<div class="sub">capability-index.yaml · Ollama Cloud + OpenRouter · GLM-5.1 + Qwen 3.6+ · April 2026</div>
</div>
<div class="tabs" id="tabBar">
<button class="tab-btn active" onclick="switchTab('overview')">Обзор</button>
<button class="tab-btn" onclick="switchTab('groq')">Groq Free Tier</button>
<button class="tab-btn" onclick="switchTab('models')">Все модели</button>
<button class="tab-btn" onclick="switchTab('heatmap')">Матрица</button>
<button class="tab-btn" onclick="switchTab('recs')">Рекомендации</button>
<button class="tab-btn" onclick="switchTab('impact')">Анализ профита</button>
</div>
<!-- ========== TAB: OVERVIEW ========== -->
<div id="tab-overview" class="tab-panel active">
<div class="stats-row">
<div class="stat-card"><div class="stat-label">Агентов</div><div class="stat-value grad-cyan" id="c1">36</div><div class="stat-sub">32 custom + 4 built-in</div></div>
<div class="stat-card"><div class="stat-label">Моделей сейчас</div><div class="stat-value grad-orange">6</div><div class="stat-sub">Coder(9) GLM-5.1(12) K2.6(4)</div></div>
<div class="stat-card"><div class="stat-label">Ollama Cloud</div><div class="stat-value grad-purple">20+</div><div class="stat-sub">доступно бесплатно</div></div>
<div class="stat-card"><div class="stat-label">Groq + OpenRouter</div><div class="stat-value grad-red">16+</div><div class="stat-sub">free tier моделей</div></div>
<div class="stat-card"><div class="stat-label">Рекомендаций</div><div class="stat-value grad-green">11</div><div class="stat-sub">8/8 applied ✅</div></div>
</div>
<div class="summary">
<h3>Ключевые находки v3 (после коммита caf77f53c8)</h3>
<p>Ваш агент уже применил 11 из моих рекомендаций (коммит от 05:21). Но я обнаружил что <strong>до применения</strong> некоторые агенты были на других моделях чем я предполагал:</p>
<ul>
<li><strong style="color:var(--accent-red)">⚠ Откат Qwen 3.6 Plus</strong> — security-auditor, prompt-optimizer, product-owner и markdown-validator <em>до коммита</em> были на <code>openrouter/qwen3.6-plus:free</code> и <code>deepseek-v3.2</code>, но мои рекомендации их заменили на Ollama-модели. Это снижает разнообразие провайдеров!</li>
<li><strong style="color:var(--accent-green)">✅ 11 замен уже применены</strong> — Nemotron 3 Super теперь на 7 ролях, GLM-5 расширен, Qwen3-Coder на Go, markdown-validator</li>
<li><strong style="color:var(--accent-orange)">🔴 Осталось 3 агента на gpt-oss:120b</strong> — requirement-refiner, capability-analyst, agent-architect. Всем им нужен Nemotron 3 Super</li>
<li><strong>Новая стратегия: гибридный мультипровайдер</strong> — OpenRouter (Qwen 3.6 Plus FREE, 1M ctx) + Groq (gpt-oss 500 t/s) + Ollama (основной). Диверсификация снижает зависимость</li>
<li><strong style="color:#00ff94">Qwen 3.6 Plus стоит вернуть</strong> для prompt-optimizer (Terminal-Bench 61.6% > Claude!) и product-owner (1M контекст для backlog)</li>
<li><strong>History-miner → Nemotron 3 Super</strong> — самый большой оставшийся прирост: 88 vs 78 (GLM-5). RULER@1M критичен для git history</li>
<li><strong style="color:var(--accent-red)">⚠ Prompt Adherence (IF) — новый фактор!</strong> Nemotron 3 Super имеет IF=78 (ниже GLM-5=90, Qwen3.5=92, Qwen3.6+=91). Для ролей с жёстким промптом (evaluator, security-auditor, orchestrator) это снижает эффективность. Qwen 3.6 Plus и GLM-5 лучше следуют инструкциям</li>
</ul>
</div>
<div class="sec-hdr"><h2>Текущая конфигурация</h2><span class="badge badge-cyan">capability-index.yaml</span></div>
<div class="tbl-wrap">
<table class="dt" id="cfgTable"><thead><tr>
<th>Агент</th><th>Модель</th><th>Провайдер</th><th>Категория</th><th>Соответствие</th><th>Статус</th>
</tr></thead><tbody id="cfgBody"></tbody></table>
</div>
</div>
<!-- ========== TAB: GROQ ========== -->
<div id="tab-groq" class="tab-panel">
<div class="sec-hdr"><h2>Groq Free Plan — доступные модели</h2><span class="badge badge-orange">бесплатно · LPU inference</span></div>
<div class="stats-row">
<div class="stat-card groq-card"><div class="stat-label">gpt-oss-20b</div><div class="groq-speed">1200 <span style="font-size:.4em;color:var(--text-muted)">t/s</span></div><div class="stat-sub">30 RPM · 1K RPD · 200K TPD</div></div>
</div>
<div class="summary">
<h3>Анализ лимитов Groq Free для агентского pipeline</h3>
<p>При 26 агентах в pipeline, каждый агент делает 5–20 вызовов на задачу. Типичный issue проходит через 8–12 агентов = <strong>~100–200 вызовов</strong>. С лимитом 1K RPD на модель:</p>
<ul>
<li><strong>Groq Compound</strong>: всего 250 RPD, но 70K TPM — для одноразовых тяжёлых аналитических задач</li>
</ul>
</div>
<div class="sec-hdr"><h2>Все модели Groq Free Tier</h2></div>
<div class="tbl-wrap">
<table class="dt">
<thead><tr><th>Model ID</th><th>RPM</th><th>RPD</th><th>TPM</th><th>TPD</th><th>Скорость</th><th>Применение в APAW</th></tr></thead>
<tbody id="groqBody"></tbody>
</table>
</div>
</div>
<!-- ========== TAB: MODELS ========== -->
<div id="tab-models" class="tab-panel">
<div class="sec-hdr"><h2>Все доступные модели</h2><span class="badge badge-cyan">Ollama Cloud + Groq + OpenRouter Free</span></div>
<div class="frow" id="filterRow"></div>
<div class="model-grid" id="modelGrid"></div>
</div>
<!-- ========== TAB: HEATMAP ========== -->
<div id="tab-heatmap" class="tab-panel">
<div class="hm-wrap">
<div class="hm-title">Матрица «Агент × Модель»: оценка совместимости (с учётом Prompt Adherence)</div>
<div class="hm-sub">0–100 · Взвешенная оценка = 60% бенчмарк роли + 25% Instruction Following + 15% скорость/контекст · ★ = лучший · <span style="outline:2px solid var(--accent-cyan);outline-offset:-2px;padding:0 3px;border-radius:2px">обведено</span> = текущий · <strong style="color:var(--accent-yellow)">← 11 моделей · 🟢L 🟡M 🔴H = Reasoning Effort →</strong></div>
<div style="overflow-x:auto"><table class="hm-table" id="hmTable"></table></div>
</div>
</div>
<!-- ========== TAB: RECOMMENDATIONS ========== -->
<div id="tab-recs" class="tab-panel">
<div class="sec-hdr"><h2>Рекомендации</h2><span class="badge badge-green">4 замены (2 BROKEN) + 7 подтверждений 06.04.2026</span></div>
<div style="display:flex;gap:10px;margin-bottom:18px;flex-wrap:wrap;align-items:center;">
<button onclick="toggleAllRecs()" class="export-btn" id="selectAllBtn">☑ Выбрать все</button>
<button onclick="exportJSON()" class="export-btn export-btn-primary" id="exportBtn">
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" style="vertical-align:-2px;margin-right:4px"><path d="M21 15v4a2 2 0 01-2 2H5a2 2 0 01-2-2v-4"/><polyline points="7 10 12 15 17 10"/><line x1="12" y1="15" x2="12" y2="3"/></svg>
Скачать JSON для агента
</button>
<span id="selectedCount" style="font-family:'JetBrains Mono',monospace;font-size:.75em;color:var(--text-muted);">0 из 11 выбрано</span>
</div>
<div class="rec-grid" id="recGrid"></div>
<!-- JSON Preview Modal -->
<div id="jsonModal" style="display:none;position:fixed;top:0;left:0;right:0;bottom:0;background:rgba(0,0,0,.7);z-index:9999;justify-content:center;align-items:center;padding:20px;">
<div style="background:var(--bg-panel);border:1px solid var(--accent-cyan);border-radius:14px;max-width:800px;width:100%;max-height:85vh;display:flex;flex-direction:column;box-shadow:0 20px 60px rgba(0,0,0,.5);">
<div style="display:flex;justify-content:space-between;align-items:center;padding:18px 22px;border-bottom:1px solid var(--border);">
<div>
<div style="font-weight:700;font-size:1.05em;">agent-model-recommendations.json</div>
<div style="font-size:.75em;color:var(--text-muted);margin-top:2px;font-family:'JetBrains Mono',monospace">Готов для передачи агенту-оркестратору</div>
</div>
<div style="display:flex;gap:8px;">
<button onclick="copyJSON()" class="export-btn" id="copyBtn">📋 Копировать</button>
<button onclick="downloadJSON()" class="export-btn export-btn-primary">⬇ Скачать .json</button>
<button onclick="closeModal()" class="export-btn" style="border-color:var(--accent-red);color:var(--accent-red);" aria-label="Закрыть">✕</button>
</div>
</div>
<pre id="jsonPreview" style="flex:1;overflow:auto;padding:18px 22px;margin:0;font-family:'JetBrains Mono',monospace;font-size:.78em;line-height:1.6;color:var(--accent-green);background:var(--bg-deep);border-radius:0 0 14px 14px;"></pre>
</div>
</div>
</div>
<!-- ========== TAB: IMPACT ========== -->
<div id="tab-impact" class="tab-panel">
<div class="sec-hdr"><h2>Совокупный анализ профита</h2><span class="badge badge-green">если применить все рекомендации</span></div>
<div class="stats-row">
<div class="stat-card"><div class="stat-label">Средний прирост</div><div class="stat-value grad-green">+12</div><div class="stat-sub">пунктов по матрице</div></div>
<div class="stat-card"><div class="stat-label">Применено</div><div class="stat-value grad-red">8/8</div><div class="stat-sub">все рекомендации ✅</div></div>
<div class="stat-card"><div class="stat-label">Qwen 3.6+</div><div class="stat-value grad-purple">0</div><div class="stat-sub">полностью на Ollama!</div></div>
<div class="stat-card"><div class="stat-label">GLM-5.1</div><div class="stat-value grad-orange">12</div><div class="stat-sub">10 custom + 2 built-in</div></div>
</div>
<div class="chart-wrap">
<div class="chart-title">Прирост по категориям: до → после</div>
<canvas id="impactCanvas" height="340"></canvas>
</div>
<div class="summary">
<h3>Детальный анализ прироста</h3>
<div id="impactDetails"></div>
<div style="margin-top:32px">
<div class="sec-hdr"><h2>APAW Pipeline vs ТОП закрытых моделей (апрель 2026)</h2></div>
<p style="font-size:.82em;color:var(--text-muted);margin-bottom:16px">
Сравнение лучших моделей в вашем pipeline с лидерами рынка по ключевым бенчмаркам.
<strong style="color:var(--accent-green)">🟢</strong> = APAW обгоняет,
<strong style="color:var(--accent-yellow)">🟡</strong> = на уровне (±3%),
<strong style="color:var(--accent-red)">🔴</strong> = отстаёт
</p>
<div style="overflow-x:auto">
<table id="benchTable" style="width:100%;border-collapse:collapse;font-size:.78em;font-family:'JetBrains Mono',monospace">
</table>
</div>
<p style="font-size:.72em;color:var(--text-muted);margin-top:12px">
* SWE-V = SWE-Bench Verified, SWE-P = SWE-Bench Pro, T-Bench = Terminal-Bench 2.0, LCB = LiveCodeBench, GPQA = GPQA Diamond<br>
Данные: swebench.com, marc0.dev, tokenmix.ai, ollama.com — апрель 2026. Стоимость: примерная за 1M input tokens.
</p>
</div>
</div>
</div>
</div>
<div id="ttOverlay"><div id="ttBox"></div></div>
<script>
// Agent-to-model assignment table.
// Provenance (kept from the original header): ACTUAL STATE from _kilo.zip (April 25, 2026).
// 32 custom agents + 4 built-in = 36 entries in total.
// Model usage as counted from the entries below (custom + built-in):
//   qwen3-coder:480b = 9, glm-5.1 = 12, nemotron-3-super = 7,
//   kimi-k2.6:cloud = 4, minimax-m2.5 = 3, nemotron-3-nano:30b = 1
// No entry in this table uses Qwen 3.6+, GLM-5 or DeepSeek.
// NOTE(review): the original header said "ALL 7 RECS APPLIED", while the page markup
//   shows "8/8" and "11" for recommendations — confirm which number is current.
// Field legend — presumed from the #cfgTable column headers; the rendering code is
//   not visible here, so confirm before relying on it:
//   a = agent name, m = model id, p = provider, cat = category,
//   fit = fit score, s = status ('optimal' | 'good' in this data);
//   b looks like a model-family key matching the `.mbadge.<b>` CSS classes.
const cfg = [
// Core developers + SDET: all on qwen3-coder:480b except frontend-developer (minimax-m2.5)
{a:'lead-developer',m:'qwen3-coder:480b',p:'Ollama',cat:'Core Dev',b:'qwen',fit:92,s:'optimal'},
{a:'frontend-developer',m:'minimax-m2.5',p:'Ollama',cat:'Core Dev',b:'minimax',fit:92,s:'optimal'},
{a:'backend-developer',m:'qwen3-coder:480b',p:'Ollama',cat:'Core Dev',b:'qwen',fit:91,s:'optimal'},
{a:'go-developer',m:'qwen3-coder:480b',p:'Ollama',cat:'Core Dev',b:'qwen',fit:85,s:'optimal'},
{a:'flutter-developer',m:'qwen3-coder:480b',p:'Ollama',cat:'Core Dev',b:'qwen',fit:86,s:'optimal'},
{a:'php-developer',m:'qwen3-coder:480b',p:'Ollama',cat:'Core Dev',b:'qwen',fit:87,s:'optimal'},
{a:'python-developer',m:'qwen3-coder:480b',p:'Ollama',cat:'Core Dev',b:'qwen',fit:90,s:'optimal'},
{a:'sdet-engineer',m:'qwen3-coder:480b',p:'Ollama',cat:'QA',b:'qwen',fit:88,s:'optimal'},
// Process / analysis / workflow roles: glm-5.1 (7 entries) plus orchestrator on kimi-k2.6:cloud.
// Original note on this group: GLM-5.1 — SWE-Pro 58.4 #1!
{a:'orchestrator',m:'kimi-k2.6:cloud',p:'Ollama Cloud',cat:'Process',b:'kimi',fit:92,s:'optimal'},
{a:'evaluator',m:'glm-5.1',p:'Ollama',cat:'Process',b:'glm',fit:86,s:'optimal'},
{a:'capability-analyst',m:'glm-5.1',p:'Ollama',cat:'Analysis',b:'glm',fit:85,s:'optimal'},
{a:'architect-indexer',m:'glm-5.1',p:'Ollama',cat:'Analysis',b:'glm',fit:88,s:'optimal'},
{a:'pipeline-judge',m:'glm-5.1',p:'Ollama',cat:'Process',b:'glm',fit:86,s:'good'},
{a:'release-manager',m:'glm-5.1',p:'Ollama',cat:'Process',b:'glm',fit:82,s:'good'},
{a:'requirement-refiner',m:'glm-5.1',p:'Ollama',cat:'Analysis',b:'glm',fit:88,s:'optimal'},
{a:'workflow-architect',m:'glm-5.1',p:'Ollama',cat:'Workflow',b:'glm',fit:84,s:'good'},
// nemotron-3-super roles (6 entries) plus agent-architect on kimi-k2.6:cloud
{a:'agent-architect',m:'kimi-k2.6:cloud',p:'Ollama Cloud',cat:'Meta',b:'kimi',fit:86,s:'optimal'},
{a:'security-auditor',m:'nemotron-3-super',p:'Ollama',cat:'Security',b:'nemotron',fit:76,s:'good'},
{a:'performance-engineer',m:'nemotron-3-super',p:'Ollama',cat:'Performance',b:'nemotron',fit:78,s:'good'},
{a:'history-miner',m:'nemotron-3-super',p:'Ollama',cat:'Analysis',b:'nemotron',fit:85,s:'optimal'},
{a:'memory-manager',m:'nemotron-3-super',p:'Ollama',cat:'Cognitive',b:'nemotron',fit:86,s:'optimal'},
{a:'planner',m:'nemotron-3-super',p:'Ollama',cat:'Cognitive',b:'nemotron',fit:80,s:'good'},
{a:'reflector',m:'nemotron-3-super',p:'Ollama',cat:'Cognitive',b:'nemotron',fit:78,s:'good'},
// Group originally labelled "GLM-5 (3 agents)"; none of these three entries uses GLM-5
// (kimi-k2.6:cloud, glm-5.1 and qwen3-coder:480b respectively).
{a:'browser-automation',m:'kimi-k2.6:cloud',p:'Ollama Cloud',cat:'Testing',b:'kimi',fit:86,s:'optimal'},
{a:'product-owner',m:'glm-5.1',p:'Ollama',cat:'Management',b:'glm',fit:84,s:'optimal'},
{a:'visual-tester',m:'qwen3-coder:480b',p:'Ollama',cat:'Testing',b:'qwen',fit:82,s:'optimal'},
// Group originally labelled "Qwen 3.6+ FREE (2 agents)"; both entries are on glm-5.1
{a:'prompt-optimizer',m:'glm-5.1',p:'Ollama',cat:'Process',b:'glm',fit:82,s:'good'},
{a:'system-analyst',m:'glm-5.1',p:'Ollama',cat:'Analysis',b:'glm',fit:90,s:'optimal'},
// MiniMax M2.5 (these two entries; frontend-developer above is the third)
{a:'code-skeptic',m:'minimax-m2.5',p:'Ollama',cat:'QA',b:'minimax',fit:85,s:'good'},
{a:'the-fixer',m:'minimax-m2.5',p:'Ollama',cat:'Fixes',b:'minimax',fit:88,s:'optimal'},
// Group originally labelled "DeepSeek V3.2 (1 agent)"; the entry is on kimi-k2.6:cloud
{a:'devops-engineer',m:'kimi-k2.6:cloud',p:'Ollama Cloud',cat:'DevOps',b:'kimi',fit:88,s:'optimal'},
// Nemotron Nano (1 agent)
{a:'markdown-validator',m:'nemotron-3-nano:30b',p:'Ollama',cat:'Validation',b:'nemotron',fit:70,s:'good'},
// Built-in (from kilo.jsonc)
{a:'[built-in] code',m:'qwen3-coder:480b',p:'Ollama',cat:'Built-in',b:'qwen',fit:92,s:'optimal'},
{a:'[built-in] ask',m:'glm-5.1',p:'Ollama',cat:'Built-in',b:'glm',fit:88,s:'optimal'},
{a:'[built-in] plan',m:'nemotron-3-super',p:'Ollama',cat:'Built-in',b:'nemotron',fit:80,s:'good'},
{a:'[built-in] debug',m:'glm-5.1',p:'Ollama',cat:'Built-in',b:'glm',fit:88,s:'optimal'},
];
// Rows of the Groq limits table; consumed by renderGroq().
// Fields (all rendered verbatim into table cells):
//   id    - model identifier shown in the badge
//   rpm / rpd / tpm / tpd - rate-limit columns (presumably requests/tokens per
//           minute/day — TODO confirm against the provider's published limits)
//   speed - throughput label, rendered with a " t/s" suffix; renderGroq() also
//           derives the speed-dot class from it
//   use   - Russian usage note displayed as-is
const groqModels = [
{id:'openai/gpt-oss-20b',rpm:30,rpd:'1K',tpm:'8K',tpd:'200K',speed:'1200+',use:'Ультра-быстрый fallback для лёгких ролей (markdown-validator).'},
{id:'llama-3.1-8b-instant',rpm:30,rpd:'14.4K',tpm:'6K',tpd:'500K',speed:'~800',use:'14.4K RPD! Самый высокий лимит. Для health-check / ping ролей.'},
{id:'groq/compound',rpm:30,rpd:'250',tpm:'70K',tpd:'—',speed:'varies',use:'Мультимодельная агрегация. Для research-задач.'},
{id:'groq/compound-mini',rpm:30,rpd:'250',tpm:'70K',tpd:'—',speed:'varies',use:'Лёгкая версия compound.'},
{id:'llama-prompt-guard-2',rpm:30,rpd:'14.4K',tpm:'15K',tpd:'500K',speed:'~1K',use:'Security: входной фильтр для security-auditor (14.4K RPD!).'},
];
// Model cards rendered by renderModels().
// Fields: n = display name, org = vendor, par = parameter summary, ctx = context
// window label, swe = SWE-bench score (null hides the row), ifScore = instruction
// following composite, cat = filter categories, str = Russian description,
// tags = tag chips, or = truthy adds the "OpenRouter FREE" badge.
const ollamaModels = [
// ifScore = Instruction Following composite (IFEval + IFBench + agent prompt adherence), 0-100
{n:'Qwen3-Coder 480B',org:'Qwen',par:'480B/35B active',ctx:'256K→1M',swe:66.5,ifScore:88,cat:['coding','agent'],str:'SOTA open-source кодинг. Сравним с Claude Sonnet 4.',tags:['coding','agent','tools']},
{n:'MiniMax M2.5',org:'MiniMax',par:'MoE undisclosed',ctx:'128K',swe:80.2,ifScore:82,cat:['coding','agent'],str:'Лидер SWE-bench 80.2%. Полный lifecycle разработки.',tags:['coding','agent']},
{n:'MiniMax M2.7',org:'MiniMax',par:'~10B active',ctx:'128K',swe:78,ifScore:80,cat:['coding','agent','efficient'],str:'Самообучаемая. 56.2% SWE-Pro. 100 TPS. $0.30/M.',tags:['coding','agent','self-evolving']},
{n:'DeepSeek V4-Pro',org:'DeepSeek',par:'1.6T/49B active MoE',ctx:'1M',swe:80.6,ifScore:89,cat:['coding','agent','reasoning'],str:'SWE-V 80.6, LiveCodeBench 93.5(#1!), Terminal-Bench 67.9, Codeforces 3206, 1M ctx, 27% FLOPs vs V3.2. MIT.',tags:['coding','agent','thinking','tools']},
// Fixed: this 284B/13B "Flash" card was mislabeled 'DeepSeek V4-Pro', duplicating the card above.
{n:'DeepSeek V4-Flash',org:'DeepSeek',par:'284B/13B active MoE',ctx:'1M',swe:79.0,ifScore:86,cat:['coding','efficient','agent'],str:'SWE-V ~79%, Flash Max = Pro уровень reasoning. 13B active = ультрабыстрый. 1M ctx. FP4+FP8. MIT.',tags:['coding','efficient','agent','thinking']},
{n:'Kimi K2.6',org:'Moonshot AI',par:'1T/32B active MoE',ctx:'256K',swe:80.2,ifScore:91,cat:['coding','agent','multimodal'],str:'SWE-Pro 58.6(#1!), SWE-V 80.2, Terminal-Bench 66.7, HLE 54.0(#1!), BrowseComp 83.2. 13h autonomous. 300 sub-agent swarm. Modified MIT.',tags:['coding','agent','swarm','vision','thinking','tools']},
{n:'Nemotron 3 Super',org:'NVIDIA',par:'120B/12B active',ctx:'1M',swe:60.5,ifScore:78,cat:['agent','reasoning','efficient'],str:'SWE-bench 60.5%. RULER@1M 91.75%! Но IF ниже — Mamba-layers иногда «теряют» инструкции в длинных промптах.',tags:['agent','1M-ctx','thinking']},
{n:'GLM-5',org:'Z.ai',par:'744B/40B active',ctx:'128K',swe:null,ifScore:90,cat:['reasoning','agent'],str:'Мощный reasoning. Arena ELO 1451. Отличный instruction following (IFEval ~90+).',tags:['reasoning','agent']},
// NOTE(review): this card is also named 'DeepSeek V4-Pro' but its 128K context and
// "Large MoE" size do not match the V4-Pro card above — looks like an older DeepSeek
// entry (possibly V3.2) renamed by mistake; confirm the intended model before changing.
{n:'DeepSeek V4-Pro',org:'DeepSeek',par:'Large MoE',ctx:'128K',swe:null,ifScore:75,cat:['reasoning'],str:'Хороший reasoning, но IF нестабилен — иногда игнорирует формат вывода.',tags:['reasoning']},
{n:'Qwen 3.5 122B',org:'Qwen',par:'122B/10B active',ctx:'128K',swe:null,ifScore:92,cat:['reasoning','efficient'],str:'IFEval 92.6%! Лучший IF среди open-source. Multimodal. Thinking.',tags:['vision','thinking','tools']},
{n:'Qwen3-Coder-Next',org:'Qwen',par:'80B/3B active',ctx:'128K',swe:70,ifScore:84,cat:['coding','efficient'],str:'70% SWE-bench с 3B active! Хороший IF для кодинга.',tags:['coding','efficient','tools']},
{n:'Cogito 2.1 671B',org:'Cognitive',par:'671B MoE',ctx:'128K',swe:null,ifScore:76,cat:['reasoning'],str:'MIT лицензия. 671B total. IF неплохой, но уступает GLM/Qwen.',tags:['reasoning']},
{n:'Qwen 3.6 Plus',org:'Qwen',par:'Hybrid MoE',ctx:'1M',swe:78.8,ifScore:91,cat:['coding','agent','reasoning'],str:'FREE на OpenRouter! 1M контекст. Always-on CoT. Превосходный IF — наследник Qwen 3.5 (92.6%).',tags:['coding','agent','1M-ctx','free'],or:true},
{n:'Step 3.5 Flash',org:'StepFun',par:'MoE',ctx:'128K',swe:null,ifScore:79,cat:['efficient'],str:'Бесплатна на OpenRouter. IF средний.',tags:['efficient','free'],or:true},
{n:'DeepSeek R1',org:'DeepSeek',par:'671B MoE',ctx:'128K',swe:null,ifScore:73,cat:['reasoning'],str:'Мощные reasoning-цепочки. Но IF слабый — часто генерирует лишний reasoning вместо ответа.',tags:['reasoning','thinking','free'],or:true},
];
// Heatmap: agents × models (Ollama + Groq-unique models)
// Instruction Following compliance scores (0-100) based on IFEval/IFBench/BenchLM data April 2026
// Higher = better follows system prompts, role definitions, output format constraints
// Keys are positional indices looked up by adjustForIF(); entries 0-10 hold the same
// numbers as the `if` field of the hmModels column at the same position.
// NOTE(review): key 11 is absent (adjustForIF falls back to 70 for missing keys) and
// keys 12-13 have no matching hmModels column — confirm whether they are still used.
// NOTE(review): several values differ from ollamaModels[].ifScore for the same model
// (e.g. Nemotron 3 Super, GLM-5) — confirm which table is authoritative.
const ifScores = {
0: 88, // Qwen3-Coder 480B — strong IF from agentic RL
1: 82, // MiniMax M2.5 — good IF, spec-writing
2: 78, // MiniMax M2.7 — slightly weaker IF
3: 85, // Nemotron 3 Super — strong IF, agent-trained
4: 80, // GLM-5 — function calling leader
5: 88, // GLM-5.1 — SWE-Pro #1, 8-hour autonomous, improved IF
6: 88, // V4-Pro Max — 1.6T/49B, 1M ctx, SWE-V 80.6, Terminal 67.9, LiveCodeBench 93.5! MIT
7: 86, // Qwen 3.5 122B — improved IF
8: 84, // Qwen3-Coder-Next — agentic training
9: 90, // Qwen 3.6 Plus — always-on CoT, best IF
10: 91, // Kimi K2.6 — Intelligence Index 54, 13h autonomous, improved IF
12: 89, // DeepSeek V4-Pro — 1.6T/49B, 1M ctx, LiveCodeBench 93.5, MIT
13: 86, // DeepSeek V4-Flash — 284B/13B, 1M ctx, efficient, MIT
};
// IF-adjusted heatmap: raw_score * (0.7 + 0.3 * IF/100)
// This means IF=100 → score×1.0, IF=50 → score×0.85, IF=0 → score×0.7
/**
 * Scale a row of raw heatmap scores by each column's Instruction Following score.
 *
 * @param {number[]} scores - Raw scores; position `idx` is matched to `ifScores[idx]`.
 * @returns {number[]} New array of rounded, IF-adjusted scores (input is not mutated).
 */
function adjustForIF(scores) {
  const DEFAULT_IF = 70; // used only when a column has no IF score at all
  return scores.map((raw, idx) => {
    const known = ifScores[idx];
    // A plain `|| 70` would also replace a legitimate score of 0 and contradict the
    // documented "IF=0 → score×0.7", so only fall back when the entry is missing.
    const ifScore = typeof known === 'number' && !Number.isNaN(known) ? known : DEFAULT_IF;
    const mult = 0.7 + 0.3 * (ifScore / 100);
    return Math.round(raw * mult);
  });
}
// Heatmap columns, in display order; renderHeatmap() draws one header cell per entry
// and pairs entry j with hmAgents[].s[j].
// Fields: n = short column label, p = provider label (also selects the label colour),
// if = Instruction Following score shown as "IF:<n>" in the header and in the tooltip.
const hmModels = [
{n:'Qwen3-Coder',p:'Ollama',if:88},
{n:'M2.5',p:'Ollama',if:82},
{n:'M2.7',p:'Ollama',if:78},
{n:'Nem.3 Super',p:'Ollama',if:85},
{n:'GLM-5',p:'Ollama',if:80},
{n:'🔥 GLM-5.1',p:'Ollama',if:88},
{n:'🔥 V4-Pro Max',p:'Ollama Cloud',if:88},
{n:'Qwen 3.5',p:'Ollama',if:86},
{n:'Q3-Coder-Next',p:'Ollama',if:84},
{n:'Qwen 3.6+',p:'OpenRouter',if:90},
// Same fire emoji as the labels above, written as a code-point escape.
{n:'\u{1f525} Kimi K2.6',p:'Ollama Cloud',if:91}
];
// Heatmap rows; renderHeatmap() draws one table row per entry.
// Fields: n = agent name, c = index of the agent's current model column (highlighted
// with the "hm-cur" class; -1 matches no column), re = reasoning effort,
// s = per-column scores in hmModels order (the row maximum gets a star).
const hmAgents = [
// c=current model idx, re=reasoning effort (L/M/H)
// 0=Qwen3Coder 1=M2.5 2=M2.7 3=Nem.Super 4=GLM-5 5=GLM-5.1 6=V4-Pro-Max 7=Qwen3.5 8=Q3CoderNext 9=Qwen3.6+ 10=KimiK2.6
// NOTE: the group headings below describe how the rows are grouped for reading;
// the `c` value on each row is what the heatmap actually uses.
// === Coding agents (c:0 Qwen3-Coder, except frontend-developer which is c:1 M2.5) ===
{n:'lead-developer',c:0,re:'M',s:[92,86,82,70,68,75,88,66,80,88,90]},
{n:'frontend-developer',c:1,re:'M',s:[86,92,88,62,56,64,82,60,76,88,86]},
{n:'backend-developer',c:0,re:'M',s:[91,84,80,68,63,72,86,62,78,87,90]},
{n:'go-developer',c:0,re:'M',s:[85,78,74,66,58,68,88,58,74,82,86]},
{n:'flutter-developer',c:0,re:'M',s:[86,70,66,60,53,62,78,58,74,82,84]},
{n:'php-developer',c:0,re:'M',s:[87,76,72,64,56,66,74,60,76,84,86]},
{n:'python-developer',c:0,re:'M',s:[90,82,78,66,60,70,78,64,78,88,88]},
{n:'sdet-engineer',c:0,re:'M',s:[88,84,80,70,63,72,84,64,78,84,87]},
// === Process/analysis agents (c:5 GLM-5.1, except orchestrator which is c:10 Kimi K2.6) ===
{n:'orchestrator',c:10,re:'M',s:[74,70,68,80,82,90,86,78,62,84,92]},
{n:'evaluator',c:5,re:'M',s:[70,73,70,78,78,86,84,76,58,81,84]},
{n:'capability-analyst',c:5,re:'M',s:[72,68,66,76,78,85,82,75,60,79,82]},
{n:'architect-indexer',c:5,re:'M',s:[70,64,62,74,80,88,78,76,58,80,84]},
{n:'pipeline-judge',c:5,re:'L',s:[64,68,65,78,76,86,82,74,56,80,84]},
{n:'release-manager',c:5,re:'L',s:[72,66,64,74,76,82,78,72,60,76,78]},
{n:'requirement-refiner',c:5,re:'M',s:[66,62,60,72,80,88,82,74,54,78,82]},
{n:'workflow-architect',c:5,re:'M',s:[68,62,60,76,76,84,80,72,56,80,82]},
// === Cognitive/security agents (c:3 Nemotron 3 Super, except agent-architect which is c:10) ===
{n:'agent-architect',c:10,re:'H',s:[78,72,70,78,76,84,82,76,66,82,86]},
{n:'security-auditor',c:3,re:'H',s:[76,74,68,76,68,78,80,72,64,75,80]},
{n:'performance-engineer',c:3,re:'M',s:[78,75,70,78,74,82,84,70,67,76,82]},
{n:'history-miner',c:3,re:'L',s:[68,60,56,85,78,88,86,72,56,84,82]},
{n:'memory-manager',c:3,re:'M',s:[63,58,56,86,72,84,86,70,50,87,84]},
{n:'planner',c:3,re:'H',s:[72,68,66,80,78,85,88,78,60,85,86]},
{n:'reflector',c:3,re:'M',s:[68,66,64,78,76,82,84,76,56,82,80]},
// === Formerly grouped under GLM-5; none of these rows uses c:4 any more (now c:10 / c:5 / c:0) ===
{n:'browser-automation',c:10,re:'M',s:[87,72,68,61,53,64,82,56,72,82,86]},
{n:'product-owner',c:5,re:'L',s:[60,56,54,74,78,84,76,74,48,78,76]},
{n:'visual-tester',c:0,re:'M',s:[82,68,64,55,48,58,76,54,66,76,78]},
// === Formerly grouped under Qwen 3.6+; both rows are now c:5 (GLM-5.1) ===
{n:'prompt-optimizer',c:5,re:'M',s:[76,74,72,76,75,82,80,74,64,83,82]},
{n:'system-analyst',c:5,re:'H',s:[70,66,63,74,82,90,88,76,58,80,86]},
// === M2.5 agents (c:1) ===
{n:'code-skeptic',c:1,re:'M',s:[82,85,80,73,72,78,82,70,72,80,82]},
{n:'the-fixer',c:1,re:'M',s:[89,88,84,71,64,74,88,64,82,86,90]},
// === Formerly grouped under DeepSeek V3.2; the row is now c:10 (Kimi K2.6) ===
{n:'devops-engineer',c:10,re:'M',s:[66,53,48,78,75,84,86,70,54,76,88]},
// === Nano (c:-1, not in matrix) ===
{n:'markdown-validator',c:-1,re:'L',s:[43,38,36,52,55,62,68,56,40,50,56]},
// === Built-in ===
{n:'[built-in] debug',c:5,re:'H',s:[78,80,76,72,64,88,90,68,76,85,90]},
];
// Recommendation cards rendered by renderRecs() and exported by buildExportJSON().
// Fields: a = agent/card title, from/fromP = current model label and provider,
// to/toP = recommended model label and provider, imp = impact badge
// ('critical' adds a glow in renderRecs), q/sp/ctx/prov = metric cells
// (quality, speed, context, provider), r = rationale text.
// NOTE(review): `to` holds display text (scores, "RE:High", check marks), yet
// buildExportJSON() uses it to build the exported model id — confirm that is intended.
const recs = [
// === Previous 7+2 recommendations APPLIED. V4-Pro Max is now available! ===
// --- NEW: V4-Pro Max replacements ---
{a:"[built-in] debug",from:"glm-5.1 (88)",fromP:"Ollama",to:"V4-Pro Max (★90) / K2.6 (★90) RE:High",toP:"Ollama Cloud",imp:"high",
q:"+2%",sp:"~1x",ctx:"200K→1M",prov:"Ollama Cloud",
r:"★ матрицы: V4-Pro=90 и K2.6=90 (TIE!), GLM-5.1=88. V4-Pro: LiveCodeBench 93.5(#1!), Terminal 67.9, 1M ctx для полного проекта. K2.6: 13h auto sessions. Оба лучше GLM-5.1. RE:High для debug."},
{a:"planner",from:"nemotron-3-super (80)",fromP:"Ollama",to:"V4-Pro Max (★88) RE:High",toP:"Ollama Cloud",imp:"high",
q:"+10%",sp:"~1x",ctx:"1M",prov:"Ollama Cloud",
r:"★ матрицы: V4-Pro=88(лучший!), K2.6=86, GLM-5.1=85, Nem=80. V4-Pro: GPQA 90.1 (reasoning), 1M ctx сохраняется (vs потеря при K2.6). RE:High для chain-of-thought planning."},
{a:"go-developer",from:"qwen3-coder:480b (85)",fromP:"Ollama",to:"V4-Pro Max (★88) RE:Medium",toP:"Ollama Cloud",imp:"medium",
q:"+4%",sp:"~1x",ctx:"256K→1M",prov:"Ollama Cloud",
r:"★ матрицы: V4-Pro=88(лучший для Go!), K2.6=86, Qwen3Coder=85. DeepSeek модели традиционно сильны в Go/Rust. 1M ctx для крупных Go-проектов."},
{a:"history-miner",from:"nemotron-3-super (★85)",fromP:"Ollama",to:"V4-Pro Max (86) + Nem fallback",toP:"Hybrid",imp:"medium",
q:"+1%",sp:"~1x",ctx:"1M",prov:"Ollama Cloud + Ollama",
r:"V4-Pro=86 чуть лучше Nemotron=85. 1M ctx у обоих. MRCR 83.5 у V4-Pro — лучшее long-context retrieval. Nemotron как fallback (RULER 91.75%)."},
// --- APPLIED (kept for reference) ---
{a:"frontend-dev → M2.5",from:"qwen3-coder (90)",fromP:"Ollama",to:"MiniMax M2.5 (★92) ✅",toP:"Ollama",imp:"low",
q:"+2%",sp:"=",ctx:"204K",prov:"Ollama",r:"Spec-writing, UI architect. APPLIED."},
{a:"devops → K2.6",from:"deepseek-v3.2",fromP:"",to:"kimi-k2.6:cloud ✅",toP:"Ollama Cloud",imp:"low",
q:"+35%",sp:"=",ctx:"256K",prov:"",r:"APPLIED."},
// --- Fine-tuning ---
{a:"orchestrator",from:"glm-5.1 (★90)",fromP:"Ollama",to:"K2.6 (★92) RE:Medium",toP:"Ollama Cloud",imp:"medium",
q:"+2%",sp:"~1x",ctx:"200K→256K",prov:"Ollama Cloud",
r:"K2.6=92★ всё ещё лучший для orchestration. V4-Pro=86 слабее. 300 sub-agent swarm."},
{a:"the-fixer",from:"minimax-m2.5 (★88)",fromP:"Ollama",to:"V4-Pro (★88) / K2.6 (★90)",toP:"Ollama Cloud",imp:"medium",
q:"+2%",sp:"~1x",ctx:"128K→1M/256K",prov:"Ollama Cloud",
r:"K2.6=90(лучший), V4-Pro=88=M2.5. M2.5 SWE-bench 80.2% стабильнее. Не срочно."},
// --- Confirmations (current assignment kept; `to` is only a check mark) ---
{a:"Qwen3-Coder (7 coding)",from:"qwen3-coder",fromP:"Ollama",to:"✅",toP:"",imp:"low",
q:"=0%",sp:"=",ctx:"256K",prov:"Ollama",r:"lead=92★, backend=91★, python=90★."},
{a:"GLM-5.1 (12 agents)",from:"glm-5.1",fromP:"Ollama",to:"✅",toP:"",imp:"low",
q:"=0%",sp:"=",ctx:"200K",prov:"Ollama",r:"orchestrator=90, system-analyst=90. SWE-Pro #1."},
{a:"Kimi K2.6 (3 agents)",from:"kimi-k2.6",fromP:"Ollama Cloud",to:"✅",toP:"",imp:"low",
q:"=0%",sp:"=",ctx:"256K",prov:"Ollama Cloud",r:"devops=88★, browser=86, agent-arch=86."},
];
// Rows of the impact list rendered by renderImpact().
// Fields: cat = row title, b = value shown before the arrow, a = value shown after it,
// d = delta (rows with d > 0 are highlighted green and shown as "+d", others as "=0"),
// n = short note under the title.
const impactData = [
{cat:"debug GLM5.1→V4-Pro/K2.6",b:88,a:90,d:2,n:"LiveCodeBench 93.5, Terminal 67.9"},
{cat:"planner Nem→V4-Pro Max",b:80,a:88,d:8,n:"★88! GPQA 90.1, 1M ctx"},
{cat:"go-dev Coder→V4-Pro Max",b:85,a:88,d:3,n:"★88! Go/Rust specialist, 1M ctx"},
{cat:"history-miner →V4-Pro",b:85,a:86,d:1,n:"MRCR 83.5, long-context"},
{cat:"orchestrator →K2.6 (next)",b:90,a:92,d:2,n:"300 sub-agent swarm"},
{cat:"frontend → M2.5 ✅",b:90,a:92,d:2,n:"Spec-writing, UI architect"},
{cat:"devops → K2.6 ✅",b:65,a:88,d:23,n:"IF:65→91! Terminal 66.7"},
{cat:"Qwen3-Coder (7) ✅",b:90,a:90,d:0,n:"SOTA coding"},
{cat:"GLM-5.1 (12) ✅",b:87,a:87,d:0,n:"SWE-Pro #1"},
{cat:"Nemotron Super (6) ✅",b:82,a:82,d:0,n:"1M ctx, RULER 91.75%"},
];
// ======================= RENDER =======================
/**
 * Activate the tab panel `tab-<id>` and mark the button that triggered the switch.
 *
 * @param {string} id - Tab id; the panel element is looked up as `tab-` + id.
 * @param {Event} [evt] - Triggering event. Optional: existing inline handlers that call
 *   `switchTab('x')` keep working through the `window.event` fallback.
 */
function switchTab(id, evt) {
  document.querySelectorAll('.tab-panel').forEach((panel) => panel.classList.remove('active'));
  document.querySelectorAll('.tab-btn').forEach((btn) => btn.classList.remove('active'));

  // An unknown id used to throw on `null.classList`; now it simply activates nothing.
  const panel = document.getElementById('tab-' + id);
  if (panel) panel.classList.add('active');

  // The bare global `event` is deprecated and undefined for programmatic calls, so
  // prefer the explicit argument and only fall back to window.event when present.
  const trigger = evt || (typeof window !== 'undefined' ? window.event : undefined);
  const target = trigger && trigger.target;
  if (target && target.classList) {
    // A click may land on a child of the button; highlight the button itself if found.
    const button = typeof target.closest === 'function' ? (target.closest('.tab-btn') || target) : target;
    button.classList.add('active');
  }

  // The chart is drawn after the panel becomes visible (next frame + 50 ms delay).
  if (id === 'impact') requestAnimationFrame(() => setTimeout(drawChart, 50));
}
/**
 * Fill the #cfgBody table body with one row per entry of `cfg`:
 * agent, model badge (plus struck-through previous model when `prev` is set),
 * provider tag, category, fit bar and status.
 */
function renderCfg() {
  // status key -> [icon, Russian label]; anything else is treated as "needs improvement".
  const statusView = new Map([
    ['optimal', ['✅', 'Оптимально']],
    ['good', ['🟡', 'Хорошо']],
    ['overspec', ['🔵', 'Overspec']],
    ['new', ['🆕', 'Не назначена']],
    ['broken', ['💀', 'НЕ РАБОТАЕТ']],
  ]);
  const unknownStatus = ['🔴', 'Улучшить'];

  const rows = [];
  cfg.forEach((entry, pos) => {
    const [icon, label] = statusView.get(entry.s) || unknownStatus;

    // Fit tiers: >=85 high, >=70 medium, otherwise low.
    let barClass = 'l';
    let scoreColor = 'var(--accent-red)';
    if (entry.fit >= 85) {
      barClass = 'h';
      scoreColor = 'var(--accent-green)';
    } else if (entry.fit >= 70) {
      barClass = 'm';
      scoreColor = 'var(--accent-orange)';
    }

    let previous = '';
    if (entry.prev) {
      previous = `<div style="font-size:.68em;color:var(--text-muted);margin-top:2px;text-decoration:line-through">${entry.prev}</div>`;
    }

    rows.push(`<tr style="animation:fadeUp .3s ${pos * 0.03}s ease-out both">
<td style="font-weight:600">${entry.a}</td>
<td><span class="mbadge ${entry.b}">${entry.m}</span>${previous}</td>
<td><span class="prov-tag ${entry.p.toLowerCase()}">${entry.p}</span></td>
<td style="color:var(--text-secondary)">${entry.cat}</td>
<td><div class="sbar"><div class="sbar-bg"><div class="sbar-fill ${barClass}" style="width:${entry.fit}%"></div></div><span class="snum" style="color:${scoreColor}">${entry.fit}</span></div></td>
<td>${icon} ${label}</td></tr>`);
  });

  document.getElementById('cfgBody').innerHTML = rows.join('');
}
/**
 * Extract a numeric tokens-per-second value from a free-form speed label.
 * Handles prefixes and suffixes such as "~800", "1200+" and "~1K" (K = ×1000).
 *
 * @param {string|number} speed - Speed label from groqModels.
 * @returns {number} Parsed speed, or 0 when the label contains no number ("varies").
 */
function parseGroqSpeed(speed) {
  const match = /(\d+(?:\.\d+)?)\s*([kK])?/.exec(String(speed));
  if (!match) return 0;
  const value = Number.parseFloat(match[1]);
  return match[2] ? value * 1000 : value;
}

/**
 * Fill the #groqBody table body with one row per entry of `groqModels`.
 * The speed dot is "ultra" at >=800 t/s, "fast" at >=400 t/s, otherwise "normal".
 */
function renderGroq() {
  const body = document.getElementById('groqBody');
  body.innerHTML = groqModels.map((g) => {
    // parseInt() returned NaN for labels starting with "~" (e.g. "~800", "~1K"),
    // which silently demoted fast models to the "normal" dot.
    const spd = parseGroqSpeed(g.speed);
    const dotCls = spd >= 800 ? 'ultra' : spd >= 400 ? 'fast' : 'normal';
    return `<tr>
<td><span class="mbadge groq">${g.id}</span></td>
<td>${g.rpm}</td><td>${g.rpd}</td><td>${g.tpm}</td><td>${g.tpd}</td>
<td><div class="speed-ind"><span class="speed-dot ${dotCls}"></span> ${g.speed} t/s</div></td>
<td style="color:var(--text-secondary);font-size:.82em;max-width:280px">${g.use}</td></tr>`;
  }).join('');
}
/**
 * Render the model catalogue from `ollamaModels`:
 * the category filter buttons into #filterRow and one card per model into #modelGrid.
 */
function renderModels() {
  const GREEN = 'var(--accent-green)';
  const CYAN = 'var(--accent-cyan)';
  const ORANGE = 'var(--accent-orange)';

  // Collect categories in first-seen order for the filter row.
  const categories = new Set();
  for (const model of ollamaModels) {
    for (const category of model.cat) categories.add(category);
  }
  const filterButtons = Array.from(
    categories,
    (category) => `<button class="fbtn" onclick="filterM('${category}',this)">${category}</button>`,
  );
  document.getElementById('filterRow').innerHTML =
    '<button class="fbtn active" onclick="filterM(\'all\',this)">Все</button>' + filterButtons.join('');

  const cards = ollamaModels.map((model, pos) => {
    // Card border reflects the SWE-bench tier; models without a score keep the default border.
    let border = 'var(--border)';
    if (model.swe && model.swe >= 75) {
      border = GREEN;
    } else if (model.swe && model.swe >= 60) {
      border = CYAN;
    }

    const groqTag = model.groq ? '<span class="prov-tag groq">Groq ' + model.groqSpeed + 't/s</span>' : '';
    const openRouterTag = model.or ? '<span class="prov-tag openrouter">OpenRouter FREE</span>' : '';

    let sweRow = '';
    if (model.swe) {
      let sweColor = ORANGE;
      if (model.swe >= 75) sweColor = GREEN;
      else if (model.swe >= 60) sweColor = CYAN;
      sweRow = `<div class="mc-row"><span class="mc-label">SWE-bench</span><span class="mc-val" style="color:${sweColor}">${model.swe}%</span></div>`;
    }

    let ifRow = '';
    if (model.ifScore) {
      let ifColor = 'var(--accent-red)';
      if (model.ifScore >= 88) ifColor = GREEN;
      else if (model.ifScore >= 80) ifColor = CYAN;
      else if (model.ifScore >= 72) ifColor = ORANGE;
      const strongMark = model.ifScore >= 88 ? '🎯' : '';
      const weakMark = model.ifScore < 75 ? '⚠️' : '';
      ifRow = `<div class="mc-row"><span class="mc-label">Prompt Adherence (IF)</span><span class="mc-val" style="color:${ifColor}">${model.ifScore}<small>/100</small> ${strongMark}${weakMark}</span></div>`;
    }

    const tagChips = model.tags.map((tag) => `<span class="mc-tag">${tag}</span>`).join('');

    return `<div class="mc" style="animation:fadeUp .35s ${pos * 0.05}s ease-out both;border-color:${border}" data-cats='${JSON.stringify(model.cat)}'>
<div class="mc-name">${model.n} ${groqTag}${openRouterTag}</div>
<div class="mc-org">${model.org} · ${model.par} · ctx ${model.ctx}</div>
${sweRow}
${ifRow}
<div style="font-size:.78em;color:var(--text-secondary);line-height:1.45;margin-top:6px">${model.str}</div>
<div class="mc-tags">${tagChips}</div>
</div>`;
  });

  document.getElementById('modelGrid').innerHTML = cards.join('');
}
/**
 * Show only the model cards whose `data-cats` list contains `cat`
 * ('all' shows every card) and mark `btn` as the active filter button.
 *
 * @param {string} cat - Category to keep visible, or 'all'.
 * @param {Element} btn - The filter button that was pressed.
 */
function filterM(cat,btn) {
  for (const other of document.querySelectorAll('.fbtn')) {
    other.classList.remove('active');
  }
  btn.classList.add('active');

  const showEverything = cat === 'all';
  for (const card of document.querySelectorAll('.mc')) {
    // Short-circuit keeps data-cats unparsed when every card is shown.
    const visible = showEverything || JSON.parse(card.dataset.cats).includes(cat);
    card.style.display = visible ? '' : 'none';
  }
}
/**
 * Map a heatmap score to its cell background colour.
 *
 * @param {number} v - Score shown in the cell.
 * @returns {string} CSS rgba() colour; scores below 50 get the neutral grey.
 */
function hmColor(v) {
  // [minimum score, colour], checked from the highest tier down.
  const tiers = [
    [88, 'rgba(0,255,148,.8)'],
    [82, 'rgba(0,212,255,.7)'],
    [75, 'rgba(59,130,246,.6)'],
    [68, 'rgba(168,85,247,.45)'],
    [60, 'rgba(255,159,67,.4)'],
    [50, 'rgba(255,71,87,.3)'],
  ];
  for (const [floor, colour] of tiers) {
    if (v >= floor) return colour;
  }
  return 'rgba(90,104,128,.2)';
}
/**
 * Pick the text colour for a heatmap cell: dark text for scores of 75 and above,
 * light text otherwise.
 *
 * @param {number} v - Score shown in the cell.
 * @returns {string} Hex colour.
 */
function hmText(v) {
  if (v >= 75) {
    return '#0e1219';
  }
  return '#e8edf5';
}
/**
 * Build the agent × model heatmap into #hmTable.
 * Header: one rotated cell per hmModels entry (label, provider, IF score).
 * Body: one row per hmAgents entry; each cell is coloured by score, the row maximum
 * gets a star, the agent's current model column gets the "hm-cur" class, and hovering
 * a cell opens the tooltip through showTT()/hideTT().
 */
function renderHeatmap() {
  // Reasoning-effort markers; unknown levels fall back to the medium marker.
  const effortIcons = {"L":"🟢","M":"🟡","H":"🔴"};

  const providerColour = (provider) => {
    if (provider === 'Groq') return '#ff6b81';
    if (provider === 'Both') return '#c084fc';
    if (provider.includes('Open') || provider.includes('OR')) return '#e879f9';
    return 'var(--accent-cyan)';
  };
  const ifColour = (value) => {
    if (value >= 85) return '#00ff94';
    if (value >= 75) return '#facc15';
    return '#ff6b81';
  };

  const headerCells = hmModels.map((model) => `<th style="writing-mode:vertical-lr;transform:rotate(180deg);max-width:32px;font-size:.56em;padding:3px 1px;">
${model.n}<br>
<span style="color:${providerColour(model.p)};font-size:.85em">${model.p}</span><br>
<span style="color:${ifColour(model.if)};font-size:.9em;font-weight:700" title="Instruction Following score">IF:${model.if}</span>
</th>`);

  const bodyRows = hmAgents.map((agent) => {
    const rowMax = Math.max(...agent.s);
    const icon = effortIcons[agent.re] || '🟡';
    const cells = agent.s.map((score, col) => {
      const model = hmModels[col];
      const isBest = score === rowMax;
      const isCurrent = col === agent.c;
      const star = isBest ? '<span class="hm-star">★</span>' : '';
      const weakIfMark = model.if < 75 ? ' ⚠' : '';
      return `<td style="background:${hmColor(score)};color:${hmText(score)}" class="${isCurrent ? 'hm-cur' : ''}"
onmouseover="showTT(event,'${agent.n}','${model.n} (${model.p})',${score},${isBest},${isCurrent},${model.if})"
onmouseout="hideTT()">${score}${star}${weakIfMark}</td>`;
    });
    return `<tr><td class="hm-r">${icon} ${agent.n}</td>${cells.join('')}</tr>`;
  });

  document.getElementById('hmTable').innerHTML =
    '<thead><tr><th class="hm-role">Агент</th>' +
    headerCells.join('') +
    '</tr></thead><tbody>' +
    bodyRows.join('') +
    '</tbody>';
}
/**
 * Fill and show the heatmap tooltip just below the hovered cell.
 *
 * @param {Event} e - Hover event; its target's bounding box positions the tooltip.
 * @param {string} agent - Agent name.
 * @param {string} model - Model label (with provider).
 * @param {number} score - Score displayed in the cell.
 * @param {boolean} best - Whether this cell holds the row maximum.
 * @param {boolean} cur - Whether this is the agent's current model.
 * @param {number} ifScore - Instruction Following score of the model.
 */
function showTT(e,agent,model,score,best,cur,ifScore) {
  const box = document.getElementById('ttBox');
  const overlay = document.getElementById('ttOverlay');

  // IF tiers: >=85 good, >=75 medium, otherwise weak.
  let ifColor = '#ff6b81';
  let ifLabel = 'Слабо';
  if (ifScore >= 85) {
    ifColor = '#00ff94';
    ifLabel = 'Отлично';
  } else if (ifScore >= 75) {
    ifColor = '#facc15';
    ifLabel = 'Средне';
  }

  const weakIfWarning = ifScore < 75
    ? '<span style="color:#ff6b81">⚠ Модель плохо следует промпту и роли — оценка снижена</span><br>'
    : '';
  const bestLine = best ? '★ <strong>Лучший выбор</strong><br>' : '';
  const currentLine = cur ? '📌 <strong>Текущий выбор</strong>' : '';

  box.innerHTML = `<h4>${model}</h4><p><strong>Агент:</strong> ${agent}<br><strong>Итоговая оценка:</strong> ${score}/100<br>
<strong>Instruction Following:</strong> <span style="color:${ifColor};font-weight:700">${ifScore}/100 (${ifLabel})</span><br>
<span style="font-size:.9em;color:var(--text-muted)">Оценка = бенчмарк × IF-множитель</span><br>
${weakIfWarning}
${bestLine}${currentLine}</p>`;

  // Keep the tooltip inside the viewport horizontally; place it 6px under the cell.
  const cellRect = e.target.getBoundingClientRect();
  box.style.left = Math.min(cellRect.left, window.innerWidth - 320) + 'px';
  box.style.top = (cellRect.bottom + 6) + 'px';
  overlay.classList.add('show');
}
/** Hide the heatmap tooltip by removing the "show" class from #ttOverlay. */
function hideTT() {
  const overlay = document.getElementById('ttOverlay');
  overlay.classList.remove('show');
}
/**
 * Render one selectable card per entry of `recs` into #recGrid (every card starts
 * checked), then refresh the selection counter via updateRecSelection().
 */
function renderRecs() {
  const cards = recs.map((rec, pos) => {
    const glowClass = rec.imp === 'critical' ? 'glow' : '';
    // Metric cells are highlighted as positive when the label matches these markers.
    const speedClass = rec.sp.includes('10') ? 'pos' : 'neu';
    const ctxClass = rec.ctx.includes('→') ? 'pos' : 'neu';
    return `
<div class="rec-card ${glowClass} selected" data-idx="${pos}" style="animation:fadeUp .4s ${pos * 0.06}s ease-out both">
<input type="checkbox" class="rec-check" checked onchange="updateRecSelection()" data-idx="${pos}">
<div class="rec-hdr">
<div class="rec-agent">${rec.a}</div>
<span class="impact-badge ${rec.imp}">${rec.imp.toUpperCase()}</span>
</div>
<div class="swap-vis">
<span class="swap-from">${rec.from} <span class="prov-tag ${rec.fromP.toLowerCase()}">${rec.fromP}</span></span>
<span class="swap-arrow">→</span>
<span class="swap-to">${rec.to} <span class="prov-tag ${rec.toP.toLowerCase()}">${rec.toP}</span></span>
</div>
<div class="rec-metrics">
<div class="rec-m"><div class="rec-m-label">Качество</div><div class="rec-m-val pos">${rec.q}</div></div>
<div class="rec-m"><div class="rec-m-label">Скорость</div><div class="rec-m-val ${speedClass}">${rec.sp}</div></div>
<div class="rec-m"><div class="rec-m-label">Контекст</div><div class="rec-m-val ${ctxClass}">${rec.ctx}</div></div>
<div class="rec-m"><div class="rec-m-label">Провайдер</div><div class="rec-m-val" style="font-size:.7em;color:var(--text-secondary)">${rec.prov}</div></div>
</div>
<div class="rec-reason">${rec.r}</div>
</div>`;
  });

  document.getElementById('recGrid').innerHTML = cards.join('');
  updateRecSelection();
}
// ===== EXPORT SYSTEM =====
/**
 * Sync the recommendation cards with their checkboxes: toggle the "selected" class on
 * each card, update the "<n> из <total> выбрано" counter and dim the export button
 * when nothing is selected.
 */
function updateRecSelection() {
  let picked = 0;
  for (const box of document.querySelectorAll('.rec-check')) {
    const card = box.closest('.rec-card');
    if (box.checked) {
      picked += 1;
      card.classList.add('selected');
    } else {
      card.classList.remove('selected');
    }
  }

  const counter = document.getElementById('selectedCount');
  counter.textContent = `${picked} из ${recs.length} выбрано`;

  const exportButton = document.getElementById('exportBtn');
  exportButton.style.opacity = picked > 0 ? '1' : '.4';
}
/**
 * Select-all toggle for the recommendation cards: if every checkbox is checked,
 * uncheck them all; otherwise check them all. Updates the toggle button label and
 * refreshes the selection state.
 */
function toggleAllRecs() {
  const boxes = Array.from(document.querySelectorAll('.rec-check'));
  const everyBoxChecked = boxes.every((box) => box.checked);
  const nextState = !everyBoxChecked;
  for (const box of boxes) {
    box.checked = nextState;
  }

  // After unchecking everything the button offers "select all", and vice versa.
  const toggleButton = document.getElementById('selectAllBtn');
  if (everyBoxChecked) {
    toggleButton.textContent = '☑ Выбрать все';
  } else {
    toggleButton.textContent = '☐ Снять все';
  }
  updateRecSelection();
}
/**
 * Build the export document for the currently checked recommendation cards.
 *
 * Reads every `.rec-check` checkbox, maps each checked one to its `recs` entry through
 * `data-idx`, and returns the recommendations plus a matching capability-index patch.
 *
 * @returns {object} Export document (`recommendations`, `capability_index_patch`,
 *   summary metadata and a `generated` ISO timestamp).
 */
function buildExportJSON() {
  // Display label -> model id for capability-index.yaml. Built once per call instead
  // of once per checkbox.
  // NOTE(review): none of these keys matches a `to` label currently present in `recs`,
  // so every export falls through to 'ollama-cloud/' + the display label (including
  // score annotations and check marks) — confirm and refresh this table.
  const modelMap = {
    'nemotron-3-super': 'ollama-cloud/nemotron-3-super-120b-a12b',
    'nemotron-3-super + Groq burst': 'ollama-cloud/nemotron-3-super-120b-a12b',
    'qwen3-coder:480b': 'ollama-cloud/qwen3-coder:480b',
    'glm-5 (перенастроить)': 'ollama-cloud/glm-5',
  };
  // Display label -> fallback model id; currently empty, so no fallback is emitted.
  const fallbackMap = {
  };

  const selected = [];
  document.querySelectorAll('.rec-check').forEach((ch) => {
    if (!ch.checked) return;
    // Explicit radix: data-idx is always a decimal array index.
    const idx = Number.parseInt(ch.dataset.idx, 10);
    const r = recs[idx];
    // A checkbox that no longer maps to a recommendation is skipped instead of throwing.
    if (!r) return;

    const entry = {
      agent: r.a,
      action: 'update_model',
      current_model: r.from,
      current_provider: r.fromP.toLowerCase(),
      recommended_model: modelMap[r.to] || 'ollama-cloud/' + r.to,
      recommended_provider: r.toP.toLowerCase(),
      impact: r.imp,
      expected_improvement: {
        quality: r.q,
        speed: r.sp,
        context_window: r.ctx
      },
      // Strip any HTML tags from the rationale text.
      rationale: r.r.replace(/<[^>]*>/g, ''),
    };
    if (fallbackMap[r.to]) {
      entry.fallback_model = fallbackMap[r.to];
      entry.fallback_strategy = 'speed-burst';
      entry.fallback_note = 'Use Groq for low-volume speed-critical calls; primary on Ollama Cloud';
    }
    selected.push(entry);
  });

  return {
    "$schema": "https://app.kilo.ai/agent-recommendations.json",
    "generated": new Date().toISOString(),
    "source": "APAW Agent Model Research v3",
    "target_file": ".kilo/capability-index.yaml",
    "total_recommendations": selected.length,
    // NOTE(review): the summary below is static text and is not derived from the
    // selected recommendations — confirm it still reflects the current data.
    "summary": {
      "avg_quality_improvement": "+18%",
      "providers_used": ["ollama-cloud", "groq", "openrouter"],
      "key_models": [
        "nemotron-3-super-120b-a12b (1M ctx, SWE-bench 60.5%, RULER@1M 91.75%)",
        "qwen3-coder:480b (SWE-bench 66.5%, best open-source coding)",
        "qwen3.6-plus (FREE OpenRouter, 1M ctx, SWE-bench 78.8%)",
        "gemma4:31b (Intelligence Index 39, thinking, vision)",
        "minimax-m2.5 (SWE-bench 80.2%, best coding overall)"
      ]
    },
    "recommendations": selected,
    "capability_index_patch": selected.map((s) => ({
      agent: s.agent,
      set: { model: s.recommended_model }
    }))
  };
}
/**
 * Open the JSON preview modal with the export for the checked recommendations.
 * Does nothing when no recommendation is checked.
 */
function exportJSON() {
  const checkedCount = document.querySelectorAll('.rec-check:checked').length;
  if (checkedCount === 0) {
    return;
  }
  const pretty = JSON.stringify(buildExportJSON(), null, 2);
  const preview = document.getElementById('jsonPreview');
  preview.textContent = pretty;
  const modal = document.getElementById('jsonModal');
  modal.style.display = 'flex';
}
/** Hide the JSON preview modal (#jsonModal). */
function closeModal() {
  const modal = document.getElementById('jsonModal');
  modal.style.display = 'none';
}
/**
 * Copy the JSON shown in #jsonPreview to the clipboard and flash a confirmation on
 * the copy button for two seconds.
 *
 * Failures (Clipboard API missing, permission denied) are logged instead of being
 * left as an unhandled promise rejection.
 */
function copyJSON() {
  const text = document.getElementById('jsonPreview').textContent;
  const clipboard = typeof navigator !== 'undefined' ? navigator.clipboard : undefined;
  if (!clipboard || typeof clipboard.writeText !== 'function') {
    console.error('copyJSON: Clipboard API is not available in this context');
    return;
  }
  clipboard.writeText(text).then(() => {
    const btn = document.getElementById('copyBtn');
    btn.textContent = '✅ Скопировано!';
    // Restore the default label after the confirmation has been visible for 2 s.
    setTimeout(() => { btn.textContent = '📋 Копировать'; }, 2000);
  }).catch((err) => {
    console.error('copyJSON: failed to write to the clipboard', err);
  });
}
/**
 * Download the current export as `agent-model-recommendations.json` by clicking a
 * temporary anchor that points at a Blob object URL.
 */
function downloadJSON() {
  const payload = JSON.stringify(buildExportJSON(), null, 2);
  const objectUrl = URL.createObjectURL(new Blob([payload], { type: 'application/json' }));

  const link = document.createElement('a');
  link.href = objectUrl;
  link.download = 'agent-model-recommendations.json';

  // The anchor is attached to the document only for the duration of the click.
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);

  URL.revokeObjectURL(objectUrl);
}
/**
 * Render one before → after row per entry of `impactData` into #impactDetails, then
 * render the benchmark comparison table via renderBenchmarkComparison().
 */
function renderImpact() {
  const MONO = "font-family:'JetBrains Mono',monospace";
  const rows = impactData.map((item) => {
    // Rows with a positive delta are highlighted green; the rest stay muted.
    const improved = item.d > 0;
    const arrowColor = improved ? 'var(--accent-green)' : 'var(--text-muted)';
    const afterColor = improved ? 'var(--accent-green)' : 'var(--text-secondary)';
    const badgeBg = improved ? 'rgba(0,255,148,.08)' : 'rgba(90,104,128,.08)';
    const badgeColor = improved ? 'var(--accent-green)' : 'var(--text-muted)';
    const badgeText = improved ? '+' + item.d : '=0';
    return `
<div style="display:flex;align-items:center;gap:14px;padding:10px 0;border-bottom:1px solid var(--border)">
<div style="flex:1"><div style="font-weight:600;font-size:.88em">${item.cat}</div><div style="font-size:.74em;color:var(--text-muted);margin-top:2px">${item.n}</div></div>
<div style="display:flex;align-items:center;gap:10px">
<span style="${MONO};color:var(--text-muted);font-size:.82em">${item.b}</span>
<span style="color:${arrowColor};font-size:1.1em">→</span>
<span style="${MONO};color:${afterColor};font-size:.9em;font-weight:700">${item.a}</span>
<span style="${MONO};font-size:.78em;padding:2px 6px;border-radius:3px;
background:${badgeBg};color:${badgeColor}">
${badgeText}</span>
</div>
</div>`;
  });
  document.getElementById('impactDetails').innerHTML = rows.join('');

  // Render APAW vs TOP benchmark table
  renderBenchmarkComparison();
}
function renderBenchmarkComparison() {
// Expanded benchmarks: 9 dimensions covering all APAW roles
const benchmarks = [
{name:'SWE-V',full:'SWE-Bench Verified',desc:'GitHub issue resolution (500 tasks)',role:'lead-dev, backend, fixer'},
{name:'SWE-P',full:'SWE-Bench Pro',desc:'Multi-lang, decontaminated (1865 tasks)',role:'all coding agents'},
{name:'T-Bench',full:'Terminal-Bench 2.0',desc:'CLI/shell multi-step tasks',role:'devops, planner, orchestrator'},
{name:'LCB',full:'LiveCodeBench',desc:'Code gen from specs (held-out)',role:'sdet, go-dev, python-dev'},
{name:'GPQA',full:'GPQA Diamond',desc:'PhD-level reasoning',role:'system-analyst, planner'},
{name:'BComp',full:'BrowseComp',desc:'Web research & synthesis',role:'browser-auto, capability-analyst'},
{name:'HLE',full:'Humanity Last Exam',desc:'Frontier knowledge (with tools)',role:'agent-architect, evaluator'},
{name:'Ctx',full:'Context Window',desc:'Max tokens in one pass',role:'history-miner, memory-mgr'},
{name:'$/M',full:'Cost per 1M input',desc:'API pricing',role:'all agents (ROI)'},
];
const models = [
// === TOP CLOSED-SOURCE (April 2026 leaders) ===
{name:'Claude Opus 4.7',type:'closed',org:'Anthropic',
scores:[87.6,64.3,69.4,null,94.2,79.3,53.0,'1M','$5'],color:'#c084fc',
note:'#1 апрель 2026'},
{name:'GPT-5.5',type:'closed',org:'OpenAI',
scores:[null,58.6,82.7,null,null,83.4,57.2,'1M','$5'],color:'#ff6b81',
note:'Новейший, Terminal #1'},
{name:'GPT-5.4',type:'closed',org:'OpenAI',
scores:[78.2,59.1,75.1,null,94.4,82.7,58.7,'200K','$2.50'],color:'#ff6b81',
note:'Reasoning, math'},
{name:'Gemini 3.1 Pro',type:'closed',org:'Google',
scores:[80.6,46.1,68.5,null,94.3,85.9,51.4,'2M','$2'],color:'#facc15',
note:'ARC-AGI 77.1%, дешёвый'},
{name:'Claude Sonnet 4.6',type:'closed',org:'Anthropic',
scores:[79.6,null,null,null,null,null,null,'200K','$3'],color:'#c084fc',
note:'5× дешевле Opus'},
{name:'GPT-5.3-Codex',type:'closed',org:'OpenAI',
scores:[85.0,57.0,77.3,null,null,null,null,'200K','$6'],color:'#ff6b81',
note:'Coding specialist'},
// === APAW PIPELINE MODELS ===
{name:'Kimi K2.6',type:'apaw',org:'APAW',
scores:[80.2,58.6,66.7,87.2,null,83.2,54.0,'256K','$0.95'],color:'#00ff94',
note:'devops, browser, architect (3)'},
{name:'GLM-5.1',type:'apaw',org:'APAW',
scores:[null,58.4,63.5,null,86.2,68.7,null,'200K','~$0.50'],color:'#00ff94',
note:'12 agents! orchestrator, eval...'},
{name:'V4-Pro Max',type:'apaw',org:'APAW',
scores:[80.6,55.4,67.9,93.5,90.1,83.4,48.2,'1M','$0.42'],color:'#00d4ff',
note:'planner, go-dev (рек.)'},
{name:'Qwen3-Coder 480B',type:'apaw',org:'APAW',
scores:[66.5,null,null,null,null,null,null,'256K','~$0.50'],color:'#00ff94',
note:'7 coding agents'},
{name:'MiniMax M2.5',type:'apaw',org:'APAW',
scores:[80.2,51.3,null,null,null,76.3,null,'204K','$0.15'],color:'#00ff94',
note:'frontend, skeptic, fixer (3)'},
{name:'Nemotron Super',type:'apaw',org:'APAW',
scores:[60.5,null,null,null,null,null,null,'1M','~$0.40'],color:'#00ff94',
note:'6 agents (memory, history)'},
];
const t = document.getElementById('benchTable');
let h = '<thead><tr><th style="text-align:left;padding:8px 6px;border-bottom:2px solid var(--border);font-size:.85em">Модель</th>';
benchmarks.forEach(b => {
h += '<th style="padding:8px 3px;border-bottom:2px solid var(--border);font-size:.68em;max-width:60px" title="'+b.full+': '+b.desc+'\nРоли: '+b.role+'">'+b.name+'</th>';
});
h += '</tr></thead><tbody>';
// Calculate APAW best per benchmark
const apawBest = benchmarks.map((b,i) => {
let best = 0;
models.filter(m=>m.type==='apaw').forEach(m => {
const v = m.scores[i];
if(typeof v === 'number' && v > best) best = v;
});
return best;
});
// Calculate closed best per benchmark
const closedBest = benchmarks.map((b,i) => {
let best = 0;
models.filter(m=>m.type==='closed').forEach(m => {
const v = m.scores[i];
if(typeof v === 'number' && v > best) best = v;
});
return best;
});
models.forEach((m,mi) => {
if(mi === 6) h += '<tr><td colspan="'+(benchmarks.length+1)+'" style="padding:5px;background:rgba(0,212,255,.06);font-weight:700;font-size:.8em;color:var(--accent-cyan);text-align:center">— APAW Pipeline (open-source, $0.15$0.95/M) —</td></tr>';
h += '<tr style="'+(m.type==='apaw'?'background:rgba(0,255,148,.02)':'')+'">';
h += '<td style="padding:6px;border-bottom:1px solid var(--border);white-space:nowrap"><span style="font-weight:600;color:'+m.color+';font-size:.88em">'+m.name+'</span>';
h += '<div style="font-size:.65em;color:var(--text-muted)">'+m.note+'</div></td>';
m.scores.forEach((s,si) => {
let val, cellColor = 'var(--text-secondary)', bg = 'transparent';
if(s === null) { val = '—'; cellColor = 'rgba(90,104,128,.4)'; }
else if(typeof s === 'string' && s.startsWith('$')) {
val = s;
if(m.type === 'apaw') { cellColor = '#00ff94'; bg = 'rgba(0,255,148,.06)'; }
}
else if(typeof s === 'string') { val = s; }
else {
val = s.toFixed(1);
if(m.type === 'apaw' && si < 7) {
const cb = closedBest[si];
if(cb > 0) {
const diff = s - cb;
if(diff >= 0) { cellColor='#00ff94'; bg='rgba(0,255,148,.1)'; val+=' 🟢'; }
else if(diff > -5) { cellColor='#facc15'; bg='rgba(250,204,21,.06)'; val+=' 🟡'; }
else { cellColor='#ff6b81'; bg='rgba(255,107,129,.06)'; val+=' 🔴'; }
}
}
if(m.type === 'closed' && si < 7) {
// Highlight if APAW beats this closed model
const ab = apawBest[si];
if(ab > 0 && s < ab) { bg='rgba(255,107,129,.04)'; }
}
}
h += '<td style="padding:5px 3px;border-bottom:1px solid var(--border);text-align:center;color:'+cellColor+';background:'+bg+';font-size:.78em">'+val+'</td>';
});
h += '</tr>';
});
// === Summary row: APAW best vs Closed best ===
h += '<tr style="background:rgba(0,212,255,.05)"><td style="padding:8px;font-weight:700;color:var(--accent-cyan);font-size:.85em">APAW лучший</td>';
benchmarks.forEach((b,i) => {
if(i < 7) {
const ab = apawBest[i], cb = closedBest[i];
if(ab === 0) { h += '<td style="padding:8px 3px;text-align:center;font-size:.78em;color:var(--text-muted)">—</td>'; return; }
const diff = ab - cb;
const icon = diff >= 0 ? '🟢' : diff > -5 ? '🟡' : '🔴';
const pct = cb > 0 ? ((ab/cb)*100-100).toFixed(1) : '?';
const sign = diff >= 0 ? '+' : '';
h += '<td style="padding:6px 3px;text-align:center;font-weight:700;font-size:.78em"><span style="color:'+(diff>=0?'#00ff94':diff>-5?'#facc15':'#ff6b81')+'">'+ab.toFixed(1)+'</span><div style="font-size:.7em;color:var(--text-muted)">'+sign+diff.toFixed(1)+' '+icon+'</div></td>';
} else if(i === 7) {
h += '<td style="padding:8px 3px;text-align:center;font-size:.78em;color:var(--accent-green)">1M ✅</td>';
} else {
h += '<td style="padding:8px 3px;text-align:center;font-weight:700;font-size:.82em;color:var(--accent-green)">10-33× 🟢</td>';
}
});
h += '</tr>';
// === Role-based average row ===
h += '<tr style="background:rgba(0,255,148,.04)"><td style="padding:8px;font-weight:700;color:var(--accent-green);font-size:.82em">Средняя по ролям APAW*</td>';
// Calculate weighted average across all roles
const roleAvg = [78.2, 55.8, 65.7, 90.4, 88.2, 78.4, 51.1]; // pre-calculated across all 36 agents
const closedAvg = [82.2, 57.8, 74.6, null, 94.3, 83.4, 54.8];
roleAvg.forEach((ra,i) => {
if(i < 7 && ra > 0) {
const ca = closedAvg[i];
if(!ca) { h += '<td style="padding:6px 3px;text-align:center;font-size:.82em;color:var(--accent-green);font-weight:700">'+ra.toFixed(1)+'</td>'; return; }
const diff = ra - ca;
const col = diff >= 0 ? '#00ff94' : diff > -8 ? '#facc15' : '#ff6b81';
h += '<td style="padding:6px 3px;text-align:center;font-weight:700;font-size:.82em"><span style="color:'+col+'">'+ra.toFixed(1)+'</span><div style="font-size:.65em;color:var(--text-muted)">vs '+ca.toFixed(1)+'</div></td>';
} else if(i === 7) {
h += '<td style="padding:8px 3px;text-align:center;font-size:.78em;color:var(--accent-green)">573K avg</td>';
} else {
h += '<td style="padding:8px 3px;text-align:center;font-weight:700;font-size:.82em;color:var(--accent-green)">$0.49 avg</td>';
}
});
h += '</tr>';
t.innerHTML = h + '</tbody>';
}
/**
 * Paints the grouped "before / after" bar chart onto the #impactCanvas element.
 *
 * Reads the file-level `impactData` array. For every entry the fields used are:
 *   - `b`   score drawn as the first (red) bar,
 *   - `a`   score drawn as the second bar (green when `d > 0`, grey otherwise),
 *   - `d`   delta, printed as "+d" above the pair when positive,
 *   - `cat` category name, printed rotated under the pair with any
 *           parenthesised suffix removed and truncated to 22 characters.
 *
 * The Y axis is fixed to 0..100 with a grid line every 20 units. The canvas is
 * resized to its parent's width (minus 36px) at a fixed 340px height, with the
 * backing store multiplied by devicePixelRatio.
 *
 * Does nothing when the canvas is missing or hidden; when `impactData` is empty
 * the canvas is still resized and cleared, but nothing is drawn.
 *
 * @returns {void}
 */
function drawChart() {
  const canvas = document.getElementById('impactCanvas');
  if (!canvas || !canvas.offsetParent) return; // skip if hidden

  const ctx = canvas.getContext('2d');
  const dpr = window.devicePixelRatio || 1;

  // Clamp at 0: a parent narrower than the 36px allowance must not produce a
  // negative canvas size.
  const cssW = Math.max(0, canvas.parentElement.clientWidth - 36);
  const cssH = 340;

  // Backing store in device pixels, CSS box in CSS pixels. Assigning
  // width/height resets the 2D context state, so the scale below is applied
  // from a clean transform on every call.
  canvas.width = cssW * dpr;
  canvas.height = cssH * dpr;
  canvas.style.width = cssW + 'px';
  canvas.style.height = cssH + 'px';
  ctx.scale(dpr, dpr);
  ctx.clearRect(0, 0, cssW, cssH);

  const data = impactData;
  if (!data.length) return;

  // Bar width shrinks with the number of categories but is capped at 38px.
  // The lower bound of 1px prevents a negative width on narrow containers,
  // which fillRect would otherwise render as a bar mirrored to the left.
  const barW = Math.max(1, Math.min(38, (cssW - 180) / data.length / 2 - 4));

  const cL = 48;          // left edge of the plot area
  const cB = cssH - 60;   // baseline (y of value 0)
  const cH = cB - 20;     // plot height in px for the full value range
  const mx = 100;         // value mapped to the top of the plot
  const slotW = (cssW - cL - 40) / data.length; // horizontal pitch per category

  // Grid lines
  ctx.strokeStyle = 'rgba(30,39,54,.7)';
  ctx.lineWidth = 1;
  for (let step = 0; step <= 5; step++) {
    const y = cB - (cH * (step * 20) / mx);
    ctx.beginPath();
    ctx.moveTo(cL, y);
    ctx.lineTo(cssW - 16, y);
    ctx.stroke();
    ctx.fillStyle = '#5a6880';
    ctx.font = '10px JetBrains Mono,monospace';
    ctx.textAlign = 'right';
    ctx.fillText(step * 20, cL - 6, y + 3);
  }

  // Bars
  data.forEach((d, i) => {
    const x = cL + 28 + i * slotW;

    // Before bar (red)
    const hBefore = (d.b / mx) * cH;
    ctx.fillStyle = 'rgba(255,71,87,.4)';
    ctx.fillRect(x, cB - hBefore, barW, hBefore);

    // After bar (green or grey)
    const hAfter = (d.a / mx) * cH;
    ctx.fillStyle = d.d > 0 ? 'rgba(0,255,148,.55)' : 'rgba(136,150,170,.35)';
    ctx.fillRect(x + barW + 3, cB - hAfter, barW, hAfter);

    // Delta label, centred over the pair and placed above the taller bar
    if (d.d > 0) {
      ctx.fillStyle = '#00ff94';
      ctx.font = 'bold 10px JetBrains Mono,monospace';
      ctx.textAlign = 'center';
      ctx.fillText('+' + d.d, x + barW + 1, cB - Math.max(hBefore, hAfter) - 6);
    }

    // Category label (rotated); save/restore keeps the rotation local
    ctx.save();
    ctx.translate(x + barW, cB + 10);
    ctx.rotate(-0.4);
    ctx.fillStyle = '#8896aa';
    ctx.font = '8px Outfit,sans-serif';
    ctx.textAlign = 'left';
    const label = d.cat.replace(/\s*\(.*?\)/g, '').substring(0, 22);
    ctx.fillText(label, 0, 0);
    ctx.restore();
  });

  // Legend
  ctx.fillStyle = 'rgba(255,71,87,.4)';
  ctx.fillRect(cssW - 180, 8, 12, 12);
  ctx.fillStyle = '#8896aa';
  ctx.font = '11px Outfit,sans-serif';
  ctx.textAlign = 'left';
  ctx.fillText('Текущий score', cssW - 162, 18);
  ctx.fillStyle = 'rgba(0,255,148,.55)';
  ctx.fillRect(cssW - 180, 26, 12, 12);
  ctx.fillText('После замены', cssW - 162, 36);
}
// ======================= INIT =======================
// Once the document has been parsed, run each render routine in order.
document.addEventListener('DOMContentLoaded', () => {
  renderCfg();
  renderGroq();
  renderModels();
  renderHeatmap();
  renderRecs();
  renderImpact();
});

// The chart is sized from its container, so repaint it on window resize,
// but only while the impact tab carries the "active" class.
window.addEventListener('resize', () => {
  const impactTab = document.getElementById('tab-impact');
  if (impactTab.classList.contains('active')) {
    drawChart();
  }
});
</script>
</body>
</html>