- Integrate apaw_agent_model_research_v3.html as standalone dashboard - Add model-benchmarks.json with 32 agents, 11 scored models, 11 recommendations - Add build-research-dashboard.ts: inject live data into template → standalone HTML - Add rebuild-template.cjs: regenerate template from v3.html source - Add sync-benchmarks-from-yaml.cjs: sync YAML → JSON round-trip - Add sync-model-research.ts: apply recommendation matrix to config files - Add model-benchmarks.schema.json and model-research.schema.json for validation - Add bidirectional-data-flow.md architecture documentation - Add log-execution.cjs pipeline hook - Update capability-index.yaml: add fallback_models, failover_strategy - Update kilo-meta.json, kilo.jsonc, KILO_SPEC.md with synced models - Update evolution.md / research.md / self-evolution.md / evolutionary-sync.md docs - Fix security-auditor.md: quote YAML color (#DC2626) - Fix orchestrator.md: remove duplicate devops-engineer key - Build research-dashboard.html (106KB standalone) + dated archive
1169 lines
77 KiB
HTML
1169 lines
77 KiB
HTML
<!DOCTYPE html>
|
||
<html lang="ru">
|
||
<head>
|
||
<meta charset="UTF-8">
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||
<title>APAW KiloCode — Agent Model Research v3 (Ollama + Groq + OpenRouter)</title>
|
||
<link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@300;400;500;600;700&family=Outfit:wght@300;400;500;600;700;800;900&display=swap" rel="stylesheet">
|
||
<style>
|
||
:root {
|
||
--bg-deep: #080b12;
|
||
--bg-panel: #0e1219;
|
||
--bg-card: #141922;
|
||
--bg-card-hover: #1a2130;
|
||
--border: #1e2736;
|
||
--border-bright: #2a3650;
|
||
--text-primary: #e8edf5;
|
||
--text-secondary: #8896aa;
|
||
--text-muted: #5a6880;
|
||
--accent-cyan: #00d4ff;
|
||
--accent-green: #00ff94;
|
||
--accent-orange: #ff9f43;
|
||
--accent-red: #ff4757;
|
||
--accent-purple: #a855f7;
|
||
--accent-blue: #3b82f6;
|
||
--accent-yellow: #facc15;
|
||
--glow-cyan: rgba(0,212,255,0.15);
|
||
--glow-green: rgba(0,255,148,0.1);
|
||
--glow-orange: rgba(255,159,67,0.1);
|
||
}
|
||
* { margin:0; padding:0; box-sizing:border-box; }
|
||
body {
|
||
font-family:'Outfit',sans-serif;
|
||
background:var(--bg-deep);
|
||
color:var(--text-primary);
|
||
min-height:100vh;
|
||
overflow-x:hidden;
|
||
}
|
||
body::before {
|
||
content:'';
|
||
position:fixed; top:0; left:0; right:0; bottom:0;
|
||
background:
|
||
linear-gradient(90deg,rgba(0,212,255,0.02) 1px,transparent 1px),
|
||
linear-gradient(rgba(0,212,255,0.02) 1px,transparent 1px);
|
||
background-size:60px 60px;
|
||
animation:gp 8s ease-in-out infinite;
|
||
pointer-events:none; z-index:0;
|
||
}
|
||
@keyframes gp { 0%,100%{opacity:.3} 50%{opacity:.6} }
|
||
.container { max-width:1540px; margin:0 auto; padding:24px 16px; position:relative; z-index:1; }
|
||
|
||
/* HEADER */
|
||
.header { text-align:center; margin-bottom:32px; animation:fadeDown .7s ease-out; }
|
||
@keyframes fadeDown { from{opacity:0;transform:translateY(-20px)} to{opacity:1;transform:translateY(0)} }
|
||
.header h1 {
|
||
font-size:2.6em; font-weight:900;
|
||
background:linear-gradient(135deg,var(--accent-cyan),var(--accent-green),var(--accent-purple));
|
||
-webkit-background-clip:text; -webkit-text-fill-color:transparent;
|
||
letter-spacing:-1px;
|
||
}
|
||
.header .sub { font-family:'JetBrains Mono',monospace; color:var(--text-muted); font-size:.8em; margin-top:6px; letter-spacing:1px; }
|
||
|
||
/* TABS */
|
||
.tabs { display:flex; gap:3px; background:var(--bg-panel); border:1px solid var(--border); border-radius:12px; padding:4px; margin-bottom:28px; overflow-x:auto; }
|
||
.tab-btn {
|
||
flex:1; min-width:100px; padding:10px 8px; background:none; border:none; color:var(--text-secondary);
|
||
font-family:'Outfit',sans-serif; font-size:.82em; font-weight:600; border-radius:9px; cursor:pointer; transition:all .25s; white-space:nowrap;
|
||
}
|
||
.tab-btn:hover { color:var(--text-primary); background:var(--bg-card); }
|
||
.tab-btn.active { color:var(--bg-deep); background:linear-gradient(135deg,var(--accent-cyan),var(--accent-green)); box-shadow:0 0 16px var(--glow-cyan); }
|
||
.tab-panel { display:none; animation:fadeUp .4s ease-out; }
|
||
.tab-panel.active { display:block; }
|
||
@keyframes fadeUp { from{opacity:0;transform:translateY(16px)} to{opacity:1;transform:translateY(0)} }
|
||
|
||
/* STAT CARDS */
|
||
.stats-row { display:grid; grid-template-columns:repeat(auto-fit,minmax(190px,1fr)); gap:14px; margin-bottom:26px; }
|
||
.stat-card {
|
||
background:var(--bg-card); border:1px solid var(--border); border-radius:11px; padding:18px; position:relative; overflow:hidden; transition:all .3s;
|
||
}
|
||
.stat-card:hover { border-color:var(--accent-cyan); transform:translateY(-2px); box-shadow:0 6px 24px var(--glow-cyan); }
|
||
.stat-label { font-family:'JetBrains Mono',monospace; font-size:.65em; color:var(--text-muted); text-transform:uppercase; letter-spacing:1.5px; margin-bottom:6px; }
|
||
.stat-value { font-size:2em; font-weight:800; }
|
||
.stat-sub { font-size:.75em; color:var(--text-secondary); margin-top:3px; }
|
||
.grad-cyan { background:linear-gradient(135deg,var(--accent-cyan),var(--accent-green)); -webkit-background-clip:text; -webkit-text-fill-color:transparent; }
|
||
.grad-orange { background:linear-gradient(135deg,var(--accent-orange),var(--accent-yellow)); -webkit-background-clip:text; -webkit-text-fill-color:transparent; }
|
||
.grad-purple { background:linear-gradient(135deg,var(--accent-purple),#e879f9); -webkit-background-clip:text; -webkit-text-fill-color:transparent; }
|
||
.grad-green { background:linear-gradient(135deg,var(--accent-green),#4ade80); -webkit-background-clip:text; -webkit-text-fill-color:transparent; }
|
||
.grad-red { background:linear-gradient(135deg,var(--accent-red),#ff6b81); -webkit-background-clip:text; -webkit-text-fill-color:transparent; }
|
||
|
||
/* SECTION HEADERS */
|
||
.sec-hdr { display:flex; align-items:center; gap:10px; margin-bottom:18px; padding-bottom:10px; border-bottom:1px solid var(--border); }
|
||
.sec-hdr h2 { font-size:1.2em; font-weight:700; }
|
||
.badge { font-family:'JetBrains Mono',monospace; font-size:.65em; padding:3px 9px; border-radius:16px; }
|
||
.badge-cyan { background:var(--glow-cyan); color:var(--accent-cyan); border:1px solid rgba(0,212,255,.2); }
|
||
.badge-orange { background:var(--glow-orange); color:var(--accent-orange); border:1px solid rgba(255,159,67,.2); }
|
||
.badge-green { background:var(--glow-green); color:var(--accent-green); border:1px solid rgba(0,255,148,.2); }
|
||
|
||
/* TABLES */
|
||
.tbl-wrap { overflow-x:auto; border-radius:11px; border:1px solid var(--border); background:var(--bg-card); margin-bottom:26px; }
|
||
table.dt { width:100%; border-collapse:collapse; font-size:.84em; }
|
||
table.dt th {
|
||
font-family:'JetBrains Mono',monospace; font-size:.7em; color:var(--text-muted); text-transform:uppercase;
|
||
letter-spacing:1.2px; padding:12px 14px; background:var(--bg-panel); border-bottom:2px solid var(--border); text-align:left; position:sticky; top:0;
|
||
}
|
||
table.dt td { padding:10px 14px; border-bottom:1px solid var(--border); transition:background .15s; }
|
||
table.dt tr:hover td { background:var(--bg-card-hover); }
|
||
|
||
.mbadge { display:inline-block; padding:3px 8px; border-radius:5px; font-family:'JetBrains Mono',monospace; font-size:.78em; font-weight:500; }
|
||
.mbadge.qwen { background:rgba(59,130,246,.12); color:#60a5fa; border:1px solid rgba(59,130,246,.25); }
|
||
.mbadge.gptoss { background:rgba(168,85,247,.12); color:#c084fc; border:1px solid rgba(168,85,247,.25); }
|
||
.mbadge.glm { background:rgba(0,255,148,.08); color:#00ff94; border:1px solid rgba(0,255,148,.2); }
|
||
.mbadge.minimax { background:rgba(255,159,67,.12); color:#ff9f43; border:1px solid rgba(255,159,67,.25); }
|
||
.mbadge.devstral { background:rgba(0,212,255,.12); color:#00d4ff; border:1px solid rgba(0,212,255,.25); }
|
||
.mbadge.deepseek { background:rgba(250,204,21,.12); color:#facc15; border:1px solid rgba(250,204,21,.25); }
|
||
.mbadge.nemotron { background:rgba(34,197,94,.12); color:#4ade80; border:1px solid rgba(34,197,94,.25); }
|
||
.mbadge.groq { background:rgba(255,71,87,.12); color:#ff6b81; border:1px solid rgba(255,71,87,.25); }
|
||
.mbadge.kimi { background:rgba(250,204,21,.12); color:#fde68a; border:1px solid rgba(250,204,21,.2); }
|
||
.mbadge.llama { background:rgba(59,130,246,.1); color:#93c5fd; border:1px solid rgba(59,130,246,.2); }
|
||
|
||
/* SCORE BAR */
|
||
.sbar { display:flex; align-items:center; gap:6px; }
|
||
.sbar-bg { width:70px; height:5px; background:var(--border); border-radius:3px; overflow:hidden; }
|
||
.sbar-fill { height:100%; border-radius:3px; transition:width 1s ease-out; }
|
||
.sbar-fill.h { background:linear-gradient(90deg,var(--accent-green),#00ff94); }
|
||
.sbar-fill.m { background:linear-gradient(90deg,var(--accent-orange),#ffc048); }
|
||
.sbar-fill.l { background:linear-gradient(90deg,var(--accent-red),#ff6b81); }
|
||
.snum { font-family:'JetBrains Mono',monospace; font-weight:600; font-size:.85em; min-width:28px; }
|
||
|
||
/* GROQ SPEED INDICATOR */
|
||
.speed-ind { display:inline-flex; align-items:center; gap:4px; }
|
||
.speed-dot { width:7px; height:7px; border-radius:50%; animation:pulse 1.5s ease-in-out infinite; }
|
||
.speed-dot.ultra { background:var(--accent-green); box-shadow:0 0 8px var(--accent-green); }
|
||
.speed-dot.fast { background:var(--accent-cyan); box-shadow:0 0 6px var(--accent-cyan); }
|
||
.speed-dot.normal { background:var(--accent-orange); }
|
||
@keyframes pulse { 0%,100%{opacity:.5;transform:scale(.8)} 50%{opacity:1;transform:scale(1.2)} }
|
||
|
||
/* RECOMMENDATION CARDS */
|
||
.rec-grid { display:grid; grid-template-columns:repeat(auto-fit,minmax(400px,1fr)); gap:16px; margin-bottom:26px; }
|
||
.rec-card {
|
||
background:var(--bg-card); border:1px solid var(--border); border-radius:12px; padding:20px;
|
||
position:relative; overflow:hidden; transition:all .35s;
|
||
}
|
||
.rec-card:hover { border-color:var(--accent-green); box-shadow:0 0 30px var(--glow-green); transform:translateY(-2px); }
|
||
.rec-card.glow { animation:glowP 3s ease-in-out infinite; }
|
||
@keyframes glowP { 0%,100%{box-shadow:0 0 16px var(--glow-green)} 50%{box-shadow:0 0 32px var(--glow-green)} }
|
||
.rec-hdr { display:flex; justify-content:space-between; align-items:flex-start; margin-bottom:12px; }
|
||
.rec-agent { font-weight:700; font-size:1em; color:var(--accent-cyan); }
|
||
.impact-badge { padding:2px 8px; border-radius:16px; font-family:'JetBrains Mono',monospace; font-size:.68em; font-weight:600; }
|
||
.impact-badge.critical { background:rgba(255,71,87,.18); color:var(--accent-red); border:1px solid rgba(255,71,87,.25); }
|
||
.impact-badge.high { background:rgba(255,159,67,.18); color:var(--accent-orange); border:1px solid rgba(255,159,67,.25); }
|
||
.impact-badge.medium { background:rgba(250,204,21,.18); color:var(--accent-yellow); border:1px solid rgba(250,204,21,.25); }
|
||
.swap-vis { display:flex; align-items:center; gap:10px; margin:12px 0; padding:12px; background:var(--bg-panel); border-radius:8px; }
|
||
.swap-from { font-family:'JetBrains Mono',monospace; font-size:.78em; padding:4px 8px; border-radius:5px; background:rgba(255,71,87,.08); color:#ff6b81; border:1px solid rgba(255,71,87,.15); text-decoration:line-through; opacity:.65; }
|
||
.swap-to { font-family:'JetBrains Mono',monospace; font-size:.78em; padding:4px 8px; border-radius:5px; background:rgba(0,255,148,.08); color:#00ff94; border:1px solid rgba(0,255,148,.2); font-weight:600; }
|
||
.swap-arrow { color:var(--accent-green); font-size:1.4em; animation:arrP 2s ease-in-out infinite; }
|
||
@keyframes arrP { 0%,100%{opacity:.4;transform:scale(1)} 50%{opacity:1;transform:scale(1.12)} }
|
||
.rec-metrics { display:grid; grid-template-columns:repeat(4,1fr); gap:8px; margin-top:12px; }
|
||
.rec-m { text-align:center; padding:6px; background:var(--bg-deep); border-radius:6px; }
|
||
.rec-m-label { font-size:.6em; color:var(--text-muted); text-transform:uppercase; letter-spacing:.8px; font-family:'JetBrains Mono',monospace; }
|
||
.rec-m-val { font-size:1.1em; font-weight:700; margin-top:1px; }
|
||
.rec-m-val.pos { color:var(--accent-green); }
|
||
.rec-m-val.neu { color:var(--accent-orange); }
|
||
.rec-reason { font-size:.82em; color:var(--text-secondary); line-height:1.55; margin-top:10px; padding-top:10px; border-top:1px solid var(--border); }
|
||
|
||
/* HEATMAP */
|
||
.hm-wrap { overflow-x:auto; border-radius:11px; border:1px solid var(--border); background:var(--bg-card); padding:18px; margin-bottom:26px; }
|
||
.hm-title { font-weight:700; font-size:1.05em; }
|
||
.hm-sub { font-size:.76em; color:var(--text-muted); margin-bottom:14px; }
|
||
.hm-table { border-collapse:collapse; width:100%; }
|
||
.hm-table th { font-family:'JetBrains Mono',monospace; font-size:.62em; color:var(--text-muted); padding:6px 4px; text-align:center; white-space:nowrap; }
|
||
.hm-table th.hm-role { text-align:left; min-width:150px; font-size:.68em; }
|
||
.hm-table td { text-align:center; padding:5px 3px; font-family:'JetBrains Mono',monospace; font-size:.74em; font-weight:600; border-radius:3px; cursor:pointer; transition:all .12s; min-width:38px; }
|
||
.hm-table td:hover { transform:scale(1.12); z-index:2; }
|
||
.hm-table td.hm-r { text-align:left; font-family:'Outfit',sans-serif; font-size:.78em; font-weight:500; color:var(--text-secondary); cursor:default; }
|
||
.hm-table td.hm-r:hover { transform:none; }
|
||
.hm-star { color:#FFD700; font-size:.85em; }
|
||
.hm-cur { outline:2px solid var(--accent-cyan); outline-offset:-2px; }
|
||
|
||
/* PROVIDER TAGS */
|
||
.prov-tag { display:inline-block; padding:1px 6px; border-radius:3px; font-size:.62em; font-family:'JetBrains Mono',monospace; margin-left:4px; }
|
||
.prov-tag.ollama { background:rgba(0,212,255,.1); color:var(--accent-cyan); }
|
||
.prov-tag.groq { background:rgba(255,71,87,.1); color:#ff6b81; }
|
||
.prov-tag.openrouter { background:rgba(168,85,247,.1); color:#c084fc; }
|
||
.prov-tag.hybrid { background:rgba(0,255,148,.1); color:#00ff94; }
|
||
|
||
/* MODEL CARDS */
|
||
.model-grid { display:grid; grid-template-columns:repeat(auto-fit,minmax(290px,1fr)); gap:14px; margin-bottom:26px; }
|
||
.mc { background:var(--bg-card); border:1px solid var(--border); border-radius:12px; padding:18px; transition:all .3s; position:relative; }
|
||
.mc:hover { transform:translateY(-2px); border-color:var(--accent-cyan); box-shadow:0 6px 24px var(--glow-cyan); }
|
||
.mc-name { font-weight:700; font-size:1.05em; margin-bottom:3px; }
|
||
.mc-org { font-size:.74em; color:var(--text-muted); margin-bottom:12px; font-family:'JetBrains Mono',monospace; }
|
||
.mc-row { display:flex; justify-content:space-between; align-items:center; padding:5px 0; border-bottom:1px solid rgba(30,39,54,.4); font-size:.82em; }
|
||
.mc-row:last-child { border-bottom:none; }
|
||
.mc-label { color:var(--text-secondary); }
|
||
.mc-val { font-family:'JetBrains Mono',monospace; font-weight:600; }
|
||
.mc-tags { display:flex; flex-wrap:wrap; gap:3px; margin-top:10px; }
|
||
.mc-tag { font-size:.64em; padding:2px 6px; border-radius:3px; font-family:'JetBrains Mono',monospace; background:rgba(0,212,255,.06); color:var(--accent-cyan); border:1px solid rgba(0,212,255,.12); }
|
||
.mc-best { font-size:.72em; padding:3px 8px; border-radius:4px; background:rgba(0,255,148,.1); color:var(--accent-green); border:1px solid rgba(0,255,148,.2); margin-top:8px; display:inline-block; }
|
||
|
||
/* GROQ SECTION */
|
||
.groq-card { border-left:3px solid var(--accent-red); }
|
||
.groq-speed { font-family:'JetBrains Mono',monospace; font-size:1.8em; font-weight:800; color:var(--accent-red); }
|
||
|
||
/* SUMMARY */
|
||
.summary { background:linear-gradient(135deg,rgba(0,212,255,.04),rgba(0,255,148,.04)); border:1px solid var(--border-bright); border-radius:12px; padding:24px; margin-bottom:26px; }
|
||
.summary h3 { color:var(--accent-cyan); font-size:1.1em; margin-bottom:10px; }
|
||
.summary p { color:var(--text-secondary); line-height:1.65; font-size:.88em; }
|
||
.summary ul { list-style:none; margin-top:10px; }
|
||
.summary li { padding:5px 0 5px 18px; position:relative; color:var(--text-secondary); font-size:.86em; line-height:1.55; }
|
||
.summary li::before { content:'›'; position:absolute; left:0; color:var(--accent-green); font-weight:700; font-size:1.2em; }
|
||
|
||
/* FILTER ROW */
|
||
.frow { display:flex; gap:6px; margin-bottom:16px; flex-wrap:wrap; }
|
||
.fbtn { padding:5px 12px; background:var(--bg-card); border:1px solid var(--border); color:var(--text-secondary); border-radius:7px; font-family:'Outfit',sans-serif; font-size:.78em; cursor:pointer; transition:all .2s; }
|
||
.fbtn:hover,.fbtn.active { border-color:var(--accent-cyan); color:var(--accent-cyan); background:rgba(0,212,255,.06); }
|
||
|
||
/* TOOLTIP */
|
||
#ttOverlay { display:none; position:fixed; top:0;left:0;right:0;bottom:0; z-index:999; pointer-events:none; }
|
||
#ttOverlay.show { display:block; }
|
||
#ttBox { position:absolute; background:var(--bg-panel); border:1px solid var(--accent-cyan); border-radius:9px; padding:12px 16px; max-width:300px; box-shadow:0 10px 32px rgba(0,0,0,.55); z-index:1000; }
|
||
#ttBox h4 { color:var(--accent-cyan); font-size:.9em; margin-bottom:4px; }
|
||
#ttBox p { font-size:.78em; color:var(--text-secondary); line-height:1.45; }
|
||
|
||
/* CANVAS */
|
||
.chart-wrap { border-radius:11px; border:1px solid var(--border); background:var(--bg-card); padding:18px; margin-bottom:26px; }
|
||
.chart-title { font-weight:700; font-size:1.05em; margin-bottom:12px; }
|
||
|
||
@media(max-width:768px) {
|
||
.header h1 { font-size:1.6em; }
|
||
.tabs { flex-wrap:wrap; }
|
||
.rec-grid,.model-grid { grid-template-columns:1fr; }
|
||
.stats-row { grid-template-columns:repeat(2,1fr); }
|
||
.rec-metrics { grid-template-columns:repeat(2,1fr); }
|
||
}
|
||
|
||
/* EXPORT BUTTONS */
|
||
.export-btn {
|
||
padding:8px 16px; background:var(--bg-card); border:1px solid var(--border-bright);
|
||
color:var(--text-secondary); font-family:'Outfit',sans-serif; font-size:.82em; font-weight:600;
|
||
border-radius:8px; cursor:pointer; transition:all .25s; display:inline-flex; align-items:center;
|
||
}
|
||
.export-btn:hover { border-color:var(--accent-cyan); color:var(--text-primary); background:var(--bg-card-hover); }
|
||
.export-btn-primary {
|
||
background:linear-gradient(135deg,rgba(0,212,255,.15),rgba(0,255,148,.1));
|
||
border-color:var(--accent-cyan); color:var(--accent-cyan);
|
||
}
|
||
.export-btn-primary:hover { background:linear-gradient(135deg,rgba(0,212,255,.25),rgba(0,255,148,.18)); box-shadow:0 0 20px var(--glow-cyan); }
|
||
|
||
/* REC CARD CHECKBOX */
|
||
.rec-check { position:absolute; top:14px; right:14px; width:22px; height:22px; cursor:pointer; z-index:3; accent-color:var(--accent-green); }
|
||
.rec-card.selected { border-color:var(--accent-green); background:rgba(0,255,148,.03); }
|
||
.rec-card { position:relative; }
|
||
</style>
|
||
</head>
|
||
<body>
|
||
<div class="container">
|
||
<div class="header">
|
||
<h1>APAW Agent Model Research v3</h1>
|
||
<div class="sub">capability-index.yaml · Ollama Cloud + OpenRouter · GLM-5.1 + Qwen 3.6+ · April 2026</div>
|
||
</div>
|
||
|
||
<div class="tabs" id="tabBar">
|
||
<button class="tab-btn active" onclick="switchTab('overview')">Обзор</button>
|
||
<button class="tab-btn" onclick="switchTab('groq')">Groq Free Tier</button>
|
||
<button class="tab-btn" onclick="switchTab('models')">Все модели</button>
|
||
<button class="tab-btn" onclick="switchTab('heatmap')">Матрица</button>
|
||
<button class="tab-btn" onclick="switchTab('recs')">Рекомендации</button>
|
||
<button class="tab-btn" onclick="switchTab('impact')">Анализ профита</button>
|
||
</div>
|
||
|
||
<!-- ========== TAB: OVERVIEW ========== -->
|
||
<div id="tab-overview" class="tab-panel active">
|
||
<div class="stats-row">
|
||
<div class="stat-card"><div class="stat-label">Агентов</div><div class="stat-value grad-cyan" id="c1">36</div><div class="stat-sub">32 custom + 4 built-in</div></div>
|
||
<div class="stat-card"><div class="stat-label">Моделей сейчас</div><div class="stat-value grad-orange">6</div><div class="stat-sub">Coder(9) GLM-5.1(11) K2.6(4)</div></div>
|
||
<div class="stat-card"><div class="stat-label">Ollama Cloud</div><div class="stat-value grad-purple">20+</div><div class="stat-sub">доступно бесплатно</div></div>
|
||
<div class="stat-card"><div class="stat-label">Groq + OpenRouter</div><div class="stat-value grad-red">16+</div><div class="stat-sub">free tier моделей</div></div>
|
||
<div class="stat-card"><div class="stat-label">Рекомендаций</div><div class="stat-value grad-green">11</div><div class="stat-sub">8/8 applied ✅</div></div>
|
||
</div>
|
||
|
||
<div class="summary">
|
||
<h3>Ключевые находки v3 (после коммита caf77f53c8)</h3>
|
||
<p>Ваш агент уже применил 11 из моих рекомендаций (коммит от 05:21). Но я обнаружил что <strong>до применения</strong> некоторые агенты были на других моделях чем я предполагал:</p>
|
||
<ul>
|
||
<li><strong style="color:var(--accent-red)">⚠ Откат Qwen 3.6 Plus</strong> — security-auditor, prompt-optimizer, product-owner и markdown-validator <em>до коммита</em> были на <code>openrouter/qwen3.6-plus:free</code> и <code>deepseek-v3.2</code>, но мои рекомендации их заменили на Ollama-модели. Это снижает разнообразие провайдеров!</li>
|
||
<li><strong style="color:var(--accent-green)">✅ 11 замен уже применены</strong> — Nemotron 3 Super теперь на 7 ролях, GLM-5 расширен, Qwen3-Coder на Go, markdown-validator</li>
|
||
<li><strong style="color:var(--accent-orange)">🔴 Осталось 3 агента на gpt-oss:120b</strong> — requirement-refiner, capability-analyst, agent-architect. Всем им нужен Nemotron 3 Super</li>
|
||
<li><strong>Новая стратегия: гибридный мультипровайдер</strong> — OpenRouter (Qwen 3.6 Plus FREE, 1M ctx) + Groq (gpt-oss 500 t/s) + Ollama (основной). Диверсификация снижает зависимость</li>
|
||
<li><strong style="color:#00ff94">Qwen 3.6 Plus стоит вернуть</strong> для prompt-optimizer (Terminal-Bench 61.6% > Claude!) и product-owner (1M контекст для backlog)</li>
|
||
|
||
<li><strong>History-miner → Nemotron 3 Super</strong> — самый большой оставшийся прирост: 88 vs 78 (GLM-5). RULER@1M критичен для git history</li>
|
||
<li><strong style="color:var(--accent-red)">⚠ Prompt Adherence (IF) — новый фактор!</strong> Nemotron 3 Super имеет IF=78 (ниже GLM-5=90, Qwen3.5=92, Qwen3.6+=91). Для ролей с жёстким промптом (evaluator, security-auditor, orchestrator) это снижает эффективность. Qwen 3.6 Plus и GLM-5 лучше следуют инструкциям</li>
|
||
</ul>
|
||
</div>
|
||
|
||
<div class="sec-hdr"><h2>Текущая конфигурация</h2><span class="badge badge-cyan">capability-index.yaml</span></div>
|
||
<div class="tbl-wrap">
|
||
<table class="dt" id="cfgTable"><thead><tr>
|
||
<th>Агент</th><th>Модель</th><th>Провайдер</th><th>Категория</th><th>Соответствие</th><th>Статус</th>
|
||
</tr></thead><tbody id="cfgBody"></tbody></table>
|
||
</div>
|
||
</div>
|
||
|
||
<!-- ========== TAB: GROQ ========== -->
|
||
<div id="tab-groq" class="tab-panel">
|
||
<div class="sec-hdr"><h2>Groq Free Plan — доступные модели</h2><span class="badge badge-orange">бесплатно · LPU inference</span></div>
|
||
|
||
<div class="stats-row">
|
||
<div class="stat-card groq-card"><div class="stat-label">gpt-oss-20b</div><div class="groq-speed">1200 <span style="font-size:.4em;color:var(--text-muted)">t/s</span></div><div class="stat-sub">30 RPM · 1K RPD · 200K TPD</div></div>
|
||
</div>
|
||
|
||
<div class="summary">
|
||
<h3>Анализ лимитов Groq Free для агентского pipeline</h3>
|
||
<p>При 26 агентах в pipeline, каждый агент делает 5–20 вызовов на задачу. Типичный issue проходит через 8–12 агентов = <strong>~100–200 вызовов</strong>. С лимитом 1K RPD на модель:</p>
|
||
<ul>
|
||
|
||
|
||
<li><strong>Groq Compound</strong>: всего 250 RPD, но 70K TPM — для одноразовых тяжёлых аналитических задач</li>
|
||
</ul>
|
||
</div>
|
||
|
||
<div class="sec-hdr"><h2>Все модели Groq Free Tier</h2></div>
|
||
<div class="tbl-wrap">
|
||
<table class="dt">
|
||
<thead><tr><th>Model ID</th><th>RPM</th><th>RPD</th><th>TPM</th><th>TPD</th><th>Скорость</th><th>Применение в APAW</th></tr></thead>
|
||
<tbody id="groqBody"></tbody>
|
||
</table>
|
||
</div>
|
||
</div>
|
||
|
||
<!-- ========== TAB: MODELS ========== -->
|
||
<div id="tab-models" class="tab-panel">
|
||
<div class="sec-hdr"><h2>Все доступные модели</h2><span class="badge badge-cyan">Ollama Cloud + Groq + OpenRouter Free</span></div>
|
||
<div class="frow" id="filterRow"></div>
|
||
<div class="model-grid" id="modelGrid"></div>
|
||
</div>
|
||
|
||
<!-- ========== TAB: HEATMAP ========== -->
|
||
<div id="tab-heatmap" class="tab-panel">
|
||
<div class="hm-wrap">
|
||
<div class="hm-title">Матрица «Агент × Модель»: оценка совместимости (с учётом Prompt Adherence)</div>
|
||
<div class="hm-sub">0–100 · Взвешенная оценка = 60% бенчмарк роли + 25% Instruction Following + 15% скорость/контекст · ★ = лучший · <span style="outline:2px solid var(--accent-cyan);outline-offset:-2px;padding:0 3px;border-radius:2px">обведено</span> = текущий · <strong style="color:var(--accent-yellow)">← 11 моделей · 🟢L 🟡M 🔴H = Reasoning Effort →</strong></div>
|
||
<div style="overflow-x:auto"><table class="hm-table" id="hmTable"></table></div>
|
||
</div>
|
||
</div>
|
||
|
||
<!-- ========== TAB: RECOMMENDATIONS ========== -->
|
||
<div id="tab-recs" class="tab-panel">
|
||
<div class="sec-hdr"><h2>Рекомендации</h2><span class="badge badge-green">4 замены (2 BROKEN) + 7 подтверждений 06.04.2026</span></div>
|
||
|
||
<div style="display:flex;gap:10px;margin-bottom:18px;flex-wrap:wrap;align-items:center;">
|
||
<button onclick="toggleAllRecs()" class="export-btn" id="selectAllBtn">☑ Выбрать все</button>
|
||
<button onclick="exportJSON()" class="export-btn export-btn-primary" id="exportBtn">
|
||
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" style="vertical-align:-2px;margin-right:4px"><path d="M21 15v4a2 2 0 01-2 2H5a2 2 0 01-2-2v-4"/><polyline points="7 10 12 15 17 10"/><line x1="12" y1="15" x2="12" y2="3"/></svg>
|
||
Скачать JSON для агента
|
||
</button>
|
||
<span id="selectedCount" style="font-family:'JetBrains Mono',monospace;font-size:.75em;color:var(--text-muted);">0 из 11 выбрано</span>
|
||
</div>
|
||
|
||
<div class="rec-grid" id="recGrid"></div>
|
||
|
||
<!-- JSON Preview Modal -->
|
||
<div id="jsonModal" style="display:none;position:fixed;top:0;left:0;right:0;bottom:0;background:rgba(0,0,0,.7);z-index:9999;justify-content:center;align-items:center;padding:20px;">
|
||
<div style="background:var(--bg-panel);border:1px solid var(--accent-cyan);border-radius:14px;max-width:800px;width:100%;max-height:85vh;display:flex;flex-direction:column;box-shadow:0 20px 60px rgba(0,0,0,.5);">
|
||
<div style="display:flex;justify-content:space-between;align-items:center;padding:18px 22px;border-bottom:1px solid var(--border);">
|
||
<div>
|
||
<div style="font-weight:700;font-size:1.05em;">agent-model-recommendations.json</div>
|
||
<div style="font-size:.75em;color:var(--text-muted);margin-top:2px;font-family:'JetBrains Mono',monospace">Готов для передачи агенту-оркестратору</div>
|
||
</div>
|
||
<div style="display:flex;gap:8px;">
|
||
<button onclick="copyJSON()" class="export-btn" id="copyBtn">📋 Копировать</button>
|
||
<button onclick="downloadJSON()" class="export-btn export-btn-primary">⬇ Скачать .json</button>
|
||
<button onclick="closeModal()" class="export-btn" style="border-color:var(--accent-red);color:var(--accent-red);">✕</button>
|
||
</div>
|
||
</div>
|
||
<pre id="jsonPreview" style="flex:1;overflow:auto;padding:18px 22px;margin:0;font-family:'JetBrains Mono',monospace;font-size:.78em;line-height:1.6;color:var(--accent-green);background:var(--bg-deep);border-radius:0 0 14px 14px;"></pre>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
<!-- ========== TAB: IMPACT ========== -->
|
||
<div id="tab-impact" class="tab-panel">
|
||
<div class="sec-hdr"><h2>Совокупный анализ профита</h2><span class="badge badge-green">если применить все рекомендации</span></div>
|
||
<div class="stats-row">
|
||
<div class="stat-card"><div class="stat-label">Средний прирост</div><div class="stat-value grad-green">+12</div><div class="stat-sub">пунктов по матрице</div></div>
|
||
<div class="stat-card"><div class="stat-label">Применено</div><div class="stat-value grad-red">8/8</div><div class="stat-sub">все рекомендации ✅</div></div>
|
||
<div class="stat-card"><div class="stat-label">Qwen 3.6+</div><div class="stat-value grad-purple">0</div><div class="stat-sub">полностью на Ollama!</div></div>
|
||
<div class="stat-card"><div class="stat-label">GLM-5.1</div><div class="stat-value grad-orange">12</div><div class="stat-sub">10 custom + 2 built-in</div></div>
|
||
</div>
|
||
<div class="chart-wrap">
|
||
<div class="chart-title">Прирост по категориям: до → после</div>
|
||
<canvas id="impactCanvas" height="340"></canvas>
|
||
</div>
|
||
<div class="summary">
|
||
<h3>Детальный анализ прироста</h3>
|
||
<div id="impactDetails"></div>
|
||
|
||
<div style="margin-top:32px">
|
||
<div class="sec-hdr"><h2>APAW Pipeline vs ТОП закрытых моделей (апрель 2026)</h2></div>
|
||
<p style="font-size:.82em;color:var(--text-muted);margin-bottom:16px">
|
||
Сравнение лучших моделей в вашем pipeline с лидерами рынка по ключевым бенчмаркам.
|
||
<strong style="color:var(--accent-green)">🟢</strong> = APAW обгоняет,
|
||
<strong style="color:var(--accent-yellow)">🟡</strong> = на уровне (±3%),
|
||
<strong style="color:var(--accent-red)">🔴</strong> = отстаёт
|
||
</p>
|
||
<div style="overflow-x:auto">
|
||
<table id="benchTable" style="width:100%;border-collapse:collapse;font-size:.78em;font-family:'JetBrains Mono',monospace">
|
||
</table>
|
||
</div>
|
||
<p style="font-size:.72em;color:var(--text-muted);margin-top:12px">
|
||
* SWE-V = SWE-Bench Verified, SWE-P = SWE-Bench Pro, T-Bench = Terminal-Bench 2.0, LCB = LiveCodeBench, GPQA = GPQA Diamond<br>
|
||
Данные: swebench.com, marc0.dev, tokenmix.ai, ollama.com — апрель 2026. Стоимость: примерная за 1M input tokens.
|
||
</p>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
<div id="ttOverlay"><div id="ttBox"></div></div>
|
||
|
||
<script>
|
||
// Actual agent → model assignments, snapshot of _kilo.zip (April 25, 2026).
// 32 custom agents + 4 built-in modes = 36 rows.
// Rows per model (custom + built-in): qwen3-coder:480b 8+1, glm-5.1 10+2,
// nemotron-3-super 6+1, kimi-k2.6:cloud 4+0, minimax-m2.5 3+0, nemotron-3-nano:30b 1+0.
// No row is assigned to Qwen 3.6+, GLM-5 or DeepSeek.
// Row fields: a = agent, m = model id, p = provider, cat = category,
// b = model badge CSS class, fit = fit score (0-100), s = status key.
// Array order is the display order of the configuration table.
const cfg = [
  // Core developers + SDET: Qwen3-Coder 480B, except frontend-developer (MiniMax M2.5)
  {a:'lead-developer',m:'qwen3-coder:480b',p:'Ollama',cat:'Core Dev',b:'qwen',fit:92,s:'optimal'},
  {a:'frontend-developer',m:'minimax-m2.5',p:'Ollama',cat:'Core Dev',b:'minimax',fit:92,s:'optimal'},
  {a:'backend-developer',m:'qwen3-coder:480b',p:'Ollama',cat:'Core Dev',b:'qwen',fit:91,s:'optimal'},
  {a:'go-developer',m:'qwen3-coder:480b',p:'Ollama',cat:'Core Dev',b:'qwen',fit:85,s:'optimal'},
  {a:'flutter-developer',m:'qwen3-coder:480b',p:'Ollama',cat:'Core Dev',b:'qwen',fit:86,s:'optimal'},
  {a:'php-developer',m:'qwen3-coder:480b',p:'Ollama',cat:'Core Dev',b:'qwen',fit:87,s:'optimal'},
  {a:'python-developer',m:'qwen3-coder:480b',p:'Ollama',cat:'Core Dev',b:'qwen',fit:90,s:'optimal'},
  {a:'sdet-engineer',m:'qwen3-coder:480b',p:'Ollama',cat:'QA',b:'qwen',fit:88,s:'optimal'},
  // Process / analysis agents: GLM-5.1, except orchestrator (Kimi K2.6)
  {a:'orchestrator',m:'kimi-k2.6:cloud',p:'Ollama Cloud',cat:'Process',b:'kimi',fit:92,s:'optimal'},
  {a:'evaluator',m:'glm-5.1',p:'Ollama',cat:'Process',b:'glm',fit:86,s:'optimal'},
  {a:'capability-analyst',m:'glm-5.1',p:'Ollama',cat:'Analysis',b:'glm',fit:85,s:'optimal'},
  {a:'architect-indexer',m:'glm-5.1',p:'Ollama',cat:'Analysis',b:'glm',fit:88,s:'optimal'},
  {a:'pipeline-judge',m:'glm-5.1',p:'Ollama',cat:'Process',b:'glm',fit:86,s:'good'},
  {a:'release-manager',m:'glm-5.1',p:'Ollama',cat:'Process',b:'glm',fit:82,s:'good'},
  {a:'requirement-refiner',m:'glm-5.1',p:'Ollama',cat:'Analysis',b:'glm',fit:88,s:'optimal'},
  {a:'workflow-architect',m:'glm-5.1',p:'Ollama',cat:'Workflow',b:'glm',fit:84,s:'good'},
  // Meta / security / cognitive agents: Nemotron 3 Super, except agent-architect (Kimi K2.6)
  {a:'agent-architect',m:'kimi-k2.6:cloud',p:'Ollama Cloud',cat:'Meta',b:'kimi',fit:86,s:'optimal'},
  {a:'security-auditor',m:'nemotron-3-super',p:'Ollama',cat:'Security',b:'nemotron',fit:76,s:'good'},
  {a:'performance-engineer',m:'nemotron-3-super',p:'Ollama',cat:'Performance',b:'nemotron',fit:78,s:'good'},
  {a:'history-miner',m:'nemotron-3-super',p:'Ollama',cat:'Analysis',b:'nemotron',fit:85,s:'optimal'},
  {a:'memory-manager',m:'nemotron-3-super',p:'Ollama',cat:'Cognitive',b:'nemotron',fit:86,s:'optimal'},
  {a:'planner',m:'nemotron-3-super',p:'Ollama',cat:'Cognitive',b:'nemotron',fit:80,s:'good'},
  {a:'reflector',m:'nemotron-3-super',p:'Ollama',cat:'Cognitive',b:'nemotron',fit:78,s:'good'},
  // Testing / management agents (mixed: Kimi K2.6, GLM-5.1, Qwen3-Coder)
  {a:'browser-automation',m:'kimi-k2.6:cloud',p:'Ollama Cloud',cat:'Testing',b:'kimi',fit:86,s:'optimal'},
  {a:'product-owner',m:'glm-5.1',p:'Ollama',cat:'Management',b:'glm',fit:84,s:'optimal'},
  {a:'visual-tester',m:'qwen3-coder:480b',p:'Ollama',cat:'Testing',b:'qwen',fit:82,s:'optimal'},
  // Prompt / system analysis agents: GLM-5.1
  {a:'prompt-optimizer',m:'glm-5.1',p:'Ollama',cat:'Process',b:'glm',fit:82,s:'good'},
  {a:'system-analyst',m:'glm-5.1',p:'Ollama',cat:'Analysis',b:'glm',fit:90,s:'optimal'},
  // Review / fix agents: MiniMax M2.5
  {a:'code-skeptic',m:'minimax-m2.5',p:'Ollama',cat:'QA',b:'minimax',fit:85,s:'good'},
  {a:'the-fixer',m:'minimax-m2.5',p:'Ollama',cat:'Fixes',b:'minimax',fit:88,s:'optimal'},
  // DevOps: Kimi K2.6
  {a:'devops-engineer',m:'kimi-k2.6:cloud',p:'Ollama Cloud',cat:'DevOps',b:'kimi',fit:88,s:'optimal'},
  // Validation: Nemotron Nano
  {a:'markdown-validator',m:'nemotron-3-nano:30b',p:'Ollama',cat:'Validation',b:'nemotron',fit:70,s:'good'},
  // Built-in modes (from kilo.jsonc)
  {a:'[built-in] code',m:'qwen3-coder:480b',p:'Ollama',cat:'Built-in',b:'qwen',fit:92,s:'optimal'},
  {a:'[built-in] ask',m:'glm-5.1',p:'Ollama',cat:'Built-in',b:'glm',fit:88,s:'optimal'},
  {a:'[built-in] plan',m:'nemotron-3-super',p:'Ollama',cat:'Built-in',b:'nemotron',fit:80,s:'good'},
  {a:'[built-in] debug',m:'glm-5.1',p:'Ollama',cat:'Built-in',b:'glm',fit:88,s:'optimal'},
];
|
||
|
||
// Groq-hosted models shown in the Groq table.
// Fields: id = model id, rpm/rpd/tpm/tpd = rate-limit columns rendered verbatim
// (presumably requests/tokens per minute/day — confirm against Groq docs),
// speed = throughput label rendered with a " t/s" suffix, use = usage note.
const groqModels = [
  {id:'openai/gpt-oss-20b',rpm:30,rpd:'1K',tpm:'8K',tpd:'200K',speed:'1200+',use:'Ультра-быстрый fallback для лёгких ролей (markdown-validator).'},
  {id:'llama-3.1-8b-instant',rpm:30,rpd:'14.4K',tpm:'6K',tpd:'500K',speed:'~800',use:'14.4K RPD! Самый высокий лимит. Для health-check / ping ролей.'},
  {id:'groq/compound',rpm:30,rpd:'250',tpm:'70K',tpd:'—',speed:'varies',use:'Мультимодельная агрегация. Для research-задач.'},
  {id:'groq/compound-mini',rpm:30,rpd:'250',tpm:'70K',tpd:'—',speed:'varies',use:'Лёгкая версия compound.'},
  {id:'llama-prompt-guard-2',rpm:30,rpd:'14.4K',tpm:'15K',tpd:'500K',speed:'~1K',use:'Security: входной фильтр для security-auditor (14.4K RPD!).'},
];
|
||
|
||
// Model catalogue rendered as cards in the model grid.
// Fields: n = display name, org = vendor, par = parameter summary, ctx = context window,
// swe = SWE-bench score in % (null when not available), cat = filter categories,
// str = description text, tags = tag chips, or = true adds the "OpenRouter FREE" badge.
// ifScore = Instruction Following composite (IFEval + IFBench + agent prompt adherence), 0-100
const ollamaModels = [
  {n:'Qwen3-Coder 480B',org:'Qwen',par:'480B/35B active',ctx:'256K→1M',swe:66.5,ifScore:88,cat:['coding','agent'],str:'SOTA open-source кодинг. Сравним с Claude Sonnet 4.',tags:['coding','agent','tools']},
  {n:'MiniMax M2.5',org:'MiniMax',par:'MoE undisclosed',ctx:'128K',swe:80.2,ifScore:82,cat:['coding','agent'],str:'Лидер SWE-bench 80.2%. Полный lifecycle разработки.',tags:['coding','agent']},
  {n:'MiniMax M2.7',org:'MiniMax',par:'~10B active',ctx:'128K',swe:78,ifScore:80,cat:['coding','agent','efficient'],str:'Самообучаемая. 56.2% SWE-Pro. 100 TPS. $0.30/M.',tags:['coding','agent','self-evolving']},
  {n:'DeepSeek V4-Pro',org:'DeepSeek',par:'1.6T/49B active MoE',ctx:'1M',swe:80.6,ifScore:89,cat:['coding','agent','reasoning'],str:'SWE-V 80.6, LiveCodeBench 93.5(#1!), Terminal-Bench 67.9, Codeforces 3206, 1M ctx, 27% FLOPs vs V3.2. MIT.',tags:['coding','agent','thinking','tools']},
  // Was also named 'DeepSeek V4-Pro'; the 284B/13B parameters and the "Flash" description identify V4-Flash.
  {n:'DeepSeek V4-Flash',org:'DeepSeek',par:'284B/13B active MoE',ctx:'1M',swe:79.0,ifScore:86,cat:['coding','efficient','agent'],str:'SWE-V ~79%, Flash Max = Pro уровень reasoning. 13B active = ультрабыстрый. 1M ctx. FP4+FP8. MIT.',tags:['coding','efficient','agent','thinking']},
  {n:'Kimi K2.6',org:'Moonshot AI',par:'1T/32B active MoE',ctx:'256K',swe:80.2,ifScore:91,cat:['coding','agent','multimodal'],str:'SWE-Pro 58.6(#1!), SWE-V 80.2, Terminal-Bench 66.7, HLE 54.0(#1!), BrowseComp 83.2. 13h autonomous. 300 sub-agent swarm. Modified MIT.',tags:['coding','agent','swarm','vision','thinking','tools']},
  {n:'Nemotron 3 Super',org:'NVIDIA',par:'120B/12B active',ctx:'1M',swe:60.5,ifScore:78,cat:['agent','reasoning','efficient'],str:'SWE-bench 60.5%. RULER@1M 91.75%! Но IF ниже — Mamba-layers иногда «теряют» инструкции в длинных промптах.',tags:['agent','1M-ctx','thinking']},
  {n:'GLM-5',org:'Z.ai',par:'744B/40B active',ctx:'128K',swe:null,ifScore:90,cat:['reasoning','agent'],str:'Мощный reasoning. Arena ELO 1451. Отличный instruction following (IFEval ~90+).',tags:['reasoning','agent']},
  // NOTE(review): was a third 'DeepSeek V4-Pro' card; the 128K context and weaker IF suggest
  // the older DeepSeek V3.2 entry caught by a bulk rename — confirm the intended name.
  {n:'DeepSeek V3.2',org:'DeepSeek',par:'Large MoE',ctx:'128K',swe:null,ifScore:75,cat:['reasoning'],str:'Хороший reasoning, но IF нестабилен — иногда игнорирует формат вывода.',tags:['reasoning']},
  {n:'Qwen 3.5 122B',org:'Qwen',par:'122B/10B active',ctx:'128K',swe:null,ifScore:92,cat:['reasoning','efficient'],str:'IFEval 92.6%! Лучший IF среди open-source. Multimodal. Thinking.',tags:['vision','thinking','tools']},
  {n:'Qwen3-Coder-Next',org:'Qwen',par:'80B/3B active',ctx:'128K',swe:70,ifScore:84,cat:['coding','efficient'],str:'70% SWE-bench с 3B active! Хороший IF для кодинга.',tags:['coding','efficient','tools']},
  {n:'Cogito 2.1 671B',org:'Cognitive',par:'671B MoE',ctx:'128K',swe:null,ifScore:76,cat:['reasoning'],str:'MIT лицензия. 671B total. IF неплохой, но уступает GLM/Qwen.',tags:['reasoning']},
  {n:'Qwen 3.6 Plus',org:'Qwen',par:'Hybrid MoE',ctx:'1M',swe:78.8,ifScore:91,cat:['coding','agent','reasoning'],str:'FREE на OpenRouter! 1M контекст. Always-on CoT. Превосходный IF — наследник Qwen 3.5 (92.6%).',tags:['coding','agent','1M-ctx','free'],or:true},
  {n:'Step 3.5 Flash',org:'StepFun',par:'MoE',ctx:'128K',swe:null,ifScore:79,cat:['efficient'],str:'Бесплатна на OpenRouter. IF средний.',tags:['efficient','free'],or:true},
  {n:'DeepSeek R1',org:'DeepSeek',par:'671B MoE',ctx:'128K',swe:null,ifScore:73,cat:['reasoning'],str:'Мощные reasoning-цепочки. Но IF слабый — часто генерирует лишний reasoning вместо ответа.',tags:['reasoning','thinking','free'],or:true},
];
|
||
|
||
// Heatmap: agents × models (Ollama + Groq-unique models)
// Instruction Following compliance scores (0-100) based on IFEval/IFBench/BenchLM data April 2026
// Higher = better follows system prompts, role definitions, output format constraints
// Keys 0-10 are heatmap column indices and carry the same values as hmModels[i].if.
// Keys 12 and 13 have no heatmap column (there is no key 11); they are kept as-is.
// NOTE(review): some values differ from ollamaModels[].ifScore for the same model
// (e.g. Nemotron 3 Super: 85 here vs 78 there) — confirm which source is authoritative.
const ifScores = {
  0: 88, // Qwen3-Coder 480B — strong IF from agentic RL
  1: 82, // MiniMax M2.5 — good IF, spec-writing
  2: 78, // MiniMax M2.7 — slightly weaker IF
  3: 85, // Nemotron 3 Super — strong IF, agent-trained
  4: 80, // GLM-5 — function calling leader
  5: 88, // GLM-5.1 — SWE-Pro #1, 8-hour autonomous, improved IF
  6: 88, // V4-Pro Max — 1.6T/49B, 1M ctx, SWE-V 80.6, Terminal 67.9, LiveCodeBench 93.5! MIT
  7: 86, // Qwen 3.5 122B — improved IF
  8: 84, // Qwen3-Coder-Next — agentic training
  9: 90, // Qwen 3.6 Plus — always-on CoT, best IF
  10: 91, // Kimi K2.6 — Intelligence Index 54, 13h autonomous, improved IF
  12: 89, // DeepSeek V4-Pro — 1.6T/49B, 1M ctx, LiveCodeBench 93.5, MIT
  13: 86, // DeepSeek V4-Flash — 284B/13B, 1M ctx, efficient, MIT
};
|
||
|
||
// IF-adjusted heatmap: raw_score * (0.7 + 0.3 * IF/100)
// This means IF=100 → score×1.0, IF=50 → score×0.85, IF=0 → score×0.7
/**
 * Scale per-model raw scores by each model's instruction-following (IF) score.
 *
 * @param {number[]} scores - Raw scores; the array index is the model index.
 * @param {Object<number, number>} [table=ifScores] - IF score per model index (0-100).
 * @returns {number[]} Rounded, IF-adjusted scores in the same order.
 */
function adjustForIF(scores, table = ifScores) {
  const DEFAULT_IF = 70; // used only when a model has no IF entry at all
  return scores.map((raw, idx) => {
    const known = table[idx];
    // An explicit IF of 0 is a real value and must yield the documented ×0.7 multiplier,
    // so only undefined/null fall back to the default (the old `|| 70` also swallowed 0).
    const ifScore = known === undefined || known === null ? DEFAULT_IF : known;
    const mult = 0.7 + 0.3 * (ifScore / 100);
    return Math.round(raw * mult);
  });
}
|
||
|
||
// Heatmap columns, in column order (index = position in every hmAgents[].s array).
// Fields: n = column label, p = provider label, if = instruction-following score (0-100).
const hmModels = [
  {n:'Qwen3-Coder',p:'Ollama',if:88},
  {n:'M2.5',p:'Ollama',if:82},
  {n:'M2.7',p:'Ollama',if:78},
  {n:'Nem.3 Super',p:'Ollama',if:85},
  {n:'GLM-5',p:'Ollama',if:80},
  {n:'🔥 GLM-5.1',p:'Ollama',if:88},
  {n:'🔥 V4-Pro Max',p:'Ollama Cloud',if:88},
  {n:'Qwen 3.5',p:'Ollama',if:86},
  {n:'Q3-Coder-Next',p:'Ollama',if:84},
  {n:'Qwen 3.6+',p:'OpenRouter',if:90},
  {n:'\u{1f525} Kimi K2.6',p:'Ollama Cloud',if:91}
];
|
||
|
||
// Heatmap rows: one entry per agent.
// Fields: n = agent name, c = index of the currently assigned model column
// (-1 = current model is not a heatmap column), re = reasoning effort (L/M/H),
// s = score per model column, in hmModels order.
const hmAgents = [
  // Column indices:
  // 0=Qwen3Coder 1=M2.5 2=M2.7 3=Nem.Super 4=GLM-5 5=GLM-5.1 6=V4-Pro-Max 7=Qwen3.5 8=Q3CoderNext 9=Qwen3.6+ 10=KimiK2.6
  // === Coding agents: current = Qwen3-Coder (c:0), except frontend-developer on M2.5 (c:1) ===
  {n:'lead-developer',c:0,re:'M',s:[92,86,82,70,68,75,88,66,80,88,90]},
  {n:'frontend-developer',c:1,re:'M',s:[86,92,88,62,56,64,82,60,76,88,86]},
  {n:'backend-developer',c:0,re:'M',s:[91,84,80,68,63,72,86,62,78,87,90]},
  {n:'go-developer',c:0,re:'M',s:[85,78,74,66,58,68,88,58,74,82,86]},
  {n:'flutter-developer',c:0,re:'M',s:[86,70,66,60,53,62,78,58,74,82,84]},
  {n:'php-developer',c:0,re:'M',s:[87,76,72,64,56,66,74,60,76,84,86]},
  {n:'python-developer',c:0,re:'M',s:[90,82,78,66,60,70,78,64,78,88,88]},
  {n:'sdet-engineer',c:0,re:'M',s:[88,84,80,70,63,72,84,64,78,84,87]},
  // === Process / analysis agents: current = GLM-5.1 (c:5), except orchestrator on Kimi K2.6 (c:10) ===
  {n:'orchestrator',c:10,re:'M',s:[74,70,68,80,82,90,86,78,62,84,92]},
  {n:'evaluator',c:5,re:'M',s:[70,73,70,78,78,86,84,76,58,81,84]},
  {n:'capability-analyst',c:5,re:'M',s:[72,68,66,76,78,85,82,75,60,79,82]},
  {n:'architect-indexer',c:5,re:'M',s:[70,64,62,74,80,88,78,76,58,80,84]},
  {n:'pipeline-judge',c:5,re:'L',s:[64,68,65,78,76,86,82,74,56,80,84]},
  {n:'release-manager',c:5,re:'L',s:[72,66,64,74,76,82,78,72,60,76,78]},
  {n:'requirement-refiner',c:5,re:'M',s:[66,62,60,72,80,88,82,74,54,78,82]},
  {n:'workflow-architect',c:5,re:'M',s:[68,62,60,76,76,84,80,72,56,80,82]},
  // === Meta / security / cognitive agents: current = Nemotron 3 Super (c:3), except agent-architect on Kimi K2.6 (c:10) ===
  {n:'agent-architect',c:10,re:'H',s:[78,72,70,78,76,84,82,76,66,82,86]},
  {n:'security-auditor',c:3,re:'H',s:[76,74,68,76,68,78,80,72,64,75,80]},
  {n:'performance-engineer',c:3,re:'M',s:[78,75,70,78,74,82,84,70,67,76,82]},
  {n:'history-miner',c:3,re:'L',s:[68,60,56,85,78,88,86,72,56,84,82]},
  {n:'memory-manager',c:3,re:'M',s:[63,58,56,86,72,84,86,70,50,87,84]},
  {n:'planner',c:3,re:'H',s:[72,68,66,80,78,85,88,78,60,85,86]},
  {n:'reflector',c:3,re:'M',s:[68,66,64,78,76,82,84,76,56,82,80]},
  // === Testing / management agents (mixed: Kimi K2.6 c:10, GLM-5.1 c:5, Qwen3-Coder c:0) ===
  {n:'browser-automation',c:10,re:'M',s:[87,72,68,61,53,64,82,56,72,82,86]},
  {n:'product-owner',c:5,re:'L',s:[60,56,54,74,78,84,76,74,48,78,76]},
  {n:'visual-tester',c:0,re:'M',s:[82,68,64,55,48,58,76,54,66,76,78]},
  // === Prompt / system analysis agents: current = GLM-5.1 (c:5) ===
  {n:'prompt-optimizer',c:5,re:'M',s:[76,74,72,76,75,82,80,74,64,83,82]},
  {n:'system-analyst',c:5,re:'H',s:[70,66,63,74,82,90,88,76,58,80,86]},
  // === Review / fix agents: current = M2.5 (c:1) ===
  {n:'code-skeptic',c:1,re:'M',s:[82,85,80,73,72,78,82,70,72,80,82]},
  {n:'the-fixer',c:1,re:'M',s:[89,88,84,71,64,74,88,64,82,86,90]},
  // === DevOps: current = Kimi K2.6 (c:10) ===
  {n:'devops-engineer',c:10,re:'M',s:[66,53,48,78,75,84,86,70,54,76,88]},
  // === Nano (c:-1, not in matrix) ===
  {n:'markdown-validator',c:-1,re:'L',s:[43,38,36,52,55,62,68,56,40,50,56]},
  // === Built-in ===
  {n:'[built-in] debug',c:5,re:'H',s:[78,80,76,72,64,88,90,68,76,85,90]},
];
|
||
|
||
// Recommendation cards, in display order.
// Fields: a = agent / card title, from / to = current and recommended model labels,
// fromP / toP = provider labels (also lower-cased into CSS classes), imp = impact level,
// q = quality delta, sp = speed delta, ctx = context window, prov = provider text,
// r = rationale text.
const recs = [
  // === Previous 7+2 recommendations are applied. V4-Pro Max is now available. ===

  // --- New: V4-Pro Max replacements ---
  {a:"[built-in] debug",from:"glm-5.1 (88)",fromP:"Ollama",to:"V4-Pro Max (★90) / K2.6 (★90) RE:High",toP:"Ollama Cloud",imp:"high",
    q:"+2%",sp:"~1x",ctx:"200K→1M",prov:"Ollama Cloud",
    r:"★ матрицы: V4-Pro=90 и K2.6=90 (TIE!), GLM-5.1=88. V4-Pro: LiveCodeBench 93.5(#1!), Terminal 67.9, 1M ctx для полного проекта. K2.6: 13h auto sessions. Оба лучше GLM-5.1. RE:High для debug."},
  {a:"planner",from:"nemotron-3-super (80)",fromP:"Ollama",to:"V4-Pro Max (★88) RE:High",toP:"Ollama Cloud",imp:"high",
    q:"+10%",sp:"~1x",ctx:"1M",prov:"Ollama Cloud",
    r:"★ матрицы: V4-Pro=88(лучший!), K2.6=86, GLM-5.1=85, Nem=80. V4-Pro: GPQA 90.1 (reasoning), 1M ctx сохраняется (vs потеря при K2.6). RE:High для chain-of-thought planning."},
  {a:"go-developer",from:"qwen3-coder:480b (85)",fromP:"Ollama",to:"V4-Pro Max (★88) RE:Medium",toP:"Ollama Cloud",imp:"medium",
    q:"+4%",sp:"~1x",ctx:"256K→1M",prov:"Ollama Cloud",
    r:"★ матрицы: V4-Pro=88(лучший для Go!), K2.6=86, Qwen3Coder=85. DeepSeek модели традиционно сильны в Go/Rust. 1M ctx для крупных Go-проектов."},
  {a:"history-miner",from:"nemotron-3-super (★85)",fromP:"Ollama",to:"V4-Pro Max (86) + Nem fallback",toP:"Hybrid",imp:"medium",
    q:"+1%",sp:"~1x",ctx:"1M",prov:"Ollama Cloud + Ollama",
    r:"V4-Pro=86 чуть лучше Nemotron=85. 1M ctx у обоих. MRCR 83.5 у V4-Pro — лучшее long-context retrieval. Nemotron как fallback (RULER 91.75%)."},

  // --- Applied (for reference) ---
  {a:"frontend-dev → M2.5",from:"qwen3-coder (90)",fromP:"Ollama",to:"MiniMax M2.5 (★92) ✅",toP:"Ollama",imp:"low",
    q:"+2%",sp:"=",ctx:"204K",prov:"Ollama",r:"Spec-writing, UI architect. APPLIED."},
  {a:"devops → K2.6",from:"deepseek-v3.2",fromP:"",to:"kimi-k2.6:cloud ✅",toP:"Ollama Cloud",imp:"low",
    q:"+35%",sp:"=",ctx:"256K",prov:"",r:"APPLIED."},

  // --- Fine-tuning ---
  {a:"orchestrator",from:"glm-5.1 (★90)",fromP:"Ollama",to:"K2.6 (★92) RE:Medium",toP:"Ollama Cloud",imp:"medium",
    q:"+2%",sp:"~1x",ctx:"200K→256K",prov:"Ollama Cloud",
    r:"K2.6=92★ всё ещё лучший для orchestration. V4-Pro=86 слабее. 300 sub-agent swarm."},
  {a:"the-fixer",from:"minimax-m2.5 (★88)",fromP:"Ollama",to:"V4-Pro (★88) / K2.6 (★90)",toP:"Ollama Cloud",imp:"medium",
    q:"+2%",sp:"~1x",ctx:"128K→1M/256K",prov:"Ollama Cloud",
    r:"K2.6=90(лучший), V4-Pro=88=M2.5. M2.5 SWE-bench 80.2% стабильнее. Не срочно."},

  // --- Confirmations (no change) ---
  {a:"Qwen3-Coder (7 coding)",from:"qwen3-coder",fromP:"Ollama",to:"✅",toP:"",imp:"low",
    q:"=0%",sp:"=",ctx:"256K",prov:"Ollama",r:"lead=92★, backend=91★, python=90★."},
  {a:"GLM-5.1 (12 agents)",from:"glm-5.1",fromP:"Ollama",to:"✅",toP:"",imp:"low",
    q:"=0%",sp:"=",ctx:"200K",prov:"Ollama",r:"orchestrator=90, system-analyst=90. SWE-Pro #1."},
  {a:"Kimi K2.6 (3 agents)",from:"kimi-k2.6",fromP:"Ollama Cloud",to:"✅",toP:"",imp:"low",
    q:"=0%",sp:"=",ctx:"256K",prov:"Ollama Cloud",r:"devops=88★, browser=86, agent-arch=86."},
];
|
||
|
||
// Rows of the impact list.
// Fields: cat = row title, b = score before, a = score after, d = delta (a - b), n = note.
const impactData = [
  {cat:"debug GLM5.1→V4-Pro/K2.6",b:88,a:90,d:2,n:"LiveCodeBench 93.5, Terminal 67.9"},
  {cat:"planner Nem→V4-Pro Max",b:80,a:88,d:8,n:"★88! GPQA 90.1, 1M ctx"},
  {cat:"go-dev Coder→V4-Pro Max",b:85,a:88,d:3,n:"★88! Go/Rust specialist, 1M ctx"},
  {cat:"history-miner →V4-Pro",b:85,a:86,d:1,n:"MRCR 83.5, long-context"},
  {cat:"orchestrator →K2.6 (next)",b:90,a:92,d:2,n:"300 sub-agent swarm"},
  {cat:"frontend → M2.5 ✅",b:90,a:92,d:2,n:"Spec-writing, UI architect"},
  {cat:"devops → K2.6 ✅",b:65,a:88,d:23,n:"IF:65→91! Terminal 66.7"},
  {cat:"Qwen3-Coder (7) ✅",b:90,a:90,d:0,n:"SOTA coding"},
  {cat:"GLM-5.1 (12) ✅",b:87,a:87,d:0,n:"SWE-Pro #1"},
  {cat:"Nemotron Super (6) ✅",b:82,a:82,d:0,n:"1M ctx, RULER 91.75%"},
];
|
||
|
||
|
||
// ======================= RENDER =======================
|
||
/**
 * Activate the tab panel `tab-<id>` and highlight the tab button that was clicked.
 *
 * @param {string} id - Tab id; the panel element is looked up as `tab-` + id.
 * @param {Event} [evt] - Triggering event. Optional: existing inline handlers that call
 *   `switchTab('x')` keep working because the function falls back to `window.event`.
 */
function switchTab(id, evt) {
  const panel = document.getElementById('tab-' + id);
  // Unknown tab id: leave the current tab untouched instead of clearing everything and throwing.
  if (!panel) return;

  document.querySelectorAll('.tab-panel').forEach((p) => p.classList.remove('active'));
  document.querySelectorAll('.tab-btn').forEach((b) => b.classList.remove('active'));
  panel.classList.add('active');

  // Avoid the implicit global `event`; take the event explicitly when it is provided.
  const trigger = evt || (typeof window !== 'undefined' ? window.event : undefined);
  const target = trigger ? trigger.target : null;
  if (target) {
    // A click may land on a child of the button (icon, span): highlight the button itself.
    const btn = typeof target.closest === 'function' ? target.closest('.tab-btn') : null;
    (btn || target).classList.add('active');
  }

  // The impact tab draws its chart only once it is visible.
  if (id === 'impact') requestAnimationFrame(() => setTimeout(drawChart, 50));
}
|
||
|
||
/**
 * Render the agent configuration table body (`#cfgBody`) from `cfg`.
 * Each row shows agent, model badge (with an optional struck-through previous model
 * from `prev`), provider tag, category, fit bar and status.
 */
function renderCfg() {
  // status key → [icon, label]; any other key renders as "needs improvement".
  const STATUS = {
    optimal: ['✅', 'Оптимально'],
    good: ['🟡', 'Хорошо'],
    overspec: ['🔵', 'Overspec'],
    new: ['🆕', 'Не назначена'],
    broken: ['💀', 'НЕ РАБОТАЕТ'],
  };
  const STATUS_FALLBACK = ['🔴', 'Улучшить'];

  const rows = cfg.map((c, i) => {
    const known = Object.prototype.hasOwnProperty.call(STATUS, c.s);
    const [icon, label] = known ? STATUS[c.s] : STATUS_FALLBACK;

    // Fit tiers: >=85 high (green), >=70 medium (orange), otherwise low (red).
    let barClass = 'l';
    let fitColor = 'var(--accent-red)';
    if (c.fit >= 85) {
      barClass = 'h';
      fitColor = 'var(--accent-green)';
    } else if (c.fit >= 70) {
      barClass = 'm';
      fitColor = 'var(--accent-orange)';
    }

    const prevHtml = c.prev
      ? `<div style="font-size:.68em;color:var(--text-muted);margin-top:2px;text-decoration:line-through">${c.prev}</div>`
      : '';

    return `<tr style="animation:fadeUp .3s ${i*.03}s ease-out both">
<td style="font-weight:600">${c.a}</td>
<td><span class="mbadge ${c.b}">${c.m}</span>${prevHtml}</td>
<td><span class="prov-tag ${c.p.toLowerCase()}">${c.p}</span></td>
<td style="color:var(--text-secondary)">${c.cat}</td>
<td><div class="sbar"><div class="sbar-bg"><div class="sbar-fill ${barClass}" style="width:${c.fit}%"></div></div><span class="snum" style="color:${fitColor}">${c.fit}</span></div></td>
<td>${icon} ${label}</td></tr>`;
  });

  document.getElementById('cfgBody').innerHTML = rows.join('');
}
|
||
|
||
/**
 * Render the Groq model table body (`#groqBody`) from `groqModels`.
 * The speed dot class is derived from the numeric part of the speed label:
 * >= 800 t/s → "ultra", >= 400 t/s → "fast", otherwise "normal".
 */
function renderGroq() {
  // Speed labels are free text ('1200+', '~800', '~1K', 'varies'). A plain parseInt()
  // returns NaN for anything starting with '~', which downgraded those rows to "normal".
  const tokensPerSecond = (label) => {
    const match = /(\d+(?:\.\d+)?)\s*(k)?/i.exec(String(label));
    if (!match) return 0; // no digits at all, e.g. 'varies'
    const value = Number(match[1]);
    return match[2] ? value * 1000 : value;
  };

  const rows = groqModels.map((g) => {
    const speed = tokensPerSecond(g.speed);
    let dotCls = 'normal';
    if (speed >= 800) dotCls = 'ultra';
    else if (speed >= 400) dotCls = 'fast';

    return `<tr>
<td><span class="mbadge groq">${g.id}</span></td>
<td>${g.rpm}</td><td>${g.rpd}</td><td>${g.tpm}</td><td>${g.tpd}</td>
<td><div class="speed-ind"><span class="speed-dot ${dotCls}"></span> ${g.speed} t/s</div></td>
<td style="color:var(--text-secondary);font-size:.82em;max-width:280px">${g.use}</td></tr>`;
  });

  document.getElementById('groqBody').innerHTML = rows.join('');
}
|
||
|
||
/**
 * Render the model catalogue: the category filter buttons (`#filterRow`) and one card
 * per entry of `ollamaModels` (`#modelGrid`). Each card stores its categories as JSON
 * in `data-cats`, which `filterM` reads back.
 */
function renderModels() {
  const GREEN = 'var(--accent-green)';
  const CYAN = 'var(--accent-cyan)';

  // Unique categories in first-seen order.
  const categories = new Set();
  for (const model of ollamaModels) {
    for (const category of model.cat) categories.add(category);
  }
  const allButton = '<button class="fbtn active" onclick="filterM(\'all\',this)">Все</button>';
  const categoryButtons = [...categories]
    .map((c) => `<button class="fbtn" onclick="filterM('${c}',this)">${c}</button>`)
    .join('');
  document.getElementById('filterRow').innerHTML = allButton + categoryButtons;

  // SWE tiers: >=75 green, >=60 cyan, otherwise the caller-supplied colour.
  // A null score compares as 0 and therefore falls through to `otherwise`.
  const sweTier = (swe, otherwise) => (swe >= 75 ? GREEN : swe >= 60 ? CYAN : otherwise);

  const cards = ollamaModels.map((m, i) => {
    const borderColor = sweTier(m.swe, 'var(--border)');
    const groqTag = m.groq ? '<span class="prov-tag groq">Groq '+m.groqSpeed+'t/s</span>' : '';
    const openRouterTag = m.or ? '<span class="prov-tag openrouter">OpenRouter FREE</span>' : '';

    const sweRow = m.swe
      ? `<div class="mc-row"><span class="mc-label">SWE-bench</span><span class="mc-val" style="color:${sweTier(m.swe, 'var(--accent-orange)')}">${m.swe}%</span></div>`
      : '';

    let ifRow = '';
    if (m.ifScore) {
      const ifColor = m.ifScore >= 88 ? GREEN
        : m.ifScore >= 80 ? CYAN
        : m.ifScore >= 72 ? 'var(--accent-orange)'
        : 'var(--accent-red)';
      const strongMark = m.ifScore >= 88 ? '🎯' : '';
      const weakMark = m.ifScore < 75 ? '⚠️' : '';
      ifRow = `<div class="mc-row"><span class="mc-label">Prompt Adherence (IF)</span><span class="mc-val" style="color:${ifColor}">${m.ifScore}<small>/100</small> ${strongMark}${weakMark}</span></div>`;
    }

    const tagChips = m.tags.map((t) => `<span class="mc-tag">${t}</span>`).join('');

    return `<div class="mc" style="animation:fadeUp .35s ${i*.05}s ease-out both;border-color:${borderColor}" data-cats='${JSON.stringify(m.cat)}'>
<div class="mc-name">${m.n} ${groqTag}${openRouterTag}</div>
<div class="mc-org">${m.org} · ${m.par} · ctx ${m.ctx}</div>
${sweRow}
${ifRow}
<div style="font-size:.78em;color:var(--text-secondary);line-height:1.45;margin-top:6px">${m.str}</div>
<div class="mc-tags">${tagChips}</div>
</div>`;
  });

  document.getElementById('modelGrid').innerHTML = cards.join('');
}
|
||
|
||
/**
 * Show only the model cards that belong to a category and mark the clicked filter button.
 *
 * @param {string} cat - Category to keep visible; 'all' shows every card.
 * @param {Element} btn - The filter button that was clicked; it becomes the active one.
 */
function filterM(cat, btn) {
  for (const other of document.querySelectorAll('.fbtn')) {
    other.classList.remove('active');
  }
  btn.classList.add('active');

  for (const card of document.querySelectorAll('.mc')) {
    // Card categories are stored as a JSON array in data-cats.
    const visible = cat === 'all' || JSON.parse(card.dataset.cats).includes(cat);
    card.style.display = visible ? '' : 'none';
  }
}
|
||
|
||
/**
 * Map a heatmap score to its cell background colour.
 *
 * @param {number} v - Score.
 * @returns {string} CSS rgba() colour of the highest tier whose minimum the score reaches;
 *   a muted grey for scores below 50.
 */
function hmColor(v) {
  // [minimum score, colour], ordered from the highest tier down.
  const tiers = [
    [88, 'rgba(0,255,148,.8)'],
    [82, 'rgba(0,212,255,.7)'],
    [75, 'rgba(59,130,246,.6)'],
    [68, 'rgba(168,85,247,.45)'],
    [60, 'rgba(255,159,67,.4)'],
    [50, 'rgba(255,71,87,.3)'],
  ];
  const tier = tiers.find(([min]) => v >= min);
  return tier ? tier[1] : 'rgba(90,104,128,.2)';
}
|
||
/** Text colour for a heatmap cell: dark text for scores >= 75, light text below. */
function hmText(v) { return v >= 75 ? '#0e1219' : '#e8edf5'; }
|
||
|
||
/**
 * Render the agent × model heatmap into `#hmTable`.
 * Header: one vertical column per `hmModels` entry (name, provider, IF score).
 * Body: one row per `hmAgents` entry; each cell shows the score, a star on the row
 * maximum, a warning mark when the column's IF score is below 75, and the `hm-cur`
 * class on the agent's current model. Hovering a cell opens the tooltip via `showTT`.
 */
function renderHeatmap() {
  const providerColor = (p) => {
    if (p === 'Groq') return '#ff6b81';
    if (p === 'Both') return '#c084fc';
    if (p.includes('Open') || p.includes('OR')) return '#e879f9';
    return 'var(--accent-cyan)';
  };

  const headerCells = hmModels.map((m) => {
    const ifColor = m.if >= 85 ? '#00ff94' : m.if >= 75 ? '#facc15' : '#ff6b81';
    return `<th style="writing-mode:vertical-lr;transform:rotate(180deg);max-width:32px;font-size:.56em;padding:3px 1px;">
${m.n}<br>
<span style="color:${providerColor(m.p)};font-size:.85em">${m.p}</span><br>
<span style="color:${ifColor};font-size:.9em;font-weight:700" title="Instruction Following score">IF:${m.if}</span>
</th>`;
  });

  // Reasoning-effort icons; unknown values fall back to the medium icon.
  const reIcons = { L: '🟢', M: '🟡', H: '🔴' };

  const bodyRows = hmAgents.map((ag) => {
    const rowMax = Math.max(...ag.s);
    const reIcon = reIcons[ag.re] || '🟡';
    const cells = ag.s.map((score, col) => {
      const model = hmModels[col];
      const best = score === rowMax;
      const cur = col === ag.c;
      const ifNote = model.if < 75 ? ' ⚠' : '';
      const star = best ? '<span class="hm-star">★</span>' : '';
      return `<td style="background:${hmColor(score)};color:${hmText(score)}" class="${cur ? 'hm-cur' : ''}"
onmouseover="showTT(event,'${ag.n}','${model.n} (${model.p})',${score},${best},${cur},${model.if})"
onmouseout="hideTT()">${score}${star}${ifNote}</td>`;
    });
    return `<tr><td class="hm-r">${reIcon} ${ag.n}</td>${cells.join('')}</tr>`;
  });

  document.getElementById('hmTable').innerHTML =
    '<thead><tr><th class="hm-role">Агент</th>' +
    headerCells.join('') +
    '</tr></thead><tbody>' +
    bodyRows.join('') +
    '</tbody>';
}
|
||
|
||
/**
 * Fill and show the heatmap tooltip just below the hovered cell.
 *
 * @param {Event} e - Mouse event; `e.target` is used for positioning.
 * @param {string} agent - Agent name.
 * @param {string} model - Model label shown as the tooltip title.
 * @param {number} score - Cell score (0-100).
 * @param {boolean} best - True when the cell is the row maximum.
 * @param {boolean} cur - True when the model is the agent's current model.
 * @param {number} ifScore - Instruction-following score of the model (0-100).
 */
function showTT(e, agent, model, score, best, cur, ifScore) {
  const box = document.getElementById('ttBox');
  const overlay = document.getElementById('ttOverlay');

  // IF tiers: >=85 excellent, >=75 medium, otherwise weak.
  let ifColor = '#ff6b81';
  let ifLabel = 'Слабо';
  if (ifScore >= 85) {
    ifColor = '#00ff94';
    ifLabel = 'Отлично';
  } else if (ifScore >= 75) {
    ifColor = '#facc15';
    ifLabel = 'Средне';
  }

  const weakWarning = ifScore < 75
    ? '<span style="color:#ff6b81">⚠ Модель плохо следует промпту и роли — оценка снижена</span><br>'
    : '';
  const bestLine = best ? '★ <strong>Лучший выбор</strong><br>' : '';
  const currentLine = cur ? '📌 <strong>Текущий выбор</strong>' : '';

  box.innerHTML = `<h4>${model}</h4><p><strong>Агент:</strong> ${agent}<br><strong>Итоговая оценка:</strong> ${score}/100<br>
<strong>Instruction Following:</strong> <span style="color:${ifColor};font-weight:700">${ifScore}/100 (${ifLabel})</span><br>
<span style="font-size:.9em;color:var(--text-muted)">Оценка = бенчмарк × IF-множитель</span><br>
${weakWarning}
${bestLine}${currentLine}</p>`;

  // Anchor under the cell; the left edge is capped 320px short of the window's right edge.
  const rect = e.target.getBoundingClientRect();
  box.style.left = Math.min(rect.left, window.innerWidth - 320) + 'px';
  box.style.top = (rect.bottom + 6) + 'px';
  overlay.classList.add('show');
}
|
||
/** Hide the heatmap tooltip by removing the `show` class from `#ttOverlay`. */
function hideTT() { document.getElementById('ttOverlay').classList.remove('show'); }
|
||
|
||
/**
 * Render one selectable card per entry of `recs` into `#recGrid`, then refresh the
 * selection counter. Every card starts checked; `data-idx` on the card and on its
 * checkbox is the index into `recs`.
 */
function renderRecs() {
  const cards = recs.map((r, i) => {
    const glowCls = r.imp === 'critical' ? 'glow' : '';
    // The speed / context values are highlighted when the label contains '10' / an arrow.
    const speedCls = r.sp.includes('10') ? 'pos' : 'neu';
    const ctxCls = r.ctx.includes('→') ? 'pos' : 'neu';

    return `
<div class="rec-card ${glowCls} selected" data-idx="${i}" style="animation:fadeUp .4s ${i*.06}s ease-out both">
<input type="checkbox" class="rec-check" checked onchange="updateRecSelection()" data-idx="${i}">
<div class="rec-hdr">
<div class="rec-agent">${r.a}</div>
<span class="impact-badge ${r.imp}">${r.imp.toUpperCase()}</span>
</div>
<div class="swap-vis">
<span class="swap-from">${r.from} <span class="prov-tag ${r.fromP.toLowerCase()}">${r.fromP}</span></span>
<span class="swap-arrow">→</span>
<span class="swap-to">${r.to} <span class="prov-tag ${r.toP.toLowerCase()}">${r.toP}</span></span>
</div>
<div class="rec-metrics">
<div class="rec-m"><div class="rec-m-label">Качество</div><div class="rec-m-val pos">${r.q}</div></div>
<div class="rec-m"><div class="rec-m-label">Скорость</div><div class="rec-m-val ${speedCls}">${r.sp}</div></div>
<div class="rec-m"><div class="rec-m-label">Контекст</div><div class="rec-m-val ${ctxCls}">${r.ctx}</div></div>
<div class="rec-m"><div class="rec-m-label">Провайдер</div><div class="rec-m-val" style="font-size:.7em;color:var(--text-secondary)">${r.prov}</div></div>
</div>
<div class="rec-reason">${r.r}</div>
</div>`;
  });

  document.getElementById('recGrid').innerHTML = cards.join('');
  updateRecSelection();
}
|
||
|
||
// ===== EXPORT SYSTEM =====
|
||
|
||
/**
 * Sync the recommendation cards with their checkboxes: toggle the `selected` class on
 * each card, update the "N of M selected" counter and dim the export button when
 * nothing is selected.
 */
function updateRecSelection() {
  let selectedCount = 0;
  for (const box of document.querySelectorAll('.rec-check')) {
    const card = box.closest('.rec-card');
    if (box.checked) {
      card.classList.add('selected');
      selectedCount += 1;
    } else {
      card.classList.remove('selected');
    }
  }
  document.getElementById('selectedCount').textContent = selectedCount + ' из ' + recs.length + ' выбрано';
  document.getElementById('exportBtn').style.opacity = selectedCount > 0 ? '1' : '.4';
}
|
||
|
||
/**
 * Select-all toggle for the recommendation cards: when every checkbox is checked,
 * uncheck all; otherwise check all. Updates the toggle button label and the counter.
 */
function toggleAllRecs() {
  const boxes = [...document.querySelectorAll('.rec-check')];
  const everyChecked = boxes.every((box) => box.checked);
  const nextState = !everyChecked;
  for (const box of boxes) {
    box.checked = nextState;
  }
  // The label names the action the next click will perform.
  document.getElementById('selectAllBtn').textContent = everyChecked ? '☑ Выбрать все' : '☐ Снять все';
  updateRecSelection();
}
|
||
|
||
/**
 * Build the export payload for the recommendations whose checkbox is checked.
 *
 * @returns {object} Export document with metadata, a static summary, the selected
 *   recommendation entries and a `capability_index_patch` (agent → model) list.
 */
function buildExportJSON() {
  // Display label → fully qualified model id for capability-index.yaml.
  // Built once per call (previously rebuilt for every checkbox).
  // NOTE(review): none of the current `recs[].to` labels match these keys, so every entry
  // falls back to 'ollama-cloud/' + label (e.g. 'ollama-cloud/✅') — confirm the intended ids.
  const modelMap = {
    'nemotron-3-super': 'ollama-cloud/nemotron-3-super-120b-a12b',
    'nemotron-3-super + Groq burst': 'ollama-cloud/nemotron-3-super-120b-a12b',
    'qwen3-coder:480b': 'ollama-cloud/qwen3-coder:480b',
    'glm-5 (перенастроить)': 'ollama-cloud/glm-5',
  };
  // Display label → fallback model id. Currently empty, so no fallback fields are emitted.
  const fallbackMap = {};
  // Own-property lookup: a label such as 'constructor' must not resolve via the prototype chain.
  const lookup = (map, key) =>
    (Object.prototype.hasOwnProperty.call(map, key) ? map[key] : undefined);

  const selected = [];
  document.querySelectorAll('.rec-check').forEach((box) => {
    if (!box.checked) return;
    const rec = recs[Number.parseInt(box.dataset.idx, 10)];
    // A missing or stale data-idx must not abort the whole export.
    if (!rec) return;

    const entry = {
      agent: rec.a,
      action: 'update_model',
      current_model: rec.from,
      current_provider: rec.fromP.toLowerCase(),
      recommended_model: lookup(modelMap, rec.to) || 'ollama-cloud/' + rec.to,
      recommended_provider: rec.toP.toLowerCase(),
      impact: rec.imp,
      expected_improvement: {
        quality: rec.q,
        speed: rec.sp,
        context_window: rec.ctx
      },
      // Strip any HTML tags from the rationale text.
      rationale: rec.r.replace(/<[^>]*>/g, ''),
    };

    const fallback = lookup(fallbackMap, rec.to);
    if (fallback) {
      entry.fallback_model = fallback;
      entry.fallback_strategy = 'speed-burst';
      entry.fallback_note = 'Use Groq for low-volume speed-critical calls; primary on Ollama Cloud';
    }

    selected.push(entry);
  });

  return {
    "$schema": "https://app.kilo.ai/agent-recommendations.json",
    "generated": new Date().toISOString(),
    "source": "APAW Agent Model Research v3",
    "target_file": ".kilo/capability-index.yaml",
    "total_recommendations": selected.length,
    // The summary is static text; it is not derived from the selection.
    "summary": {
      "avg_quality_improvement": "+18%",
      "providers_used": ["ollama-cloud", "groq", "openrouter"],
      "key_models": [
        "nemotron-3-super-120b-a12b (1M ctx, SWE-bench 60.5%, RULER@1M 91.75%)",
        "qwen3-coder:480b (SWE-bench 66.5%, best open-source coding)",
        "qwen3.6-plus (FREE OpenRouter, 1M ctx, SWE-bench 78.8%)",
        "gemma4:31b (Intelligence Index 39, thinking, vision)",
        "minimax-m2.5 (SWE-bench 80.2%, best coding overall)"
      ]
    },
    "recommendations": selected,
    "capability_index_patch": selected.map((s) => ({
      agent: s.agent,
      set: { model: s.recommended_model }
    }))
  };
}
|
||
|
||
/**
 * Open the export modal with the pretty-printed JSON of the selected recommendations.
 * Does nothing when no recommendation is selected.
 */
function exportJSON() {
  const checkedCount = document.querySelectorAll('.rec-check:checked').length;
  if (checkedCount === 0) return;

  const pretty = JSON.stringify(buildExportJSON(), null, 2);
  document.getElementById('jsonPreview').textContent = pretty;
  document.getElementById('jsonModal').style.display = 'flex';
}
|
||
|
||
/** Hide the JSON export modal (`#jsonModal`). */
function closeModal() {
  const modal = document.getElementById('jsonModal');
  modal.style.display = 'none';
}
|
||
|
||
/**
 * Copy the JSON shown in `#jsonPreview` to the clipboard and confirm on the copy
 * button for two seconds. Failures are logged instead of being silently dropped.
 */
function copyJSON() {
  const text = document.getElementById('jsonPreview').textContent;

  // navigator.clipboard is undefined outside secure contexts (e.g. plain http: pages);
  // without this guard the call throws a TypeError.
  if (typeof navigator === 'undefined' || !navigator.clipboard) {
    console.error('copyJSON: Clipboard API is not available in this context');
    return;
  }

  navigator.clipboard.writeText(text)
    .then(() => {
      const btn = document.getElementById('copyBtn');
      btn.textContent = '✅ Скопировано!';
      setTimeout(() => { btn.textContent = '📋 Копировать'; }, 2000);
    })
    .catch((err) => {
      // Permission denied or document not focused: report it, never leave the promise floating.
      console.error('copyJSON: clipboard write failed', err);
    });
}
|
||
|
||
/**
 * Download the current export payload as `agent-model-recommendations.json`
 * through a temporary object URL and a temporary anchor element.
 */
function downloadJSON() {
  const payload = JSON.stringify(buildExportJSON(), null, 2);
  const blob = new Blob([payload], { type: 'application/json' });
  const url = URL.createObjectURL(blob);

  const link = document.createElement('a');
  link.href = url;
  link.download = 'agent-model-recommendations.json';
  document.body.appendChild(link);
  try {
    link.click();
  } finally {
    // Always release the temporary anchor and object URL, even if click() throws.
    document.body.removeChild(link);
    URL.revokeObjectURL(url);
  }
}
|
||
|
||
/**
 * Render the impact list (`#impactDetails`) from `impactData` — one row per entry with
 * title, note, before → after scores and a delta chip — then render the benchmark
 * comparison table.
 */
function renderImpact() {
  const GREEN = 'var(--accent-green)';
  const RED = 'var(--accent-red)';
  const MUTED = 'var(--text-muted)';
  const MONO = "font-family:'JetBrains Mono',monospace";

  const rows = impactData.map((x) => {
    const gain = x.d > 0;
    const loss = x.d < 0;
    // A regression used to be rendered as "=0"; show the real negative delta in red.
    const accent = gain ? GREEN : loss ? RED : MUTED;
    const afterColor = gain ? GREEN : loss ? RED : 'var(--text-secondary)';
    const chipBg = gain ? 'rgba(0,255,148,.08)' : loss ? 'rgba(255,71,87,.08)' : 'rgba(90,104,128,.08)';
    const deltaLabel = gain ? '+' + x.d : loss ? String(x.d) : '=0';

    return `
<div style="display:flex;align-items:center;gap:14px;padding:10px 0;border-bottom:1px solid var(--border)">
<div style="flex:1"><div style="font-weight:600;font-size:.88em">${x.cat}</div><div style="font-size:.74em;color:var(--text-muted);margin-top:2px">${x.n}</div></div>
<div style="display:flex;align-items:center;gap:10px">
<span style="${MONO};color:var(--text-muted);font-size:.82em">${x.b}</span>
<span style="color:${accent};font-size:1.1em">→</span>
<span style="${MONO};color:${afterColor};font-size:.9em;font-weight:700">${x.a}</span>
<span style="${MONO};font-size:.78em;padding:2px 6px;border-radius:3px;background:${chipBg};color:${accent}">${deltaLabel}</span>
</div>
</div>`;
  });
  document.getElementById('impactDetails').innerHTML = rows.join('');

  // Render APAW vs TOP benchmark table
  renderBenchmarkComparison();
}
|
||
|
||
/**
 * Renders the "APAW vs top closed-source models" benchmark table into the
 * element with id `benchTable` (replaces its innerHTML).
 *
 * Layout of the generated table:
 *   - header row: one column per benchmark (tooltip carries full name, description, roles);
 *   - one row per model, with a separator row before the first APAW model;
 *   - summary row: best APAW score per benchmark vs best closed score;
 *   - role-average row: pre-calculated APAW averages vs closed averages.
 *
 * All benchmark and model data is static and declared inside the function.
 * Does nothing when `#benchTable` is not present in the document.
 */
function renderBenchmarkComparison() {
  // The first 7 benchmark columns hold numeric scores; column 7 is the context
  // window label and the last column is the price label.
  const SCORED_COLUMNS = 7;
  const CTX_COLUMN = 7;

  const GREEN = '#00ff94';
  const YELLOW = '#facc15';
  const RED = '#ff6b81';

  // Expanded benchmarks: 9 dimensions covering all APAW roles
  const benchmarks = [
    {name:'SWE-V',full:'SWE-Bench Verified',desc:'GitHub issue resolution (500 tasks)',role:'lead-dev, backend, fixer'},
    {name:'SWE-P',full:'SWE-Bench Pro',desc:'Multi-lang, decontaminated (1865 tasks)',role:'all coding agents'},
    {name:'T-Bench',full:'Terminal-Bench 2.0',desc:'CLI/shell multi-step tasks',role:'devops, planner, orchestrator'},
    {name:'LCB',full:'LiveCodeBench',desc:'Code gen from specs (held-out)',role:'sdet, go-dev, python-dev'},
    {name:'GPQA',full:'GPQA Diamond',desc:'PhD-level reasoning',role:'system-analyst, planner'},
    {name:'BComp',full:'BrowseComp',desc:'Web research & synthesis',role:'browser-auto, capability-analyst'},
    {name:'HLE',full:'Humanity Last Exam',desc:'Frontier knowledge (with tools)',role:'agent-architect, evaluator'},
    {name:'Ctx',full:'Context Window',desc:'Max tokens in one pass',role:'history-miner, memory-mgr'},
    {name:'$/M',full:'Cost per 1M input',desc:'API pricing',role:'all agents (ROI)'},
  ];

  const models = [
    // === TOP CLOSED-SOURCE (April 2026 leaders) ===
    {name:'Claude Opus 4.7',type:'closed',org:'Anthropic',
      scores:[87.6,64.3,69.4,null,94.2,79.3,53.0,'1M','$5'],color:'#c084fc',
      note:'#1 апрель 2026'},
    {name:'GPT-5.5',type:'closed',org:'OpenAI',
      scores:[null,58.6,82.7,null,null,83.4,57.2,'1M','$5'],color:'#ff6b81',
      note:'Новейший, Terminal #1'},
    {name:'GPT-5.4',type:'closed',org:'OpenAI',
      scores:[78.2,59.1,75.1,null,94.4,82.7,58.7,'200K','$2.50'],color:'#ff6b81',
      note:'Reasoning, math'},
    {name:'Gemini 3.1 Pro',type:'closed',org:'Google',
      scores:[80.6,46.1,68.5,null,94.3,85.9,51.4,'2M','$2'],color:'#facc15',
      note:'ARC-AGI 77.1%, дешёвый'},
    {name:'Claude Sonnet 4.6',type:'closed',org:'Anthropic',
      scores:[79.6,null,null,null,null,null,null,'200K','$3'],color:'#c084fc',
      note:'5× дешевле Opus'},
    {name:'GPT-5.3-Codex',type:'closed',org:'OpenAI',
      scores:[85.0,57.0,77.3,null,null,null,null,'200K','$6'],color:'#ff6b81',
      note:'Coding specialist'},

    // === APAW PIPELINE MODELS ===
    {name:'Kimi K2.6',type:'apaw',org:'APAW',
      scores:[80.2,58.6,66.7,87.2,null,83.2,54.0,'256K','$0.95'],color:'#00ff94',
      note:'devops, browser, architect (3)'},
    {name:'GLM-5.1',type:'apaw',org:'APAW',
      scores:[null,58.4,63.5,null,86.2,68.7,null,'200K','~$0.50'],color:'#00ff94',
      note:'12 agents! orchestrator, eval...'},
    {name:'V4-Pro Max',type:'apaw',org:'APAW',
      scores:[80.6,55.4,67.9,93.5,90.1,83.4,48.2,'1M','$0.42'],color:'#00d4ff',
      note:'planner, go-dev (рек.)'},
    {name:'Qwen3-Coder 480B',type:'apaw',org:'APAW',
      scores:[66.5,null,null,null,null,null,null,'256K','~$0.50'],color:'#00ff94',
      note:'7 coding agents'},
    {name:'MiniMax M2.5',type:'apaw',org:'APAW',
      scores:[80.2,51.3,null,null,null,76.3,null,'204K','$0.15'],color:'#00ff94',
      note:'frontend, skeptic, fixer (3)'},
    {name:'Nemotron Super',type:'apaw',org:'APAW',
      scores:[60.5,null,null,null,null,null,null,'1M','~$0.40'],color:'#00ff94',
      note:'6 agents (memory, history)'},
  ];

  const table = document.getElementById('benchTable');
  if (!table) return;

  // Best numeric score per benchmark among models of one type;
  // null means "no model of this type reports a number for this benchmark".
  const bestScores = (type) => benchmarks.map((_, i) => {
    const values = models
      .filter((m) => m.type === type)
      .map((m) => m.scores[i])
      .filter((v) => typeof v === 'number');
    return values.length > 0 ? Math.max(...values) : null;
  });
  const apawBest = bestScores('apaw');
  const closedBest = bestScores('closed');

  // Traffic-light colour for a score difference; `yellowFloor` is the (negative)
  // gap below which the cell turns red.
  const deltaColor = (diff, yellowFloor) => {
    if (diff >= 0) return GREEN;
    return diff > yellowFloor ? YELLOW : RED;
  };

  // Builds one score cell of a model row.
  const scoreCell = (m, s, si) => {
    let val;
    let cellColor = 'var(--text-secondary)';
    let bg = 'transparent';

    if (s === null) {
      val = '—';
      cellColor = 'rgba(90,104,128,.4)';
    } else if (typeof s === 'string') {
      val = s;
      // Price labels may carry a "~" prefix (approximate), so look for "$" anywhere.
      if (s.includes('$') && m.type === 'apaw') {
        cellColor = GREEN;
        bg = 'rgba(0,255,148,.06)';
      }
    } else {
      val = s.toFixed(1);
      if (si < SCORED_COLUMNS && m.type === 'apaw') {
        const cb = closedBest[si];
        // Only rate the cell when there is a closed-source number to compare with.
        if (cb !== null) {
          const diff = s - cb;
          if (diff >= 0) { cellColor = GREEN; bg = 'rgba(0,255,148,.1)'; val += ' 🟢'; }
          else if (diff > -5) { cellColor = YELLOW; bg = 'rgba(250,204,21,.06)'; val += ' 🟡'; }
          else { cellColor = RED; bg = 'rgba(255,107,129,.06)'; val += ' 🔴'; }
        }
      } else if (si < SCORED_COLUMNS && m.type === 'closed') {
        // Highlight if APAW beats this closed model
        const ab = apawBest[si];
        if (ab !== null && s < ab) bg = 'rgba(255,107,129,.04)';
      }
    }
    return '<td style="padding:5px 3px;border-bottom:1px solid var(--border);text-align:center;color:' + cellColor + ';background:' + bg + ';font-size:.78em">' + val + '</td>';
  };

  const parts = [];

  // === Header ===
  parts.push('<thead><tr><th style="text-align:left;padding:8px 6px;border-bottom:2px solid var(--border);font-size:.85em">Модель</th>');
  benchmarks.forEach((b) => {
    parts.push('<th style="padding:8px 3px;border-bottom:2px solid var(--border);font-size:.68em;max-width:60px" title="' + b.full + ': ' + b.desc + '\nРоли: ' + b.role + '">' + b.name + '</th>');
  });
  parts.push('</tr></thead><tbody>');

  // === Model rows ===
  // The separator goes before the first APAW model rather than at a fixed index.
  const firstApawIndex = models.findIndex((m) => m.type === 'apaw');
  models.forEach((m, mi) => {
    if (mi === firstApawIndex) {
      parts.push('<tr><td colspan="' + (benchmarks.length + 1) + '" style="padding:5px;background:rgba(0,212,255,.06);font-weight:700;font-size:.8em;color:var(--accent-cyan);text-align:center">— APAW Pipeline (open-source, $0.15–$0.95/M) —</td></tr>');
    }
    parts.push('<tr style="' + (m.type === 'apaw' ? 'background:rgba(0,255,148,.02)' : '') + '">');
    parts.push('<td style="padding:6px;border-bottom:1px solid var(--border);white-space:nowrap"><span style="font-weight:600;color:' + m.color + ';font-size:.88em">' + m.name + '</span>');
    parts.push('<div style="font-size:.65em;color:var(--text-muted)">' + m.note + '</div></td>');
    m.scores.forEach((s, si) => { parts.push(scoreCell(m, s, si)); });
    parts.push('</tr>');
  });

  // === Summary row: APAW best vs Closed best ===
  parts.push('<tr style="background:rgba(0,212,255,.05)"><td style="padding:8px;font-weight:700;color:var(--accent-cyan);font-size:.85em">APAW лучший</td>');
  benchmarks.forEach((_, i) => {
    if (i === CTX_COLUMN) {
      parts.push('<td style="padding:8px 3px;text-align:center;font-size:.78em;color:var(--accent-green)">1M ✅</td>');
      return;
    }
    if (i > CTX_COLUMN) {
      parts.push('<td style="padding:8px 3px;text-align:center;font-weight:700;font-size:.82em;color:var(--accent-green)">10-33× 🟢</td>');
      return;
    }
    const ab = apawBest[i];
    const cb = closedBest[i];
    if (ab === null) {
      parts.push('<td style="padding:8px 3px;text-align:center;font-size:.78em;color:var(--text-muted)">—</td>');
      return;
    }
    if (cb === null) {
      // No closed-source number for this benchmark: show the score without a
      // delta instead of comparing against an empty baseline.
      parts.push('<td style="padding:6px 3px;text-align:center;font-weight:700;font-size:.78em"><span style="color:var(--text-secondary)">' + ab.toFixed(1) + '</span><div style="font-size:.7em;color:var(--text-muted)">—</div></td>');
      return;
    }
    const diff = ab - cb;
    const icon = diff >= 0 ? '🟢' : diff > -5 ? '🟡' : '🔴';
    const sign = diff >= 0 ? '+' : '';
    parts.push('<td style="padding:6px 3px;text-align:center;font-weight:700;font-size:.78em"><span style="color:' + deltaColor(diff, -5) + '">' + ab.toFixed(1) + '</span><div style="font-size:.7em;color:var(--text-muted)">' + sign + diff.toFixed(1) + ' ' + icon + '</div></td>');
  });
  parts.push('</tr>');

  // === Role-based average row ===
  parts.push('<tr style="background:rgba(0,255,148,.04)"><td style="padding:8px;font-weight:700;color:var(--accent-green);font-size:.82em">Средняя по ролям APAW*</td>');
  // Pre-calculated averages for the scored columns only (the original note says
  // "across all 36 agents"); they are not derived from `models` here.
  const roleAvg = [78.2, 55.8, 65.7, 90.4, 88.2, 78.4, 51.1];
  const closedAvg = [82.2, 57.8, 74.6, null, 94.3, 83.4, 54.8];
  // Iterate over the benchmark columns (not over roleAvg, which only has the
  // scored columns) so the Ctx and $/M cells are emitted as well.
  benchmarks.forEach((_, i) => {
    if (i === CTX_COLUMN) {
      parts.push('<td style="padding:8px 3px;text-align:center;font-size:.78em;color:var(--accent-green)">573K avg</td>');
      return;
    }
    if (i > CTX_COLUMN) {
      parts.push('<td style="padding:8px 3px;text-align:center;font-weight:700;font-size:.82em;color:var(--accent-green)">$0.49 avg</td>');
      return;
    }
    const ra = roleAvg[i];
    if (typeof ra !== 'number') {
      parts.push('<td style="padding:8px 3px;text-align:center;font-size:.78em;color:var(--text-muted)">—</td>');
      return;
    }
    const ca = closedAvg[i];
    if (!ca) {
      parts.push('<td style="padding:6px 3px;text-align:center;font-size:.82em;color:var(--accent-green);font-weight:700">' + ra.toFixed(1) + '</td>');
      return;
    }
    const col = deltaColor(ra - ca, -8);
    parts.push('<td style="padding:6px 3px;text-align:center;font-weight:700;font-size:.82em"><span style="color:' + col + '">' + ra.toFixed(1) + '</span><div style="font-size:.65em;color:var(--text-muted)">vs ' + ca.toFixed(1) + '</div></td>');
  });
  parts.push('</tr>');

  table.innerHTML = parts.join('') + '</tbody>';
}
|
||
|
||
|
||
/**
 * Draws the before/after bar chart for `impactData` on the `#impactCanvas` canvas.
 *
 * For every entry it paints a "before" bar (`b`) and an "after" bar (`a`) on a
 * fixed 0–100 scale, a "+delta" label when `d > 0`, and a rotated category
 * label (`cat`, with parenthesised parts stripped and cut to 22 characters).
 * The canvas is resized to its parent's width (minus 36px) at 340 CSS pixels
 * high and scaled by the device pixel ratio.
 *
 * Returns early without drawing when the canvas is missing or has no
 * `offsetParent`, and after clearing when `impactData` is empty.
 */
function drawChart() {
  const canvas = document.getElementById('impactCanvas');
  if (!canvas || !canvas.offsetParent) return; // skip if hidden

  const ctx = canvas.getContext('2d');
  const dpr = window.devicePixelRatio || 1;
  // Never let the drawing width go negative on very narrow containers.
  const cssW = Math.max(0, canvas.parentElement.clientWidth - 36);
  const cssH = 340;

  canvas.width = cssW * dpr;
  canvas.height = cssH * dpr;
  canvas.style.width = cssW + 'px';
  canvas.style.height = cssH + 'px';
  ctx.scale(dpr, dpr);
  ctx.clearRect(0, 0, cssW, cssH);

  const data = impactData;
  if (!data.length) return;

  // Chart geometry: left margin, baseline y, plot height, scale maximum.
  const cL = 48;
  const cB = cssH - 60;
  const cH = cB - 20;
  const mx = 100;

  // On narrow containers the raw formula goes negative, which would make
  // fillRect draw bars backwards; keep at least a thin visible bar.
  const MIN_BAR_WIDTH = 2;
  const barW = Math.max(MIN_BAR_WIDTH, Math.min(38, (cssW - 180) / data.length / 2 - 4));
  const slotW = (cssW - cL - 40) / data.length;

  // Grid lines
  ctx.strokeStyle = 'rgba(30,39,54,.7)';
  ctx.lineWidth = 1;
  for (let step = 0; step <= 5; step++) {
    const y = cB - (cH * (step * 20) / mx);
    ctx.beginPath();
    ctx.moveTo(cL, y);
    ctx.lineTo(cssW - 16, y);
    ctx.stroke();
    ctx.fillStyle = '#5a6880';
    ctx.font = '10px JetBrains Mono,monospace';
    ctx.textAlign = 'right';
    ctx.fillText(step * 20, cL - 6, y + 3);
  }

  // Bars
  data.forEach((d, i) => {
    const x = cL + 28 + i * slotW;

    // Before bar (red)
    const beforeH = (d.b / mx) * cH;
    ctx.fillStyle = 'rgba(255,71,87,.4)';
    ctx.fillRect(x, cB - beforeH, barW, beforeH);

    // After bar (green or grey)
    const afterH = (d.a / mx) * cH;
    ctx.fillStyle = d.d > 0 ? 'rgba(0,255,148,.55)' : 'rgba(136,150,170,.35)';
    ctx.fillRect(x + barW + 3, cB - afterH, barW, afterH);

    // Delta label
    if (d.d > 0) {
      ctx.fillStyle = '#00ff94';
      ctx.font = 'bold 10px JetBrains Mono,monospace';
      ctx.textAlign = 'center';
      ctx.fillText('+' + d.d, x + barW + 1, cB - Math.max(beforeH, afterH) - 6);
    }

    // Category label (rotated)
    ctx.save();
    ctx.translate(x + barW, cB + 10);
    ctx.rotate(-0.4);
    ctx.fillStyle = '#8896aa';
    ctx.font = '8px Outfit,sans-serif';
    ctx.textAlign = 'left';
    const label = d.cat.replace(/\s*\(.*?\)/g, '').substring(0, 22);
    ctx.fillText(label, 0, 0);
    ctx.restore();
  });

  // Legend
  ctx.fillStyle = 'rgba(255,71,87,.4)';
  ctx.fillRect(cssW - 180, 8, 12, 12);
  ctx.fillStyle = '#8896aa';
  ctx.font = '11px Outfit,sans-serif';
  ctx.textAlign = 'left';
  ctx.fillText('Текущий score', cssW - 162, 18);
  ctx.fillStyle = 'rgba(0,255,148,.55)';
  ctx.fillRect(cssW - 180, 26, 12, 12);
  ctx.fillText('После замены', cssW - 162, 36);
}
|
||
|
||
// ======================= INIT =======================
|
||
// Render every dashboard section once the DOM has been parsed.
document.addEventListener('DOMContentLoaded', () => {
  renderCfg();
  renderGroq();
  renderModels();
  renderHeatmap();
  renderRecs();
  renderImpact();
});

// Redraw the impact chart on resize, but only while its tab is the active one.
window.addEventListener('resize', () => {
  const impactTab = document.getElementById('tab-impact');
  if (impactTab.classList.contains('active')) {
    drawChart();
  }
});
|
||
</script>
|
||
</body>
|
||
</html>
|