feat(evolution): add real-fit dashboard, API, report builder, and docker compose
- real-fit.html: API-driven research dashboard with agent/model heatmap, detail modal with score breakdown and evaluator commentary
- api.py: FastAPI backend serving /api/real-fit-report (dynamic from SQLite), /api/research, /api/evolve-agent/start
- rebuild-report.py: generates real-fit-report.json from SQLite DB for static fallback
- docker-compose.yml: add evolution-api service (Python 3.12, uvicorn) for research endpoints
- index.standalone.html: sync with dashboard data updates
- archive/index.html: standalone dashboard snapshot (263KB)
- .gitignore: exclude *.db, research-jobs.json from tracking
This commit is contained in:
173
agent-evolution/scripts/rebuild-report.py
Normal file
173
agent-evolution/scripts/rebuild-report.py
Normal file
@@ -0,0 +1,173 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Rebuild real-fit-report.json from SQLite DB.
|
||||
|
||||
Usage:
|
||||
python3 rebuild-report.py
|
||||
python3 rebuild-report.py --db /path/to/real-fit.db --report /path/to/real-fit-report.json
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sqlite3
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _sync_agents_from_meta(db_path: Path) -> None:
    """Import any missing agents from kilo-meta.json into the DB agents table.

    The meta file is looked up three directory levels above ``db_path``
    (``db_path.parent.parent.parent / "kilo-meta.json"``). If it does not
    exist the function returns without touching the database.

    Agents whose name is already present in the ``agents`` table are left
    untouched; only new names are inserted, using defaults for any field the
    meta entry does not provide.

    Args:
        db_path: Path to the SQLite database containing the ``agents`` table.

    Raises:
        sqlite3.Error: If the database cannot be read or written (for
            example when the ``agents`` table is missing).
        json.JSONDecodeError: If kilo-meta.json is not valid JSON.
    """
    meta_path = db_path.parent.parent.parent / "kilo-meta.json"
    if not meta_path.exists():
        return
    with open(meta_path, encoding="utf-8") as f:
        meta = json.load(f)

    # Tolerate `"agents": null` in the meta file instead of crashing on .items().
    agents = meta.get("agents") or {}

    conn = sqlite3.connect(str(db_path))
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT name FROM agents")
        existing = {r[0] for r in cursor.fetchall()}

        for name, info in agents.items():
            if name in existing:
                continue
            # A null entry is treated like an empty one so defaults apply.
            info = info or {}
            cursor.execute(
                "INSERT OR IGNORE INTO agents (name, description, category, current_model, color, updated) VALUES (?, ?, ?, ?, ?, ?)",
                (
                    name,
                    info.get("description", ""),
                    info.get("category", "meta"),
                    info.get("model", ""),
                    info.get("color", "#6B7280"),
                    datetime.now(timezone.utc).isoformat(),
                ),
            )
        conn.commit()
    finally:
        # Always release the connection, even when a statement above raises.
        conn.close()
|
||||
|
||||
|
||||
def build_report(db_path: Path) -> dict:
    """Build the real-fit report dictionary from the SQLite database.

    Steps:
      1. Sync missing agents from kilo-meta.json into the ``agents`` table.
      2. Load agent metadata (name, description, category, current_model).
      3. Select one score per (agent, model) pair from ``evaluations``,
         preferring evaluator ``evolution-skeptic``, then ``rubric_v2``, then
         any other evaluator, and the highest ``total_score`` within the same
         evaluator rank.
      4. Derive per-agent best model / best score and the ``fit_scores``
         summary from that same selection.

    Args:
        db_path: Path to the SQLite database file.

    Returns:
        A dict with the keys ``generated`` (UTC timestamp string),
        ``source``, ``total_evaluations`` (number of selected agent/model
        pairs), ``agents`` (per-agent report keyed by agent name) and
        ``fit_scores`` (best model per agent that has at least one selected
        evaluation).

    Raises:
        sqlite3.Error: If the expected tables are missing or unreadable.
    """
    _sync_agents_from_meta(db_path)

    conn = sqlite3.connect(str(db_path))
    try:
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()

        cursor.execute("""
            SELECT name, description, category, current_model
            FROM agents
        """)
        agents_meta = {row["name"]: dict(row) for row in cursor.fetchall()}

        # Keep only evaluations with a positive score whose response is not
        # an HTTP error marker ('[HTTP ...') and not an empty string.
        # Evaluators matching 'rubric_v1' are excluded entirely; because of
        # SQL NULL semantics, rows with a NULL evaluator are excluded too.
        # Rows are ordered so the preferred evaluator (and, within it, the
        # highest score) comes first for every (agent, model) pair.
        cursor.execute("""
            SELECT agent_name, model, total_score, evaluator, response
            FROM evaluations
            WHERE total_score > 0
              AND evaluator NOT LIKE '%rubric_v1%'
              AND (response IS NULL
                   OR (response NOT LIKE '%[HTTP %' AND response != ''))
            ORDER BY agent_name, model,
                     CASE evaluator
                         WHEN 'evolution-skeptic' THEN 0
                         WHEN 'rubric_v2' THEN 1
                         ELSE 2
                     END,
                     total_score DESC
        """)

        # Take the first (best preferred evaluator, highest score) row per
        # agent-model pair; later rows for the same pair are ignored.
        best_evals = {}
        for row in cursor.fetchall():
            per_model = best_evals.setdefault(row["agent_name"], {})
            per_model.setdefault(row["model"], row["total_score"])
    finally:
        # Release the connection even if one of the queries raises.
        conn.close()

    # Rebuild fit_scores from the selected evaluations only. Each agent gets
    # exactly one entry: its highest-scoring model. (Previously a separate
    # GROUP BY agent_name, model query was written into a dict keyed by agent
    # alone, so whichever model was iterated last was reported as "best".)
    fit_scores = {}
    for agent_name, evals in best_evals.items():
        top_model = max(evals, key=evals.get)
        top_score = evals[top_model]
        fit_scores[agent_name] = {
            "model": top_model,
            "fit": top_score,
            "explanation": (
                f"Best model for {agent_name} is {top_model} "
                f"with best score {top_score:.1f}. "
                "Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)."
            ),
        }

    agents_report = {}
    for agent_name, meta in agents_meta.items():
        evals = best_evals.get(agent_name, {})
        if evals:
            best_model = max(evals, key=evals.get)
            best_score = evals[best_model]
        else:
            # Agent is known but has no usable evaluation yet.
            best_model = ""
            best_score = 0.0
        agents_report[agent_name] = {
            "name": agent_name,
            "evaluations": evals,
            "info": [
                meta.get("description") or "",
                meta.get("category") or "",
                meta.get("current_model") or "",
            ],
            "best_model": best_model,
            "best_score": best_score,
        }

    total_evals = sum(len(evals) for evals in best_evals.values())
    generated = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

    return {
        "generated": generated,
        "source": "real-fit-engine-db-filtered",
        "total_evaluations": total_evals,
        "agents": agents_report,
        "fit_scores": fit_scores,
    }
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: rebuild the report JSON from the SQLite DB.

    Options:
        --db      Path to the SQLite database
                  (default: ``../data/real-fit.db`` relative to this script).
        --report  Path of the JSON file to write
                  (default: ``../data/real-fit-report.json`` relative to this
                  script). Missing parent directories are created.

    Exits with an argparse usage error if the database file does not exist.
    """
    data_dir = Path(__file__).parent.parent / "data"

    parser = argparse.ArgumentParser(description="Rebuild real-fit-report.json from DB")
    parser.add_argument(
        "--db",
        type=Path,
        default=data_dir / "real-fit.db",
        help="Path to SQLite DB",
    )
    parser.add_argument(
        "--report",
        type=Path,
        default=data_dir / "real-fit-report.json",
        help="Path to report JSON output",
    )
    args = parser.parse_args()

    # sqlite3.connect() would silently create an empty database at a wrong
    # path and the build would then fail with "no such table"; fail early
    # with a clear message instead and leave no stray file behind.
    if not args.db.is_file():
        parser.error(f"database not found: {args.db}")

    report = build_report(args.db)
    args.report.parent.mkdir(parents=True, exist_ok=True)
    with open(args.report, "w", encoding="utf-8") as f:
        json.dump(report, f, indent=2)

    print(f"Report rebuilt: {args.report}")
    print(f"Agents: {len(report['agents'])}, Evaluations: {report['total_evaluations']}")
|
||||
|
||||
|
||||
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user