- rebuild-report.py: sync current_model from kilo-meta.json (UPDATE not only INSERT) - real-fit-report.json: regenerated from DB after agents table model rename - real-fit.db: 10 agents updated: current_model pro-max → pro - real-fit.html: remove stale model alias fallback
178 lines
5.6 KiB
Python
178 lines
5.6 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
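Rebuild real-fit-report.json from SQLite DB.

Before the report is built, the DB ``agents`` table is synced from
kilo-meta.json (when that file exists): missing agents are inserted and
``current_model`` of existing agents is updated.

Usage:
    python3 rebuild-report.py
    python3 rebuild-report.py --db /path/to/real-fit.db --report /path/to/real-fit-report.json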
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import sqlite3
|
|
import time
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
|
|
def _sync_agents_from_meta(db_path: Path) -> None:
    """Sync the DB ``agents`` table with ``kilo-meta.json``.

    The metadata file is looked up at
    ``db_path.parent.parent.parent / "kilo-meta.json"``. When it does not
    exist the function returns without opening the database.

    For every entry of the top-level ``"agents"`` mapping:

    * an agent already present in the table gets its ``current_model``
      overwritten with the entry's ``"model"`` (empty string when absent);
    * an agent missing from the table is inserted, using defaults for any
      absent field and the current UTC time as ``updated``.

    All writes happen in a single transaction that is rolled back if any
    statement fails, and the connection is always closed.

    Args:
        db_path: Path to the SQLite database holding the ``agents`` table.

    Raises:
        json.JSONDecodeError: If the metadata file is not valid JSON.
        sqlite3.Error: If the ``agents`` table is missing or a statement fails.
    """
    meta_path = db_path.parent.parent.parent / "kilo-meta.json"
    if not meta_path.exists():
        return
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(meta_path, encoding="utf-8") as f:
        meta = json.load(f)

    # Treat a missing or null "agents" entry as "nothing to sync".
    agents = meta.get("agents") or {}

    conn = sqlite3.connect(str(db_path))
    try:
        existing = {row[0] for row in conn.execute("SELECT name FROM agents")}

        # `with conn` commits on success and rolls back on any exception.
        with conn:
            for name, info in agents.items():
                if name in existing:
                    conn.execute(
                        "UPDATE agents SET current_model = ? WHERE name = ?",
                        (info.get("model", ""), name),
                    )
                else:
                    conn.execute(
                        "INSERT INTO agents (name, description, category, current_model, color, updated) VALUES (?, ?, ?, ?, ?, ?)",
                        (
                            name,
                            info.get("description", ""),
                            info.get("category", "meta"),
                            info.get("model", ""),
                            info.get("color", "#6B7280"),
                            datetime.now(timezone.utc).isoformat(),
                        ),
                    )
    finally:
        # Release the DB handle even when a statement raised.
        conn.close()
|
|
|
|
|
|
def _best_model_and_score(evals: dict) -> tuple:
    """Return ``(model, score)`` of the highest-scoring entry, or ``("", 0.0)`` if empty."""
    if not evals:
        return "", 0.0
    best_model = max(evals, key=evals.get)
    return best_model, evals[best_model]


def build_report(db_path: Path) -> dict:
    """Build the real-fit report dictionary from the SQLite database.

    Steps:

    1. Sync the ``agents`` table from kilo-meta.json
       (``_sync_agents_from_meta``).
    2. Load every row of ``agents``.
    3. Select one score per (agent, model) pair from ``evaluations``:
       rows with ``total_score <= 0``, an evaluator matching
       ``%rubric_v1%``, an empty response or a response containing
       ``[HTTP `` are skipped; among the rest the row of the preferred
       evaluator (``evolution-skeptic``, then ``rubric_v2``, then anything
       else) with the highest ``total_score`` wins.
    4. Derive, per agent, the best model/score from those selected scores,
       both for the ``agents`` section and for ``fit_scores`` so the two
       sections always agree.

    Args:
        db_path: Path to the SQLite database.

    Returns:
        A dict with the keys ``generated`` (UTC timestamp string),
        ``source``, ``total_evaluations`` (number of selected
        (agent, model) pairs), ``agents`` and ``fit_scores``.

    Raises:
        sqlite3.Error: If a required table is missing or a query fails.
    """
    _sync_agents_from_meta(db_path)

    conn = sqlite3.connect(str(db_path))
    conn.row_factory = sqlite3.Row
    try:
        cursor = conn.cursor()

        cursor.execute("""
            SELECT name, description, category, current_model
            FROM agents
        """)
        agents_meta = {row["name"]: dict(row) for row in cursor.fetchall()}

        # Skip HTTP error responses and rubric_v1 evaluations. Rows arrive
        # ordered so that, within each (agent, model) pair, the preferred
        # evaluator with the highest score comes first.
        cursor.execute("""
            SELECT agent_name, model, total_score, evaluator, response
            FROM evaluations
            WHERE total_score > 0
              AND evaluator NOT LIKE '%rubric_v1%'
              AND (response IS NULL
                   OR (response NOT LIKE '%[HTTP %' AND response != ''))
            ORDER BY agent_name, model,
                     CASE evaluator
                         WHEN 'evolution-skeptic' THEN 0
                         WHEN 'rubric_v2' THEN 1
                         ELSE 2
                     END,
                     total_score DESC
        """)

        # Keep only the first row seen for every (agent, model) pair.
        best_evals = {}
        for row in cursor.fetchall():
            per_model = best_evals.setdefault(row["agent_name"], {})
            per_model.setdefault(row["model"], row["total_score"])
    finally:
        # Release the DB handle even when a query raised.
        conn.close()

    # Build fit_scores from the selected evaluations. The previous
    # implementation ran a separate GROUP BY agent_name, model query and
    # stored each group under the agent key alone, so the entry ended up
    # describing whichever model happened to be returned last.
    fit_scores = {}
    for agent_name, evals in best_evals.items():
        fit_model, fit_value = _best_model_and_score(evals)
        fit_scores[agent_name] = {
            "model": fit_model,
            "fit": fit_value,
            # NOTE(review): the value is the selected best score, not an
            # average; the wording is kept byte-for-byte in case report
            # consumers depend on it.
            "explanation": (
                f"Best model for {agent_name} is {fit_model} "
                f"with avg score {fit_value:.1f}. "
                "Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)."
            ),
        }

    agents_report = {}
    for agent_name, meta in agents_meta.items():
        evals = best_evals.get(agent_name, {})
        best_model, best_score = _best_model_and_score(evals)
        agents_report[agent_name] = {
            "name": agent_name,
            "evaluations": evals,
            "info": [
                meta.get("description") or "",
                meta.get("category") or "",
                meta.get("current_model") or "",
            ],
            "best_model": best_model,
            "best_score": best_score,
        }

    total_evals = sum(len(evals) for evals in best_evals.values())
    generated = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

    return {
        "generated": generated,
        "source": "real-fit-engine-db-filtered",
        "total_evaluations": total_evals,
        "agents": agents_report,
        "fit_scores": fit_scores,
    }
|
|
|
|
|
|
def main():
    """Command-line entry point.

    Parses ``--db`` and ``--report`` (both default to files in the ``data``
    directory next to this script's parent directory), rebuilds the report
    with ``build_report`` and writes it as indented JSON, creating the
    output directory when needed. A short summary is printed to stdout.
    """
    data_dir = Path(__file__).parent.parent / "data"

    cli = argparse.ArgumentParser(description="Rebuild real-fit-report.json from DB")
    cli.add_argument("--db", type=Path, default=data_dir / "real-fit.db", help="Path to SQLite DB")
    cli.add_argument(
        "--report",
        type=Path,
        default=data_dir / "real-fit-report.json",
        help="Path to report JSON output",
    )
    args = cli.parse_args()

    report = build_report(args.db)

    # Make sure the destination directory exists before writing.
    target = args.report
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(target, "w", encoding="utf-8") as out:
        json.dump(report, out, indent=2)

    print(f"Report rebuilt: {args.report}")
    print(f"Agents: {len(report['agents'])}, Evaluations: {report['total_evaluations']}")
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|