# APAW/agent-evolution/scripts/rebuild-report.py

#!/usr/bin/env python3
"""
Rebuild real-fit-report.json from SQLite DB.

Usage:
    python3 rebuild-report.py
    python3 rebuild-report.py --db /path/to/real-fit.db --report /path/to/real-fit-report.json
"""

import argparse
import json
import sqlite3
import time
from datetime import datetime, timezone
from pathlib import Path


def _sync_agents_from_meta(db_path: Path) -> None:
    """Sync the DB ``agents`` table with the agents listed in kilo-meta.json.

    The meta file is looked up three directory levels above the DB file
    (``db_path.parent.parent.parent / "kilo-meta.json"``). When it does not
    exist the function returns without opening the DB.

    For every entry under the top-level ``"agents"`` key of the meta file:

    * an agent already present in the table gets its ``current_model``
      overwritten with the entry's ``model`` value (``""`` when absent);
    * an agent missing from the table is inserted, with ``description``
      defaulting to ``""``, ``category`` to ``"meta"``, ``color`` to
      ``"#6B7280"`` and ``updated`` set to the current UTC time (ISO 8601).

    All changes are committed together; if any statement fails nothing is
    committed and the connection is still closed.

    Args:
        db_path: Path to the SQLite database file.

    Raises:
        json.JSONDecodeError: If kilo-meta.json is not valid JSON.
        sqlite3.Error: If the ``agents`` table is missing or a statement fails.
    """
    meta_path = db_path.parent.parent.parent / "kilo-meta.json"
    if not meta_path.exists():
        return
    # Read as UTF-8 explicitly so decoding does not depend on the platform's
    # locale default encoding.
    with open(meta_path, encoding="utf-8") as f:
        meta = json.load(f)

    # Treat a missing or null "agents" entry as "nothing to sync".
    agents = meta.get("agents") or {}

    conn = sqlite3.connect(str(db_path))
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT name FROM agents")
        existing = {r[0] for r in cursor.fetchall()}

        for name, info in agents.items():
            if name in existing:
                cursor.execute(
                    "UPDATE agents SET current_model = ? WHERE name = ?",
                    (info.get("model", ""), name),
                )
            else:
                cursor.execute(
                    "INSERT INTO agents (name, description, category, current_model, color, updated) VALUES (?, ?, ?, ?, ?, ?)",
                    (
                        name,
                        info.get("description", ""),
                        info.get("category", "meta"),
                        info.get("model", ""),
                        info.get("color", "#6B7280"),
                        datetime.now(timezone.utc).isoformat(),
                    ),
                )
        conn.commit()
    finally:
        # Always release the DB handle, even when a statement above raised.
        conn.close()


def build_report(db_path: Path) -> dict:
    """Build the real-fit report dictionary from the SQLite database.

    Steps:

    1. Sync the ``agents`` table from kilo-meta.json
       (``_sync_agents_from_meta``).
    2. Load agent metadata from the ``agents`` table.
    3. Pick one score per (agent, model) from ``evaluations``, keeping only
       rows with ``total_score > 0``, whose evaluator does not contain
       ``rubric_v1`` and whose response is NULL or a non-empty string that
       does not contain ``[HTTP ``. Within an (agent, model) pair the row
       from the most preferred evaluator wins (``evolution-skeptic``, then
       ``rubric_v2``, then anything else), ties broken by highest score.
    4. Compute ``fit_scores``: for each agent, the model with the highest
       maximum ``total_score`` over the same filtered rows.

    Args:
        db_path: Path to the SQLite database file.

    Returns:
        A dict with keys ``generated`` (UTC timestamp string), ``source``,
        ``total_evaluations`` (number of selected agent/model pairs),
        ``agents`` (per-agent report keyed by agent name) and ``fit_scores``
        (best model per agent keyed by agent name).

    Raises:
        sqlite3.Error: If a required table/column is missing or a query fails.
    """
    _sync_agents_from_meta(db_path)
    conn = sqlite3.connect(str(db_path))
    try:
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()

        cursor.execute("""
            SELECT name, description, category, current_model
            FROM agents
        """)
        agents_meta = {row["name"]: dict(row) for row in cursor.fetchall()}

        # Skip HTTP error responses, empty responses and every rubric_v1
        # evaluation; among the rest, order each (agent, model) group so the
        # preferred evaluator (evolution-skeptic, then rubric_v2, then others)
        # and its highest score come first.
        cursor.execute("""
            SELECT agent_name, model, total_score, evaluator, response
            FROM evaluations
            WHERE total_score > 0
              AND evaluator NOT LIKE '%rubric_v1%'
              AND (response IS NULL
                   OR (response NOT LIKE '%[HTTP %' AND response != ''))
            ORDER BY agent_name, model,
                CASE evaluator
                    WHEN 'evolution-skeptic' THEN 0
                    WHEN 'rubric_v2' THEN 1
                    ELSE 2
                END,
                total_score DESC
        """)

        # Keep only the first row of each (agent, model) group, i.e. the
        # preferred evaluator's highest score.
        best_evals = {}
        for row in cursor.fetchall():
            best_evals.setdefault(row["agent_name"], {}).setdefault(
                row["model"], row["total_score"]
            )

        # Highest score per (agent, model) over the same filtered rows.
        # NOTE: this query takes MAX over all remaining evaluators; it does
        # not apply the evaluator preference used for best_evals above.
        cursor.execute("""
            SELECT agent_name, model, MAX(total_score) AS best_score
            FROM evaluations
            WHERE total_score > 0
              AND evaluator NOT LIKE '%rubric_v1%'
              AND (response IS NULL
                   OR (response NOT LIKE '%[HTTP %' AND response != ''))
            GROUP BY agent_name, model
            ORDER BY agent_name, model
        """)

        # An agent has one row per model. Keep the highest-scoring one rather
        # than letting later rows overwrite earlier ones; on a tie the first
        # model in sort order wins.
        top_rows = {}
        for row in cursor.fetchall():
            current = top_rows.get(row["agent_name"])
            if current is None or row["best_score"] > current["best_score"]:
                top_rows[row["agent_name"]] = row

        fit_scores = {}
        for agent_name, row in top_rows.items():
            fit_scores[agent_name] = {
                "model": row["model"],
                "fit": row["best_score"],
                # NOTE: the text says "avg score" but the value is the MAX
                # computed above; the wording is kept unchanged for consumers.
                "explanation": (
                    f"Best model for {row['agent_name']} is {row['model']} "
                    f"with avg score {row['best_score']:.1f}. "
                    "Evaluator preference: evolution-skeptic > rubric_v2 > rubric_v1 (ignored HTTP errors)."
                ),
            }
    finally:
        # Always release the DB handle, even when a query above raised.
        conn.close()

    agents_report = {}
    for agent_name, meta in agents_meta.items():
        evals = best_evals.get(agent_name, {})
        if evals:
            best_model = max(evals, key=evals.get)
            best_score = evals[best_model]
        else:
            # Agent known from the agents table but without usable evaluations.
            best_model = ""
            best_score = 0.0
        agents_report[agent_name] = {
            "name": agent_name,
            "evaluations": evals,
            "info": [
                meta.get("description") or "",
                meta.get("category") or "",
                meta.get("current_model") or "",
            ],
            "best_model": best_model,
            "best_score": best_score,
        }

    total_evals = sum(len(evals) for evals in best_evals.values())
    generated = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

    return {
        "generated": generated,
        "source": "real-fit-engine-db-filtered",
        "total_evaluations": total_evals,
        "agents": agents_report,
        "fit_scores": fit_scores,
    }


def main():
    """Command-line entry point: rebuild the report JSON from the DB.

    Accepts ``--db`` and ``--report`` options; both default to files inside
    the ``data`` directory that sits next to this script's parent directory.
    The report's parent directory is created when missing, the report is
    written as indented UTF-8 JSON, and a short summary is printed.
    """
    # Both defaults live in the same directory, resolved relative to this file.
    data_dir = Path(__file__).parent.parent / "data"

    cli = argparse.ArgumentParser(description="Rebuild real-fit-report.json from DB")
    cli.add_argument(
        "--db",
        type=Path,
        default=data_dir / "real-fit.db",
        help="Path to SQLite DB",
    )
    cli.add_argument(
        "--report",
        type=Path,
        default=data_dir / "real-fit-report.json",
        help="Path to report JSON output",
    )
    args = cli.parse_args()

    report = build_report(args.db)

    destination = args.report
    destination.parent.mkdir(parents=True, exist_ok=True)
    with destination.open("w", encoding="utf-8") as sink:
        json.dump(report, sink, indent=2)

    print(f"Report rebuilt: {args.report}")
    print(f"Agents: {len(report['agents'])}, Evaluations: {report['total_evaluations']}")

if __name__ == "__main__":
    main()