Files
APAW/scripts/test_real_api.py
Deploy Bot 4071551476 feat(scripts): add real-fit evaluation engine and supporting test scripts
- real-fit-engine.py: refactored to support --from-report, improved Ollama v1/chat/completions compatibility, agent name normalization
- run-focused-eval.py: run evaluations for specific agent/model pairs from CLI
- test_ollama_minimal.py/test_real_api.py: Ollama API connectivity tests
- real-fit-architecture.md: architecture overview document
- tests/scripts/: E2E landing test, analytics capture, evolution heatmap verification
- Remove real-fit-recalc.py (superseded by --from-report flag)
2026-05-28 11:57:46 +01:00

32 lines
1.1 KiB
Python

#!/usr/bin/env python3
"""Manual connectivity check: one real Ollama call for one agent/model pair.

Reads the stored test prompt for the ``code-skeptic`` agent from the real-fit
SQLite database, sends it to the ``kimi-k2.6`` model through ``call_ollama``,
and prints the latency, token count, a preview of the response and the
evaluation score.

The API key is taken from the ``OLLAMA_KEY`` environment variable; the script
refuses to run when it is missing. ``OLLAMA_HOST`` falls back to the public
endpoint when unset. Paths are relative to the current working directory, as
before, so run it from the project root.
"""
import os
import sqlite3
import sys
from contextlib import closing

AGENT_NAME = "code-skeptic"
MODEL_NAME = "kimi-k2.6"
DB_PATH = "agent-evolution/data/real-fit.db"
DEFAULT_OLLAMA_HOST = "https://api.ollama.com"
PREVIEW_CHARS = 300


def main():
    """Run the connectivity check and return a process exit code.

    Returns:
        0 once the check has run (also when no prompt exists for the agent),
        2 when ``OLLAMA_KEY`` is not set in the environment.
    """
    # SECURITY: a literal API key used to be the fallback value here. Secrets
    # must not live in source control, so the key is now required from the
    # environment. NOTE(review): the previously committed key should be
    # revoked/rotated, since it remains in the repository history.
    if not os.environ.get("OLLAMA_KEY"):
        print(
            "OLLAMA_KEY is not set; export it before running this script.",
            file=sys.stderr,
        )
        return 2
    os.environ.setdefault("OLLAMA_HOST", DEFAULT_OLLAMA_HOST)

    # Import only after the environment is prepared, preserving the original
    # ordering (presumably the engine reads these variables - verify there).
    sys.path.insert(0, "scripts")
    from real_fit_engine import (
        call_ollama,
        evaluate_response,
        generate_prompts,
        import_from_evolution,
        init_db,
    )

    # Engine set-up steps, called in the original order. Presumably they create
    # and populate the test_prompts table queried below - confirm in the engine.
    init_db()
    import_from_evolution()
    generate_prompts()

    # closing() guarantees the connection is released even if the query raises.
    with closing(sqlite3.connect(DB_PATH)) as conn:
        row = conn.execute(
            "SELECT system_prompt, user_prompt, expected_keywords, rubric FROM test_prompts WHERE agent_name = ?",
            (AGENT_NAME,),
        ).fetchone()

    if not row:
        print(f"No prompt found for {AGENT_NAME}")
        return 0

    system, user, expected, rubric = row
    print(f"=== REAL Ollama: {AGENT_NAME} x {MODEL_NAME} ===")
    resp, latency, tokens = call_ollama(MODEL_NAME, system, user, expected)
    print(f"Latency: {latency}ms")
    print(f"Tokens: {tokens}")
    print("Response (first 300 chars):")
    print(resp[:PREVIEW_CHARS])
    print("\n...")
    ev = evaluate_response(resp, expected, rubric)
    print(f"Score: {ev['total']:.1f}")
    print(f"Explanation: {ev['explanation']}")
    return 0


if __name__ == "__main__":
    sys.exit(main())