- real-fit-engine.py: refactored to support --from-report, improved Ollama v1/chat/completions compatibility, agent name normalization
- run-focused-eval.py: run evaluations for specific agent/model pairs from CLI
- test_ollama_minimal.py / test_real_api.py: Ollama API connectivity tests
- real-fit-architecture.md: architecture overview document
- tests/scripts/: E2E landing test, analytics capture, evolution heatmap verification
- Remove real-fit-recalc.py (superseded by --from-report flag)
32 lines
1.1 KiB
Python
32 lines
1.1 KiB
Python
#!/usr/bin/env python3
"""Smoke-test a single real Ollama call for the ``code-skeptic`` agent.

The script prepares the real-fit database through ``real_fit_engine``
(``init_db``, ``import_from_evolution``, ``generate_prompts``), reads the
stored test prompt for ``code-skeptic`` and passes it to ``call_ollama`` with
the ``kimi-k2.6`` model. It then prints the latency, the token count, the
first 300 characters of the response and the result of ``evaluate_response``.

Configuration:
    OLLAMA_KEY   required; API credential. It must be supplied through the
                 environment and never committed to the repository. A key
                 that was previously embedded in this file should be treated
                 as compromised and rotated.
    OLLAMA_HOST  optional; defaults to ``https://api.ollama.com``.

Run it from the repository root: both ``scripts`` and the database path are
resolved relative to the current working directory.
"""
import os
import sqlite3
import sys
from contextlib import closing

DB_PATH = "agent-evolution/data/real-fit.db"
AGENT_NAME = "code-skeptic"
MODEL_NAME = "kimi-k2.6"
DEFAULT_OLLAMA_HOST = "https://api.ollama.com"
REQUIRED_ENV_VARS = ("OLLAMA_KEY",)
PROMPT_QUERY = (
    "SELECT system_prompt, user_prompt, expected_keywords, rubric "
    "FROM test_prompts WHERE agent_name = ?"
)


def missing_env_vars(names, environ=None):
    """Return the entries of *names* that are unset or empty in *environ*.

    Args:
        names: Iterable of environment variable names to check.
        environ: Mapping to inspect; defaults to ``os.environ``.

    Returns:
        A list, in the order given, of the names with no non-empty value.
    """
    env = os.environ if environ is None else environ
    return [name for name in names if not env.get(name)]


def main():
    """Run the one-shot ``code-skeptic`` x ``kimi-k2.6`` evaluation.

    Raises:
        SystemExit: If a required environment variable is missing.
    """
    # Fail fast, before any import or network activity, when the credential
    # is absent; there is intentionally no built-in fallback key.
    missing = missing_env_vars(REQUIRED_ENV_VARS)
    if missing:
        sys.exit(
            "Missing required environment variable(s): "
            + ", ".join(missing)
            + ". Export them before running this script."
        )
    os.environ.setdefault("OLLAMA_HOST", DEFAULT_OLLAMA_HOST)

    # The engine lives in ./scripts. It is imported only after the
    # environment is prepared, matching the original statement order
    # (presumably the engine reads these variables at import time - confirm).
    sys.path.insert(0, "scripts")
    from real_fit_engine import (
        call_ollama,
        evaluate_response,
        generate_prompts,
        import_from_evolution,
        init_db,
    )

    init_db()
    import_from_evolution()
    generate_prompts()

    # closing() guarantees the connection is released even if the query
    # raises; sqlite3's own context manager only manages transactions.
    with closing(sqlite3.connect(DB_PATH)) as conn:
        row = conn.execute(PROMPT_QUERY, (AGENT_NAME,)).fetchone()

    if not row:
        print(f"No prompt found for {AGENT_NAME}")
        return

    system, user, expected, rubric = row
    print(f"=== REAL Ollama: {AGENT_NAME} x {MODEL_NAME} ===")
    # NOTE(review): `expected` is forwarded as the fourth argument exactly as
    # before; confirm this matches call_ollama's signature.
    resp, latency, tokens = call_ollama(MODEL_NAME, system, user, expected)
    print(f"Latency: {latency}ms")
    print(f"Tokens: {tokens}")
    print("Response (first 300 chars):")
    print(resp[:300])
    print("\n...")

    ev = evaluate_response(resp, expected, rubric)
    print(f"Score: {ev['total']:.1f}")
    print(f"Explanation: {ev['explanation']}")


if __name__ == "__main__":
    main()