Files
APAW/scripts/cross-checker.py
Deploy Bot 226c948a7d feat: milestone 78 — blocking workflow-cross-checker gate + clean stale aliases
- Create scripts/cross-checker.py deterministic gate script
- Create .kilo/rules/cross-check-list.md documentation
- Append cross-checker to pre-commit hook (blocks commit on FAIL)
- Remove 42 stale fallback aliases from capability-index.yaml

Cross-checker currently reports 4 pre-existing FAILs assigned to:
- Issue #123 (USE_MOCK + hardcoded API key in real-fit-engine.py)
- Issue #128 (event.target in dashboard dist/ideas files)

Issues: #124, #123, #128
2026-06-01 12:18:08 +01:00

267 lines
10 KiB
Python

#!/usr/bin/env python3
"""
Deterministic cross-checker gate script.
Runs after the single-source-of-truth sync hook.
No AI inference, no network calls. Must complete in <2 seconds.
Exit 1 if any FAIL check triggers.
"""
import glob
import json
import os
import re
import sys
# Repository root: two directory levels above this script's absolute path
# (i.e. the parent of the directory that contains this file).
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Agent/command metadata; the checks compare the other config files against it.
KILO_META = os.path.join(ROOT, "kilo-meta.json")
# JSONC config scanned with regexes by check2 (it is never parsed as JSON).
KILO_JSONC = os.path.join(ROOT, "kilo.jsonc")
# YAML capability index read by check1 and check2.
CAP_INDEX = os.path.join(ROOT, ".kilo", "capability-index.yaml")
# Directory of per-agent Markdown files whose frontmatter is inspected.
AGENTS_DIR = os.path.join(ROOT, ".kilo", "agents")
# Tree scanned by check3 (mock flags) and check5 (hardcoded secrets).
SCRIPTS_DIR = os.path.join(ROOT, "scripts")
# Tree scanned by check6 (event.target usage).
EVOLUTION_DIR = os.path.join(ROOT, "agent-evolution")
# Module-level accumulators: each check appends formatted messages here and
# main() prints them; a non-empty `failures` list makes the script exit 1.
failures = []
warnings = []
def load_json(path):
    """Read the UTF-8 encoded JSON file at *path* and return the decoded value."""
    with open(path, encoding="utf-8") as handle:
        text = handle.read()
    return json.loads(text)
def load_yaml(path):
    """Parse the UTF-8 YAML file at *path* with ``yaml.safe_load``.

    PyYAML is imported lazily. When it is not installed, a ``[FAIL]`` line is
    printed and the process exits with status 1.
    """
    try:
        import yaml
    except ImportError:
        print("[FAIL] cross-checker: PyYAML not installed, cannot load capability-index.yaml")
        sys.exit(1)

    with open(path, mode="r", encoding="utf-8") as handle:
        document = yaml.safe_load(handle)
    return document
def fmt_fail(path, line, msg):
    """Return a ``[FAIL] path:line: msg`` report line."""
    location = f"{path}:{line}"
    return f"[FAIL] {location}: {msg}"
def fmt_warn(path, line, msg):
    """Return a ``[WARN] path:line: msg`` report line."""
    location = f"{path}:{line}"
    return f"[WARN] {location}: {msg}"
# ------------------------------------------------------------------
# CHECK 1 — stale fallback aliases in capability-index.yaml
# ------------------------------------------------------------------
def check1_stale_fallbacks():
    """CHECK 1: report fallback aliases that match no model in kilo-meta.json.

    The set of valid models is every ``model`` value found under ``agents``
    and ``commands`` in kilo-meta.json. Each entry of an agent's
    ``fallback_models`` in capability-index.yaml that is not in that set is
    appended to ``failures``. Pairs of agents whose distinct model names are
    prefixes of one another are appended to ``warnings``.

    Line numbers are reported as 0 because the parsed documents carry no
    position information.
    """
    meta = load_json(KILO_META)

    # Agents without a usable "model" are skipped instead of raising KeyError,
    # matching how check2 ignores agents whose model is missing.
    agent_models = {
        name: agent["model"]
        for name, agent in meta.get("agents", {}).items()
        if isinstance(agent, dict) and agent.get("model") is not None
    }
    valid_models = set(agent_models.values())
    for cmd in meta.get("commands", {}).values():
        if isinstance(cmd, dict) and "model" in cmd:
            valid_models.add(cmd["model"])

    data = load_yaml(CAP_INDEX)
    # An empty YAML file parses to None and "agents:" with no value parses to
    # None as well. Report that as a FAIL (the gate stays closed) rather than
    # crashing with an AttributeError traceback.
    ci_agents = data.get("agents", {}) if isinstance(data, dict) else None
    if not isinstance(ci_agents, dict):
        failures.append(fmt_fail(CAP_INDEX, 0,
            "capability-index.yaml is empty or has no 'agents' mapping"))
        ci_agents = {}

    for name, block in ci_agents.items():
        if not isinstance(block, dict):
            continue
        # "fallback_models:" with no value yields None; a bare scalar would be
        # iterated character by character, so wrap it in a list.
        fallbacks = block.get("fallback_models") or []
        if isinstance(fallbacks, str):
            fallbacks = [fallbacks]
        for alias in fallbacks:
            if alias not in valid_models:
                failures.append(fmt_fail(CAP_INDEX, 0,
                    f"Agent '{name}' fallback_models contains stale alias '{alias}'"))

    # prefix collision WARN (belt-and-suspenders); only string models can be
    # compared with startswith().
    items = [(n, m) for n, m in agent_models.items() if isinstance(m, str)]
    for i, (n1, m1) in enumerate(items):
        for n2, m2 in items[i + 1:]:
            if m1 != m2 and (m1.startswith(m2) or m2.startswith(m1)):
                warnings.append(fmt_warn(KILO_META, 0,
                    f"Model prefix collision: '{n1}' ({m1}) vs '{n2}' ({m2})"))
# ------------------------------------------------------------------
# CHECK 2 — config sync drift across 4 sources
# ------------------------------------------------------------------
def check2_config_sync():
    """CHECK 2: report agent model values that differ from kilo-meta.json.

    For every agent in kilo-meta.json that has a ``model``, the value is
    compared with the model found for the same agent in kilo.jsonc (regex
    scan), capability-index.yaml and the agent's Markdown frontmatter. Each
    mismatch is appended to ``failures``. Agents that appear in a derived
    source but not in kilo-meta.json are also appended to ``failures``.

    Line numbers are reported as 0; positions are not tracked.
    """
    meta = load_json(KILO_META)
    agents = meta.get("agents", {})

    # --- kilo.jsonc ---
    # NOTE(review): the literal leading whitespace inside both patterns anchors
    # on kilo.jsonc's indentation -- confirm it matches the file's real layout.
    # NOTE(review): the "model" search runs from the agent header to the end of
    # the file, so an agent block without its own "model" line would pick up a
    # later block's value -- confirm every agent block declares a model.
    kj_models = {}
    if os.path.exists(KILO_JSONC):
        with open(KILO_JSONC, "r", encoding="utf-8") as f:
            raw = f.read()
        for name in agents:
            m = re.search(rf'^ "{re.escape(name)}"\s*:\s*\{{', raw, re.MULTILINE)
            if m:
                model_match = re.search(rf'^ "model"\s*:\s*"([^"]*)"', raw[m.start():], re.MULTILINE)
                if model_match:
                    kj_models[name] = model_match.group(1)

    # --- capability-index.yaml ---
    ci_models = {}
    data = load_yaml(CAP_INDEX)
    # An empty file (None) or a non-mapping "agents" value is reported as a
    # FAIL instead of crashing with an AttributeError traceback.
    ci_agents = data.get("agents", {}) if isinstance(data, dict) else None
    if not isinstance(ci_agents, dict):
        failures.append(fmt_fail("capability-index.yaml", 0,
            "capability-index.yaml is empty or has no 'agents' mapping"))
        ci_agents = {}
    for name, block in ci_agents.items():
        if isinstance(block, dict) and "model" in block:
            ci_models[name] = block["model"]

    # --- agent frontmatters ---
    fm_models = {}
    for filepath in sorted(glob.glob(os.path.join(AGENTS_DIR, "*.md"))):
        fname = os.path.basename(filepath)
        name = fname[:-3]  # drop the ".md" suffix
        with open(filepath, "r", encoding="utf-8") as f:
            content = f.read()
        if not content.startswith("---"):
            continue
        parts = content.split("---", 2)
        if len(parts) < 3:
            continue
        fm_text = parts[1]
        model_match = re.search(r'^model:\s*(.*)$', fm_text, re.MULTILINE)
        if model_match:
            value = model_match.group(1).strip().strip('"')
            # YAML also allows single-quoted scalars; unwrap them so that
            # model: 'x' is not reported as differing from x.
            if len(value) >= 2 and value[0] == "'" and value[-1] == "'":
                value = value[1:-1]
            fm_models[name] = value

    # compare all 4
    for name, agent_meta in agents.items():
        meta_model = agent_meta.get("model")
        if meta_model is None:
            continue
        if name in kj_models and kj_models[name] != meta_model:
            failures.append(fmt_fail("kilo.jsonc", 0,
                f"Agent '{name}' model mismatch: kilo-meta='{meta_model}' vs kilo.jsonc='{kj_models[name]}'"))
        if name in ci_models and ci_models[name] != meta_model:
            failures.append(fmt_fail("capability-index.yaml", 0,
                f"Agent '{name}' model mismatch: kilo-meta='{meta_model}' vs capability-index='{ci_models[name]}'"))
        if name in fm_models and fm_models[name] != meta_model:
            failures.append(fmt_fail(f".kilo/agents/{name}.md", 0,
                f"Agent '{name}' model mismatch: kilo-meta='{meta_model}' vs frontmatter='{fm_models[name]}'"))

    # agents present in derived files but missing in kilo-meta
    # NOTE(review): kj_models is only filled for names taken from `agents`, so
    # the kilo.jsonc pass of this loop cannot currently report anything.
    for src, src_name in [(kj_models, "kilo.jsonc"),
                          (ci_models, "capability-index"),
                          (fm_models, "agent frontmatter")]:
        for name in src:
            if name not in agents:
                failures.append(fmt_fail("kilo-meta.json", 0,
                    f"Stale agent '{name}' found in {src_name} but absent in kilo-meta.json"))
# ------------------------------------------------------------------
# CHECK 3 — USE_MOCK in production scripts
# ------------------------------------------------------------------
def check3_use_mock():
    """CHECK 3: report ``USE_MOCK`` / ``MOCK`` tokens in files under scripts/.

    Walks ``SCRIPTS_DIR``, skipping any directory named ``tests`` and this
    script itself, and scans ``.py``, ``.cjs``, ``.ts``, ``.js`` and ``.sh``
    files line by line. Text after the first ``#`` or ``//`` on a line is
    ignored. Every remaining hit is appended to ``failures`` with its
    repo-relative path and 1-based line number.
    """
    mock_re = re.compile(r'\bUSE_MOCK\b|\bMOCK\b')
    extensions = (".py", ".cjs", ".ts", ".js", ".sh")
    for root, dirs, files in os.walk(SCRIPTS_DIR):
        # Prune "tests" directories below SCRIPTS_DIR in place. Testing the
        # components of the absolute path instead would skip the whole tree
        # whenever the repository is checked out under a directory that is
        # itself called "tests". Sorting keeps the report order stable.
        dirs[:] = sorted(d for d in dirs if d != "tests")
        for fname in sorted(files):
            if not fname.endswith(extensions):
                continue
            # skip self
            if fname == "cross-checker.py":
                continue
            path = os.path.join(root, fname)
            rel = os.path.relpath(path, ROOT)
            with open(path, "r", encoding="utf-8", errors="replace") as f:
                for lineno, line in enumerate(f, 1):
                    # exclude comments that just mention the word mock
                    stripped = line.split("#", 1)[0].split("//", 1)[0]
                    if mock_re.search(stripped):
                        failures.append(fmt_fail(rel, lineno,
                            "USE_MOCK/MOCK found in production script"))
# ------------------------------------------------------------------
# CHECK 4 — unquoted color in YAML frontmatter
# ------------------------------------------------------------------
def check4_unquoted_color():
    """CHECK 4: report ``color: #...`` frontmatter lines whose value is unquoted.

    Scans the frontmatter (text between the first two ``---`` markers) of
    every ``*.md`` file in ``AGENTS_DIR`` and appends a failure for each line
    that starts with ``color:`` followed directly by ``#``.
    """
    bare_color = re.compile(r'^color:\s*#')
    quoted_color = re.compile(r'^color:\s*"#')
    agent_files = sorted(glob.glob(os.path.join(AGENTS_DIR, "*.md")))
    for md_path in agent_files:
        rel = os.path.relpath(md_path, ROOT)
        with open(md_path, "r", encoding="utf-8") as handle:
            text = handle.read()
        # Files without an opening and closing "---" marker have no frontmatter.
        pieces = text.split("---", 2) if text.startswith("---") else []
        if len(pieces) < 3:
            continue
        frontmatter_lines = pieces[1].splitlines()
        for lineno, fm_line in enumerate(frontmatter_lines, 1):
            # unquoted color: starts with "color: #" but not "color: \"#"
            if bare_color.search(fm_line) is None:
                continue
            if quoted_color.search(fm_line) is not None:
                continue
            failures.append(fmt_fail(rel, lineno,
                f"Unquoted color in YAML frontmatter: {fm_line.strip()}"))
# ------------------------------------------------------------------
# CHECK 5 — hardcoded API keys in scripts/
# ------------------------------------------------------------------
def check5_hardcoded_keys():
    """CHECK 5: report lines under scripts/ that look like hardcoded secrets.

    A line matches when an identifier containing ``KEY``, ``TOKEN``,
    ``SECRET``, ``PASS`` or ``API`` is assigned a quoted value of at least 20
    characters with no whitespace or quotes in it. Directories named ``tests``
    are skipped, only ``.py``, ``.cjs``, ``.ts``, ``.js`` and ``.sh`` files are
    read, and text after the first ``#`` or ``//`` on a line is ignored.
    Matches are appended to ``failures``.
    """
    secret_re = re.compile(r'\b\w*(?:KEY|TOKEN|SECRET|PASS|API)\w*\s*=\s*["\'][^\s"\']{20,}["\']')
    extensions = (".py", ".cjs", ".ts", ".js", ".sh")
    for root, dirs, files in os.walk(SCRIPTS_DIR):
        # Prune "tests" directories below SCRIPTS_DIR in place. Testing the
        # components of the absolute path instead would skip the whole tree
        # whenever the repository is checked out under a directory that is
        # itself called "tests". Sorting keeps the report order stable.
        dirs[:] = sorted(d for d in dirs if d != "tests")
        for fname in sorted(files):
            if not fname.endswith(extensions):
                continue
            path = os.path.join(root, fname)
            rel = os.path.relpath(path, ROOT)
            with open(path, "r", encoding="utf-8", errors="replace") as f:
                for lineno, line in enumerate(f, 1):
                    # ignore comments
                    stripped = line.split("#", 1)[0].split("//", 1)[0]
                    if secret_re.search(stripped):
                        failures.append(fmt_fail(rel, lineno,
                            "Hardcoded API key pattern detected"))
# ------------------------------------------------------------------
# CHECK 6 — event.target in dashboard code (agent-evolution non-archive)
# ------------------------------------------------------------------
def check6_event_target():
    """CHECK 6: report ``event.target`` occurrences under agent-evolution/.

    Walks ``EVOLUTION_DIR``, skipping any directory named ``archive``, and
    scans ``.html``, ``.js``, ``.ts``, ``.jsx`` and ``.tsx`` files line by
    line. Every line containing the literal text ``event.target`` is appended
    to ``failures`` with its repo-relative path and 1-based line number.
    """
    extensions = (".html", ".js", ".ts", ".jsx", ".tsx")
    for root, dirs, files in os.walk(EVOLUTION_DIR):
        # Prune "archive" directories below EVOLUTION_DIR in place. Testing
        # the components of the absolute path instead would skip the whole
        # tree whenever the repository lives under a directory that is itself
        # called "archive". Sorting keeps the report order stable.
        dirs[:] = sorted(d for d in dirs if d != "archive")
        for fname in sorted(files):
            if not fname.endswith(extensions):
                continue
            path = os.path.join(root, fname)
            rel = os.path.relpath(path, ROOT)
            with open(path, "r", encoding="utf-8", errors="replace") as f:
                for lineno, line in enumerate(f, 1):
                    if "event.target" in line:
                        failures.append(fmt_fail(rel, lineno,
                            "Dashboard is frozen; event.target fixes belong in archive only"))
# ------------------------------------------------------------------
# Main
# ------------------------------------------------------------------
def main():
    """Run all six checks in order, print the report and exit.

    Warnings are printed before failures. The exit status is 1 when at least
    one failure was recorded and 0 otherwise.
    """
    checks = (
        check1_stale_fallbacks,
        check2_config_sync,
        check3_use_mock,
        check4_unquoted_color,
        check5_hardcoded_keys,
        check6_event_target,
    )
    for run_check in checks:
        run_check()

    for message in warnings + failures:
        print(message)

    sys.exit(1 if failures else 0)
# Run the gate only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()