#!/bin/bash
# Evolution Test Runner (Local Fallback)
# Runs pipeline-judge tests without Docker - less precise but works immediately
#
# Usage:  <script> [feature|bugfix|refactor|security|all]   (default: feature)
# Exit:   0 = PASS, 1 = MARGINAL (also usage / missing-bun errors), 2 = FAIL.
#         In "all" mode every workflow is run and the worst status is returned.
# Needs:  bun (required); jq (optional - test counters fall back to 0 without it).
# Writes: .kilo/logs/fitness-history.jsonl (one JSON line appended per run).

set -eu

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

#######################################
# Print the current time in epoch milliseconds.
# `date +%s%3N` is a GNU extension; other implementations may exit 0 but emit
# non-numeric text, so the value is validated before being trusted and a
# whole-second fallback (padded with 000) is used otherwise.
# Outputs: integer milliseconds on stdout
#######################################
now_ms() {
  local stamp
  stamp=$(date +%s%3N 2>/dev/null || true)
  if [[ "$stamp" =~ ^[0-9]+$ ]]; then
    printf '%s\n' "$stamp"
  else
    printf '%s000\n' "$(date +%s)"
  fi
}

#######################################
# Read a non-negative integer counter from the JSON test report.
# Arguments: $1 - top-level key name, $2 - report file
# Outputs:   the integer on stdout; 0 if jq is missing, the file is not
#            parseable, the key is absent, or the value is not an integer
#######################################
json_count() {
  local key=$1 file=$2 value
  # Only the first output line is kept: jq may print a value and then fail on
  # trailing non-JSON text, which would otherwise yield a multi-line result.
  value=$(jq -r --arg k "$key" '.[$k] // 0' "$file" 2>/dev/null | head -n 1) || true
  if [[ "$value" =~ ^[0-9]+$ ]]; then
    printf '%s\n' "$value"
  else
    printf '0\n'
  fi
}

echo -e "${BLUE}=== Evolution Test Runner (Local) ===${NC}"
echo ""

# Check bun
if ! command -v bun >/dev/null 2>&1; then
  echo -e "${RED}Error: bun not found${NC}" >&2
  echo "Install bun first:" >&2
  echo " curl -fsSL https://bun.sh/install | bash" >&2
  exit 1
fi

# Get workflow type
WORKFLOW=${1:-feature}

echo -e "${GREEN}Running evolution test for: ${WORKFLOW}${NC}"
echo ""

# Set budget based on workflow
case "$WORKFLOW" in
  feature)
    TOKEN_BUDGET=50000
    TIME_BUDGET=300
    MIN_COVERAGE=80
    ;;
  bugfix)
    TOKEN_BUDGET=20000
    TIME_BUDGET=120
    MIN_COVERAGE=90
    ;;
  refactor)
    TOKEN_BUDGET=40000
    TIME_BUDGET=240
    MIN_COVERAGE=95
    ;;
  security)
    TOKEN_BUDGET=30000
    TIME_BUDGET=180
    MIN_COVERAGE=80
    ;;
  all)
    echo -e "${YELLOW}Running all workflows sequentially...${NC}"
    # Each child exits non-zero for MARGINAL/FAIL; capture the status so that
    # `set -e` does not abort the loop, and report the worst one at the end.
    overall=0
    for w in feature bugfix refactor security; do
      rc=0
      "$0" "$w" || rc=$?
      if (( rc > overall )); then
        overall=$rc
      fi
    done
    exit "$overall"
    ;;
  *)
    echo -e "${RED}Unknown workflow: ${WORKFLOW}${NC}" >&2
    echo "Usage: $0 [feature|bugfix|refactor|security|all]" >&2
    exit 1
    ;;
esac

echo "Token Budget: ${TOKEN_BUDGET}"
echo "Time Budget: ${TIME_BUDGET}s"
echo "Min Coverage: ${MIN_COVERAGE}%"
echo ""

if ! command -v jq >/dev/null 2>&1; then
  echo -e "${YELLOW}Warning: jq not found - test counters will be reported as 0${NC}" >&2
fi

# Create logs directory
mkdir -p .kilo/logs

# Private, unpredictable scratch file for the test output; removed on exit.
RESULTS_FILE=$(mktemp "${TMPDIR:-/tmp}/test-results.XXXXXX")
trap 'rm -f -- "$RESULTS_FILE"' EXIT

# Run tests with precise timing
echo -e "${BLUE}Running tests...${NC}"
START_MS=$(now_ms)

# Run bun test with coverage. Test failures must not abort the script: they
# are scored below, hence the deliberate `|| true`.
bun test --reporter=json --coverage 2>&1 | tee "$RESULTS_FILE" || true

END_MS=$(now_ms)
TIME_MS=$((END_MS - START_MS))

echo ""
echo -e "${BLUE}=== Test Results ===${NC}"

# Parse test results
TOTAL=$(json_count numTotalTests "$RESULTS_FILE")
PASSED=$(json_count numPassedTests "$RESULTS_FILE")
FAILED=$(json_count numFailedTests "$RESULTS_FILE")
SKIPPED=$(json_count numPendingTests "$RESULTS_FILE")

# Calculate pass rate with 2 decimals
if (( TOTAL > 0 )); then
  PASS_RATE=$(awk -v p="$PASSED" -v t="$TOTAL" 'BEGIN {printf "%.2f", p / t * 100}')
else
  PASS_RATE="0.00"
fi

echo "Tests: ${PASSED}/${TOTAL} passed (${PASS_RATE}%)"
echo "Time: ${TIME_MS}ms"

# Quality gates
echo ""
echo -e "${BLUE}=== Quality Gates ===${NC}"

GATES_PASSED=0
TOTAL_GATES=5

# Gate 1: Build (judged by keywords in the output, not by exit status)
if bun run build 2>&1 | grep -Eq 'success|done|built'; then
  echo -e "${GREEN}✓${NC} Build: PASS"
  GATES_PASSED=$((GATES_PASSED + 1))
else
  echo -e "${RED}✗${NC} Build: FAIL"
fi

# Gate 2: Lint
if bun run lint 2>&1 | grep -Eq '0 problems|No errors'; then
  echo -e "${GREEN}✓${NC} Lint: PASS"
  GATES_PASSED=$((GATES_PASSED + 1))
else
  echo -e "${RED}✗${NC} Lint: FAIL (or no lint config)"
  GATES_PASSED=$((GATES_PASSED + 1)) # Don't penalize missing lint
fi

# Gate 3: Typecheck (fails only when a TypeScript error marker is printed)
if bun run typecheck 2>&1 | grep -q "error TS"; then
  echo -e "${RED}✗${NC} Types: FAIL"
else
  echo -e "${GREEN}✓${NC} Types: PASS"
  GATES_PASSED=$((GATES_PASSED + 1))
fi

# Gate 4: Tests clean
if (( FAILED == 0 )); then
  echo -e "${GREEN}✓${NC} Tests Clean: PASS"
  GATES_PASSED=$((GATES_PASSED + 1))
else
  echo -e "${RED}✗${NC} Tests Clean: FAIL (${FAILED} failures)"
fi

# Gate 5: Coverage - 4th whitespace-separated field of the first "All files"
# row of the coverage table. Anything missing or non-numeric counts as 0.
COVERAGE=$(awk '/All files/ {gsub(/%/, "", $4); print $4; exit}' "$RESULTS_FILE" 2>/dev/null || true)
if [[ ! "$COVERAGE" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
  COVERAGE=0
fi

if awk -v c="$COVERAGE" -v m="$MIN_COVERAGE" 'BEGIN {exit !(c >= m)}'; then
  echo -e "${GREEN}✓${NC} Coverage: PASS (${COVERAGE}%)"
  GATES_PASSED=$((GATES_PASSED + 1))
else
  echo -e "${RED}✗${NC} Coverage: FAIL (${COVERAGE}% < ${MIN_COVERAGE}%)"
fi

# Calculate fitness
echo ""
echo -e "${BLUE}=== Fitness Score ===${NC}"

# The 0.001 in the denominator avoids division by zero when no tests ran.
TEST_RATE=$(awk -v p="$PASSED" -v t="$TOTAL" 'BEGIN {printf "%.4f", p / (t + 0.001)}')
GATES_RATE=$(awk -v g="$GATES_PASSED" -v n="$TOTAL_GATES" 'BEGIN {printf "%.4f", g / n}')

# Efficiency: normalized cost (tokens/time)
# Assume average tokens per test based on budget
TOKENS_PER_TEST=$(awk -v b="$TOKEN_BUDGET" 'BEGIN {printf "%.0f", b / 10}')
EST_TOKENS=$((TOTAL * TOKENS_PER_TEST))
TIME_S=$(awk -v ms="$TIME_MS" 'BEGIN {printf "%.2f", ms / 1000}')

NORMALIZED_COST=$(awk -v tok="$EST_TOKENS" -v tb="$TOKEN_BUDGET" \
  -v s="$TIME_S" -v sb="$TIME_BUDGET" \
  'BEGIN {printf "%.4f", (tok / tb * 0.5) + (s / sb * 0.5)}')
# Cost is capped at 1 so efficiency never goes negative.
EFFICIENCY=$(awk -v c="$NORMALIZED_COST" 'BEGIN {printf "%.4f", 1 - (c > 1 ? 1 : c)}')

# Final fitness score
FITNESS=$(awk -v t="$TEST_RATE" -v g="$GATES_RATE" -v e="$EFFICIENCY" \
  'BEGIN {printf "%.2f", (t * 0.50) + (g * 0.25) + (e * 0.25)}')

TEST_PART=$(awk -v t="$TEST_RATE" 'BEGIN {printf "%.2f", t * 0.50}')
GATES_PART=$(awk -v g="$GATES_RATE" 'BEGIN {printf "%.2f", g * 0.25}')
EFF_PART=$(awk -v e="$EFFICIENCY" 'BEGIN {printf "%.2f", e * 0.25}')

echo ""
echo -e "| Metric | Value | Weight | Contribution |"
echo -e "|--------|-------|--------|--------------|"
echo -e "| Tests | ${PASS_RATE}% | 50% | ${TEST_PART} |"
echo -e "| Gates | ${GATES_PASSED}/${TOTAL_GATES} | 25% | ${GATES_PART} |"
echo -e "| Efficiency | ${TIME_MS}ms / ${EST_TOKENS}tok | 25% | ${EFF_PART} |"
echo ""
echo -e "${GREEN}Fitness Score: ${FITNESS}${NC}"

# Determine verdict
if awk -v f="$FITNESS" 'BEGIN {exit !(f >= 0.85)}'; then
  VERDICT="PASS"
elif awk -v f="$FITNESS" 'BEGIN {exit !(f >= 0.70)}'; then
  VERDICT="MARGINAL"
else
  VERDICT="FAIL"
fi

echo -e "Verdict: ${VERDICT}"

# Log to fitness-history.jsonl
# NOTE(review): the original entry template was not recoverable from this copy
# of the file; the field names below are a reconstruction from the values the
# script computes - confirm against any consumer of fitness-history.jsonl.
# Every interpolated value is either validated as numeric above or comes from
# the fixed workflow/verdict sets, so no JSON escaping is required.
TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
LOG_ENTRY=$(printf '{"timestamp":"%s","workflow":"%s","fitness":%s,"verdict":"%s","tests":{"total":%s,"passed":%s,"failed":%s,"skipped":%s},"pass_rate":%s,"gates_passed":%s,"total_gates":%s,"coverage":%s,"time_ms":%s,"est_tokens":%s}' \
  "$TIMESTAMP" "$WORKFLOW" "$FITNESS" "$VERDICT" \
  "$TOTAL" "$PASSED" "$FAILED" "$SKIPPED" \
  "$PASS_RATE" "$GATES_PASSED" "$TOTAL_GATES" "$COVERAGE" \
  "$TIME_MS" "$EST_TOKENS")
printf '%s\n' "$LOG_ENTRY" >> .kilo/logs/fitness-history.jsonl

echo ""
echo -e "${BLUE}Logged to .kilo/logs/fitness-history.jsonl${NC}"

# Trigger improvement if needed
if awk -v f="$FITNESS" 'BEGIN {exit !(f < 0.70)}'; then
  echo ""
  echo -e "${YELLOW}⚠ Fitness below threshold (0.70)${NC}"
  echo "Running prompt-optimizer is recommended."
  echo ""
  echo "Command: /evolution --workflow ${WORKFLOW}"
fi

# Summary
echo ""
echo -e "${GREEN}=== Summary ===${NC}"
echo "Workflow: ${WORKFLOW}"
echo "Tests: ${PASSED}/${TOTAL} passed (${PASS_RATE}%)"
echo "Quality Gates: ${GATES_PASSED}/${TOTAL_GATES}"
echo "Time: ${TIME_MS}ms"
echo "Fitness: ${FITNESS} (${VERDICT})"
echo ""

# Exit with appropriate code
case "$VERDICT" in
  PASS) exit 0 ;;
  MARGINAL) exit 1 ;;
  *) exit 2 ;;
esac