#!/bin/bash
# Evolution Test Runner
# Runs pipeline-judge tests with precise measurements.
#
# Usage: $0 [feature|bugfix|refactor|security|all]   (default: feature)
#
# Requires Docker with Compose (either the standalone `docker-compose` binary
# or the `docker compose` plugin). jq is used to format fitness scores; when
# it is missing the raw log lines are printed instead.
#
# The compose file and fitness log are addressed by relative paths, so the
# script must be started from the directory that contains them.

set -euo pipefail

# Colors for output (real escape bytes, so plain printf '%s' can emit them).
readonly RED=$'\033[0;31m'
readonly GREEN=$'\033[0;32m'
readonly YELLOW=$'\033[1;33m'
readonly BLUE=$'\033[0;34m'
readonly NC=$'\033[0m' # No Color

readonly COMPOSE_FILE='docker/evolution-test/docker-compose.yml'
readonly FITNESS_LOG='.kilo/logs/fitness-history.jsonl'
readonly DAEMON_WAIT_SECS=60   # upper bound on waiting for the Docker daemon
readonly DAEMON_POLL_SECS=2

# Compose command as an argv array; filled in by detect_compose.
COMPOSE=()

# Print a message wrapped in a color.
# Arguments: $1 - color escape, $2 - message
say() {
  printf '%s%s%s\n' "$1" "$2" "$NC"
}

# Reject anything that is not a known workflow name.
# Done before any Docker work so a typo does not cost an image build.
# Arguments: $1 - workflow name
validate_workflow() {
  case "$1" in
    feature|bugfix|refactor|security|all)
      ;;
    *)
      {
        say "$RED" "Unknown workflow: $1"
        echo "Usage: $0 [feature|bugfix|refactor|security|all]"
      } >&2
      exit 1
      ;;
  esac
}

# Exit with installation hints when the docker CLI is not on PATH.
require_docker() {
  if command -v docker &>/dev/null; then
    return 0
  fi
  {
    say "$RED" "Error: Docker not found"
    echo "Please install Docker Desktop first:"
    echo " winget install Docker.DockerDesktop"
    echo ""
    echo "Or use alternatives:"
    echo " 1. Use WSL2 with Docker"
    echo " 2. Run tests locally (less precise):"
    echo " bun test --reporter=json --coverage"
  } >&2
  exit 1
}

# Make sure the Docker daemon answers; try to launch Docker Desktop if not.
# Polls `docker info` instead of sleeping a fixed time, and fails with a clear
# message if the daemon never comes up.
ensure_daemon() {
  if docker info &>/dev/null; then
    return 0
  fi

  say "$YELLOW" "Warning: Docker daemon not running" >&2
  echo "Starting Docker Desktop..."
  # Best effort only: neither launcher may exist on this platform, in which
  # case the poll below reports the failure.
  open -a "Docker" 2>/dev/null || start "Docker Desktop" 2>/dev/null || true

  local waited=0
  until docker info &>/dev/null; do
    if (( waited >= DAEMON_WAIT_SECS )); then
      say "$RED" "Error: Docker daemon not ready after ${DAEMON_WAIT_SECS}s" >&2
      exit 1
    fi
    sleep "$DAEMON_POLL_SECS"
    waited=$(( waited + DAEMON_POLL_SECS ))
  done
}

# Pick the Compose front end: the standalone binary if present (the original
# behavior), otherwise the `docker compose` CLI plugin.
detect_compose() {
  if command -v docker-compose &>/dev/null; then
    COMPOSE=(docker-compose)
  elif docker compose version &>/dev/null; then
    COMPOSE=(docker compose)
  else
    say "$RED" "Error: neither 'docker-compose' nor 'docker compose' is available" >&2
    exit 1
  fi
}

# Run a Compose subcommand against the evolution-test compose file.
# Arguments: Compose subcommand and its arguments
compose() {
  "${COMPOSE[@]}" -f "$COMPOSE_FILE" "$@"
}

# EXIT trap: tear down containers and volumes on every exit path, then exit
# with the status the script was already exiting with.
# Arguments: $1 - exit status at the time the trap fired
cleanup() {
  local status=$1
  echo ""
  say "$BLUE" "Cleaning up..."
  # Teardown is best effort; a failure here must not mask the real status.
  compose down -v 2>/dev/null || true
  if (( status == 0 )); then
    say "$GREEN" "Done!"
  fi
  exit "$status"
}

# Start the service(s) for the requested workflow.
# Arguments: $1 - validated workflow name
run_workflow() {
  local workflow=$1
  case "$workflow" in
    feature|bugfix|refactor|security)
      compose up "evolution-${workflow}"
      ;;
    all)
      say "$BLUE" "Running ALL workflow tests in parallel..."
      compose up
      compose up fitness-aggregator
      ;;
  esac
}

# Print the last four entries of the fitness log, one summary line each.
print_results() {
  echo ""
  say "$BLUE" "=== Test Results ==="

  if [[ ! -f "$FITNESS_LOG" ]]; then
    say "$YELLOW" "No fitness history found"
    return 0
  fi

  say "$GREEN" "Latest fitness scores:"

  if ! command -v jq &>/dev/null; then
    say "$YELLOW" "Warning: jq not found; showing raw entries" >&2
    tail -n 4 "$FITNESS_LOG"
    return 0
  fi

  # One jq call per entry; missing/null fields render as empty strings.
  local -r summary_filter='"\(.workflow // ""): fitness=\(.fitness // ""), time=\(.time_ms // "")ms, tokens=\(.tokens // "")"'
  local entry summary
  # `|| [[ -n ... ]]` keeps a final line that lacks a trailing newline.
  while IFS= read -r entry || [[ -n "$entry" ]]; do
    [[ -n "$entry" ]] || continue
    if summary=$(jq -r "$summary_filter" <<<"$entry" 2>/dev/null); then
      printf ' %s\n' "$summary"
    else
      # A single corrupt line should not abort the run or hide other scores.
      say "$YELLOW" " (skipping malformed entry)" >&2
    fi
  done < <(tail -n 4 "$FITNESS_LOG")
}

main() {
  local workflow=${1:-feature}

  say "$BLUE" "=== Evolution Test Runner ==="
  echo ""

  validate_workflow "$workflow"
  require_docker
  ensure_daemon
  detect_compose

  # From here on containers may exist, so guarantee teardown.
  trap 'cleanup "$?"' EXIT

  say "$BLUE" "Building evolution test container..."
  compose build

  say "$GREEN" "Running evolution test for: ${workflow}"
  run_workflow "$workflow"

  print_results
}

main "$@"