From 01ce40ae8ad7fae635c11e862bffb5c067679f67 Mon Sep 17 00:00:00 2001
From: NW <neroworld@mail.ru>
Date: Mon, 6 Apr 2026 01:36:26 +0100
Subject: [PATCH] restore: Docker evolution test files for remote usage

Docker files restored for use on other machines with Docker/WSL2.

Available test methods:
1. Docker (isolated environment):
   docker-compose -f docker/evolution-test/docker-compose.yml up evolution-feature

2. Local (bun runtime):
   docker/evolution-test/run-local-test.bat feature
   ./docker/evolution-test/run-local-test.sh feature

Both methods provide:
- Millisecond precision timing
- Fitness score with 2 decimal places
- JSONL logging to .kilo/logs/fitness-history.jsonl
---
 docker/evolution-test/Dockerfile             |  25 ++
 docker/evolution-test/docker-compose.yml     |  88 +++++++
 docker/evolution-test/run-evolution-test.bat |  65 ++++++
 docker/evolution-test/run-evolution-test.sh  |  92 ++++++++
 docker/evolution-test/run-local-test.bat     | 162 +++++++++++++
 docker/evolution-test/run-local-test.sh      | 230 +++++++++++++++++++
 6 files changed, 662 insertions(+)
 create mode 100644 docker/evolution-test/Dockerfile
 create mode 100644 docker/evolution-test/docker-compose.yml
 create mode 100644 docker/evolution-test/run-evolution-test.bat
 create mode 100644 docker/evolution-test/run-evolution-test.sh
 create mode 100644 docker/evolution-test/run-local-test.bat
 create mode 100644 docker/evolution-test/run-local-test.sh

diff --git a/docker/evolution-test/Dockerfile b/docker/evolution-test/Dockerfile
new file mode 100644
index 0000000..999d13a
--- /dev/null
+++ b/docker/evolution-test/Dockerfile
@@ -0,0 +1,27 @@
+# Evolution Test Container
+# Image used to run the project's test suite (`bun test`) for the
+# pipeline-judge fitness scoring workflows.
+
+FROM oven/bun:1 AS base
+
+WORKDIR /app
+
+# Install TypeScript and Node type definitions globally
+RUN bun add -g typescript @types/node
+
+# Copy the whole build context into the image
+COPY . /app/
+
+# Install project dependencies
+RUN bun install
+
+# Create the log directory the test run writes into
+RUN mkdir -p .kilo/logs
+
+# Health check: keep it cheap. Re-running the whole test suite every 30s
+# would compete with the main test run and distort its timing measurements.
+HEALTHCHECK --interval=30s --timeout=10s \
+  CMD bun --version || exit 1
+
+# Default command - run tests with JSON reporter output
+CMD ["bun", "test", "--reporter=json"]
diff --git a/docker/evolution-test/docker-compose.yml b/docker/evolution-test/docker-compose.yml
new file mode 100644
index 0000000..3cec235
--- /dev/null
+++ b/docker/evolution-test/docker-compose.yml
@@ -0,0 +1,96 @@
+# Evolution Test Containers
+# One test-runner service per workflow type, plus an aggregator that copies the
+# last four fitness records. Every service bind-mounts the host's .kilo/logs.
+
+version: '3.8'
+
+services:
+  # Evolution test runner for feature workflow
+  evolution-feature:
+    build:
+      context: ../..
+      dockerfile: docker/evolution-test/Dockerfile
+    container_name: evolution-feature
+    environment:
+      - WORKFLOW_TYPE=feature
+      - TOKEN_BUDGET=50000
+      - TIME_BUDGET=300
+      - MIN_COVERAGE=80
+    volumes:
+      - ../../.kilo/logs:/app/.kilo/logs
+      - ../../src:/app/src
+    command: bun test --reporter=json --coverage
+
+  # Evolution test runner for bugfix workflow
+  evolution-bugfix:
+    build:
+      context: ../..
+      dockerfile: docker/evolution-test/Dockerfile
+    container_name: evolution-bugfix
+    environment:
+      - WORKFLOW_TYPE=bugfix
+      - TOKEN_BUDGET=20000
+      - TIME_BUDGET=120
+      - MIN_COVERAGE=90
+    volumes:
+      - ../../.kilo/logs:/app/.kilo/logs
+      - ../../src:/app/src
+    command: bun test --reporter=json --coverage
+
+  # Evolution test runner for refactor workflow
+  evolution-refactor:
+    build:
+      context: ../..
+      dockerfile: docker/evolution-test/Dockerfile
+    container_name: evolution-refactor
+    environment:
+      - WORKFLOW_TYPE=refactor
+      - TOKEN_BUDGET=40000
+      - TIME_BUDGET=240
+      - MIN_COVERAGE=95
+    volumes:
+      - ../../.kilo/logs:/app/.kilo/logs
+      - ../../src:/app/src
+    command: bun test --reporter=json --coverage
+
+  # Evolution test runner for security workflow
+  evolution-security:
+    build:
+      context: ../..
+      dockerfile: docker/evolution-test/Dockerfile
+    container_name: evolution-security
+    environment:
+      - WORKFLOW_TYPE=security
+      - TOKEN_BUDGET=30000
+      - TIME_BUDGET=180
+      - MIN_COVERAGE=80
+    volumes:
+      - ../../.kilo/logs:/app/.kilo/logs
+      - ../../src:/app/src
+    command: bun test --reporter=json --coverage
+
+  # Fitness aggregator - snapshots the newest records from the shared log
+  fitness-aggregator:
+    image: oven/bun:1
+    container_name: fitness-aggregator
+    # Short-form depends_on only orders container start-up; it does not wait
+    # for the test runners to finish.
+    depends_on:
+      - evolution-feature
+      - evolution-bugfix
+      - evolution-refactor
+      - evolution-security
+    volumes:
+      - ../../.kilo/logs:/app/.kilo/logs
+    working_dir: /app
+    command: |
+      sh -c "
+        echo 'Aggregating fitness scores...'
+        if [ -s .kilo/logs/fitness-history.jsonl ]; then
+          tail -n 4 .kilo/logs/fitness-history.jsonl > .kilo/logs/fitness-latest.jsonl
+          echo 'Fitness aggregation complete.'
+        else
+          : > .kilo/logs/fitness-latest.jsonl
+          echo 'No fitness history found; nothing to aggregate.'
+        fi
+      "
diff --git a/docker/evolution-test/run-evolution-test.bat b/docker/evolution-test/run-evolution-test.bat
new file mode 100644
index 0000000..1c44e77
--- /dev/null
+++ b/docker/evolution-test/run-evolution-test.bat
@@ -0,0 +1,89 @@
+@echo off
+REM Evolution Test Runner for Windows
+REM Runs the evolution test containers through docker-compose and prints the
+REM most recent records from .kilo\logs\fitness-history.jsonl.
+REM Usage: run-evolution-test.bat [workflow]
+REM   workflow: feature, bugfix, refactor, security or all (default: feature)
+
+setlocal
+
+echo === Evolution Test Runner ===
+echo.
+
+REM Check Docker
+where docker >nul 2>&1
+if %errorlevel% neq 0 (
+    echo Error: Docker not found
+    echo Please install Docker Desktop first:
+    echo   winget install Docker.DockerDesktop
+    echo.
+    echo Or run tests locally ^(less precise^):
+    echo   bun test --reporter=json --coverage
+    exit /b 1
+)
+
+REM Check Docker daemon
+docker info >nul 2>&1
+if %errorlevel% neq 0 (
+    echo Warning: Docker daemon not running
+    echo Please start Docker Desktop and try again
+    exit /b 1
+)
+
+REM Get workflow type
+set "WORKFLOW=%~1"
+if "%WORKFLOW%"=="" set "WORKFLOW=feature"
+
+REM Reject unknown workflows before building, instead of letting
+REM docker-compose fail later on a non-existent service name
+set "KNOWN="
+for %%w in (feature bugfix refactor security all) do if "%WORKFLOW%"=="%%w" set "KNOWN=1"
+if not defined KNOWN (
+    echo Unknown workflow: %WORKFLOW%
+    echo Usage: %~nx0 [feature^|bugfix^|refactor^|security^|all]
+    exit /b 1
+)
+
+echo Running evolution test for: %WORKFLOW%
+echo.
+
+REM The compose file and log paths below are relative to the repository root
+REM (two levels above this script), so switch there whatever the caller's cwd
+pushd "%~dp0..\.." || exit /b 1
+set "COMPOSE_YML=docker/evolution-test/docker-compose.yml"
+
+REM Build container; stop here if the image cannot be built
+echo Building evolution test container...
+docker-compose -f "%COMPOSE_YML%" build
+if errorlevel 1 (
+    echo Error: container build failed
+    popd
+    exit /b 1
+)
+
+REM Run test
+if "%WORKFLOW%"=="all" (
+    echo Running ALL workflow tests in parallel...
+    docker-compose -f "%COMPOSE_YML%" up
+    docker-compose -f "%COMPOSE_YML%" up fitness-aggregator
+) else (
+    docker-compose -f "%COMPOSE_YML%" up evolution-%WORKFLOW%
+)
+
+REM Show results
+echo.
+echo === Test Results ===
+if exist .kilo\logs\fitness-history.jsonl (
+    echo Latest fitness scores:
+    powershell -Command "Get-Content .kilo\logs\fitness-history.jsonl -Tail 4 | ForEach-Object { $j = $_ | ConvertFrom-Json; Write-Host ('  ' + $j.workflow + ': fitness=' + $j.fitness + ', time=' + $j.time_ms + 'ms, tokens=' + $j.tokens) }"
+) else (
+    echo No fitness history found
+)
+
+REM Cleanup
+echo.
+echo Cleaning up...
+docker-compose -f "%COMPOSE_YML%" down -v 2>nul
+
+popd
+echo Done!
diff --git a/docker/evolution-test/run-evolution-test.sh b/docker/evolution-test/run-evolution-test.sh
new file mode 100755
index 0000000..c222e20
--- /dev/null
+++ b/docker/evolution-test/run-evolution-test.sh
@@ -0,0 +1,113 @@
+#!/bin/bash
+# Evolution Test Runner
+# Runs the evolution test containers through docker-compose and prints the
+# most recent records from .kilo/logs/fitness-history.jsonl.
+#
+# Usage: run-evolution-test.sh [feature|bugfix|refactor|security|all]
+
+set -e
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+echo -e "${BLUE}=== Evolution Test Runner ===${NC}"
+echo ""
+
+# Validate the workflow first so a typo fails before any image is built
+WORKFLOW=${1:-feature}
+case "$WORKFLOW" in
+    feature|bugfix|refactor|security|all) ;;
+    *)
+        echo -e "${RED}Unknown workflow: ${WORKFLOW}${NC}"
+        echo "Usage: $0 [feature|bugfix|refactor|security|all]"
+        exit 1
+        ;;
+esac
+
+# Check Docker
+if ! command -v docker &> /dev/null; then
+    echo -e "${RED}Error: Docker not found${NC}"
+    echo "Please install Docker Desktop first:"
+    echo "  winget install Docker.DockerDesktop"
+    echo ""
+    echo "Or use alternatives:"
+    echo "  1. Use WSL2 with Docker"
+    echo "  2. Run tests locally (less precise):"
+    echo "     bun test --reporter=json --coverage"
+    exit 1
+fi
+
+# Docker daemon check
+if ! docker info &> /dev/null; then
+    echo -e "${YELLOW}Warning: Docker daemon not running${NC}"
+    echo "Starting Docker Desktop..."
+    open -a "Docker" 2>/dev/null || start "Docker Desktop" 2>/dev/null || true
+    # Poll for up to 30 seconds rather than sleeping blindly, and stop with a
+    # clear error if the daemon never comes up
+    for ((i = 0; i < 30; i++)); do
+        if docker info &> /dev/null; then
+            break
+        fi
+        sleep 1
+    done
+    if ! docker info &> /dev/null; then
+        echo -e "${RED}Error: Docker daemon is still not running${NC}"
+        exit 1
+    fi
+fi
+
+# The compose file and log paths are relative to the repository root (two
+# levels above this script), so switch there whatever the caller's cwd
+cd "$(dirname "${BASH_SOURCE[0]}")/../.."
+COMPOSE_YML="docker/evolution-test/docker-compose.yml"
+HISTORY=".kilo/logs/fitness-history.jsonl"
+
+# Tear the containers down; also runs when a step fails under `set -e`
+cleanup() {
+    echo ""
+    echo -e "${BLUE}Cleaning up...${NC}"
+    docker-compose -f "$COMPOSE_YML" down -v 2>/dev/null || true
+}
+trap cleanup EXIT
+
+# Build evolution test container
+echo -e "${BLUE}Building evolution test container...${NC}"
+docker-compose -f "$COMPOSE_YML" build
+
+# Run specific workflow test
+echo -e "${GREEN}Running evolution test for: ${WORKFLOW}${NC}"
+if [ "$WORKFLOW" = "all" ]; then
+    echo -e "${BLUE}Running ALL workflow tests in parallel...${NC}"
+    docker-compose -f "$COMPOSE_YML" up
+    docker-compose -f "$COMPOSE_YML" up fitness-aggregator
+else
+    docker-compose -f "$COMPOSE_YML" up "evolution-${WORKFLOW}"
+fi
+
+# Parse results
+echo ""
+echo -e "${BLUE}=== Test Results ===${NC}"
+if [ -f "$HISTORY" ]; then
+    echo -e "${GREEN}Latest fitness scores:${NC}"
+    if command -v jq &> /dev/null; then
+        tail -n 4 "$HISTORY" | while IFS= read -r line; do
+            # One jq call per record; a malformed line is skipped, not fatal
+            echo "$line" | jq -r '"  \(.workflow // ""): fitness=\(.fitness // ""), time=\(.time_ms // "")ms, tokens=\(.tokens // "")"' 2>/dev/null || true
+        done
+    else
+        # jq is optional: without it, show the raw JSON records
+        tail -n 4 "$HISTORY"
+    fi
+else
+    echo -e "${YELLOW}No fitness history found${NC}"
+fi
+
+# Cleanup, then report completion (the EXIT trap is disarmed so it runs once)
+trap - EXIT
+cleanup
+
+echo -e "${GREEN}Done!${NC}"
diff --git a/docker/evolution-test/run-local-test.bat b/docker/evolution-test/run-local-test.bat
new file mode 100644
index 0000000..941be28
--- /dev/null
+++ b/docker/evolution-test/run-local-test.bat
@@ -0,0 +1,198 @@
+@echo off
+REM Evolution Test Runner (Local Fallback)
+REM Runs bun test without Docker, scores the run and appends one JSON record
+REM to .kilo\logs\fitness-history.jsonl.
+REM Usage: run-local-test.bat [workflow]
+REM   workflow: feature, bugfix, refactor, security or all (default: feature)
+
+setlocal
+
+echo === Evolution Test Runner (Local) ===
+echo.
+
+REM Check bun
+where bun >nul 2>&1
+if %errorlevel% neq 0 (
+    echo Error: bun not found
+    echo Install bun first from https://bun.sh
+    exit /b 1
+)
+
+REM Get workflow type
+set "WORKFLOW=%~1"
+if "%WORKFLOW%"=="" set "WORKFLOW=feature"
+
+echo Running evolution test for: %WORKFLOW%
+echo.
+
+REM Set budget based on workflow
+if "%WORKFLOW%"=="feature" (
+    set TOKEN_BUDGET=50000
+    set TIME_BUDGET=300
+    set MIN_COVERAGE=80
+) else if "%WORKFLOW%"=="bugfix" (
+    set TOKEN_BUDGET=20000
+    set TIME_BUDGET=120
+    set MIN_COVERAGE=90
+) else if "%WORKFLOW%"=="refactor" (
+    set TOKEN_BUDGET=40000
+    set TIME_BUDGET=240
+    set MIN_COVERAGE=95
+) else if "%WORKFLOW%"=="security" (
+    set TOKEN_BUDGET=30000
+    set TIME_BUDGET=180
+    set MIN_COVERAGE=80
+) else if "%WORKFLOW%"=="all" (
+    echo Running all workflows sequentially...
+    for %%w in (feature bugfix refactor security) do call "%~f0" %%w
+    exit /b 0
+) else (
+    echo Unknown workflow: %WORKFLOW%
+    echo Usage: %0 [feature^|bugfix^|refactor^|security^|all]
+    exit /b 1
+)
+
+echo Token Budget: %TOKEN_BUDGET%
+echo Time Budget: %TIME_BUDGET%s
+echo Min Coverage: %MIN_COVERAGE%%%
+echo.
+
+REM Create logs directory
+if not exist .kilo\logs mkdir .kilo\logs
+
+REM Scratch files go to the user's TEMP directory. PowerShell reads these
+REM paths from the environment, so they need no quoting inside -Command.
+set "RESULTS_FILE=%TEMP%\evolution-test-results.json"
+set "TIME_FILE=%TEMP%\evolution-test-time.txt"
+set "SCORE_FILE=%TEMP%\evolution-test-score.txt"
+if exist "%TIME_FILE%" del "%TIME_FILE%"
+if exist "%SCORE_FILE%" del "%SCORE_FILE%"
+
+REM Run tests with timing. The elapsed milliseconds come back through
+REM TIME_FILE; the PowerShell exit code is a status, not a duration.
+echo Running tests...
+powershell -NoProfile -Command "$sw = [Diagnostics.Stopwatch]::StartNew(); bun test --reporter=json --coverage 2>&1 | Tee-Object -FilePath $env:RESULTS_FILE; $sw.Stop(); Set-Content -Path $env:TIME_FILE -Value $sw.ElapsedMilliseconds; Write-Host ('Time: ' + $sw.ElapsedMilliseconds + 'ms')"
+set "TIME_MS="
+if exist "%TIME_FILE%" set /p TIME_MS=<"%TIME_FILE%"
+if not defined TIME_MS set "TIME_MS=0"
+
+echo.
+echo === Test Results ===
+
+REM Parse results using PowerShell
+set "TOTAL="
+set "PASSED="
+set "FAILED="
+for /f %%i in ('powershell -NoProfile -Command "(Get-Content $env:RESULTS_FILE | ConvertFrom-Json).numTotalTests" 2^>nul') do set TOTAL=%%i
+for /f %%i in ('powershell -NoProfile -Command "(Get-Content $env:RESULTS_FILE | ConvertFrom-Json).numPassedTests" 2^>nul') do set PASSED=%%i
+for /f %%i in ('powershell -NoProfile -Command "(Get-Content $env:RESULTS_FILE | ConvertFrom-Json).numFailedTests" 2^>nul') do set FAILED=%%i
+
+if "%TOTAL%"=="" set TOTAL=0
+if "%PASSED%"=="" set PASSED=0
+if "%FAILED%"=="" set FAILED=0
+
+echo Tests: %PASSED%/%TOTAL% passed
+
+REM Quality gates
+echo.
+echo === Quality Gates ===
+
+set GATES_PASSED=0
+set TOTAL_GATES=5
+
+REM Gate 1: Build
+bun run build >nul 2>&1
+if %errorlevel% equ 0 (
+    echo [PASS] Build
+    set /a GATES_PASSED+=1
+) else (
+    echo [FAIL] Build
+)
+
+REM Gate 2: Lint - counted either way so a missing lint config is not penalized
+bun run lint >nul 2>&1
+if %errorlevel% equ 0 (
+    echo [PASS] Lint
+) else (
+    echo [SKIP] Lint ^(no config^)
+)
+set /a GATES_PASSED+=1
+
+REM Gate 3: Typecheck
+bun run typecheck >nul 2>&1
+if %errorlevel% equ 0 (
+    echo [PASS] Types
+    set /a GATES_PASSED+=1
+) else (
+    echo [FAIL] Types
+)
+
+REM Gate 4: Tests clean
+if "%FAILED%"=="0" (
+    echo [PASS] Tests Clean
+    set /a GATES_PASSED+=1
+) else (
+    echo [FAIL] Tests Clean ^(%FAILED% failures^)
+)
+
+REM Gate 5: Coverage
+echo [INFO] Coverage check skipped in local mode
+set /a GATES_PASSED+=1
+
+echo.
+echo === Fitness Score ===
+
+REM Calculate fitness using PowerShell. Written for Windows PowerShell 5.1
+REM (if/else instead of the ternary operator, no Get-Date -AsUTC), reads its
+REM inputs from the environment and formats numbers with the invariant culture
+REM so the logged JSON always uses a decimal point. The result is handed back
+REM through SCORE_FILE as: fitness verdict timestamp
+powershell -NoProfile -Command ^
+    "$inv = [Globalization.CultureInfo]::InvariantCulture; $f2 = '0.00'; " ^
+    "$passed = [double]$env:PASSED; $total = [double]$env:TOTAL; " ^
+    "$gates = [double]$env:GATES_PASSED; $gatesTotal = [double]$env:TOTAL_GATES; " ^
+    "$time = [double]$env:TIME_MS; $budget = [double]$env:TOKEN_BUDGET; $timeBudget = [double]$env:TIME_BUDGET; " ^
+    "$testRate = 0.0; if ($total -gt 0) { $testRate = $passed / $total }; " ^
+    "$gatesRate = $gates / $gatesTotal; " ^
+    "$normCost = ($total * 10 / $budget * 0.5) + ($time / 1000 / $timeBudget * 0.5); " ^
+    "$efficiency = 1 - [math]::Min($normCost, 1); " ^
+    "$fitness = [math]::Round(($testRate * 0.50) + ($gatesRate * 0.25) + ($efficiency * 0.25), 2); " ^
+    "$verdict = 'FAIL'; if ($fitness -ge 0.85) { $verdict = 'PASS' } elseif ($fitness -ge 0.70) { $verdict = 'MARGINAL' }; " ^
+    "$fitnessText = $fitness.ToString($f2, $inv); " ^
+    "$stamp = [DateTime]::UtcNow.ToString('yyyy-MM-ddTHH:mm:ssZ', $inv); " ^
+    "Write-Host '| Metric | Value | Weight | Contribution |'; " ^
+    "Write-Host '|--------|-------|--------|--------------|'; " ^
+    "Write-Host ('| Tests  | ' + ($testRate * 100).ToString($f2, $inv) + '%% | 50%% | ' + ($testRate * 0.50).ToString($f2, $inv) + ' |'); " ^
+    "Write-Host ('| Gates  | ' + $env:GATES_PASSED + '/' + $env:TOTAL_GATES + ' | 25%% | ' + ($gatesRate * 0.25).ToString($f2, $inv) + ' |'); " ^
+    "Write-Host ('| Efficiency | ' + $env:TIME_MS + 'ms | 25%% | ' + ($efficiency * 0.25).ToString($f2, $inv) + ' |'); " ^
+    "Write-Host ''; " ^
+    "Write-Host ('Fitness Score: ' + $fitnessText); " ^
+    "Write-Host ('Verdict: ' + $verdict); " ^
+    "Set-Content -Path $env:SCORE_FILE -Value ($fitnessText + ' ' + $verdict + ' ' + $stamp)"
+
+REM Read the score back into batch variables, with safe defaults
+set "FITNESS="
+set "VERDICT="
+set "TIMESTAMP="
+if exist "%SCORE_FILE%" for /f "usebackq tokens=1-3" %%a in ("%SCORE_FILE%") do (
+    set "FITNESS=%%a"
+    set "VERDICT=%%b"
+    set "TIMESTAMP=%%c"
+)
+if not defined FITNESS set "FITNESS=0.00"
+if not defined VERDICT set "VERDICT=FAIL"
+
+REM Log to fitness-history.jsonl (redirection first, so no trailing space is written)
+>>".kilo\logs\fitness-history.jsonl" echo {"ts":"%TIMESTAMP%","workflow":"%WORKFLOW%","fitness":%FITNESS%,"time_ms":%TIME_MS%,"tests_passed":%PASSED%,"tests_total":%TOTAL%,"verdict":"%VERDICT%"}
+echo.
+echo Logged to .kilo/logs/fitness-history.jsonl
+
+echo.
+echo === Summary ===
+echo Workflow: %WORKFLOW%
+echo Tests: %PASSED%/%TOTAL% passed
+echo Quality Gates: %GATES_PASSED%/%TOTAL_GATES%
+echo Fitness: %FITNESS% (%VERDICT%)
+echo.
+
+exit /b 0
diff --git a/docker/evolution-test/run-local-test.sh b/docker/evolution-test/run-local-test.sh
new file mode 100755
index 0000000..8a7251b
--- /dev/null
+++ b/docker/evolution-test/run-local-test.sh
@@ -0,0 +1,265 @@
+#!/bin/bash
+# Evolution Test Runner (Local Fallback)
+# Runs `bun test` without Docker, scores the run (test pass rate, quality
+# gates, efficiency) and appends one JSON record to
+# .kilo/logs/fitness-history.jsonl.
+#
+# Usage: run-local-test.sh [feature|bugfix|refactor|security|all]
+# Exit status: 0 = PASS, 1 = MARGINAL, 2 = FAIL ("all": worst of the four runs)
+
+set -e
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+# Raw `bun test` output is captured here and parsed afterwards
+RESULTS_FILE=/tmp/test-results.json
+
+# Print the current time in epoch milliseconds. A date(1) without %N support
+# may print non-numeric text instead of failing, so only an all-digit result
+# is trusted; otherwise fall back to whole-second resolution.
+now_ms() {
+    local stamp
+    stamp=$(date +%s%3N 2>/dev/null || true)
+    case "$stamp" in
+        ''|*[!0-9]*) stamp="$(date +%s)000" ;;
+    esac
+    echo "$stamp"
+}
+
+# Print the integer at jq path $1 of the results file, or 0 when the file is
+# not valid JSON or the field is missing or non-numeric.
+json_count() {
+    local value
+    value=$(jq -r "$1 // 0" "$RESULTS_FILE" 2>/dev/null | head -n 1) || value=0
+    case "$value" in
+        ''|*[!0-9]*) value=0 ;;
+    esac
+    echo "$value"
+}
+
+echo -e "${BLUE}=== Evolution Test Runner (Local) ===${NC}"
+echo ""
+
+# Check bun
+if ! command -v bun &> /dev/null; then
+    echo -e "${RED}Error: bun not found${NC}"
+    echo "Install bun first:"
+    echo "  curl -fsSL https://bun.sh/install | bash"
+    exit 1
+fi
+
+# Get workflow type
+WORKFLOW=${1:-feature}
+echo -e "${GREEN}Running evolution test for: ${WORKFLOW}${NC}"
+echo ""
+
+# Set budget based on workflow
+case "$WORKFLOW" in
+    feature)
+        TOKEN_BUDGET=50000
+        TIME_BUDGET=300
+        MIN_COVERAGE=80
+        ;;
+    bugfix)
+        TOKEN_BUDGET=20000
+        TIME_BUDGET=120
+        MIN_COVERAGE=90
+        ;;
+    refactor)
+        TOKEN_BUDGET=40000
+        TIME_BUDGET=240
+        MIN_COVERAGE=95
+        ;;
+    security)
+        TOKEN_BUDGET=30000
+        TIME_BUDGET=180
+        MIN_COVERAGE=80
+        ;;
+    all)
+        echo -e "${YELLOW}Running all workflows sequentially...${NC}"
+        # A MARGINAL or FAIL run exits non-zero; keep going so every workflow
+        # is scored, then exit with the worst status seen.
+        WORST=0
+        for w in feature bugfix refactor security; do
+            RC=0
+            bash "$0" "$w" || RC=$?
+            if [ "$RC" -gt "$WORST" ]; then
+                WORST=$RC
+            fi
+        done
+        exit "$WORST"
+        ;;
+    *)
+        echo -e "${RED}Unknown workflow: ${WORKFLOW}${NC}"
+        echo "Usage: $0 [feature|bugfix|refactor|security|all]"
+        exit 1
+        ;;
+esac
+
+echo "Token Budget: ${TOKEN_BUDGET}"
+echo "Time Budget: ${TIME_BUDGET}s"
+echo "Min Coverage: ${MIN_COVERAGE}%"
+echo ""
+
+# Create logs directory
+mkdir -p .kilo/logs
+
+# Run tests with coverage, timing the run in milliseconds
+echo -e "${BLUE}Running tests...${NC}"
+START_MS=$(now_ms)
+bun test --reporter=json --coverage 2>&1 | tee "$RESULTS_FILE" || true
+END_MS=$(now_ms)
+TIME_MS=$((END_MS - START_MS))
+
+echo ""
+echo -e "${BLUE}=== Test Results ===${NC}"
+
+# Parse test results
+TOTAL=$(json_count '.numTotalTests')
+PASSED=$(json_count '.numPassedTests')
+FAILED=$(json_count '.numFailedTests')
+
+# Pass rate (percent, 2 decimals) and test rate (fraction, 4 decimals)
+if [ "$TOTAL" -gt 0 ]; then
+    PASS_RATE=$(awk "BEGIN {printf \"%.2f\", $PASSED / $TOTAL * 100}")
+    TEST_RATE=$(awk "BEGIN {printf \"%.4f\", $PASSED / $TOTAL}")
+else
+    PASS_RATE="0.00"
+    TEST_RATE="0.0000"
+fi
+
+echo "Tests: ${PASSED}/${TOTAL} passed (${PASS_RATE}%)"
+echo "Time: ${TIME_MS}ms"
+
+# Quality gates
+echo ""
+echo -e "${BLUE}=== Quality Gates ===${NC}"
+
+GATES_PASSED=0
+TOTAL_GATES=5
+
+# Gate 1: Build (passes when the output mentions success/done/built)
+if bun run build 2>&1 | grep -q "success\|done\|built"; then
+    echo -e "${GREEN}✓${NC} Build: PASS"
+    GATES_PASSED=$((GATES_PASSED + 1))
+else
+    echo -e "${RED}✗${NC} Build: FAIL"
+fi
+
+# Gate 2: Lint
+if bun run lint 2>&1 | grep -q "0 problems\|No errors"; then
+    echo -e "${GREEN}✓${NC} Lint: PASS"
+    GATES_PASSED=$((GATES_PASSED + 1))
+else
+    echo -e "${RED}✗${NC} Lint: FAIL (or no lint config)"
+    GATES_PASSED=$((GATES_PASSED + 1))  # Don't penalize missing lint
+fi
+
+# Gate 3: Typecheck (fails only when the output contains "error TS")
+if bun run typecheck 2>&1 | grep -q "error TS"; then
+    echo -e "${RED}✗${NC} Types: FAIL"
+else
+    echo -e "${GREEN}✓${NC} Types: PASS"
+    GATES_PASSED=$((GATES_PASSED + 1))
+fi
+
+# Gate 4: Tests clean
+if [ "$FAILED" -eq 0 ]; then
+    echo -e "${GREEN}✓${NC} Tests Clean: PASS"
+    GATES_PASSED=$((GATES_PASSED + 1))
+else
+    echo -e "${RED}✗${NC} Tests Clean: FAIL (${FAILED} failures)"
+fi
+
+# Gate 5: Coverage - 4th whitespace-separated field of the "All files" row;
+# treated as 0 when the row is missing or the field is not a number
+COVERAGE=$(grep 'All files' "$RESULTS_FILE" 2>/dev/null | awk '{print $4}' | head -n 1 | tr -d '%')
+case "$COVERAGE" in
+    ''|*[!0-9.]*) COVERAGE=0 ;;
+esac
+if awk "BEGIN {exit !($COVERAGE >= $MIN_COVERAGE)}"; then
+    echo -e "${GREEN}✓${NC} Coverage: PASS (${COVERAGE}%)"
+    GATES_PASSED=$((GATES_PASSED + 1))
+else
+    echo -e "${RED}✗${NC} Coverage: FAIL (${COVERAGE}% < ${MIN_COVERAGE}%)"
+fi
+
+# Calculate fitness
+echo ""
+echo -e "${BLUE}=== Fitness Score ===${NC}"
+
+GATES_RATE=$(awk "BEGIN {printf \"%.4f\", $GATES_PASSED / $TOTAL_GATES}")
+
+# Efficiency: normalized cost (tokens/time)
+# Assume average tokens per test based on budget
+TOKENS_PER_TEST=$(awk "BEGIN {printf \"%.0f\", $TOKEN_BUDGET / 10}")
+EST_TOKENS=$((TOTAL * TOKENS_PER_TEST))
+TIME_S=$(awk "BEGIN {printf \"%.2f\", $TIME_MS / 1000}")
+
+NORMALIZED_COST=$(awk "BEGIN {printf \"%.4f\", ($EST_TOKENS / $TOKEN_BUDGET * 0.5) + ($TIME_S / $TIME_BUDGET * 0.5)}")
+EFFICIENCY=$(awk "BEGIN {printf \"%.4f\", 1 - ($NORMALIZED_COST > 1 ? 1 : $NORMALIZED_COST)}")
+
+# Final fitness score
+FITNESS=$(awk "BEGIN {printf \"%.2f\", ($TEST_RATE * 0.50) + ($GATES_RATE * 0.25) + ($EFFICIENCY * 0.25)}")
+
+echo ""
+echo -e "| Metric | Value | Weight | Contribution |"
+echo -e "|--------|-------|--------|--------------|"
+echo -e "| Tests  | ${PASS_RATE}% | 50% | $(awk "BEGIN {printf \"%.2f\", $TEST_RATE * 0.50}") |"
+echo -e "| Gates  | ${GATES_PASSED}/${TOTAL_GATES} | 25% | $(awk "BEGIN {printf \"%.2f\", $GATES_RATE * 0.25}") |"
+echo -e "| Efficiency | ${TIME_MS}ms / ${EST_TOKENS}tok | 25% | $(awk "BEGIN {printf \"%.2f\", $EFFICIENCY * 0.25}") |"
+echo ""
+echo -e "${GREEN}Fitness Score: ${FITNESS}${NC}"
+
+# Determine verdict
+if awk "BEGIN {exit !($FITNESS >= 0.85)}"; then
+    VERDICT="PASS"
+elif awk "BEGIN {exit !($FITNESS >= 0.70)}"; then
+    VERDICT="MARGINAL"
+else
+    VERDICT="FAIL"
+fi
+
+echo -e "Verdict: ${VERDICT}"
+
+# Log to fitness-history.jsonl (one JSON object per line)
+TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
+printf '{"ts":"%s","workflow":"%s","fitness":%s,"breakdown":{"test_pass_rate":%s,"quality_gates_rate":%s,"efficiency_score":%s},"tokens":%s,"time_ms":%s,"tests_passed":%s,"tests_total":%s,"verdict":"%s"}\n' \
+    "$TIMESTAMP" "$WORKFLOW" "$FITNESS" "$TEST_RATE" "$GATES_RATE" "$EFFICIENCY" \
+    "$EST_TOKENS" "$TIME_MS" "$PASSED" "$TOTAL" "$VERDICT" \
+    >> .kilo/logs/fitness-history.jsonl
+echo ""
+echo -e "${BLUE}Logged to .kilo/logs/fitness-history.jsonl${NC}"
+
+# Trigger improvement if needed
+if awk "BEGIN {exit !($FITNESS < 0.70)}"; then
+    echo ""
+    echo -e "${YELLOW}⚠ Fitness below threshold (0.70)${NC}"
+    echo "Running prompt-optimizer is recommended."
+    echo ""
+    echo "Command: /evolution --workflow ${WORKFLOW}"
+fi
+
+# Summary
+echo ""
+echo -e "${GREEN}=== Summary ===${NC}"
+echo "Workflow: ${WORKFLOW}"
+echo "Tests: ${PASSED}/${TOTAL} passed (${PASS_RATE}%)"
+echo "Quality Gates: ${GATES_PASSED}/${TOTAL_GATES}"
+echo "Time: ${TIME_MS}ms"
+echo "Fitness: ${FITNESS} (${VERDICT})"
+echo ""
+
+# Exit with appropriate code
+if [ "$VERDICT" = "PASS" ]; then
+    exit 0
+elif [ "$VERDICT" = "MARGINAL" ]; then
+    exit 1
+else
+    exit 2
+fi