refactor: clean main to starter template — remove project-specific and generated files
- Remove project-specific commands: booking, blog, commerce, landing-page, feature, hotfix
- Remove project-specific skills: booking, blog, ecommerce
- Remove generated files: EVOLUTION_LOG, WORKFLOW_AUDIT, logs/, reports/
- Add .gitignore entries for auto-generated dirs (.kilo/logs/, .kilo/reports/)
- Remove e2e_booking_flow from capability-index.yaml
- Remove docker/evolution-test/ (dev infra, not starter)
- Genericize AGENTS.md project description
- Genericize tests/README.md title

All removed content preserved on dev branch.
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
# Web Testing Infrastructure for APAW
|
||||
# Web Testing Infrastructure
|
||||
# Covers: Visual Regression, Link Checking, Form Testing, Console Errors
|
||||
#
|
||||
# Usage:
|
||||
|
||||
@@ -1,25 +0,0 @@
|
||||
# Evolution Test Container
# Used for testing pipeline-judge fitness scoring with precise measurements

FROM oven/bun:1 AS base

WORKDIR /app

# Install TypeScript and testing tools
RUN bun add -g typescript @types/node

# Copy only the dependency manifests first so the `bun install` layer below is
# reused from cache until a manifest changes; copying the whole tree first
# would reinstall every dependency on any source edit.
# The lockfile globs are optional matches (either lockfile format, or none).
# NOTE(review): assumes install needs no other project files (e.g. workspace
# packages or postinstall scripts that read sources) - confirm.
COPY package.json bun.lock* bun.lockb* ./

# Install dependencies
RUN bun install

# Copy project files
COPY . /app/

# Create logs directory
RUN mkdir -p .kilo/logs

# No HEALTHCHECK on purpose: this is a one-shot test container, and the old
# probe re-ran the entire test suite every 30s under a 10s timeout, which is
# neither cheap nor side-effect free.

# Default command - run tests with precise timing
CMD ["bun", "test", "--reporter=json"]
|
||||
@@ -1,88 +0,0 @@
|
||||
# Evolution Test Containers
# Run multiple workflow tests in parallel

version: '3.8'

# Settings shared by every evolution test runner. Each runner service merges
# this fragment with `<<:` and adds only its container name and budgets, so
# the build, mounts and test command are defined once.
x-evolution-runner: &evolution-runner
  build:
    context: ../..
    dockerfile: docker/evolution-test/Dockerfile
  volumes:
    - ../../.kilo/logs:/app/.kilo/logs
    - ../../src:/app/src
  command: bun test --reporter=json --coverage

services:
  # Evolution test runner for feature workflow
  evolution-feature:
    <<: *evolution-runner
    container_name: evolution-feature
    environment:
      - WORKFLOW_TYPE=feature
      - TOKEN_BUDGET=50000
      - TIME_BUDGET=300
      - MIN_COVERAGE=80

  # Evolution test runner for bugfix workflow
  evolution-bugfix:
    <<: *evolution-runner
    container_name: evolution-bugfix
    environment:
      - WORKFLOW_TYPE=bugfix
      - TOKEN_BUDGET=20000
      - TIME_BUDGET=120
      - MIN_COVERAGE=90

  # Evolution test runner for refactor workflow
  evolution-refactor:
    <<: *evolution-runner
    container_name: evolution-refactor
    environment:
      - WORKFLOW_TYPE=refactor
      - TOKEN_BUDGET=40000
      - TIME_BUDGET=240
      - MIN_COVERAGE=95

  # Evolution test runner for security workflow
  evolution-security:
    <<: *evolution-runner
    container_name: evolution-security
    environment:
      - WORKFLOW_TYPE=security
      - TOKEN_BUDGET=30000
      - TIME_BUDGET=180
      - MIN_COVERAGE=80

  # Fitness aggregator - collects results from all containers
  # Note: the short-form depends_on below only orders container start-up; it
  # does not wait for the test runners to finish.
  fitness-aggregator:
    image: oven/bun:1
    container_name: fitness-aggregator
    depends_on:
      - evolution-feature
      - evolution-bugfix
      - evolution-refactor
      - evolution-security
    volumes:
      - ../../.kilo/logs:/app/.kilo/logs
    working_dir: /app
    # Keeps the last four history entries as the "latest" snapshot.
    # Exits non-zero when the history file is missing or empty instead of
    # writing an empty snapshot and reporting success. Paths are written out
    # literally so Compose variable interpolation never touches the script.
    command:
      - sh
      - -c
      - |
        set -eu
        echo 'Aggregating fitness scores...'
        if [ ! -s .kilo/logs/fitness-history.jsonl ]; then
          echo 'Error: .kilo/logs/fitness-history.jsonl is missing or empty' >&2
          exit 1
        fi
        tail -n 4 .kilo/logs/fitness-history.jsonl > .kilo/logs/fitness-latest.jsonl
        echo 'Fitness aggregation complete.'
|
||||
@@ -1,65 +0,0 @@
|
||||
@echo off
REM Evolution Test Runner for Windows
REM Runs pipeline-judge tests with precise measurements
REM
REM Usage: %~nx0 [feature^|bugfix^|refactor^|security^|all]   (default: feature)
REM Exits with 1 when Docker is unavailable, the workflow name is unknown,
REM or the image build fails.

setlocal enabledelayedexpansion

REM Compose file used by every docker-compose call below. Deliberately not
REM named COMPOSE_FILE, which docker-compose itself reads from the environment.
set "DC_FILE=docker/evolution-test/docker-compose.yml"

echo === Evolution Test Runner ===
echo.

REM Check Docker
where docker >nul 2>&1
if %errorlevel% neq 0 (
    echo Error: Docker not found
    echo Please install Docker Desktop first:
    echo winget install Docker.DockerDesktop
    echo.
    echo Or run tests locally ^(less precise^):
    echo bun test --reporter=json --coverage
    exit /b 1
)

REM Check Docker daemon
docker info >nul 2>&1
if %errorlevel% neq 0 (
    echo Warning: Docker daemon not running
    echo Please start Docker Desktop and try again
    exit /b 1
)

REM Get workflow type
set WORKFLOW=%1
if "%WORKFLOW%"=="" set WORKFLOW=feature

REM Reject unknown workflows before spending time on a build. The comparison
REM is case-sensitive because the compose service names are lowercase.
set WORKFLOW_OK=0
for %%w in (feature bugfix refactor security all) do if "%WORKFLOW%"=="%%w" set WORKFLOW_OK=1
if "%WORKFLOW_OK%"=="0" (
    echo Unknown workflow: %WORKFLOW%
    echo Usage: %~nx0 [feature^|bugfix^|refactor^|security^|all]
    exit /b 1
)

echo Running evolution test for: %WORKFLOW%
echo.

REM Build container
echo Building evolution test container...
docker-compose -f "%DC_FILE%" build
if errorlevel 1 (
    echo Error: failed to build the evolution test container
    exit /b 1
)

REM Run test
if "%WORKFLOW%"=="all" (
    echo Running ALL workflow tests in parallel...
    REM Run only the four test services first; the aggregator runs afterwards
    REM with --no-deps so it sees finished results and does not start the
    REM test services a second time.
    docker-compose -f "%DC_FILE%" up evolution-feature evolution-bugfix evolution-refactor evolution-security
    docker-compose -f "%DC_FILE%" up --no-deps fitness-aggregator
) else (
    docker-compose -f "%DC_FILE%" up evolution-%WORKFLOW%
)

REM Show results
echo.
echo === Test Results ===
if exist .kilo\logs\fitness-history.jsonl (
    echo Latest fitness scores:
    powershell -Command "Get-Content .kilo\logs\fitness-history.jsonl -Tail 4 | ForEach-Object { $j = $_ | ConvertFrom-Json; Write-Host (' ' + $j.workflow + ': fitness=' + $j.fitness + ', time=' + $j.time_ms + 'ms, tokens=' + $j.tokens) }"
) else (
    echo No fitness history found
)

REM Cleanup
echo.
echo Cleaning up...
docker-compose -f "%DC_FILE%" down -v 2>nul

echo Done!
|
||||
@@ -1,92 +0,0 @@
|
||||
#!/bin/bash
# Evolution Test Runner
# Runs pipeline-judge tests with precise measurements
#
# Usage: $0 [feature|bugfix|refactor|security|all]   (default: feature)
# Exits with 1 when Docker is unavailable or the workflow name is unknown.

set -e

# Compose file used by every docker-compose call below.
COMPOSE_YML="docker/evolution-test/docker-compose.yml"
# Upper bound, in seconds, on waiting for the Docker daemon to come up.
DOCKER_WAIT_SECONDS=60

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# compose ARGS...: run docker-compose against the evolution test compose file.
compose() {
    docker-compose -f "$COMPOSE_YML" "$@"
}

# cleanup: tear down containers and volumes; never fails the script.
cleanup() {
    echo ""
    echo -e "${BLUE}Cleaning up...${NC}"
    compose down -v 2>/dev/null || true
}

echo -e "${BLUE}=== Evolution Test Runner ===${NC}"
echo ""

# Check Docker
if ! command -v docker &> /dev/null; then
    echo -e "${RED}Error: Docker not found${NC}"
    echo "Please install Docker Desktop first:"
    echo " winget install Docker.DockerDesktop"
    echo ""
    echo "Or use alternatives:"
    echo " 1. Use WSL2 with Docker"
    echo " 2. Run tests locally (less precise):"
    echo " bun test --reporter=json --coverage"
    exit 1
fi

# Docker daemon check
if ! docker info &> /dev/null; then
    echo -e "${YELLOW}Warning: Docker daemon not running${NC}"
    echo "Starting Docker Desktop..."
    open -a "Docker" 2>/dev/null || start "Docker Desktop" 2>/dev/null || true
    # Poll until the daemon answers instead of sleeping a fixed time and then
    # failing later in the build with an unrelated-looking error.
    waited=0
    until docker info &> /dev/null; do
        if [ "$waited" -ge "$DOCKER_WAIT_SECONDS" ]; then
            echo -e "${RED}Error: Docker daemon did not start within ${DOCKER_WAIT_SECONDS}s${NC}"
            exit 1
        fi
        sleep 2
        waited=$((waited + 2))
    done
fi

# Validate the workflow before building so a typo fails fast.
WORKFLOW=${1:-feature}
case "$WORKFLOW" in
    feature|bugfix|refactor|security|all)
        ;;
    *)
        echo -e "${RED}Unknown workflow: ${WORKFLOW}${NC}"
        echo "Usage: $0 [feature|bugfix|refactor|security|all]"
        exit 1
        ;;
esac

# From here on containers may exist, so always clean up - including when
# `set -e` aborts the script part-way through.
trap cleanup EXIT

# Build evolution test container
echo -e "${BLUE}Building evolution test container...${NC}"
compose build

# Run specific workflow test
echo -e "${GREEN}Running evolution test for: ${WORKFLOW}${NC}"

if [ "$WORKFLOW" = "all" ]; then
    echo -e "${BLUE}Running ALL workflow tests in parallel...${NC}"
    # Run only the four test services first; the aggregator runs afterwards
    # with --no-deps so it sees finished results and does not start the test
    # services a second time.
    compose up evolution-feature evolution-bugfix evolution-refactor evolution-security
    compose up --no-deps fitness-aggregator
else
    compose up "evolution-${WORKFLOW}"
fi

# Parse results
echo ""
echo -e "${BLUE}=== Test Results ===${NC}"
if [ -f ".kilo/logs/fitness-history.jsonl" ]; then
    echo -e "${GREEN}Latest fitness scores:${NC}"
    if command -v jq &> /dev/null; then
        tail -n 4 .kilo/logs/fitness-history.jsonl | while IFS= read -r line; do
            # One jq call per entry; an unparseable line is reported rather
            # than aborting the script under `set -e`.
            summary=$(printf '%s\n' "$line" | jq -r '" \(.workflow // ""): fitness=\(.fitness // ""), time=\(.time_ms // "")ms, tokens=\(.tokens // "")"' 2>/dev/null) \
                || summary=" (unparseable entry)"
            echo "$summary"
        done
    else
        echo -e "${YELLOW}jq not found; showing raw entries${NC}"
        tail -n 4 .kilo/logs/fitness-history.jsonl
    fi
else
    echo -e "${YELLOW}No fitness history found${NC}"
fi

# Cleanup (run once explicitly, so disarm the EXIT trap first)
trap - EXIT
cleanup

echo -e "${GREEN}Done!${NC}"
|
||||
@@ -1,162 +0,0 @@
|
||||
@echo off
REM Evolution Test Runner (Local Fallback)
REM Runs pipeline-judge tests without Docker - less precise but works immediately
REM
REM Usage: %~nx0 [feature^|bugfix^|refactor^|security^|all]   (default: feature)
REM Appends one JSON line per run to .kilo\logs\fitness-history.jsonl.
REM All PowerShell snippets avoid PowerShell 7-only syntax because the
REM "powershell" command is Windows PowerShell 5.1 on a stock system.

setlocal enabledelayedexpansion

echo === Evolution Test Runner (Local) ===
echo.

REM Check bun
where bun >nul 2>&1
if %errorlevel% neq 0 (
    echo Error: bun not found
    echo Install bun first from https://bun.sh
    exit /b 1
)

REM Get workflow type
set WORKFLOW=%1
if "%WORKFLOW%"=="" set WORKFLOW=feature

echo Running evolution test for: %WORKFLOW%
echo.

REM Set budget based on workflow
if "%WORKFLOW%"=="feature" (
    set TOKEN_BUDGET=50000
    set TIME_BUDGET=300
    set MIN_COVERAGE=80
) else if "%WORKFLOW%"=="bugfix" (
    set TOKEN_BUDGET=20000
    set TIME_BUDGET=120
    set MIN_COVERAGE=90
) else if "%WORKFLOW%"=="refactor" (
    set TOKEN_BUDGET=40000
    set TIME_BUDGET=240
    set MIN_COVERAGE=95
) else if "%WORKFLOW%"=="security" (
    set TOKEN_BUDGET=30000
    set TIME_BUDGET=180
    set MIN_COVERAGE=80
) else if "%WORKFLOW%"=="all" (
    echo Running all workflows sequentially...
    REM "%~f0" is this script's full path, quoted so a path with spaces works.
    call "%~f0" feature
    call "%~f0" bugfix
    call "%~f0" refactor
    call "%~f0" security
    exit /b 0
) else (
    echo Unknown workflow: %WORKFLOW%
    echo Usage: %0 [feature^|bugfix^|refactor^|security^|all]
    exit /b 1
)

echo Token Budget: %TOKEN_BUDGET%
echo Time Budget: %TIME_BUDGET%s
echo Min Coverage: %MIN_COVERAGE%%%
echo.

REM Create logs directory
if not exist .kilo\logs mkdir .kilo\logs

REM Scratch files live under %TEMP%, which exists on every Windows install
REM (C:\tmp usually does not). Stale files from a previous run are removed so
REM they can never be mistaken for this run's output.
set "RESULTS_FILE=%TEMP%\evolution-test-results.json"
set "TIME_FILE=%TEMP%\evolution-test-time.txt"
if exist "%RESULTS_FILE%" del "%RESULTS_FILE%"
if exist "%TIME_FILE%" del "%TIME_FILE%"

REM Run tests with timing
REM The elapsed milliseconds are written to TIME_FILE and read back below;
REM the process exit code is not a duration.
echo Running tests...
powershell -NoProfile -Command "$start = Get-Date; bun test --reporter=json --coverage 2>&1 | Tee-Object -FilePath '%RESULTS_FILE%'; $end = Get-Date; $ms = ($end - $start).TotalMilliseconds; Set-Content -Path '%TIME_FILE%' -Value ([int64][math]::Round($ms)); Write-Host ('Time: {0}ms' -f [math]::Round($ms, 2))"
set TIME_MS=0
if exist "%TIME_FILE%" set /p TIME_MS=<"%TIME_FILE%"
if "%TIME_MS%"=="" set TIME_MS=0

echo.
echo === Test Results ===

REM Parse results using PowerShell
set TOTAL=
set PASSED=
set FAILED=
for /f %%i in ('powershell -NoProfile -Command "(Get-Content '%RESULTS_FILE%' | ConvertFrom-Json).numTotalTests" 2^>nul') do set TOTAL=%%i
for /f %%i in ('powershell -NoProfile -Command "(Get-Content '%RESULTS_FILE%' | ConvertFrom-Json).numPassedTests" 2^>nul') do set PASSED=%%i
for /f %%i in ('powershell -NoProfile -Command "(Get-Content '%RESULTS_FILE%' | ConvertFrom-Json).numFailedTests" 2^>nul') do set FAILED=%%i

if "%TOTAL%"=="" set TOTAL=0
if "%PASSED%"=="" set PASSED=0
if "%FAILED%"=="" set FAILED=0

echo Tests: %PASSED%/%TOTAL% passed

REM Quality gates
echo.
echo === Quality Gates ===

set GATES_PASSED=0
set TOTAL_GATES=5

REM Gate 1: Build
bun run build >nul 2>&1
if %errorlevel% equ 0 (
    echo [PASS] Build
    set /a GATES_PASSED+=1
) else (
    echo [FAIL] Build
)

REM Gate 2: Lint (don't penalize missing config)
bun run lint >nul 2>&1
if %errorlevel% equ 0 (
    echo [PASS] Lint
    set /a GATES_PASSED+=1
) else (
    REM The closing parenthesis must be escaped or it ends this else-block.
    echo [SKIP] Lint (no config^)
    set /a GATES_PASSED+=1
)

REM Gate 3: Typecheck
bun run typecheck >nul 2>&1
if %errorlevel% equ 0 (
    echo [PASS] Types
    set /a GATES_PASSED+=1
) else (
    echo [FAIL] Types
)

REM Gate 4: Tests clean
if "%FAILED%"=="0" (
    echo [PASS] Tests Clean
    set /a GATES_PASSED+=1
) else (
    echo [FAIL] Tests Clean (%FAILED% failures^)
)

REM Gate 5: Coverage
echo [INFO] Coverage check skipped in local mode
set /a GATES_PASSED+=1

echo.
echo === Fitness Score ===

REM Calculate fitness using PowerShell.
REM The script prints one line: fitness, test %%, and the three weighted
REM contributions (invariant-culture, two decimals) followed by the verdict.
REM That line is captured into batch variables so the JSON log and the summary
REM below get real values. The script text contains no double quotes and no
REM exclamation marks, so it survives cmd quoting and delayed expansion.
set "PS_CALC=$p = %PASSED%; $t = %TOTAL%; $g = %GATES_PASSED%; $gt = %TOTAL_GATES%; $ms = %TIME_MS%; $tb = %TOKEN_BUDGET%; $sb = %TIME_BUDGET%;"
set "PS_CALC=%PS_CALC% $tr = 0; if ($t -gt 0) { $tr = $p / $t }; $gr = $g / $gt;"
set "PS_CALC=%PS_CALC% $cost = ($t * 10 / $tb * 0.5) + ($ms / 1000 / $sb * 0.5); $eff = 1 - [math]::Min($cost, 1);"
set "PS_CALC=%PS_CALC% $fit = ($tr * 0.50) + ($gr * 0.25) + ($eff * 0.25);"
set "PS_CALC=%PS_CALC% $v = 'FAIL'; if ($fit -ge 0.85) { $v = 'PASS' } elseif ($fit -ge 0.70) { $v = 'MARGINAL' };"
set "PS_CALC=%PS_CALC% $c = [Globalization.CultureInfo]::InvariantCulture;"
set "PS_CALC=%PS_CALC% $out = @($fit, ($tr * 100), ($tr * 0.50), ($gr * 0.25), ($eff * 0.25)) | ForEach-Object { ([double]$_).ToString('0.00', $c) };"
set "PS_CALC=%PS_CALC% Write-Output (($out -join ' ') + ' ' + $v)"

REM Defaults used if PowerShell fails to produce a result line.
set FITNESS=0
set TEST_PCT=0.00
set TEST_CONTRIB=0.00
set GATES_CONTRIB=0.00
set EFF_CONTRIB=0.00
set VERDICT=FAIL
for /f "tokens=1-6" %%a in ('powershell -NoProfile -Command "%PS_CALC%"') do (
    set FITNESS=%%a
    set TEST_PCT=%%b
    set TEST_CONTRIB=%%c
    set GATES_CONTRIB=%%d
    set EFF_CONTRIB=%%e
    set VERDICT=%%f
)

echo ^| Metric ^| Value ^| Weight ^| Contribution ^|
echo ^|--------^|-------^|--------^|--------------^|
echo ^| Tests ^| %TEST_PCT%%% ^| 50%% ^| %TEST_CONTRIB% ^|
echo ^| Gates ^| %GATES_PASSED%/%TOTAL_GATES% ^| 25%% ^| %GATES_CONTRIB% ^|
echo ^| Efficiency ^| %TIME_MS%ms ^| 25%% ^| %EFF_CONTRIB% ^|
echo.
echo Fitness Score: %FITNESS%
echo Verdict: %VERDICT%

REM Log to fitness-history.jsonl
REM The 's' (sortable) format is culture-invariant; 'Z' marks the UTC time.
for /f "tokens=*" %%a in ('powershell -NoProfile -Command "(Get-Date).ToUniversalTime().ToString('s') + 'Z'"') do set TIMESTAMP=%%a

REM Redirection comes first so no trailing space is written to the log line.
>> ".kilo\logs\fitness-history.jsonl" echo {"ts":"%TIMESTAMP%","workflow":"%WORKFLOW%","fitness":%FITNESS%,"tests_passed":%PASSED%,"tests_total":%TOTAL%,"verdict":"%VERDICT%"}
echo.
echo Logged to .kilo/logs/fitness-history.jsonl

echo.
echo === Summary ===
echo Workflow: %WORKFLOW%
echo Tests: %PASSED%/%TOTAL% passed
echo Quality Gates: %GATES_PASSED%/%TOTAL_GATES%
echo Fitness: %FITNESS% (%VERDICT%)
echo.

exit /b
|
||||
@@ -1,230 +0,0 @@
|
||||
#!/bin/bash
# Evolution Test Runner (Local Fallback)
# Runs pipeline-judge tests without Docker - less precise but works immediately
#
# Usage: $0 [feature|bugfix|refactor|security|all]   (default: feature)
# Exit status: 0 = PASS, 1 = MARGINAL (also bun missing / unknown workflow),
# 2 = FAIL. With "all", the highest status of the four runs is returned.
# Appends one JSON line per run to .kilo/logs/fitness-history.jsonl.

set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# now_ms: print the current time in milliseconds since the epoch.
# `%3N` is not supported by every `date`; an unsupported format may be echoed
# back instead of failing, so the output is checked for digits only and
# otherwise replaced by whole seconds scaled to milliseconds.
now_ms() {
    local stamp
    stamp=$(date +%s%3N 2>/dev/null || true)
    case "$stamp" in
        ''|*[!0-9]*) stamp="$(date +%s)000" ;;
    esac
    echo "$stamp"
}

# json_count FIELD FILE: print top-level FIELD of the JSON in FILE as a
# non-negative integer, or 0 when jq is missing, the file does not parse, or
# the value is not a plain integer.
json_count() {
    local value
    value=$(jq -r ".$1 // 0" "$2" 2>/dev/null | head -n 1) || true
    case "$value" in
        ''|*[!0-9]*) value=0 ;;
    esac
    echo "$value"
}

echo -e "${BLUE}=== Evolution Test Runner (Local) ===${NC}"
echo ""

# Check bun
if ! command -v bun &> /dev/null; then
    echo -e "${RED}Error: bun not found${NC}"
    echo "Install bun first:"
    echo " curl -fsSL https://bun.sh/install | bash"
    exit 1
fi

# Get workflow type
WORKFLOW=${1:-feature}
echo -e "${GREEN}Running evolution test for: ${WORKFLOW}${NC}"
echo ""

# Set budget based on workflow
case $WORKFLOW in
    feature)
        TOKEN_BUDGET=50000
        TIME_BUDGET=300
        MIN_COVERAGE=80
        ;;
    bugfix)
        TOKEN_BUDGET=20000
        TIME_BUDGET=120
        MIN_COVERAGE=90
        ;;
    refactor)
        TOKEN_BUDGET=40000
        TIME_BUDGET=240
        MIN_COVERAGE=95
        ;;
    security)
        TOKEN_BUDGET=30000
        TIME_BUDGET=180
        MIN_COVERAGE=80
        ;;
    all)
        echo -e "${YELLOW}Running all workflows sequentially...${NC}"
        # Each run exits non-zero for MARGINAL/FAIL; capture that status so
        # `set -e` does not stop the loop after the first such workflow.
        worst=0
        for w in feature bugfix refactor security; do
            status=0
            "$0" "$w" || status=$?
            if [ "$status" -gt "$worst" ]; then
                worst=$status
            fi
        done
        exit "$worst"
        ;;
    *)
        echo -e "${RED}Unknown workflow: ${WORKFLOW}${NC}"
        echo "Usage: $0 [feature|bugfix|refactor|security|all]"
        exit 1
        ;;
esac

echo "Token Budget: ${TOKEN_BUDGET}"
echo "Time Budget: ${TIME_BUDGET}s"
echo "Min Coverage: ${MIN_COVERAGE}%"
echo ""

# Create logs directory
mkdir -p .kilo/logs

# Private scratch file for the test output, removed on exit, instead of a
# fixed shared path under /tmp.
RESULTS_FILE=$(mktemp "${TMPDIR:-/tmp}/evolution-test-results.XXXXXX")
trap 'rm -f "$RESULTS_FILE"' EXIT

# Run tests with precise timing
echo -e "${BLUE}Running tests...${NC}"
START_MS=$(now_ms)

# Run bun test with coverage; failures are scored below, not fatal here.
bun test --reporter=json --coverage 2>&1 | tee "$RESULTS_FILE" || true

END_MS=$(now_ms)
TIME_MS=$((END_MS - START_MS))

echo ""
echo -e "${BLUE}=== Test Results ===${NC}"

if ! command -v jq &> /dev/null; then
    echo -e "${YELLOW}Warning: jq not found; test counts will be reported as 0${NC}"
fi

# Parse test results
TOTAL=$(json_count numTotalTests "$RESULTS_FILE")
PASSED=$(json_count numPassedTests "$RESULTS_FILE")
FAILED=$(json_count numFailedTests "$RESULTS_FILE")
SKIPPED=$(json_count numPendingTests "$RESULTS_FILE")

# Calculate pass rate with 2 decimals
if [ "$TOTAL" -gt 0 ]; then
    PASS_RATE=$(awk -v p="$PASSED" -v t="$TOTAL" 'BEGIN {printf "%.2f", p / t * 100}')
else
    PASS_RATE="0.00"
fi

echo "Tests: ${PASSED}/${TOTAL} passed (${PASS_RATE}%)"
echo "Time: ${TIME_MS}ms"

# Quality gates
echo ""
echo -e "${BLUE}=== Quality Gates ===${NC}"

GATES_PASSED=0
TOTAL_GATES=5

# Gate 1: Build - judged by exit status, not by words in the build output.
if bun run build > /dev/null 2>&1; then
    echo -e "${GREEN}✓${NC} Build: PASS"
    GATES_PASSED=$((GATES_PASSED + 1))
else
    echo -e "${RED}✗${NC} Build: FAIL"
fi

# Gate 2: Lint
if bun run lint 2>&1 | grep -q "0 problems\|No errors"; then
    echo -e "${GREEN}✓${NC} Lint: PASS"
    GATES_PASSED=$((GATES_PASSED + 1))
else
    echo -e "${RED}✗${NC} Lint: FAIL (or no lint config)"
    GATES_PASSED=$((GATES_PASSED + 1)) # Don't penalize missing lint
fi

# Gate 3: Typecheck - fails only when TypeScript errors are reported.
if bun run typecheck 2>&1 | grep -q "error TS"; then
    echo -e "${RED}✗${NC} Types: FAIL"
else
    echo -e "${GREEN}✓${NC} Types: PASS"
    GATES_PASSED=$((GATES_PASSED + 1))
fi

# Gate 4: Tests clean
if [ "$FAILED" -eq 0 ]; then
    echo -e "${GREEN}✓${NC} Tests Clean: PASS"
    GATES_PASSED=$((GATES_PASSED + 1))
else
    echo -e "${RED}✗${NC} Tests Clean: FAIL (${FAILED} failures)"
fi

# Gate 5: Coverage
# Takes the 4th whitespace-separated field of the first "All files" line and
# treats anything that is not a plain number (including no match) as 0.
COVERAGE=$(grep 'All files' "$RESULTS_FILE" 2>/dev/null | awk '{print $4; exit}' | sed 's/%//')
case "$COVERAGE" in
    ''|*[!0-9.]*) COVERAGE=0 ;;
esac
if awk -v c="$COVERAGE" -v m="$MIN_COVERAGE" 'BEGIN {exit !(c + 0 >= m + 0)}'; then
    echo -e "${GREEN}✓${NC} Coverage: PASS (${COVERAGE}%)"
    GATES_PASSED=$((GATES_PASSED + 1))
else
    echo -e "${RED}✗${NC} Coverage: FAIL (${COVERAGE}% < ${MIN_COVERAGE}%)"
fi

# Calculate fitness
echo ""
echo -e "${BLUE}=== Fitness Score ===${NC}"

# Exact pass rate; zero tests score 0 rather than dividing by zero.
TEST_RATE=$(awk -v p="$PASSED" -v t="$TOTAL" 'BEGIN {printf "%.4f", (t > 0 ? p / t : 0)}')
GATES_RATE=$(awk -v g="$GATES_PASSED" -v n="$TOTAL_GATES" 'BEGIN {printf "%.4f", g / n}')

# Efficiency: normalized cost (tokens/time)
# Assume average tokens per test based on budget
# NOTE(review): this estimates budget/10 tokens per test, so 10 or more tests
# already use the whole token budget - confirm this is the intended model.
TOKENS_PER_TEST=$(awk -v b="$TOKEN_BUDGET" 'BEGIN {printf "%.0f", b / 10}')
EST_TOKENS=$((TOTAL * TOKENS_PER_TEST))
TIME_S=$(awk -v ms="$TIME_MS" 'BEGIN {printf "%.2f", ms / 1000}')

NORMALIZED_COST=$(awk -v tok="$EST_TOKENS" -v tb="$TOKEN_BUDGET" -v s="$TIME_S" -v sb="$TIME_BUDGET" \
    'BEGIN {printf "%.4f", (tok / tb * 0.5) + (s / sb * 0.5)}')
EFFICIENCY=$(awk -v c="$NORMALIZED_COST" 'BEGIN {printf "%.4f", 1 - (c > 1 ? 1 : c)}')

# Final fitness score
FITNESS=$(awk -v t="$TEST_RATE" -v g="$GATES_RATE" -v e="$EFFICIENCY" \
    'BEGIN {printf "%.2f", (t * 0.50) + (g * 0.25) + (e * 0.25)}')

TEST_PART=$(awk -v t="$TEST_RATE" 'BEGIN {printf "%.2f", t * 0.50}')
GATES_PART=$(awk -v g="$GATES_RATE" 'BEGIN {printf "%.2f", g * 0.25}')
EFFICIENCY_PART=$(awk -v e="$EFFICIENCY" 'BEGIN {printf "%.2f", e * 0.25}')

echo ""
echo -e "| Metric | Value | Weight | Contribution |"
echo -e "|--------|-------|--------|--------------|"
echo -e "| Tests | ${PASS_RATE}% | 50% | ${TEST_PART} |"
echo -e "| Gates | ${GATES_PASSED}/${TOTAL_GATES} | 25% | ${GATES_PART} |"
echo -e "| Efficiency | ${TIME_MS}ms / ${EST_TOKENS}tok | 25% | ${EFFICIENCY_PART} |"
echo ""
echo -e "${GREEN}Fitness Score: ${FITNESS}${NC}"

# Determine verdict
if awk -v f="$FITNESS" 'BEGIN {exit !(f >= 0.85)}'; then
    VERDICT="PASS"
elif awk -v f="$FITNESS" 'BEGIN {exit !(f >= 0.70)}'; then
    VERDICT="MARGINAL"
else
    VERDICT="FAIL"
fi

echo -e "Verdict: ${VERDICT}"

# Log to fitness-history.jsonl
TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
printf '{"ts":"%s","workflow":"%s","fitness":%s,"breakdown":{"test_pass_rate":%s,"quality_gates_rate":%s,"efficiency_score":%s},"tokens":%s,"time_ms":%s,"tests_passed":%s,"tests_total":%s,"verdict":"%s"}\n' \
    "$TIMESTAMP" "$WORKFLOW" "$FITNESS" "$TEST_RATE" "$GATES_RATE" "$EFFICIENCY" \
    "$EST_TOKENS" "$TIME_MS" "$PASSED" "$TOTAL" "$VERDICT" >> .kilo/logs/fitness-history.jsonl
echo ""
echo -e "${BLUE}Logged to .kilo/logs/fitness-history.jsonl${NC}"

# Trigger improvement if needed
if [ "$VERDICT" = "FAIL" ]; then
    echo ""
    echo -e "${YELLOW}⚠ Fitness below threshold (0.70)${NC}"
    echo "Running prompt-optimizer is recommended."
    echo ""
    echo "Command: /evolution --workflow ${WORKFLOW}"
fi

# Summary
echo ""
echo -e "${GREEN}=== Summary ===${NC}"
echo "Workflow: ${WORKFLOW}"
echo "Tests: ${PASSED}/${TOTAL} passed (${PASS_RATE}%)"
echo "Quality Gates: ${GATES_PASSED}/${TOTAL_GATES}"
echo "Time: ${TIME_MS}ms"
echo "Fitness: ${FITNESS} (${VERDICT})"
echo ""

# Exit with appropriate code
case "$VERDICT" in
    PASS)     exit 0 ;;
    MARGINAL) exit 1 ;;
    *)        exit 2 ;;
esac
|
||||
Reference in New Issue
Block a user