Problem: when the LLM returned empty content or the request failed with a
network error, the orchestrator stopped immediately with "(no response)", which the user saw as a blank reply.
Solution — 4-layer retry system:
## Go Gateway (gateway/internal/orchestrator/orchestrator.go)
- Extracted shared runLoop() used by Chat(), ChatWithEvents(), ChatWithEventsAndRetry()
- Added RetryPolicy struct: MaxLLMRetries (default 3), InitialDelay (2s),
MaxDelay (30s), RetryOnEmpty (true)
- callLLMWithRetry(): wraps every LLM call with exponential back-off:
* retries on HTTP/network error
* retries on empty choices array
* retries when content=="" AND finish_reason!="tool_calls" (soft empty)
* strips tools on attempt > 1 (avoids repeated tool-format errors)
* logs each attempt; total attempts = MaxLLMRetries + 1 (default: 4)
- Added ChatWithEventsAndRetry() with onRetry callback for client visibility
- SetRetryPolicy() for runtime override
## Config (gateway/config/config.go)
- New fields: MaxLLMRetries (GATEWAY_MAX_LLM_RETRIES, default 3)
RetryDelaySecs (GATEWAY_RETRY_DELAY_SECS, default 2)
## main.go — wires retry policy from config into orchestrator
## docker-compose.yml
- GATEWAY_REQUEST_TIMEOUT_SECS: 120 → 300 (accommodates up to 4 attempts: 1 initial call + 3 retries)
- GATEWAY_MAX_LLM_RETRIES=3, GATEWAY_RETRY_DELAY_SECS=2 env vars
## API (handlers.go)
- StartChatSession goroutine now uses ChatWithEventsAndRetry
- onRetry callback emits "thinking" DB event with content "⟳ Retry N: reason"
so the client sees retry progress in the console panel
## Frontend (client/src/lib/chatStore.ts + client/src/pages/Chat.tsx)
- ConsoleEntry gains content?: string and new type "retry"
- thinking events with content starting "⟳ Retry" → type=retry (amber)
- Chat ConsolePanel renders retry events in amber with RefreshCw icon
and shows the retry reason string underneath
179 lines
7.1 KiB
YAML
179 lines
7.1 KiB
YAML
##############################################################################
# GoClaw Control Center — Docker Compose (Local Development)
#
# Services:
#   control-center  — React + Node.js tRPC frontend/backend  (:3000)
#   gateway         — Go Orchestrator + Tool Executor        (:18789)
#   db              — MySQL 8                                (:3306)
#
# LLM Provider (set in .env or environment):
#   Cloud (default):  LLM_BASE_URL=https://ollama.com/v1 + LLM_API_KEY=<key>
#   OpenAI-compat:    LLM_BASE_URL=https://api.openai.com/v1 + LLM_API_KEY=<key>
#   Local GPU node:   LLM_BASE_URL=http://<gpu-host>:11434 (no key needed)
#
# Local Ollama (GPU only):
#   The ollama service below is commented out by default.
#   Uncomment it only on machines with a compatible GPU.
#   Then set: LLM_BASE_URL=http://ollama:11434
#
# Usage:
#   cp docker/.env.example docker/.env   # fill in LLM_API_KEY etc.
#   docker compose -f docker/docker-compose.yml up -d
#   docker compose -f docker/docker-compose.yml logs -f gateway
#   docker compose -f docker/docker-compose.yml down -v   # also deletes volumes (MySQL data)
##############################################################################

# Compose project name; prefixes the network and volume names Compose creates.
name: goclaw

# Single bridge network shared by every service in this file.
networks:
  goclaw-net:
    driver: bridge

# Named volumes. mysql-data persists the database across container restarts.
volumes:
  mysql-data: {}
  # ollama-data: {}   # Uncomment when using local Ollama service below

services:

  # ── MySQL 8 ──────────────────────────────────────────────────────────────
  # Database used by both the gateway and the control center. Credentials
  # fall back to development defaults when not provided via the environment.
  db:
    image: mysql:8.0
    container_name: goclaw-db
    restart: unless-stopped
    environment:
      MYSQL_ROOT_PASSWORD: "${MYSQL_ROOT_PASSWORD:-goClawRoot123}"
      MYSQL_DATABASE: "${MYSQL_DATABASE:-goclaw}"
      MYSQL_USER: "${MYSQL_USER:-goclaw}"
      MYSQL_PASSWORD: "${MYSQL_PASSWORD:-goClawPass123}"
    ports:
      - "3306:3306"
    volumes:
      - mysql-data:/var/lib/mysql
    networks:
      - goclaw-net
    # Dependent services wait on this check (depends_on: service_healthy).
    healthcheck:
      test: ["CMD", "mysqladmin", "ping", "-h", "localhost", "-u", "root", "-p${MYSQL_ROOT_PASSWORD:-goClawRoot123}"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 30s

  # ── Local Ollama LLM Server (GPU ONLY — disabled by default) ─────────────
  # Uncomment this entire block only on machines with a compatible NVIDIA or
  # Apple Silicon GPU. Then set LLM_BASE_URL=http://ollama:11434 in the
  # gateway service below (or in your .env file).
  #
  # ollama:
  #   image: ollama/ollama:latest
  #   container_name: goclaw-ollama
  #   restart: unless-stopped
  #   ports:
  #     - "11434:11434"
  #   volumes:
  #     - ollama-data:/root/.ollama
  #   networks:
  #     - goclaw-net
  #   environment:
  #     - OLLAMA_NUM_PARALLEL=2
  #     - OLLAMA_MAX_LOADED_MODELS=2
  #   # NVIDIA GPU support — uncomment if available:
  #   # deploy:
  #   #   resources:
  #   #     reservations:
  #   #       devices:
  #   #         - driver: nvidia
  #   #           count: all
  #   #           capabilities: [gpu]

  # ── Go Gateway (Orchestrator + Tool Executor) ─────────────────────────────
  gateway:
    build:
      context: ..
      dockerfile: docker/Dockerfile.gateway
    container_name: goclaw-gateway
    restart: unless-stopped
    ports:
      - "18789:18789"
    environment:
      PORT: "18789"
      # ── LLM Provider ─────────────────────────────────────────────────────
      # Cloud default (Ollama Cloud, OpenAI-compatible):
      LLM_BASE_URL: "${LLM_BASE_URL:-https://ollama.com/v1}"
      LLM_API_KEY: "${LLM_API_KEY:-${OLLAMA_API_KEY:-}}"
      # Legacy alias (still supported):
      OLLAMA_API_KEY: "${OLLAMA_API_KEY:-${LLM_API_KEY:-}}"
      # ── To use local Ollama on GPU node, set in .env: ─────────────────────
      #   LLM_BASE_URL=http://ollama:11434          (if ollama service above is enabled)
      #   LLM_BASE_URL=http://<gpu-host-ip>:11434   (external GPU machine)
      # ─────────────────────────────────────────────────────────────────────
      DEFAULT_MODEL: "${DEFAULT_MODEL:-qwen2.5:7b}"
      DATABASE_URL: "${MYSQL_USER:-goclaw}:${MYSQL_PASSWORD:-goClawPass123}@tcp(db:3306)/${MYSQL_DATABASE:-goclaw}?parseTime=true"
      PROJECT_ROOT: "/app"
      # Request timeout — must cover the LLM latency of every attempt
      # (up to GATEWAY_MAX_LLM_RETRIES + 1 attempts) plus the back-off delays
      # between them. Raise it together with the retry settings below.
      GATEWAY_REQUEST_TIMEOUT_SECS: "${GATEWAY_REQUEST_TIMEOUT_SECS:-300}"
      GATEWAY_MAX_TOOL_ITERATIONS: "10"
      # LLM retry policy: retry up to N times on empty response or network error
      GATEWAY_MAX_LLM_RETRIES: "${GATEWAY_MAX_LLM_RETRIES:-3}"
      GATEWAY_RETRY_DELAY_SECS: "${GATEWAY_RETRY_DELAY_SECS:-2}"
      LOG_LEVEL: "info"
    depends_on:
      db:
        condition: service_healthy
      # ollama:           # Uncomment if using local Ollama service above
      #   condition: service_started
    networks:
      - goclaw-net
    volumes:
      # Mount project root for file tools (read-only)
      - ..:/app:ro
      # Mount Docker socket for docker_exec tool and Swarm management
      - /var/run/docker.sock:/var/run/docker.sock
    # privileged + pid:host allows nsenter to run commands on the host system
    # This gives the orchestrator true shell access to the host for self-modification
    # SECURITY: together with the Docker socket mount this is root-equivalent
    # access to the host — keep this to local development only.
    privileged: true
    pid: host
    healthcheck:
      test: ["CMD", "wget", "-qO-", "http://localhost:18789/health"]
      interval: 15s
      timeout: 5s
      retries: 3
      start_period: 10s

  # ── Control Center (React + Node.js) ─────────────────────────────────────
  control-center:
    build:
      context: ..
      dockerfile: docker/Dockerfile.control-center
    container_name: goclaw-control-center
    restart: unless-stopped
    ports:
      - "3000:3000"
    environment:
      NODE_ENV: production
      DATABASE_URL: "mysql://${MYSQL_USER:-goclaw}:${MYSQL_PASSWORD:-goClawPass123}@db:3306/${MYSQL_DATABASE:-goclaw}"
      GATEWAY_URL: "http://gateway:18789"
      # Override JWT_SECRET in .env; the fallback is a placeholder only.
      JWT_SECRET: "${JWT_SECRET:-change-me-in-production}"
      # ── LLM Provider (same as gateway, used by Node.js tRPC proxy) ──────
      OLLAMA_BASE_URL: "${LLM_BASE_URL:-${OLLAMA_BASE_URL:-https://ollama.com/v1}}"
      OLLAMA_API_KEY: "${LLM_API_KEY:-${OLLAMA_API_KEY:-}}"
      VITE_APP_ID: "${VITE_APP_ID:-}"
      OAUTH_SERVER_URL: "${OAUTH_SERVER_URL:-}"
      VITE_OAUTH_PORTAL_URL: "${VITE_OAUTH_PORTAL_URL:-}"
      BUILT_IN_FORGE_API_URL: "${BUILT_IN_FORGE_API_URL:-}"
      BUILT_IN_FORGE_API_KEY: "${BUILT_IN_FORGE_API_KEY:-}"
      VITE_FRONTEND_FORGE_API_KEY: "${VITE_FRONTEND_FORGE_API_KEY:-}"
      VITE_FRONTEND_FORGE_API_URL: "${VITE_FRONTEND_FORGE_API_URL:-}"
    # Start only after both the database and the gateway report healthy.
    depends_on:
      db:
        condition: service_healthy
      gateway:
        condition: service_healthy
    networks:
      - goclaw-net
    healthcheck:
      test: ["CMD", "wget", "-qO-", "http://localhost:3000/api/health"]
      interval: 15s
      timeout: 5s
      retries: 3
      start_period: 20s