From a8a8ea1ee2cedab355611bbc979c691eff9538f7 Mon Sep 17 00:00:00 2001
From: bboxwtf
Date: Sat, 21 Mar 2026 20:37:21 +0000
Subject: [PATCH] feat(swarm): autonomous agent containers, Swarm Manager with auto-stop, /nodes UI overhaul
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## 1. Fix /nodes Swarm Status Display
- Add SwarmStatusBanner component: clear green/red/loading state
- Shows nodeId, managerAddr, isManager badge
- Error state explains what to check (docker.sock mount)
- Header now shows 'swarm unreachable — check gateway' vs 'active'
- swarmOk now checks nodeId presence, not just data existence

## 2. Autonomous Agent Container
- New docker/Dockerfile.agent — builds Go agent binary from gateway/cmd/agent/
- New gateway/cmd/agent/main.go — standalone HTTP microservice:
  * GET /health — liveness probe with idle time info
  * POST /task — receives task, forwards to Gateway orchestrator
  * GET /info — agent metadata (id, hostname, gateway url)
  * Idle watchdog: calls /api/swarm/agents/{name}/stop after IdleTimeoutMinutes
  * Connects to Swarm overlay network (goclaw-net) → reaches DB/Gateway by DNS
  * Env: AGENT_ID, GATEWAY_URL, DATABASE_URL, IDLE_TIMEOUT_MINUTES

## 3. Swarm Manager Agent (auto-stop after 15min idle)
- New gateway/internal/api/swarm_manager.go:
  * SwarmManager goroutine checks every 60s
  * Scales idle GoClaw agent services to 0 replicas after 15 min
  * Tracks lastActivity from task UpdatedAt timestamps
- New REST endpoints in gateway:
  * GET /api/swarm/agents — list agents with idleMinutes
  * POST /api/swarm/agents/{name}/start — scale up agent
  * POST /api/swarm/agents/{name}/stop — scale to 0
  * DELETE /api/swarm/services/{id} — remove service permanently
- SwarmManager started as background goroutine in main.go with context cancel

## 4. 
Docker Client Enhancements
- Added NetworkAttachment type and Networks field to ServiceSpec
- CreateAgentServiceFull(opts) — supports overlay networks, custom labels
- CreateAgentService() delegates to CreateAgentServiceFull for backward compat
- RemoveService(id) — DELETE /v1.44/services/{id}
- GetServiceLastActivity(id) — finds latest task UpdatedAt for idle detection

## 5. tRPC & Gateway Proxy
- New functions: removeSwarmService, listSwarmAgents, startSwarmAgent, stopSwarmAgent
- SwarmAgentInfo type with idleMinutes, lastActivity, desiredReplicas
- createAgentService now accepts networks[] parameter
- New tRPC endpoints: nodes.removeService, nodes.listAgents, nodes.startAgent, nodes.stopAgent

## 6. Nodes.tsx UI Overhaul
- SwarmStatusBanner component at top — no more silent 'connecting…'
- New 'Agents' tab with AgentManagerRow: idle time, auto-stop warning, start/stop/remove buttons
- IdleColor coding: green < 5m, yellow 5-10m, red 10m+ with countdown to auto-stop
- ServiceRow: added Remove button with confirmation dialog
- RemoveConfirmDialog component
- DeployAgentDialog: added overlay networks field, default env includes GATEWAY_URL
- All queries refetch after agent start/stop/remove
---
 client/src/pages/Nodes.tsx            | 599 ++++++++++++++++++--------
 docker/Dockerfile.agent               |  53 +++
 gateway/cmd/agent/main.go             | 270 ++++++++++++
 gateway/cmd/gateway/main.go           |  10 +
 gateway/internal/api/handlers.go      |  82 +++-
 gateway/internal/api/swarm_manager.go | 142 ++++++
 gateway/internal/docker/client.go     | 103 ++++-
 server/gateway-proxy.ts               |  60 ++-
 server/routers.ts                     |  43 ++
 9 files changed, 1168 insertions(+), 194 deletions(-)
 create mode 100644 docker/Dockerfile.agent
 create mode 100644 gateway/cmd/agent/main.go
 create mode 100644 gateway/internal/api/swarm_manager.go

diff --git a/client/src/pages/Nodes.tsx b/client/src/pages/Nodes.tsx
index 3eb26a3..bf2a9b4 100644
--- a/client/src/pages/Nodes.tsx
+++ b/client/src/pages/Nodes.tsx
@@ -2,18 +2,21 @@
  * Nodes — Real Docker Swarm 
Management * * Shows: - * 1. Swarm overview (node count, managers, manager address, join tokens) - * 2. Node cards (hostname, role, IP, CPU/RAM, availability, labels, leader badge) + * 1. Swarm connection status — clearly shows if Swarm is active or unreachable + * 2. Swarm overview (node count, managers, manager address, join tokens) + * 3. Node cards (hostname, role, IP, CPU/RAM, availability, labels, leader badge) * → Set availability (active/pause/drain) + add labels inline - * 3. Services table (all swarm services: name, image, replicas running/desired) - * → Scale replicas + view tasks per service (which node each replica runs on) - * 4. Deploy Agent dialog — create a new Swarm service from any Docker image - * 5. Host Shell (privileged nsenter → run commands directly on the host) + * 4. Services table (all swarm services: name, image, replicas running/desired) + * → Scale replicas, view tasks, remove service + * 5. Agents tab — GoClaw agents with idle time, start/stop controls + * → Auto-stop after 15 min idle (handled by SwarmManager in gateway) + * 6. Deploy Agent dialog — create a new Swarm service from any Docker image + * 7. 
Host Shell (privileged nsenter → run commands directly on the host) */ -import { useState, useCallback } from "react"; +import { useState, useCallback, useEffect } from "react"; import { motion, AnimatePresence } from "framer-motion"; import { trpc } from "@/lib/trpc"; -import { Card, CardContent, CardHeader } from "@/components/ui/card"; +import { Card, CardContent } from "@/components/ui/card"; import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; import { Input } from "@/components/ui/input"; @@ -22,7 +25,8 @@ import { Terminal, Crown, Layers, ChevronRight, ChevronDown, Plus, Minus, Activity, Loader2, Shield, Bot, ArrowUpRight, Eye, Tag, Power, Rocket, - GitBranch, Globe, AlertTriangle, + GitBranch, Globe, AlertTriangle, Trash2, Play, Square, + Wifi, WifiOff, Clock, Zap, } from "lucide-react"; // ─── Helpers ────────────────────────────────────────────────────────────────── @@ -35,12 +39,12 @@ function formatMB(mb: number) { function getStateColor(state: string) { switch (state?.toLowerCase()) { case "ready": - case "running": return "text-green-400 border-green-400/30 bg-green-400/10"; + case "running": return "text-green-400 border-green-400/30 bg-green-400/10"; case "down": case "disconnected": return "text-red-400 border-red-400/30 bg-red-400/10"; case "drain": - case "pause": return "text-yellow-400 border-yellow-400/30 bg-yellow-400/10"; - default: return "text-muted-foreground border-border bg-muted/20"; + case "pause": return "text-yellow-400 border-yellow-400/30 bg-yellow-400/10"; + default: return "text-muted-foreground border-border bg-muted/20"; } } @@ -55,6 +59,12 @@ function getTaskStateColor(state: string) { } } +function idleColor(minutes: number) { + if (minutes < 5) return "text-green-400"; + if (minutes < 10) return "text-yellow-400"; + return "text-red-400"; +} + function CopyBtn({ text, label }: { text: string; label?: string }) { const [copied, setCopied] = useState(false); const copy = () => { @@ 
-73,6 +83,75 @@ function CopyBtn({ text, label }: { text: string; label?: string }) { ); } +// ─── Swarm Status Banner ────────────────────────────────────────────────────── + +function SwarmStatusBanner({ + isLoading, isError, swarmInfo, +}: { + isLoading: boolean; + isError: boolean; + swarmInfo: any; +}) { + if (isLoading) { + return ( + + + +
+

Connecting to Docker Swarm…

+

Fetching swarm state via Gateway

+
+
+
+ ); + } + if (isError || !swarmInfo) { + return ( + + + +
+

Swarm unreachable

+

+ The GoClaw Gateway cannot contact the Docker Swarm API. + Ensure the gateway container is running with --mount /var/run/docker.sock. +

+
+
+

Gateway: :18789

+

Socket: /var/run/docker.sock

+
+
+
+ ); + } + const isActive = swarmInfo.localNodeState === "active"; + return ( + + + {isActive + ? + : + } +
+

+ Swarm {isActive ? "active" : swarmInfo.localNodeState ?? "unknown"} +

+

+ Node ID: {swarmInfo.nodeId} + {swarmInfo.managerAddr && <> · Manager: {swarmInfo.managerAddr}} +

+
+
+ {swarmInfo.isManager && ( + Manager + )} +
+
+
+ ); +} + // ─── Node Card ──────────────────────────────────────────────────────────────── type NodeInfo = { @@ -101,18 +180,16 @@ function NodeCard({ node, onRefresh }: { node: NodeInfo; onRefresh: () => void }
- {/* Role icon */}
- {node.isLeader ? : + {node.isLeader ? : node.role === "manager" ? : }
- {/* Main info */}
{node.hostname} @@ -136,14 +213,12 @@ function NodeCard({ node, onRefresh }: { node: NodeInfo; onRefresh: () => void }
{node.ip} - {node.cpuCores} cores + {node.cpuCores} cores {formatMB(node.memTotalMB)} {node.os}/{node.arch} Docker {node.dockerVersion} - {node.id && {node.id}}
- {/* Labels */} {Object.keys(node.labels).length > 0 && (
{Object.entries(node.labels).map(([k, v]) => ( @@ -155,15 +230,12 @@ function NodeCard({ node, onRefresh }: { node: NodeInfo; onRefresh: () => void } )}
-
- {/* Expanded controls */} {expanded && (
{node.managerAddr && ( @@ -179,7 +251,6 @@ function NodeCard({ node, onRefresh }: { node: NodeInfo; onRefresh: () => void }
- {/* Availability control */}
Availability:
@@ -201,34 +272,24 @@ function NodeCard({ node, onRefresh }: { node: NodeInfo; onRefresh: () => void }
- {/* Add label */} {showLabelForm ? (
- setLabelKey(e.target.value)} - placeholder="key" className="h-6 text-[10px] font-mono w-24 px-2" - /> + setLabelKey(e.target.value)} + placeholder="key" className="h-6 text-[10px] font-mono w-24 px-2" /> = - setLabelVal(e.target.value)} - placeholder="value" className="h-6 text-[10px] font-mono w-24 px-2" - /> -
) : ( - )} @@ -249,16 +310,16 @@ type ServiceInfo = { }; function ServiceRow({ - svc, - onScale, - onViewTasks, + svc, onScale, onViewTasks, onRemove, }: { svc: ServiceInfo; onScale: (id: string, current: number) => void; onViewTasks: (id: string, name: string) => void; + onRemove: (id: string, name: string) => void; }) { const healthy = svc.runningTasks >= svc.desiredTasks && svc.desiredTasks > 0; - const partial = svc.runningTasks > 0 && svc.runningTasks < svc.desiredTasks; + const partial = svc.runningTasks > 0 && svc.runningTasks < svc.desiredTasks; + const stopped = svc.desiredReplicas === 0; return ( @@ -266,6 +327,7 @@ function ServiceRow({
{svc.isGoClaw && } {svc.name} + {stopped && stopped}
{svc.image.split(":")[0].split("/").pop()}:{svc.image.split(":")[1] || "latest"} @@ -277,14 +339,12 @@ function ServiceRow({
- {svc.runningTasks}/{svc.desiredTasks} + {svc.runningTasks}/{stopped ? "0" : svc.desiredTasks} - running - {svc.desiredReplicas !== svc.desiredTasks && ( - ({svc.desiredReplicas} desired) - )} + {stopped ? "replicas" : "running"}
@@ -294,32 +354,120 @@ function ServiceRow({ {p} ))}
- ) : ( - - )} + ) : } -
- - +
); } +// ─── Agent Manager Row ──────────────────────────────────────────────────────── + +type AgentServiceInfo = { + id: string; name: string; image: string; + desiredReplicas: number; runningTasks: number; + lastActivity: string; idleMinutes: number; isGoClaw: boolean; +}; + +function AgentManagerRow({ + agent, onRefresh, +}: { + agent: AgentServiceInfo; onRefresh: () => void; +}) { + const startMut = trpc.nodes.startAgent.useMutation({ onSuccess: onRefresh }); + const stopMut = trpc.nodes.stopAgent.useMutation({ onSuccess: onRefresh }); + const removeMut = trpc.nodes.removeService.useMutation({ onSuccess: onRefresh }); + + const stopped = agent.desiredReplicas === 0; + const idle = agent.idleMinutes; + + return ( +
+
+ +
+ +
+
+ {agent.name} + {stopped ? ( + stopped + ) : ( + + {agent.runningTasks}/{agent.desiredReplicas} running + + )} +
+
+ {agent.image.split("/").pop()} + {!stopped && ( + + + {idle < 1 ? "active" : `${idle.toFixed(0)}m idle`} + + )} + {!stopped && idle >= 13 && ( + + + auto-stop in ~{Math.max(0, 15 - Math.ceil(idle))}m + + )} +
+
+ +
+ {stopped ? ( + + ) : ( + + )} + +
+
+ ); +} + // ─── Scale Dialog ───────────────────────────────────────────────────────────── function ScaleDialog({ @@ -343,11 +491,9 @@ function ScaleDialog({ className="w-8 h-8 rounded border border-border/50 flex items-center justify-center hover:bg-secondary/50"> - setVal(parseInt(e.target.value) || 0)} - className="text-center font-mono font-bold text-lg h-10" - /> + className="text-center font-mono font-bold text-lg h-10" /> + +
+ +
+ ); +} + // ─── Deploy Agent Dialog ─────────────────────────────────────────────────────── function DeployAgentDialog({ onClose, onSuccess }: { onClose: () => void; onSuccess: () => void }) { const [name, setName] = useState("goclaw-agent"); - const [image, setImage] = useState("goclaw-gateway:latest"); + const [image, setImage] = useState("goclaw-agent:latest"); const [replicas, setReplicas] = useState(1); - const [port, setPort] = useState(0); - const [envStr, setEnvStr] = useState("AGENT_ROLE=worker\nLOG_LEVEL=info"); + const [port, setPort] = useState(8080); + const [envStr, setEnvStr] = useState( + "AGENT_ID=my-agent\nIDLE_TIMEOUT_MINUTES=15\nGATEWAY_URL=http://goclaw-gateway:18789" + ); + const [networks, setNetworks] = useState("goclaw-net"); const [error, setError] = useState(""); const deployMut = trpc.nodes.deployAgentService.useMutation({ @@ -382,7 +569,8 @@ function DeployAgentDialog({ onClose, onSuccess }: { onClose: () => void; onSucc const handleDeploy = () => { setError(""); const env = envStr.split("\n").map((l) => l.trim()).filter(Boolean); - deployMut.mutate({ name, image, replicas, env, port: port || undefined }); + const nets = networks.split(",").map((n) => n.trim()).filter(Boolean); + deployMut.mutate({ name, image, replicas, env, port: port || undefined, networks: nets }); }; return ( @@ -390,7 +578,7 @@ function DeployAgentDialog({ onClose, onSuccess }: { onClose: () => void; onSucc
@@ -406,15 +594,15 @@ function DeployAgentDialog({ onClose, onSuccess }: { onClose: () => void; onSucc
setImage(e.target.value)} - placeholder="goclaw-gateway:latest" className="h-8 text-xs font-mono" /> + placeholder="goclaw-agent:latest" className="h-8 text-xs font-mono" />

- Use the gateway image or a custom agent image + Build with: docker build -f docker/Dockerfile.agent -t goclaw-agent:latest .

-
+
- + setPort(parseInt(e.target.value) || 0)} className="h-7 text-xs font-mono" />
+
+ + setNetworks(e.target.value)} + placeholder="goclaw-net" className="h-8 text-xs font-mono" /> +

+ Connect to Swarm overlay network so agents can reach Gateway & DB by DNS +

+