feat(swarm): autonomous agent containers, Swarm Manager with auto-stop, /nodes UI overhaul
## 1. Fix /nodes Swarm Status Display
- Add SwarmStatusBanner component: clear green/red/loading state
- Shows nodeId, managerAddr, isManager badge
- Error state explains what to check (docker.sock mount)
- Header now shows 'swarm unreachable — check gateway' vs 'active'
- swarmOk now checks nodeId presence, not just data existence
## 2. Autonomous Agent Container
- New docker/Dockerfile.agent — builds Go agent binary from gateway/cmd/agent/
- New gateway/cmd/agent/main.go — standalone HTTP microservice:
  * GET /health — liveness probe with idle time info
  * POST /task — receives task, forwards to Gateway orchestrator
  * GET /info — agent metadata (id, hostname, gateway url)
  * Idle watchdog: calls /api/swarm/agents/{name}/stop after IdleTimeoutMinutes
  * Connects to Swarm overlay network (goclaw-net) → reaches DB/Gateway by DNS
  * Env: AGENT_ID, GATEWAY_URL, DATABASE_URL, IDLE_TIMEOUT_MINUTES
## 3. Swarm Manager Agent (auto-stop after 15min idle)
- New gateway/internal/api/swarm_manager.go:
  * SwarmManager goroutine checks every 60s
  * Scales idle GoClaw agent services to 0 replicas after 15 min
  * Tracks lastActivity from task UpdatedAt timestamps
- New REST endpoints in gateway:
  * GET /api/swarm/agents — list agents with idleMinutes
  * POST /api/swarm/agents/{name}/start — scale up agent
  * POST /api/swarm/agents/{name}/stop — scale to 0
  * DELETE /api/swarm/services/{id} — remove service permanently
- SwarmManager started as background goroutine in main.go with context cancel
## 4. Docker Client Enhancements
- Added NetworkAttachment type and Networks field to ServiceSpec
- CreateAgentServiceFull(opts) — supports overlay networks, custom labels
- CreateAgentService() delegates to CreateAgentServiceFull for backward compat
- RemoveService(id) — DELETE /v1.44/services/{id}
- GetServiceLastActivity(id) — finds latest task UpdatedAt for idle detection
## 5. tRPC & Gateway Proxy
- New functions: removeSwarmService, listSwarmAgents, startSwarmAgent, stopSwarmAgent
- SwarmAgentInfo type with idleMinutes, lastActivity, desiredReplicas
- createAgentService now accepts networks[] parameter
- New tRPC endpoints: nodes.removeService, nodes.listAgents, nodes.startAgent, nodes.stopAgent
## 6. Nodes.tsx UI Overhaul
- SwarmStatusBanner component at top — no more silent 'connecting…'
- New 'Agents' tab with AgentManagerRow: idle time, auto-stop warning, start/stop/remove buttons
- IdleColor coding: green < 5m, yellow 5-10m, red 10m+ with countdown to auto-stop
- ServiceRow: added Remove button with confirmation dialog
- RemoveConfirmDialog component
- DeployAgentDialog: added overlay networks field, default env includes GATEWAY_URL
- All queries refetch after agent start/stop/remove
This commit is contained in:
@@ -703,7 +703,7 @@ export async function setNodeAvailability(nodeId: string, availability: "active"
|
||||
|
||||
/** Deploy a new agent as a Swarm service */
|
||||
export async function createAgentService(opts: {
|
||||
name: string; image: string; replicas: number; env?: string[]; port?: number;
|
||||
name: string; image: string; replicas: number; env?: string[]; port?: number; networks?: string[];
|
||||
}): Promise<{ ok: boolean; serviceId?: string; name?: string } | null> {
|
||||
try {
|
||||
const res = await fetch(`${GATEWAY_BASE_URL}/api/swarm/services/create`, {
|
||||
@@ -716,3 +716,61 @@ export async function createAgentService(opts: {
|
||||
return res.json();
|
||||
} catch { return null; }
|
||||
}
|
||||
|
||||
/**
 * Delete a Swarm service through the gateway.
 *
 * Sends `DELETE {GATEWAY_BASE_URL}/api/swarm/services/{serviceId}` with a
 * 10-second abort timeout. The identifier is URI-encoded before being placed
 * in the path.
 *
 * @param serviceId - Service ID or name to remove (which forms are accepted is
 *   decided by the gateway — verify against its handler).
 * @returns `true` when the response status is OK; `false` for any other
 *   status, or when the request throws (network error, timeout).
 */
export async function removeSwarmService(serviceId: string): Promise<boolean> {
  try {
    const target = `${GATEWAY_BASE_URL}/api/swarm/services/${encodeURIComponent(serviceId)}`;
    const response = await fetch(target, {
      method: "DELETE",
      signal: AbortSignal.timeout(10_000),
    });
    return response.ok;
  } catch {
    // Best-effort call: any thrown error is reported to the caller as `false`.
    return false;
  }
}
|
||||
|
||||
/**
 * One entry of the agent list returned by the gateway's
 * `/api/swarm/agents` endpoint (see `listSwarmAgents`).
 *
 * Field meanings below are inferred from names only; confirm against the
 * gateway's response before relying on them.
 */
export interface SwarmAgentInfo {
  /** Service identifier. */
  id: string;
  /** Service name. */
  name: string;
  /** Container image reference. */
  image: string;
  /** Replica count requested for the service. */
  desiredReplicas: number;
  /** Number of tasks reported as running. */
  runningTasks: number;
  /** Last-activity marker; presumably a timestamp string — TODO confirm format. */
  lastActivity: string;
  /** Idle duration in minutes, as reported by the gateway. */
  idleMinutes: number;
  /** Presumably flags services that are GoClaw agents — TODO confirm. */
  isGoClaw: boolean;
}
|
||||
|
||||
/**
 * List agent services, with idle-time info, via the gateway.
 *
 * Sends `GET {GATEWAY_BASE_URL}/api/swarm/agents` with a 10-second abort
 * timeout and returns the parsed JSON body.
 *
 * @returns The parsed response (`agents` plus `count`), or `null` when the
 *   response status is not OK, the request throws (network error, timeout),
 *   or the body cannot be parsed as JSON. The payload shape is not validated
 *   at runtime.
 */
export async function listSwarmAgents(): Promise<{ agents: SwarmAgentInfo[]; count: number } | null> {
  try {
    const res = await fetch(`${GATEWAY_BASE_URL}/api/swarm/agents`, {
      signal: AbortSignal.timeout(10_000),
    });
    if (!res.ok) return null;
    // `await` is required: returning the bare promise would let a body-parse
    // rejection escape this try/catch instead of being mapped to `null`.
    return await res.json();
  } catch {
    // Best-effort call: any failure is reported to the caller as `null`.
    return null;
  }
}
|
||||
|
||||
/**
 * Start (scale up) an agent service through the gateway.
 *
 * Sends `POST {GATEWAY_BASE_URL}/api/swarm/agents/{name}/start` with a JSON
 * body of `{ replicas }` and a 10-second abort timeout. The name is
 * URI-encoded before being placed in the path.
 *
 * @param name - Name of the agent service to start.
 * @param replicas - Replica count sent to the gateway; defaults to 1.
 * @returns `true` when the response status is OK; `false` for any other
 *   status, or when the request throws (network error, timeout).
 */
export async function startSwarmAgent(name: string, replicas = 1): Promise<boolean> {
  try {
    const endpoint = `${GATEWAY_BASE_URL}/api/swarm/agents/${encodeURIComponent(name)}/start`;
    const payload = JSON.stringify({ replicas });
    const response = await fetch(endpoint, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: payload,
      signal: AbortSignal.timeout(10_000),
    });
    return response.ok;
  } catch {
    // Best-effort call: any thrown error is reported to the caller as `false`.
    return false;
  }
}
|
||||
|
||||
/**
 * Stop (scale to 0) an agent service through the gateway.
 *
 * Sends `POST {GATEWAY_BASE_URL}/api/swarm/agents/{name}/stop` with no body
 * and a 10-second abort timeout. The name is URI-encoded before being placed
 * in the path.
 *
 * @param name - Name of the agent service to stop.
 * @returns `true` when the response status is OK; `false` for any other
 *   status, or when the request throws (network error, timeout).
 */
export async function stopSwarmAgent(name: string): Promise<boolean> {
  try {
    const endpoint = `${GATEWAY_BASE_URL}/api/swarm/agents/${encodeURIComponent(name)}/stop`;
    const response = await fetch(endpoint, {
      method: "POST",
      signal: AbortSignal.timeout(10_000),
    });
    return response.ok;
  } catch {
    // Best-effort call: any thrown error is reported to the caller as `false`.
    return false;
  }
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user