Files
GoClaw/server/gateway-proxy.ts
bboxwtf a8a8ea1ee2 feat(swarm): autonomous agent containers, Swarm Manager with auto-stop, /nodes UI overhaul
## 1. Fix /nodes Swarm Status Display
- Add SwarmStatusBanner component: clear green/red/loading state
- Shows nodeId, managerAddr, isManager badge
- Error state explains what to check (docker.sock mount)
- Header now shows 'swarm unreachable — check gateway' vs 'active'
- swarmOk now checks nodeId presence, not just data existence

## 2. Autonomous Agent Container
- New docker/Dockerfile.agent — builds Go agent binary from gateway/cmd/agent/
- New gateway/cmd/agent/main.go — standalone HTTP microservice:
  * GET /health — liveness probe with idle time info
  * POST /task — receives task, forwards to Gateway orchestrator
  * GET /info  — agent metadata (id, hostname, gateway url)
  * Idle watchdog: calls /api/swarm/agents/{name}/stop after IdleTimeoutMinutes
  * Connects to Swarm overlay network (goclaw-net) → reaches DB/Gateway by DNS
  * Env: AGENT_ID, GATEWAY_URL, DATABASE_URL, IDLE_TIMEOUT_MINUTES

## 3. Swarm Manager Agent (auto-stop after 15min idle)
- New gateway/internal/api/swarm_manager.go:
  * SwarmManager goroutine checks every 60s
  * Scales idle GoClaw agent services to 0 replicas after 15 min
  * Tracks lastActivity from task UpdatedAt timestamps
- New REST endpoints in gateway:
  * GET  /api/swarm/agents           — list agents with idleMinutes
  * POST /api/swarm/agents/{name}/start — scale up agent
  * POST /api/swarm/agents/{name}/stop  — scale to 0
  * DELETE /api/swarm/services/{id}     — remove service permanently
- SwarmManager started as background goroutine in main.go with context cancel

## 4. Docker Client Enhancements
- Added NetworkAttachment type and Networks field to ServiceSpec
- CreateAgentServiceFull(opts) — supports overlay networks, custom labels
- CreateAgentService() delegates to CreateAgentServiceFull for backward compat
- RemoveService(id) — DELETE /v1.44/services/{id}
- GetServiceLastActivity(id) — finds latest task UpdatedAt for idle detection

## 5. tRPC & Gateway Proxy
- New functions: removeSwarmService, listSwarmAgents, startSwarmAgent, stopSwarmAgent
- SwarmAgentInfo type with idleMinutes, lastActivity, desiredReplicas
- createAgentService now accepts networks[] parameter
- New tRPC endpoints: nodes.removeService, nodes.listAgents, nodes.startAgent, nodes.stopAgent

## 6. Nodes.tsx UI Overhaul
- SwarmStatusBanner component at top — no more silent 'connecting…'
- New 'Agents' tab with AgentManagerRow: idle time, auto-stop warning, start/stop/remove buttons
- IdleColor coding: green < 5m, yellow 5-10m, red 10m+ with countdown to auto-stop
- ServiceRow: added Remove button with confirmation dialog
- RemoveConfirmDialog component
- DeployAgentDialog: added overlay networks field, default env includes GATEWAY_URL
- All queries refetch after agent start/stop/remove
2026-03-21 20:37:21 +00:00

777 lines
22 KiB
TypeScript

/**
* GoClaw Gateway Proxy
*
* Forwards orchestrator/agent/tool requests from the Node.js tRPC server
* to the Go Gateway running on :18789.
*
* The Go Gateway handles:
* - LLM orchestration (tool-use loop)
* - Tool execution (shell, file, docker, http)
* - Agent listing from DB
* - Model listing from LLM provider
*
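* When GATEWAY_URL is not set, requests go to http://localhost:18789. When the
* gateway is unreachable or answers with a non-2xx status, functions signal
* failure through their return value (null, false, or a structured error
* object) so callers can fall back gracefully.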
*/
/** Base URL used when GATEWAY_URL is unset or blank. */
const DEFAULT_GATEWAY_BASE_URL = "http://localhost:18789";

/**
 * Normalise the configured gateway URL.
 *
 * - `undefined`, empty, or whitespace-only values fall back to the default
 *   (an empty base would turn every request into an invalid relative URL).
 * - Trailing slashes are removed so `${base}/api/...` never contains `//`.
 *
 * @param raw Value of the GATEWAY_URL environment variable.
 * @returns A base URL without a trailing slash.
 */
function resolveGatewayBaseUrl(raw: string | undefined): string {
  const cleaned = (raw ?? "").trim().replace(/\/+$/, "");
  return cleaned === "" ? DEFAULT_GATEWAY_BASE_URL : cleaned;
}

/** Base URL of the Go Gateway; every request path is appended to it. */
const GATEWAY_BASE_URL = resolveGatewayBaseUrl(process.env.GATEWAY_URL);
/** Timeout for orchestrator chat requests. */
const GATEWAY_TIMEOUT_MS = 180_000; // 3 min — LLM can be slow
/** Timeout for lightweight metadata/health requests. */
const QUICK_TIMEOUT_MS = 5_000;
// ─── Types ────────────────────────────────────────────────────────────────────
/**
* One chat message in the `messages` array that gatewayChat() and
* startChatSession() serialise into the request body.
*/
export interface GatewayMessage {
role: "user" | "assistant" | "system";
content: string;
}
/**
* Element type of GatewayChatResult.toolCalls.
* The shape is asserted from the gateway's JSON; nothing in this module validates it.
*/
export interface GatewayToolCallStep {
tool: string;
args: Record<string, unknown>;
result: unknown; // required — matches ToolCallStep interface in Chat.tsx / Skills.tsx
error?: string;
success: boolean;
// NOTE(review): presumably the tool's execution time in milliseconds — confirm against the gateway
durationMs: number;
}
/**
* Result returned by gatewayChat().
* On success this is the gateway's JSON body as-is. On failure gatewayChat()
* builds it locally with success=false, response="", toolCalls=[] and `error` set.
*/
export interface GatewayChatResult {
success: boolean;
response: string;
toolCalls: GatewayToolCallStep[];
model?: string;
modelWarning?: string;
// snake_case keys are kept as received from the gateway
usage?: {
prompt_tokens: number;
completion_tokens: number;
total_tokens: number;
};
error?: string;
}
/**
* Shape of the JSON returned by GET /api/orchestrator/config
* (see getGatewayOrchestratorConfig()). Not validated at runtime.
*/
export interface GatewayOrchestratorConfig {
id: number | null;
name: string;
model: string;
temperature: number;
maxTokens: number;
allowedTools: string[];
systemPromptPreview: string;
}
/**
* Flat tool description produced by getGatewayTools(), which unwraps the
* OpenAI-style `{ type, function: { name, description, parameters } }` entries
* into this shape.
*/
export interface GatewayToolDef {
name: string;
description: string;
parameters: Record<string, unknown>;
}
/**
* Result returned by executeGatewayTool().
* When the proxy itself reports the failure (non-2xx status or request error)
* it sets success=false, `error`, and durationMs=0.
*/
export interface GatewayToolResult {
success: boolean;
output?: unknown;
error?: string;
durationMs: number;
}
/**
* Result returned by checkGatewayHealth().
*/
export interface GatewayHealthResult {
// true only when GET /health answered with a 2xx status
connected: boolean;
// milliseconds from the start of the call until the response arrived or the request failed
latencyMs: number;
// copied from the `llm` key of the /health body, falling back to its `ollama` key
llm?: { connected: boolean; latencyMs: number };
// "HTTP <status>" for a non-2xx response, otherwise the caught error's message
error?: string;
}
// ─── Health ───────────────────────────────────────────────────────────────────
/**
 * Probe the Go Gateway's `/health` endpoint and measure round-trip latency.
 *
 * The LLM provider status is read from the response body's `llm` key, or from
 * `ollama` when `llm` is absent (the key differs between gateway versions).
 *
 * @returns `connected: true` with latency and LLM status on a 2xx response;
 *   otherwise `connected: false` with `error` set to `HTTP <status>` or the
 *   message of whatever the request threw (network failure, timeout, bad JSON).
 */
export async function checkGatewayHealth(): Promise<GatewayHealthResult> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  try {
    const response = await fetch(`${GATEWAY_BASE_URL}/health`, {
      signal: AbortSignal.timeout(QUICK_TIMEOUT_MS),
    });
    // Latency is sampled once the response has arrived, before the body is parsed.
    const latencyMs = elapsed();

    if (!response.ok) {
      return { connected: false, latencyMs, error: `HTTP ${response.status}` };
    }

    const payload = await response.json();
    // Go Gateway may return "ollama" or "llm" key depending on version
    return { connected: true, latencyMs, llm: payload.llm ?? payload.ollama };
  } catch (err: unknown) {
    const error = err instanceof Error ? err.message : String(err);
    return { connected: false, latencyMs: elapsed(), error };
  }
}
/**
 * Quick reachability check: `GET /health` with a 2-second timeout.
 * The response body is not read, so no LLM status is inspected.
 *
 * @returns `true` for a 2xx response; `false` for any other status or when
 *   the request throws (network failure, timeout).
 */
export async function isGatewayAvailable(): Promise<boolean> {
  try {
    const { ok } = await fetch(`${GATEWAY_BASE_URL}/health`, {
      signal: AbortSignal.timeout(2_000),
    });
    return ok;
  } catch {
    return false;
  }
}
// ─── Model Info ───────────────────────────────────────────────────────────────
/**
* Model details extracted by getOllamaModelInfo() from an Ollama /api/show response.
*/
export interface OllamaModelInfo {
// first numeric `model_info` value whose key ends with ".context_length"; 0 when none is found
contextLength: number;
// from `details.parameter_size`
parameterSize?: string;
// from `details.family`
family?: string;
// from `details.quantization_level`
quantization?: string;
// from the top-level `capabilities` field
capabilities?: string[];
}
/**
 * Look up a model's details via Ollama's `POST /api/show`.
 *
 * The endpoint root comes from `OLLAMA_BASE_URL` (default
 * `https://ollama.com/v1`) with a trailing `/v1` or `/v1/` stripped. When
 * `OLLAMA_API_KEY` is non-empty it is sent as a Bearer token.
 *
 * @param modelId Model identifier sent as `{ model }` in the request body.
 * @returns Extracted model info, or `null` on a non-2xx status or any thrown
 *   error (network failure, 10 s timeout, unparseable body).
 */
export async function getOllamaModelInfo(modelId: string): Promise<OllamaModelInfo | null> {
  const configuredUrl = process.env.OLLAMA_BASE_URL ?? "https://ollama.com/v1";
  const apiRoot = configuredUrl.replace(/\/v1\/?$/, "");
  const token = process.env.OLLAMA_API_KEY ?? "";

  try {
    const requestHeaders: Record<string, string> = {
      "Content-Type": "application/json",
      ...(token ? { Authorization: `Bearer ${token}` } : {}),
    };

    const response = await fetch(`${apiRoot}/api/show`, {
      method: "POST",
      headers: requestHeaders,
      body: JSON.stringify({ model: modelId }),
      signal: AbortSignal.timeout(10_000),
    });
    if (!response.ok) {
      return null;
    }

    const payload = await response.json();

    // context_length is under model_info with key "{arch}.context_length";
    // take the first such entry that actually holds a number.
    const infoEntries: [string, unknown][] = payload.model_info
      ? Object.entries(payload.model_info)
      : [];
    const match = infoEntries.find(
      ([key, value]) => key.endsWith(".context_length") && typeof value === "number"
    );

    return {
      contextLength: match ? (match[1] as number) : 0,
      parameterSize: payload.details?.parameter_size,
      family: payload.details?.family,
      quantization: payload.details?.quantization_level,
      capabilities: payload.capabilities,
    };
  } catch {
    return null;
  }
}
// ─── Orchestrator ─────────────────────────────────────────────────────────────
/**
 * Send a chat request to the Go Orchestrator (`POST /api/orchestrator/chat`).
 * The Go side runs the full LLM ↔ tool loop; this call waits up to
 * GATEWAY_TIMEOUT_MS for the final answer.
 *
 * @param messages Conversation to send.
 * @param model Optional model override, forwarded as-is.
 * @param maxIter Optional iteration limit, forwarded as-is.
 * @returns The gateway's JSON body on a 2xx response. Every failure (request
 *   error, timeout, non-2xx status, unreadable or invalid body) is reported as
 *   `{ success: false, response: "", toolCalls: [], error }` — the promise
 *   does not reject.
 */
export async function gatewayChat(
  messages: GatewayMessage[],
  model?: string,
  maxIter?: number
): Promise<GatewayChatResult> {
  const failure = (error: string): GatewayChatResult => ({
    success: false,
    response: "",
    toolCalls: [],
    error,
  });
  const describe = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  let res: Response;
  try {
    res = await fetch(`${GATEWAY_BASE_URL}/api/orchestrator/chat`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ messages, model, maxIter }),
      signal: AbortSignal.timeout(GATEWAY_TIMEOUT_MS),
    });
  } catch (err: unknown) {
    return failure(
      `Gateway unreachable: ${describe(err)}. Is the Go Gateway running on ${GATEWAY_BASE_URL}?`
    );
  }

  try {
    if (!res.ok) {
      const text = await res.text();
      return failure(`Gateway error (${res.status}): ${text}`);
    }
    // `await` is required here: a bare `return res.json()` would let a
    // body-read timeout or JSON parse error escape this try block and reject
    // the caller's promise instead of producing a structured error.
    return await res.json();
  } catch (err: unknown) {
    return failure(`Gateway response could not be read: ${describe(err)}`);
  }
}
/**
 * Get orchestrator config from Go Gateway (`GET /api/orchestrator/config`;
 * the gateway reads it from its DB).
 *
 * @returns The parsed JSON body, or `null` when the request fails, exceeds
 *   QUICK_TIMEOUT_MS, returns a non-2xx status, or the body cannot be parsed.
 */
export async function getGatewayOrchestratorConfig(): Promise<GatewayOrchestratorConfig | null> {
  try {
    const res = await fetch(`${GATEWAY_BASE_URL}/api/orchestrator/config`, {
      signal: AbortSignal.timeout(QUICK_TIMEOUT_MS),
    });
    if (!res.ok) return null;
    // Awaited so body-read / JSON-parse failures are caught below rather than
    // rejecting to the caller.
    return await res.json();
  } catch {
    return null;
  }
}
// ─── Models ───────────────────────────────────────────────────────────────────
/**
 * Get list of models from Go Gateway (`GET /api/models`, proxied from the LLM
 * provider). Uses a 10-second timeout.
 *
 * @returns The parsed JSON body, or `null` when the request fails, times out,
 *   returns a non-2xx status, or the body cannot be parsed.
 */
export async function getGatewayModels(): Promise<{ data: { id: string }[] } | null> {
  try {
    const res = await fetch(`${GATEWAY_BASE_URL}/api/models`, {
      signal: AbortSignal.timeout(10_000),
    });
    if (!res.ok) return null;
    // Awaited so body-read / JSON-parse failures are caught below rather than
    // rejecting to the caller.
    return await res.json();
  } catch {
    return null;
  }
}
// ─── Agents ───────────────────────────────────────────────────────────────────
/**
 * Get list of agents from Go Gateway (`GET /api/agents`; the gateway reads
 * them from its DB).
 *
 * @returns The parsed JSON body, or `null` when the request fails, exceeds
 *   QUICK_TIMEOUT_MS, returns a non-2xx status, or the body cannot be parsed.
 */
export async function getGatewayAgents(): Promise<{ agents: unknown[]; count: number } | null> {
  try {
    const res = await fetch(`${GATEWAY_BASE_URL}/api/agents`, {
      signal: AbortSignal.timeout(QUICK_TIMEOUT_MS),
    });
    if (!res.ok) return null;
    // Awaited so body-read / JSON-parse failures are caught below rather than
    // rejecting to the caller.
    return await res.json();
  } catch {
    return null;
  }
}
/**
 * Get a single agent by ID from Go Gateway (`GET /api/agents/{id}`).
 *
 * @param id Numeric agent ID, interpolated into the path.
 * @returns The parsed JSON body, or `null` when the request fails, exceeds
 *   QUICK_TIMEOUT_MS, returns a non-2xx status, or the body cannot be parsed.
 */
export async function getGatewayAgent(id: number): Promise<unknown | null> {
  try {
    const res = await fetch(`${GATEWAY_BASE_URL}/api/agents/${id}`, {
      signal: AbortSignal.timeout(QUICK_TIMEOUT_MS),
    });
    if (!res.ok) return null;
    // Awaited so body-read / JSON-parse failures are caught below rather than
    // rejecting to the caller.
    return await res.json();
  } catch {
    return null;
  }
}
// ─── Tools ────────────────────────────────────────────────────────────────────
/**
 * List the tools the Go Gateway exposes (`GET /api/tools`).
 *
 * The gateway answers in OpenAI format —
 * `{ tools: [{ type: "function", function: { name, description, parameters } }, ...], count }` —
 * and each entry is unwrapped to a flat GatewayToolDef. Entries that are
 * already flat (`{ name, description, parameters }`) pass through unchanged.
 *
 * @returns The normalised tool list, or `null` when the request fails, exceeds
 *   QUICK_TIMEOUT_MS, returns a non-2xx status, or `tools` is not an array.
 */
export async function getGatewayTools(): Promise<GatewayToolDef[] | null> {
  type RawTool = {
    type?: string;
    function?: { name: string; description: string; parameters: Record<string, unknown> };
    name?: string;
    description?: string;
    parameters?: Record<string, unknown>;
  };

  const toToolDef = (raw: RawTool): GatewayToolDef => {
    const wrapped = raw.function;
    if (!wrapped) {
      // Flat format: {name, description, parameters}
      return raw as GatewayToolDef;
    }
    // OpenAI format: {type: "function", function: {name, ...}}
    return {
      name: wrapped.name,
      description: wrapped.description,
      parameters: wrapped.parameters,
    } as GatewayToolDef;
  };

  try {
    const response = await fetch(`${GATEWAY_BASE_URL}/api/tools`, {
      signal: AbortSignal.timeout(QUICK_TIMEOUT_MS),
    });
    if (!response.ok) {
      return null;
    }
    const payload = await response.json();
    return Array.isArray(payload.tools)
      ? payload.tools.map((entry: RawTool) => toToolDef(entry))
      : null;
  } catch {
    return null;
  }
}
/**
 * Execute a single tool via Go Gateway (`POST /api/tools/execute`) with a
 * 30-second timeout.
 *
 * @param toolName Tool identifier, sent as `tool`.
 * @param args Tool arguments, sent as `args`.
 * @returns The gateway's JSON body on a 2xx response. Every failure (request
 *   error, timeout, non-2xx status, unreadable or invalid body) is reported as
 *   `{ success: false, error, durationMs: 0 }` — the promise does not reject.
 */
export async function executeGatewayTool(
  toolName: string,
  args: Record<string, unknown>
): Promise<GatewayToolResult> {
  const failure = (error: string): GatewayToolResult => ({
    success: false,
    error,
    durationMs: 0,
  });
  const describe = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  let res: Response;
  try {
    res = await fetch(`${GATEWAY_BASE_URL}/api/tools/execute`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ tool: toolName, args }),
      signal: AbortSignal.timeout(30_000),
    });
  } catch (err: unknown) {
    return failure(`Gateway unreachable: ${describe(err)}`);
  }

  try {
    if (!res.ok) {
      const text = await res.text();
      return failure(`Gateway error (${res.status}): ${text}`);
    }
    // `await` is required: a bare `return res.json()` would let a body-read
    // timeout or JSON parse error bypass this try block and reject the caller.
    return await res.json();
  } catch (err: unknown) {
    return failure(`Gateway response could not be read: ${describe(err)}`);
  }
}
// ─── Nodes / Docker Swarm ─────────────────────────────────────────────────────
/**
* Element type of GatewayNodesResult.nodes (response of GET /api/nodes).
* The shape is asserted from the gateway's JSON; nothing in this module validates it.
*/
export interface GatewayNodeInfo {
id: string;
hostname: string;
role: string;
status: string;
availability: string;
ip: string;
os: string;
arch: string;
cpuCores: number;
memTotalMB: number;
dockerVersion: string;
isLeader: boolean;
managerAddr?: string;
labels: Record<string, string>;
// NOTE(review): timestamp format is whatever the gateway emits — confirm before parsing
updatedAt: string;
}
/**
* Element type of the optional GatewayNodesResult.containers array.
*/
export interface GatewayContainerInfo {
id: string;
name: string;
image: string;
state: string;
status: string;
}
/**
* Shape of the JSON returned by GET /api/nodes (see getGatewayNodes()).
* Not validated at runtime.
*/
export interface GatewayNodesResult {
nodes: GatewayNodeInfo[];
count: number;
swarmActive: boolean;
managers?: number;
totalNodes?: number;
containers?: GatewayContainerInfo[];
fetchedAt: string;
}
/**
* Element type of GatewayNodeStatsResult.stats: per-container CPU/memory figures.
*/
export interface GatewayContainerStat {
id: string;
name: string;
cpuPct: number;
memUseMB: number;
memLimMB: number;
memPct: number;
}
/**
* Shape of the JSON returned by GET /api/nodes/stats (see getGatewayNodeStats()).
* Not validated at runtime.
*/
export interface GatewayNodeStatsResult {
stats: GatewayContainerStat[];
count: number;
fetchedAt: string;
}
/**
 * Get Docker Swarm nodes (or standalone Docker host) from Go Gateway
 * (`GET /api/nodes`) with a 10-second timeout.
 *
 * @returns The parsed JSON body, or `null` when the request fails, times out,
 *   returns a non-2xx status, or the body cannot be parsed.
 */
export async function getGatewayNodes(): Promise<GatewayNodesResult | null> {
  try {
    const res = await fetch(`${GATEWAY_BASE_URL}/api/nodes`, {
      signal: AbortSignal.timeout(10_000),
    });
    if (!res.ok) return null;
    // Awaited so body-read / JSON-parse failures are caught below rather than
    // rejecting to the caller.
    return await res.json();
  } catch {
    return null;
  }
}
/**
 * Get live container CPU/RAM stats from Go Gateway (`GET /api/nodes/stats`)
 * with a 15-second timeout.
 *
 * @returns The parsed JSON body, or `null` when the request fails, times out,
 *   returns a non-2xx status, or the body cannot be parsed.
 */
export async function getGatewayNodeStats(): Promise<GatewayNodeStatsResult | null> {
  try {
    const res = await fetch(`${GATEWAY_BASE_URL}/api/nodes/stats`, {
      signal: AbortSignal.timeout(15_000),
    });
    if (!res.ok) return null;
    // Awaited so body-read / JSON-parse failures are caught below rather than
    // rejecting to the caller.
    return await res.json();
  } catch {
    return null;
  }
}
// ─── Persistent Chat Sessions ─────────────────────────────────────────────────
/**
* Element type of the `events` array returned by getChatEvents().
* The shape is asserted from the gateway's JSON; nothing in this module validates it.
*/
export interface GatewayChatEvent {
id: number;
sessionId: string;
// sequence number; getChatEvents() sends its afterSeq argument as the `after` query parameter
seq: number;
eventType: "thinking" | "tool_call" | "delta" | "done" | "error";
content: string;
toolName: string;
toolArgs: string; // JSON string
toolResult: string;
toolSuccess: boolean;
durationMs: number;
model: string;
usageJson: string; // JSON string
errorMsg: string;
createdAt: string;
}
/**
* Session metadata returned by getChatSession() and, as array elements,
* by listChatSessions(). Not validated at runtime.
*/
export interface GatewayChatSession {
id: number;
sessionId: string;
agentId: number;
status: "running" | "done" | "error";
userMessage: string;
finalResponse: string;
model: string;
totalTokens: number;
processingTimeMs: number;
errorMessage: string;
createdAt: string;
updatedAt: string;
}
/**
 * Start a persistent background chat session (`POST /api/chat/session`).
 * Returns the sessionId immediately; processing continues on the server.
 *
 * @param messages Conversation to send.
 * @param sessionId Caller-chosen session identifier, sent in the body.
 * @param model Optional model override, forwarded as-is.
 * @param maxIter Iteration limit forwarded to the gateway (default 10).
 * @returns The parsed JSON body, or `null` when the request fails, exceeds the
 *   10-second timeout, returns a non-2xx status, or the body cannot be parsed.
 */
export async function startChatSession(
  messages: GatewayMessage[],
  sessionId: string,
  model?: string,
  maxIter = 10
): Promise<{ sessionId: string; status: string } | null> {
  try {
    const res = await fetch(`${GATEWAY_BASE_URL}/api/chat/session`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ messages, sessionId, model, maxIter }),
      signal: AbortSignal.timeout(10_000),
    });
    if (!res.ok) return null;
    // Awaited so body-read / JSON-parse failures are caught below rather than
    // rejecting to the caller.
    return await res.json();
  } catch {
    return null;
  }
}
/**
 * Get session metadata (status, finalResponse, tokens…) via
 * `GET /api/chat/session/{sessionId}`.
 *
 * @param sessionId Session identifier; percent-encoded before being placed in
 *   the path so characters such as `/`, `?` or `#` cannot alter the route.
 * @returns The parsed JSON body, or `null` when the request fails, exceeds the
 *   5-second timeout, returns a non-2xx status, or the body cannot be parsed.
 */
export async function getChatSession(sessionId: string): Promise<GatewayChatSession | null> {
  try {
    const res = await fetch(
      `${GATEWAY_BASE_URL}/api/chat/session/${encodeURIComponent(sessionId)}`,
      { signal: AbortSignal.timeout(5_000) }
    );
    if (!res.ok) return null;
    // Awaited so body-read / JSON-parse failures are caught below rather than
    // rejecting to the caller.
    return await res.json();
  } catch {
    return null;
  }
}
/**
 * Fetch events for a session with seq > afterSeq via
 * `GET /api/chat/session/{sessionId}/events?after={afterSeq}`.
 *
 * @param sessionId Session identifier; percent-encoded before being placed in
 *   the path so characters such as `/`, `?` or `#` cannot alter the route.
 * @param afterSeq Sent as the `after` query parameter (default 0).
 * @returns `{ sessionId, status, events[] }` as parsed from the body, or
 *   `null` when the request fails, exceeds the 5-second timeout, returns a
 *   non-2xx status, or the body cannot be parsed.
 */
export async function getChatEvents(
  sessionId: string,
  afterSeq = 0
): Promise<{ sessionId: string; status: string; events: GatewayChatEvent[] } | null> {
  try {
    const res = await fetch(
      `${GATEWAY_BASE_URL}/api/chat/session/${encodeURIComponent(sessionId)}/events?after=${afterSeq}`,
      { signal: AbortSignal.timeout(5_000) }
    );
    if (!res.ok) return null;
    // Awaited so body-read / JSON-parse failures are caught below rather than
    // rejecting to the caller.
    return await res.json();
  } catch {
    return null;
  }
}
/**
 * List recent sessions via `GET /api/chat/sessions?limit={limit}`.
 *
 * @param limit Sent as the `limit` query parameter (default 50).
 * @returns The parsed JSON body, or `null` when the request fails, exceeds the
 *   5-second timeout, returns a non-2xx status, or the body cannot be parsed.
 */
export async function listChatSessions(
  limit = 50
): Promise<{ sessions: GatewayChatSession[] } | null> {
  try {
    const res = await fetch(`${GATEWAY_BASE_URL}/api/chat/sessions?limit=${limit}`, {
      signal: AbortSignal.timeout(5_000),
    });
    if (!res.ok) return null;
    // Awaited so body-read / JSON-parse failures are caught below rather than
    // rejecting to the caller.
    return await res.json();
  } catch {
    return null;
  }
}
// ─── Real Docker Swarm API ────────────────────────────────────────────────────
/**
* Element type of the `nodes` array returned by listSwarmNodes()
* (GET /api/swarm/nodes). Not validated at runtime.
*/
export interface SwarmNodeInfo {
id: string;
hostname: string;
role: "manager" | "worker";
state: string;
// setNodeAvailability() accepts "active" | "pause" | "drain"; this field is typed as a plain string
availability: string;
ip: string;
os: string;
arch: string;
cpuCores: number;
memTotalMB: number;
dockerVersion: string;
isLeader: boolean;
managerAddr?: string;
labels: Record<string, string>;
updatedAt: string;
}
/**
* Element type of the `services` array returned by listSwarmServices()
* (GET /api/swarm/services). Not validated at runtime.
*/
export interface SwarmServiceInfo {
id: string;
name: string;
image: string;
mode: "replicated" | "global";
desiredReplicas: number;
runningTasks: number;
desiredTasks: number;
labels: Record<string, string>;
updatedAt: string;
ports: string[];
// NOTE(review): presumably marks services deployed by GoClaw — confirm how the gateway sets it
isGoClaw: boolean;
}
/**
* Element type of the `tasks` array returned by getServiceTasks()
* (GET /api/swarm/services/{id}/tasks). Not validated at runtime.
*/
export interface SwarmTaskInfo {
id: string;
serviceId: string;
nodeId: string;
slot: number;
state: string;
message: string;
containerId: string;
updatedAt: string;
}
/**
* Shape of the JSON returned by GET /api/swarm/info (see getSwarmInfo()).
* Not validated at runtime.
*/
export interface SwarmInfoResult {
nodeId: string;
localNodeState: string;
isManager: boolean;
managers: number;
nodes: number;
managerAddr: string;
// optional — NOTE(review): presumably only present when the gateway can read tokens; confirm
joinTokens?: { worker: string; manager: string };
}
/**
* Shape of the JSON returned by GET /api/swarm/join-token?role=…
* (see getSwarmJoinToken()). Not validated at runtime.
*/
export interface JoinTokenResult {
role: string;
token: string;
managerAddr: string;
joinCommand: string;
}
/**
 * Get overall swarm state + join tokens (`GET /api/swarm/info`).
 *
 * @returns The parsed JSON body, or `null` when the request fails, exceeds
 *   QUICK_TIMEOUT_MS, returns a non-2xx status, or the body cannot be parsed.
 */
export async function getSwarmInfo(): Promise<SwarmInfoResult | null> {
  try {
    const res = await fetch(`${GATEWAY_BASE_URL}/api/swarm/info`, {
      signal: AbortSignal.timeout(QUICK_TIMEOUT_MS),
    });
    if (!res.ok) return null;
    // Awaited so body-read / JSON-parse failures are caught below rather than
    // rejecting to the caller.
    return await res.json();
  } catch {
    return null;
  }
}
/**
 * List all swarm nodes with live status (`GET /api/swarm/nodes`).
 *
 * @returns The parsed JSON body, or `null` when the request fails, exceeds
 *   QUICK_TIMEOUT_MS, returns a non-2xx status, or the body cannot be parsed.
 */
export async function listSwarmNodes(): Promise<{ nodes: SwarmNodeInfo[]; count: number } | null> {
  try {
    const res = await fetch(`${GATEWAY_BASE_URL}/api/swarm/nodes`, {
      signal: AbortSignal.timeout(QUICK_TIMEOUT_MS),
    });
    if (!res.ok) return null;
    // Awaited so body-read / JSON-parse failures are caught below rather than
    // rejecting to the caller.
    return await res.json();
  } catch {
    return null;
  }
}
/**
 * List all swarm services (`GET /api/swarm/services`).
 *
 * @returns The parsed JSON body, or `null` when the request fails, exceeds
 *   QUICK_TIMEOUT_MS, returns a non-2xx status, or the body cannot be parsed.
 */
export async function listSwarmServices(): Promise<{ services: SwarmServiceInfo[]; count: number } | null> {
  try {
    const res = await fetch(`${GATEWAY_BASE_URL}/api/swarm/services`, {
      signal: AbortSignal.timeout(QUICK_TIMEOUT_MS),
    });
    if (!res.ok) return null;
    // Awaited so body-read / JSON-parse failures are caught below rather than
    // rejecting to the caller.
    return await res.json();
  } catch {
    return null;
  }
}
/**
 * Get tasks for a specific service (`GET /api/swarm/services/{serviceId}/tasks`).
 *
 * @param serviceId Service identifier; percent-encoded before being placed in
 *   the path, matching removeSwarmService().
 * @returns The parsed JSON body, or `null` when the request fails, exceeds
 *   QUICK_TIMEOUT_MS, returns a non-2xx status, or the body cannot be parsed.
 */
export async function getServiceTasks(serviceId: string): Promise<{ tasks: SwarmTaskInfo[] } | null> {
  try {
    const res = await fetch(
      `${GATEWAY_BASE_URL}/api/swarm/services/${encodeURIComponent(serviceId)}/tasks`,
      { signal: AbortSignal.timeout(QUICK_TIMEOUT_MS) }
    );
    if (!res.ok) return null;
    // Awaited so body-read / JSON-parse failures are caught below rather than
    // rejecting to the caller.
    return await res.json();
  } catch {
    return null;
  }
}
/**
 * Scale a service to N replicas (`POST /api/swarm/services/{serviceId}/scale`).
 *
 * @param serviceId Service identifier; percent-encoded before being placed in
 *   the path, matching removeSwarmService().
 * @param replicas Target replica count, sent as `{ replicas }`.
 * @returns `true` for a 2xx response; `false` for any other status or when the
 *   request throws (network failure, QUICK_TIMEOUT_MS exceeded).
 */
export async function scaleSwarmService(serviceId: string, replicas: number): Promise<boolean> {
  try {
    const res = await fetch(
      `${GATEWAY_BASE_URL}/api/swarm/services/${encodeURIComponent(serviceId)}/scale`,
      {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ replicas }),
        signal: AbortSignal.timeout(QUICK_TIMEOUT_MS),
      }
    );
    return res.ok;
  } catch {
    return false;
  }
}
/**
 * Get join token and command (`GET /api/swarm/join-token?role={role}`).
 *
 * @param role Which token to request.
 * @returns The parsed JSON body, or `null` when the request fails, exceeds
 *   QUICK_TIMEOUT_MS, returns a non-2xx status, or the body cannot be parsed.
 */
export async function getSwarmJoinToken(role: "worker" | "manager"): Promise<JoinTokenResult | null> {
  try {
    const res = await fetch(`${GATEWAY_BASE_URL}/api/swarm/join-token?role=${role}`, {
      signal: AbortSignal.timeout(QUICK_TIMEOUT_MS),
    });
    if (!res.ok) return null;
    // Awaited so body-read / JSON-parse failures are caught below rather than
    // rejecting to the caller.
    return await res.json();
  } catch {
    return null;
  }
}
/**
 * Execute a shell command on the host system (`POST /api/swarm/shell`) with a
 * 35-second timeout.
 *
 * NOTE(review): `command` is forwarded verbatim; any authorisation or
 * sanitisation must happen in the caller or the gateway.
 *
 * @param command Command string, sent as `{ command }`.
 * @returns The parsed JSON body, or `null` when the request fails, times out,
 *   returns a non-2xx status, or the body cannot be parsed.
 */
export async function execSwarmShell(command: string): Promise<{ output: string; success: boolean; error?: string } | null> {
  try {
    const res = await fetch(`${GATEWAY_BASE_URL}/api/swarm/shell`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ command }),
      signal: AbortSignal.timeout(35_000),
    });
    if (!res.ok) return null;
    // Awaited so body-read / JSON-parse failures are caught below rather than
    // rejecting to the caller.
    return await res.json();
  } catch {
    return null;
  }
}
/**
 * Add a label to a swarm node (`POST /api/swarm/nodes/{nodeId}/label`).
 *
 * @param nodeId Node identifier; percent-encoded before being placed in the
 *   path so characters such as `/`, `?` or `#` cannot alter the route.
 * @param key Label key, sent in the body.
 * @param value Label value, sent in the body.
 * @returns `true` for a 2xx response; `false` for any other status or when the
 *   request throws (network failure, QUICK_TIMEOUT_MS exceeded).
 */
export async function addSwarmNodeLabel(nodeId: string, key: string, value: string): Promise<boolean> {
  try {
    const res = await fetch(
      `${GATEWAY_BASE_URL}/api/swarm/nodes/${encodeURIComponent(nodeId)}/label`,
      {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ key, value }),
        signal: AbortSignal.timeout(QUICK_TIMEOUT_MS),
      }
    );
    return res.ok;
  } catch {
    return false;
  }
}
/**
 * Set node availability (active|pause|drain) via
 * `POST /api/swarm/nodes/{nodeId}/availability`.
 *
 * @param nodeId Node identifier; percent-encoded before being placed in the
 *   path so characters such as `/`, `?` or `#` cannot alter the route.
 * @param availability Target availability, sent as `{ availability }`.
 * @returns `true` for a 2xx response; `false` for any other status or when the
 *   request throws (network failure, QUICK_TIMEOUT_MS exceeded).
 */
export async function setNodeAvailability(nodeId: string, availability: "active" | "pause" | "drain"): Promise<boolean> {
  try {
    const res = await fetch(
      `${GATEWAY_BASE_URL}/api/swarm/nodes/${encodeURIComponent(nodeId)}/availability`,
      {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ availability }),
        signal: AbortSignal.timeout(QUICK_TIMEOUT_MS),
      }
    );
    return res.ok;
  } catch {
    return false;
  }
}
/**
 * Deploy a new agent as a Swarm service (`POST /api/swarm/services/create`)
 * with a 15-second timeout.
 *
 * @param opts Service options; serialised unchanged as the JSON request body.
 * @returns The parsed JSON body, or `null` when the request fails, times out,
 *   returns a non-2xx status, or the body cannot be parsed.
 */
export async function createAgentService(opts: {
  name: string; image: string; replicas: number; env?: string[]; port?: number; networks?: string[];
}): Promise<{ ok: boolean; serviceId?: string; name?: string } | null> {
  try {
    const res = await fetch(`${GATEWAY_BASE_URL}/api/swarm/services/create`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(opts),
      signal: AbortSignal.timeout(15_000),
    });
    if (!res.ok) return null;
    // Awaited so body-read / JSON-parse failures are caught below rather than
    // rejecting to the caller.
    return await res.json();
  } catch {
    return null;
  }
}
/**
 * Delete a Swarm service, addressed by ID or name
 * (`DELETE /api/swarm/services/{serviceId}`), with a 10-second timeout.
 *
 * @param serviceId Service ID or name; percent-encoded into the path.
 * @returns `true` for a 2xx response; `false` for any other status or when the
 *   request throws.
 */
export async function removeSwarmService(serviceId: string): Promise<boolean> {
  try {
    const endpoint = `${GATEWAY_BASE_URL}/api/swarm/services/${encodeURIComponent(serviceId)}`;
    const { ok } = await fetch(endpoint, {
      method: "DELETE",
      signal: AbortSignal.timeout(10_000),
    });
    return ok;
  } catch {
    return false;
  }
}
/**
* Element type of the `agents` array returned by listSwarmAgents()
* (GET /api/swarm/agents). Not validated at runtime.
*/
export interface SwarmAgentInfo {
id: string;
name: string;
image: string;
desiredReplicas: number;
runningTasks: number;
// NOTE(review): timestamp string as emitted by the gateway — confirm format before parsing
lastActivity: string;
// NOTE(review): presumably minutes elapsed since lastActivity, computed by the gateway — confirm
idleMinutes: number;
isGoClaw: boolean;
}
/**
 * List all GoClaw agent services with idle time info (`GET /api/swarm/agents`)
 * with a 10-second timeout.
 *
 * @returns The parsed JSON body, or `null` when the request fails, times out,
 *   returns a non-2xx status, or the body cannot be parsed.
 */
export async function listSwarmAgents(): Promise<{ agents: SwarmAgentInfo[]; count: number } | null> {
  try {
    const res = await fetch(`${GATEWAY_BASE_URL}/api/swarm/agents`, {
      signal: AbortSignal.timeout(10_000),
    });
    if (!res.ok) return null;
    // Awaited so body-read / JSON-parse failures are caught below rather than
    // rejecting to the caller.
    return await res.json();
  } catch {
    return null;
  }
}
/**
 * Scale an agent service up (`POST /api/swarm/agents/{name}/start`) with a
 * 10-second timeout.
 *
 * @param name Agent service name; percent-encoded into the path.
 * @param replicas Replica count sent as `{ replicas }` (default 1).
 * @returns `true` for a 2xx response; `false` for any other status or when the
 *   request throws.
 */
export async function startSwarmAgent(name: string, replicas = 1): Promise<boolean> {
  try {
    const endpoint = `${GATEWAY_BASE_URL}/api/swarm/agents/${encodeURIComponent(name)}/start`;
    const { ok } = await fetch(endpoint, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ replicas }),
      signal: AbortSignal.timeout(10_000),
    });
    return ok;
  } catch {
    return false;
  }
}
/**
 * Scale an agent service down to zero (`POST /api/swarm/agents/{name}/stop`)
 * with a 10-second timeout. No request body is sent.
 *
 * @param name Agent service name; percent-encoded into the path.
 * @returns `true` for a 2xx response; `false` for any other status or when the
 *   request throws.
 */
export async function stopSwarmAgent(name: string): Promise<boolean> {
  try {
    const endpoint = `${GATEWAY_BASE_URL}/api/swarm/agents/${encodeURIComponent(name)}/stop`;
    const { ok } = await fetch(endpoint, {
      method: "POST",
      signal: AbortSignal.timeout(10_000),
    });
    return ok;
  } catch {
    return false;
  }
}