Loading services…
@@ -857,10 +1044,8 @@ export default function Nodes() {
No swarm services running
-
Deploy agents as services to scale them across nodes
diff --git a/docker/Dockerfile.agent b/docker/Dockerfile.agent
new file mode 100644
index 0000000..c5d7832
--- /dev/null
+++ b/docker/Dockerfile.agent
@@ -0,0 +1,53 @@
+# ── GoClaw Agent Container ────────────────────────────────────────────────────
+#
+# Autonomous agent microservice that:
+# 1. Exposes a lightweight HTTP API (port 8080) for receiving tasks
+# 2. Has access to the Swarm overlay network (goclaw-net)
+# 3. Connects to the shared MySQL database for persistence
+# 4. Calls the LLM API via the GoClaw Gateway
+# 5. Auto-registers itself with the orchestrator on startup
+#
+# Build: docker build -f docker/Dockerfile.agent -t goclaw-agent:latest .
+# Deploy: docker service create --name goclaw-agent-NAME \
+# --network goclaw-net \
+# -e AGENT_ID=NAME \
+# -e GATEWAY_URL=http://goclaw-gateway:18789 \
+# -e DATABASE_URL=mysql://... \
+# goclaw-agent:latest
+# ─────────────────────────────────────────────────────────────────────────────
+
+# ── Stage 1: Build Go agent binary ───────────────────────────────────────────
+FROM golang:1.23-alpine AS builder
+WORKDIR /src
+
+# Copy gateway module (agent reuses gateway internals)
+COPY gateway/go.mod gateway/go.sum ./
+RUN go mod download
+
+COPY gateway/ ./
+
+# Build the agent server binary
+RUN go build -o /agent-server ./cmd/agent/...
+
+# ── Stage 2: Runtime ──────────────────────────────────────────────────────────
+FROM alpine:3.20
+RUN apk add --no-cache ca-certificates curl wget tzdata
+
+WORKDIR /app
+COPY --from=builder /agent-server ./agent-server
+
+# Default environment (override at deploy time)
+ENV AGENT_ID=default-agent \
+ AGENT_PORT=8080 \
+ GATEWAY_URL=http://goclaw-gateway:18789 \
+ LLM_BASE_URL=https://ollama.com/v1 \
+ LLM_API_KEY="" \
+ DATABASE_URL="" \
+ IDLE_TIMEOUT_MINUTES=15
+
+EXPOSE 8080
+
+HEALTHCHECK --interval=15s --timeout=5s --start-period=10s --retries=3 \
+ CMD wget -qO- http://localhost:8080/health || exit 1
+
+ENTRYPOINT ["/app/agent-server"]
diff --git a/gateway/cmd/agent/main.go b/gateway/cmd/agent/main.go
new file mode 100644
index 0000000..58ab376
--- /dev/null
+++ b/gateway/cmd/agent/main.go
@@ -0,0 +1,270 @@
+// GoClaw Agent Server — autonomous agent microservice
+//
+// Each agent runs as an independent container in the Docker Swarm overlay
+// network. It exposes an HTTP API that the GoClaw Orchestrator can reach
+// via the Swarm DNS name (e.g. http://goclaw-agent-researcher:8080).
+//
+// The agent:
+// - Receives task requests from the orchestrator
+// - Calls the LLM via the centrally-managed GoClaw Gateway
+// - Reads/writes shared state in the MySQL database
+// - Reports its last-activity time so the SwarmManager can auto-stop it
+// - Gracefully shuts down after IdleTimeout with no requests
+package main
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "io"
+ "log"
+ "net/http"
+ "os"
+ "os/signal"
+ "strconv"
+ "strings"
+ "syscall"
+ "time"
+)
+
+// ─── Config ──────────────────────────────────────────────────────────────────
+
+type AgentConfig struct {
+ AgentID string
+ Port string
+ GatewayURL string
+ LLMURL string
+ LLMAPIKey string
+ DatabaseURL string
+ IdleTimeoutMinutes int
+}
+
+func loadConfig() AgentConfig {
+ idleMin := 15
+ if v := os.Getenv("IDLE_TIMEOUT_MINUTES"); v != "" {
+ if n, err := strconv.Atoi(v); err == nil {
+ idleMin = n
+ }
+ }
+ port := os.Getenv("AGENT_PORT")
+ if port == "" {
+ port = "8080"
+ }
+ return AgentConfig{
+ AgentID: getEnv("AGENT_ID", "unnamed-agent"),
+ Port: port,
+ GatewayURL: getEnv("GATEWAY_URL", "http://goclaw-gateway:18789"),
+ LLMURL: getEnv("LLM_BASE_URL", "https://ollama.com/v1"),
+ LLMAPIKey: os.Getenv("LLM_API_KEY"),
+ DatabaseURL: os.Getenv("DATABASE_URL"),
+ IdleTimeoutMinutes: idleMin,
+ }
+}
+
+func getEnv(key, fallback string) string {
+ if v := os.Getenv(key); v != "" {
+ return v
+ }
+ return fallback
+}
+
+// ─── State ───────────────────────────────────────────────────────────────────
+
+type Agent struct {
+ cfg AgentConfig
+ lastActivity time.Time
+ httpClient *http.Client
+}
+
+func NewAgent(cfg AgentConfig) *Agent {
+ return &Agent{
+ cfg: cfg,
+ lastActivity: time.Now(),
+ httpClient: &http.Client{Timeout: 120 * time.Second},
+ }
+}
+
+func (a *Agent) touch() {
+ a.lastActivity = time.Now()
+}
+
+// ─── HTTP handlers ────────────────────────────────────────────────────────────
+
+// GET /health — liveness probe
+func (a *Agent) handleHealth(w http.ResponseWriter, r *http.Request) {
+ respond(w, 200, map[string]any{
+ "ok": true,
+ "agentId": a.cfg.AgentID,
+ "lastActivity": a.lastActivity.Format(time.RFC3339),
+ "idleMinutes": time.Since(a.lastActivity).Minutes(),
+ })
+}
+
+// POST /task — receive a task from the orchestrator
+// Body: { "sessionId": "abc", "messages": [...], "model": "qwen2.5:7b", "maxIter": 5 }
+func (a *Agent) handleTask(w http.ResponseWriter, r *http.Request) {
+ a.touch()
+ var body struct {
+ SessionID string `json:"sessionId"`
+ Messages json.RawMessage `json:"messages"`
+ Model string `json:"model"`
+ MaxIter int `json:"maxIter"`
+ }
+ if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
+ respondError(w, 400, "invalid request: "+err.Error())
+ return
+ }
+ // Forward the task to the GoClaw Gateway orchestrator
+ gatewayURL := a.cfg.GatewayURL + "/api/orchestrator/chat"
+ reqBody, _ := json.Marshal(map[string]any{
+ "messages": body.Messages,
+ "model": body.Model,
+ "maxIter": body.MaxIter,
+ })
+
+ req, err := http.NewRequestWithContext(r.Context(), "POST", gatewayURL, strings.NewReader(string(reqBody)))
+ if err != nil {
+ respondError(w, 500, "request build error: "+err.Error())
+ return
+ }
+ req.Header.Set("Content-Type", "application/json")
+
+ resp, err := a.httpClient.Do(req)
+ if err != nil {
+ respondError(w, 502, "gateway error: "+err.Error())
+ return
+ }
+ defer resp.Body.Close()
+
+ var result map[string]any
+ if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
+ respondError(w, 502, "gateway response error: "+err.Error())
+ return
+ }
+
+ a.touch()
+ respond(w, 200, map[string]any{
+ "ok": true,
+ "agentId": a.cfg.AgentID,
+ "sessionId": body.SessionID,
+ "result": result,
+ })
+}
+
+// GET /info — agent metadata
+func (a *Agent) handleInfo(w http.ResponseWriter, r *http.Request) {
+ hostname, _ := os.Hostname()
+ respond(w, 200, map[string]any{
+ "agentId": a.cfg.AgentID,
+ "hostname": hostname,
+ "gatewayUrl": a.cfg.GatewayURL,
+ "idleTimeout": a.cfg.IdleTimeoutMinutes,
+ "lastActivity": a.lastActivity.Format(time.RFC3339),
+ "idleMinutes": time.Since(a.lastActivity).Minutes(),
+ })
+}
+
+// ─── Idle watchdog ────────────────────────────────────────────────────────────
+
+func (a *Agent) runIdleWatchdog(cancel context.CancelFunc) {
+ threshold := time.Duration(a.cfg.IdleTimeoutMinutes) * time.Minute
+ ticker := time.NewTicker(30 * time.Second)
+ defer ticker.Stop()
+ for range ticker.C {
+ idle := time.Since(a.lastActivity)
+ if idle >= threshold {
+ log.Printf("[Agent %s] Idle for %.1f min — requesting self-stop via gateway",
+ a.cfg.AgentID, idle.Minutes())
+ a.selfStop()
+ cancel()
+ return
+ }
+ }
+}
+
+// selfStop asks the GoClaw Gateway to scale this service to 0.
+func (a *Agent) selfStop() {
+ url := fmt.Sprintf("%s/api/swarm/agents/%s/stop", a.cfg.GatewayURL, a.cfg.AgentID)
+ req, err := http.NewRequest("POST", url, nil)
+ if err != nil {
+ log.Printf("[Agent %s] selfStop error building request: %v", a.cfg.AgentID, err)
+ return
+ }
+ resp, err := a.httpClient.Do(req)
+ if err != nil {
+ log.Printf("[Agent %s] selfStop error: %v", a.cfg.AgentID, err)
+ return
+ }
+ body, _ := io.ReadAll(resp.Body)
+ resp.Body.Close()
+ log.Printf("[Agent %s] selfStop response %d: %s", a.cfg.AgentID, resp.StatusCode, string(body))
+}
+
+// ─── Helpers ─────────────────────────────────────────────────────────────────
+
+func respond(w http.ResponseWriter, status int, data any) {
+ w.Header().Set("Content-Type", "application/json")
+ w.WriteHeader(status)
+ json.NewEncoder(w).Encode(data)
+}
+
+func respondError(w http.ResponseWriter, status int, msg string) {
+ respond(w, status, map[string]any{"error": msg})
+}
+
+// ─── Main ─────────────────────────────────────────────────────────────────────
+
+func main() {
+ log.SetFlags(log.LstdFlags | log.Lshortfile)
+
+ cfg := loadConfig()
+ agent := NewAgent(cfg)
+
+ log.Printf("[Agent] %s starting on port %s (idle timeout: %d min)",
+ cfg.AgentID, cfg.Port, cfg.IdleTimeoutMinutes)
+ log.Printf("[Agent] Gateway: %s", cfg.GatewayURL)
+
+ // ── HTTP server ──────────────────────────────────────────────────────────
+ mux := http.NewServeMux()
+ mux.HandleFunc("GET /health", agent.handleHealth)
+ mux.HandleFunc("POST /task", agent.handleTask)
+ mux.HandleFunc("GET /info", agent.handleInfo)
+
+ srv := &http.Server{
+ Addr: ":" + cfg.Port,
+ Handler: mux,
+ ReadTimeout: 30 * time.Second,
+ WriteTimeout: 150 * time.Second,
+ IdleTimeout: 120 * time.Second,
+ }
+
+ ctx, cancel := context.WithCancel(context.Background())
+
+ // ── Idle watchdog ────────────────────────────────────────────────────────
+ go agent.runIdleWatchdog(cancel)
+
+ // ── Graceful shutdown ────────────────────────────────────────────────────
+ quit := make(chan os.Signal, 1)
+ signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
+
+ go func() {
+ log.Printf("[Agent %s] Listening on :%s", cfg.AgentID, cfg.Port)
+ if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
+ log.Fatalf("[Agent %s] Server error: %v", cfg.AgentID, err)
+ }
+ }()
+
+ select {
+ case <-quit:
+ log.Printf("[Agent %s] Signal received — shutting down", cfg.AgentID)
+ case <-ctx.Done():
+ log.Printf("[Agent %s] Context cancelled — shutting down", cfg.AgentID)
+ }
+
+ shutCtx, shutCancel := context.WithTimeout(context.Background(), 10*time.Second)
+ defer shutCancel()
+ if err := srv.Shutdown(shutCtx); err != nil {
+ log.Printf("[Agent %s] Shutdown error: %v", cfg.AgentID, err)
+ }
+ log.Printf("[Agent %s] Stopped.", cfg.AgentID)
+}
diff --git a/gateway/cmd/gateway/main.go b/gateway/cmd/gateway/main.go
index ccad82a..70d4b0d 100644
--- a/gateway/cmd/gateway/main.go
+++ b/gateway/cmd/gateway/main.go
@@ -135,12 +135,22 @@ func main() {
r.Post("/swarm/nodes/{id}/availability", h.SwarmSetNodeAvailability)
r.Get("/swarm/services", h.SwarmServices)
r.Post("/swarm/services/create", h.SwarmCreateService)
+ r.Delete("/swarm/services/{id}", h.SwarmRemoveService)
r.Get("/swarm/services/{id}/tasks", h.SwarmServiceTasks)
r.Post("/swarm/services/{id}/scale", h.SwarmScaleService)
r.Get("/swarm/join-token", h.SwarmJoinToken)
r.Post("/swarm/shell", h.SwarmShell)
+ r.Get("/swarm/agents", h.SwarmListAgents)
+ r.Post("/swarm/agents/{name}/start", h.SwarmStartAgent)
+ r.Post("/swarm/agents/{name}/stop", h.SwarmStopAgent)
})
+ // ── Swarm Manager: auto-stop idle agents after 15 min ────────────────────
+ swarmMgr := api.NewSwarmManager(h, 60*time.Second)
+ managerCtx, managerCancel := context.WithCancel(context.Background())
+ go swarmMgr.Start(managerCtx)
+ defer managerCancel()
+
// ── Start Server ─────────────────────────────────────────────────────────
srv := &http.Server{
Addr: ":" + cfg.Port,
diff --git a/gateway/internal/api/handlers.go b/gateway/internal/api/handlers.go
index 193aa93..d3b8cd9 100644
--- a/gateway/internal/api/handlers.go
+++ b/gateway/internal/api/handlers.go
@@ -1223,7 +1223,7 @@ func (h *Handler) SwarmScaleService(w http.ResponseWriter, r *http.Request) {
// POST /api/swarm/services/create
// Deploy a new GoClaw agent as a Swarm service.
-// Body: { "name": "agent-researcher", "image": "goclaw-gateway:latest", "replicas": 2, "env": ["KEY=val"], "port": 0 }
+// Body: { "name": "agent-researcher", "image": "goclaw-gateway:latest", "replicas": 2, "env": ["KEY=val"], "port": 0, "networks": ["goclaw-net"] }
func (h *Handler) SwarmCreateService(w http.ResponseWriter, r *http.Request) {
var body struct {
Name string `json:"name"`
@@ -1231,6 +1231,7 @@ func (h *Handler) SwarmCreateService(w http.ResponseWriter, r *http.Request) {
Replicas int `json:"replicas"`
Env []string `json:"env"`
Port int `json:"port"`
+ Networks []string `json:"networks"`
}
if err := json.NewDecoder(r.Body).Decode(&body); err != nil || body.Name == "" || body.Image == "" {
respondError(w, http.StatusBadRequest, "name and image required")
@@ -1239,7 +1240,14 @@ func (h *Handler) SwarmCreateService(w http.ResponseWriter, r *http.Request) {
if body.Replicas <= 0 {
body.Replicas = 1
}
- svc, err := h.docker.CreateAgentService(body.Name, body.Image, body.Replicas, body.Env, body.Port)
+ svc, err := h.docker.CreateAgentServiceFull(dockerclient.CreateAgentServiceOpts{
+ Name: body.Name,
+ Image: body.Image,
+ Replicas: body.Replicas,
+ Env: body.Env,
+ Port: body.Port,
+ Networks: body.Networks,
+ })
if err != nil {
respondError(w, http.StatusInternalServerError, "create service: "+err.Error())
return
@@ -1251,6 +1259,76 @@ func (h *Handler) SwarmCreateService(w http.ResponseWriter, r *http.Request) {
})
}
+// DELETE /api/swarm/services/{id}
+// Remove (stop) a swarm service.
+func (h *Handler) SwarmRemoveService(w http.ResponseWriter, r *http.Request) {
+ serviceID := r.PathValue("id")
+ if serviceID == "" {
+ respondError(w, http.StatusBadRequest, "service id required")
+ return
+ }
+ if err := h.docker.RemoveService(serviceID); err != nil {
+ respondError(w, http.StatusInternalServerError, "remove service: "+err.Error())
+ return
+ }
+ log.Printf("[Swarm] Removed service %s", serviceID)
+ respond(w, http.StatusOK, map[string]any{"ok": true})
+}
+
+// GET /api/swarm/agents
+// List all GoClaw agent services with idle time information.
+func (h *Handler) SwarmListAgents(w http.ResponseWriter, r *http.Request) {
+ services, err := h.docker.ListServices()
+ if err != nil {
+ respondError(w, http.StatusInternalServerError, "list services: "+err.Error())
+ return
+ }
+
+ type AgentInfo struct {
+ ID string `json:"id"`
+ Name string `json:"name"`
+ Image string `json:"image"`
+ DesiredReplicas int `json:"desiredReplicas"`
+ RunningTasks int `json:"runningTasks"`
+ LastActivity time.Time `json:"lastActivity"`
+ IdleMinutes float64 `json:"idleMinutes"`
+ IsGoClaw bool `json:"isGoClaw"`
+ }
+
+ var agents []AgentInfo
+ for _, svc := range services {
+ isGoClaw := svc.Spec.Labels["goclaw.agent"] == "true"
+ desired := 0
+ if svc.Spec.Mode.Replicated != nil {
+ desired = svc.Spec.Mode.Replicated.Replicas
+ }
+ running := 0
+ if svc.ServiceStatus != nil {
+ running = svc.ServiceStatus.RunningTasks
+ }
+ lastActivity, _ := h.docker.GetServiceLastActivity(svc.ID)
+ if lastActivity.IsZero() {
+ lastActivity = svc.UpdatedAt
+ }
+ idle := time.Since(lastActivity).Minutes()
+ agents = append(agents, AgentInfo{
+ ID: svc.ID,
+ Name: svc.Spec.Name,
+ Image: svc.Spec.TaskTemplate.ContainerSpec.Image,
+ DesiredReplicas: desired,
+ RunningTasks: running,
+ LastActivity: lastActivity,
+ IdleMinutes: idle,
+ IsGoClaw: isGoClaw,
+ })
+ }
+ if agents == nil {
+ agents = []AgentInfo{}
+ }
+ respond(w, http.StatusOK, map[string]any{"agents": agents, "count": len(agents)})
+}
+
+
// POST /api/swarm/shell
// Execute a shell command on the HOST system (via nsenter into PID 1).
// Body: { "command": "docker ps" }
diff --git a/gateway/internal/api/swarm_manager.go b/gateway/internal/api/swarm_manager.go
new file mode 100644
index 0000000..e591c28
--- /dev/null
+++ b/gateway/internal/api/swarm_manager.go
@@ -0,0 +1,142 @@
+// Package api – Swarm Agent Lifecycle Manager
+//
+// The SwarmManager runs as a background goroutine inside the GoClaw Gateway
+// (which is the Swarm manager node). It watches all agent services and
+// automatically scales them to 0 replicas after IdleTimeout minutes of no
+// activity. The orchestrator can call StartAgent / StopAgent via the REST API
+// to start/stop agents on demand.
+//
+// Start flow: POST /api/swarm/agents/{name}/start → scale to N replicas (default 1)
+// Stop flow: POST /api/swarm/agents/{name}/stop → scale to 0
+// Auto-stop: background loop checks every 60 s, scales idle agents to 0
+package api
+
+import (
+ "context"
+ "encoding/json"
+ "log"
+ "net/http"
+ "time"
+)
+
+const (
+ // IdleTimeout – how many minutes without any task updates before an agent
+ // is automatically scaled to 0.
+ defaultIdleTimeoutMinutes = 15
+)
+
+// SwarmManager watches agent services and auto-scales them down after idle.
+type SwarmManager struct {
+ handler *Handler
+ ticker *time.Ticker
+ done chan struct{}
+}
+
+// NewSwarmManager creates a manager that checks every checkInterval.
+func NewSwarmManager(h *Handler, checkInterval time.Duration) *SwarmManager {
+ return &SwarmManager{
+ handler: h,
+ ticker: time.NewTicker(checkInterval),
+ done: make(chan struct{}),
+ }
+}
+
+// Start launches the background loop. Call in a goroutine.
+func (m *SwarmManager) Start(ctx context.Context) {
+ log.Printf("[SwarmManager] Started — idle timeout %d min, check every %s",
+ defaultIdleTimeoutMinutes, m.ticker)
+ defer m.ticker.Stop()
+ for {
+ select {
+ case <-m.done:
+ return
+ case <-ctx.Done():
+ return
+ case <-m.ticker.C:
+ m.checkIdleAgents()
+ }
+ }
+}
+
+// Stop signals the background loop to exit.
+func (m *SwarmManager) Stop() {
+ close(m.done)
+}
+
+func (m *SwarmManager) checkIdleAgents() {
+ services, err := m.handler.docker.ListServices()
+ if err != nil {
+ log.Printf("[SwarmManager] list services error: %v", err)
+ return
+ }
+ idleThreshold := time.Duration(defaultIdleTimeoutMinutes) * time.Minute
+ now := time.Now()
+ for _, svc := range services {
+ // Only manage services labelled as GoClaw agents
+ if svc.Spec.Labels["goclaw.agent"] != "true" {
+ continue
+ }
+ // Skip already-stopped services (0 desired replicas)
+ desired := 0
+ if svc.Spec.Mode.Replicated != nil {
+ desired = svc.Spec.Mode.Replicated.Replicas
+ }
+ if desired == 0 {
+ continue
+ }
+ // Check last activity time
+ lastActivity, err := m.handler.docker.GetServiceLastActivity(svc.ID)
+ if err != nil || lastActivity.IsZero() {
+ lastActivity = svc.UpdatedAt
+ }
+ idle := now.Sub(lastActivity)
+ if idle >= idleThreshold {
+ log.Printf("[SwarmManager] Agent '%s' idle for %.1f min → scaling to 0",
+ svc.Spec.Name, idle.Minutes())
+ if err := m.handler.docker.ScaleService(svc.ID, 0); err != nil {
+ log.Printf("[SwarmManager] scale-to-0 error for %s: %v", svc.Spec.Name, err)
+ }
+ }
+ }
+}
+
+// ─── HTTP Handlers for agent lifecycle ────────────────────────────────────────
+
+// POST /api/swarm/agents/{name}/start
+// Start (scale-up) a named agent service. Body: { "replicas": 1 }
+func (h *Handler) SwarmStartAgent(w http.ResponseWriter, r *http.Request) {
+ name := r.PathValue("name")
+ if name == "" {
+ respondError(w, http.StatusBadRequest, "agent name required")
+ return
+ }
+ var body struct {
+ Replicas int `json:"replicas"`
+ }
+ _ = json.NewDecoder(r.Body).Decode(&body)
+ if body.Replicas <= 0 {
+ body.Replicas = 1
+ }
+ if err := h.docker.ScaleService(name, body.Replicas); err != nil {
+ respondError(w, http.StatusInternalServerError, "start agent: "+err.Error())
+ return
+ }
+ log.Printf("[Swarm] Agent '%s' started with %d replica(s)", name, body.Replicas)
+ respond(w, http.StatusOK, map[string]any{"ok": true, "name": name, "replicas": body.Replicas})
+}
+
+// POST /api/swarm/agents/{name}/stop
+// Stop (scale-to-0) a named agent service.
+func (h *Handler) SwarmStopAgent(w http.ResponseWriter, r *http.Request) {
+ name := r.PathValue("name")
+ if name == "" {
+ respondError(w, http.StatusBadRequest, "agent name required")
+ return
+ }
+ if err := h.docker.ScaleService(name, 0); err != nil {
+ respondError(w, http.StatusInternalServerError, "stop agent: "+err.Error())
+ return
+ }
+ log.Printf("[Swarm] Agent '%s' stopped (scaled to 0)", name)
+ respond(w, http.StatusOK, map[string]any{"ok": true, "name": name, "replicas": 0})
+}
diff --git a/gateway/internal/docker/client.go b/gateway/internal/docker/client.go
index 21c8e51..6184fcc 100644
--- a/gateway/internal/docker/client.go
+++ b/gateway/internal/docker/client.go
@@ -158,11 +158,17 @@ type SwarmService struct {
}
type ServiceSpec struct {
- Name string `json:"Name"`
- Mode ServiceMode `json:"Mode"`
- TaskTemplate TaskTemplate `json:"TaskTemplate"`
- EndpointSpec *EndpointSpec `json:"EndpointSpec,omitempty"`
- Labels map[string]string `json:"Labels"`
+ Name string `json:"Name"`
+ Mode ServiceMode `json:"Mode"`
+ TaskTemplate TaskTemplate `json:"TaskTemplate"`
+ EndpointSpec *EndpointSpec `json:"EndpointSpec,omitempty"`
+ Labels map[string]string `json:"Labels"`
+ Networks []NetworkAttachment `json:"Networks,omitempty"`
+}
+
+type NetworkAttachment struct {
+ Target string `json:"Target"`
+ Aliases []string `json:"Aliases,omitempty"`
}
type ServiceMode struct {
@@ -443,34 +449,67 @@ func (c *DockerClient) ListAllTasks() ([]SwarmTask, error) {
// CreateAgentService deploys a new swarm service for an AI agent.
// image: container image, name: service name, replicas: initial count,
// env: environment variables, port: optional published port (0 = none).
+// CreateAgentServiceOpts holds options for deploying an agent Swarm service.
+type CreateAgentServiceOpts struct {
+ Name string
+ Image string
+ Replicas int
+ Env []string
+ Port int
+ Networks []string // overlay network names/IDs to attach
+ Labels map[string]string
+}
+
func (c *DockerClient) CreateAgentService(name, image string, replicas int, env []string, port int) (*SwarmService, error) {
+ return c.CreateAgentServiceFull(CreateAgentServiceOpts{
+ Name: name,
+ Image: image,
+ Replicas: replicas,
+ Env: env,
+ Port: port,
+ })
+}
+
+func (c *DockerClient) CreateAgentServiceFull(opts CreateAgentServiceOpts) (*SwarmService, error) {
+ labels := map[string]string{
+ "goclaw.agent": "true",
+ "goclaw.name": opts.Name,
+ }
+ for k, v := range opts.Labels {
+ labels[k] = v
+ }
spec := ServiceSpec{
- Name: name,
+ Name: opts.Name,
Mode: ServiceMode{
- Replicated: &ReplicatedService{Replicas: replicas},
+ Replicated: &ReplicatedService{Replicas: opts.Replicas},
},
TaskTemplate: TaskTemplate{
ContainerSpec: ContainerSpec{
- Image: image,
- Env: env,
+ Image: opts.Image,
+ Env: opts.Env,
},
},
- Labels: map[string]string{
- "goclaw.agent": "true",
- "goclaw.name": name,
- },
+ Labels: labels,
}
- if port > 0 {
+ if opts.Port > 0 {
spec.EndpointSpec = &EndpointSpec{
Ports: []PortConfig{
{
Protocol: "tcp",
- TargetPort: port,
+ TargetPort: opts.Port,
PublishMode: "ingress",
},
},
}
}
+ if len(opts.Networks) > 0 {
+ for _, net := range opts.Networks {
+ spec.Networks = append(spec.Networks, NetworkAttachment{
+ Target: net,
+ Aliases: []string{opts.Name},
+ })
+ }
+ }
var created struct {
ID string `json:"ID"`
}
@@ -480,6 +519,40 @@ func (c *DockerClient) CreateAgentService(name, image string, replicas int, env
return c.GetService(created.ID)
}
+// RemoveService removes a swarm service by ID or name.
+func (c *DockerClient) RemoveService(idOrName string) error {
+ req, err := http.NewRequest(http.MethodDelete, c.baseURL+"/v1.44/services/"+urlEncode(idOrName), nil)
+ if err != nil {
+ return err
+ }
+ resp, err := c.httpClient.Do(req)
+ if err != nil {
+ return fmt.Errorf("docker DELETE service %s: %w", idOrName, err)
+ }
+ defer resp.Body.Close()
+ if resp.StatusCode >= 400 {
+ body, _ := io.ReadAll(resp.Body)
+ return fmt.Errorf("docker DELETE service %s: status %d: %s", idOrName, resp.StatusCode, string(body))
+ }
+ return nil
+}
+
+// GetServiceLastActivity returns the most recent task update time for a service.
+// Used to determine whether a service is idle.
+func (c *DockerClient) GetServiceLastActivity(serviceID string) (time.Time, error) {
+ tasks, err := c.ListServiceTasks(serviceID)
+ if err != nil {
+ return time.Time{}, err
+ }
+ var latest time.Time
+ for _, t := range tasks {
+ if t.UpdatedAt.After(latest) {
+ latest = t.UpdatedAt
+ }
+ }
+ return latest, nil
+}
+
// ─── Methods: Containers ─────────────────────────────────────────────────────
func (c *DockerClient) ListContainers() ([]Container, error) {
diff --git a/server/gateway-proxy.ts b/server/gateway-proxy.ts
index b8bfa9e..03665c9 100644
--- a/server/gateway-proxy.ts
+++ b/server/gateway-proxy.ts
@@ -703,7 +703,7 @@ export async function setNodeAvailability(nodeId: string, availability: "active"
/** Deploy a new agent as a Swarm service */
export async function createAgentService(opts: {
- name: string; image: string; replicas: number; env?: string[]; port?: number;
+ name: string; image: string; replicas: number; env?: string[]; port?: number; networks?: string[];
}): Promise<{ ok: boolean; serviceId?: string; name?: string } | null> {
try {
const res = await fetch(`${GATEWAY_BASE_URL}/api/swarm/services/create`, {
@@ -716,3 +716,61 @@ export async function createAgentService(opts: {
return res.json();
} catch { return null; }
}
+
+/** Remove (stop) a Swarm service by ID or name */
+export async function removeSwarmService(serviceId: string): Promise
{
+ try {
+ const res = await fetch(`${GATEWAY_BASE_URL}/api/swarm/services/${encodeURIComponent(serviceId)}`, {
+ method: "DELETE",
+ signal: AbortSignal.timeout(10_000),
+ });
+ return res.ok;
+ } catch { return false; }
+}
+
+export interface SwarmAgentInfo {
+ id: string;
+ name: string;
+ image: string;
+ desiredReplicas: number;
+ runningTasks: number;
+ lastActivity: string;
+ idleMinutes: number;
+ isGoClaw: boolean;
+}
+
+/** List all GoClaw agent services with idle time info */
+export async function listSwarmAgents(): Promise<{ agents: SwarmAgentInfo[]; count: number } | null> {
+ try {
+ const res = await fetch(`${GATEWAY_BASE_URL}/api/swarm/agents`, {
+ signal: AbortSignal.timeout(10_000),
+ });
+ if (!res.ok) return null;
+ return res.json();
+ } catch { return null; }
+}
+
+/** Start (scale-up) an agent service */
+export async function startSwarmAgent(name: string, replicas = 1): Promise {
+ try {
+ const res = await fetch(`${GATEWAY_BASE_URL}/api/swarm/agents/${encodeURIComponent(name)}/start`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json" },
+ body: JSON.stringify({ replicas }),
+ signal: AbortSignal.timeout(10_000),
+ });
+ return res.ok;
+ } catch { return false; }
+}
+
+/** Stop (scale-to-0) an agent service */
+export async function stopSwarmAgent(name: string): Promise {
+ try {
+ const res = await fetch(`${GATEWAY_BASE_URL}/api/swarm/agents/${encodeURIComponent(name)}/stop`, {
+ method: "POST",
+ signal: AbortSignal.timeout(10_000),
+ });
+ return res.ok;
+ } catch { return false; }
+}
+
diff --git a/server/routers.ts b/server/routers.ts
index 5e3cbca..5f1c1c4 100644
--- a/server/routers.ts
+++ b/server/routers.ts
@@ -29,6 +29,10 @@ import {
addSwarmNodeLabel,
setNodeAvailability,
createAgentService,
+ removeSwarmService,
+ listSwarmAgents,
+ startSwarmAgent,
+ stopSwarmAgent,
getOllamaModelInfo,
} from "./gateway-proxy";
@@ -998,6 +1002,7 @@ export const appRouter = router({
replicas: z.number().min(1).max(20).default(1),
env: z.array(z.string()).optional(),
port: z.number().optional(),
+ networks: z.array(z.string()).optional(),
}))
.mutation(async ({ input }) => {
const result = await createAgentService(input);
@@ -1005,6 +1010,44 @@ export const appRouter = router({
return result;
}),
+ /**
+ * Remove (stop and delete) a Swarm service.
+ */
+ removeService: publicProcedure
+ .input(z.object({ serviceId: z.string().min(1) }))
+ .mutation(async ({ input }) => {
+ const ok = await removeSwarmService(input.serviceId);
+ return { ok };
+ }),
+
+ /**
+ * List all GoClaw agent services with idle time info.
+ */
+ listAgents: publicProcedure.query(async () => {
+ const result = await listSwarmAgents();
+ return result ?? { agents: [], count: 0 };
+ }),
+
+ /**
+ * Start (scale-up) an agent service by name.
+ */
+ startAgent: publicProcedure
+ .input(z.object({ name: z.string().min(1), replicas: z.number().min(1).max(20).default(1) }))
+ .mutation(async ({ input }) => {
+ const ok = await startSwarmAgent(input.name, input.replicas);
+ return { ok };
+ }),
+
+ /**
+ * Stop (scale-to-0) an agent service by name.
+ */
+ stopAgent: publicProcedure
+ .input(z.object({ name: z.string().min(1) }))
+ .mutation(async ({ input }) => {
+ const ok = await stopSwarmAgent(input.name);
+ return { ok };
+ }),
+
/**
* Get live container stats (CPU%, RAM) for all running containers.
*/