feat(swarm): autonomous agent containers, Swarm Manager with auto-stop, /nodes UI overhaul
## 1. Fix /nodes Swarm Status Display
- Add SwarmStatusBanner component: clear green/red/loading state
- Shows nodeId, managerAddr, isManager badge
- Error state explains what to check (docker.sock mount)
- Header now shows 'swarm unreachable — check gateway' vs 'active'
- swarmOk now checks nodeId presence, not just data existence
## 2. Autonomous Agent Container
- New docker/Dockerfile.agent — builds Go agent binary from gateway/cmd/agent/
- New gateway/cmd/agent/main.go — standalone HTTP microservice:
* GET /health — liveness probe with idle time info
* POST /task — receives a task and forwards it to the Gateway orchestrator
* GET /info — agent metadata (id, hostname, gateway url)
* Idle watchdog: calls /api/swarm/agents/{name}/stop after IdleTimeoutMinutes
* Connects to Swarm overlay network (goclaw-net) → reaches DB/Gateway by DNS
* Env: AGENT_ID, GATEWAY_URL, DATABASE_URL, IDLE_TIMEOUT_MINUTES
## 3. Swarm Manager Agent (auto-stop after 15 min idle)
- New gateway/internal/api/swarm_manager.go:
* SwarmManager goroutine checks every 60s
* Scales idle GoClaw agent services to 0 replicas after 15 min
* Tracks lastActivity from task UpdatedAt timestamps
- New REST endpoints in gateway:
* GET /api/swarm/agents — list agents with idleMinutes
* POST /api/swarm/agents/{name}/start — scale up agent
* POST /api/swarm/agents/{name}/stop — scale to 0
* DELETE /api/swarm/services/{id} — remove service permanently
- SwarmManager started as background goroutine in main.go with context cancel
## 4. Docker Client Enhancements
- Added NetworkAttachment type and Networks field to ServiceSpec
- CreateAgentServiceFull(opts) — supports overlay networks, custom labels
- CreateAgentService() delegates to CreateAgentServiceFull for backward compat
- RemoveService(id) — DELETE /v1.44/services/{id}
- GetServiceLastActivity(id) — finds latest task UpdatedAt for idle detection
## 5. tRPC & Gateway Proxy
- New functions: removeSwarmService, listSwarmAgents, startSwarmAgent, stopSwarmAgent
- SwarmAgentInfo type with idleMinutes, lastActivity, desiredReplicas
- createAgentService now accepts networks[] parameter
- New tRPC endpoints: nodes.removeService, nodes.listAgents, nodes.startAgent, nodes.stopAgent
## 6. Nodes.tsx UI Overhaul
- SwarmStatusBanner component at top — no more silent 'connecting…'
- New 'Agents' tab with AgentManagerRow: idle time, auto-stop warning, start/stop/remove buttons
- IdleColor coding: green < 5m, yellow 5-10m, red 10m+ with countdown to auto-stop
- ServiceRow: added Remove button with confirmation dialog
- RemoveConfirmDialog component
- DeployAgentDialog: added overlay networks field, default env includes GATEWAY_URL
- All queries refetch after agent start/stop/remove
This commit is contained in:
@@ -158,11 +158,17 @@ type SwarmService struct {
|
||||
}
|
||||
|
||||
type ServiceSpec struct {
|
||||
Name string `json:"Name"`
|
||||
Mode ServiceMode `json:"Mode"`
|
||||
TaskTemplate TaskTemplate `json:"TaskTemplate"`
|
||||
EndpointSpec *EndpointSpec `json:"EndpointSpec,omitempty"`
|
||||
Labels map[string]string `json:"Labels"`
|
||||
Name string `json:"Name"`
|
||||
Mode ServiceMode `json:"Mode"`
|
||||
TaskTemplate TaskTemplate `json:"TaskTemplate"`
|
||||
EndpointSpec *EndpointSpec `json:"EndpointSpec,omitempty"`
|
||||
Labels map[string]string `json:"Labels"`
|
||||
Networks []NetworkAttachment `json:"Networks,omitempty"`
|
||||
}
|
||||
|
||||
// NetworkAttachment names one network a service is attached to, together with
// the aliases the service should be given on that network.
//
// Target is always encoded; Aliases is left out of the JSON when empty.
type NetworkAttachment struct {
	Target  string   `json:"Target"`
	Aliases []string `json:"Aliases,omitempty"`
}
|
||||
|
||||
type ServiceMode struct {
|
||||
@@ -443,34 +449,67 @@ func (c *DockerClient) ListAllTasks() ([]SwarmTask, error) {
|
||||
// CreateAgentService deploys a new swarm service for an AI agent.
|
||||
// image: container image, name: service name, replicas: initial count,
|
||||
// env: environment variables, port: optional published port (0 = none).
|
||||
// CreateAgentServiceOpts holds options for deploying an agent Swarm service.
|
||||
type CreateAgentServiceOpts struct {
|
||||
Name string
|
||||
Image string
|
||||
Replicas int
|
||||
Env []string
|
||||
Port int
|
||||
Networks []string // overlay network names/IDs to attach
|
||||
Labels map[string]string
|
||||
}
|
||||
|
||||
func (c *DockerClient) CreateAgentService(name, image string, replicas int, env []string, port int) (*SwarmService, error) {
|
||||
return c.CreateAgentServiceFull(CreateAgentServiceOpts{
|
||||
Name: name,
|
||||
Image: image,
|
||||
Replicas: replicas,
|
||||
Env: env,
|
||||
Port: port,
|
||||
})
|
||||
}
|
||||
|
||||
func (c *DockerClient) CreateAgentServiceFull(opts CreateAgentServiceOpts) (*SwarmService, error) {
|
||||
labels := map[string]string{
|
||||
"goclaw.agent": "true",
|
||||
"goclaw.name": opts.Name,
|
||||
}
|
||||
for k, v := range opts.Labels {
|
||||
labels[k] = v
|
||||
}
|
||||
spec := ServiceSpec{
|
||||
Name: name,
|
||||
Name: opts.Name,
|
||||
Mode: ServiceMode{
|
||||
Replicated: &ReplicatedService{Replicas: replicas},
|
||||
Replicated: &ReplicatedService{Replicas: opts.Replicas},
|
||||
},
|
||||
TaskTemplate: TaskTemplate{
|
||||
ContainerSpec: ContainerSpec{
|
||||
Image: image,
|
||||
Env: env,
|
||||
Image: opts.Image,
|
||||
Env: opts.Env,
|
||||
},
|
||||
},
|
||||
Labels: map[string]string{
|
||||
"goclaw.agent": "true",
|
||||
"goclaw.name": name,
|
||||
},
|
||||
Labels: labels,
|
||||
}
|
||||
if port > 0 {
|
||||
if opts.Port > 0 {
|
||||
spec.EndpointSpec = &EndpointSpec{
|
||||
Ports: []PortConfig{
|
||||
{
|
||||
Protocol: "tcp",
|
||||
TargetPort: port,
|
||||
TargetPort: opts.Port,
|
||||
PublishMode: "ingress",
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
if len(opts.Networks) > 0 {
|
||||
for _, net := range opts.Networks {
|
||||
spec.Networks = append(spec.Networks, NetworkAttachment{
|
||||
Target: net,
|
||||
Aliases: []string{opts.Name},
|
||||
})
|
||||
}
|
||||
}
|
||||
var created struct {
|
||||
ID string `json:"ID"`
|
||||
}
|
||||
@@ -480,6 +519,40 @@ func (c *DockerClient) CreateAgentService(name, image string, replicas int, env
|
||||
return c.GetService(created.ID)
|
||||
}
|
||||
|
||||
// RemoveService removes a swarm service by ID or name.
|
||||
func (c *DockerClient) RemoveService(idOrName string) error {
|
||||
req, err := http.NewRequest(http.MethodDelete, c.baseURL+"/v1.44/services/"+urlEncode(idOrName), nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return fmt.Errorf("docker DELETE service %s: %w", idOrName, err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode >= 400 {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
return fmt.Errorf("docker DELETE service %s: status %d: %s", idOrName, resp.StatusCode, string(body))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetServiceLastActivity returns the most recent task update time for a service.
|
||||
// Used to determine whether a service is idle.
|
||||
func (c *DockerClient) GetServiceLastActivity(serviceID string) (time.Time, error) {
|
||||
tasks, err := c.ListServiceTasks(serviceID)
|
||||
if err != nil {
|
||||
return time.Time{}, err
|
||||
}
|
||||
var latest time.Time
|
||||
for _, t := range tasks {
|
||||
if t.UpdatedAt.After(latest) {
|
||||
latest = t.UpdatedAt
|
||||
}
|
||||
}
|
||||
return latest, nil
|
||||
}
|
||||
|
||||
// ─── Methods: Containers ─────────────────────────────────────────────────────
|
||||
|
||||
func (c *DockerClient) ListContainers() ([]Container, error) {
|
||||
|
||||
Reference in New Issue
Block a user