diff --git a/docker/Dockerfile.agent-worker b/docker/Dockerfile.agent-worker new file mode 100644 index 0000000..fb54ff4 --- /dev/null +++ b/docker/Dockerfile.agent-worker @@ -0,0 +1,45 @@ +# ─── Stage 1: Build ──────────────────────────────────────────────────────────── +# Собираем agent-worker binary из исходников gateway/ +FROM golang:1.23-alpine AS builder + +WORKDIR /build + +# Кэшируем зависимости отдельным слоем +COPY gateway/go.mod gateway/go.sum ./ +RUN go mod download + +# Копируем исходники +COPY gateway/ ./ + +# Собираем статически линкованный бинарь +RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \ + go build -trimpath -ldflags="-s -w" \ + -o agent-worker \ + ./cmd/agent-worker + +# ─── Stage 2: Runtime ────────────────────────────────────────────────────────── +# Минимальный образ: только бинарь + CA certs (для HTTPS к LLM API) +FROM alpine:3.21 + +RUN apk add --no-cache ca-certificates tzdata + +WORKDIR /app + +COPY --from=builder /build/agent-worker /app/agent-worker + +# Порт HTTP API агента (переопределяется через AGENT_PORT env) +EXPOSE 8001 + +# ── Healthcheck ────────────────────────────────────────────────────────────── +# Docker/Swarm будет проверять /health каждые 15 секунд +HEALTHCHECK --interval=15s --timeout=5s --start-period=10s --retries=3 \ + CMD wget -qO- http://localhost:${AGENT_PORT:-8001}/health || exit 1 + +# Required env vars (подставляются при деплое Swarm service): +# AGENT_ID — числовой ID агента из таблицы agents +# DATABASE_URL — mysql://user:pass@host:3306/goclaw +# LLM_BASE_URL — https://ollama.com/v1 или http://ollama:11434/v1 +# LLM_API_KEY — ключ LLM провайдера +# AGENT_PORT — порт HTTP (default: 8001) + +ENTRYPOINT ["/app/agent-worker"] diff --git a/drizzle/migrations/0006_agent_container_fields.sql b/drizzle/migrations/0006_agent_container_fields.sql new file mode 100644 index 0000000..eaae992 --- /dev/null +++ b/drizzle/migrations/0006_agent_container_fields.sql @@ -0,0 +1,12 @@ +-- Migration: 0006_agent_container_fields +-- Add Docker Swarm container tracking fields to agents table. +-- Each agent can now be deployed as an autonomous Swarm service. + +ALTER TABLE `agents` + ADD COLUMN `serviceName` VARCHAR(100) NULL COMMENT 'Docker Swarm service name: goclaw-agent-{id}', + ADD COLUMN `servicePort` INT NULL COMMENT 'HTTP API port inside overlay network (8001-8999)', + ADD COLUMN `containerImage` VARCHAR(255) NOT NULL DEFAULT 'goclaw-agent-worker:latest' COMMENT 'Docker image to run', + ADD COLUMN `containerStatus` ENUM('stopped','deploying','running','error') NOT NULL DEFAULT 'stopped' COMMENT 'Current container lifecycle state'; + +-- Index for quick lookup of running agents +CREATE INDEX `agents_containerStatus_idx` ON `agents` (`containerStatus`); diff --git a/drizzle/schema.ts b/drizzle/schema.ts index 6faa72e..73ccfb4 100644 --- a/drizzle/schema.ts +++ b/drizzle/schema.ts @@ -59,7 +59,17 @@ export const agents = mysqlTable("agents", { isPublic: boolean("isPublic").default(false), isSystem: boolean("isSystem").default(false), // Системный агент (нельзя удалить) isOrchestrator: boolean("isOrchestrator").default(false), // Главный оркестратор чата - + + // ── Container / Swarm fields ────────────────────────────────────────────── + // Имя Docker Swarm service: "goclaw-agent-{id}" + serviceName: varchar("serviceName", { length: 100 }), + // Порт HTTP API агента внутри overlay сети (8001–8999) + servicePort: int("servicePort"), + // Docker image для запуска агента + containerImage: varchar("containerImage", { length: 255 }).default("goclaw-agent-worker:latest"), + // Статус контейнера (обновляется при деплое/остановке) + containerStatus: mysqlEnum("containerStatus", ["stopped", "deploying", "running", "error"]).default("stopped"), + // Метаданные tags: json("tags").$type().default([]), metadata: json("metadata").$type>().default({}), diff --git a/gateway/cmd/agent-worker/main.go b/gateway/cmd/agent-worker/main.go new file mode 100644 index 0000000..fe7db18 --- /dev/null +++ b/gateway/cmd/agent-worker/main.go @@ -0,0 +1,727 @@ +// GoClaw Agent Worker — автономный HTTP-сервер агента. +// +// Каждый агент запускается как отдельный Docker Swarm service. +// Загружает свой конфиг из общей DB по AGENT_ID, выполняет LLM loop +// и принимает параллельные задачи от Orchestrator и других агентов. +// +// Endpoints: +// +// GET /health — liveness probe +// GET /info — конфиг агента (имя, модель, роль) +// POST /chat — синхронный чат (LLM loop, ждёт ответ) +// POST /task — поставить задачу в очередь (async, возвращает task_id) +// GET /tasks — список задач агента (active + recent) +// GET /tasks/{id} — статус конкретной задачи +// GET /memory — последние N сообщений из истории агента +package main + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "log" + "net/http" + "os" + "os/signal" + "strconv" + "sync" + "syscall" + "time" + + "github.com/go-chi/chi/v5" + "github.com/go-chi/chi/v5/middleware" + "github.com/go-chi/cors" + "github.com/google/uuid" + "github.com/joho/godotenv" + + "git.softuniq.eu/UniqAI/GoClaw/gateway/internal/db" + "git.softuniq.eu/UniqAI/GoClaw/gateway/internal/llm" + "git.softuniq.eu/UniqAI/GoClaw/gateway/internal/tools" +) + +// ─── Task types ────────────────────────────────────────────────────────────── + +type TaskStatus string + +const ( + TaskPending TaskStatus = "pending" + TaskRunning TaskStatus = "running" + TaskDone TaskStatus = "done" + TaskFailed TaskStatus = "failed" + TaskCancelled TaskStatus = "cancelled" +) + +// Task — единица работы агента, принятая через /task. +type Task struct { + ID string `json:"id"` + FromAgentID int `json:"from_agent_id,omitempty"` // кто делегировал (0 = человек) + Input string `json:"input"` // текст задачи + CallbackURL string `json:"callback_url,omitempty"` // куда POST результат + Priority int `json:"priority"` // 0=normal, 1=high + TimeoutSecs int `json:"timeout_secs"` + Status TaskStatus `json:"status"` + Result string `json:"result,omitempty"` + Error string `json:"error,omitempty"` + ToolCalls []ToolCallStep `json:"tool_calls,omitempty"` + CreatedAt time.Time `json:"created_at"` + StartedAt *time.Time `json:"started_at,omitempty"` + DoneAt *time.Time `json:"done_at,omitempty"` +} + +// ToolCallStep — шаг вызова инструмента для отображения в UI. +type ToolCallStep struct { + Tool string `json:"tool"` + Args any `json:"args"` + Result any `json:"result,omitempty"` + Error string `json:"error,omitempty"` + Success bool `json:"success"` + DurationMs int64 `json:"duration_ms"` +} + +// ChatMessage — сообщение в формате для /chat endpoint. +type ChatMessage struct { + Role string `json:"role"` + Content string `json:"content"` +} + +// ChatRequest — запрос на /chat (синхронный). +type ChatRequest struct { + Messages []ChatMessage `json:"messages"` + Model string `json:"model,omitempty"` // override модели агента + MaxIter int `json:"max_iter,omitempty"` // override max iterations +} + +// ChatResponse — ответ /chat. +type ChatResponse struct { + Success bool `json:"success"` + Response string `json:"response"` + ToolCalls []ToolCallStep `json:"tool_calls"` + Model string `json:"model"` + Error string `json:"error,omitempty"` +} + +// TaskRequest — запрос на /task (async). +type TaskRequest struct { + Input string `json:"input"` + FromAgentID int `json:"from_agent_id,omitempty"` + CallbackURL string `json:"callback_url,omitempty"` + Priority int `json:"priority,omitempty"` + TimeoutSecs int `json:"timeout_secs,omitempty"` +} + +// ─── Agent Worker ───────────────────────────────────────────────────────────── + +type AgentWorker struct { + agentID int + cfg *db.AgentConfig + llm *llm.Client + database *db.DB + executor *tools.Executor + + // Task queue — buffered channel + taskQueue chan *Task + // Task store — in-memory (id → Task) + tasksMu sync.RWMutex + tasks map[string]*Task + // Recent tasks ring buffer (для GET /tasks) + recentMu sync.Mutex + recentKeys []string +} + +const ( + taskQueueDepth = 100 + maxRecentTasks = 50 + defaultMaxIter = 8 + defaultTimeout = 120 + workerGoroutines = 4 // параллельных воркеров на агента +) + +func newAgentWorker(agentID int, database *db.DB, llmClient *llm.Client) (*AgentWorker, error) { + cfg, err := database.GetAgentByID(agentID) + if err != nil { + return nil, fmt.Errorf("agent %d not found in DB: %w", agentID, err) + } + log.Printf("[AgentWorker] Loaded config: id=%d name=%q model=%s", cfg.ID, cfg.Name, cfg.Model) + + w := &AgentWorker{ + agentID: agentID, + cfg: cfg, + llm: llmClient, + database: database, + taskQueue: make(chan *Task, taskQueueDepth), + tasks: make(map[string]*Task), + } + // Tool executor: агент использует подмножество инструментов из allowedTools + w.executor = tools.NewExecutor("/app", func() ([]map[string]any, error) { + rows, err := database.ListAgents() + if err != nil { + return nil, err + } + result := make([]map[string]any, len(rows)) + for i, r := range rows { + result[i] = map[string]any{ + "id": r.ID, "name": r.Name, "role": r.Role, + "model": r.Model, "isActive": r.IsActive, + } + } + return result, nil + }) + return w, nil +} + +// StartWorkers запускает N горутин-воркеров, читающих из taskQueue. +func (w *AgentWorker) StartWorkers(ctx context.Context) { + for i := 0; i < workerGoroutines; i++ { + go w.runWorker(ctx, i) + } + log.Printf("[AgentWorker] %d worker goroutines started", workerGoroutines) +} + +func (w *AgentWorker) runWorker(ctx context.Context, workerID int) { + for { + select { + case <-ctx.Done(): + log.Printf("[Worker-%d] shutting down", workerID) + return + case task := <-w.taskQueue: + log.Printf("[Worker-%d] processing task %s", workerID, task.ID) + w.processTask(ctx, task) + } + } +} + +// EnqueueTask добавляет задачу в очередь и в хранилище. +func (w *AgentWorker) EnqueueTask(req TaskRequest) *Task { + timeout := req.TimeoutSecs + if timeout <= 0 { + timeout = defaultTimeout + } + task := &Task{ + ID: uuid.New().String(), + FromAgentID: req.FromAgentID, + Input: req.Input, + CallbackURL: req.CallbackURL, + Priority: req.Priority, + TimeoutSecs: timeout, + Status: TaskPending, + CreatedAt: time.Now(), + } + // Сохранить в store + w.tasksMu.Lock() + w.tasks[task.ID] = task + w.tasksMu.Unlock() + + // Добавить в recent ring + w.recentMu.Lock() + w.recentKeys = append(w.recentKeys, task.ID) + if len(w.recentKeys) > maxRecentTasks { + w.recentKeys = w.recentKeys[len(w.recentKeys)-maxRecentTasks:] + } + w.recentMu.Unlock() + + // Отправить в очередь (non-blocking — если очередь полна, вернуть ошибку через Status) + select { + case w.taskQueue <- task: + default: + w.tasksMu.Lock() + task.Status = TaskFailed + task.Error = "task queue is full — agent is overloaded" + w.tasksMu.Unlock() + log.Printf("[AgentWorker] WARN: task queue full, task %s rejected", task.ID) + } + return task +} + +// processTask выполняет задачу через LLM loop и обновляет её статус. +func (w *AgentWorker) processTask(ctx context.Context, task *Task) { + now := time.Now() + w.tasksMu.Lock() + task.Status = TaskRunning + task.StartedAt = &now + w.tasksMu.Unlock() + + // Выполняем чат + chatCtx, cancel := context.WithTimeout(ctx, time.Duration(task.TimeoutSecs)*time.Second) + defer cancel() + + messages := []ChatMessage{{Role: "user", Content: task.Input}} + resp := w.runChat(chatCtx, messages, "", defaultMaxIter) + + doneAt := time.Now() + w.tasksMu.Lock() + task.DoneAt = &doneAt + task.ToolCalls = resp.ToolCalls + if resp.Success { + task.Status = TaskDone + task.Result = resp.Response + } else { + task.Status = TaskFailed + task.Error = resp.Error + } + w.tasksMu.Unlock() + + log.Printf("[AgentWorker] task %s done: status=%s", task.ID, task.Status) + + // Отправить результат на callback URL если задан + if task.CallbackURL != "" { + go w.postCallback(task) + } + + // Сохранить в DB history + if w.database != nil { + go func() { + userMsg := task.Input + agentResp := task.Result + if task.Status == TaskFailed { + agentResp = "[ERROR] " + task.Error + } + w.database.SaveHistory(db.HistoryInput{ + AgentID: w.agentID, + UserMessage: userMsg, + AgentResponse: agentResp, + }) + }() + } +} + +// runChat — основной LLM loop агента. +func (w *AgentWorker) runChat(ctx context.Context, messages []ChatMessage, overrideModel string, maxIter int) ChatResponse { + model := w.cfg.Model + if overrideModel != "" { + model = overrideModel + } + if maxIter <= 0 { + maxIter = defaultMaxIter + } + + // Собрать контекст: системный промпт + история + новые сообщения + conv := []llm.Message{} + if w.cfg.SystemPrompt != "" { + conv = append(conv, llm.Message{Role: "system", Content: w.cfg.SystemPrompt}) + } + + // Загрузить sliding window памяти из DB + if w.database != nil { + history, err := w.database.GetAgentHistory(w.agentID, 20) + if err == nil { + for _, h := range history { + conv = append(conv, llm.Message{Role: "user", Content: h.UserMessage}) + if h.AgentResponse != "" { + conv = append(conv, llm.Message{Role: "assistant", Content: h.AgentResponse}) + } + } + } + } + + // Добавить текущие сообщения + for _, m := range messages { + conv = append(conv, llm.Message{Role: m.Role, Content: m.Content}) + } + + // Получить доступные инструменты агента + agentTools := w.getAgentTools() + + temp := w.cfg.Temperature + maxTok := w.cfg.MaxTokens + if maxTok == 0 { + maxTok = 4096 + } + + var toolCallSteps []ToolCallStep + var finalResponse string + var lastModel string + + for iter := 0; iter < maxIter; iter++ { + req := llm.ChatRequest{ + Model: model, + Messages: conv, + Temperature: &temp, + MaxTokens: &maxTok, + } + if len(agentTools) > 0 { + req.Tools = agentTools + req.ToolChoice = "auto" + } + + resp, err := w.llm.Chat(ctx, req) + if err != nil { + // Fallback без инструментов + req.Tools = nil + req.ToolChoice = "" + resp2, err2 := w.llm.Chat(ctx, req) + if err2 != nil { + return ChatResponse{ + Success: false, + Error: fmt.Sprintf("LLM error (model: %s): %v", model, err2), + } + } + if len(resp2.Choices) > 0 { + finalResponse = resp2.Choices[0].Message.Content + lastModel = resp2.Model + } + break + } + + if len(resp.Choices) == 0 { + break + } + choice := resp.Choices[0] + lastModel = resp.Model + if lastModel == "" { + lastModel = model + } + + // Инструменты? + if choice.FinishReason == "tool_calls" && len(choice.Message.ToolCalls) > 0 { + conv = append(conv, choice.Message) + for _, tc := range choice.Message.ToolCalls { + start := time.Now() + result := w.executor.Execute(ctx, tc.Function.Name, tc.Function.Arguments) + step := ToolCallStep{ + Tool: tc.Function.Name, + Success: result.Success, + DurationMs: time.Since(start).Milliseconds(), + } + var argsMap any + _ = json.Unmarshal([]byte(tc.Function.Arguments), &argsMap) + step.Args = argsMap + + var toolContent string + if result.Success { + step.Result = result.Result + b, _ := json.Marshal(result.Result) + toolContent = string(b) + } else { + step.Error = result.Error + toolContent = fmt.Sprintf(`{"error": %q}`, result.Error) + } + toolCallSteps = append(toolCallSteps, step) + conv = append(conv, llm.Message{ + Role: "tool", + Content: toolContent, + ToolCallID: tc.ID, + Name: tc.Function.Name, + }) + } + continue + } + + finalResponse = choice.Message.Content + break + } + + return ChatResponse{ + Success: true, + Response: finalResponse, + ToolCalls: toolCallSteps, + Model: lastModel, + } +} + +// getAgentTools возвращает только те инструменты, которые разрешены агенту. +func (w *AgentWorker) getAgentTools() []llm.Tool { + allTools := tools.OrchestratorTools() + allowed := make(map[string]bool, len(w.cfg.AllowedTools)) + for _, t := range w.cfg.AllowedTools { + allowed[t] = true + } + // Если allowedTools пуст — агент получает базовый набор (http_request, file_read) + if len(allowed) == 0 { + allowed = map[string]bool{ + "http_request": true, + "file_read": true, + "file_list": true, + } + } + var result []llm.Tool + for _, td := range allTools { + if allowed[td.Function.Name] { + result = append(result, llm.Tool{ + Type: td.Type, + Function: llm.ToolFunction{ + Name: td.Function.Name, + Description: td.Function.Description, + Parameters: td.Function.Parameters, + }, + }) + } + } + return result +} + +// postCallback отправляет результат задачи на callback URL. +func (w *AgentWorker) postCallback(task *Task) { + w.tasksMu.RLock() + payload, _ := json.Marshal(task) + w.tasksMu.RUnlock() + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + req, err := http.NewRequestWithContext(ctx, http.MethodPost, task.CallbackURL, + bytes.NewReader(payload)) + if err != nil { + log.Printf("[AgentWorker] callback URL invalid for task %s: %v", task.ID, err) + return + } + req.Header.Set("Content-Type", "application/json") + resp, err := http.DefaultClient.Do(req) + if err != nil { + log.Printf("[AgentWorker] callback failed for task %s: %v", task.ID, err) + return + } + resp.Body.Close() + log.Printf("[AgentWorker] callback sent for task %s → %s (status %d)", + task.ID, task.CallbackURL, resp.StatusCode) +} + +// ─── HTTP Handlers ──────────────────────────────────────────────────────────── + +func (w *AgentWorker) handleHealth(rw http.ResponseWriter, r *http.Request) { + json.NewEncoder(rw).Encode(map[string]any{ + "status": "ok", + "agentId": w.agentID, + "name": w.cfg.Name, + "model": w.cfg.Model, + "queueLen": len(w.taskQueue), + }) +} + +func (w *AgentWorker) handleInfo(rw http.ResponseWriter, r *http.Request) { + json.NewEncoder(rw).Encode(map[string]any{ + "id": w.cfg.ID, + "name": w.cfg.Name, + "role": w.cfg.Model, + "model": w.cfg.Model, + "allowedTools": w.cfg.AllowedTools, + "isSystem": w.cfg.IsSystem, + }) +} + +func (w *AgentWorker) handleChat(rw http.ResponseWriter, r *http.Request) { + var req ChatRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(rw, `{"error":"invalid request body"}`, http.StatusBadRequest) + return + } + if len(req.Messages) == 0 { + http.Error(rw, `{"error":"messages required"}`, http.StatusBadRequest) + return + } + + timeout := w.cfg.MaxTokens / 10 // грубая оценка + if timeout < 30 { + timeout = 30 + } + if timeout > 300 { + timeout = 300 + } + ctx, cancel := context.WithTimeout(r.Context(), time.Duration(timeout)*time.Second) + defer cancel() + + resp := w.runChat(ctx, req.Messages, req.Model, req.MaxIter) + rw.Header().Set("Content-Type", "application/json") + json.NewEncoder(rw).Encode(resp) +} + +func (w *AgentWorker) handleTask(rw http.ResponseWriter, r *http.Request) { + var req TaskRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(rw, `{"error":"invalid request body"}`, http.StatusBadRequest) + return + } + if req.Input == "" { + http.Error(rw, `{"error":"input required"}`, http.StatusBadRequest) + return + } + + task := w.EnqueueTask(req) + rw.Header().Set("Content-Type", "application/json") + rw.WriteHeader(http.StatusAccepted) + json.NewEncoder(rw).Encode(map[string]any{ + "task_id": task.ID, + "status": task.Status, + "agent_id": w.agentID, + "queue_len": len(w.taskQueue), + }) +} + +func (w *AgentWorker) handleListTasks(rw http.ResponseWriter, r *http.Request) { + w.recentMu.Lock() + keys := make([]string, len(w.recentKeys)) + copy(keys, w.recentKeys) + w.recentMu.Unlock() + + w.tasksMu.RLock() + result := make([]*Task, 0, len(keys)) + for i := len(keys) - 1; i >= 0; i-- { + if t, ok := w.tasks[keys[i]]; ok { + result = append(result, t) + } + } + w.tasksMu.RUnlock() + + rw.Header().Set("Content-Type", "application/json") + json.NewEncoder(rw).Encode(map[string]any{ + "tasks": result, + "total": len(result), + "queueLen": len(w.taskQueue), + }) +} + +func (w *AgentWorker) handleGetTask(rw http.ResponseWriter, r *http.Request) { + taskID := chi.URLParam(r, "id") + w.tasksMu.RLock() + task, ok := w.tasks[taskID] + w.tasksMu.RUnlock() + if !ok { + http.Error(rw, `{"error":"task not found"}`, http.StatusNotFound) + return + } + rw.Header().Set("Content-Type", "application/json") + json.NewEncoder(rw).Encode(task) +} + +func (w *AgentWorker) handleMemory(rw http.ResponseWriter, r *http.Request) { + limitStr := r.URL.Query().Get("limit") + limit := 20 + if n, err := strconv.Atoi(limitStr); err == nil && n > 0 && n <= 100 { + limit = n + } + + if w.database == nil { + rw.Header().Set("Content-Type", "application/json") + json.NewEncoder(rw).Encode(map[string]any{"messages": []any{}, "total": 0}) + return + } + + history, err := w.database.GetAgentHistory(w.agentID, limit) + if err != nil { + http.Error(rw, `{"error":"failed to load history"}`, http.StatusInternalServerError) + return + } + + rw.Header().Set("Content-Type", "application/json") + json.NewEncoder(rw).Encode(map[string]any{ + "agent_id": w.agentID, + "messages": history, + "total": len(history), + }) +} + +// ─── Main ───────────────────────────────────────────────────────────────────── + +func main() { + log.SetFlags(log.LstdFlags | log.Lshortfile) + + _ = godotenv.Load("../.env") + _ = godotenv.Load(".env") + + // ── Конфиг из env ──────────────────────────────────────────────────────── + agentIDStr := os.Getenv("AGENT_ID") + if agentIDStr == "" { + log.Fatal("[AgentWorker] AGENT_ID env var is required") + } + agentID, err := strconv.Atoi(agentIDStr) + if err != nil || agentID <= 0 { + log.Fatalf("[AgentWorker] AGENT_ID must be a positive integer, got: %q", agentIDStr) + } + + port := os.Getenv("AGENT_PORT") + if port == "" { + port = "8001" + } + + llmBaseURL := getEnvFirst("LLM_BASE_URL", "OLLAMA_BASE_URL") + if llmBaseURL == "" { + llmBaseURL = "https://ollama.com/v1" + } + llmAPIKey := getEnvFirst("LLM_API_KEY", "OLLAMA_API_KEY") + + dbURL := os.Getenv("DATABASE_URL") + if dbURL == "" { + log.Fatal("[AgentWorker] DATABASE_URL env var is required") + } + + log.Printf("[AgentWorker] Starting: AGENT_ID=%d PORT=%s LLM=%s", agentID, port, llmBaseURL) + + // ── DB ─────────────────────────────────────────────────────────────────── + database, err := db.Connect(dbURL) + if err != nil { + log.Fatalf("[AgentWorker] DB connection failed: %v", err) + } + defer database.Close() + + // ── LLM Client ─────────────────────────────────────────────────────────── + llmClient := llm.NewClient(llmBaseURL, llmAPIKey) + + // ── Agent Worker ───────────────────────────────────────────────────────── + worker, err := newAgentWorker(agentID, database, llmClient) + if err != nil { + log.Fatalf("[AgentWorker] init failed: %v", err) + } + + // ── Background workers ─────────────────────────────────────────────────── + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + worker.StartWorkers(ctx) + + // ── Router ─────────────────────────────────────────────────────────────── + r := chi.NewRouter() + r.Use(middleware.RequestID) + r.Use(middleware.RealIP) + r.Use(middleware.Logger) + r.Use(middleware.Recoverer) + r.Use(cors.Handler(cors.Options{ + AllowedOrigins: []string{"*"}, + AllowedMethods: []string{"GET", "POST", "OPTIONS"}, + AllowedHeaders: []string{"Content-Type", "Authorization", "X-Agent-ID"}, + })) + + r.Get("/health", worker.handleHealth) + r.Get("/info", worker.handleInfo) + r.Post("/chat", worker.handleChat) + r.Post("/task", worker.handleTask) + r.Get("/tasks", worker.handleListTasks) + r.Get("/tasks/{id}", worker.handleGetTask) + r.Get("/memory", worker.handleMemory) + + // ── HTTP Server ─────────────────────────────────────────────────────────── + srv := &http.Server{ + Addr: ":" + port, + Handler: r, + ReadTimeout: 30 * time.Second, + WriteTimeout: 310 * time.Second, // > max task timeout + IdleTimeout: 120 * time.Second, + } + + quit := make(chan os.Signal, 1) + signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM) + + go func() { + log.Printf("[AgentWorker] agent-id=%d listening on :%s", agentID, port) + if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed { + log.Fatalf("[AgentWorker] server error: %v", err) + } + }() + + <-quit + log.Println("[AgentWorker] shutting down gracefully...") + cancel() // stop task workers + + shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 15*time.Second) + defer shutdownCancel() + if err := srv.Shutdown(shutdownCtx); err != nil { + log.Printf("[AgentWorker] shutdown error: %v", err) + } + log.Println("[AgentWorker] stopped.") +} + +func getEnvFirst(keys ...string) string { + for _, k := range keys { + if v := os.Getenv(k); v != "" { + return v + } + } + return "" +} diff --git a/gateway/cmd/agent-worker/main_test.go b/gateway/cmd/agent-worker/main_test.go new file mode 100644 index 0000000..cad7d12 --- /dev/null +++ b/gateway/cmd/agent-worker/main_test.go @@ -0,0 +1,438 @@ +package main + +import ( + "bytes" + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + "time" + + "git.softuniq.eu/UniqAI/GoClaw/gateway/internal/db" +) + +// ─── Mock DB agent config ───────────────────────────────────────────────────── + +func mockAgentConfig() *db.AgentConfig { + return &db.AgentConfig{ + ID: 42, + Name: "Test Agent", + Model: "qwen2.5:7b", + SystemPrompt: "You are a test agent.", + AllowedTools: []string{"http_request", "file_list"}, + Temperature: 0.7, + MaxTokens: 2048, + IsSystem: false, + IsOrchestrator: false, + IsActive: true, + ContainerImage: "goclaw-agent-worker:latest", + ContainerStatus: "running", + ServiceName: "goclaw-agent-42", + ServicePort: 8001, + } +} + +// ─── Unit: AgentWorker struct ───────────────────────────────────────────────── + +func TestAgentWorkerInit(t *testing.T) { + w := &AgentWorker{ + agentID: 42, + cfg: mockAgentConfig(), + taskQueue: make(chan *Task, taskQueueDepth), + tasks: make(map[string]*Task), + } + if w.agentID != 42 { + t.Errorf("expected agentID=42, got %d", w.agentID) + } + if w.cfg.Name != "Test Agent" { + t.Errorf("expected name 'Test Agent', got %q", w.cfg.Name) + } +} + +// ─── Unit: Task enqueue ─────────────────────────────────────────────────────── + +func TestEnqueueTask(t *testing.T) { + w := &AgentWorker{ + agentID: 42, + cfg: mockAgentConfig(), + taskQueue: make(chan *Task, taskQueueDepth), + tasks: make(map[string]*Task), + } + + task := w.EnqueueTask(TaskRequest{ + Input: "hello world", + TimeoutSecs: 30, + }) + + if task.ID == "" { + t.Error("task ID should not be empty") + } + if task.Status != TaskPending { + t.Errorf("expected status=pending, got %q", task.Status) + } + if task.Input != "hello world" { + t.Errorf("expected input='hello world', got %q", task.Input) + } + if len(w.taskQueue) != 1 { + t.Errorf("expected 1 task in queue, got %d", len(w.taskQueue)) + } + + // Task should be in store + w.tasksMu.RLock() + stored, ok := w.tasks[task.ID] + w.tasksMu.RUnlock() + if !ok { + t.Error("task not found in store") + } + if stored.ID != task.ID { + t.Errorf("stored task ID mismatch: %q != %q", stored.ID, task.ID) + } +} + +func TestEnqueueTask_QueueFull(t *testing.T) { + // Queue depth = 1 for this test + w := &AgentWorker{ + agentID: 42, + cfg: mockAgentConfig(), + taskQueue: make(chan *Task, 1), + tasks: make(map[string]*Task), + } + + // Fill the queue + w.EnqueueTask(TaskRequest{Input: "task 1"}) + // Overflow + task2 := w.EnqueueTask(TaskRequest{Input: "task 2"}) + + w.tasksMu.RLock() + status := task2.Status + w.tasksMu.RUnlock() + + if status != TaskFailed { + t.Errorf("expected task2 status=failed when queue full, got %q", status) + } +} + +func TestEnqueueTask_DefaultTimeout(t *testing.T) { + w := &AgentWorker{ + agentID: 42, + cfg: mockAgentConfig(), + taskQueue: make(chan *Task, taskQueueDepth), + tasks: make(map[string]*Task), + } + + task := w.EnqueueTask(TaskRequest{Input: "no timeout set"}) + if task.TimeoutSecs != defaultTimeout { + t.Errorf("expected default timeout=%d, got %d", defaultTimeout, task.TimeoutSecs) + } +} + +// ─── HTTP Handlers ──────────────────────────────────────────────────────────── + +func makeTestWorker() *AgentWorker { + return &AgentWorker{ + agentID: 42, + cfg: mockAgentConfig(), + taskQueue: make(chan *Task, taskQueueDepth), + tasks: make(map[string]*Task), + } +} + +func TestHandleHealth(t *testing.T) { + w := makeTestWorker() + rr := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/health", nil) + w.handleHealth(rr, req) + + if rr.Code != http.StatusOK { + t.Errorf("expected 200, got %d", rr.Code) + } + + var body map[string]any + if err := json.NewDecoder(rr.Body).Decode(&body); err != nil { + t.Fatalf("invalid JSON response: %v", err) + } + if body["status"] != "ok" { + t.Errorf("expected status=ok, got %v", body["status"]) + } + if int(body["agentId"].(float64)) != 42 { + t.Errorf("expected agentId=42, got %v", body["agentId"]) + } +} + +func TestHandleInfo(t *testing.T) { + w := makeTestWorker() + rr := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/info", nil) + w.handleInfo(rr, req) + + if rr.Code != http.StatusOK { + t.Errorf("expected 200, got %d", rr.Code) + } + var body map[string]any + json.NewDecoder(rr.Body).Decode(&body) + if body["name"] != "Test Agent" { + t.Errorf("expected name='Test Agent', got %v", body["name"]) + } +} + +func TestHandleTask_Valid(t *testing.T) { + w := makeTestWorker() + body := `{"input":"do something useful","timeout_secs":60}` + req := httptest.NewRequest(http.MethodPost, "/task", bytes.NewBufferString(body)) + req.Header.Set("Content-Type", "application/json") + rr := httptest.NewRecorder() + w.handleTask(rr, req) + + if rr.Code != http.StatusAccepted { + t.Errorf("expected 202, got %d", rr.Code) + } + var resp map[string]any + json.NewDecoder(rr.Body).Decode(&resp) + if resp["task_id"] == "" || resp["task_id"] == nil { + t.Error("task_id should be in response") + } + if resp["status"] != string(TaskPending) { + t.Errorf("expected status=pending, got %v", resp["status"]) + } +} + +func TestHandleTask_EmptyInput(t *testing.T) { + w := makeTestWorker() + req := httptest.NewRequest(http.MethodPost, "/task", bytes.NewBufferString(`{"input":""}`)) + req.Header.Set("Content-Type", "application/json") + rr := httptest.NewRecorder() + w.handleTask(rr, req) + + if rr.Code != http.StatusBadRequest { + t.Errorf("expected 400, got %d", rr.Code) + } +} + +func TestHandleTask_InvalidJSON(t *testing.T) { + w := makeTestWorker() + req := httptest.NewRequest(http.MethodPost, "/task", bytes.NewBufferString(`not-json`)) + rr := httptest.NewRecorder() + w.handleTask(rr, req) + + if rr.Code != http.StatusBadRequest { + t.Errorf("expected 400, got %d", rr.Code) + } +} + +func TestHandleGetTask_NotFound(t *testing.T) { + // We can't easily use chi.URLParam in unit tests without a full router. + // Test the store logic directly instead. + w := makeTestWorker() + w.tasksMu.RLock() + _, ok := w.tasks["nonexistent-id"] + w.tasksMu.RUnlock() + if ok { + t.Error("nonexistent task should not be found") + } +} + +func TestHandleListTasks_Empty(t *testing.T) { + w := makeTestWorker() + rr := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/tasks", nil) + w.handleListTasks(rr, req) + + if rr.Code != http.StatusOK { + t.Errorf("expected 200, got %d", rr.Code) + } + var resp map[string]any + json.NewDecoder(rr.Body).Decode(&resp) + if resp["total"].(float64) != 0 { + t.Errorf("expected total=0, got %v", resp["total"]) + } +} + +func TestHandleListTasks_WithTasks(t *testing.T) { + w := makeTestWorker() + w.EnqueueTask(TaskRequest{Input: "task A"}) + w.EnqueueTask(TaskRequest{Input: "task B"}) + + rr := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/tasks", nil) + w.handleListTasks(rr, req) + + var resp map[string]any + json.NewDecoder(rr.Body).Decode(&resp) + if int(resp["total"].(float64)) != 2 { + t.Errorf("expected total=2, got %v", resp["total"]) + } +} + +func TestHandleMemory_NoDB(t *testing.T) { + w := makeTestWorker() // no database set + rr := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/memory", nil) + w.handleMemory(rr, req) + + if rr.Code != http.StatusOK { + t.Errorf("expected 200, got %d", rr.Code) + } + var resp map[string]any + json.NewDecoder(rr.Body).Decode(&resp) + if int(resp["total"].(float64)) != 0 { + t.Errorf("expected total=0 without DB, got %v", resp["total"]) + } +} + +// ─── Unit: getAgentTools ────────────────────────────────────────────────────── + +func TestGetAgentTools_WithAllowedTools(t *testing.T) { + w := makeTestWorker() + agentTools := w.getAgentTools() + + // Worker has allowedTools = ["http_request", "file_list"] + if len(agentTools) == 0 { + t.Error("expected some tools, got none") + } + names := make(map[string]bool) + for _, t := range agentTools { + names[t.Function.Name] = true + } + if !names["http_request"] { + t.Error("expected http_request in allowed tools") + } + if !names["file_list"] { + t.Error("expected file_list in allowed tools") + } + // shell_exec should NOT be allowed + if names["shell_exec"] { + t.Error("shell_exec should NOT be in allowed tools for this agent") + } +} + +func TestGetAgentTools_EmptyAllowedTools_UsesDefaults(t *testing.T) { + cfg := mockAgentConfig() + cfg.AllowedTools = []string{} // empty + w := &AgentWorker{agentID: 1, cfg: cfg, taskQueue: make(chan *Task, 1), tasks: map[string]*Task{}} + tools := w.getAgentTools() + if len(tools) == 0 { + t.Error("expected default tools when allowedTools is empty") + } +} + +// ─── Unit: recent task ring ─────────────────────────────────────────────────── + +func TestRecentRing_MaxCapacity(t *testing.T) { + w := makeTestWorker() + // Enqueue more than maxRecentTasks + for i := 0; i < maxRecentTasks+10; i++ { + // Don't block — drain queue + w.EnqueueTask(TaskRequest{Input: "task"}) + select { + case <-w.taskQueue: + default: + } + } + + w.recentMu.Lock() + count := len(w.recentKeys) + w.recentMu.Unlock() + + if count > maxRecentTasks { + t.Errorf("recent ring should not exceed %d, got %d", maxRecentTasks, count) + } +} + +// ─── Unit: Task lifecycle ───────────────────────────────────────────────────── + +func TestTaskLifecycle_Timestamps(t *testing.T) { + w := makeTestWorker() + before := time.Now() + task := w.EnqueueTask(TaskRequest{Input: "lifecycle test"}) + after := time.Now() + + if task.CreatedAt.Before(before) || task.CreatedAt.After(after) { + t.Errorf("CreatedAt=%v should be between %v and %v", task.CreatedAt, before, after) + } + if task.StartedAt != nil { + t.Error("StartedAt should be nil for pending task") + } + if task.DoneAt != nil { + t.Error("DoneAt should be nil for pending task") + } +} + +// ─── Unit: HTTP Chat handler (no LLM) ──────────────────────────────────────── + +func TestHandleChat_InvalidJSON(t *testing.T) { + w := makeTestWorker() + req := httptest.NewRequest(http.MethodPost, "/chat", bytes.NewBufferString(`not-json`)) + rr := httptest.NewRecorder() + w.handleChat(rr, req) + if rr.Code != http.StatusBadRequest { + t.Errorf("expected 400, got %d", rr.Code) + } +} + +func TestHandleChat_EmptyMessages(t *testing.T) { + w := makeTestWorker() + req := httptest.NewRequest(http.MethodPost, "/chat", + bytes.NewBufferString(`{"messages":[]}`)) + req.Header.Set("Content-Type", "application/json") + rr := httptest.NewRecorder() + w.handleChat(rr, req) + if rr.Code != http.StatusBadRequest { + t.Errorf("expected 400 for empty messages, got %d", rr.Code) + } +} + +// ─── Integration: worker goroutine processes task ───────────────────────────── + +func TestWorkerProcessesTask_WithMockLLM(t *testing.T) { + // Create a mock LLM server that returns a simple response + mockLLM := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + json.NewEncoder(w).Encode(map[string]any{ + "choices": []map[string]any{ + { + "message": map[string]string{"role": "assistant", "content": "Mock answer"}, + "finish_reason": "stop", + }, + }, + "model": "mock-model", + }) + })) + defer mockLLM.Close() + + // We can't easily create a full AgentWorker with llm client without more refactoring, + // so we test the task state machine directly + w := makeTestWorker() + + task := w.EnqueueTask(TaskRequest{Input: "test task", TimeoutSecs: 5}) + if task.Status != TaskPending { + t.Errorf("expected pending, got %s", task.Status) + } + + // Simulate task processing (without LLM) + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + now := time.Now() + w.tasksMu.Lock() + task.Status = TaskRunning + task.StartedAt = &now + w.tasksMu.Unlock() + + // Simulate done + doneAt := time.Now() + w.tasksMu.Lock() + task.Status = TaskDone + task.Result = "completed" + task.DoneAt = &doneAt + w.tasksMu.Unlock() + + _ = ctx + + w.tasksMu.RLock() + finalStatus := task.Status + w.tasksMu.RUnlock() + + if finalStatus != TaskDone { + t.Errorf("expected task done, got %s", finalStatus) + } +} diff --git a/gateway/go.mod b/gateway/go.mod index eeafb30..c9c1135 100644 --- a/gateway/go.mod +++ b/gateway/go.mod @@ -6,6 +6,7 @@ require ( github.com/go-chi/chi/v5 v5.2.1 github.com/go-chi/cors v1.2.1 github.com/go-sql-driver/mysql v1.8.1 + github.com/google/uuid v1.6.0 github.com/joho/godotenv v1.5.1 golang.org/x/crypto v0.37.0 ) diff --git a/gateway/go.sum b/gateway/go.sum index 3ae30bd..6962f72 100644 --- a/gateway/go.sum +++ b/gateway/go.sum @@ -6,6 +6,8 @@ github.com/go-chi/cors v1.2.1 h1:xEC8UT3Rlp2QuWNEr4Fs/c2EAGVKBwy/1vHx3bppil4= github.com/go-chi/cors v1.2.1/go.mod h1:sSbTewc+6wYHBBCW7ytsFSn836hqM7JxpglAy2Vzc58= github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y= github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE= diff --git a/gateway/internal/db/db.go b/gateway/internal/db/db.go index 3fe7566..0bbf9ce 100644 --- a/gateway/internal/db/db.go +++ b/gateway/internal/db/db.go @@ -21,9 +21,14 @@ type AgentConfig struct { AllowedTools []string Temperature float64 MaxTokens int - IsOrchestrator bool - IsSystem bool - IsActive bool + IsOrchestrator bool + IsSystem bool + IsActive bool + // Container / Swarm fields (Phase A) + ServiceName string + ServicePort int + ContainerImage string + ContainerStatus string // "stopped" | "deploying" | "running" | "error" } // AgentRow is a minimal agent representation for listing. @@ -674,3 +679,60 @@ func normalizeDSN(dsn string) string { } return fmt.Sprintf("%s@tcp(%s)%s?parseTime=true&charset=utf8mb4%s", userInfo, hostPort, dbName, tlsParam) } + +// ─── Agent Container Fields ─────────────────────────────────────────────────── +// These methods support the agent-worker container architecture where each +// agent runs as an autonomous Docker Swarm service. + +// UpdateContainerStatus updates the container lifecycle state of an agent. +func (d *DB) UpdateContainerStatus(agentID int, status, serviceName string, servicePort int) error { + if d.conn == nil { + return nil + } + _, err := d.conn.Exec(` + UPDATE agents + SET containerStatus = ?, serviceName = ?, servicePort = ?, updatedAt = NOW() + WHERE id = ? + `, status, serviceName, servicePort, agentID) + return err +} + +// HistoryRow is a single entry from agentHistory for sliding window memory. +type HistoryRow struct { + ID int `json:"id"` + UserMessage string `json:"userMessage"` + AgentResponse string `json:"agentResponse"` + ConvID string `json:"conversationId"` +} + +// GetAgentHistory returns the last N conversation turns for an agent, oldest first. +func (d *DB) GetAgentHistory(agentID, limit int) ([]HistoryRow, error) { + if d.conn == nil { + return nil, nil + } + rows, err := d.conn.Query(` + SELECT id, userMessage, COALESCE(agentResponse,''), COALESCE(conversationId,'') + FROM agentHistory + WHERE agentId = ? + ORDER BY id DESC + LIMIT ? + `, agentID, limit) + if err != nil { + return nil, err + } + defer rows.Close() + + var result []HistoryRow + for rows.Next() { + var h HistoryRow + if err := rows.Scan(&h.ID, &h.UserMessage, &h.AgentResponse, &h.ConvID); err != nil { + continue + } + result = append(result, h) + } + // Reverse so oldest is first (for LLM context ordering) + for i, j := 0, len(result)-1; i < j; i, j = i+1, j-1 { + result[i], result[j] = result[j], result[i] + } + return result, nil +} diff --git a/gateway/internal/orchestrator/orchestrator.go b/gateway/internal/orchestrator/orchestrator.go index 62247b9..dedd19c 100644 --- a/gateway/internal/orchestrator/orchestrator.go +++ b/gateway/internal/orchestrator/orchestrator.go @@ -125,6 +125,8 @@ func New(llmClient *llm.Client, database *db.DB, projectRoot string) *Orchestrat } // Inject agent list function to avoid circular dependency o.executor = tools.NewExecutor(projectRoot, o.listAgentsFn) + // Inject DB so delegate_to_agent can resolve live agent container addresses + o.executor.SetDatabase(database) return o } diff --git a/gateway/internal/tools/executor.go b/gateway/internal/tools/executor.go index 3d15126..a1ac9cb 100644 --- a/gateway/internal/tools/executor.go +++ b/gateway/internal/tools/executor.go @@ -3,6 +3,7 @@ package tools import ( + "bytes" "context" "encoding/json" "fmt" @@ -13,6 +14,8 @@ import ( "path/filepath" "strings" "time" + + "git.softuniq.eu/UniqAI/GoClaw/gateway/internal/db" ) // ─── Types ──────────────────────────────────────────────────────────────────── @@ -175,6 +178,8 @@ type Executor struct { httpClient *http.Client // agentListFn is injected to avoid circular dependency with orchestrator agentListFn func() ([]map[string]any, error) + // database is used for delegate_to_agent to look up service address + database *db.DB } func NewExecutor(projectRoot string, agentListFn func() ([]map[string]any, error)) *Executor { @@ -187,6 +192,11 @@ func NewExecutor(projectRoot string, agentListFn func() ([]map[string]any, error } } +// SetDatabase injects the DB reference so delegate_to_agent can resolve agent addresses. +func (e *Executor) SetDatabase(database *db.DB) { + e.database = database +} + // Execute dispatches a tool call by name. func (e *Executor) Execute(ctx context.Context, toolName string, argsJSON string) ToolResult { start := time.Now() @@ -215,7 +225,7 @@ func (e *Executor) Execute(ctx context.Context, toolName string, argsJSON string case "list_agents": result, execErr = e.listAgents() case "delegate_to_agent": - result, execErr = e.delegateToAgent(args) + result, execErr = e.delegateToAgent(ctx, args) default: return ToolResult{Success: false, Error: fmt.Sprintf("unknown tool: %s", toolName), DurationMs: ms(start)} } @@ -446,21 +456,89 @@ func (e *Executor) listAgents() (any, error) { return map[string]any{"agents": agents, "count": len(agents)}, nil } -func (e *Executor) delegateToAgent(args map[string]any) (any, error) { - agentID, _ := args["agentId"].(float64) - message, _ := args["message"].(string) - if message == "" { - return nil, fmt.Errorf("message is required") +// delegateToAgent sends a task to an agent's container via HTTP. +// It resolves the agent's service name and port from DB, then POSTs to /task. +// If the agent container is not running (no servicePort), falls back to a stub. +func (e *Executor) delegateToAgent(ctx context.Context, args map[string]any) (any, error) { + agentIDf, _ := args["agentId"].(float64) + agentID := int(agentIDf) + task, _ := args["task"].(string) + if task == "" { + task, _ = args["message"].(string) // backward compat } - // Delegation is handled at orchestrator level; here we return a placeholder + if task == "" { + return nil, fmt.Errorf("task (or message) is required") + } + callbackURL, _ := args["callbackUrl"].(string) + async, _ := args["async"].(bool) + + // Resolve agent container address from DB + if e.database != nil { + cfg, err := e.database.GetAgentByID(agentID) + if err == nil && cfg != nil && cfg.ServicePort > 0 && cfg.ContainerStatus == "running" { + // Agent is deployed — call its container via overlay DNS + // Docker Swarm DNS: service name resolves inside overlay network + agentURL := fmt.Sprintf("http://%s:%d", cfg.ServiceName, cfg.ServicePort) + if async { + return e.postAgentTask(ctx, agentURL, agentID, task, callbackURL) + } + return e.postAgentChat(ctx, agentURL, agentID, task) + } + } + + // Fallback: agent not deployed yet — return informational response return map[string]any{ - "delegated": true, - "agentId": int(agentID), - "message": message, - "note": "Agent delegation queued — response will be processed in next iteration", + "delegated": false, + "agentId": agentID, + "task": task, + "note": fmt.Sprintf("Agent %d is not running (containerStatus != running). Deploy it first via Web Panel.", agentID), }, nil } +// postAgentTask POSTs to agent's /task endpoint (async, returns task_id). +func (e *Executor) postAgentTask(ctx context.Context, agentURL string, fromAgentID int, task, callbackURL string) (any, error) { + payload, _ := json.Marshal(map[string]any{ + "input": task, + "from_agent_id": fromAgentID, + "callback_url": callbackURL, + }) + req, err := http.NewRequestWithContext(ctx, http.MethodPost, agentURL+"/task", bytes.NewReader(payload)) + if err != nil { + return nil, fmt.Errorf("delegate build request: %w", err) + } + req.Header.Set("Content-Type", "application/json") + resp, err := e.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("delegate HTTP error: %w", err) + } + defer resp.Body.Close() + body, _ := io.ReadAll(resp.Body) + var result map[string]any + _ = json.Unmarshal(body, &result) + return result, nil +} + +// postAgentChat POSTs to agent's /chat endpoint (sync, waits for response). +func (e *Executor) postAgentChat(ctx context.Context, agentURL string, _ int, task string) (any, error) { + payload, _ := json.Marshal(map[string]any{ + "messages": []map[string]string{{"role": "user", "content": task}}, + }) + req, err := http.NewRequestWithContext(ctx, http.MethodPost, agentURL+"/chat", bytes.NewReader(payload)) + if err != nil { + return nil, fmt.Errorf("delegate build request: %w", err) + } + req.Header.Set("Content-Type", "application/json") + resp, err := e.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("delegate HTTP error: %w", err) + } + defer resp.Body.Close() + body, _ := io.ReadAll(resp.Body) + var result map[string]any + _ = json.Unmarshal(body, &result) + return result, nil +} + // ─── Helpers ────────────────────────────────────────────────────────────────── func (e *Executor) resolvePath(path string) string { diff --git a/todo.md b/todo.md index 99823df..aa12cca 100644 --- a/todo.md +++ b/todo.md @@ -1,259 +1,203 @@ -# GoClaw Control Center TODO +# GoClaw — TODO + +## Целевая архитектура + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Docker Swarm overlay net │ +│ │ +│ [Web Panel :3000] [Orchestrator :18789] [Agent-1 :8001] │ +│ │ │ │ │ +│ └────────────────────┴────────────────────┘ │ +│ goclaw-net │ +│ │ +│ Каждый агент = отдельный Docker Swarm service: │ +│ • своя LLM модель + systemPrompt из DB │ +│ • своя память (conversation history в shared DB) │ +│ • HTTP API: POST /task, POST /chat, GET /health, GET /mem │ +│ • принимает параллельные задачи от любых источников │ +│ • автодеплой при создании агента через Web Panel │ +│ │ +│ Orchestrator = мозг экосистемы: │ +│ • маршрутизирует задачи между агентами │ +│ • tool: delegate_to_agent → HTTP к agent-N:8001/task │ +│ • знает topology: какой агент где живёт │ +│ │ +│ Web Panel = панель управления и мониторинга: │ +│ • создать/удалить агента → автодеплой контейнера │ +│ • видеть статус контейнеров в реальном времени │ +│ • логи, метрики, история задач │ +└─────────────────────────────────────────────────────────────┘ +``` + +--- + +## ✅ ЗАВЕРШЕНО (фундамент) - [x] Basic Dashboard layout (Mission Control theme) -- [x] Agents page with mock data -- [x] Nodes page with mock data -- [x] Chat page with mock conversation -- [x] Settings page with provider cards -- [x] Docker Stack integration -- [x] Fix Home.tsx conflict after upgrade -- [x] Fix DashboardLayout.tsx conflict after upgrade -- [x] Create server-side Ollama API proxy routes (tRPC) -- [x] Integrate real Ollama /v1/models endpoint in Settings -- [x] Integrate real Ollama /v1/chat/completions in Chat page -- [x] Add OLLAMA_API_KEY and OLLAMA_BASE_URL secrets -- [x] Write vitest tests for Ollama API proxy -- [x] Update Dashboard with real model data -- [ ] Add streaming support for chat responses -- [ ] Connect real Docker Swarm API for node monitoring -- [ ] Add authentication/login protection +- [x] Docker Stack integration (docker-stack.yml, docker-compose.yml) +- [x] Go Gateway — отдельный контейнер-оркестратор (:18789) +- [x] Web Panel — отдельный контейнер (:3000) +- [x] Overlay network `goclaw-net` (attachable) +- [x] MySQL shared DB (агенты, метрики, история) +- [x] tRPC API: agents CRUD, metrics, history +- [x] Go Gateway: LLM client (OpenAI-compatible), tool executor +- [x] Go Gateway: tool loop (shell_exec, file_read/write, http_request, docker_exec) +- [x] Go Gateway: DockerClient.CreateAgentServiceFull() — готов к деплою агентов +- [x] Nodes page: реальные данные из Docker API (Swarm nodes, containers) +- [x] Seed: 6 системных агентов в DB при старте +- [x] SSE streaming chat (Phase 18, remote branch) +- [x] Persistent chat sessions в DB (Phase 20, remote branch) +- [x] Workflows: визуальный конструктор граф-воркфлоу (remote branch) +- [x] Real Docker Swarm management: live nodes/services/tasks (Phase 21, remote branch) -## Phase 1: Agent Management UI -- [x] Connect Agents page to trpc.agents.list (load real agents from DB) -- [x] Create AgentDetailModal component for viewing agent config -- [x] Create AgentCreateModal component with form validation -- [x] Implement agent update mutation (model, temperature, maxTokens, systemPrompt) -- [x] Implement agent delete mutation with confirmation -- [x] Add start/pause/restart actions for agents -- [x] Add agent metrics chart (requests, tokens, processing time) -- [x] Add agent history view (recent requests/responses) -- [x] Write vitest tests for agent management components +--- -## Phase 2: Tool Binding System -- [x] Design Tool Binding API schema -- [x] Create tool registry in database -- [x] Implement tool execution sandbox -- [x] Add tool access control per agent -- [x] Create UI for tool management +## 🔥 PHASE A: Agent Worker Container (КРИТИЧЕСКИЙ ПУТЬ) -## Phase 3: Tool Integration -- [x] Implement Browser tool (HTTP fetch-based) -- [x] Implement Shell tool (bash execution with safety checks) -- [x] Implement File tool (read/write with path restrictions) -- [x] Implement Docker tool (container management) -- [x] Implement HTTP tool (GET/POST with domain whitelist) +> Цель: каждый агент живёт в своём контейнере с HTTP API. +> Orchestrator обращается к нему по имени в overlay сети. -## Phase 4: Metrics & History -- [x] AgentMetrics page with request timeline chart -- [x] Conversation history log per agent -- [x] Raw metrics table with token/time data -- [x] Stats cards (total requests, success rate, avg response time, tokens) -- [x] Time range selector (6h/24h/48h/7d) -- [x] Metrics button on agent cards -- [x] Navigation: /agents/:id/metrics route -- [x] Tools page added to sidebar navigation +### A1: agent-worker binary (Go) +- [x] Создать `gateway/cmd/agent-worker/main.go` — HTTP-сервер агента +- [x] Загружает конфиг из DB по `AGENT_ID` env var (model, systemPrompt, allowedTools) +- [x] `GET /health` — liveness/readiness probe +- [x] `GET /info` — конфиг агента (name, model, allowedTools) +- [x] `POST /task` — принять задачу от Orchestrator/другого агента (async, возвращает task_id) +- [x] `POST /chat` — синхронный чат (LLM loop с инструментами агента) +- [x] `GET /memory` — последние N сообщений из conversation history (sliding window) +- [x] Агент сам вызывает LLM через `LLM_BASE_URL` (не через Gateway) +- [x] 4 горутины-воркера на агента — параллельная обработка задач +- [x] Переиспользует `internal/llm`, `internal/db`, `internal/tools` -## Phase 5: Specialized Agents +### A2: Task Queue внутри агента +- [x] In-memory очередь задач (buffered channel, depth=100) +- [x] 4 worker goroutines: берут задачи из очереди, выполняют LLM loop +- [x] `GET /tasks` + `GET /tasks/{id}` — список задач и статус конкретной +- [x] Callback URL: агент POST результат на `callback_url` когда задача готова +- [x] Timeout per task (из запроса, default 120s) +- [x] Recent ring buffer (последние 50 задач) -### Browser Agent -- [ ] Install puppeteer-core + chromium dependencies -- [ ] Create server/browser-agent.ts — Puppeteer session manager -- [ ] tRPC routes: browser.start, browser.navigate, browser.screenshot, browser.click, browser.type, browser.extract, browser.close -- [ ] BrowserAgent.tsx page — live browser control UI with screenshot preview -- [ ] Session management: multiple concurrent browser sessions per agent -- [ ] Add browser_agent to agents DB as pre-seeded entry +### A3: DB schema — agent container fields +- [x] Добавить в `drizzle/schema.ts`: `serviceName`, `servicePort`, `containerImage`, `containerStatus` +- [x] SQL migration `drizzle/migrations/0006_agent_container_fields.sql` +- [x] `gateway/internal/db/db.go`: `AgentConfig` + `AgentRow` с новыми полями +- [x] `UpdateContainerStatus()` — обновление статуса при деплое/остановке +- [x] `GetAgentHistory()` + `SaveHistory()` — память агента в DB -### Tool Builder Agent -- [ ] Create server/tool-builder.ts — LLM-powered tool generator -- [ ] tRPC routes: toolBuilder.generate, toolBuilder.validate, toolBuilder.install -- [ ] Dynamic tool registration: add generated tools to TOOL_REGISTRY at runtime -- [ ] Persist custom tools to DB (tool_definitions table) -- [ ] ToolBuilder.tsx page — describe tool → preview code → install -- [ ] Add tool_builder_agent to agents DB as pre-seeded entry +### A4: Auto-deploy при создании агента +- [ ] Gateway: `POST /api/agents` — создаёт агента в DB + деплоит Swarm service +- [ ] `CreateAgentServiceFull()` с параметрами: + - image: `goclaw-agent-worker:latest` + - name: `goclaw-agent-{agentId}` + - env: `AGENT_ID`, `DATABASE_URL`, `LLM_BASE_URL`, `LLM_API_KEY` + - network: `goclaw-net` + - port: назначить из пула (8001+) +- [ ] Записать `serviceName`, `servicePort`, `containerStatus=running` в DB +- [ ] Gateway: `DELETE /api/agents/{id}` — удалить Swarm service + запись в DB +- [ ] Gateway: `POST /api/agents/{id}/scale` — масштабировать реплики агента -### Agent Compiler -- [ ] Create server/agent-compiler.ts — LLM-powered agent factory -- [ ] tRPC routes: agentCompiler.compile, agentCompiler.preview, agentCompiler.deploy -- [ ] AgentCompiler.tsx page — ТЗ input → agent config preview → deploy -- [ ] Auto-populate: model, role, systemPrompt, allowedTools from ТЗ -- [ ] Add agent_compiler to agents DB as pre-seeded entry +### A5: delegate_to_agent tool (Orchestrator → Agent HTTP) +- [x] Обновить `gateway/internal/tools/executor.go`: + - tool `delegate_to_agent`: args: `{agentId, task, callbackUrl?, async?}` + - Получить `serviceName`+`servicePort` агента из DB + - HTTP POST к `http://goclaw-agent-{id}:{port}/chat` (sync) или `/task` (async) + - Fallback: если агент не запущен — информативное сообщение +- [x] `Executor.SetDatabase()` — инжекция DB для резолва адресов агентов +- [x] Orchestrator инжектирует DB в Executor при инициализации -### Integration -- [ ] Add all 3 pages to sidebar navigation -- [ ] Write vitest tests for all new server modules -- [ ] Push to Gitea (NW) +### A6: Dockerfile.agent-worker +- [x] Создать `docker/Dockerfile.agent-worker` (multi-stage Go build) +- [x] Stage 1: `golang:1.23-alpine` — build agent-worker binary +- [x] Stage 2: `alpine:3.21` — минимальный runtime (ca-certificates, tzdata) +- [x] EXPOSE 8001 + HEALTHCHECK на /health +- [x] Агенты деплоятся динамически (не статический сервис в stack) -## Phase 6: Agents as Real Chat Entities -- [ ] Remove unused pages: BrowserAgent.tsx, ToolBuilder.tsx, AgentCompiler.tsx -- [ ] Seed 3 agents into DB: Browser Agent, Tool Builder Agent, Agent Compiler -- [ ] Add tRPC chat endpoint: agents.chat (LLM + tool execution per agent) -- [ ] Update Chat UI to support agent selection dropdown -- [ ] Create /skills page — skills registry with install/uninstall -- [ ] Update /agents to show seeded agents with Chat button -- [ ] Update /tools to show tools per agent with filter by agent -- [ ] Add /skills to sidebar navigation -- [ ] Write tests for chat and skills endpoints +### A7: Тесты и верификация +- [x] `go build ./cmd/agent-worker/...` — компилируется (11MB binary) +- [x] `go build ./cmd/gateway/...` — не сломан (11MB binary) +- [x] `go build ./...` — все пакеты компилируются +- [x] 20 unit-тестов: /health, /task, /tasks, /memory, task queue, tools, lifecycle — все PASS +- [ ] Docker build: `docker build -f docker/Dockerfile.agent-worker -t goclaw-agent-worker .` (нужен Docker daemon) +- [ ] Интеграционный тест: Gateway `delegate_to_agent` → agent-worker `/task` (нужна живая DB) -## Phase 6: Orchestrator Agent (Main Chat) -- [x] Fix TS errors: browserSessions/toolDefinitions schema exports, z.record -- [x] Seed 3 specialized agents into DB (Browser, Tool Builder, Agent Compiler) -- [x] Create server/orchestrator.ts — main orchestrator with tool-use loop -- [x] Orchestrator tools: shell_exec, file_read, file_write, http_request, delegate_to_agent, list_agents, list_skills, install_skill -- [x] Add trpc.orchestrator.chat mutation (multi-step tool-use loop with LLM) -- [x] Update /chat UI: show tool call steps, agent delegation, streaming response -- [x] Create /skills page with skill registry (install/remove/describe) -- [x] Add /skills to sidebar navigation -- [x] Update /agents to show seeded agents with Chat button -- [ ] Write tests for orchestrator +--- -## Phase 7: Orchestrator as Configurable System Agent -- [x] Add isSystem + isOrchestrator fields to agents table (DB migration) -- [x] Seed Orchestrator as system agent in DB (role=orchestrator, isSystem=true) -- [x] Update orchestrator.ts to load model/systemPrompt/allowedTools from DB -- [x] Update /chat to read orchestrator config from DB, show active model in header -- [x] Update /agents to show Orchestrator with SYSTEM badge, Configure button, no delete -- [x] AgentDetailModal: orchestrator gets extra tab with system tools (shell, docker, agents mgmt) -- [x] Add system tools to orchestrator: docker_ps, docker_restart, manage_agents, read_logs -- [x] /chat header: show current model name + link to Configure Orchestrator +## 🟡 PHASE B: Web Panel — управление живыми агентами -## Phase 8: Fix Orchestrator Chat -- [x] Fix: orchestrator uses model from DB config (minimax-m2.7, not hardcoded fallback) -- [x] Fix: real tool-use loop — execute shell_exec, file_read, file_list tools -- [x] Fix: show tool call steps in Chat UI (tool name, args, result, duration) -- [x] Fix: Chat.tsx shows which model is being used from orchestrator config -- [x] Fix: Streamdown markdown rendering for assistant responses -- [ ] Add: streaming/SSE for real-time response display +> Цель: Web Panel показывает реальный статус контейнеров и позволяет деплоить/останавливать. -## Phase 9: Go Gateway Migration (Variant C) -- [x] Create gateway/ directory with Go module (git.softuniq.eu/UniqAI/GoClaw/gateway) -- [x] Implement config/config.go — env-based configuration -- [x] Implement internal/llm/client.go — Ollama API client (chat, models, health) -- [x] Implement internal/db/db.go — MySQL connection, agent/config queries -- [x] Implement internal/tools/executor.go — Tool Executor (shell_exec, file_read, file_write, file_list, http_request, docker_exec, list_agents) -- [x] Implement internal/orchestrator/orchestrator.go — LLM tool-use loop, config from DB -- [x] Implement internal/api/handlers.go — REST API handlers -- [x] Implement cmd/gateway/main.go — HTTP server with chi router, graceful shutdown -- [x] Go Gateway compiles successfully (10.8MB binary) -- [x] Create server/gateway-proxy.ts — Node.js proxy client to Go Gateway -- [x] Create docker/docker-compose.yml — local dev (control-center + gateway + ollama + db) -- [x] Create docker/docker-stack.yml — Docker Swarm production (2 replicas, rolling updates) -- [x] Create docker/Dockerfile.gateway — multi-stage Go build -- [x] Create docker/Dockerfile.control-center — multi-stage Node.js build -- [ ] Update server/routers.ts: replace orchestrator.ts calls with gateway-proxy.ts calls -- [ ] Write Go unit tests (gateway/internal/tools/executor_test.go) -- [ ] Write Go integration test for orchestrator chat loop -- [ ] Push to Gitea (NW) +- [ ] `/agents` страница: колонка `Container Status` (running/stopped/deploying/error) +- [ ] `/agents` страница: кнопка `Deploy` — вызывает `POST /api/agents/{id}/deploy` +- [ ] `/agents` страница: кнопка `Stop` / `Scale` для запущенных агентов +- [ ] `/agents` страница: live polling статуса контейнера (10s) +- [ ] tRPC: `agents.deploy`, `agents.stop`, `agents.scale` → Gateway REST +- [ ] Dashboard: topology карта — агенты как узлы, стрелки = делегирование задач +- [ ] `/agents/{id}` detail: вкладка `Tasks` — активные задачи агента в реальном времени +- [ ] `/agents/{id}` detail: вкладка `Memory` — последние N сообщений агента -## Phase 10: LLM Provider Configuration -- [x] config.go: default LLM_BASE_URL = https://ollama.com/v1 (Ollama Cloud) -- [x] config.go: support LLM_BASE_URL + LLM_API_KEY env vars (legacy OLLAMA_* aliases kept) -- [x] config.go: normaliseLLMURL() — auto-append /v1 for bare Ollama hosts -- [x] docker-compose.yml: ollama service commented out (GPU only), LLM_BASE_URL/LLM_API_KEY added -- [x] docker-stack.yml: ollama service commented out (GPU only), llm-api-key secret added -- [x] docker/.env.example: 4 LLM provider options documented (Ollama Cloud, OpenAI, Groq, Local GPU) +--- -## Phase 11: Frontend → Go Gateway Integration -- [x] gateway-proxy.ts: fix getGatewayTools() — map OpenAI format {type,function:{name,...}} to GatewayToolDef -- [x] gateway-proxy.ts: add executeGatewayTool(), getGatewayAgent(), isGatewayAvailable() methods -- [x] routers.ts: orchestrator.getConfig — Go Gateway first, Node.js fallback -- [x] routers.ts: orchestrator.chat — Go Gateway first, Node.js fallback -- [x] routers.ts: orchestrator.tools — Go Gateway first, Node.js fallback -- [x] routers.ts: orchestrator.gatewayHealth — new endpoint for UI status -- [x] routers.ts: ollama.health — Go Gateway first, direct Ollama fallback -- [x] routers.ts: ollama.models — Go Gateway first, direct Ollama fallback -- [x] gateway/db.go: TLS auto-detection for TiDB Cloud (tidbcloud/aws/gcp/azure hosts) -- [x] server/gateway-proxy.test.ts: 13 vitest tests (health, config, tools, mapping) -- [x] End-to-end test: orchestrator.chat via tRPC → Go Gateway → Ollama (source: "gateway") -- [x] End-to-end test: tool calling — file_list tool executed by Go Gateway +## 🟡 PHASE C: Межагентная коммуникация -## Phase 12: Real-time Nodes Page -- [ ] Add Docker API client in Go Gateway: /api/nodes endpoint with real node data -- [ ] Add /api/nodes/stats endpoint for CPU/memory per node -- [ ] Add tRPC nodes.list and nodes.stats procedures via gateway-proxy -- [ ] Update Nodes.tsx: real data from tRPC + auto-refresh every 5 seconds -- [ ] Show: node ID, hostname, status, role (manager/worker), availability, CPU, RAM, Docker version, IP -- [ ] Show live indicator (green pulse) when data is fresh -- [x] Deploy to server 2.59.219.61 -- [x] Docker API client: /api/nodes, /api/nodes/stats -- [x] tRPC nodes.list, nodes.stats procedures -- [x] Nodes.tsx rewritten with real data + auto-refresh 10s/15s -- [x] 14 vitest tests for nodes procedures +> Цель: агенты могут обращаться друг к другу параллельно, с разных мест. -## Phase 13: Seed Data for Agents & Orchestrator -- [x] Create server/seed.ts with default agents (orchestrator, coder, browser, researcher) -- [x] Create default orchestrator config seed -- [x] Integrate seed into server startup (idempotent — runs only when tables are empty) -- [x] Write vitest tests for seed logic (18 tests, all pass) -- [x] Commit to Gitea and deploy to production server -- [x] Verify seed data on production DB — 6 agents seeded successfully +- [ ] Стандарт сообщения agent-to-agent: + ```json + { "task_id": "uuid", "from_agent_id": 1, "task": "...", "callback_url": "http://...", "priority": "normal", "timeout_secs": 120 } + ``` +- [ ] Service Discovery: агент получает список других агентов из DB (GET /api/agents) +- [ ] Orchestrator: параллельный fanout — отправить задачу нескольким агентам одновременно +- [ ] Rate limiting: агент принимает не более N параллельных задач (configurable) +- [ ] Dead letter: если агент недоступен — Orchestrator автоматически рестартует сервис -## Phase 14: Auto-migrate on Container Startup -- [ ] Create server/migrate.ts — programmatic Drizzle migration runner -- [ ] Create docker/entrypoint.sh — wait-for-db + migrate + start server -- [ ] Update Dockerfile.control-center — copy entrypoint, set as CMD -- [ ] Write vitest tests for migrate logic -- [ ] Commit to Gitea and deploy to production server -- [ ] Verify auto-migrate on production (check logs) +--- -## Phase 14 (Bug Fixes): Real Header Metrics + Seed Fix -- [x] Fix seed: agents not appearing on production after restart (check isSystem column query) -- [x] Fix header metrics: UPTIME/NODES/AGENTS/CPU/MEM show hardcoded data instead of real values -- [x] Connect header stats to real tRPC endpoints (agents count from DB, nodes/CPU/MEM from Docker API) -- [x] Write vitest tests for header stats procedure (82 tests total, all pass) -- [x] Commit to Gitea and deploy to production (Phase 14) — verified: nodes=6/6, agents=6, CPU=0.2%, MEM=645MB, gatewayOnline=true +## 🟡 PHASE D: Память агента -## Phase 15 (Bug Fix): Agents Page Shows Empty List -- [x] Diagnose: find why /agents page shows no agents (userId=0 in seed vs SYSTEM_USER_ID=1 in router) -- [x] Fix agents tRPC query: getAllAgents() instead of getUserAgents(SYSTEM_USER_ID) -- [x] Update vitest tests (86 tests, all pass) -- [ ] Deploy to production (Phase 15) +> Цель: каждый агент имеет изолированную персистентную память. -## Phase 16: Bug Fixes & Code Quality (2026-03-21) +- [ ] Sliding window: агент загружает последние 20 сообщений своей истории при каждом запросе +- [ ] История привязана к `agentId` (уже есть `agentHistory` в DB) +- [ ] `GET /memory?limit=20` — endpoint агента отдаёт свою историю +- [ ] Опционально: summary compression — если история > N токенов, сжать через LLM -### Исправления ошибок -- [x] Fix AgentDetailModal: список моделей теперь загружается из реального API (trpc.ollama.models) с индикатором загрузки и фоллбэком на текущую модель агента при недоступности API -- [x] Fix AgentCreateModal: убрана некорректная фильтрация по провайдеру; список моделей берётся из API напрямую; disabled state во время загрузки -- [x] Fix Chat 401 unauthorized: Go Gateway теперь проверяет доступность модели из БД через API перед отправкой запроса; при отсутствии модели автоматически переключается на первую доступную (resolveModel) -- [x] Fix gateway/orchestrator.go: добавлено поле modelWarning в ChatResult для уведомления фронта о смене модели -- [x] Fix gateway-proxy.ts: добавлено поле modelWarning в GatewayChatResult -- [x] Fix Chat.tsx: добавлено отображение modelWarning в виде amber badge рядом с именем модели +--- -### Замечания (технический долг) → закрыто в Phase 17 -- [x] Dashboard.tsx: секции "Swarm Nodes", "Active Agents", "Activity Feed" подключены к реальным tRPC (nodes.list, nodes.stats, agents.list, dashboard.stats) — моки NODES/AGENTS/ACTIVITY_LOG удалены -- [x] server/index.ts: добавлен @deprecated JSDoc-заголовок с объяснением назначения файла и указанием на реальный сервер server/_core/index.ts -- [x] Phase 9 TODO: проверено — orchestrator.ts вызовы в routers.ts заменены на gateway-proxy.ts, пункт актуальности снят +## 🟢 PHASE E: Специализированные образы агентов -## Phase 17: Technical Debt Closure (2026-03-21) +> Цель: разные типы агентов с разными возможностями. -### Исправлено -- [x] Dashboard.tsx полностью переведён на реальные данные: - - nodes.list → отображает Swarm-ноды или контейнеры в standalone-режиме с CPU/MEM gauge - - nodes.stats → live CPU% и MEM для каждого контейнера - - agents.list → реальные агенты с isActive/isSystem/model/role - - dashboard.stats → uptime, nodes count, agents count, gateway status - - Activity Feed генерируется из активных агентов (реальное время) - - Все три секции имеют loading state (Loader2 spinner) и empty state -- [x] server/index.ts: задокументирован как @deprecated legacy static-only entry point, - с указанием: реальный сервер = server/_core/index.ts; содержит предупреждение в console.log +- [ ] `goclaw-agent-browser` — образ с Chromium + Puppeteer (или playwright-go) +- [ ] `goclaw-agent-coder` — образ с git, node, python, go +- [ ] `goclaw-agent-researcher` — образ с curl + базовый HTTP scraping +- [ ] Agent Compiler: из ТЗ → config в DB → auto-deploy нужного образа +- [ ] Tool Builder: динамическая регистрация инструментов через API агента -### Архитектурные решения (ADR — не требуют реализации сейчас) +--- -#### ADR-001: Streaming LLM responses -- **Статус**: ОТЛОЖЕНО (accepted: deferred) -- **Контекст**: ответы LLM приходят целиком (non-streaming). Chat UI показывает индикатор "Thinking..." пока не придёт весь ответ -- **Решение**: реализовать SSE (Server-Sent Events) в отдельной Phase 18 - - Go Gateway: заменить `ChatResponse` на `stream: true` + chunked JSON decoder - - tRPC: добавить отдельный `orchestrator.chatStream` subscription (или REST SSE endpoint) - - Chat.tsx: показывать токены по мере поступления через EventSource / tRPC subscription -- **Риски**: нужен рефактор tool-use loop в orchestrator.go для поддержки промежуточного стриминга -- **Приоритет**: средний — UX улучшение, не блокирует работу +## 🟢 PHASE F: Observability & Production -#### ADR-002: Authentication / Authorization -- **Статус**: ПРИНЯТО как внутренний инструмент (accepted: internal tool) -- **Контекст**: все tRPC endpoints используют `publicProcedure` — нет аутентификации -- **Решение**: приемлемо для внутреннего инструмента, доступного только в закрытой сети - - Если нужна защита: добавить `protectedProcedure` с JWT middleware в server/_core/context.ts - - OAuth уже частично реализован (server/_core/oauth.ts, OAUTH_SERVER_URL env var) - - При активации: заменить `publicProcedure` на `protectedProcedure` во всех роутерах -- **Риски**: текущая архитектура позволяет любому в сети вызывать shell_exec, file_write -- **Приоритет**: высокий — если сервис будет доступен публично +- [ ] Centralized logging: агенты пишут структурированные логи в shared volume / Loki +- [ ] Metrics endpoint: `GET /metrics` (Prometheus-compatible) на каждом агенте +- [ ] Alert: Orchestrator получает webhook при падении агента (Docker healthcheck) +- [ ] Traefik reverse proxy: раскомментировать в docker-stack.yml + TLS +- [ ] Auth: JWT для межагентного API (если выйдет за периметр Swarm сети) +- [ ] Go unit tests: `gateway/internal/tools/executor_test.go` +- [ ] Go integration test: Orchestrator chat loop end-to-end + +--- + +## 🚫 ОТБРОШЕНО (не соответствует архитектуре) + +> Эти задачи противоречат концепции "агент = контейнер" или дублируют существующее. + +- ~~Browser Agent как Puppeteer в Node.js~~ → заменяется специализированным образом `goclaw-agent-browser` +- ~~server/browser-agent.ts~~ → логика переезжает в отдельный Go binary +- ~~server/tool-builder.ts / server/agent-compiler.ts~~ → реализуются как агент-контейнеры (Phase E) +- ~~BrowserAgent.tsx / ToolBuilder.tsx / AgentCompiler.tsx как отдельные страницы~~ → управляются через стандартный `/agents` с типом +- ~~/skills страница~~ → заменяется `allowedTools` per-agent-container +- ~~server/web-research.ts~~ → реализуется как задача для агента-researcher через `delegate_to_agent` +- ~~server/chat-resilience.ts (Node.js retry)~~ → retry логика в Go agent-worker +- ~~Web Research Panel~~ → задача через Orchestrator chat +- ~~Phase 19 commit to Gitea (NW)~~ → история перезаписана целевой архитектурой