feat(retry): LLM retry-on-failure for orchestrator — never returns empty response
Problem: when LLM returned empty content or network error, the orchestrator
immediately stopped with (no response) — visible to user as blank reply.
Solution — 4-layer retry system:
## Go Gateway (gateway/internal/orchestrator/orchestrator.go)
- Extracted shared runLoop() used by Chat(), ChatWithEvents(), ChatWithEventsAndRetry()
- Added RetryPolicy struct: MaxLLMRetries (default 3), InitialDelay (2s),
MaxDelay (30s), RetryOnEmpty (true)
- callLLMWithRetry(): wraps every LLM call with exponential back-off:
* retries on HTTP/network error
* retries on empty choices array
* retries when content=="" AND finish_reason!="tool_calls" (soft empty)
* strips tools on attempt > 1 (avoids repeated tool-format errors)
* logs each attempt; total attempts = MaxLLMRetries + 1 (default: 4)
- Added ChatWithEventsAndRetry() with onRetry callback for client visibility
- SetRetryPolicy() for runtime override
## Config (gateway/config/config.go)
- New fields: MaxLLMRetries (GATEWAY_MAX_LLM_RETRIES, default 3)
RetryDelaySecs (GATEWAY_RETRY_DELAY_SECS, default 2)
## main.go — wires retry policy from config into orchestrator
## docker-compose.yml
- GATEWAY_REQUEST_TIMEOUT_SECS: 120 → 300 (accommodates up to 4 retries)
- GATEWAY_MAX_LLM_RETRIES=3, GATEWAY_RETRY_DELAY_SECS=2 env vars
## API (handlers.go)
- StartChatSession goroutine now uses ChatWithEventsAndRetry
- onRetry callback emits "thinking" DB event with content "⟳ Retry N: reason"
so the client sees retry progress in the console panel
## Frontend (client/src/lib/chatStore.ts + client/src/pages/Chat.tsx)
- ConsoleEntry gains content?: string and new type "retry"
- thinking events with content starting "⟳ Retry" → type=retry (amber)
- Chat ConsolePanel renders retry events in amber with RefreshCw icon
and shows the retry reason string underneath
This commit is contained in:
@@ -53,7 +53,7 @@ export interface Conversation {
|
||||
|
||||
export interface ConsoleEntry {
|
||||
id: string;
|
||||
type: "thinking" | "tool_call" | "done" | "error";
|
||||
type: "thinking" | "tool_call" | "done" | "error" | "retry";
|
||||
tool?: string;
|
||||
args?: any;
|
||||
result?: any;
|
||||
@@ -62,6 +62,8 @@ export interface ConsoleEntry {
|
||||
durationMs?: number;
|
||||
timestamp: string;
|
||||
model?: string;
|
||||
/** For thinking events: extra message text (e.g. retry reason) */
|
||||
content?: string;
|
||||
}
|
||||
|
||||
type StoreEvent = "update" | "console";
|
||||
@@ -439,9 +441,16 @@ class ChatStore {
|
||||
if (ev.seq > maxSeq) maxSeq = ev.seq;
|
||||
|
||||
switch (ev.eventType) {
|
||||
case "thinking":
|
||||
this.addConsoleEntry({ type: "thinking" });
|
||||
case "thinking": {
|
||||
// If content starts with retry prefix, show as retry event
|
||||
const thinkMsg = ev.content || "";
|
||||
if (thinkMsg.startsWith("⟳ Retry")) {
|
||||
this.addConsoleEntry({ type: "retry", content: thinkMsg });
|
||||
} else {
|
||||
this.addConsoleEntry({ type: "thinking", content: thinkMsg || undefined });
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case "tool_call": {
|
||||
let args: any = {};
|
||||
|
||||
@@ -51,6 +51,7 @@ import {
|
||||
PanelRightOpen,
|
||||
Shell,
|
||||
StopCircle,
|
||||
RefreshCw,
|
||||
} from "lucide-react";
|
||||
|
||||
// ─── useChatStore hook ────────────────────────────────────────────────────────
|
||||
@@ -295,6 +296,8 @@ function ConsolePanel({ entries }: { entries: ConsoleEntry[] }) {
|
||||
animate={{ opacity: 1, x: 0 }}
|
||||
className={`rounded p-2 border ${
|
||||
e.type === "thinking" ? "bg-cyan-500/10 border-cyan-500/20 text-cyan-400"
|
||||
: e.type === "retry"
|
||||
? "bg-amber-500/10 border-amber-500/20 text-amber-400"
|
||||
: e.type === "tool_call"
|
||||
? e.success !== false ? "bg-green-500/10 border-green-500/20 text-green-300"
|
||||
: "bg-red-500/10 border-red-500/20 text-red-300"
|
||||
@@ -304,11 +307,13 @@ function ConsolePanel({ entries }: { entries: ConsoleEntry[] }) {
|
||||
>
|
||||
<div className="flex items-center gap-1.5 mb-1">
|
||||
{e.type === "thinking" && <Loader2 className="w-3 h-3 animate-spin" />}
|
||||
{e.type === "retry" && <RefreshCw className="w-3 h-3 text-amber-400" />}
|
||||
{e.type === "tool_call" && <ToolIcon tool={e.tool ?? ""} />}
|
||||
{e.type === "done" && <CheckCircle className="w-3 h-3" />}
|
||||
{e.type === "error" && <XCircle className="w-3 h-3" />}
|
||||
<span className="font-semibold">
|
||||
{e.type === "thinking" ? "thinking…"
|
||||
: e.type === "retry" ? "retry"
|
||||
: e.type === "tool_call" ? toolLabel(e.tool ?? "")
|
||||
: e.type === "done" ? `done · ${e.model ?? ""}`
|
||||
: "error"}
|
||||
@@ -316,6 +321,11 @@ function ConsolePanel({ entries }: { entries: ConsoleEntry[] }) {
|
||||
<span className="ml-auto text-[9px] opacity-60">{e.timestamp}</span>
|
||||
</div>
|
||||
|
||||
{/* Retry reason or thinking message */}
|
||||
{(e.type === "retry" || e.type === "thinking") && e.content && (
|
||||
<div className="text-[9px] opacity-80 mt-0.5 font-mono break-words">{e.content}</div>
|
||||
)}
|
||||
|
||||
{e.type === "tool_call" && e.args && (
|
||||
<pre className="text-[9px] opacity-70 overflow-hidden max-h-16 whitespace-pre-wrap">
|
||||
{JSON.stringify(e.args, null, 1).slice(0, 200)}
|
||||
|
||||
@@ -109,8 +109,12 @@ services:
|
||||
DEFAULT_MODEL: "${DEFAULT_MODEL:-qwen2.5:7b}"
|
||||
DATABASE_URL: "${MYSQL_USER:-goclaw}:${MYSQL_PASSWORD:-goClawPass123}@tcp(db:3306)/${MYSQL_DATABASE:-goclaw}?parseTime=true"
|
||||
PROJECT_ROOT: "/app"
|
||||
GATEWAY_REQUEST_TIMEOUT_SECS: "120"
|
||||
# Request timeout — must be > (MaxLLMRetries * RetryDelay * 2 + actual LLM time)
|
||||
GATEWAY_REQUEST_TIMEOUT_SECS: "300"
|
||||
GATEWAY_MAX_TOOL_ITERATIONS: "10"
|
||||
# LLM retry policy: retry up to N times on empty response or network error
|
||||
GATEWAY_MAX_LLM_RETRIES: "${GATEWAY_MAX_LLM_RETRIES:-3}"
|
||||
GATEWAY_RETRY_DELAY_SECS: "${GATEWAY_RETRY_DELAY_SECS:-2}"
|
||||
LOG_LEVEL: "info"
|
||||
depends_on:
|
||||
db:
|
||||
|
||||
@@ -47,6 +47,14 @@ func main() {
|
||||
|
||||
// ── Orchestrator ─────────────────────────────────────────────────────────
|
||||
orch := orchestrator.New(llmClient, database, cfg.ProjectRoot)
|
||||
// Apply retry policy from config
|
||||
orch.SetRetryPolicy(orchestrator.RetryPolicy{
|
||||
MaxLLMRetries: cfg.MaxLLMRetries,
|
||||
InitialDelay: time.Duration(cfg.RetryDelaySecs) * time.Second,
|
||||
MaxDelay: 30 * time.Second,
|
||||
RetryOnEmpty: true,
|
||||
})
|
||||
log.Printf("[Gateway] LLM retry policy: maxRetries=%d, initialDelay=%ds", cfg.MaxLLMRetries, cfg.RetryDelaySecs)
|
||||
|
||||
// ── HTTP Handlers ────────────────────────────────────────────────────────
|
||||
h := api.NewHandler(cfg, llmClient, orch, database)
|
||||
|
||||
@@ -46,6 +46,12 @@ type Config struct {
|
||||
DefaultModel string
|
||||
MaxToolIterations int
|
||||
RequestTimeoutSecs int
|
||||
|
||||
// LLM retry policy
|
||||
// GATEWAY_MAX_LLM_RETRIES — additional attempts after a failure/empty response (default 3).
|
||||
MaxLLMRetries int
|
||||
// GATEWAY_RETRY_DELAY_SECS — initial delay before first retry in seconds (default 2).
|
||||
RetryDelaySecs int
|
||||
}
|
||||
|
||||
func Load() *Config {
|
||||
@@ -55,6 +61,8 @@ func Load() *Config {
|
||||
|
||||
maxIter, _ := strconv.Atoi(getEnv("GATEWAY_MAX_TOOL_ITERATIONS", "10"))
|
||||
timeout, _ := strconv.Atoi(getEnv("GATEWAY_REQUEST_TIMEOUT_SECS", "120"))
|
||||
maxLLMRetries, _ := strconv.Atoi(getEnv("GATEWAY_MAX_LLM_RETRIES", "3"))
|
||||
retryDelaySecs, _ := strconv.Atoi(getEnv("GATEWAY_RETRY_DELAY_SECS", "2"))
|
||||
|
||||
// Resolve LLM base URL — priority: LLM_BASE_URL > OLLAMA_BASE_URL > default cloud
|
||||
rawLLMURL := getEnvFirst(
|
||||
@@ -82,6 +90,8 @@ func Load() *Config {
|
||||
DefaultModel: getEnv("DEFAULT_MODEL", "qwen2.5:7b"),
|
||||
MaxToolIterations: maxIter,
|
||||
RequestTimeoutSecs: timeout,
|
||||
MaxLLMRetries: maxLLMRetries,
|
||||
RetryDelaySecs: retryDelaySecs,
|
||||
}
|
||||
|
||||
if cfg.LLMAPIKey == "" {
|
||||
|
||||
@@ -761,24 +761,37 @@ func (h *Handler) StartChatSession(w http.ResponseWriter, r *http.Request) {
|
||||
time.Duration(h.cfg.RequestTimeoutSecs)*time.Second)
|
||||
defer cancel()
|
||||
|
||||
result := h.orch.ChatWithEvents(ctx, messages, model, maxIter, func(step orchestrator.ToolCallStep) {
|
||||
argsJSON, _ := json.Marshal(step.Args)
|
||||
resultStr := ""
|
||||
if step.Result != nil {
|
||||
b, _ := json.Marshal(step.Result)
|
||||
resultStr = string(b)
|
||||
}
|
||||
_ = h.db.AppendEvent(db.ChatEventRow{
|
||||
SessionID: sessionID,
|
||||
EventType: "tool_call",
|
||||
ToolName: step.Tool,
|
||||
ToolArgs: string(argsJSON),
|
||||
ToolResult: resultStr,
|
||||
ToolSuccess: step.Success,
|
||||
DurationMs: int(step.DurationMs),
|
||||
ErrorMsg: step.Error,
|
||||
})
|
||||
})
|
||||
result := h.orch.ChatWithEventsAndRetry(ctx, messages, model, maxIter,
|
||||
// onToolCall — store each tool execution as an event
|
||||
func(step orchestrator.ToolCallStep) {
|
||||
argsJSON, _ := json.Marshal(step.Args)
|
||||
resultStr := ""
|
||||
if step.Result != nil {
|
||||
b, _ := json.Marshal(step.Result)
|
||||
resultStr = string(b)
|
||||
}
|
||||
_ = h.db.AppendEvent(db.ChatEventRow{
|
||||
SessionID: sessionID,
|
||||
EventType: "tool_call",
|
||||
ToolName: step.Tool,
|
||||
ToolArgs: string(argsJSON),
|
||||
ToolResult: resultStr,
|
||||
ToolSuccess: step.Success,
|
||||
DurationMs: int(step.DurationMs),
|
||||
ErrorMsg: step.Error,
|
||||
})
|
||||
},
|
||||
// onRetry — emit a "thinking" event so the client sees retry progress
|
||||
func(attempt int, reason string) {
|
||||
msg := fmt.Sprintf("⟳ Retry %d: %s", attempt, reason)
|
||||
log.Printf("[Orchestrator] %s", msg)
|
||||
_ = h.db.AppendEvent(db.ChatEventRow{
|
||||
SessionID: sessionID,
|
||||
EventType: "thinking",
|
||||
Content: msg,
|
||||
})
|
||||
},
|
||||
)
|
||||
|
||||
processingMs := time.Since(startTime).Milliseconds()
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"git.softuniq.eu/UniqAI/GoClaw/gateway/internal/db"
|
||||
@@ -53,6 +54,30 @@ type OrchestratorConfig struct {
|
||||
MaxTokens int
|
||||
}
|
||||
|
||||
// RetryPolicy controls how the orchestrator retries failed or empty LLM calls.
type RetryPolicy struct {
	// MaxLLMRetries is the number of additional attempts after a failure.
	// Total attempts = MaxLLMRetries + 1. Default: 3 (4 total).
	MaxLLMRetries int
	// InitialDelay before the first retry; the delay is doubled after each
	// failed attempt (exponential back-off). Default: 2s.
	InitialDelay time.Duration
	// MaxDelay caps the exponential back-off. Default: 30s.
	MaxDelay time.Duration
	// RetryOnEmpty means an empty-content response is treated as a soft failure
	// and triggers a retry. Default: true.
	RetryOnEmpty bool
}
|
||||
|
||||
// defaultRetryPolicy returns the retry policy used when no override has been
// installed via SetRetryPolicy: 3 retries (4 attempts total), 2s initial
// delay doubling up to a 30s cap, with retry-on-empty enabled.
func defaultRetryPolicy() RetryPolicy {
	return RetryPolicy{
		MaxLLMRetries: 3,
		InitialDelay:  2 * time.Second,
		MaxDelay:      30 * time.Second,
		RetryOnEmpty:  true,
	}
}
|
||||
|
||||
// ─── Default System Prompt ────────────────────────────────────────────────────
|
||||
|
||||
const defaultSystemPrompt = `You are GoClaw Orchestrator — the main AI agent managing the GoClaw distributed AI system.
|
||||
@@ -88,6 +113,7 @@ type Orchestrator struct {
|
||||
executor *tools.Executor
|
||||
database *db.DB
|
||||
projectRoot string
|
||||
retry RetryPolicy
|
||||
}
|
||||
|
||||
func New(llmClient *llm.Client, database *db.DB, projectRoot string) *Orchestrator {
|
||||
@@ -95,12 +121,18 @@ func New(llmClient *llm.Client, database *db.DB, projectRoot string) *Orchestrat
|
||||
llmClient: llmClient,
|
||||
database: database,
|
||||
projectRoot: projectRoot,
|
||||
retry: defaultRetryPolicy(),
|
||||
}
|
||||
// Inject agent list function to avoid circular dependency
|
||||
o.executor = tools.NewExecutor(projectRoot, o.listAgentsFn)
|
||||
return o
|
||||
}
|
||||
|
||||
// SetRetryPolicy overrides the default retry policy for all subsequent LLM
// calls made by this orchestrator.
// NOTE(review): the field is written without synchronization — presumably
// this is only called once at startup (main.go wiring); confirm before
// calling it while requests are in flight.
func (o *Orchestrator) SetRetryPolicy(p RetryPolicy) {
	o.retry = p
}
|
||||
|
||||
// GetConfig loads orchestrator config from DB, falls back to defaults.
|
||||
func (o *Orchestrator) GetConfig() *OrchestratorConfig {
|
||||
if o.database != nil {
|
||||
@@ -159,28 +191,160 @@ func (o *Orchestrator) resolveModel(ctx context.Context, desired string) (model
|
||||
return fallback, warning
|
||||
}
|
||||
|
||||
// Chat runs the full orchestration loop: LLM → tool calls → LLM → response.
|
||||
func (o *Orchestrator) Chat(ctx context.Context, messages []Message, overrideModel string, maxIter int) ChatResult {
|
||||
if maxIter <= 0 {
|
||||
maxIter = 10
|
||||
// ─── LLM call with retry ──────────────────────────────────────────────────────
|
||||
|
||||
// llmCallResult holds one attempt's outcome.
type llmCallResult struct {
	resp       *llm.ChatResponse // nil when err is set
	usedTools  bool              // whether the call was made with tools enabled
	err        error             // set only when all attempts failed hard or ctx was cancelled
	attemptNum int               // 1-based index of the attempt that produced this result
}
|
||||
|
||||
// callLLMWithRetry calls the LLM and retries on error or empty response.
|
||||
// It also strips tools on the second attempt if the first fails with tools.
|
||||
func (o *Orchestrator) callLLMWithRetry(
|
||||
ctx context.Context,
|
||||
req llm.ChatRequest,
|
||||
model string,
|
||||
onRetry func(attempt int, reason string), // optional event callback (may be nil)
|
||||
) llmCallResult {
|
||||
policy := o.retry
|
||||
delay := policy.InitialDelay
|
||||
maxAttempts := policy.MaxLLMRetries + 1
|
||||
hasTools := len(req.Tools) > 0
|
||||
|
||||
for attempt := 1; attempt <= maxAttempts; attempt++ {
|
||||
// On attempt > 1, always strip tools (avoid repeated tool-format errors)
|
||||
useTools := hasTools && attempt == 1
|
||||
r := req
|
||||
if !useTools {
|
||||
r.Tools = nil
|
||||
r.ToolChoice = ""
|
||||
}
|
||||
|
||||
resp, err := o.llmClient.Chat(ctx, r)
|
||||
|
||||
// ── Hard error (network, auth, etc.) ─────────────────────────
|
||||
if err != nil {
|
||||
reason := fmt.Sprintf("LLM error (attempt %d/%d): %v", attempt, maxAttempts, err)
|
||||
log.Printf("[Orchestrator] %s", reason)
|
||||
|
||||
if attempt < maxAttempts {
|
||||
if onRetry != nil {
|
||||
onRetry(attempt, reason)
|
||||
}
|
||||
o.sleep(ctx, delay)
|
||||
delay = min(delay*2, policy.MaxDelay)
|
||||
continue
|
||||
}
|
||||
return llmCallResult{err: fmt.Errorf("LLM error after %d attempts (model: %s): %w", maxAttempts, model, err), attemptNum: attempt}
|
||||
}
|
||||
|
||||
// ── Context cancelled ─────────────────────────────────────────
|
||||
if ctx.Err() != nil {
|
||||
return llmCallResult{err: ctx.Err(), attemptNum: attempt}
|
||||
}
|
||||
|
||||
// ── Empty choices ─────────────────────────────────────────────
|
||||
if len(resp.Choices) == 0 {
|
||||
reason := fmt.Sprintf("empty choices (attempt %d/%d)", attempt, maxAttempts)
|
||||
log.Printf("[Orchestrator] %s", reason)
|
||||
|
||||
if attempt < maxAttempts {
|
||||
if onRetry != nil {
|
||||
onRetry(attempt, reason)
|
||||
}
|
||||
o.sleep(ctx, delay)
|
||||
delay = min(delay*2, policy.MaxDelay)
|
||||
continue
|
||||
}
|
||||
return llmCallResult{resp: resp, usedTools: useTools, attemptNum: attempt}
|
||||
}
|
||||
|
||||
content := strings.TrimSpace(resp.Choices[0].Message.Content)
|
||||
finishReason := resp.Choices[0].FinishReason
|
||||
|
||||
// ── Empty content AND no tool calls — retry ───────────────────
|
||||
if policy.RetryOnEmpty &&
|
||||
content == "" &&
|
||||
finishReason != "tool_calls" &&
|
||||
len(resp.Choices[0].Message.ToolCalls) == 0 {
|
||||
|
||||
reason := fmt.Sprintf("empty response content (attempt %d/%d, finish_reason=%q)", attempt, maxAttempts, finishReason)
|
||||
log.Printf("[Orchestrator] %s", reason)
|
||||
|
||||
if attempt < maxAttempts {
|
||||
if onRetry != nil {
|
||||
onRetry(attempt, reason)
|
||||
}
|
||||
o.sleep(ctx, delay)
|
||||
delay = min(delay*2, policy.MaxDelay)
|
||||
continue
|
||||
}
|
||||
// Exhausted retries — return what we have (even if empty)
|
||||
log.Printf("[Orchestrator] All %d attempts exhausted — returning empty response", maxAttempts)
|
||||
return llmCallResult{resp: resp, usedTools: useTools, attemptNum: attempt}
|
||||
}
|
||||
|
||||
// ── Success ───────────────────────────────────────────────────
|
||||
if attempt > 1 {
|
||||
log.Printf("[Orchestrator] Succeeded on attempt %d/%d", attempt, maxAttempts)
|
||||
}
|
||||
return llmCallResult{resp: resp, usedTools: useTools, attemptNum: attempt}
|
||||
}
|
||||
|
||||
// Should not be reached
|
||||
return llmCallResult{err: fmt.Errorf("retry loop exited unexpectedly"), attemptNum: maxAttempts}
|
||||
}
|
||||
|
||||
// sleep waits for d, returning early if ctx is cancelled.
|
||||
func (o *Orchestrator) sleep(ctx context.Context, d time.Duration) {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
case <-time.After(d):
|
||||
}
|
||||
}
|
||||
|
||||
// min returns the smaller of two durations.
|
||||
func min(a, b time.Duration) time.Duration {
|
||||
if a < b {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// ─── Core loop (shared by Chat and ChatWithEvents) ────────────────────────────
|
||||
|
||||
type loopOptions struct {
|
||||
messages []Message
|
||||
overrideModel string
|
||||
maxIter int
|
||||
onToolCall func(ToolCallStep) // may be nil
|
||||
onRetry func(attempt int, reason string) // may be nil
|
||||
}
|
||||
|
||||
func (o *Orchestrator) runLoop(ctx context.Context, opts loopOptions) ChatResult {
|
||||
if opts.maxIter <= 0 {
|
||||
opts.maxIter = 10
|
||||
}
|
||||
|
||||
cfg := o.GetConfig()
|
||||
model := cfg.Model
|
||||
if overrideModel != "" {
|
||||
model = overrideModel
|
||||
if opts.overrideModel != "" {
|
||||
model = opts.overrideModel
|
||||
}
|
||||
|
||||
// Validate model against LLM API — fall back if unavailable (prevents 401/404)
|
||||
model, modelWarning := o.resolveModel(ctx, model)
|
||||
|
||||
log.Printf("[Orchestrator] Chat started: model=%s, messages=%d", model, len(messages))
|
||||
log.Printf("[Orchestrator] Loop started: model=%s, messages=%d, maxIter=%d, maxRetries=%d",
|
||||
model, len(opts.messages), opts.maxIter, o.retry.MaxLLMRetries)
|
||||
|
||||
// Build conversation
|
||||
conv := []llm.Message{
|
||||
{Role: "system", Content: cfg.SystemPrompt},
|
||||
}
|
||||
for _, m := range messages {
|
||||
for _, m := range opts.messages {
|
||||
conv = append(conv, llm.Message{Role: m.Role, Content: m.Content})
|
||||
}
|
||||
|
||||
@@ -206,7 +370,7 @@ func (o *Orchestrator) Chat(ctx context.Context, messages []Message, overrideMod
|
||||
var lastUsage *llm.Usage
|
||||
var lastModel string
|
||||
|
||||
for iter := 0; iter < maxIter; iter++ {
|
||||
for iter := 0; iter < opts.maxIter; iter++ {
|
||||
req := llm.ChatRequest{
|
||||
Model: model,
|
||||
Messages: conv,
|
||||
@@ -216,29 +380,22 @@ func (o *Orchestrator) Chat(ctx context.Context, messages []Message, overrideMod
|
||||
ToolChoice: "auto",
|
||||
}
|
||||
|
||||
resp, err := o.llmClient.Chat(ctx, req)
|
||||
if err != nil {
|
||||
// Fallback: try without tools
|
||||
log.Printf("[Orchestrator] LLM error with tools: %v — retrying without tools", err)
|
||||
req.Tools = nil
|
||||
req.ToolChoice = ""
|
||||
resp2, err2 := o.llmClient.Chat(ctx, req)
|
||||
if err2 != nil {
|
||||
return ChatResult{
|
||||
Success: false,
|
||||
ModelWarning: modelWarning,
|
||||
Error: fmt.Sprintf("LLM error (model: %s): %v", model, err2),
|
||||
}
|
||||
// ── LLM call with retry ────────────────────────────────────
|
||||
callRes := o.callLLMWithRetry(ctx, req, model, opts.onRetry)
|
||||
|
||||
if callRes.err != nil {
|
||||
return ChatResult{
|
||||
Success: false,
|
||||
ToolCalls: toolCallSteps,
|
||||
Model: model,
|
||||
ModelWarning: modelWarning,
|
||||
Error: callRes.err.Error(),
|
||||
}
|
||||
if len(resp2.Choices) > 0 {
|
||||
finalResponse = resp2.Choices[0].Message.Content
|
||||
lastUsage = resp2.Usage
|
||||
lastModel = resp2.Model
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
resp := callRes.resp
|
||||
if len(resp.Choices) == 0 {
|
||||
log.Printf("[Orchestrator] No choices in response — stopping loop at iter %d", iter)
|
||||
break
|
||||
}
|
||||
|
||||
@@ -249,19 +406,17 @@ func (o *Orchestrator) Chat(ctx context.Context, messages []Message, overrideMod
|
||||
lastModel = model
|
||||
}
|
||||
|
||||
// Check if LLM wants to call tools
|
||||
// ── Tool calls ─────────────────────────────────────────────
|
||||
if choice.FinishReason == "tool_calls" && len(choice.Message.ToolCalls) > 0 {
|
||||
// Add assistant message with tool calls to conversation
|
||||
conv = append(conv, choice.Message)
|
||||
|
||||
// Execute each tool call
|
||||
for _, tc := range choice.Message.ToolCalls {
|
||||
toolName := tc.Function.Name
|
||||
argsJSON := tc.Function.Arguments
|
||||
|
||||
log.Printf("[Orchestrator] Executing tool: %s args=%s", toolName, argsJSON)
|
||||
start := time.Now()
|
||||
|
||||
result := o.executor.Execute(ctx, toolName, argsJSON)
|
||||
|
||||
step := ToolCallStep{
|
||||
@@ -270,7 +425,6 @@ func (o *Orchestrator) Chat(ctx context.Context, messages []Message, overrideMod
|
||||
DurationMs: time.Since(start).Milliseconds(),
|
||||
}
|
||||
|
||||
// Parse args for display
|
||||
var argsMap any
|
||||
_ = json.Unmarshal([]byte(argsJSON), &argsMap)
|
||||
step.Args = argsMap
|
||||
@@ -287,7 +441,10 @@ func (o *Orchestrator) Chat(ctx context.Context, messages []Message, overrideMod
|
||||
|
||||
toolCallSteps = append(toolCallSteps, step)
|
||||
|
||||
// Add tool result to conversation
|
||||
if opts.onToolCall != nil {
|
||||
opts.onToolCall(step)
|
||||
}
|
||||
|
||||
conv = append(conv, llm.Message{
|
||||
Role: "tool",
|
||||
Content: toolResultContent,
|
||||
@@ -299,7 +456,7 @@ func (o *Orchestrator) Chat(ctx context.Context, messages []Message, overrideMod
|
||||
continue
|
||||
}
|
||||
|
||||
// LLM finished — extract final response
|
||||
// ── Final response ─────────────────────────────────────────
|
||||
finalResponse = choice.Message.Content
|
||||
break
|
||||
}
|
||||
@@ -314,151 +471,55 @@ func (o *Orchestrator) Chat(ctx context.Context, messages []Message, overrideMod
|
||||
}
|
||||
}
|
||||
|
||||
// ChatWithEvents runs the full orchestration loop and calls onToolCall for each tool execution.
|
||||
// This enables SSE streaming of tool calls in real time.
|
||||
func (o *Orchestrator) ChatWithEvents(ctx context.Context, messages []Message, overrideModel string, maxIter int, onToolCall func(ToolCallStep)) ChatResult {
|
||||
if maxIter <= 0 {
|
||||
maxIter = 10
|
||||
}
|
||||
// ─── Public API ───────────────────────────────────────────────────────────────
|
||||
|
||||
cfg := o.GetConfig()
|
||||
model := cfg.Model
|
||||
if overrideModel != "" {
|
||||
model = overrideModel
|
||||
}
|
||||
|
||||
model, modelWarning := o.resolveModel(ctx, model)
|
||||
log.Printf("[Orchestrator] ChatWithEvents started: model=%s, messages=%d", model, len(messages))
|
||||
|
||||
conv := []llm.Message{
|
||||
{Role: "system", Content: cfg.SystemPrompt},
|
||||
}
|
||||
for _, m := range messages {
|
||||
conv = append(conv, llm.Message{Role: m.Role, Content: m.Content})
|
||||
}
|
||||
|
||||
toolDefs := tools.OrchestratorTools()
|
||||
llmTools := make([]llm.Tool, len(toolDefs))
|
||||
for i, t := range toolDefs {
|
||||
llmTools[i] = llm.Tool{
|
||||
Type: t.Type,
|
||||
Function: llm.ToolFunction{
|
||||
Name: t.Function.Name,
|
||||
Description: t.Function.Description,
|
||||
Parameters: t.Function.Parameters,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
temp := cfg.Temperature
|
||||
maxTok := cfg.MaxTokens
|
||||
|
||||
var toolCallSteps []ToolCallStep
|
||||
var finalResponse string
|
||||
var lastUsage *llm.Usage
|
||||
var lastModel string
|
||||
|
||||
for iter := 0; iter < maxIter; iter++ {
|
||||
req := llm.ChatRequest{
|
||||
Model: model,
|
||||
Messages: conv,
|
||||
Temperature: &temp,
|
||||
MaxTokens: &maxTok,
|
||||
Tools: llmTools,
|
||||
ToolChoice: "auto",
|
||||
}
|
||||
|
||||
resp, err := o.llmClient.Chat(ctx, req)
|
||||
if err != nil {
|
||||
log.Printf("[Orchestrator] LLM error with tools: %v — retrying without tools", err)
|
||||
req.Tools = nil
|
||||
req.ToolChoice = ""
|
||||
resp2, err2 := o.llmClient.Chat(ctx, req)
|
||||
if err2 != nil {
|
||||
return ChatResult{
|
||||
Success: false,
|
||||
ModelWarning: modelWarning,
|
||||
Error: fmt.Sprintf("LLM error (model: %s): %v", model, err2),
|
||||
}
|
||||
}
|
||||
if len(resp2.Choices) > 0 {
|
||||
finalResponse = resp2.Choices[0].Message.Content
|
||||
lastUsage = resp2.Usage
|
||||
lastModel = resp2.Model
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
if len(resp.Choices) == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
choice := resp.Choices[0]
|
||||
lastUsage = resp.Usage
|
||||
lastModel = resp.Model
|
||||
if lastModel == "" {
|
||||
lastModel = model
|
||||
}
|
||||
|
||||
if choice.FinishReason == "tool_calls" && len(choice.Message.ToolCalls) > 0 {
|
||||
conv = append(conv, choice.Message)
|
||||
|
||||
for _, tc := range choice.Message.ToolCalls {
|
||||
toolName := tc.Function.Name
|
||||
argsJSON := tc.Function.Arguments
|
||||
|
||||
log.Printf("[Orchestrator] Executing tool: %s args=%s", toolName, argsJSON)
|
||||
start := time.Now()
|
||||
result := o.executor.Execute(ctx, toolName, argsJSON)
|
||||
|
||||
step := ToolCallStep{
|
||||
Tool: toolName,
|
||||
Success: result.Success,
|
||||
DurationMs: time.Since(start).Milliseconds(),
|
||||
}
|
||||
var argsMap any
|
||||
_ = json.Unmarshal([]byte(argsJSON), &argsMap)
|
||||
step.Args = argsMap
|
||||
|
||||
var toolResultContent string
|
||||
if result.Success {
|
||||
step.Result = result.Result
|
||||
resultBytes, _ := json.Marshal(result.Result)
|
||||
toolResultContent = string(resultBytes)
|
||||
} else {
|
||||
step.Error = result.Error
|
||||
toolResultContent = fmt.Sprintf(`{"error": %q}`, result.Error)
|
||||
}
|
||||
|
||||
toolCallSteps = append(toolCallSteps, step)
|
||||
if onToolCall != nil {
|
||||
onToolCall(step)
|
||||
}
|
||||
|
||||
conv = append(conv, llm.Message{
|
||||
Role: "tool",
|
||||
Content: toolResultContent,
|
||||
ToolCallID: tc.ID,
|
||||
Name: toolName,
|
||||
})
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
finalResponse = choice.Message.Content
|
||||
break
|
||||
}
|
||||
|
||||
return ChatResult{
|
||||
Success: true,
|
||||
Response: finalResponse,
|
||||
ToolCalls: toolCallSteps,
|
||||
Model: lastModel,
|
||||
ModelWarning: modelWarning,
|
||||
Usage: lastUsage,
|
||||
}
|
||||
// Chat runs the full orchestration loop: LLM → tool calls → LLM → response.
|
||||
func (o *Orchestrator) Chat(ctx context.Context, messages []Message, overrideModel string, maxIter int) ChatResult {
|
||||
return o.runLoop(ctx, loopOptions{
|
||||
messages: messages,
|
||||
overrideModel: overrideModel,
|
||||
maxIter: maxIter,
|
||||
})
|
||||
}
|
||||
|
||||
// ChatWithEvents runs the full orchestration loop and calls callbacks for each
|
||||
// tool execution and each retry attempt. Used for SSE streaming and DB event logging.
|
||||
func (o *Orchestrator) ChatWithEvents(
|
||||
ctx context.Context,
|
||||
messages []Message,
|
||||
overrideModel string,
|
||||
maxIter int,
|
||||
onToolCall func(ToolCallStep),
|
||||
) ChatResult {
|
||||
return o.runLoop(ctx, loopOptions{
|
||||
messages: messages,
|
||||
overrideModel: overrideModel,
|
||||
maxIter: maxIter,
|
||||
onToolCall: onToolCall,
|
||||
})
|
||||
}
|
||||
|
||||
// ChatWithEventsAndRetry is the full-featured variant that also reports retry
|
||||
// attempts through onRetry so they can be streamed to the client.
|
||||
func (o *Orchestrator) ChatWithEventsAndRetry(
|
||||
ctx context.Context,
|
||||
messages []Message,
|
||||
overrideModel string,
|
||||
maxIter int,
|
||||
onToolCall func(ToolCallStep),
|
||||
onRetry func(attempt int, reason string),
|
||||
) ChatResult {
|
||||
return o.runLoop(ctx, loopOptions{
|
||||
messages: messages,
|
||||
overrideModel: overrideModel,
|
||||
maxIter: maxIter,
|
||||
onToolCall: onToolCall,
|
||||
onRetry: onRetry,
|
||||
})
|
||||
}
|
||||
|
||||
// ─── Helpers ──────────────────────────────────────────────────────────────────
|
||||
|
||||
// listAgentsFn is injected into the tool executor to list agents from DB.
|
||||
func (o *Orchestrator) listAgentsFn() ([]map[string]any, error) {
|
||||
if o.database == nil {
|
||||
|
||||
Reference in New Issue
Block a user