feat(retry): LLM retry-on-failure for orchestrator — never returns empty response
Problem: when the LLM returned empty content or a network error occurred, the
orchestrator immediately stopped with "(no response)", which the user saw as a blank reply.
Solution — 4-layer retry system:
## Go Gateway (gateway/internal/orchestrator/orchestrator.go)
- Extracted shared runLoop() used by Chat(), ChatWithEvents(), ChatWithEventsAndRetry()
- Added RetryPolicy struct: MaxLLMRetries (default 3), InitialDelay (2s),
MaxDelay (30s), RetryOnEmpty (true)
- callLLMWithRetry(): wraps every LLM call with exponential back-off:
* retries on HTTP/network error
* retries on empty choices array
* retries when content=="" AND finish_reason!="tool_calls" (soft empty)
* strips tools on attempt > 1 (avoids repeated tool-format errors)
* logs each attempt; total attempts = MaxLLMRetries + 1 (default: 4)
- Added ChatWithEventsAndRetry() with onRetry callback for client visibility
- SetRetryPolicy() for runtime override
## Config (gateway/config/config.go)
- New fields: MaxLLMRetries (GATEWAY_MAX_LLM_RETRIES, default 3)
RetryDelaySecs (GATEWAY_RETRY_DELAY_SECS, default 2)
## main.go — wires retry policy from config into orchestrator
## docker-compose.yml
- GATEWAY_REQUEST_TIMEOUT_SECS: 120 → 300 (accommodates up to 4 attempts: the initial call plus 3 retries)
- GATEWAY_MAX_LLM_RETRIES=3, GATEWAY_RETRY_DELAY_SECS=2 env vars
## API (handlers.go)
- StartChatSession goroutine now uses ChatWithEventsAndRetry
- onRetry callback emits "thinking" DB event with content "⟳ Retry N: reason"
so the client sees retry progress in the console panel
## Frontend (client/src/lib/chatStore.ts + client/src/pages/Chat.tsx)
- ConsoleEntry gains content?: string and new type "retry"
- thinking events with content starting "⟳ Retry" → type=retry (amber)
- Chat ConsolePanel renders retry events in amber with RefreshCw icon
and shows the retry reason string underneath
This commit is contained in:
@@ -53,7 +53,7 @@ export interface Conversation {
|
||||
|
||||
export interface ConsoleEntry {
|
||||
id: string;
|
||||
type: "thinking" | "tool_call" | "done" | "error";
|
||||
type: "thinking" | "tool_call" | "done" | "error" | "retry";
|
||||
tool?: string;
|
||||
args?: any;
|
||||
result?: any;
|
||||
@@ -62,6 +62,8 @@ export interface ConsoleEntry {
|
||||
durationMs?: number;
|
||||
timestamp: string;
|
||||
model?: string;
|
||||
/** For thinking events: extra message text (e.g. retry reason) */
|
||||
content?: string;
|
||||
}
|
||||
|
||||
type StoreEvent = "update" | "console";
|
||||
@@ -439,9 +441,16 @@ class ChatStore {
|
||||
if (ev.seq > maxSeq) maxSeq = ev.seq;
|
||||
|
||||
switch (ev.eventType) {
|
||||
case "thinking":
|
||||
this.addConsoleEntry({ type: "thinking" });
|
||||
case "thinking": {
|
||||
// If content starts with retry prefix, show as retry event
|
||||
const thinkMsg = ev.content || "";
|
||||
if (thinkMsg.startsWith("⟳ Retry")) {
|
||||
this.addConsoleEntry({ type: "retry", content: thinkMsg });
|
||||
} else {
|
||||
this.addConsoleEntry({ type: "thinking", content: thinkMsg || undefined });
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case "tool_call": {
|
||||
let args: any = {};
|
||||
|
||||
@@ -51,6 +51,7 @@ import {
|
||||
PanelRightOpen,
|
||||
Shell,
|
||||
StopCircle,
|
||||
RefreshCw,
|
||||
} from "lucide-react";
|
||||
|
||||
// ─── useChatStore hook ────────────────────────────────────────────────────────
|
||||
@@ -295,6 +296,8 @@ function ConsolePanel({ entries }: { entries: ConsoleEntry[] }) {
|
||||
animate={{ opacity: 1, x: 0 }}
|
||||
className={`rounded p-2 border ${
|
||||
e.type === "thinking" ? "bg-cyan-500/10 border-cyan-500/20 text-cyan-400"
|
||||
: e.type === "retry"
|
||||
? "bg-amber-500/10 border-amber-500/20 text-amber-400"
|
||||
: e.type === "tool_call"
|
||||
? e.success !== false ? "bg-green-500/10 border-green-500/20 text-green-300"
|
||||
: "bg-red-500/10 border-red-500/20 text-red-300"
|
||||
@@ -304,11 +307,13 @@ function ConsolePanel({ entries }: { entries: ConsoleEntry[] }) {
|
||||
>
|
||||
<div className="flex items-center gap-1.5 mb-1">
|
||||
{e.type === "thinking" && <Loader2 className="w-3 h-3 animate-spin" />}
|
||||
{e.type === "retry" && <RefreshCw className="w-3 h-3 text-amber-400" />}
|
||||
{e.type === "tool_call" && <ToolIcon tool={e.tool ?? ""} />}
|
||||
{e.type === "done" && <CheckCircle className="w-3 h-3" />}
|
||||
{e.type === "error" && <XCircle className="w-3 h-3" />}
|
||||
<span className="font-semibold">
|
||||
{e.type === "thinking" ? "thinking…"
|
||||
: e.type === "retry" ? "retry"
|
||||
: e.type === "tool_call" ? toolLabel(e.tool ?? "")
|
||||
: e.type === "done" ? `done · ${e.model ?? ""}`
|
||||
: "error"}
|
||||
@@ -316,6 +321,11 @@ function ConsolePanel({ entries }: { entries: ConsoleEntry[] }) {
|
||||
<span className="ml-auto text-[9px] opacity-60">{e.timestamp}</span>
|
||||
</div>
|
||||
|
||||
{/* Retry reason or thinking message */}
|
||||
{(e.type === "retry" || e.type === "thinking") && e.content && (
|
||||
<div className="text-[9px] opacity-80 mt-0.5 font-mono break-words">{e.content}</div>
|
||||
)}
|
||||
|
||||
{e.type === "tool_call" && e.args && (
|
||||
<pre className="text-[9px] opacity-70 overflow-hidden max-h-16 whitespace-pre-wrap">
|
||||
{JSON.stringify(e.args, null, 1).slice(0, 200)}
|
||||
|
||||
Reference in New Issue
Block a user