diff --git a/server/orchestrator.ts b/server/orchestrator.ts
index 2bbe5e1..0c09719 100644
--- a/server/orchestrator.ts
+++ b/server/orchestrator.ts
@@ -19,7 +19,7 @@ import { join, dirname } from "path";
 import { invokeLLM } from "./_core/llm";
 import { chatCompletion } from "./ollama";
 import { getDb } from "./db";
-import { agents, agentHistory } from "../drizzle/schema";
+import { agents, agentHistory, tasks } from "../drizzle/schema";
 import { eq } from "drizzle-orm";
 
 const execAsync = promisify(exec);
@@ -576,6 +576,40 @@ export async function orchestratorChat(
         tool_choice: "auto",
       });
     } catch (err: any) {
+      // Handle LLM error with task creation and exponential backoff
+      const errorMessage = err.message || String(err);
+      const isTimeoutError = errorMessage.includes('deadline exceeded') || errorMessage.includes('timeout');
+
+      if (isTimeoutError && iterations < 3) {
+        // Create a task to track this error
+        try {
+          const agentId = 1;
+          const conversationId = `conv-${Date.now()}`;
+          const taskId = await createErrorRecoveryTask(
+            agentId,
+            conversationId,
+            `LLM Timeout Error (Attempt ${iterations}/4)`,
+            `Context deadline exceeded on model ${activeModel}. Retrying with exponential backoff.`,
+            iterations
+          );
+
+          // Exponential backoff: 2s, 4s, 8s
+          const backoffMs = Math.pow(2, iterations) * 1000;
+          console.log(`[LLM Error] Waiting ${backoffMs}ms before retry (attempt ${iterations + 1}/4)`);
+          await new Promise(resolve => setTimeout(resolve, backoffMs));
+
+          // Update task status to in_progress
+          if (taskId) {
+            await updateErrorRecoveryTask(taskId, 'in_progress', `Retrying after ${backoffMs}ms backoff`);
+          }
+
+          // Retry the LLM call
+          continue;
+        } catch (taskErr) {
+          console.error('[Task Creation Error]', taskErr);
+        }
+      }
+
       // Fallback: try without tools if model doesn't support them
       try {
         const fallbackResult = await chatCompletion(activeModel, conversation as any, {
@@ -587,11 +621,27 @@ export async function orchestratorChat(
         lastModel = fallbackResult.model ?? activeModel;
         break;
       } catch (fallbackErr: any) {
+        // Create final error task
+        try {
+          const agentId = 1;
+          const conversationId = `conv-${Date.now()}`;
+          await createErrorRecoveryTask(
+            agentId,
+            conversationId,
+            `LLM Error - Final Failure`,
+            `All retry attempts failed. Error: ${fallbackErr.message}`,
+            iterations,
+            'failed'
+          );
+        } catch (taskErr) {
+          console.error('[Final Task Creation Error]', taskErr);
+        }
+
         return {
           success: false,
           response: "",
           toolCalls,
-          error: `LLM error (model: ${activeModel}): ${fallbackErr.message}`,
+          error: `LLM error after ${iterations} attempts (model: ${activeModel}): ${fallbackErr.message}`,
         };
       }
     }
@@ -856,3 +906,94 @@ export async function trackTaskCompletion(
     console.error(`[Orchestrator] Failed to track task completion for task #${taskId}:`, error);
   }
 }
+
+
+/**
+ * Create a task to track LLM error recovery.
+ * Used for automatic error handling and retry logic.
+ *
+ * Best-effort: a missing database yields `null`, and a failed insert is
+ * logged and yields `null`, so errors are not propagated to the caller.
+ *
+ * @param agentId - Stored as `agentId` on the inserted task.
+ * @param conversationId - Stored as `conversationId` on the inserted task.
+ * @param title - Task title.
+ * @param description - Task description.
+ * @param attemptNumber - Recorded as `metadata.attemptNumber`.
+ * @param initialStatus - Status the task is created with; defaults to "pending".
+ * @returns The insert id reported by the driver as a number, or `null` when
+ *   the database is unavailable, the insert fails, or no usable id is reported.
+ */
+async function createErrorRecoveryTask(
+  agentId: number,
+  conversationId: string,
+  title: string,
+  description: string,
+  attemptNumber: number,
+  initialStatus: "pending" | "in_progress" | "completed" | "failed" | "blocked" = "pending"
+): Promise<number | null> {
+  try {
+    const db = await getDb();
+    if (!db) return null;
+
+    const result = await db.insert(tasks).values({
+      agentId,
+      conversationId,
+      title,
+      description,
+      status: initialStatus,
+      priority: "high",
+      metadata: {
+        errorType: "llm_timeout",
+        attemptNumber,
+        createdAt: new Date().toISOString(),
+        autoRecovery: true,
+      },
+    });
+
+    // The insert result is probed both as `[header, ...]` and as a bare header.
+    // A missing or non-numeric insertId is normalized to null instead of leaking undefined.
+    const rawId = (result as any)?.[0]?.insertId ?? (result as any)?.insertId;
+    const insertedId = rawId == null ? NaN : Number(rawId);
+    return Number.isFinite(insertedId) ? insertedId : null;
+  } catch (error) {
+    console.error("[Error Recovery Task] Failed to create task:", error);
+    return null;
+  }
+}
+
+/**
+ * Update error recovery task status.
+ * Used to track retry progress.
+ *
+ * Best-effort: a missing database is a silent no-op, and a failed update is
+ * logged and swallowed so that bookkeeping does not interrupt the caller.
+ *
+ * @param taskId - Id of the task row to update.
+ * @param status - New status; "in_progress" also sets `startedAt` and
+ *   "completed" also sets `completedAt` to the current time.
+ * @param result - Optional result text; passed as `undefined` when omitted
+ *   or empty.
+ */
+async function updateErrorRecoveryTask(
+  taskId: number,
+  status: "pending" | "in_progress" | "completed" | "failed" | "blocked",
+  result?: string
+): Promise<void> {
+  try {
+    const db = await getDb();
+    if (!db) return;
+
+    await db
+      .update(tasks)
+      .set({
+        status,
+        result: result || undefined,
+        ...(status === "in_progress" && { startedAt: new Date() }),
+        ...(status === "completed" && { completedAt: new Date() }),
+      })
+      .where(eq(tasks.id, taskId));
+  } catch (error) {
+    console.error("[Error Recovery Task] Failed to update task:", error);
+  }
+}