/**
 * Matches content that consists solely of one fenced markdown code block:
 * optional leading whitespace, an opening ``` fence with an optional language
 * tag, the body (capture group 1), and a closing ``` fence on its own line.
 *
 * - The language tag may contain non-word characters (`c++`, `objective-c`).
 * - Both LF and CRLF line endings are accepted around the fences.
 *
 * The regex has no `g`/`y` flag, so it carries no `lastIndex` state and is
 * safe to share between calls.
 */
const MARKDOWN_CODE_BLOCK_REGEX = /^\s*```[^\s`]*[ \t]*\r?\n([\s\S]*?)\r?\n\s*```\s*$/;

/**
 * Strips a markdown code fence that wraps the entire content.
 *
 * LLMs sometimes wrap the body of a file action in a fenced code block. When
 * the whole string is exactly one such block, the fence lines are dropped and
 * the inner text is returned unchanged (indentation is preserved). Any other
 * input, including a block whose closing fence has not arrived yet, is
 * returned as-is.
 *
 * @param content - Raw text of the file action.
 * @returns The text inside the fence, or `content` itself when it is not a
 * single fenced block.
 */
function cleanoutMarkdownSyntax(content: string): string {
  const match = MARKDOWN_CODE_BLOCK_REGEX.exec(content);

  // `??` rather than `||`: an empty fenced block must yield '' and not fall
  // back to the original fenced text.
  return match?.[1] ?? content;
}