fix: detect and remove markdown block syntax that LLMs sometimes hallucinate for file actions (#886)

* Clean out markdown syntax

* Remove indentation removal

* Improve for streaming
This commit is contained in:
Eduard Ruzga 2024-12-29 11:53:20 +02:00 committed by GitHub
parent 4844db82fa
commit aecd9b1b40
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -52,6 +52,17 @@ interface MessageState {
actionId: number;
}
/**
 * Strips a markdown code fence that wraps the entire content.
 *
 * The content is only unwrapped when it consists of exactly one fenced block:
 * an opening ``` line (optionally followed by an info string such as `ts` or
 * `c++`), the body, and a closing ``` line, with nothing but whitespace around
 * them. Anything else, including a partially streamed block whose closing
 * fence has not arrived yet, is returned untouched.
 *
 * The body is returned verbatim: its indentation and inner line endings are
 * not modified.
 *
 * @param content - Raw text that may be wrapped in a markdown code fence.
 * @returns The text between the fences, or `content` unchanged when it is not
 *   a single complete fenced block.
 */
function cleanoutMarkdownSyntax(content: string): string {
  // `[^\r\n`]*` accepts any info string (not just word characters), and
  // `\r?\n` lets the fence lines end in either LF or CRLF.
  const codeBlockRegex = /^\s*```[^\r\n`]*\r?\n([\s\S]*?)\r?\n\s*```\s*$/;
  const match = content.match(codeBlockRegex);

  // No logging here: callers invoke this on every streamed chunk, so dumping
  // the whole content each time would flood the console.
  return match?.[1] ?? content;
}
export class StreamingMessageParser {
#messages = new Map<string, MessageState>();
@ -95,6 +106,12 @@ export class StreamingMessageParser {
let content = currentAction.content.trim();
if ('type' in currentAction && currentAction.type === 'file') {
// Remove markdown code block syntax if present and file is not markdown
if (!currentAction.filePath.endsWith('.md')) {
content = cleanoutMarkdownSyntax(content);
console.log('content after cleanup', content);
}
content += '\n';
}
@ -120,7 +137,11 @@ export class StreamingMessageParser {
i = closeIndex + ARTIFACT_ACTION_TAG_CLOSE.length;
} else {
if ('type' in currentAction && currentAction.type === 'file') {
const content = input.slice(i);
let content = input.slice(i);
if (!currentAction.filePath.endsWith('.md')) {
content = cleanoutMarkdownSyntax(content);
}
this._options.callbacks?.onActionStream?.({
artifactId: currentArtifact.id,