bolt.diy/app/lib/runtime/message-parser.ts

325 lines
9.3 KiB
TypeScript
Raw Normal View History

import type { ActionType, BoltAction, BoltActionData, FileAction, ShellAction } from '~/types/actions';
import type { BoltArtifactData } from '~/types/artifact';
import { createScopedLogger } from '~/utils/logger';
import { unreachable } from '~/utils/unreachable';
2024-07-10 16:44:39 +00:00
const ARTIFACT_TAG_OPEN = '<boltArtifact';
const ARTIFACT_TAG_CLOSE = '</boltArtifact>';
const ARTIFACT_ACTION_TAG_OPEN = '<boltAction';
const ARTIFACT_ACTION_TAG_CLOSE = '</boltAction>';
const logger = createScopedLogger('MessageParser');
2024-07-10 16:44:39 +00:00
export interface ArtifactCallbackData extends BoltArtifactData {
messageId: string;
}
2024-07-10 16:44:39 +00:00
export interface ActionCallbackData {
artifactId: string;
messageId: string;
actionId: string;
action: BoltAction;
2024-07-10 16:44:39 +00:00
}
export type ArtifactCallback = (data: ArtifactCallbackData) => void;
export type ActionCallback = (data: ActionCallbackData) => void;
export interface ParserCallbacks {
onArtifactOpen?: ArtifactCallback;
onArtifactClose?: ArtifactCallback;
onActionOpen?: ActionCallback;
onActionStream?: ActionCallback;
onActionClose?: ActionCallback;
2024-07-10 16:44:39 +00:00
}
interface ElementFactoryProps {
messageId: string;
}
type ElementFactory = (props: ElementFactoryProps) => string;
2024-07-10 16:44:39 +00:00
export interface StreamingMessageParserOptions {
callbacks?: ParserCallbacks;
artifactElement?: ElementFactory;
2024-07-10 16:44:39 +00:00
}
interface MessageState {
position: number;
insideArtifact: boolean;
insideAction: boolean;
currentArtifact?: BoltArtifactData;
currentAction: BoltActionData;
actionId: number;
}
function cleanoutMarkdownSyntax(content: string) {
const codeBlockRegex = /^\s*```\w*\n([\s\S]*?)\n\s*```\s*$/;
const match = content.match(codeBlockRegex);
// console.log('matching', !!match, content);
if (match) {
return match[1]; // Remove common leading 4-space indent
} else {
return content;
}
}
2024-07-10 16:44:39 +00:00
export class StreamingMessageParser {
#messages = new Map<string, MessageState>();
2024-07-10 16:44:39 +00:00
2024-11-21 21:05:35 +00:00
constructor(private _options: StreamingMessageParserOptions = {}) {}
2024-07-10 16:44:39 +00:00
parse(messageId: string, input: string) {
let state = this.#messages.get(messageId);
if (!state) {
state = {
position: 0,
insideAction: false,
insideArtifact: false,
currentAction: { content: '' },
actionId: 0,
};
this.#messages.set(messageId, state);
}
2024-07-10 16:44:39 +00:00
let output = '';
let i = state.position;
2024-07-10 16:44:39 +00:00
let earlyBreak = false;
while (i < input.length) {
if (state.insideArtifact) {
const currentArtifact = state.currentArtifact;
if (currentArtifact === undefined) {
unreachable('Artifact not initialized');
}
if (state.insideAction) {
2024-07-10 16:44:39 +00:00
const closeIndex = input.indexOf(ARTIFACT_ACTION_TAG_CLOSE, i);
const currentAction = state.currentAction;
2024-07-10 16:44:39 +00:00
if (closeIndex !== -1) {
currentAction.content += input.slice(i, closeIndex);
2024-07-10 16:44:39 +00:00
let content = currentAction.content.trim();
2024-07-10 16:44:39 +00:00
if ('type' in currentAction && currentAction.type === 'file') {
// Remove markdown code block syntax if present and file is not markdown
if (!currentAction.filePath.endsWith('.md')) {
content = cleanoutMarkdownSyntax(content);
}
2024-07-10 16:44:39 +00:00
content += '\n';
}
currentAction.content = content;
2024-07-10 16:44:39 +00:00
this._options.callbacks?.onActionClose?.({
artifactId: currentArtifact.id,
messageId,
/**
* We decrement the id because it's been incremented already
* when `onActionOpen` was emitted to make sure the ids are
* the same.
*/
actionId: String(state.actionId - 1),
action: currentAction as BoltAction,
});
2024-07-10 16:44:39 +00:00
state.insideAction = false;
state.currentAction = { content: '' };
2024-07-10 16:44:39 +00:00
i = closeIndex + ARTIFACT_ACTION_TAG_CLOSE.length;
} else {
if ('type' in currentAction && currentAction.type === 'file') {
let content = input.slice(i);
if (!currentAction.filePath.endsWith('.md')) {
content = cleanoutMarkdownSyntax(content);
}
this._options.callbacks?.onActionStream?.({
artifactId: currentArtifact.id,
messageId,
actionId: String(state.actionId - 1),
action: {
2024-11-21 21:05:35 +00:00
...(currentAction as FileAction),
content,
filePath: currentAction.filePath,
},
});
}
2024-11-21 21:05:35 +00:00
2024-07-10 16:44:39 +00:00
break;
}
} else {
const actionOpenIndex = input.indexOf(ARTIFACT_ACTION_TAG_OPEN, i);
const artifactCloseIndex = input.indexOf(ARTIFACT_TAG_CLOSE, i);
if (actionOpenIndex !== -1 && (artifactCloseIndex === -1 || actionOpenIndex < artifactCloseIndex)) {
const actionEndIndex = input.indexOf('>', actionOpenIndex);
if (actionEndIndex !== -1) {
state.insideAction = true;
state.currentAction = this.#parseActionTag(input, actionOpenIndex, actionEndIndex);
this._options.callbacks?.onActionOpen?.({
artifactId: currentArtifact.id,
messageId,
actionId: String(state.actionId++),
action: state.currentAction as BoltAction,
});
2024-07-10 16:44:39 +00:00
i = actionEndIndex + 1;
} else {
break;
}
} else if (artifactCloseIndex !== -1) {
this._options.callbacks?.onArtifactClose?.({ messageId, ...currentArtifact });
2024-07-10 16:44:39 +00:00
state.insideArtifact = false;
state.currentArtifact = undefined;
2024-07-10 16:44:39 +00:00
i = artifactCloseIndex + ARTIFACT_TAG_CLOSE.length;
} else {
break;
}
}
} else if (input[i] === '<' && input[i + 1] !== '/') {
let j = i;
let potentialTag = '';
while (j < input.length && potentialTag.length < ARTIFACT_TAG_OPEN.length) {
potentialTag += input[j];
if (potentialTag === ARTIFACT_TAG_OPEN) {
const nextChar = input[j + 1];
if (nextChar && nextChar !== '>' && nextChar !== ' ') {
output += input.slice(i, j + 1);
i = j + 1;
break;
}
const openTagEnd = input.indexOf('>', j);
if (openTagEnd !== -1) {
const artifactTag = input.slice(i, openTagEnd + 1);
const artifactTitle = this.#extractAttribute(artifactTag, 'title') as string;
const type = this.#extractAttribute(artifactTag, 'type') as string;
const artifactId = this.#extractAttribute(artifactTag, 'id') as string;
if (!artifactTitle) {
logger.warn('Artifact title missing');
}
if (!artifactId) {
logger.warn('Artifact id missing');
}
state.insideArtifact = true;
const currentArtifact = {
id: artifactId,
title: artifactTitle,
type,
} satisfies BoltArtifactData;
2024-07-10 16:44:39 +00:00
state.currentArtifact = currentArtifact;
2024-07-10 16:44:39 +00:00
this._options.callbacks?.onArtifactOpen?.({ messageId, ...currentArtifact });
2024-07-10 16:44:39 +00:00
const artifactFactory = this._options.artifactElement ?? createArtifactElement;
output += artifactFactory({ messageId });
2024-07-10 16:44:39 +00:00
i = openTagEnd + 1;
} else {
earlyBreak = true;
}
break;
} else if (!ARTIFACT_TAG_OPEN.startsWith(potentialTag)) {
output += input.slice(i, j + 1);
i = j + 1;
break;
}
j++;
}
if (j === input.length && ARTIFACT_TAG_OPEN.startsWith(potentialTag)) {
break;
}
} else {
output += input[i];
i++;
}
if (earlyBreak) {
break;
}
}
state.position = i;
2024-07-10 16:44:39 +00:00
return output;
}
reset() {
this.#messages.clear();
2024-07-10 16:44:39 +00:00
}
#parseActionTag(input: string, actionOpenIndex: number, actionEndIndex: number) {
const actionTag = input.slice(actionOpenIndex, actionEndIndex + 1);
const actionType = this.#extractAttribute(actionTag, 'type') as ActionType;
const actionAttributes = {
type: actionType,
content: '',
};
if (actionType === 'file') {
const filePath = this.#extractAttribute(actionTag, 'filePath') as string;
if (!filePath) {
logger.debug('File path not specified');
}
(actionAttributes as FileAction).filePath = filePath;
2024-11-21 21:05:35 +00:00
} else if (!['shell', 'start'].includes(actionType)) {
logger.warn(`Unknown action type '${actionType}'`);
}
return actionAttributes as FileAction | ShellAction;
}
2024-07-10 16:44:39 +00:00
#extractAttribute(tag: string, attributeName: string): string | undefined {
const match = tag.match(new RegExp(`${attributeName}="([^"]*)"`, 'i'));
return match ? match[1] : undefined;
}
}
const createArtifactElement: ElementFactory = (props) => {
const elementProps = [
'class="__boltArtifact__"',
2024-08-22 13:06:51 +00:00
...Object.entries(props).map(([key, value]) => {
return `data-${camelToDashCase(key)}=${JSON.stringify(value)}`;
}),
];
return `<div ${elementProps.join(' ')}></div>`;
};
function camelToDashCase(input: string) {
return input.replace(/([a-z])([A-Z])/g, '$1-$2').toLowerCase();
}