From d364a6f774cbb7717bc502b84189f305ce50cc58 Mon Sep 17 00:00:00 2001 From: Sam Denty Date: Thu, 22 Aug 2024 14:06:51 +0100 Subject: [PATCH] feat: sanitize user messages (#42) --- .../app/components/chat/AssistantMessage.tsx | 2 +- .../app/components/chat/Markdown.module.scss | 10 +- .../bolt/app/components/chat/Markdown.tsx | 15 ++- .../bolt/app/components/chat/UserMessage.tsx | 2 +- packages/bolt/app/lib/.server/llm/prompts.ts | 5 + .../bolt/app/lib/runtime/message-parser.ts | 2 +- packages/bolt/app/utils/markdown.ts | 116 +++++++++++++++++- packages/bolt/package.json | 4 +- pnpm-lock.yaml | 23 ++++ 9 files changed, 163 insertions(+), 16 deletions(-) diff --git a/packages/bolt/app/components/chat/AssistantMessage.tsx b/packages/bolt/app/components/chat/AssistantMessage.tsx index c30d8114..a5698e97 100644 --- a/packages/bolt/app/components/chat/AssistantMessage.tsx +++ b/packages/bolt/app/components/chat/AssistantMessage.tsx @@ -8,7 +8,7 @@ interface AssistantMessageProps { export const AssistantMessage = memo(({ content }: AssistantMessageProps) => { return (
- {content} + {content}
); }); diff --git a/packages/bolt/app/components/chat/Markdown.module.scss b/packages/bolt/app/components/chat/Markdown.module.scss index da02823c..3da3861c 100644 --- a/packages/bolt/app/components/chat/Markdown.module.scss +++ b/packages/bolt/app/components/chat/Markdown.module.scss @@ -58,9 +58,13 @@ $code-font-size: 13px; color: #6a737d; } - p:not(:last-of-type) { - margin-block-start: 0; - margin-block-end: 16px; + p { + white-space: pre-wrap; + + &:not(:last-of-type) { + margin-block-start: 0; + margin-block-end: 16px; + } } a { diff --git a/packages/bolt/app/components/chat/Markdown.tsx b/packages/bolt/app/components/chat/Markdown.tsx index 4a966e78..a91df43d 100644 --- a/packages/bolt/app/components/chat/Markdown.tsx +++ b/packages/bolt/app/components/chat/Markdown.tsx @@ -2,7 +2,7 @@ import { memo, useMemo } from 'react'; import ReactMarkdown, { type Components } from 'react-markdown'; import type { BundledLanguage } from 'shiki'; import { createScopedLogger } from '~/utils/logger'; -import { rehypePlugins, remarkPlugins } from '~/utils/markdown'; +import { rehypePlugins, remarkPlugins, allowedHTMLElements } from '~/utils/markdown'; import { Artifact } from './Artifact'; import { CodeBlock } from './CodeBlock'; @@ -12,12 +12,14 @@ const logger = createScopedLogger('MarkdownComponent'); interface MarkdownProps { children: string; + html?: boolean; + limitedMarkdown?: boolean; } -export const Markdown = memo(({ children }: MarkdownProps) => { +export const Markdown = memo(({ children, html = false, limitedMarkdown = false }: MarkdownProps) => { logger.trace('Render'); - const components = useMemo(() => { + const components = useMemo(() => { return { div: ({ className, children, node, ...props }) => { if (className?.includes('__boltArtifact__')) { @@ -55,15 +57,16 @@ export const Markdown = memo(({ children }: MarkdownProps) => { return
{children}
; }, - }; + } satisfies Components; }, []); return ( {children} diff --git a/packages/bolt/app/components/chat/UserMessage.tsx b/packages/bolt/app/components/chat/UserMessage.tsx index 702a79a4..2f4e1d52 100644 --- a/packages/bolt/app/components/chat/UserMessage.tsx +++ b/packages/bolt/app/components/chat/UserMessage.tsx @@ -8,7 +8,7 @@ interface UserMessageProps { export function UserMessage({ content }: UserMessageProps) { return (
- {sanitizeUserMessage(content)} + {sanitizeUserMessage(content)}
); } diff --git a/packages/bolt/app/lib/.server/llm/prompts.ts b/packages/bolt/app/lib/.server/llm/prompts.ts index d53c0cd0..f78b4187 100644 --- a/packages/bolt/app/lib/.server/llm/prompts.ts +++ b/packages/bolt/app/lib/.server/llm/prompts.ts @@ -1,4 +1,5 @@ import { MODIFICATIONS_TAG_NAME, WORK_DIR } from '~/utils/constants'; +import { allowedHTMLElements } from '~/utils/markdown'; import { stripIndents } from '~/utils/stripIndent'; export const getSystemPrompt = (cwd: string = WORK_DIR) => ` @@ -35,6 +36,10 @@ You are Bolt, an expert AI assistant and exceptional senior software developer w Use 2 spaces for code indentation + + You can make the output pretty by using only the following available HTML elements: ${allowedHTMLElements.map((tagName) => `<${tagName}>`).join(', ')} + + For user-made file modifications, a \`<${MODIFICATIONS_TAG_NAME}>\` section will appear at the start of the user message. It will contain either \`\` or \`\` elements for each modified file: diff --git a/packages/bolt/app/lib/runtime/message-parser.ts b/packages/bolt/app/lib/runtime/message-parser.ts index c828815f..317f81df 100644 --- a/packages/bolt/app/lib/runtime/message-parser.ts +++ b/packages/bolt/app/lib/runtime/message-parser.ts @@ -272,7 +272,7 @@ export class StreamingMessageParser { const createArtifactElement: ElementFactory = (props) => { const elementProps = [ 'class="__boltArtifact__"', - Object.entries(props).map(([key, value]) => { + ...Object.entries(props).map(([key, value]) => { return `data-${camelToDashCase(key)}=${JSON.stringify(value)}`; }), ]; diff --git a/packages/bolt/app/utils/markdown.ts b/packages/bolt/app/utils/markdown.ts index 7f13d27f..4409b85d 100644 --- a/packages/bolt/app/utils/markdown.ts +++ b/packages/bolt/app/utils/markdown.ts @@ -1,6 +1,116 @@ import rehypeRaw from 'rehype-raw'; import remarkGfm from 'remark-gfm'; -import type { PluggableList } from 'unified'; +import type { PluggableList, Plugin } from 'unified'; +import rehypeSanitize, { defaultSchema, type Options as RehypeSanitizeOptions } from 'rehype-sanitize'; +import { SKIP, visit } from 'unist-util-visit'; +import type { UnistNode, UnistParent } from 'node_modules/unist-util-visit/lib'; -export const remarkPlugins = [remarkGfm] satisfies PluggableList; -export const rehypePlugins = [rehypeRaw] satisfies PluggableList; +export const allowedHTMLElements = [ + 'a', + 'b', + 'blockquote', + 'br', + 'code', + 'dd', + 'del', + 'details', + 'div', + 'dl', + 'dt', + 'em', + 'h1', + 'h2', + 'h3', + 'h4', + 'h5', + 'h6', + 'hr', + 'i', + 'ins', + 'kbd', + 'li', + 'ol', + 'p', + 'pre', + 'q', + 'rp', + 'rt', + 'ruby', + 's', + 'samp', + 'source', + 'span', + 'strike', + 'strong', + 'sub', + 'summary', + 'sup', + 'table', + 'tbody', + 'td', + 'tfoot', + 'th', + 'thead', + 'tr', + 'ul', + 'var', +]; + +const rehypeSanitizeOptions: RehypeSanitizeOptions = { + ...defaultSchema, + tagNames: allowedHTMLElements, + attributes: { + ...defaultSchema.attributes, + div: [...(defaultSchema.attributes?.div ?? []), 'data*', ['className', '__boltArtifact__']], + }, + strip: [], +}; + +export function remarkPlugins(limitedMarkdown: boolean) { + const plugins: PluggableList = [remarkGfm]; + + if (limitedMarkdown) { + plugins.unshift(limitedMarkdownPlugin); + } + + return plugins; +} + +export function rehypePlugins(html: boolean) { + const plugins: PluggableList = []; + + if (html) { + plugins.push(rehypeRaw, [rehypeSanitize, rehypeSanitizeOptions]); + } + + return plugins; +} + +const limitedMarkdownPlugin: Plugin = () => { + return (tree, file) => { + const contents = file.toString(); + + visit(tree, (node: UnistNode, index, parent: UnistParent) => { + if ( + index == null || + ['paragraph', 'text', 'inlineCode', 'code', 'strong', 'emphasis'].includes(node.type) || + !node.position + ) { + return true; + } + + let value = contents.slice(node.position.start.offset, node.position.end.offset); + + if (node.type === 'heading') { + value = `\n${value}`; + } + + parent.children[index] = { + type: 'text', + value, + } as any; + + return [SKIP, index] as const; + }); + }; +}; diff --git a/packages/bolt/package.json b/packages/bolt/package.json index e685b8c8..9a80d54b 100644 --- a/packages/bolt/package.json +++ b/packages/bolt/package.json @@ -64,9 +64,11 @@ "react-resizable-panels": "^2.0.20", "react-toastify": "^10.0.5", "rehype-raw": "^7.0.0", + "rehype-sanitize": "^6.0.0", "remark-gfm": "^4.0.0", "remix-utils": "^7.6.0", - "shiki": "^1.9.1" + "shiki": "^1.9.1", + "unist-util-visit": "^5.0.0" }, "devDependencies": { "@cloudflare/workers-types": "^4.20240620.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index a6481edd..00847013 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -179,6 +179,9 @@ importers: rehype-raw: specifier: ^7.0.0 version: 7.0.0 + rehype-sanitize: + specifier: ^6.0.0 + version: 6.0.0 remark-gfm: specifier: ^4.0.0 version: 4.0.0 @@ -188,6 +191,9 @@ importers: shiki: specifier: ^1.9.1 version: 1.9.1 + unist-util-visit: + specifier: ^5.0.0 + version: 5.0.0 devDependencies: '@cloudflare/workers-types': specifier: ^4.20240620.0 @@ -3214,6 +3220,9 @@ packages: hast-util-raw@9.0.4: resolution: {integrity: sha512-LHE65TD2YiNsHD3YuXcKPHXPLuYh/gjp12mOfU8jxSrm1f/yJpsb0F/KKljS6U9LJoP0Ux+tCe8iJ2AsPzTdgA==} + hast-util-sanitize@5.0.1: + resolution: {integrity: sha512-IGrgWLuip4O2nq5CugXy4GI2V8kx4sFVy5Hd4vF7AR2gxS0N9s7nEAVUyeMtZKZvzrxVsHt73XdTsno1tClIkQ==} + hast-util-to-estree@2.3.3: resolution: {integrity: sha512-ihhPIUPxN0v0w6M5+IiAZZrn0LH2uZomeWwhn7uP7avZC6TE7lIiEh2yBMPr5+zi1aUCXq6VoYRgs2Bw9xmycQ==} @@ -4586,6 +4595,9 @@ packages: rehype-raw@7.0.0: resolution: {integrity: sha512-/aE8hCfKlQeA8LmyeyQvQF3eBiLRGNlfBJEvWH7ivp9sBqs7TNqBL5X3v157rM4IFETqDnIOO+z5M/biZbo9Ww==} + rehype-sanitize@6.0.0: + resolution: {integrity: sha512-CsnhKNsyI8Tub6L4sm5ZFsme4puGfc6pYylvXo1AeqaGbjOYyzNv3qZPwvs0oMJ39eryyeOdmxwUIo94IpEhqg==} + remark-frontmatter@4.0.1: resolution: {integrity: sha512-38fJrB0KnmD3E33a5jZC/5+gGAC2WKNiPw1/fdXJvijBlhA7RCsvJklrYJakS0HedninvaCYW8lQGf9C918GfA==} @@ -8935,6 +8947,12 @@ snapshots: web-namespaces: 2.0.1 zwitch: 2.0.4 + hast-util-sanitize@5.0.1: + dependencies: + '@types/hast': 3.0.4 + '@ungap/structured-clone': 1.2.0 + unist-util-position: 5.0.0 + hast-util-to-estree@2.3.3: dependencies: '@types/estree': 1.0.5 @@ -10699,6 +10717,11 @@ snapshots: hast-util-raw: 9.0.4 vfile: 6.0.1 + rehype-sanitize@6.0.0: + dependencies: + '@types/hast': 3.0.4 + hast-util-sanitize: 5.0.1 + remark-frontmatter@4.0.1: dependencies: '@types/mdast': 3.0.15