refac: katex

This commit is contained in:
Timothy J. Baek
2024-08-09 00:01:38 +02:00
parent 3b370bbcb3
commit 92e77d7b33
8 changed files with 147 additions and 12 deletions

View File

@@ -0,0 +1,9 @@
<script lang="ts">
import katex from 'katex';
import 'katex/contrib/mhchem';
export let content: string;
export let displayMode: boolean = false;
</script>
{@html katex.renderToString(content, { displayMode, throwOnError: false })}

View File

@@ -1,8 +1,11 @@
<script lang="ts">
import type { Token } from 'marked';
import { unescapeHtml } from '$lib/utils';
import { revertSanitizedResponseContent, unescapeHtml } from '$lib/utils';
import { onMount } from 'svelte';
import Image from '$lib/components/common/Image.svelte';
import KatexRenderer from './KatexRenderer.svelte';
export let id: string;
export let tokens: Token[];
</script>
@@ -25,14 +28,21 @@
<svelte:self id={`${id}-em`} tokens={token.tokens} />
</em>
{:else if token.type === 'codespan'}
<code class="codespan">{unescapeHtml(token.text.replaceAll('&amp;', '&'))}</code>
<code class="codespan">{revertSanitizedResponseContent(token.raw)}</code>
{:else if token.type === 'br'}
<br />
{:else if token.type === 'del'}
<del>
<svelte:self id={`${id}-del`} tokens={token.tokens} />
</del>
{:else if token.type === 'inlineKatex'}
{#if token.text}
<KatexRenderer
content={revertSanitizedResponseContent(token.text)}
displayMode={token?.displayMode ?? false}
/>
{/if}
{:else if token.type === 'text'}
{unescapeHtml(token.text)}
{token.raw}
{/if}
{/each}

View File

@@ -1,17 +1,22 @@
<script lang="ts">
import { onMount } from 'svelte';
import type { Token } from 'marked';
import { revertSanitizedResponseContent, unescapeHtml } from '$lib/utils';
import CodeBlock from '$lib/components/chat/Messages/CodeBlock.svelte';
import { onMount } from 'svelte';
import MarkdownInlineTokens from '$lib/components/chat/Messages/MarkdownInlineTokens.svelte';
import KatexRenderer from './KatexRenderer.svelte';
export let id: string;
export let tokens: Token[];
export let top = true;
const headerComponent = (depth: number) => {
return 'h' + depth;
};
</script>
<!-- {JSON.stringify(tokens)} -->
{#each tokens as token, tokenIdx}
{#if token.type === 'hr'}
<hr />
@@ -104,6 +109,13 @@
{:else}
{unescapeHtml(token.text)}
{/if}
{:else if token.type === 'inlineKatex'}
{#if token.text}
<KatexRenderer
content={revertSanitizedResponseContent(token.text)}
displayMode={token?.displayMode ?? false}
/>
{/if}
{:else if token.type === 'space'}
{''}
{:else}

View File

@@ -4,7 +4,6 @@
import { marked } from 'marked';
import tippy from 'tippy.js';
import auto_render from 'katex/dist/contrib/auto-render.mjs';
import 'katex/dist/katex.min.css';
import mermaid from 'mermaid';
import { fade } from 'svelte/transition';
@@ -79,19 +78,24 @@
let tokens;
import 'katex/dist/katex.min.css';
import markedKatex from '$lib/utils/katex-extension';
const options = {
throwOnError: false
};
marked.use(markedKatex(options));
$: (async () => {
if (message?.content) {
tokens = marked.lexer(
replaceTokens(sanitizeResponseContent(message?.content), model?.name, $user?.name)
);
// console.log(message?.content, tokens);
}
})();
$: if (message) {
renderStyling();
}
const renderStyling = async () => {
await tick();

View File

@@ -25,7 +25,8 @@ const convertLatexToSingleLine = (content) => {
export const sanitizeResponseContent = (content: string) => {
// replace single backslash with double backslash
content = content.replace(/\\/g, '\\\\');
content = content.replace(/\\\\/g, '\\\\\\\\');
content = convertLatexToSingleLine(content);
// First, temporarily replace valid <video> tags with a placeholder
@@ -87,7 +88,7 @@ export const replaceTokens = (content, char, user) => {
};
export const revertSanitizedResponseContent = (content: string) => {
return content.replaceAll('&lt;', '<').replaceAll('&gt;', '>');
return content.replaceAll('&lt;', '<').replaceAll('&gt;', '>').replaceAll('\\\\', '\\');
};
export function unescapeHtml(html: string) {

View File

@@ -0,0 +1,80 @@
import katex from 'katex';
const inlineRule = /^(\${1,2})(?!\$)((?:\\.|[^\\\n])*?(?:\\.|[^\\\n\$]))\1(?=[\s?!\.,:?!。,:]|$)/;
const inlineRuleNonStandard = /^(\${1,2})(?!\$)((?:\\.|[^\\\n])*?(?:\\.|[^\\\n\$]))\1/; // Non-standard, even if there are no spaces before and after $ or $$, try to parse
const blockRule = /^(\${1,2})\n((?:\\[^]|[^\\])+?)\n\1(?:\n|$)/;
export default function(options = {}) {
return {
extensions: [
inlineKatex(options, createRenderer(options, false)),
blockKatex(options, createRenderer(options, true)),
],
};
}
function createRenderer(options, newlineAfter) {
return (token) => katex.renderToString(token.text, { ...options, displayMode: token.displayMode }) + (newlineAfter ? '\n' : '');
}
function inlineKatex(options, renderer) {
const nonStandard = options && options.nonStandard;
const ruleReg = nonStandard ? inlineRuleNonStandard : inlineRule;
return {
name: 'inlineKatex',
level: 'inline',
start(src) {
let index;
let indexSrc = src;
while (indexSrc) {
index = indexSrc.indexOf('$');
if (index === -1) {
return;
}
const f = nonStandard ? index > -1 : index === 0 || indexSrc.charAt(index - 1) === ' ';
if (f) {
const possibleKatex = indexSrc.substring(index);
if (possibleKatex.match(ruleReg)) {
return index;
}
}
indexSrc = indexSrc.substring(index + 1).replace(/^\$+/, '');
}
},
tokenizer(src, tokens) {
const match = src.match(ruleReg);
if (match) {
return {
type: 'inlineKatex',
raw: match[0],
text: match[2].trim(),
displayMode: match[1].length === 2,
};
}
},
renderer,
};
}
function blockKatex(options, renderer) {
return {
name: 'blockKatex',
level: 'block',
tokenizer(src, tokens) {
const match = src.match(blockRule);
if (match) {
return {
type: 'blockKatex',
raw: match[0],
text: match[2].trim(),
displayMode: match[1].length === 2,
};
}
},
renderer,
};
}