From 6aefc798075340cb59d5730fe7381f59b7e27b84 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Wed, 14 Aug 2024 16:07:39 +0200 Subject: [PATCH] refac: latex --- .../chat/Messages/MarkdownInlineTokens.svelte | 5 +- .../chat/Messages/MarkdownTokens.svelte | 7 + src/lib/utils/katex-extension.ts | 178 +++++++++++------- 3 files changed, 120 insertions(+), 70 deletions(-) diff --git a/src/lib/components/chat/Messages/MarkdownInlineTokens.svelte b/src/lib/components/chat/Messages/MarkdownInlineTokens.svelte index 170429f4b..4567cf507 100644 --- a/src/lib/components/chat/Messages/MarkdownInlineTokens.svelte +++ b/src/lib/components/chat/Messages/MarkdownInlineTokens.svelte @@ -37,10 +37,7 @@ {:else if token.type === 'inlineKatex'} {#if token.text} - + {/if} {:else if token.type === 'text'} {token.raw} diff --git a/src/lib/components/chat/Messages/MarkdownTokens.svelte b/src/lib/components/chat/Messages/MarkdownTokens.svelte index 089a0847a..5d2c938f6 100644 --- a/src/lib/components/chat/Messages/MarkdownTokens.svelte +++ b/src/lib/components/chat/Messages/MarkdownTokens.svelte @@ -116,6 +116,13 @@ displayMode={token?.displayMode ?? false} /> {/if} + {:else if token.type === 'blockKatex'} + {#if token.text} + + {/if} {:else if token.type === 'space'} {''} {:else} diff --git a/src/lib/utils/katex-extension.ts b/src/lib/utils/katex-extension.ts index 8db411b1b..0d6d7a25f 100644 --- a/src/lib/utils/katex-extension.ts +++ b/src/lib/utils/katex-extension.ts @@ -1,83 +1,129 @@ import katex from 'katex'; -const inlineRule = - /^(\${1,2})(?!\$)((?:\\.|[^\\\n])*?(?:\\.|[^\\\n\$]))\1(?=[\s?!\.,:?!。,:]|$)/; -const inlineRuleNonStandard = /^(\${1,2})(?!\$)((?:\\.|[^\\\n])*?(?:\\.|[^\\\n\$]))\1/; // Non-standard, even if there are no spaces before and after $ or $$, try to parse +const DELIMITER_LIST = [ + { left: '$$', right: '$$', display: false }, + { left: '$', right: '$', display: false }, + { left: '\\pu{', right: '}', display: false }, + { left: '\\ce{', right: '}', display: false }, + { left: '\\(', right: '\\)', display: false }, + { left: '( ', right: ' )', display: false }, + { left: '\\[', right: '\\]', display: true }, + { left: '[', right: ']', display: true } +] -const blockRule = /^(\${1,2})\n((?:\\[^]|[^\\])+?)\n\1(?:\n|$)/; +// const DELIMITER_LIST = [ +// { left: '$$', right: '$$', display: false }, +// { left: '$', right: '$', display: false }, +// ]; -export default function (options = {}) { - return { - extensions: [ - inlineKatex(options, createRenderer(options, false)), - blockKatex(options, createRenderer(options, true)) - ] - }; +// const inlineRule = /^(\${1,2})(?!\$)((?:\\.|[^\\\n])*?(?:\\.|[^\\\n\$]))\1(?=[\s?!\.,:?!。,:]|$)/; +// const blockRule = /^(\${1,2})\n((?:\\[^]|[^\\])+?)\n\1(?:\n|$)/; + +let inlinePatterns = []; +let blockPatterns = []; + +function escapeRegex(string) { + return string.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&'); +} + +function generateRegexRules(delimiters) { + delimiters.forEach(delimiter => { + const { left, right } = delimiter; + // Ensure regex-safe delimiters + const escapedLeft = escapeRegex(left); + const escapedRight = escapeRegex(right); + + // Inline pattern - Capture group $1, token content, followed by end delimiter and normal punctuation marks. + // Example: $text$ + inlinePatterns.push(`${escapedLeft}((?:\\\\.|[^\\\\\\n])*?(?:\\\\.|[^\\\\\\n${escapedRight}]))${escapedRight}`); + + // Block pattern - Starts and ends with the delimiter on new lines. Example: + // $$\ncontent here\n$$ + blockPatterns.push(`${escapedLeft}\n((?:\\\\[^]|[^\\\\])+?)\n${escapedRight}`); + }); + + const inlineRule = new RegExp(`^(${inlinePatterns.join('|')})(?=[\\s?!.,:?!。,:]|$)`, 'u'); + const blockRule = new RegExp(`^(${blockPatterns.join('|')})(?:\n|$)`, 'u'); + + return { inlineRule, blockRule }; +} + +const { inlineRule, blockRule } = generateRegexRules(DELIMITER_LIST); + + + +export default function(options = {}) { + return { + extensions: [ + inlineKatex(options, createRenderer(options, false)), + blockKatex(options, createRenderer(options, true)), + ], + }; } function createRenderer(options, newlineAfter) { - return (token) => - katex.renderToString(token.text, { ...options, displayMode: token.displayMode }) + - (newlineAfter ? '\n' : ''); + return (token) => katex.renderToString(token.text, { ...options, displayMode: token.displayMode }) + (newlineAfter ? '\n' : ''); } function inlineKatex(options, renderer) { - const nonStandard = options && options.nonStandard; - const ruleReg = nonStandard ? inlineRuleNonStandard : inlineRule; - return { - name: 'inlineKatex', - level: 'inline', - start(src) { - let index; - let indexSrc = src; + const ruleReg = inlineRule; + return { + name: 'inlineKatex', + level: 'inline', + start(src) { + let index; + let indexSrc = src; - while (indexSrc) { - index = indexSrc.indexOf('$'); - if (index === -1) { - return; - } - const f = nonStandard ? index > -1 : index === 0 || indexSrc.charAt(index - 1) === ' '; - if (f) { - const possibleKatex = indexSrc.substring(index); + while (indexSrc) { + index = indexSrc.indexOf('$'); + if (index === -1) { + return; + } + const f = index === 0 || indexSrc.charAt(index - 1) === ' '; + if (f) { + const possibleKatex = indexSrc.substring(index); - if (possibleKatex.match(ruleReg)) { - return index; - } - } + if (possibleKatex.match(ruleReg)) { + return index; + } + } - indexSrc = indexSrc.substring(index + 1).replace(/^\$+/, ''); - } - }, - tokenizer(src, tokens) { - const match = src.match(ruleReg); - if (match) { - return { - type: 'inlineKatex', - raw: match[0], - text: match[2].trim(), - displayMode: match[1].length === 2 - }; - } - }, - renderer - }; + indexSrc = indexSrc.substring(index + 1).replace(/^\$+/, ''); + } + }, + tokenizer(src, tokens) { + const match = src.match(ruleReg); + + if (match) { + console.log(match) + const text = match.slice(2).filter((item) => item).find((item) => item.trim()); + + return { + type: 'inlineKatex', + raw: match[0], + text: text, + }; + } + }, + renderer, + }; } function blockKatex(options, renderer) { - return { - name: 'blockKatex', - level: 'block', - tokenizer(src, tokens) { - const match = src.match(blockRule); - if (match) { - return { - type: 'blockKatex', - raw: match[0], - text: match[2].trim(), - displayMode: match[1].length === 2 - }; - } - }, - renderer - }; + return { + name: 'blockKatex', + level: 'block', + tokenizer(src, tokens) { + const match = src.match(blockRule); + if (match) { + return { + type: 'blockKatex', + raw: match[0], + text: match[0], + }; + } + }, + renderer, + }; } +