From 214546399a89a5e3ce50a424b017b2b4aad895f1 Mon Sep 17 00:00:00 2001 From: Hwang In Tak Date: Tue, 24 Sep 2024 16:58:15 +0900 Subject: [PATCH 1/6] fix: fix katex rendering --- src/lib/utils/marked/katex-extension.ts | 163 +++++++++++++----------- 1 file changed, 92 insertions(+), 71 deletions(-) diff --git a/src/lib/utils/marked/katex-extension.ts b/src/lib/utils/marked/katex-extension.ts index c90736462..2c5186dac 100644 --- a/src/lib/utils/marked/katex-extension.ts +++ b/src/lib/utils/marked/katex-extension.ts @@ -1,8 +1,8 @@ import katex from 'katex'; const DELIMITER_LIST = [ - { left: '$$', right: '$$', display: false }, { left: '$', right: '$', display: false }, + { left: '$$', right: '$$', display: true }, { left: '\\pu{', right: '}', display: false }, { left: '\\ce{', right: '}', display: false }, { left: '\\(', right: '\\)', display: false }, @@ -28,24 +28,24 @@ function escapeRegex(string) { function generateRegexRules(delimiters) { delimiters.forEach((delimiter) => { - const { left, right } = delimiter; + const { left, right, display } = delimiter; // Ensure regex-safe delimiters const escapedLeft = escapeRegex(left); const escapedRight = escapeRegex(right); - // Inline pattern - Capture group $1, token content, followed by end delimiter and normal punctuation marks. - // Example: $text$ - inlinePatterns.push( - `${escapedLeft}((?:\\\\.|[^\\\\\\n])*?(?:\\\\.|[^\\\\\\n${escapedRight}]))${escapedRight}` - ); - - // Block pattern - Starts and ends with the delimiter on new lines. Example: - // $$\ncontent here\n$$ - blockPatterns.push(`${escapedLeft}\n((?:\\\\[^]|[^\\\\])+?)\n${escapedRight}`); + if (!display) { + inlinePatterns.push( + `${escapedLeft}((?:\\\\.|[^\\\\\\n])*?(?:\\\\.|[^\\\\\\n${escapedRight}]))${escapedRight}` + ); + } else { + blockPatterns.push( + `${escapedLeft}((?:\\\\.|[^\\\\\\n])*?(?:\\\\.|[^\\\\\\n${escapedRight}]))${escapedRight}` + ); + } }); const inlineRule = new RegExp(`^(${inlinePatterns.join('|')})(?=[\\s?!.,:?!。,:]|$)`, 'u'); - const blockRule = new RegExp(`^(${blockPatterns.join('|')})(?:\n|$)`, 'u'); + const blockRule = new RegExp(`^(${blockPatterns.join('|')})(?=[\\s?!.,:?!。,:]|$)`, 'u'); return { inlineRule, blockRule }; } @@ -55,84 +55,105 @@ const { inlineRule, blockRule } = generateRegexRules(DELIMITER_LIST); export default function (options = {}) { return { extensions: [ - inlineKatex(options, createRenderer(options, false)), - blockKatex(options, createRenderer(options, true)) + inlineKatex(options), + blockKatex(options), ] }; } -function createRenderer(options, newlineAfter) { - return (token) => - katex.renderToString(token.text, { ...options, displayMode: token.displayMode }) + - (newlineAfter ? '\n' : ''); +function katexStart(src, displayMode: boolean) { + let ruleReg = displayMode ? blockRule : inlineRule; + + let indexSrc = src; + + while (indexSrc) { + let index = -1; + let startIndex = -1; + let startDelimiter = ''; + let endDelimiter = ''; + for (let delimiter of DELIMITER_LIST) { + if (delimiter.display !== displayMode) { + continue; + } + + startIndex = indexSrc.indexOf(delimiter.left); + if (startIndex === -1) { + continue; + } + + index = startIndex; + startDelimiter = delimiter.left; + endDelimiter = delimiter.right; + } + + if (index === -1) { + return; + } + + const f = index === 0 || indexSrc.charAt(index - 1) === ' '; + if (f) { + const possibleKatex = indexSrc.substring(index); + + if (possibleKatex.match(ruleReg)) { + return index; + } + } + + indexSrc = indexSrc.substring(index + startDelimiter.length).replace(endDelimiter, ''); + } } -function inlineKatex(options, renderer) { - const ruleReg = inlineRule; +function katexTokenizer(src, tokens, displayMode: boolean) { + let ruleReg = displayMode ? blockRule : inlineRule; + let type = displayMode ? 'blockKatex' : 'inlineKatex'; + + const match = src.match(ruleReg); + + if (match) { + const text = match + .slice(2) + .filter((item) => item) + .find((item) => item.trim()); + + if (displayMode) { + console.log("block matched", match[0]); + } else { + console.log("inline matched", match[0]); + } + + return { + type, + raw: match[0], + text: text, + displayMode, + }; + } +} + + + +function inlineKatex(options) { return { name: 'inlineKatex', level: 'inline', start(src) { - let index; - let indexSrc = src; - - while (indexSrc) { - index = indexSrc.indexOf('$'); - if (index === -1) { - return; - } - const f = index === 0 || indexSrc.charAt(index - 1) === ' '; - if (f) { - const possibleKatex = indexSrc.substring(index); - - if (possibleKatex.match(ruleReg)) { - return index; - } - } - - indexSrc = indexSrc.substring(index + 1).replace(/^\$+/, ''); - } + return katexStart(src, false); }, tokenizer(src, tokens) { - const match = src.match(ruleReg); - - if (match) { - const text = match - .slice(2) - .filter((item) => item) - .find((item) => item.trim()); - - return { - type: 'inlineKatex', - raw: match[0], - text: text - }; - } + return katexTokenizer(src, tokens, false); }, - renderer }; } -function blockKatex(options, renderer) { +function blockKatex(options) { return { name: 'blockKatex', level: 'block', - tokenizer(src, tokens) { - const match = src.match(blockRule); - - if (match) { - const text = match - .slice(2) - .filter((item) => item) - .find((item) => item.trim()); - - return { - type: 'blockKatex', - raw: match[0], - text: text - }; - } + start(src) { + return katexStart(src, true); + }, + tokenizer(src, tokens) { + return katexTokenizer(src, tokens, true); }, - renderer }; } From 377cc427b6701708a38ce5af01508cc649e27ce2 Mon Sep 17 00:00:00 2001 From: Hwang In Tak Date: Tue, 24 Sep 2024 20:40:50 +0900 Subject: [PATCH 2/6] fix: Remove unnecessary logging --- src/lib/utils/marked/katex-extension.ts | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/lib/utils/marked/katex-extension.ts b/src/lib/utils/marked/katex-extension.ts index 2c5186dac..755519566 100644 --- a/src/lib/utils/marked/katex-extension.ts +++ b/src/lib/utils/marked/katex-extension.ts @@ -115,12 +115,6 @@ function katexTokenizer(src, tokens, displayMode: boolean) { .filter((item) => item) .find((item) => item.trim()); - if (displayMode) { - console.log("block matched", match[0]); - } else { - console.log("inline matched", match[0]); - } - return { type, raw: match[0], From 0bfbace9aa38ec93e99b6d48d14e3f75acaa8428 Mon Sep 17 00:00:00 2001 From: Hwang In Tak Date: Tue, 24 Sep 2024 22:00:01 +0900 Subject: [PATCH 3/6] fix: Simplify regex --- src/lib/utils/index.ts | 18 ------------------ src/lib/utils/marked/katex-extension.ts | 6 ++++-- 2 files changed, 4 insertions(+), 20 deletions(-) diff --git a/src/lib/utils/index.ts b/src/lib/utils/index.ts index bb553c57e..cea7f6e64 100644 --- a/src/lib/utils/index.ts +++ b/src/lib/utils/index.ts @@ -8,23 +8,6 @@ import { TTS_RESPONSE_SPLIT } from '$lib/types'; // Helper functions ////////////////////////// -const convertLatexToSingleLine = (content) => { - // Patterns to match multiline LaTeX blocks - const patterns = [ - /(\$\$\s[\s\S]*?\s\$\$)/g, // Match $$ ... $$ - /(\\\[[\s\S]*?\\\])/g, // Match \[ ... \] - /(\\begin\{[a-z]+\}[\s\S]*?\\end\{[a-z]+\})/g // Match \begin{...} ... \end{...} - ]; - - patterns.forEach((pattern) => { - content = content.replace(pattern, (match) => { - return match.replace(/\s*\n\s*/g, ' ').trim(); - }); - }); - - return content; -}; - export const replaceTokens = (content, char, user) => { const charToken = /{{char}}/gi; const userToken = /{{user}}/gi; @@ -68,7 +51,6 @@ export const sanitizeResponseContent = (content: string) => { }; export const processResponseContent = (content: string) => { - content = convertLatexToSingleLine(content); return content.trim(); }; diff --git a/src/lib/utils/marked/katex-extension.ts b/src/lib/utils/marked/katex-extension.ts index 755519566..371c4c932 100644 --- a/src/lib/utils/marked/katex-extension.ts +++ b/src/lib/utils/marked/katex-extension.ts @@ -35,11 +35,11 @@ function generateRegexRules(delimiters) { if (!display) { inlinePatterns.push( - `${escapedLeft}((?:\\\\.|[^\\\\\\n])*?(?:\\\\.|[^\\\\\\n${escapedRight}]))${escapedRight}` + `${escapedLeft}((?:\\\\[^]|[^\\\\])+?)${escapedRight}` ); } else { blockPatterns.push( - `${escapedLeft}((?:\\\\.|[^\\\\\\n])*?(?:\\\\.|[^\\\\\\n${escapedRight}]))${escapedRight}` + `${escapedLeft}((?:\\\\[^]|[^\\\\])+?)${escapedRight}` ); } }); @@ -109,6 +109,8 @@ function katexTokenizer(src, tokens, displayMode: boolean) { const match = src.match(ruleReg); + console.log("searching:", src); + if (match) { const text = match .slice(2) From e48d66f918d61440ebd3cff950004709f83a0c3b Mon Sep 17 00:00:00 2001 From: Hwang In Tak Date: Tue, 24 Sep 2024 22:11:05 +0900 Subject: [PATCH 4/6] fix: Remove unnecessary logging --- src/lib/utils/marked/katex-extension.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/lib/utils/marked/katex-extension.ts b/src/lib/utils/marked/katex-extension.ts index 371c4c932..96ce9e28c 100644 --- a/src/lib/utils/marked/katex-extension.ts +++ b/src/lib/utils/marked/katex-extension.ts @@ -109,8 +109,6 @@ function katexTokenizer(src, tokens, displayMode: boolean) { const match = src.match(ruleReg); - console.log("searching:", src); - if (match) { const text = match .slice(2) From 3f1255b39e8cef8ee37ab147d107bda67549c27b Mon Sep 17 00:00:00 2001 From: Hwang In Tak Date: Wed, 25 Sep 2024 00:10:49 +0900 Subject: [PATCH 5/6] fix: Change inline and block delimiters --- src/lib/utils/marked/katex-extension.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/lib/utils/marked/katex-extension.ts b/src/lib/utils/marked/katex-extension.ts index 96ce9e28c..e530ace10 100644 --- a/src/lib/utils/marked/katex-extension.ts +++ b/src/lib/utils/marked/katex-extension.ts @@ -1,14 +1,14 @@ import katex from 'katex'; const DELIMITER_LIST = [ + { left: '$$\n', right: '\n$$', display: true }, + { left: '$$', right: '$$', display: false }, // This should be on top to prevent conflict with $ delimiter { left: '$', right: '$', display: false }, - { left: '$$', right: '$$', display: true }, { left: '\\pu{', right: '}', display: false }, { left: '\\ce{', right: '}', display: false }, { left: '\\(', right: '\\)', display: false }, - { left: '( ', right: ' )', display: false }, - { left: '\\[', right: '\\]', display: true }, - { left: '[ ', right: ' ]', display: true } + { left: '\\[\n', right: '\n\\]', display: true }, + { left: '\\[', right: '\\]', display: false }, ]; // const DELIMITER_LIST = [ @@ -55,8 +55,8 @@ const { inlineRule, blockRule } = generateRegexRules(DELIMITER_LIST); export default function (options = {}) { return { extensions: [ - inlineKatex(options), blockKatex(options), + inlineKatex(options), ] }; } From 30e65b33f6ca25b462d0657171d949286d19c45d Mon Sep 17 00:00:00 2001 From: Hwang In Tak Date: Wed, 25 Sep 2024 00:41:08 +0900 Subject: [PATCH 6/6] fix: Add comments --- src/lib/utils/marked/katex-extension.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/utils/marked/katex-extension.ts b/src/lib/utils/marked/katex-extension.ts index e530ace10..6acbcd922 100644 --- a/src/lib/utils/marked/katex-extension.ts +++ b/src/lib/utils/marked/katex-extension.ts @@ -55,7 +55,7 @@ const { inlineRule, blockRule } = generateRegexRules(DELIMITER_LIST); export default function (options = {}) { return { extensions: [ - blockKatex(options), + blockKatex(options), // This should be on top to prevent conflict with inline delimiters. inlineKatex(options), ] };