Merge pull request #5655 from sp301415/dev

fix: Fix KaTeX Rendering
This commit is contained in:
Timothy Jaeryang Baek 2024-09-24 17:44:08 +02:00 committed by GitHub
commit 3ff52fd1ad
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 89 additions and 92 deletions

View File

@@ -8,23 +8,6 @@ import { TTS_RESPONSE_SPLIT } from '$lib/types';
// Helper functions // Helper functions
////////////////////////// //////////////////////////
/**
 * Collapses multi-line LaTeX blocks inside `content` onto a single line.
 *
 * Three block forms are recognised:
 *   - `$$ ... $$` (whitespace required just inside both delimiters)
 *   - `\[ ... \]`
 *   - `\begin{env} ... \end{env}`
 *
 * Within each matched block, every newline (together with the whitespace
 * around it) is replaced by a single space. Text outside the blocks is
 * returned untouched.
 *
 * @param content - Text that may contain LaTeX blocks.
 * @returns The text with every recognised LaTeX block flattened to one line.
 */
const convertLatexToSingleLine = (content: string): string => {
	const patterns = [
		/\$\$\s[\s\S]*?\s\$\$/g, // $$ ... $$
		/\\\[[\s\S]*?\\\]/g, // \[ ... \]
		// \begin{env} ... \end{env}
		// - the environment name may contain upper-case letters and a trailing
		//   `*` (e.g. `Bmatrix`, `align*`);
		// - the back-reference makes the match end at the `\end` of the SAME
		//   environment, so a nested environment with a different name does not
		//   cut the outer block short.
		/\\begin\{([a-zA-Z]+\*?)\}[\s\S]*?\\end\{\1\}/g
	];

	return patterns.reduce(
		(text, pattern) =>
			text.replace(pattern, (match) => match.replace(/\s*\n\s*/g, ' ').trim()),
		content
	);
};
export const replaceTokens = (content, char, user) => { export const replaceTokens = (content, char, user) => {
const charToken = /{{char}}/gi; const charToken = /{{char}}/gi;
const userToken = /{{user}}/gi; const userToken = /{{user}}/gi;
@@ -68,7 +51,6 @@ export const sanitizeResponseContent = (content: string) => {
}; };
export const processResponseContent = (content: string) => { export const processResponseContent = (content: string) => {
content = convertLatexToSingleLine(content);
return content.trim(); return content.trim();
}; };

View File

@@ -1,14 +1,14 @@
import katex from 'katex'; import katex from 'katex';
const DELIMITER_LIST = [ const DELIMITER_LIST = [
{ left: '$$', right: '$$', display: false }, { left: '$$\n', right: '\n$$', display: true },
{ left: '$$', right: '$$', display: false }, // This should be on top to prevent conflict with $ delimiter
{ left: '$', right: '$', display: false }, { left: '$', right: '$', display: false },
{ left: '\\pu{', right: '}', display: false }, { left: '\\pu{', right: '}', display: false },
{ left: '\\ce{', right: '}', display: false }, { left: '\\ce{', right: '}', display: false },
{ left: '\\(', right: '\\)', display: false }, { left: '\\(', right: '\\)', display: false },
{ left: '( ', right: ' )', display: false }, { left: '\\[\n', right: '\n\\]', display: true },
{ left: '\\[', right: '\\]', display: true }, { left: '\\[', right: '\\]', display: false },
{ left: '[ ', right: ' ]', display: true }
]; ];
// const DELIMITER_LIST = [ // const DELIMITER_LIST = [
@@ -28,24 +28,24 @@ function escapeRegex(string) {
function generateRegexRules(delimiters) { function generateRegexRules(delimiters) {
delimiters.forEach((delimiter) => { delimiters.forEach((delimiter) => {
const { left, right } = delimiter; const { left, right, display } = delimiter;
// Ensure regex-safe delimiters // Ensure regex-safe delimiters
const escapedLeft = escapeRegex(left); const escapedLeft = escapeRegex(left);
const escapedRight = escapeRegex(right); const escapedRight = escapeRegex(right);
// Inline pattern - Capture group $1, token content, followed by end delimiter and normal punctuation marks. if (!display) {
// Example: $text$
inlinePatterns.push( inlinePatterns.push(
`${escapedLeft}((?:\\\\.|[^\\\\\\n])*?(?:\\\\.|[^\\\\\\n${escapedRight}]))${escapedRight}` `${escapedLeft}((?:\\\\[^]|[^\\\\])+?)${escapedRight}`
); );
} else {
// Block pattern - Starts and ends with the delimiter on new lines. Example: blockPatterns.push(
// $$\ncontent here\n$$ `${escapedLeft}((?:\\\\[^]|[^\\\\])+?)${escapedRight}`
blockPatterns.push(`${escapedLeft}\n((?:\\\\[^]|[^\\\\])+?)\n${escapedRight}`); );
}
}); });
const inlineRule = new RegExp(`^(${inlinePatterns.join('|')})(?=[\\s?!.,:?!。,:]|$)`, 'u'); const inlineRule = new RegExp(`^(${inlinePatterns.join('|')})(?=[\\s?!.,:?!。,:]|$)`, 'u');
const blockRule = new RegExp(`^(${blockPatterns.join('|')})(?:\n|$)`, 'u'); const blockRule = new RegExp(`^(${blockPatterns.join('|')})(?=[\\s?!.,:?!。,:]|$)`, 'u');
return { inlineRule, blockRule }; return { inlineRule, blockRule };
} }
@@ -55,32 +55,41 @@ const { inlineRule, blockRule } = generateRegexRules(DELIMITER_LIST);
export default function (options = {}) { export default function (options = {}) {
return { return {
extensions: [ extensions: [
inlineKatex(options, createRenderer(options, false)), blockKatex(options), // This should be on top to prevent conflict with inline delimiters.
blockKatex(options, createRenderer(options, true)) inlineKatex(options),
] ]
}; };
} }
function createRenderer(options, newlineAfter) { function katexStart(src, displayMode: boolean) {
return (token) => let ruleReg = displayMode ? blockRule : inlineRule;
katex.renderToString(token.text, { ...options, displayMode: token.displayMode }) +
(newlineAfter ? '\n' : '');
}
function inlineKatex(options, renderer) {
const ruleReg = inlineRule;
return {
name: 'inlineKatex',
level: 'inline',
start(src) {
let index;
let indexSrc = src; let indexSrc = src;
while (indexSrc) { while (indexSrc) {
index = indexSrc.indexOf('$'); let index = -1;
let startIndex = -1;
let startDelimiter = '';
let endDelimiter = '';
for (let delimiter of DELIMITER_LIST) {
if (delimiter.display !== displayMode) {
continue;
}
startIndex = indexSrc.indexOf(delimiter.left);
if (startIndex === -1) {
continue;
}
index = startIndex;
startDelimiter = delimiter.left;
endDelimiter = delimiter.right;
}
if (index === -1) { if (index === -1) {
return; return;
} }
const f = index === 0 || indexSrc.charAt(index - 1) === ' '; const f = index === 0 || indexSrc.charAt(index - 1) === ' ';
if (f) { if (f) {
const possibleKatex = indexSrc.substring(index); const possibleKatex = indexSrc.substring(index);
@@ -90,10 +99,14 @@ function inlineKatex(options, renderer) {
} }
} }
indexSrc = indexSrc.substring(index + 1).replace(/^\$+/, ''); indexSrc = indexSrc.substring(index + startDelimiter.length).replace(endDelimiter, '');
} }
}, }
tokenizer(src, tokens) {
function katexTokenizer(src, tokens, displayMode: boolean) {
let ruleReg = displayMode ? blockRule : inlineRule;
let type = displayMode ? 'blockKatex' : 'inlineKatex';
const match = src.match(ruleReg); const match = src.match(ruleReg);
if (match) { if (match) {
@@ -103,36 +116,38 @@ function inlineKatex(options, renderer) {
.find((item) => item.trim()); .find((item) => item.trim());
return { return {
type: 'inlineKatex', type,
raw: match[0], raw: match[0],
text: text text: text,
displayMode,
}; };
} }
}
function inlineKatex(options) {
return {
name: 'inlineKatex',
level: 'inline',
start(src) {
return katexStart(src, false);
},
tokenizer(src, tokens) {
return katexTokenizer(src, tokens, false);
}, },
renderer
}; };
} }
function blockKatex(options, renderer) { function blockKatex(options) {
return { return {
name: 'blockKatex', name: 'blockKatex',
level: 'block', level: 'block',
tokenizer(src, tokens) { start(src) {
const match = src.match(blockRule); return katexStart(src, true);
},
if (match) { tokenizer(src, tokens) {
const text = match return katexTokenizer(src, tokens, true);
.slice(2)
.filter((item) => item)
.find((item) => item.trim());
return {
type: 'blockKatex',
raw: match[0],
text: text
};
}
}, },
renderer
}; };
} }