Merge pull request #5655 from sp301415/dev

fix: Fix KaTeX Rendering
This commit is contained in:
Timothy Jaeryang Baek 2024-09-24 17:44:08 +02:00 committed by GitHub
commit 3ff52fd1ad
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 89 additions and 92 deletions

View File

@ -8,23 +8,6 @@ import { TTS_RESPONSE_SPLIT } from '$lib/types';
// Helper functions
//////////////////////////
/**
 * Collapses multi-line LaTeX blocks onto a single line.
 *
 * Inside every `$$ ... $$`, `\[ ... \]` or `\begin{env} ... \end{env}` block,
 * each line break (together with the whitespace around it) is replaced by one
 * space and the block is trimmed. Text outside those blocks is left untouched.
 *
 * @param content - Raw text that may contain LaTeX blocks.
 * @returns The text with each recognised block flattened to one line.
 */
const convertLatexToSingleLine = (content: string): string => {
	// Patterns to match multiline LaTeX blocks
	const patterns = [
		/(\$\$\s[\s\S]*?\s\$\$)/g, // Match $$ ... $$
		/(\\\[[\s\S]*?\\\])/g, // Match \[ ... \]
		// Match \begin{env} ... \end{env}. The environment name may be mixed case and
		// starred (align*, Bmatrix); the back-reference ties \end to the same name so a
		// nested inner environment does not terminate the match early.
		/(\\begin\{([a-zA-Z]+\*?)\}[\s\S]*?\\end\{\2\})/g
	];

	// Replace every newline (plus surrounding whitespace) in one block with a single space.
	const flatten = (block: string): string => block.replace(/\s*\n\s*/g, ' ').trim();

	// Apply the patterns in order without reassigning the parameter.
	return patterns.reduce(
		(text, pattern) => text.replace(pattern, (block) => flatten(block)),
		content
	);
};
export const replaceTokens = (content, char, user) => {
const charToken = /{{char}}/gi;
const userToken = /{{user}}/gi;
@ -68,7 +51,6 @@ export const sanitizeResponseContent = (content: string) => {
};
/**
 * Prepares response content for display: multi-line LaTeX blocks are collapsed
 * by `convertLatexToSingleLine`, then leading and trailing whitespace is removed.
 */
export const processResponseContent = (content: string) => {
	const flattened = convertLatexToSingleLine(content);
	return flattened.trim();
};

View File

@ -1,14 +1,14 @@
import katex from 'katex';
// Delimiter pairs that mark math in the source text. `left` and `right` are the
// literal opening and closing strings; generateRegexRules reads `display` to decide
// whether a pair feeds the block pattern list (true) or the inline pattern list (false).
// Entry order matters: the generated patterns are joined with `|` in list order.
// NOTE(review): this span appears to hold both the previous and the updated entries
// of the list (e.g. '$$' occurs twice, one entry lacks a trailing comma) — confirm
// which set is current.
const DELIMITER_LIST = [
{ left: '$$', right: '$$', display: false },
{ left: '$$\n', right: '\n$$', display: true },
{ left: '$$', right: '$$', display: false }, // This should be on top to prevent conflict with $ delimiter
{ left: '$', right: '$', display: false },
{ left: '\\pu{', right: '}', display: false },
{ left: '\\ce{', right: '}', display: false },
{ left: '\\(', right: '\\)', display: false },
{ left: '( ', right: ' )', display: false },
{ left: '\\[', right: '\\]', display: true },
{ left: '[ ', right: ' ]', display: true }
{ left: '\\[\n', right: '\n\\]', display: true },
{ left: '\\[', right: '\\]', display: false },
];
// const DELIMITER_LIST = [
@ -28,24 +28,24 @@ function escapeRegex(string) {
// Builds the inline and block matching rules from a list of delimiter definitions.
// For each delimiter, the regex-escaped `left`/`right` strings are wrapped around a
// capture group and pushed onto `inlinePatterns` or `blockPatterns` (both declared
// outside the lines shown here). Each list is then joined with `|` into a single
// start-anchored RegExp created with the `u` flag.
// Returns `{ inlineRule, blockRule }`.
// NOTE(review): the body shows duplicated declarations (`left`/`right`, `blockRule`)
// and two generations of pattern pushes, apparently the old and new revisions side
// by side — confirm which lines are live.
function generateRegexRules(delimiters) {
delimiters.forEach((delimiter) => {
const { left, right } = delimiter;
const { left, right, display } = delimiter;
// Ensure regex-safe delimiters
const escapedLeft = escapeRegex(left);
const escapedRight = escapeRegex(right);
// Inline pattern - Capture group $1, token content, followed by end delimiter and normal punctuation marks.
// Example: $text$
inlinePatterns.push(
`${escapedLeft}((?:\\\\.|[^\\\\\\n])*?(?:\\\\.|[^\\\\\\n${escapedRight}]))${escapedRight}`
);
// Block pattern - Starts and ends with the delimiter on new lines. Example:
// $$\ncontent here\n$$
blockPatterns.push(`${escapedLeft}\n((?:\\\\[^]|[^\\\\])+?)\n${escapedRight}`);
if (!display) {
inlinePatterns.push(
`${escapedLeft}((?:\\\\[^]|[^\\\\])+?)${escapedRight}`
);
} else {
blockPatterns.push(
`${escapedLeft}((?:\\\\[^]|[^\\\\])+?)${escapedRight}`
);
}
});
const inlineRule = new RegExp(`^(${inlinePatterns.join('|')})(?=[\\s?!.,:?!。,:]|$)`, 'u');
const blockRule = new RegExp(`^(${blockPatterns.join('|')})(?:\n|$)`, 'u');
const blockRule = new RegExp(`^(${blockPatterns.join('|')})(?=[\\s?!.,:?!。,:]|$)`, 'u');
return { inlineRule, blockRule };
}
@ -55,84 +55,99 @@ const { inlineRule, blockRule } = generateRegexRules(DELIMITER_LIST);
// Extension factory: returns an object whose `extensions` array holds the KaTeX
// block and inline extension definitions built from `options` (defaults to `{}`).
// NOTE(review): the array shows both the renderer-passing calls and the
// options-only calls, apparently old and new revisions together — confirm which
// entries are live.
export default function (options = {}) {
return {
extensions: [
inlineKatex(options, createRenderer(options, false)),
blockKatex(options, createRenderer(options, true))
blockKatex(options), // This should be on top to prevent conflict with inline delimiters.
inlineKatex(options),
]
};
}
function createRenderer(options, newlineAfter) {
return (token) =>
katex.renderToString(token.text, { ...options, displayMode: token.displayMode }) +
(newlineAfter ? '\n' : '');
/**
 * Finds where the next KaTeX expression of the requested mode may begin in `src`.
 *
 * Candidates are the occurrences of every `DELIMITER_LIST` opener whose `display`
 * flag equals `displayMode`, visited in ascending position. A candidate is accepted
 * when it sits at the very start of `src` or directly after a space, and the text
 * starting at that position matches the block rule (display mode) or inline rule.
 *
 * @param src - Source text to scan; an empty or missing value yields `undefined`.
 * @param displayMode - `true` to look for block (display) math, `false` for inline math.
 * @returns Index into `src` of the first accepted opener, or `undefined` when none is found.
 */
function katexStart(src: string, displayMode: boolean): number | undefined {
	if (!src) {
		return undefined;
	}

	const ruleReg = displayMode ? blockRule : inlineRule;
	const openers = DELIMITER_LIST.filter((delimiter) => delimiter.display === displayMode).map(
		(delimiter) => delimiter.left
	);

	let searchFrom = 0;
	while (searchFrom < src.length) {
		// Take the EARLIEST opener at or after `searchFrom`, whichever delimiter it
		// belongs to, so an expression is never skipped because another delimiter
		// happens to come later in the list.
		let candidate = -1;
		for (const opener of openers) {
			const at = src.indexOf(opener, searchFrom);
			if (at !== -1 && (candidate === -1 || at < candidate)) {
				candidate = at;
			}
		}
		if (candidate === -1) {
			return undefined;
		}

		const atBoundary = candidate === 0 || src.charAt(candidate - 1) === ' ';
		if (atBoundary && src.substring(candidate).match(ruleReg)) {
			return candidate;
		}

		// Rejected: resume right after this position. `src` itself is never modified,
		// so later expressions stay intact and the result is always an index into `src`.
		searchFrom = candidate + 1;
	}
	return undefined;
}
function inlineKatex(options, renderer) {
const ruleReg = inlineRule;
/**
 * Tries to read a KaTeX token from the beginning of `src`.
 *
 * Uses the block rule and the type 'blockKatex' when `displayMode` is true,
 * otherwise the inline rule and the type 'inlineKatex'. `tokens` is accepted but
 * not used here.
 *
 * @returns `{ type, raw, text, displayMode }` when the rule matches, where `raw` is
 * the full match and `text` is the first non-blank capture; otherwise `undefined`.
 */
function katexTokenizer(src, tokens, displayMode: boolean) {
	const rule = displayMode ? blockRule : inlineRule;
	const tokenType = displayMode ? 'blockKatex' : 'inlineKatex';

	const found = src.match(rule);
	if (!found) {
		return;
	}

	// Index 0 is the whole match and index 1 the outer alternation group; the
	// per-delimiter captures follow. Pick the first one with non-whitespace content.
	const text = found.slice(2).find((group) => group && group.trim());

	return {
		type: tokenType,
		raw: found[0],
		text: text,
		displayMode
	};
}
// Returns the extension definition named 'inlineKatex' at level 'inline', exposing
// `start` and `tokenizer` hooks plus a `renderer` property.
// NOTE(review): each hook body shows inline `$`-scanning / matching code followed by
// a delegating call to katexStart / katexTokenizer, apparently the old and new
// revisions together; `ruleReg` and `renderer` are not declared within these lines —
// confirm which statements are live.
function inlineKatex(options) {
return {
name: 'inlineKatex',
level: 'inline',
start(src) {
let index;
let indexSrc = src;
while (indexSrc) {
index = indexSrc.indexOf('$');
if (index === -1) {
return;
}
const f = index === 0 || indexSrc.charAt(index - 1) === ' ';
if (f) {
const possibleKatex = indexSrc.substring(index);
if (possibleKatex.match(ruleReg)) {
return index;
}
}
indexSrc = indexSrc.substring(index + 1).replace(/^\$+/, '');
}
return katexStart(src, false);
},
tokenizer(src, tokens) {
const match = src.match(ruleReg);
if (match) {
const text = match
.slice(2)
.filter((item) => item)
.find((item) => item.trim());
return {
type: 'inlineKatex',
raw: match[0],
text: text
};
}
return katexTokenizer(src, tokens, false);
},
renderer
};
}
// Returns the extension definition named 'blockKatex' at level 'block', exposing
// `start` and `tokenizer` hooks plus a `renderer` property.
// NOTE(review): two function headers and two tokenizer bodies appear here, apparently
// the old and new revisions together (the newer lines delegate to katexStart /
// katexTokenizer with displayMode `true`) — confirm which statements are live.
function blockKatex(options, renderer) {
function blockKatex(options) {
return {
name: 'blockKatex',
level: 'block',
tokenizer(src, tokens) {
const match = src.match(blockRule);
if (match) {
const text = match
.slice(2)
.filter((item) => item)
.find((item) => item.trim());
return {
type: 'blockKatex',
raw: match[0],
text: text
};
}
start(src) {
return katexStart(src, true);
},
tokenizer(src, tokens) {
return katexTokenizer(src, tokens, true);
},
renderer
};
}