Merge pull request #13567 from tth37/fix_katex_chinese_delimiters
Some checks are pending
Deploy to HuggingFace Spaces / check-secret (push) Waiting to run
Deploy to HuggingFace Spaces / deploy (push) Blocked by required conditions
Create and publish Docker images with specific build args / build-main-image (linux/amd64) (push) Waiting to run
Create and publish Docker images with specific build args / build-main-image (linux/arm64) (push) Waiting to run
Create and publish Docker images with specific build args / build-cuda-image (linux/amd64) (push) Waiting to run
Create and publish Docker images with specific build args / build-cuda-image (linux/arm64) (push) Waiting to run
Create and publish Docker images with specific build args / build-ollama-image (linux/amd64) (push) Waiting to run
Create and publish Docker images with specific build args / build-ollama-image (linux/arm64) (push) Waiting to run
Create and publish Docker images with specific build args / merge-main-images (push) Blocked by required conditions
Create and publish Docker images with specific build args / merge-cuda-images (push) Blocked by required conditions
Create and publish Docker images with specific build args / merge-ollama-images (push) Blocked by required conditions
Python CI / Format Backend (3.11.x) (push) Waiting to run
Python CI / Format Backend (3.12.x) (push) Waiting to run
Frontend Build / Format & Build Frontend (push) Waiting to run
Frontend Build / Frontend Unit Tests (push) Waiting to run

fix(katex): Allow CJK characters adjacent to math delimiters
This commit is contained in:
Tim Jaeryang Baek 2025-05-08 14:41:16 +04:00 committed by GitHub
commit a6d6c928d9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -10,6 +10,10 @@ const DELIMITER_LIST = [
{ left: '\\begin{equation}', right: '\\end{equation}', display: true }
];
// Defines characters that are allowed to immediately precede or follow a math delimiter.
const ALLOWED_SURROUNDING_CHARS =
'\\s?。,、;!-\\/:-@\\[-`{-~\\p{Script=Han}\\p{Script=Hiragana}\\p{Script=Katakana}\\p{Script=Hangul}';
// const DELIMITER_LIST = [
// { left: '$$', right: '$$', display: false },
// { left: '$', right: '$', display: false },
@ -44,10 +48,13 @@ function generateRegexRules(delimiters) {
// Math formulas can end in special characters
const inlineRule = new RegExp(
`^(${inlinePatterns.join('|')})(?=[\\s?。,!-\/:-@[-\`{-~]|$)`,
`^(${inlinePatterns.join('|')})(?=[${ALLOWED_SURROUNDING_CHARS}]|$)`,
'u'
);
const blockRule = new RegExp(
`^(${blockPatterns.join('|')})(?=[${ALLOWED_SURROUNDING_CHARS}]|$)`,
'u'
);
const blockRule = new RegExp(`^(${blockPatterns.join('|')})(?=[\\s?。,!-\/:-@[-\`{-~]|$)`, 'u');
return { inlineRule, blockRule };
}
@ -91,7 +98,9 @@ function katexStart(src, displayMode: boolean) {
// Check if the delimiter is preceded by a special character.
// If it does, then it's potentially a math formula.
const f = index === 0 || indexSrc.charAt(index - 1).match(/[\s?。,!-\/:-@[-`{-~]/);
const f =
index === 0 ||
indexSrc.charAt(index - 1).match(new RegExp(`[${ALLOWED_SURROUNDING_CHARS}]`, 'u'));
if (f) {
const possibleKatex = indexSrc.substring(index);