refac
Some checks are pending
Deploy to HuggingFace Spaces / check-secret (push) Waiting to run
Deploy to HuggingFace Spaces / deploy (push) Blocked by required conditions
Create and publish Docker images with specific build args / build-main-image (linux/amd64) (push) Waiting to run
Create and publish Docker images with specific build args / build-main-image (linux/arm64) (push) Waiting to run
Create and publish Docker images with specific build args / build-cuda-image (linux/amd64) (push) Waiting to run
Create and publish Docker images with specific build args / build-cuda-image (linux/arm64) (push) Waiting to run
Create and publish Docker images with specific build args / build-ollama-image (linux/amd64) (push) Waiting to run
Create and publish Docker images with specific build args / build-ollama-image (linux/arm64) (push) Waiting to run
Create and publish Docker images with specific build args / merge-main-images (push) Blocked by required conditions
Create and publish Docker images with specific build args / merge-cuda-images (push) Blocked by required conditions
Create and publish Docker images with specific build args / merge-ollama-images (push) Blocked by required conditions
Python CI / Format Backend (3.11.x) (push) Waiting to run
Python CI / Format Backend (3.12.x) (push) Waiting to run
Frontend Build / Format & Build Frontend (push) Waiting to run
Frontend Build / Frontend Unit Tests (push) Waiting to run

This commit is contained in:
Timothy Jaeryang Baek 2025-05-26 19:53:29 +04:00
parent 040f29d058
commit caa5ad44d4

View File

@ -90,44 +90,45 @@ export const sanitizeResponseContent = (content: string) => {
};
/**
 * Post-processes response content: passes it through `processChineseContent`
 * and then strips leading and trailing whitespace from the result.
 *
 * @param content - Raw response text.
 * @returns The processed text with surrounding whitespace removed.
 */
export const processResponseContent = (content: string) => {
	const processed = processChineseContent(content);
	return processed.trim();
};
/**
 * Reports whether the given text contains a character from the Unicode Han script.
 *
 * @param char - Text to inspect. The pattern is unanchored, so a Han character
 *   anywhere in the string counts as a match.
 * @returns `true` when a Han-script character is present, otherwise `false`.
 */
function isChineseChar(char: string): boolean {
	const hanScript = /\p{Script=Han}/u;
	return hanScript.test(char);
}
// Tackle "Model output issue not following the standard Markdown/LaTeX format" in Chinese.
//
// Splits `content` on '\n', rewrites every line that contains a character in the
// range U+4E00–U+9FA5, joins the lines back with '\n' and returns the trimmed result.
// Lines without such a character are passed through unchanged.
//
// NOTE(review): this body looks like lines from two revisions merged together (it both
// calls `processChineseContent` per line and repeats the parentheses handling inline).
// Confirm which form is intended before relying on it.
function processChineseContent(content: string): string {
// This function is used to process the response content before it is rendered.
const lines = content.split('\n');
const processedLines = lines.map((line) => {
// Only lines containing a character in U+4E00–U+9FA5 are touched.
if (/[\u4e00-\u9fa5]/.test(line)) {
// NOTE(review): this is a call to the enclosing function's own name. A single line that
// contains a CJK character would reach this same call again — confirm this is intended.
line = processChineseContent(line);
// Problems caused by Chinese parentheses
/* Description:
* When `*` has Chinese parentheses on the inside, the Markdown parser ignores the bold or italic style.
* - e.g. `**中文名（English）**中文内容` is rendered literally,
* instead of as `<strong>中文名（English）</strong>中文内容`.
* Solution:
* Adding a space before and after the bold/italic part solves the problem.
* - e.g. `**中文名（English）**中文内容` -> ` **中文名（English）** 中文内容`
* Note:
* A similar problem was found with English parentheses and other fullwidth delimiters,
* but they are not handled here because they are less likely to appear in LLM output.
* Change the behavior in the future if needed.
*/
// Skip the helper calls entirely when the line has no emphasis marker.
if (line.includes('*')) {
// NOTE(review): the last two arguments below are empty strings although the comments
// talk about Chinese parentheses — presumably `（` and `）` were intended; confirm.
// Handle **bold** with Chinese parentheses
line = processChineseParentheses(line, '**', '', '');
// Handle *italic* with Chinese parentheses
line = processChineseParentheses(line, '*', '', '');
}
}
return line;
});
content = processedLines.join('\n');
return content.trim();
};
// Tackle "Model output issue not following the standard Markdown/LaTeX format" in Chinese.
/**
 * Repairs bold/italic Markdown markers on a single line when they wrap text that
 * contains Chinese (fullwidth) parentheses.
 *
 * Problem:
 *   When `*` has Chinese parentheses on the inside, the Markdown parser ignores the
 *   bold or italic style.
 *   - e.g. `**中文名（English）**中文内容` is rendered literally,
 *     instead of as `<strong>中文名（English）</strong>中文内容`.
 * Solution:
 *   Adding a space before and after the bold/italic part solves the problem.
 *   - e.g. `**中文名（English）**中文内容` -> ` **中文名（English）** 中文内容`
 * Note:
 *   A similar problem was found with English parentheses and other fullwidth
 *   delimiters, but they are not handled here because they are less likely to appear
 *   in LLM output. Change the behavior in the future if needed.
 *
 * The rewriting itself is delegated to `processChineseParentheses`, which is called
 * once for `**` and then once for `*`, each time with the fullwidth parenthesis pair.
 *
 * @param line - One line of response text.
 * @returns The line after both helper passes, or the input unchanged when it contains no `*`.
 */
function processChineseContent(line: string): string {
	// No emphasis marker on this line: nothing to repair.
	if (!line.includes('*')) {
		return line;
	}

	// Handle **bold** with Chinese parentheses first, so the `*` pass sees the result.
	const afterBold = processChineseParentheses(line, '**', '（', '）');
	// Handle *italic* with Chinese parentheses
	return processChineseParentheses(afterBold, '*', '（', '）');
}
/**
 * Reports whether the given text contains a character from the Unicode Han script.
 *
 * @param char - Text to inspect. The pattern is unanchored, so a Han character
 *   anywhere in the string counts as a match.
 * @returns `true` when a Han-script character is present, otherwise `false`.
 */
function isChineseChar(char: string): boolean {
	// The stray second `return content;` was unreachable and referenced a name that
	// is not declared in this function, so it has been dropped.
	return /\p{Script=Han}/u.test(char);
}
// Helper function for `processChineseContent`