Separate into processChineseContent

This commit is contained in:
Yu QX 2025-05-26 20:43:21 +08:00
parent 254dd4246e
commit b9f74a66b2

View File

@ -91,27 +91,16 @@ export const sanitizeResponseContent = (content: string) => {
export const processResponseContent = (content: string) => { export const processResponseContent = (content: string) => {
// This function is used to process the response content before the response content is rendered. // This function is used to process the response content before the response content is rendered.
/* Description: content = processChineseContent(content);
* In future development, it is recommended to separate `line to line` processes and `whole content` processes. return content.trim();
* To improve the maintainability, contents here are numbered with indexes to indicate their function };
* because the solution to problems under same category might be scattered between `line to line` and `whole content`.
*
* Index:
* 1. Tackle "Model output issue not following the standard Markdown/LaTeX format".
* - This part obeys the rule of modifying original text as **LITTLE** as possible.
* - Detailed documentation of rendering problems must be provided in comments.
* 1.1. Special cases
* 1.1.1. (Chinese, CN)
* 1.1.1.1. Handle **bold** with Chinese parentheses
* 1.1.1.2. Handle *italic* with Chinese parentheses
*/
// Process from line to line. function processChineseContent(content: string): string {
// Tackle "Model output issue not following the standard Markdown/LaTeX format" in Chinese.
const lines = content.split('\n'); const lines = content.split('\n');
const processedLines = lines.map((line) => { const processedLines = lines.map((line) => {
// 1.1.1. 中文 (Chinese, CN)
if (/[\u4e00-\u9fa5]/.test(line)) { if (/[\u4e00-\u9fa5]/.test(line)) {
// 1.1.1.x Problems caused by Chinese parentheses // Problems caused by Chinese parentheses
/* Description: /* Description:
* When `*` has Chinese parentheses on the inside, the markdown parser ignores the bold or italic style. * When `*` has Chinese parentheses on the inside, the markdown parser ignores the bold or italic style.
* - e.g. `**中文名English**中文内容` will be parsed directly, * - e.g. `**中文名English**中文内容` will be parsed directly,
@ -125,25 +114,23 @@ export const processResponseContent = (content: string) => {
* Change the behavior in future if needed. * Change the behavior in future if needed.
*/ */
if (line.includes('*')) { if (line.includes('*')) {
// 1.1.1.1. Handle **bold** with Chinese parentheses // Handle **bold** with Chinese parentheses
line = processResponseContent_CN_ParenthesesRelated(line, '**', '', ''); line = processChineseContent_ParenthesesRelated(line, '**', '', '');
// 1.1.1.2. Handle *italic* with Chinese parentheses // Handle *italic* with Chinese parentheses
line = processResponseContent_CN_ParenthesesRelated(line, '*', '', ''); line = processChineseContent_ParenthesesRelated(line, '*', '', '');
} }
} }
return line; return line;
}); });
content = processedLines.join('\n'); return processedLines.join('\n');
}
return content.trim();
};
function isChineseChar(char: string): boolean { function isChineseChar(char: string): boolean {
return /\p{Script=Han}/u.test(char); return /\p{Script=Han}/u.test(char);
} }
// Helper function for `processResponseContent` case `1.1.1.1` and `1.1.1.2` // Helper function for `processChineseContent`
function processResponseContent_CN_ParenthesesRelated( function processChineseContent_ParenthesesRelated(
line: string, line: string,
symbol: string, symbol: string,
leftSymbol: string, leftSymbol: string,