diff --git a/src/lib/utils/index.ts b/src/lib/utils/index.ts index 2677a763b..fea87e2d5 100644 --- a/src/lib/utils/index.ts +++ b/src/lib/utils/index.ts @@ -90,44 +90,45 @@ export const sanitizeResponseContent = (content: string) => { }; export const processResponseContent = (content: string) => { + content = processChineseContent(content); + return content.trim(); +}; + +function isChineseChar(char: string): boolean { + return /\p{Script=Han}/u.test(char); +} + +// Tackle "Model output issue not following the standard Markdown/LaTeX format" in Chinese. +function processChineseContent(content: string): string { // This function is used to process the response content before the response content is rendered. const lines = content.split('\n'); const processedLines = lines.map((line) => { if (/[\u4e00-\u9fa5]/.test(line)) { - line = processChineseContent(line); + // Problems caused by Chinese parentheses + /* Discription: + * When `*` has Chinese parentheses on the inside, markdown parser ignore bold or italic style. + * - e.g. `**中文名(English)**中文内容` will be parsed directly, + * instead of `中文名(English)中文内容`. + * Solution: + * Adding a `space` before and after the bold/italic part can solve the problem. + * - e.g. `**中文名(English)**中文内容` -> ` **中文名(English)** 中文内容` + * Note: + * Similar problem was found with English parentheses and other full delimiters, + * but they are not handled here because they are less likely to appear in LLM output. + * Change the behavior in future if needed. + */ + if (line.includes('*')) { + // Handle **bold** with Chinese parentheses + line = processChineseParentheses(line, '**', '(', ')'); + // Handle *italic* with Chinese parentheses + line = processChineseParentheses(line, '*', '(', ')'); + } } return line; }); content = processedLines.join('\n'); - return content.trim(); -}; -// Tackle "Model output issue not following the standard Markdown/LaTeX format" in Chinese. -function processChineseContent(line: string): string { - // Problems caused by Chinese parentheses - /* Discription: - * When `*` has Chinese parentheses on the inside, markdown parser ignore bold or italic style. - * - e.g. `**中文名(English)**中文内容` will be parsed directly, - * instead of `中文名(English)中文内容`. - * Solution: - * Adding a `space` before and after the bold/italic part can solve the problem. - * - e.g. `**中文名(English)**中文内容` -> ` **中文名(English)** 中文内容` - * Note: - * Similar problem was found with English parentheses and other full delimiters, - * but they are not handled here because they are less likely to appear in LLM output. - * Change the behavior in future if needed. - */ - if (line.includes('*')) { - // Handle **bold** with Chinese parentheses - line = processChineseParentheses(line, '**', '(', ')'); - // Handle *italic* with Chinese parentheses - line = processChineseParentheses(line, '*', '(', ')'); - } - return line; -} - -function isChineseChar(char: string): boolean { - return /\p{Script=Han}/u.test(char); + return content; } // Helper function for `processChineseContent`