diff --git a/src/lib/utils/index.ts b/src/lib/utils/index.ts
index 755ef3550..c169a78b1 100644
--- a/src/lib/utils/index.ts
+++ b/src/lib/utils/index.ts
@@ -511,12 +511,34 @@ export const removeFormattings = (str) => {
 };
 
+/**
+ * Splits text into sentences at whitespace that follows '.', '!' or '?',
+ * never splitting inside a triple-backtick code block.
+ * Every sentence is trimmed and passed through removeEmojis and
+ * removeFormattings; sentences that end up empty are dropped.
+ */
 export const extractSentences = (text) => {
-	// Split the paragraph into sentences based on common punctuation marks
-	const sentences = text.split(/(?<=[.!?])\s+/);
+	// Matches fenced code blocks delimited by triple backticks (non-greedy, may span lines)
+	const codeBlockRegex = /```[\s\S]*?```/g;
+
+	// Swap every code block for a NUL-delimited numeric placeholder so that
+	// punctuation and whitespace inside code never trigger a sentence break
+	const codeBlocks = [];
+	const maskedText = text.replace(codeBlockRegex, (match) => {
+		codeBlocks.push(match);
+		return `\u0000${codeBlocks.length - 1}\u0000`;
+	});
+
+	// Split after sentence-ending punctuation, then put the code blocks back;
+	// a placeholder-shaped token with no stored block is left untouched
+	const sentences = maskedText
+		.split(/(?<=[.!?])\s+/)
+		.map((sentence) =>
+			sentence.replace(/\u0000(\d+)\u0000/g, (token, idx) => (Number(idx) < codeBlocks.length ? codeBlocks[Number(idx)] : token))
+		);
 
 	return sentences
 		.map((sentence) => removeFormattings(removeEmojis(sentence.trim())))
-		.filter((sentence) => sentence !== '');
+		.filter((sentence) => sentence);
 };
 
 export const extractSentencesForAudio = (text) => {