mirror of
https://github.com/open-webui/open-webui
synced 2024-11-07 09:09:53 +00:00
refac: extractSentences
This commit is contained in:
parent
6ee94c5e97
commit
8e2c377a21
@ -511,12 +511,31 @@ export const removeFormattings = (str) => {
|
|||||||
};
|
};
|
||||||
|
|
||||||
export const extractSentences = (text) => {
|
export const extractSentences = (text) => {
|
||||||
// Split the paragraph into sentences based on common punctuation marks
|
// This regular expression matches code blocks marked by triple backticks
|
||||||
const sentences = text.split(/(?<=[.!?])\s+/);
|
const codeBlockRegex = /```[\s\S]*?```/g;
|
||||||
|
|
||||||
|
let codeBlocks = [];
|
||||||
|
let index = 0;
|
||||||
|
|
||||||
|
// Temporarily replace code blocks with placeholders and store the blocks separately
|
||||||
|
text = text.replace(codeBlockRegex, (match) => {
|
||||||
|
let placeholder = `\u0000${index}\u0000`; // Use a unique placeholder
|
||||||
|
codeBlocks[index++] = match;
|
||||||
|
return placeholder;
|
||||||
|
});
|
||||||
|
|
||||||
|
// Split the modified text into sentences based on common punctuation marks, avoiding these blocks
|
||||||
|
let sentences = text.split(/(?<=[.!?])\s+/);
|
||||||
|
|
||||||
|
// Restore code blocks and process sentences
|
||||||
|
sentences = sentences.map((sentence) => {
|
||||||
|
// Check if the sentence includes a placeholder for a code block
|
||||||
|
return sentence.replace(/\u0000(\d+)\u0000/g, (_, idx) => codeBlocks[idx]);
|
||||||
|
});
|
||||||
|
|
||||||
return sentences
|
return sentences
|
||||||
.map((sentence) => removeFormattings(removeEmojis(sentence.trim())))
|
.map((sentence) => removeFormattings(removeEmojis(sentence.trim())))
|
||||||
.filter((sentence) => sentence !== '');
|
.filter((sentence) => sentence);
|
||||||
};
|
};
|
||||||
|
|
||||||
export const extractSentencesForAudio = (text) => {
|
export const extractSentencesForAudio = (text) => {
|
||||||
|
Loading…
Reference in New Issue
Block a user