feat: Add control for how message content is split for TTS generation requests
This commit is contained in:
@@ -408,7 +408,7 @@ const convertOpenAIMessages = (convo) => {
|
||||
let currentId = '';
|
||||
let lastId = null;
|
||||
|
||||
for (let message_id in mapping) {
|
||||
for (const message_id in mapping) {
|
||||
const message = mapping[message_id];
|
||||
currentId = message_id;
|
||||
try {
|
||||
@@ -442,7 +442,7 @@ const convertOpenAIMessages = (convo) => {
|
||||
}
|
||||
}
|
||||
|
||||
let history = {};
|
||||
const history: Record<PropertyKey, (typeof messages)[number]> = {};
|
||||
messages.forEach((obj) => (history[obj.id] = obj));
|
||||
|
||||
const chat = {
|
||||
@@ -481,7 +481,7 @@ const validateChat = (chat) => {
|
||||
}
|
||||
|
||||
// Every message's content should be a string
|
||||
for (let message of messages) {
|
||||
for (const message of messages) {
|
||||
if (typeof message.content !== 'string') {
|
||||
return false;
|
||||
}
|
||||
@@ -494,7 +494,7 @@ export const convertOpenAIChats = (_chats) => {
|
||||
// Create a list of dictionaries with each conversation from import
|
||||
const chats = [];
|
||||
let failed = 0;
|
||||
for (let convo of _chats) {
|
||||
for (const convo of _chats) {
|
||||
const chat = convertOpenAIMessages(convo);
|
||||
|
||||
if (validateChat(chat)) {
|
||||
@@ -513,7 +513,7 @@ export const convertOpenAIChats = (_chats) => {
|
||||
return chats;
|
||||
};
|
||||
|
||||
export const isValidHttpUrl = (string) => {
|
||||
export const isValidHttpUrl = (string: string) => {
|
||||
let url;
|
||||
|
||||
try {
|
||||
@@ -525,7 +525,7 @@ export const isValidHttpUrl = (string) => {
|
||||
return url.protocol === 'http:' || url.protocol === 'https:';
|
||||
};
|
||||
|
||||
export const removeEmojis = (str) => {
|
||||
export const removeEmojis = (str: string) => {
|
||||
// Regular expression to match emojis
|
||||
const emojiRegex = /[\uD800-\uDBFF][\uDC00-\uDFFF]|\uD83C[\uDC00-\uDFFF]|\uD83D[\uDC00-\uDE4F]/g;
|
||||
|
||||
@@ -533,20 +533,24 @@ export const removeEmojis = (str) => {
|
||||
return str.replace(emojiRegex, '');
|
||||
};
|
||||
|
||||
/**
 * Removes lightweight markup spans from `str`.
 *
 * Two passes are applied in order:
 * 1. Every asterisk-delimited span (`*...*`) is deleted together with its
 *    content. `.` does not cross newlines in this pass, so a span must open
 *    and close on the same line. Note that `**bold**` becomes `bold`, because
 *    each `**` pair matches as an empty span.
 * 2. Every triple-backtick fenced span is deleted together with its content;
 *    the `s` flag lets this pass span multiple lines.
 *
 * @param str - Text to clean.
 * @returns `str` with the matched spans removed.
 */
export const removeFormattings = (str: string): string => {
  return str.replace(/(\*)(.*?)\1/g, '').replace(/(```)(.*?)\1/gs, '');
};
|
||||
|
||||
export const extractSentences = (text) => {
|
||||
// This regular expression matches code blocks marked by triple backticks
|
||||
const codeBlockRegex = /```[\s\S]*?```/g;
|
||||
/**
 * Normalises a piece of message content before it is sent for speech
 * synthesis: surrounding whitespace is trimmed, then the result is passed
 * through `removeEmojis` and finally `removeFormattings`.
 *
 * @param content - Raw text (a sentence, paragraph, or whole message).
 * @returns The cleaned text; may be an empty string.
 */
export const prepareTextForTTS = (content: string): string => {
  return removeFormattings(removeEmojis(content.trim()));
};
|
||||
|
||||
let codeBlocks = [];
|
||||
// Matches a fenced code block: an opening triple backtick, any characters
// including newlines (matched lazily), and the next closing triple backtick.
// The `g` flag makes `String.prototype.replace` substitute every block.
const codeBlockRegex = /```[\s\S]*?```/g;
|
||||
|
||||
export const extractSentences = (text: string) => {
|
||||
const codeBlocks: string[] = [];
|
||||
let index = 0;
|
||||
|
||||
// Temporarily replace code blocks with placeholders and store the blocks separately
|
||||
text = text.replace(codeBlockRegex, (match) => {
|
||||
let placeholder = `\u0000${index}\u0000`; // Use a unique placeholder
|
||||
const placeholder = `\u0000${index}\u0000`; // Use a unique placeholder
|
||||
codeBlocks[index++] = match;
|
||||
return placeholder;
|
||||
});
|
||||
@@ -561,11 +565,36 @@ export const extractSentences = (text) => {
|
||||
});
|
||||
|
||||
return sentences
|
||||
.map((sentence) => removeFormattings(removeEmojis(sentence.trim())))
|
||||
.filter((sentence) => sentence);
|
||||
.map(prepareTextForTTS)
|
||||
.filter(Boolean);
|
||||
};
|
||||
|
||||
export const extractSentencesForAudio = (text) => {
|
||||
/**
 * Splits `text` into paragraphs, one entry per chunk to be synthesised.
 *
 * Fenced code blocks (matched by `codeBlockRegex`) are swapped for
 * NUL-delimited numeric placeholders before the text is split on runs of
 * newlines, so a block that contains newlines stays inside a single
 * paragraph. The blocks are then restored, each paragraph is passed through
 * `prepareTextForTTS`, and paragraphs that end up empty are dropped.
 *
 * @param text - Raw message content.
 * @returns The non-empty paragraphs after `prepareTextForTTS`.
 */
export const extractParagraphsForAudio = (text: string): string[] => {
  const codeBlocks: string[] = [];

  // Temporarily replace code blocks with placeholders and store the blocks separately.
  const withPlaceholders = text.replace(codeBlockRegex, (match) => {
    const placeholder = `\u0000${codeBlocks.length}\u0000`; // Use a unique placeholder
    codeBlocks.push(match);
    return placeholder;
  });

  return (
    withPlaceholders
      // Split on newlines; code blocks are hidden behind placeholders at this point.
      .split(/\n+/)
      // Restore code blocks. The capture group is a string, so convert it before
      // indexing; if the input already contained a placeholder-like sequence with
      // no stored block, keep it verbatim instead of inserting "undefined".
      .map((paragraph) =>
        paragraph.replace(
          /\u0000(\d+)\u0000/g,
          (placeholder, idx: string) => codeBlocks[Number(idx)] ?? placeholder
        )
      )
      .map(prepareTextForTTS)
      .filter(Boolean)
  );
};
|
||||
|
||||
export const extractSentencesForAudio = (text: string) => {
|
||||
return extractSentences(text).reduce((mergedTexts, currentText) => {
|
||||
const lastIndex = mergedTexts.length - 1;
|
||||
if (lastIndex >= 0) {
|
||||
@@ -580,7 +609,7 @@ export const extractSentencesForAudio = (text) => {
|
||||
mergedTexts.push(currentText);
|
||||
}
|
||||
return mergedTexts;
|
||||
}, []);
|
||||
}, [] as string[]);
|
||||
};
|
||||
|
||||
export const blobToFile = (blob, fileName) => {
|
||||
|
||||
Reference in New Issue
Block a user