feat: Add control for how message content is split for TTS generation requests

This commit is contained in:
kiosion
2024-08-24 20:35:42 -04:00
parent f30428754f
commit 3967c34261
8 changed files with 277 additions and 157 deletions

View File

@@ -408,7 +408,7 @@ const convertOpenAIMessages = (convo) => {
let currentId = '';
let lastId = null;
for (let message_id in mapping) {
for (const message_id in mapping) {
const message = mapping[message_id];
currentId = message_id;
try {
@@ -442,7 +442,7 @@ const convertOpenAIMessages = (convo) => {
}
}
let history = {};
const history: Record<PropertyKey, (typeof messages)[number]> = {};
messages.forEach((obj) => (history[obj.id] = obj));
const chat = {
@@ -481,7 +481,7 @@ const validateChat = (chat) => {
}
// Every message's content should be a string
for (let message of messages) {
for (const message of messages) {
if (typeof message.content !== 'string') {
return false;
}
@@ -494,7 +494,7 @@ export const convertOpenAIChats = (_chats) => {
// Create a list of dictionaries with each conversation from import
const chats = [];
let failed = 0;
for (let convo of _chats) {
for (const convo of _chats) {
const chat = convertOpenAIMessages(convo);
if (validateChat(chat)) {
@@ -513,7 +513,7 @@ export const convertOpenAIChats = (_chats) => {
return chats;
};
export const isValidHttpUrl = (string) => {
export const isValidHttpUrl = (string: string) => {
let url;
try {
@@ -525,7 +525,7 @@ export const isValidHttpUrl = (string) => {
return url.protocol === 'http:' || url.protocol === 'https:';
};
export const removeEmojis = (str) => {
export const removeEmojis = (str: string) => {
// Regular expression to match emojis
const emojiRegex = /[\uD800-\uDBFF][\uDC00-\uDFFF]|\uD83C[\uDC00-\uDFFF]|\uD83D[\uDC00-\uDE4F]/g;
@@ -533,20 +533,24 @@ export const removeEmojis = (str) => {
return str.replace(emojiRegex, '');
};
export const removeFormattings = (str) => {
/**
 * Strips asterisk- and backtick-delimited spans from a string.
 *
 * Two passes are applied in order:
 *  1. every shortest span that starts and ends with `*` on a single line is removed,
 *     delimiters and enclosed text included;
 *  2. every shortest span that starts and ends with triple backticks is removed; this
 *     pass may cross line breaks.
 *
 * @param str - Text to clean.
 * @returns The text with all matched spans deleted.
 */
export const removeFormattings = (str: string) => {
	// No `s` flag here, so `.` stops at a newline and a span cannot cross lines.
	const withoutAsteriskSpans = str.replace(/(\*)(.*?)\1/g, '');

	// The `s` flag lets `.` match newlines, so multi-line fenced blocks are removed whole.
	const withoutFencedSpans = withoutAsteriskSpans.replace(/(```)(.*?)\1/gs, '');

	return withoutFencedSpans;
};
export const extractSentences = (text) => {
// This regular expression matches code blocks marked by triple backticks
const codeBlockRegex = /```[\s\S]*?```/g;
/**
 * Cleans a piece of message text before it is used for speech synthesis.
 *
 * Emojis and formatting are stripped first and the result is trimmed last. Trimming
 * after the cleanup matters: text that is reduced to whitespace by the stripping (for
 * example emojis separated by spaces) comes back as an empty string, which callers
 * that filter on truthiness can then discard.
 *
 * @param content - Raw text of a sentence or paragraph.
 * @returns The cleaned text with no leading or trailing whitespace.
 */
export const prepareTextForTTS = (content: string): string => {
	return removeFormattings(removeEmojis(content)).trim();
};
let codeBlocks = [];
// Matches fenced code blocks: an opening triple backtick, the shortest possible run of
// any characters (including newlines), and a closing triple backtick. The `g` flag makes
// `String.prototype.replace` visit every block in the text.
const codeBlockRegex = /```[\s\S]*?```/g;
export const extractSentences = (text: string) => {
const codeBlocks: string[] = [];
let index = 0;
// Temporarily replace code blocks with placeholders and store the blocks separately
text = text.replace(codeBlockRegex, (match) => {
let placeholder = `\u0000${index}\u0000`; // Use a unique placeholder
const placeholder = `\u0000${index}\u0000`; // Use a unique placeholder
codeBlocks[index++] = match;
return placeholder;
});
@@ -561,11 +565,36 @@ export const extractSentences = (text) => {
});
return sentences
.map((sentence) => removeFormattings(removeEmojis(sentence.trim())))
.filter((sentence) => sentence);
.map(prepareTextForTTS)
.filter(Boolean);
};
export const extractSentencesForAudio = (text) => {
/**
 * Splits message text into paragraphs, one entry per run of newline-separated text.
 *
 * Fenced code blocks are swapped for NUL-delimited, indexed placeholders before the
 * split so that newlines inside a block never start a new paragraph. The blocks are put
 * back afterwards, every paragraph is passed through `prepareTextForTTS`, and paragraphs
 * that end up empty are dropped.
 *
 * @param text - Raw message content.
 * @returns The cleaned, non-empty paragraphs in their original order.
 */
export const extractParagraphsForAudio = (text: string): string[] => {
	const codeBlocks: string[] = [];

	// Stash each code block and leave a placeholder carrying its index in the stash.
	const withPlaceholders = text.replace(codeBlockRegex, (match) => {
		const placeholder = `\u0000${codeBlocks.length}\u0000`;
		codeBlocks.push(match);
		return placeholder;
	});

	return (
		withPlaceholders
			// One or more consecutive newlines separate paragraphs.
			.split(/\n+/)
			.map((paragraph) =>
				paragraph.replace(
					/\u0000(\d+)\u0000/g,
					// The capture is a string, so convert it before indexing. If the input
					// already contained a placeholder-looking sequence with no stored block,
					// keep it as-is instead of inserting the text "undefined".
					(placeholder: string, idx: string) => codeBlocks[Number(idx)] ?? placeholder
				)
			)
			.map(prepareTextForTTS)
			.filter(Boolean)
	);
};
export const extractSentencesForAudio = (text: string) => {
return extractSentences(text).reduce((mergedTexts, currentText) => {
const lastIndex = mergedTexts.length - 1;
if (lastIndex >= 0) {
@@ -580,7 +609,7 @@ export const extractSentencesForAudio = (text) => {
mergedTexts.push(currentText);
}
return mergedTexts;
}, []);
}, [] as string[]);
};
export const blobToFile = (blob, fileName) => {