This commit is contained in:
Timothy J. Baek 2024-06-13 02:29:56 -07:00
parent 5300d2c531
commit b8136951e4
2 changed files with 120 additions and 52 deletions

View File

@ -887,7 +887,7 @@ async def generate_emoji(form_data: dict, user=Depends(get_verified_user)):
model = app.state.MODELS[model_id]
template = '''
You are a perceptive assistant skilled at interpreting emotions from a provided message. Your task is to reflect the speaker's likely facial expression through a fitting emoji. Prioritize using diverse facial expression emojis to convey the nuanced emotions expressed in the text. Please choose ones that vividly represent the speaker's mood or reaction.
Your task is to reflect the speaker's likely facial expression through a fitting emoji. Interpret emotions from the message and reflect their facial expression using fitting, diverse emojis (e.g., 😊, 😢, 😡, 😱).
Message: """{{prompt}}"""
'''

View File

@ -41,6 +41,7 @@
let assistantSentenceIdx = -1;
let audioQueue = [];
let emojiQueue = [];
$: assistantSentences = extractSentences(assistantMessage).reduce((mergedTexts, currentText) => {
const lastIndex = mergedTexts.length - 1;
@ -65,8 +66,6 @@
let mediaRecorder;
let audioChunks = [];
$: console.log('hasStartedSpeaking', hasStartedSpeaking);
let videoInputDevices = [];
let selectedVideoInputDeviceId = null;
@ -274,6 +273,7 @@
}
await tick();
emojiQueue = [];
audioQueue = [];
await tick();
@ -354,6 +354,14 @@
console.log('playAudioHandler', audioQueue, assistantSpeaking, audioQueue.length > 0);
if (!assistantSpeaking && !interrupted && audioQueue.length > 0) {
assistantSpeaking = true;
if ($settings?.showEmojiInCall ?? false) {
if (emojiQueue.length > 0) {
emoji = emojiQueue.shift();
emojiQueue = emojiQueue;
}
}
const audioToPlay = audioQueue.shift(); // Shift the audio out from queue before playing.
audioQueue = audioQueue;
await playAudio(audioToPlay);
@ -363,9 +371,39 @@
const setContentAudio = async (content, idx) => {
if (assistantSentenceAudios[idx] === undefined) {
console.log('%c%s', 'color: red; font-size: 20px;', content);
// Wait for the previous audio to be loaded
if (idx > 0) {
await new Promise((resolve) => {
const check = setInterval(() => {
if (
assistantSentenceAudios[idx - 1] !== undefined &&
assistantSentenceAudios[idx - 1] !== null
) {
clearInterval(check);
resolve();
}
}, 100);
});
}
assistantSentenceAudios[idx] = null;
if ($settings?.showEmojiInCall ?? false) {
const sentenceEmoji = await generateEmoji(localStorage.token, modelId, content);
if (sentenceEmoji) {
// Big red text with content and emoji
console.log('%c%s', 'color: blue; font-size: 10px;', `${sentenceEmoji}: ${content}`);
if (/\p{Extended_Pictographic}/u.test(sentenceEmoji)) {
emojiQueue.push(sentenceEmoji.match(/\p{Extended_Pictographic}/gu)[0]);
emojiQueue = emojiQueue;
}
}
await tick();
}
const res = await synthesizeOpenAISpeech(
localStorage.token,
$settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice,
@ -381,6 +419,9 @@
const blobUrl = URL.createObjectURL(blob);
const audio = new Audio(blobUrl);
assistantSentenceAudios[idx] = audio;
console.log('%c%s', 'color: red; font-size: 20px;', content);
audioQueue.push(audio);
audioQueue = audioQueue;
}
@ -388,9 +429,9 @@
};
const stopRecordingCallback = async (_continue = true) => {
console.log('%c%s', 'color: red; font-size: 20px;', '🚨 stopRecordingCallback 🚨');
if ($showCallOverlay) {
console.log('%c%s', 'color: red; font-size: 20px;', '🚨 stopRecordingCallback 🚨');
// deep copy the audioChunks array
const _audioChunks = audioChunks.slice(0);
@ -448,13 +489,31 @@
mediaRecorder.start();
};
$: if ($showCallOverlay) {
startRecording();
} else {
stopCamera();
stopAllAudio();
stopRecordingCallback(false);
}
/**
 * Restores all per-response assistant state to its initial values.
 *
 * Clears the interruption flag, the accumulated message text, the sentence
 * cursor, the per-sentence audio map, both playback queues (audio and emoji),
 * and the emoji currently assigned for display.
 *
 * Declared `async` so existing callers can keep `await`-ing it; the body itself
 * performs no asynchronous work, so every reset has already happened by the
 * time the returned promise is created.
 */
const resetAssistantMessage = async () => {
	interrupted = false;

	assistantMessage = '';
	assistantSentenceIdx = -1;
	assistantSentenceAudios = {}; // Reset audio tracking

	// Assigning a fresh array is itself a reactive assignment, so no follow-up
	// `x = x` self-assignment is needed to notify dependents.
	audioQueue = []; // Clear the audio queue
	emojiQueue = []; // Clear emojis that were paired with the discarded audio

	emoji = null;
};
// Reactive statement keyed on the `$showCallOverlay` store value. The body is wrapped in an
// async IIFE so that `await` can be used inside the `$:` statement.
// NOTE(review): the promise returned by the IIFE is neither awaited nor given a `.catch`,
// so a rejection from any call below would go unhandled — confirm that is acceptable.
$: (async () => {
if ($showCallOverlay) {
// Overlay is shown: reset leftover assistant state, wait for a tick, then start recording.
await resetAssistantMessage();
await tick();
startRecording();
} else {
// Overlay is hidden: stop the camera and all audio, then run the stop-recording callback
// with its `_continue` argument set to false.
stopCamera();
stopAllAudio();
stopRecordingCallback(false);
}
})();
$: {
if (audioQueue.length > 0 && !assistantSpeaking) {
@ -463,57 +522,66 @@
}
onMount(() => {
console.log(eventTarget);
eventTarget.addEventListener('chat:start', async (e) => {
console.log('Chat start event:', e);
interrupted = false;
assistantMessage = '';
assistantSentenceIdx = -1;
assistantSentenceAudios = {}; // Reset audio tracking
audioQueue = []; // Clear the audio queue
chatStreaming = true;
if ($showCallOverlay) {
console.log('Chat start event:', e);
await resetAssistantMessage();
await tick();
chatStreaming = true;
}
});
eventTarget.addEventListener('chat', async (e) => {
const { content } = e.detail;
assistantMessage += content;
await tick();
if ($showCallOverlay) {
const { content } = e.detail;
assistantMessage += content;
await tick();
if (!interrupted) {
if ($config.audio.tts.engine !== '') {
assistantSentenceIdx = assistantSentences.length - 2;
if (!interrupted) {
if ($config.audio.tts.engine !== '') {
assistantSentenceIdx = assistantSentences.length - 2;
if (assistantSentenceIdx >= 0 && !assistantSentenceAudios[assistantSentenceIdx]) {
await tick();
setContentAudio(assistantSentences[assistantSentenceIdx], assistantSentenceIdx);
if (assistantSentenceIdx >= 0 && !assistantSentenceAudios[assistantSentenceIdx]) {
await tick();
setContentAudio(assistantSentences[assistantSentenceIdx], assistantSentenceIdx);
}
}
}
}
chatStreaming = true;
chatStreaming = true;
}
});
eventTarget.addEventListener('chat:finish', async (e) => {
chatStreaming = false;
loading = false;
if ($showCallOverlay) {
chatStreaming = false;
loading = false;
console.log('Chat finish event:', e);
await tick();
console.log('Chat finish event:', e);
await tick();
if (!interrupted) {
if ($config.audio.tts.engine !== '') {
for (const [idx, sentence] of assistantSentences.entries()) {
if (!assistantSentenceAudios[idx]) {
await tick();
setContentAudio(sentence, idx);
if (!interrupted) {
if ($config.audio.tts.engine !== '') {
for (const [idx, sentence] of assistantSentences.entries()) {
if (!assistantSentenceAudios[idx]) {
await tick();
setContentAudio(sentence, idx);
}
}
} else {
if ($settings?.showEmojiInCall ?? false) {
const res = await generateEmoji(localStorage.token, modelId, assistantMessage);
if (res) {
console.log(res);
if (/\p{Extended_Pictographic}/u.test(res)) {
emoji = res.match(/\p{Extended_Pictographic}/gu)[0];
}
}
}
speakSpeechSynthesisHandler(assistantMessage);
}
} else {
emoji = generateEmoji(localStorage.token, modelId, assistantMessage);
speakSpeechSynthesisHandler(assistantMessage);
}
}
});
@ -529,7 +597,7 @@
>
<div class="max-w-lg w-full h-screen max-h-[100dvh] flex flex-col justify-between p-3 md:p-6">
{#if camera}
<div class="flex justify-center items-center w-full min-h-20">
<div class="flex justify-center items-center w-full h-20 min-h-20">
{#if loading}
<svg
class="size-12 text-gray-900 dark:text-gray-400"
@ -573,10 +641,10 @@
style="font-size:{rmsLevel * 100 > 4
? '4.5'
: rmsLevel * 100 > 2
? '4'
? '4.25'
: rmsLevel * 100 > 1
? '3.5'
: '3'}rem;width:100%;text-align:center;"
? '3.75'
: '3.5'}rem;width: 100%; text-align:center;"
>
{emoji}
</div>