Merge pull request #537 from wonderwhy-er/Voice-input-with-fixes

Voice input with fixes
This commit is contained in:
Eduard Ruzga 2024-12-04 21:00:14 +02:00 committed by GitHub
commit 1890c4e193
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 199 additions and 46 deletions

View File

@ -23,45 +23,8 @@ import { ImportButtons } from '~/components/chat/chatExportAndImport/ImportButto
import { ExamplePrompts } from '~/components/chat/ExamplePrompts'; import { ExamplePrompts } from '~/components/chat/ExamplePrompts';
import FilePreview from './FilePreview'; import FilePreview from './FilePreview';
import { ModelSelector } from '~/components/chat/ModelSelector';
// @ts-ignore TODO: Introduce proper types import { SpeechRecognitionButton } from '~/components/chat/SpeechRecognition';
// eslint-disable-next-line @typescript-eslint/no-unused-vars
const ModelSelector = ({ model, setModel, provider, setProvider, modelList, providerList, apiKeys }) => {
return (
<div className="mb-2 flex gap-2 flex-col sm:flex-row">
<select
value={provider?.name}
onChange={(e) => {
setProvider(providerList.find((p: ProviderInfo) => p.name === e.target.value));
const firstModel = [...modelList].find((m) => m.provider == e.target.value);
setModel(firstModel ? firstModel.name : '');
}}
className="flex-1 p-2 rounded-lg border border-bolt-elements-borderColor bg-bolt-elements-prompt-background text-bolt-elements-textPrimary focus:outline-none focus:ring-2 focus:ring-bolt-elements-focus transition-all"
>
{providerList.map((provider: ProviderInfo) => (
<option key={provider.name} value={provider.name}>
{provider.name}
</option>
))}
</select>
<select
key={provider?.name}
value={model}
onChange={(e) => setModel(e.target.value)}
className="flex-1 p-2 rounded-lg border border-bolt-elements-borderColor bg-bolt-elements-prompt-background text-bolt-elements-textPrimary focus:outline-none focus:ring-2 focus:ring-bolt-elements-focus transition-all lg:max-w-[70%]"
>
{[...modelList]
.filter((e) => e.provider == provider?.name && e.name)
.map((modelOption) => (
<option key={modelOption.name} value={modelOption.name}>
{modelOption.label}
</option>
))}
</select>
</div>
);
};
const TEXTAREA_MIN_HEIGHT = 76; const TEXTAREA_MIN_HEIGHT = 76;
@ -92,6 +55,7 @@ interface BaseChatProps {
imageDataList?: string[]; imageDataList?: string[];
setImageDataList?: (dataList: string[]) => void; setImageDataList?: (dataList: string[]) => void;
} }
export const BaseChat = React.forwardRef<HTMLDivElement, BaseChatProps>( export const BaseChat = React.forwardRef<HTMLDivElement, BaseChatProps>(
( (
{ {
@ -126,7 +90,11 @@ export const BaseChat = React.forwardRef<HTMLDivElement, BaseChatProps>(
const [apiKeys, setApiKeys] = useState<Record<string, string>>({}); const [apiKeys, setApiKeys] = useState<Record<string, string>>({});
const [modelList, setModelList] = useState(MODEL_LIST); const [modelList, setModelList] = useState(MODEL_LIST);
const [isModelSettingsCollapsed, setIsModelSettingsCollapsed] = useState(false); const [isModelSettingsCollapsed, setIsModelSettingsCollapsed] = useState(false);
const [isListening, setIsListening] = useState(false);
const [recognition, setRecognition] = useState<SpeechRecognition | null>(null);
const [transcript, setTranscript] = useState('');
console.log(transcript);
useEffect(() => { useEffect(() => {
// Load API keys from cookies on component mount // Load API keys from cookies on component mount
try { try {
@ -149,8 +117,72 @@ export const BaseChat = React.forwardRef<HTMLDivElement, BaseChatProps>(
initializeModelList().then((modelList) => { initializeModelList().then((modelList) => {
setModelList(modelList); setModelList(modelList);
}); });
if (typeof window !== 'undefined' && ('SpeechRecognition' in window || 'webkitSpeechRecognition' in window)) {
const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
const recognition = new SpeechRecognition();
recognition.continuous = true;
recognition.interimResults = true;
recognition.onresult = (event) => {
const transcript = Array.from(event.results)
.map((result) => result[0])
.map((result) => result.transcript)
.join('');
setTranscript(transcript);
if (handleInputChange) {
const syntheticEvent = {
target: { value: transcript },
} as React.ChangeEvent<HTMLTextAreaElement>;
handleInputChange(syntheticEvent);
}
};
recognition.onerror = (event) => {
console.error('Speech recognition error:', event.error);
setIsListening(false);
};
setRecognition(recognition);
}
}, []); }, []);
const startListening = () => {
if (recognition) {
recognition.start();
setIsListening(true);
}
};
const stopListening = () => {
if (recognition) {
recognition.stop();
setIsListening(false);
}
};
const handleSendMessage = (event: React.UIEvent, messageInput?: string) => {
if (sendMessage) {
sendMessage(event, messageInput);
if (recognition) {
recognition.abort(); // Stop current recognition
setTranscript(''); // Clear transcript
setIsListening(false);
// Clear the input by triggering handleInputChange with empty value
if (handleInputChange) {
const syntheticEvent = {
target: { value: '' },
} as React.ChangeEvent<HTMLTextAreaElement>;
handleInputChange(syntheticEvent);
}
}
}
};
const updateApiKey = (provider: string, key: string) => { const updateApiKey = (provider: string, key: string) => {
try { try {
const updatedApiKeys = { ...apiKeys, [provider]: key }; const updatedApiKeys = { ...apiKeys, [provider]: key };
@ -316,7 +348,6 @@ export const BaseChat = React.forwardRef<HTMLDivElement, BaseChatProps>(
</button> </button>
</div> </div>
<div className={isModelSettingsCollapsed ? 'hidden' : ''}> <div className={isModelSettingsCollapsed ? 'hidden' : ''}>
<ModelSelector <ModelSelector
key={provider?.name + ':' + modelList.length} key={provider?.name + ':' + modelList.length}
@ -395,7 +426,12 @@ export const BaseChat = React.forwardRef<HTMLDivElement, BaseChatProps>(
event.preventDefault(); event.preventDefault();
sendMessage?.(event); if (isStreaming) {
handleStop?.();
return;
}
handleSendMessage?.(event);
} }
}} }}
value={input} value={input}
@ -422,7 +458,7 @@ export const BaseChat = React.forwardRef<HTMLDivElement, BaseChatProps>(
} }
if (input.length > 0 || uploadedFiles.length > 0) { if (input.length > 0 || uploadedFiles.length > 0) {
sendMessage?.(event); handleSendMessage?.(event);
} }
}} }}
/> />
@ -457,6 +493,13 @@ export const BaseChat = React.forwardRef<HTMLDivElement, BaseChatProps>(
</> </>
)} )}
</IconButton> </IconButton>
<SpeechRecognitionButton
isListening={isListening}
onStart={startListening}
onStop={stopListening}
disabled={isStreaming}
/>
{chatStarted && <ClientOnly>{() => <ExportChatButton exportChat={exportChat} />}</ClientOnly>} {chatStarted && <ClientOnly>{() => <ExportChatButton exportChat={exportChat} />}</ClientOnly>}
</div> </div>
{input.length > 3 ? ( {input.length > 3 ? (
@ -471,7 +514,15 @@ export const BaseChat = React.forwardRef<HTMLDivElement, BaseChatProps>(
</div> </div>
</div> </div>
{!chatStarted && ImportButtons(importChat)} {!chatStarted && ImportButtons(importChat)}
{!chatStarted && ExamplePrompts(sendMessage)} {!chatStarted &&
ExamplePrompts((event, messageInput) => {
if (isStreaming) {
handleStop?.();
return;
}
handleSendMessage?.(event, messageInput);
})}
</div> </div>
<ClientOnly>{() => <Workbench chatStarted={chatStarted} isStreaming={isStreaming} />}</ClientOnly> <ClientOnly>{() => <Workbench chatStarted={chatStarted} isStreaming={isStreaming} />}</ClientOnly>
</div> </div>

View File

@ -0,0 +1,63 @@
import type { ProviderInfo } from '~/types/model';
import type { ModelInfo } from '~/utils/types';
interface ModelSelectorProps {
model?: string;
setModel?: (model: string) => void;
provider?: ProviderInfo;
setProvider?: (provider: ProviderInfo) => void;
modelList: ModelInfo[];
providerList: ProviderInfo[];
apiKeys: Record<string, string>;
}
export const ModelSelector = ({
model,
setModel,
provider,
setProvider,
modelList,
providerList,
}: ModelSelectorProps) => {
return (
<div className="mb-2 flex gap-2 flex-col sm:flex-row">
<select
value={provider?.name ?? ''}
onChange={(e) => {
const newProvider = providerList.find((p: ProviderInfo) => p.name === e.target.value);
if (newProvider && setProvider) {
setProvider(newProvider);
}
const firstModel = [...modelList].find((m) => m.provider === e.target.value);
if (firstModel && setModel) {
setModel(firstModel.name);
}
}}
className="flex-1 p-2 rounded-lg border border-bolt-elements-borderColor bg-bolt-elements-prompt-background text-bolt-elements-textPrimary focus:outline-none focus:ring-2 focus:ring-bolt-elements-focus transition-all"
>
{providerList.map((provider: ProviderInfo) => (
<option key={provider.name} value={provider.name}>
{provider.name}
</option>
))}
</select>
<select
key={provider?.name}
value={model}
onChange={(e) => setModel?.(e.target.value)}
className="flex-1 p-2 rounded-lg border border-bolt-elements-borderColor bg-bolt-elements-prompt-background text-bolt-elements-textPrimary focus:outline-none focus:ring-2 focus:ring-bolt-elements-focus transition-all lg:max-w-[70%]"
>
{[...modelList]
.filter((e) => e.provider == provider?.name && e.name)
.map((modelOption) => (
<option key={modelOption.name} value={modelOption.name}>
{modelOption.label}
</option>
))}
</select>
</div>
);
};

View File

@ -0,0 +1,28 @@
import { IconButton } from '~/components/ui/IconButton';
import { classNames } from '~/utils/classNames';
import React from 'react';
export const SpeechRecognitionButton = ({
isListening,
onStart,
onStop,
disabled,
}: {
isListening: boolean;
onStart: () => void;
onStop: () => void;
disabled: boolean;
}) => {
return (
<IconButton
title={isListening ? 'Stop listening' : 'Start speech recognition'}
disabled={disabled}
className={classNames('transition-all', {
'text-bolt-elements-item-contentAccent': isListening,
})}
onClick={isListening ? onStop : onStart}
>
{isListening ? <div className="i-ph:microphone-slash text-xl" /> : <div className="i-ph:microphone text-xl" />}
</IconButton>
);
};

View File

@ -1,3 +1,5 @@
interface Window { interface Window {
showDirectoryPicker(): Promise<FileSystemDirectoryHandle>; showDirectoryPicker(): Promise<FileSystemDirectoryHandle>;
webkitSpeechRecognition: typeof SpeechRecognition;
SpeechRecognition: typeof SpeechRecognition;
} }

View File

@ -11,7 +11,7 @@ interface Logger {
setLevel: (level: DebugLevel) => void; setLevel: (level: DebugLevel) => void;
} }
let currentLevel: DebugLevel = import.meta.env.VITE_LOG_LEVEL ?? import.meta.env.DEV ? 'debug' : 'info'; let currentLevel: DebugLevel = (import.meta.env.VITE_LOG_LEVEL ?? import.meta.env.DEV) ? 'debug' : 'info';
const isWorker = 'HTMLRewriter' in globalThis; const isWorker = 'HTMLRewriter' in globalThis;
const supportsColor = !isWorker; const supportsColor = !isWorker;

View File

@ -101,6 +101,7 @@
"@cloudflare/workers-types": "^4.20241127.0", "@cloudflare/workers-types": "^4.20241127.0",
"@remix-run/dev": "^2.15.0", "@remix-run/dev": "^2.15.0",
"@types/diff": "^5.2.3", "@types/diff": "^5.2.3",
"@types/dom-speech-recognition": "^0.0.4",
"@types/file-saver": "^2.0.7", "@types/file-saver": "^2.0.7",
"@types/js-cookie": "^3.0.6", "@types/js-cookie": "^3.0.6",
"@types/react": "^18.3.12", "@types/react": "^18.3.12",

View File

@ -222,6 +222,9 @@ importers:
'@types/diff': '@types/diff':
specifier: ^5.2.3 specifier: ^5.2.3
version: 5.2.3 version: 5.2.3
'@types/dom-speech-recognition':
specifier: ^0.0.4
version: 0.0.4
'@types/file-saver': '@types/file-saver':
specifier: ^2.0.7 specifier: ^2.0.7
version: 2.0.7 version: 2.0.7
@ -2039,6 +2042,9 @@ packages:
'@types/diff@5.2.3': '@types/diff@5.2.3':
resolution: {integrity: sha512-K0Oqlrq3kQMaO2RhfrNQX5trmt+XLyom88zS0u84nnIcLvFnRUMRRHmrGny5GSM+kNO9IZLARsdQHDzkhAgmrQ==} resolution: {integrity: sha512-K0Oqlrq3kQMaO2RhfrNQX5trmt+XLyom88zS0u84nnIcLvFnRUMRRHmrGny5GSM+kNO9IZLARsdQHDzkhAgmrQ==}
'@types/dom-speech-recognition@0.0.4':
resolution: {integrity: sha512-zf2GwV/G6TdaLwpLDcGTIkHnXf8JEf/viMux+khqKQKDa8/8BAUtXXZS563GnvJ4Fg0PBLGAaFf2GekEVSZ6GQ==}
'@types/eslint@8.56.10': '@types/eslint@8.56.10':
resolution: {integrity: sha512-Shavhk87gCtY2fhXDctcfS3e6FdxWkCx1iUZ9eEUbh7rTqlZT0/IzOkCOVt0fCjcFuZ9FPYfuezTBImfHCDBGQ==} resolution: {integrity: sha512-Shavhk87gCtY2fhXDctcfS3e6FdxWkCx1iUZ9eEUbh7rTqlZT0/IzOkCOVt0fCjcFuZ9FPYfuezTBImfHCDBGQ==}
@ -7464,6 +7470,8 @@ snapshots:
'@types/diff@5.2.3': {} '@types/diff@5.2.3': {}
'@types/dom-speech-recognition@0.0.4': {}
'@types/eslint@8.56.10': '@types/eslint@8.56.10':
dependencies: dependencies:
'@types/estree': 1.0.6 '@types/estree': 1.0.6
@ -7812,7 +7820,7 @@ snapshots:
'@babel/plugin-syntax-typescript': 7.25.9(@babel/core@7.26.0) '@babel/plugin-syntax-typescript': 7.25.9(@babel/core@7.26.0)
'@vanilla-extract/babel-plugin-debug-ids': 1.1.0 '@vanilla-extract/babel-plugin-debug-ids': 1.1.0
'@vanilla-extract/css': 1.16.1 '@vanilla-extract/css': 1.16.1
esbuild: 0.17.6 esbuild: 0.17.19
eval: 0.1.8 eval: 0.1.8
find-up: 5.0.0 find-up: 5.0.0
javascript-stringify: 2.1.0 javascript-stringify: 2.1.0

View File

@ -1,7 +1,7 @@
{ {
"compilerOptions": { "compilerOptions": {
"lib": ["DOM", "DOM.Iterable", "ESNext"], "lib": ["DOM", "DOM.Iterable", "ESNext"],
"types": ["@remix-run/cloudflare", "vite/client", "@cloudflare/workers-types/2023-07-01"], "types": ["@remix-run/cloudflare", "vite/client", "@cloudflare/workers-types/2023-07-01", "@types/dom-speech-recognition"],
"isolatedModules": true, "isolatedModules": true,
"esModuleInterop": true, "esModuleInterop": true,
"jsx": "react-jsx", "jsx": "react-jsx",