diff --git a/apps/web/src/components/features/input/BusterChatInput/BusterChatInputButtons.tsx b/apps/web/src/components/features/input/BusterChatInput/BusterChatInputButtons.tsx
index 26941ce2b..52a0871ae 100644
--- a/apps/web/src/components/features/input/BusterChatInput/BusterChatInputButtons.tsx
+++ b/apps/web/src/components/features/input/BusterChatInput/BusterChatInputButtons.tsx
@@ -1,5 +1,4 @@
 import React, { useEffect } from 'react';
-import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition';
 import { Button } from '@/components/ui/buttons';
 import { ArrowUp, Magnifier, Sparkle2 } from '@/components/ui/icons';
 import Atom from '@/components/ui/icons/NucleoIconOutlined/atom';
@@ -16,6 +15,7 @@ import { AppTooltip } from '@/components/ui/tooltip';
 import { Text } from '@/components/ui/typography';
 import { cn } from '@/lib/utils';
 import { useMicrophonePermission } from './hooks/useMicrophonePermission';
+import { useSpeechRecognition } from '@/hooks/useSpeechRecognition';
 
 export type BusterChatInputMode = 'auto' | 'research' | 'deep-research';
 
@@ -42,21 +42,14 @@ export const BusterChatInputButtons = React.memo(
     onDictateListeningChange,
   }: BusterChatInputButtons) => {
     const hasGrantedPermissions = useMicrophonePermission();
-    const { transcript, listening, browserSupportsSpeechRecognition } = useSpeechRecognition();
+    const { transcript, listening, browserSupportsSpeechRecognition, onStartListening, onStopListening } =
+      useSpeechRecognition();
     const hasValue = useMentionInputHasValue();
     const onChangeValue = useMentionInputSuggestionsOnChangeValue();
     const getValue = useMentionInputSuggestionsGetValue();
     const disableSubmit = !hasValue;
 
-    const startListening = async () => {
-      SpeechRecognition.startListening({ continuous: true });
-    };
-
-    const stopListening = () => {
-      SpeechRecognition.stopListening();
-    };
-
     useEffect(() => {
       if (listening && transcript) {
         onDictate?.(transcript);
@@ -87,7 +80,7 @@ export const BusterChatInputButtons = React.memo(
             rounding={'large'}
             variant={'ghost'}
             prefix={}
-            onClick={listening ? stopListening : startListening}
+            onClick={listening ? onStopListening : onStartListening}
             loading={false}
             disabled={disabled}
             className={cn(
diff --git a/apps/web/src/hooks/useSpeechRecognition.stories.tsx b/apps/web/src/hooks/useSpeechRecognition.stories.tsx
new file mode 100644
index 000000000..0d86bcd29
--- /dev/null
+++ b/apps/web/src/hooks/useSpeechRecognition.stories.tsx
@@ -0,0 +1,109 @@
+import type { Meta, StoryObj } from '@storybook/react-vite';
+import { useSpeechRecognition } from './useSpeechRecognition';
+
+function SpeechRecognitionDemo() {
+  const {
+    onStartListening,
+    onStopListening,
+    listening,
+    transcript,
+    browserSupportsSpeechRecognition,
+    error,
+  } = useSpeechRecognition();
+
+  if (!browserSupportsSpeechRecognition) {
+    return (
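+      // Fallback branch for browsers without the Web Speech API (e.g. Firefox)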
+      <div>
+        <h3>Speech Recognition Not Supported</h3>
+        <p>Your browser does not support speech recognition.</p>
+      </div>
+    );
+  }
+
+  return (
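+    // Demo UI: start/stop controls plus live status, error, and transcript readouts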
+    <div>
+      <h3>Speech Recognition Demo</h3>
+      <div>
+        <button type="button" onClick={onStartListening} disabled={listening}>
+          Start Listening
+        </button>
+        <button type="button" onClick={onStopListening} disabled={!listening}>
+          Stop Listening
+        </button>
+      </div>
+      <div>
+        Status:{' '}
+        <span>{listening ? 'Listening...' : 'Not listening'}</span>
+      </div>
+      {error && (
+        <div>
+          Error: {error}
+        </div>
+      )}
+      <div>
+        Transcript:
+        <div>{transcript || 'No speech detected yet...'}</div>
+      </div>
+    </div>
+  );
+}
+
+const meta = {
+  title: 'Hooks/useSpeechRecognition',
+  component: SpeechRecognitionDemo,
+  parameters: {
+    layout: 'centered',
+  },
+} satisfies Meta<typeof SpeechRecognitionDemo>;
+
+export default meta;
+type Story = StoryObj<typeof meta>;
+
+export const Default: Story = {};
diff --git a/apps/web/src/hooks/useSpeechRecognition.tsx b/apps/web/src/hooks/useSpeechRecognition.tsx
new file mode 100644
index 000000000..f9a442d65
--- /dev/null
+++ b/apps/web/src/hooks/useSpeechRecognition.tsx
@@ -0,0 +1,141 @@
+import { useCallback, useEffect, useRef, useState } from 'react';
+
+// Type definitions for the Web Speech API (not yet part of lib.dom.d.ts)
+interface SpeechRecognitionErrorEvent extends Event {
+  error: string;
+  message: string;
+}
+
+interface SpeechRecognitionEvent extends Event {
+  resultIndex: number;
+  results: SpeechRecognitionResultList;
+}
+
+interface SpeechRecognition extends EventTarget {
+  continuous: boolean;
+  interimResults: boolean;
+  lang: string;
+  onstart: ((this: SpeechRecognition, ev: Event) => void) | null;
+  onresult: ((this: SpeechRecognition, ev: SpeechRecognitionEvent) => void) | null;
+  onerror: ((this: SpeechRecognition, ev: SpeechRecognitionErrorEvent) => void) | null;
+  onend: ((this: SpeechRecognition, ev: Event) => void) | null;
+  start(): void;
+  stop(): void;
+  abort(): void;
+}
+
+interface SpeechRecognitionConstructor {
+  new (): SpeechRecognition;
+}
+
+// Extend the Window interface to include the webkit-prefixed constructor
+declare global {
+  interface Window {
+    SpeechRecognition: SpeechRecognitionConstructor;
+    webkitSpeechRecognition: SpeechRecognitionConstructor;
+  }
+}
+
+interface UseSpeechRecognitionReturn {
+  onStartListening: () => void;
+  onStopListening: () => void;
+  listening: boolean;
+  transcript: string;
+  browserSupportsSpeechRecognition: boolean;
+  error: string | null;
+}
+
+/**
+ * Thin wrapper around the native SpeechRecognition API, replacing the
+ * react-speech-recognition dependency removed elsewhere in this diff.
+ */
+export function useSpeechRecognition(): UseSpeechRecognitionReturn {
+  const [listening, setListening] = useState(false);
+  const [transcript, setTranscript] = useState('');
+  const recognitionRef = useRef<SpeechRecognition | null>(null);
+  const [error, setError] = useState<string | null>(null);
+
+  // Check browser support (guarding window for SSR)
+  const browserSupportsSpeechRecognition =
+    typeof window !== 'undefined' && (window.SpeechRecognition || window.webkitSpeechRecognition);
+
+  // Initialize speech recognition
+  useEffect(() => {
+    if (!browserSupportsSpeechRecognition) {
+      return;
+    }
+
+    const SpeechRecognitionAPI = window.SpeechRecognition || window.webkitSpeechRecognition;
+    const recognition = new SpeechRecognitionAPI();
+
+    recognition.continuous = true;
+    recognition.interimResults = true;
+    recognition.lang = 'en-US';
+
+    recognition.onstart = () => {
+      setListening(true);
+    };
+
+    recognition.onresult = (event: SpeechRecognitionEvent) => {
+      let interimTranscript = '';
+      let finalTranscript = '';
+
+      // In continuous mode event.results accumulates the whole session, so
+      // walk the full list rather than starting at resultIndex; otherwise
+      // earlier finalized sentences would be dropped from the transcript.
+      for (let i = 0; i < event.results.length; i++) {
+        const transcriptPiece = event.results[i][0].transcript;
+        if (event.results[i].isFinal) {
+          finalTranscript += transcriptPiece;
+        } else {
+          interimTranscript += transcriptPiece;
+        }
+      }
+
+      setTranscript(finalTranscript + interimTranscript);
+    };
+
+    recognition.onerror = (event: SpeechRecognitionErrorEvent) => {
+      console.error('Speech recognition error:', event.error);
+      if (event.error.includes('language-not-supported')) {
+        setError('Browser does not support dictation');
+      } else {
+        setError(event.error);
+      }
+
+      recognition.stop();
+    };
+
+    recognition.onend = () => {
+      setListening(false);
+    };
+
+    recognitionRef.current = recognition;
+
+    return () => {
+      recognition.stop();
+    };
+  }, [browserSupportsSpeechRecognition]);
+
+  const onStartListening = useCallback(async () => {
+    if (recognitionRef.current && !listening) {
+      try {
+        // Request microphone permission, then release the tracks right away;
+        // we only needed the permission prompt, not the raw audio stream.
+        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+        for (const track of stream.getTracks()) {
+          track.stop();
+        }
+        setError(null);
+        setTranscript('');
+        recognitionRef.current.start();
+      } catch (err) {
+        console.error('Microphone permission denied:', err);
+      }
+    }
+  }, [listening]);
+
+  const onStopListening = useCallback(() => {
+    if (recognitionRef.current) {
+      recognitionRef.current.stop();
+      setListening(false);
+    }
+  }, []);
+
+  return {
+    onStartListening,
+    onStopListening,
+    listening,
+    error,
+    transcript,
+    browserSupportsSpeechRecognition: Boolean(browserSupportsSpeechRecognition),
+  };
+}
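
For context, a minimal sketch of how a consumer outside this diff could wire the hook up; the DictationButton component and its onText prop are hypothetical illustrations, not part of the PR:

import React from 'react';
import { useSpeechRecognition } from '@/hooks/useSpeechRecognition';

// Hypothetical consumer mirroring how BusterChatInputButtons uses the hook
// in the diff above: forward the live transcript while listening, toggle on click.
export function DictationButton({ onText }: { onText: (text: string) => void }) {
  const { onStartListening, onStopListening, listening, transcript, browserSupportsSpeechRecognition } =
    useSpeechRecognition();

  React.useEffect(() => {
    if (listening && transcript) {
      onText(transcript);
    }
  }, [listening, transcript, onText]);

  if (!browserSupportsSpeechRecognition) {
    return null;
  }

  return (
    <button type="button" onClick={listening ? onStopListening : onStartListening}>
      {listening ? 'Stop dictation' : 'Start dictation'}
    </button>
  );
}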