Mirror of https://github.com/buster-so/buster.git

Merge pull request #1220 from buster-so/nate/dictation-so-hot-right-now
Nate/dictation so hot right now

Commit db2744f93d
@@ -1,5 +1,4 @@
 import React, { useEffect } from 'react';
-import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition';
 import { Button } from '@/components/ui/buttons';
 import { ArrowUp, Magnifier, Sparkle2 } from '@/components/ui/icons';
 import Atom from '@/components/ui/icons/NucleoIconOutlined/atom';
@@ -14,8 +13,8 @@ import { Popover } from '@/components/ui/popover';
 import { AppSegmented, type AppSegmentedProps } from '@/components/ui/segmented';
 import { AppTooltip } from '@/components/ui/tooltip';
 import { Text } from '@/components/ui/typography';
+import { useSpeechRecognition } from '@/hooks/useSpeechRecognition';
 import { cn } from '@/lib/utils';
-import { useMicrophonePermission } from './hooks/useMicrophonePermission';

 export type BusterChatInputMode = 'auto' | 'research' | 'deep-research';

@@ -41,22 +40,20 @@ export const BusterChatInputButtons = React.memo(
     onDictate,
     onDictateListeningChange,
   }: BusterChatInputButtons) => {
-    const hasGrantedPermissions = useMicrophonePermission();
-    const { transcript, listening, browserSupportsSpeechRecognition } = useSpeechRecognition();
+    const {
+      transcript,
+      listening,
+      browserSupportsSpeechRecognition,
+      onStartListening,
+      onStopListening,
+      hasPermission,
+    } = useSpeechRecognition();
     const hasValue = useMentionInputHasValue();
     const onChangeValue = useMentionInputSuggestionsOnChangeValue();
     const getValue = useMentionInputSuggestionsGetValue();

     const disableSubmit = !hasValue;

-    const startListening = async () => {
-      SpeechRecognition.startListening({ continuous: true });
-    };
-
-    const stopListening = () => {
-      SpeechRecognition.stopListening();
-    };
-
     useEffect(() => {
       if (listening && transcript) {
         onDictate?.(transcript);
@@ -77,7 +74,7 @@ export const BusterChatInputButtons = React.memo(
           <AppTooltip
             title={
               listening
-                ? !hasGrantedPermissions
+                ? !hasPermission
                   ? 'Audio permissions not enabled'
                   : 'Stop dictation...'
                 : 'Press to dictate...'
@@ -87,17 +84,17 @@ export const BusterChatInputButtons = React.memo(
              rounding={'large'}
              variant={'ghost'}
              prefix={<Microphone />}
-             onClick={listening ? stopListening : startListening}
+             onClick={listening ? onStopListening : onStartListening}
              loading={false}
              disabled={disabled}
              className={cn(
                'origin-center transform-gpu transition-all duration-300 ease-out will-change-transform text-text-secondary',
                !disabled && 'hover:scale-110 active:scale-95',
                listening && 'bg-item-active shadow border text-foreground',
-               listening && !hasGrantedPermissions && 'bg-red-100! border border-red-300!'
+               listening && !hasPermission && 'bg-red-100! border border-red-300!'
              )}
              style={
-               listening && !hasGrantedPermissions
+               listening && !hasPermission
                  ? ({
                      '--icon-color': 'var(--color-red-400)',
                    } as React.CSSProperties)
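For orientation, here is a minimal sketch of how a parent might consume the onDictate callback the buttons now emit while listening. The ChatInputExample component and its draft state are hypothetical and not part of this commit; the diff above only shows the buttons calling onDictate?.(transcript).

import { useState } from 'react';

// Hypothetical consumer, for illustration only — not part of this commit.
// It mirrors the accumulated transcript into a plain draft value.
export function ChatInputExample() {
  const [draft, setDraft] = useState('');

  // onDictate receives the full accumulated transcript on every update,
  // so the simplest consumer just replaces the draft with it.
  const handleDictate = (transcript: string) => {
    setDraft(transcript);
  };

  return (
    <div>
      <textarea value={draft} onChange={(e) => setDraft(e.target.value)} />
      {/* <BusterChatInputButtons onDictate={handleDictate} ... /> */}
    </div>
  );
}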
@@ -1,32 +0,0 @@
-import { useEffect, useState } from 'react';
-
-export function useMicrophonePermission() {
-  const [hasGrantedPermissions, setHasGrantPermissions] = useState(false);
-
-  useEffect(() => {
-    if (navigator.permissions) {
-      navigator.permissions
-        .query({ name: 'microphone' as PermissionName })
-        .then((result) => {
-          if (result.state === 'granted') {
-            setHasGrantPermissions(true);
-          } else if (result.state === 'denied') {
-            setHasGrantPermissions(false);
-          } else {
-            setHasGrantPermissions(true);
-          }
-
-          // You can also listen for changes
-          result.onchange = () => {
-            const isGranted = result.state === 'granted';
-            setHasGrantPermissions(isGranted);
-          };
-        })
-        .catch((err) => {
-          console.error('Permission API error:', err);
-        });
-    }
-  }, []);
-
-  return hasGrantedPermissions;
-}
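Both the removed hook above and the new useSpeechRecognition hook below rely on the same Permissions API pattern: query the 'microphone' permission once, then subscribe to onchange so later grants or revocations are picked up. A framework-free sketch of that pattern follows; the watchMicrophonePermission helper and its callback are illustrative names, not part of the codebase.

// Illustrative helper (not in this commit): report the microphone permission
// state now and whenever it changes.
export async function watchMicrophonePermission(
  onChange: (state: PermissionState) => void
): Promise<void> {
  if (typeof navigator === 'undefined' || !navigator.permissions) {
    return; // Permissions API unavailable; callers should degrade gracefully
  }

  // 'microphone' is not in every TS lib's PermissionName union, hence the cast —
  // the same cast both hooks in this diff use.
  const status = await navigator.permissions.query({ name: 'microphone' as PermissionName });
  onChange(status.state);
  status.onchange = () => onChange(status.state);
}

// Usage sketch:
// watchMicrophonePermission((state) => console.info('microphone permission:', state));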
@@ -0,0 +1,109 @@
+import type { Meta, StoryObj } from '@storybook/react-vite';
+import { useSpeechRecognition } from './useSpeechRecognition';
+
+function SpeechRecognitionDemo() {
+  const {
+    onStartListening,
+    onStopListening,
+    listening,
+    transcript,
+    browserSupportsSpeechRecognition,
+    error,
+  } = useSpeechRecognition();
+
+  if (!browserSupportsSpeechRecognition) {
+    return (
+      <div style={{ padding: '2rem', fontFamily: 'sans-serif' }}>
+        <h2>Speech Recognition Not Supported</h2>
+        <p>Your browser does not support speech recognition.</p>
+      </div>
+    );
+  }
+
+  return (
+    <div style={{ padding: '2rem', fontFamily: 'sans-serif' }}>
+      <h2>Speech Recognition Demo</h2>
+      <div style={{ marginBottom: '1rem' }}>
+        <button
+          type="button"
+          onClick={onStartListening}
+          disabled={listening}
+          style={{
+            padding: '0.5rem 1rem',
+            marginRight: '0.5rem',
+            backgroundColor: listening ? '#ccc' : '#007bff',
+            color: 'white',
+            border: 'none',
+            borderRadius: '4px',
+            cursor: listening ? 'not-allowed' : 'pointer',
+          }}
+        >
+          Start Listening
+        </button>
+        <button
+          type="button"
+          onClick={onStopListening}
+          disabled={!listening}
+          style={{
+            padding: '0.5rem 1rem',
+            backgroundColor: !listening ? '#ccc' : '#dc3545',
+            color: 'white',
+            border: 'none',
+            borderRadius: '4px',
+            cursor: !listening ? 'not-allowed' : 'pointer',
+          }}
+        >
+          Stop Listening
+        </button>
+      </div>
+      <div style={{ marginBottom: '1rem' }}>
+        <strong>Status:</strong>{' '}
+        <span style={{ color: listening ? '#28a745' : '#6c757d' }}>
+          {listening ? 'Listening...' : 'Not listening'}
+        </span>
+      </div>
+      {error && (
+        <div
+          style={{
+            marginBottom: '1rem',
+            padding: '0.75rem',
+            backgroundColor: '#f8d7da',
+            color: '#721c24',
+            border: '1px solid #f5c6cb',
+            borderRadius: '4px',
+          }}
+        >
+          <strong>Error:</strong> {error}
+        </div>
+      )}
+      <div>
+        <strong>Transcript:</strong>
+        <div
+          style={{
+            marginTop: '0.5rem',
+            padding: '1rem',
+            border: '1px solid #ddd',
+            borderRadius: '4px',
+            minHeight: '100px',
+            backgroundColor: '#f8f9fa',
+          }}
+        >
+          {transcript || 'No speech detected yet...'}
+        </div>
+      </div>
+    </div>
+  );
+}
+
+const meta = {
+  title: 'Hooks/useSpeechRecognition',
+  component: SpeechRecognitionDemo,
+  parameters: {
+    layout: 'centered',
+  },
+} satisfies Meta<typeof SpeechRecognitionDemo>;
+
+export default meta;
+type Story = StoryObj<typeof meta>;
+
+export const Default: Story = {};
@@ -0,0 +1,180 @@
+import { useCallback, useEffect, useRef, useState } from 'react';
+import { openErrorNotification } from '@/context/BusterNotifications';
+
+// Type definitions for Web Speech API
+interface SpeechRecognitionErrorEvent extends Event {
+  error: string;
+  message: string;
+}
+
+interface SpeechRecognitionEvent extends Event {
+  resultIndex: number;
+  results: SpeechRecognitionResultList;
+}
+
+interface SpeechRecognition extends EventTarget {
+  continuous: boolean;
+  interimResults: boolean;
+  lang: string;
+  onstart: ((this: SpeechRecognition, ev: Event) => void) | null;
+  onresult: ((this: SpeechRecognition, ev: SpeechRecognitionEvent) => void) | null;
+  onerror: ((this: SpeechRecognition, ev: SpeechRecognitionErrorEvent) => void) | null;
+  onend: ((this: SpeechRecognition, ev: Event) => void) | null;
+  start(): void;
+  stop(): void;
+  abort(): void;
+}
+
+interface SpeechRecognitionConstructor {
+  new (): SpeechRecognition;
+}
+
+// Extend Window interface to include webkit speech recognition
+declare global {
+  interface Window {
+    SpeechRecognition: SpeechRecognitionConstructor;
+    webkitSpeechRecognition: SpeechRecognitionConstructor;
+  }
+}
+
+interface UseSpeechRecognitionReturn {
+  onStartListening: () => void;
+  onStopListening: () => void;
+  listening: boolean;
+  transcript: string;
+  browserSupportsSpeechRecognition: boolean;
+  error: string | null;
+  hasPermission: boolean;
+}
+
+export function useSpeechRecognition(): UseSpeechRecognitionReturn {
+  const [listening, setListening] = useState(false);
+  const [transcript, setTranscript] = useState('');
+  const recognitionRef = useRef<SpeechRecognition | null>(null);
+  const [error, setError] = useState<string | null>(null);
+  const [hasPermission, setHasPermission] = useState(false);
+  const finalTranscriptRef = useRef('');
+
+  // Check browser support
+  const browserSupportsSpeechRecognition =
+    typeof window !== 'undefined' && (window.SpeechRecognition || window.webkitSpeechRecognition);
+
+  // Check microphone permission
+  useEffect(() => {
+    if (typeof navigator === 'undefined' || !navigator.permissions) {
+      return;
+    }
+
+    navigator.permissions
+      .query({ name: 'microphone' as PermissionName })
+      .then((result) => {
+        setHasPermission(result.state === 'granted');
+
+        // Listen for permission changes
+        result.onchange = () => {
+          setHasPermission(result.state === 'granted');
+        };
+      })
+      .catch((err) => {
+        console.error('Permission API error:', err);
+      });
+  }, []);
+
+  // Initialize speech recognition
+  useEffect(() => {
+    if (!browserSupportsSpeechRecognition) {
+      return;
+    }
+
+    const SpeechRecognitionAPI = window.SpeechRecognition || window.webkitSpeechRecognition;
+    const recognition = new SpeechRecognitionAPI();
+
+    recognition.continuous = true;
+    recognition.interimResults = true;
+    recognition.lang = 'en-US';
+
+    recognition.onstart = () => {
+      setListening(true);
+    };
+
+    recognition.onresult = (event: SpeechRecognitionEvent) => {
+      let interimTranscript = '';
+      let newFinalTranscript = '';
+
+      for (let i = event.resultIndex; i < event.results.length; i++) {
+        const transcriptPiece = event.results[i][0].transcript;
+        if (event.results[i].isFinal) {
+          newFinalTranscript += transcriptPiece;
+        } else {
+          interimTranscript += transcriptPiece;
+        }
+      }
+
+      // Accumulate final transcripts
+      if (newFinalTranscript) {
+        finalTranscriptRef.current += newFinalTranscript;
+      }
+
+      // Set transcript to accumulated final + current interim
+      setTranscript(finalTranscriptRef.current + interimTranscript);
+    };
+
+    recognition.onerror = (event: SpeechRecognitionErrorEvent) => {
+      console.error('Speech recognition error:', event.error);
+      let message = '';
+      if (event.error.includes('language-not-supported')) {
+        message = 'Browser does not support dictation';
+      } else {
+        message = event.error;
+      }
+
+      openErrorNotification({ message });
+      setError(message);
+
+      onStopListening();
+    };
+
+    recognition.onend = () => {
+      setListening(false);
+    };
+
+    recognitionRef.current = recognition;
+
+    return () => {
+      recognition.stop();
+    };
+  }, [browserSupportsSpeechRecognition]);
+
+  const onStartListening = useCallback(async () => {
+    if (recognitionRef.current && !listening) {
+      try {
+        // Request microphone permission
+        await navigator.mediaDevices.getUserMedia({ audio: true });
+        // Reset transcripts when starting
+        finalTranscriptRef.current = '';
+        setTranscript('');
+        recognitionRef.current.start();
+      } catch (error) {
+        console.error('Microphone error:', error);
+        openErrorNotification({ message: `Microphone permission denied: ${error}` });
+      }
+    }
+  }, [listening]);
+
+  const onStopListening = useCallback(() => {
+    if (recognitionRef.current) {
+      recognitionRef.current.stop();
+      setListening(false);
+    }
+  }, [listening]);
+
+  return {
+    onStartListening,
+    onStopListening,
+    listening,
+    error,
+    transcript,
+    browserSupportsSpeechRecognition: Boolean(browserSupportsSpeechRecognition),
+    hasPermission,
+  };
+}
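Stripped of React state, the recognition lifecycle this hook wraps comes down to a few events on a SpeechRecognition instance: continuous mode keeps the session open, interim results stream partial text, and final results are accumulated. A browser-only sketch of that flow, assuming the same global typings the hook declares (startDictation and logTranscript are illustrative names, not part of the commit):

// Framework-free sketch of the Web Speech API flow wrapped by the hook above.
// Assumes the SpeechRecognition globals declared in the hook file.
export function startDictation(logTranscript: (text: string) => void): () => void {
  const Ctor = window.SpeechRecognition || window.webkitSpeechRecognition;
  if (!Ctor) {
    console.warn('Speech recognition is not supported in this browser');
    return () => {};
  }

  const recognition = new Ctor();
  recognition.continuous = true; // keep listening across pauses
  recognition.interimResults = true; // emit partial text while the user speaks
  recognition.lang = 'en-US';

  let finalText = '';
  recognition.onresult = (event) => {
    let interim = '';
    for (let i = event.resultIndex; i < event.results.length; i++) {
      const piece = event.results[i][0].transcript;
      if (event.results[i].isFinal) {
        finalText += piece;
      } else {
        interim += piece;
      }
    }
    logTranscript(finalText + interim);
  };

  recognition.start();
  return () => recognition.stop(); // caller invokes this to stop dictation
}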