Mirror of https://github.com/buster-so/buster.git

Merge pull request #1220 from buster-so/nate/dictation-so-hot-right-now
Nate/dictation so hot right now

Commit db2744f93d
@@ -1,5 +1,4 @@
 import React, { useEffect } from 'react';
-import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition';
 import { Button } from '@/components/ui/buttons';
 import { ArrowUp, Magnifier, Sparkle2 } from '@/components/ui/icons';
 import Atom from '@/components/ui/icons/NucleoIconOutlined/atom';
@@ -14,8 +13,8 @@ import { Popover } from '@/components/ui/popover';
 import { AppSegmented, type AppSegmentedProps } from '@/components/ui/segmented';
 import { AppTooltip } from '@/components/ui/tooltip';
 import { Text } from '@/components/ui/typography';
+import { useSpeechRecognition } from '@/hooks/useSpeechRecognition';
 import { cn } from '@/lib/utils';
-import { useMicrophonePermission } from './hooks/useMicrophonePermission';

 export type BusterChatInputMode = 'auto' | 'research' | 'deep-research';

@@ -41,22 +40,20 @@ export const BusterChatInputButtons = React.memo(
     onDictate,
     onDictateListeningChange,
   }: BusterChatInputButtons) => {
-    const hasGrantedPermissions = useMicrophonePermission();
-    const { transcript, listening, browserSupportsSpeechRecognition } = useSpeechRecognition();
+    const {
+      transcript,
+      listening,
+      browserSupportsSpeechRecognition,
+      onStartListening,
+      onStopListening,
+      hasPermission,
+    } = useSpeechRecognition();
     const hasValue = useMentionInputHasValue();
     const onChangeValue = useMentionInputSuggestionsOnChangeValue();
     const getValue = useMentionInputSuggestionsGetValue();

     const disableSubmit = !hasValue;

-    const startListening = async () => {
-      SpeechRecognition.startListening({ continuous: true });
-    };
-
-    const stopListening = () => {
-      SpeechRecognition.stopListening();
-    };
-
     useEffect(() => {
       if (listening && transcript) {
         onDictate?.(transcript);
@@ -77,7 +74,7 @@ export const BusterChatInputButtons = React.memo(
           <AppTooltip
             title={
               listening
-                ? !hasGrantedPermissions
+                ? !hasPermission
                   ? 'Audio permissions not enabled'
                   : 'Stop dictation...'
                 : 'Press to dictate...'
@@ -87,17 +84,17 @@ export const BusterChatInputButtons = React.memo(
              rounding={'large'}
              variant={'ghost'}
              prefix={<Microphone />}
-             onClick={listening ? stopListening : startListening}
+             onClick={listening ? onStopListening : onStartListening}
              loading={false}
              disabled={disabled}
              className={cn(
                'origin-center transform-gpu transition-all duration-300 ease-out will-change-transform text-text-secondary',
                !disabled && 'hover:scale-110 active:scale-95',
                listening && 'bg-item-active shadow border text-foreground',
-               listening && !hasGrantedPermissions && 'bg-red-100! border border-red-300!'
+               listening && !hasPermission && 'bg-red-100! border border-red-300!'
              )}
              style={
-               listening && !hasGrantedPermissions
+               listening && !hasPermission
                  ? ({
                      '--icon-color': 'var(--color-red-400)',
                    } as React.CSSProperties)
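For orientation, here is a minimal sketch of how a parent might consume the onDictate callback the buttons now emit while listening. The ChatInputExample component and its draft state are hypothetical and not part of this commit; the diff above only shows the buttons calling onDictate?.(transcript).

import { useState } from 'react';

// Hypothetical consumer, for illustration only — not part of this commit.
// It mirrors the accumulated transcript into a plain draft value.
export function ChatInputExample() {
  const [draft, setDraft] = useState('');

  // onDictate receives the full accumulated transcript on every update,
  // so the simplest consumer just replaces the draft with it.
  const handleDictate = (transcript: string) => {
    setDraft(transcript);
  };

  return (
    <div>
      <textarea value={draft} onChange={(e) => setDraft(e.target.value)} />
      {/* <BusterChatInputButtons onDictate={handleDictate} ... /> */}
    </div>
  );
}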
@@ -1,32 +0,0 @@
-import { useEffect, useState } from 'react';
-
-export function useMicrophonePermission() {
-  const [hasGrantedPermissions, setHasGrantPermissions] = useState(false);
-
-  useEffect(() => {
-    if (navigator.permissions) {
-      navigator.permissions
-        .query({ name: 'microphone' as PermissionName })
-        .then((result) => {
-          if (result.state === 'granted') {
-            setHasGrantPermissions(true);
-          } else if (result.state === 'denied') {
-            setHasGrantPermissions(false);
-          } else {
-            setHasGrantPermissions(true);
-          }
-
-          // You can also listen for changes
-          result.onchange = () => {
-            const isGranted = result.state === 'granted';
-            setHasGrantPermissions(isGranted);
-          };
-        })
-        .catch((err) => {
-          console.error('Permission API error:', err);
-        });
-    }
-  }, []);
-
-  return hasGrantedPermissions;
-}
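Both the removed hook above and the new useSpeechRecognition hook below rely on the same Permissions API pattern: query the 'microphone' permission once, then subscribe to onchange so later grants or revocations are picked up. A framework-free sketch of that pattern follows; the watchMicrophonePermission helper and its callback are illustrative names, not part of the codebase.

// Illustrative helper (not in this commit): report the microphone permission
// state now and whenever it changes.
export async function watchMicrophonePermission(
  onChange: (state: PermissionState) => void
): Promise<void> {
  if (typeof navigator === 'undefined' || !navigator.permissions) {
    return; // Permissions API unavailable; callers should degrade gracefully
  }

  // 'microphone' is not in every TS lib's PermissionName union, hence the cast —
  // the same cast both hooks in this diff use.
  const status = await navigator.permissions.query({ name: 'microphone' as PermissionName });
  onChange(status.state);
  status.onchange = () => onChange(status.state);
}

// Usage sketch:
// watchMicrophonePermission((state) => console.info('microphone permission:', state));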
@@ -0,0 +1,109 @@
+import type { Meta, StoryObj } from '@storybook/react-vite';
+import { useSpeechRecognition } from './useSpeechRecognition';
+
+function SpeechRecognitionDemo() {
+  const {
+    onStartListening,
+    onStopListening,
+    listening,
+    transcript,
+    browserSupportsSpeechRecognition,
+    error,
+  } = useSpeechRecognition();
+
+  if (!browserSupportsSpeechRecognition) {
+    return (
+      <div style={{ padding: '2rem', fontFamily: 'sans-serif' }}>
+        <h2>Speech Recognition Not Supported</h2>
+        <p>Your browser does not support speech recognition.</p>
+      </div>
+    );
+  }
+
+  return (
+    <div style={{ padding: '2rem', fontFamily: 'sans-serif' }}>
+      <h2>Speech Recognition Demo</h2>
+      <div style={{ marginBottom: '1rem' }}>
+        <button
+          type="button"
+          onClick={onStartListening}
+          disabled={listening}
+          style={{
+            padding: '0.5rem 1rem',
+            marginRight: '0.5rem',
+            backgroundColor: listening ? '#ccc' : '#007bff',
+            color: 'white',
+            border: 'none',
+            borderRadius: '4px',
+            cursor: listening ? 'not-allowed' : 'pointer',
+          }}
+        >
+          Start Listening
+        </button>
+        <button
+          type="button"
+          onClick={onStopListening}
+          disabled={!listening}
+          style={{
+            padding: '0.5rem 1rem',
+            backgroundColor: !listening ? '#ccc' : '#dc3545',
+            color: 'white',
+            border: 'none',
+            borderRadius: '4px',
+            cursor: !listening ? 'not-allowed' : 'pointer',
+          }}
+        >
+          Stop Listening
+        </button>
+      </div>
+      <div style={{ marginBottom: '1rem' }}>
+        <strong>Status:</strong>{' '}
+        <span style={{ color: listening ? '#28a745' : '#6c757d' }}>
+          {listening ? 'Listening...' : 'Not listening'}
+        </span>
+      </div>
+      {error && (
+        <div
+          style={{
+            marginBottom: '1rem',
+            padding: '0.75rem',
+            backgroundColor: '#f8d7da',
+            color: '#721c24',
+            border: '1px solid #f5c6cb',
+            borderRadius: '4px',
+          }}
+        >
+          <strong>Error:</strong> {error}
+        </div>
+      )}
+      <div>
+        <strong>Transcript:</strong>
+        <div
+          style={{
+            marginTop: '0.5rem',
+            padding: '1rem',
+            border: '1px solid #ddd',
+            borderRadius: '4px',
+            minHeight: '100px',
+            backgroundColor: '#f8f9fa',
+          }}
+        >
+          {transcript || 'No speech detected yet...'}
+        </div>
+      </div>
+    </div>
+  );
+}
+
+const meta = {
+  title: 'Hooks/useSpeechRecognition',
+  component: SpeechRecognitionDemo,
+  parameters: {
+    layout: 'centered',
+  },
+} satisfies Meta<typeof SpeechRecognitionDemo>;
+
+export default meta;
+type Story = StoryObj<typeof meta>;
+
+export const Default: Story = {};
@@ -0,0 +1,180 @@
+import { useCallback, useEffect, useRef, useState } from 'react';
+import { openErrorNotification } from '@/context/BusterNotifications';
+
+// Type definitions for Web Speech API
+interface SpeechRecognitionErrorEvent extends Event {
+  error: string;
+  message: string;
+}
+
+interface SpeechRecognitionEvent extends Event {
+  resultIndex: number;
+  results: SpeechRecognitionResultList;
+}
+
+interface SpeechRecognition extends EventTarget {
+  continuous: boolean;
+  interimResults: boolean;
+  lang: string;
+  onstart: ((this: SpeechRecognition, ev: Event) => void) | null;
+  onresult: ((this: SpeechRecognition, ev: SpeechRecognitionEvent) => void) | null;
+  onerror: ((this: SpeechRecognition, ev: SpeechRecognitionErrorEvent) => void) | null;
+  onend: ((this: SpeechRecognition, ev: Event) => void) | null;
+  start(): void;
+  stop(): void;
+  abort(): void;
+}
+
+interface SpeechRecognitionConstructor {
+  new (): SpeechRecognition;
+}
+
+// Extend Window interface to include webkit speech recognition
+declare global {
+  interface Window {
+    SpeechRecognition: SpeechRecognitionConstructor;
+    webkitSpeechRecognition: SpeechRecognitionConstructor;
+  }
+}
+
+interface UseSpeechRecognitionReturn {
+  onStartListening: () => void;
+  onStopListening: () => void;
+  listening: boolean;
+  transcript: string;
+  browserSupportsSpeechRecognition: boolean;
+  error: string | null;
+  hasPermission: boolean;
+}
+
+export function useSpeechRecognition(): UseSpeechRecognitionReturn {
+  const [listening, setListening] = useState(false);
+  const [transcript, setTranscript] = useState('');
+  const recognitionRef = useRef<SpeechRecognition | null>(null);
+  const [error, setError] = useState<string | null>(null);
+  const [hasPermission, setHasPermission] = useState(false);
+  const finalTranscriptRef = useRef('');
+
+  // Check browser support
+  const browserSupportsSpeechRecognition =
+    typeof window !== 'undefined' && (window.SpeechRecognition || window.webkitSpeechRecognition);
+
+  // Check microphone permission
+  useEffect(() => {
+    if (typeof navigator === 'undefined' || !navigator.permissions) {
+      return;
+    }
+
+    navigator.permissions
+      .query({ name: 'microphone' as PermissionName })
+      .then((result) => {
+        setHasPermission(result.state === 'granted');
+
+        // Listen for permission changes
+        result.onchange = () => {
+          setHasPermission(result.state === 'granted');
+        };
+      })
+      .catch((err) => {
+        console.error('Permission API error:', err);
+      });
+  }, []);
+
+  // Initialize speech recognition
+  useEffect(() => {
+    if (!browserSupportsSpeechRecognition) {
+      return;
+    }
+
+    const SpeechRecognitionAPI = window.SpeechRecognition || window.webkitSpeechRecognition;
+    const recognition = new SpeechRecognitionAPI();
+
+    recognition.continuous = true;
+    recognition.interimResults = true;
+    recognition.lang = 'en-US';
+
+    recognition.onstart = () => {
+      setListening(true);
+    };
+
+    recognition.onresult = (event: SpeechRecognitionEvent) => {
+      let interimTranscript = '';
+      let newFinalTranscript = '';
+
+      for (let i = event.resultIndex; i < event.results.length; i++) {
+        const transcriptPiece = event.results[i][0].transcript;
+        if (event.results[i].isFinal) {
+          newFinalTranscript += transcriptPiece;
+        } else {
+          interimTranscript += transcriptPiece;
+        }
+      }
+
+      // Accumulate final transcripts
+      if (newFinalTranscript) {
+        finalTranscriptRef.current += newFinalTranscript;
+      }
+
+      // Set transcript to accumulated final + current interim
+      setTranscript(finalTranscriptRef.current + interimTranscript);
+    };
+
+    recognition.onerror = (event: SpeechRecognitionErrorEvent) => {
+      console.error('Speech recognition error:', event.error);
+      let message = '';
+      if (event.error.includes('language-not-supported')) {
+        message = 'Browser does not support dictation';
+      } else {
+        message = event.error;
+      }
+
+      openErrorNotification({ message });
+      setError(message);
+
+      onStopListening();
+    };
+
+    recognition.onend = () => {
+      setListening(false);
+    };
+
+    recognitionRef.current = recognition;
+
+    return () => {
+      recognition.stop();
+    };
+  }, [browserSupportsSpeechRecognition]);
+
+  const onStartListening = useCallback(async () => {
+    if (recognitionRef.current && !listening) {
+      try {
+        // Request microphone permission
+        await navigator.mediaDevices.getUserMedia({ audio: true });
+        // Reset transcripts when starting
+        finalTranscriptRef.current = '';
+        setTranscript('');
+        recognitionRef.current.start();
+      } catch (error) {
+        console.error('Microphone error:', error);
+        openErrorNotification({ message: `Microphone permission denied: ${error}` });
+      }
+    }
+  }, [listening]);
+
+  const onStopListening = useCallback(() => {
+    if (recognitionRef.current) {
+      recognitionRef.current.stop();
+      setListening(false);
+    }
+  }, [listening]);
+
+  return {
+    onStartListening,
+    onStopListening,
+    listening,
+    error,
+    transcript,
+    browserSupportsSpeechRecognition: Boolean(browserSupportsSpeechRecognition),
+    hasPermission,
+  };
+}
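Stripped of React state, the recognition lifecycle this hook wraps comes down to a few events on a SpeechRecognition instance: continuous mode keeps the session open, interim results stream partial text, and final results are accumulated. A browser-only sketch of that flow, assuming the same global typings the hook declares (startDictation and logTranscript are illustrative names, not part of the commit):

// Framework-free sketch of the Web Speech API flow wrapped by the hook above.
// Assumes the SpeechRecognition globals declared in the hook file.
export function startDictation(logTranscript: (text: string) => void): () => void {
  const Ctor = window.SpeechRecognition || window.webkitSpeechRecognition;
  if (!Ctor) {
    console.warn('Speech recognition is not supported in this browser');
    return () => {};
  }

  const recognition = new Ctor();
  recognition.continuous = true; // keep listening across pauses
  recognition.interimResults = true; // emit partial text while the user speaks
  recognition.lang = 'en-US';

  let finalText = '';
  recognition.onresult = (event) => {
    let interim = '';
    for (let i = event.resultIndex; i < event.results.length; i++) {
      const piece = event.results[i][0].transcript;
      if (event.results[i].isFinal) {
        finalText += piece;
      } else {
        interim += piece;
      }
    }
    logTranscript(finalText + interim);
  };

  recognition.start();
  return () => recognition.stop(); // caller invokes this to stop dictation
}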