software-mansion · msluszniak · Jan 20, 2026 · Jan 20, 2026 · Jan 20, 2026 · Jan 21, 2026
diff --git a/.cspell-wordlist.txt b/.cspell-wordlist.txt
@@ -104,3 +104,5 @@ POTTEDPLANT
 TVMONITOR
 sublist
 TTFT
+timestamping
+logprob
diff --git a/apps/llm/app/voice_chat/index.tsx b/apps/llm/app/voice_chat/index.tsx
@@ -1,4 +1,4 @@
-import { useContext, useEffect, useRef, useState } from 'react';
+import { useContext, useEffect, useState } from 'react';
 import {
   Keyboard,
   KeyboardAvoidingView,
@@ -35,14 +35,16 @@ export default function VoiceChatScreenWrapper() {
 
 function VoiceChatScreen() {
   const [isRecording, setIsRecording] = useState(false);
+  const [liveTranscription, setLiveTranscription] = useState('');
+
   const [recorder] = useState(
     () =>
       new AudioRecorder({
         sampleRate: 16000,
         bufferLengthInSamples: 1600,
       })
   );
-  const messageRecorded = useRef<boolean>(false);
+
   const { setGlobalGenerating } = useContext(GeneratingContext);
 
   const llm = useLLM({ model: QWEN3_0_6B_QUANTIZED });
@@ -67,16 +69,32 @@ function VoiceChatScreen() {
     if (isRecording) {
       setIsRecording(false);
       recorder.stop();
-      messageRecorded.current = true;
       speechToText.streamStop();
     } else {
       setIsRecording(true);
+      setLiveTranscription('');
+
       recorder.onAudioReady(({ buffer }) => {
         speechToText.streamInsert(buffer.getChannelData(0));
       });
       recorder.start();
-      const transcription = await speechToText.stream();
-      await llm.sendMessage(transcription);
+
+      let finalResult = '';
+
+      try {
+        for await (const result of speechToText.stream()) {
+          const text = result.committed.text + result.nonCommitted.text;
+          setLiveTranscription(text);
+          finalResult = text;
+        }
+      } catch (e) {
+        console.error('Streaming error:', e);
+      } finally {
+        if (finalResult.trim().length > 0) {
+          await llm.sendMessage(finalResult);
+          setLiveTranscription('');
+        }
+      }
     }
   };
 
@@ -96,16 +114,17 @@ function VoiceChatScreen() {
           <SWMIcon width={45} height={45} />
           <Text style={styles.textModelName}>Qwen 3 x Whisper</Text>
         </View>
-        {llm.messageHistory.length || speechToText.committedTranscription ? (
+
+        {llm.messageHistory.length > 0 || liveTranscription.length > 0 ? (
           <View style={styles.chatContainer}>
             <Messages
               chatHistory={
-                speechToText.isGenerating
+                isRecording && liveTranscription.length > 0
                   ? [
                       ...llm.messageHistory,
                       {
                         role: 'user',
-                        content: speechToText.committedTranscription,
+                        content: liveTranscription,
                       },
                     ]
                   : llm.messageHistory
@@ -123,6 +142,7 @@ function VoiceChatScreen() {
             </Text>
           </View>
         )}
+
         <View style={styles.bottomContainer}>
           {DeviceInfo.isEmulatorSync() ? (
             <View style={styles.emulatorBox}>

diff --git a/apps/speech/app.json b/apps/speech/app.json
@@ -17,18 +17,44 @@
       "bundleIdentifier": "com.anonymous.speech",
       "infoPlist": {
         "NSMicrophoneUsageDescription": "This app needs access to your microphone to record audio."
+      },
+      "entitlements": {
+        "com.apple.developer.kernel.increased-memory-limit": true
       }
     },
     "android": {
       "adaptiveIcon": {
         "foregroundImage": "./assets/adaptive-icon.png",
         "backgroundColor": "#ffffff"
       },
-      "package": "com.anonymous.speech"
+      "package": "com.anonymous.speech",
+      "permissions": [
+        "android.permission.RECORD_AUDIO",
+        "android.permission.MODIFY_AUDIO_SETTINGS",
+        "android.permission.FOREGROUND_SERVICE",
+        "android.permission.FOREGROUND_SERVICE_MEDIA_PLAYBACK"
+      ]
     },
     "web": {
       "favicon": "./assets/favicon.png"
     },
-    "plugins": ["expo-font"]
+    "plugins": [
+      "expo-font",
+      [
+        "react-native-audio-api",
+        {
+          "iosBackgroundMode": true,
+          "iosMicrophonePermission": "This app requires access to the microphone to record audio.",
+          "androidPermissions": [
+            "android.permission.MODIFY_AUDIO_SETTINGS",
+            "android.permission.FOREGROUND_SERVICE",
+            "android.permission.FOREGROUND_SERVICE_MEDIA_PLAYBACK",
+            "android.permission.RECORD_AUDIO"
+          ],
+          "androidForegroundService": true,
+          "androidFSTypes": ["mediaPlayback", "microphone"]
+        }
+      ]
+    ]
   }
 }
diff --git a/apps/speech/components/VerboseTranscription.tsx b/apps/speech/components/VerboseTranscription.tsx
@@ -0,0 +1,241 @@
+import React from 'react';
+import { View, Text, StyleSheet } from 'react-native';
+import { TranscriptionResult } from 'react-native-executorch';
+
+export const VerboseTranscription = ({ // Renders a TranscriptionResult: full text, optional language/duration badges, then one card per segment.
+  data,
+}: {
+  data: TranscriptionResult;
+}) => {
+  if (!data) return null; // render nothing when no result is supplied
+
+  const hasSegments = Array.isArray(data.segments) && data.segments.length > 0; // true only for a non-empty segments array
+
+  const hasLanguage = // language must be non-blank and different from the literal 'N/A'
+    !!data.language && data.language !== 'N/A' && data.language.trim() !== '';
+
+  const hasDuration = typeof data.duration === 'number' && data.duration > 0; // duration must be a positive number
+
+  const hasMetadata = hasLanguage || hasDuration; // gates the badge row rendered under the full text
+
+  return (
+    <View style={styles.container}>
+      <View style={styles.metaContainer}>
+        <Text style={styles.label}>Full Text:</Text>
+        <Text style={styles.text}>{data.text || ''}</Text>
+
+        {hasMetadata && ( // badge row is omitted when neither badge applies
+          <View style={styles.row}>
+            {hasLanguage && (
+              <Text style={styles.metaItem}>Language: {data.language}</Text>
+            )}
+            {hasDuration && (
+              <Text style={styles.metaItem}>
+                Duration: {data.duration?.toFixed(2)}s
+              </Text>
+            )}
+          </View>
+        )}
+      </View>
+
+      {hasSegments && ( // heading and cards are skipped entirely when there are no segments
+        <>
+          <Text style={styles.sectionHeader}>
+            Segments ({data.segments?.length})
+          </Text>
+
+          {data.segments?.map((seg, index) => ( // the array index serves as both the React key and the displayed ID
+            <View key={index} style={styles.segmentCard}>
+              <View style={styles.segmentHeader}>
+                <Text style={styles.timeBadge}>
+                  {seg.start.toFixed(2)}s - {seg.end.toFixed(2)}s
+                </Text>
+                <Text style={styles.segmentId}>ID: {index}</Text>
+              </View>
+
+              <Text style={styles.segmentText}>"{seg.text}"</Text>
+
+              {seg.words && seg.words.length > 0 && ( // word chips appear only when the segment has a non-empty words array
+                <View style={styles.wordsContainer}>
+                  <Text style={styles.statLabel}>Word Timestamps:</Text>
+                  <View style={styles.wordsGrid}>
+                    {seg.words.map((w, wIdx) => (
+                      <View key={wIdx} style={styles.wordChip}>
+                        <Text style={styles.wordText}>{w.word.trim()}</Text>
+                        <Text style={styles.wordTime}>
+                          {w.start.toFixed(2)}s
+                        </Text>
+                      </View>
+                    ))}
+                  </View>
+                </View>
+              )}
+
+              <View style={styles.statsGrid}>
+                <View style={styles.statItem}>
+                  <Text style={styles.statLabel}>Avg LogProb</Text>
+                  <Text style={styles.statValue}>
+                    {data.task === 'transcribe' // each stat shows 'N/A' for any task other than 'transcribe'
+                      ? seg.avgLogprob?.toFixed(4)
+                      : 'N/A'}
+                  </Text>
+                </View>
+                <View style={styles.statItem}>
+                  <Text style={styles.statLabel}>Temp</Text>
+                  <Text style={styles.statValue}>
+                    {data.task === 'transcribe'
+                      ? seg.temperature?.toFixed(2)
+                      : 'N/A'}
+                  </Text>
+                </View>
+                <View style={styles.statItem}>
+                  {/*eslint-disable-next-line @cspell/spellchecker*/}
+                  <Text style={styles.statLabel}>Compr.</Text>
+                  <Text style={styles.statValue}>
+                    {data.task === 'transcribe'
+                      ? seg.compressionRatio?.toFixed(2)
+                      : 'N/A'}
+                  </Text>
+                </View>
+              </View>
+            </View>
+          ))}
+        </>
+      )}
+    </View>
+  );
+};
+
+const styles = StyleSheet.create({ // style sheet for the VerboseTranscription component defined earlier in this file
+  container: { // outermost wrapper
+    padding: 4,
+  },
+  metaContainer: { // box holding the "Full Text:" label, the text, and the badge row
+    marginBottom: 16,
+    padding: 12,
+    backgroundColor: '#f0f2f5',
+    borderRadius: 8,
+  },
+  label: { // "Full Text:" caption
+    fontWeight: 'bold',
+    color: '#0f186e',
+    marginBottom: 4,
+  },
+  text: { // full transcription text
+    fontSize: 16,
+    color: '#333',
+    marginBottom: 8,
+  },
+  row: { // horizontal row of language/duration badges
+    flexDirection: 'row',
+    gap: 10,
+    marginTop: 8,
+  },
+  metaItem: { // a single language or duration badge
+    fontSize: 12,
+    color: '#666',
+    backgroundColor: '#e1e4e8',
+    paddingHorizontal: 8,
+    paddingVertical: 2,
+    borderRadius: 4,
+    overflow: 'hidden',
+  },
+  sectionHeader: { // "Segments (n)" heading
+    fontSize: 18,
+    fontWeight: 'bold',
+    color: '#0f186e',
+    marginBottom: 8,
+    marginTop: 8,
+  },
+  segmentCard: { // one card per segment
+    backgroundColor: '#fff',
+    borderRadius: 8,
+    borderWidth: 1,
+    borderColor: '#e1e4e8',
+    marginBottom: 12,
+    padding: 12,
+    shadowColor: '#000',
+    shadowOffset: { width: 0, height: 1 },
+    shadowOpacity: 0.1,
+    shadowRadius: 2,
+    elevation: 2,
+  },
+  segmentHeader: { // line with the time badge on one side and the segment ID on the other
+    flexDirection: 'row',
+    justifyContent: 'space-between',
+    marginBottom: 8,
+  },
+  timeBadge: { // "start - end" time range of a segment
+    fontSize: 12,
+    fontWeight: 'bold',
+    color: '#fff',
+    backgroundColor: '#0f186e',
+    paddingHorizontal: 8,
+    paddingVertical: 2,
+    borderRadius: 12,
+    overflow: 'hidden',
+  },
+  segmentId: { // "ID: n" text
+    fontSize: 12,
+    color: '#888',
+  },
+  segmentText: { // quoted segment text
+    fontSize: 15,
+    fontStyle: 'italic',
+    color: '#333',
+    marginBottom: 12,
+  },
+  statsGrid: { // wrapper around the three per-segment stat items
+    flexDirection: 'row',
+    flexWrap: 'wrap',
+    gap: 8,
+    borderTopWidth: 1,
+    borderTopColor: '#f0f0f0',
+    paddingTop: 8,
+  },
+  statItem: { // one label/value pair inside statsGrid
+    flex: 1,
+    minWidth: '45%',
+    flexDirection: 'row',
+    justifyContent: 'space-between',
+  },
+  statLabel: { // stat captions; also reused for the "Word Timestamps:" caption
+    fontSize: 11,
+    color: '#888',
+  },
+  statValue: { // stat values (or 'N/A')
+    fontSize: 11,
+    fontWeight: '600',
+    color: '#444',
+  },
+  wordsContainer: { // box holding the word-timestamp caption and chips
+    marginVertical: 8,
+    backgroundColor: '#f8f9fa',
+    padding: 8,
+    borderRadius: 6,
+  },
+  wordsGrid: { // wrapping row of word chips
+    flexDirection: 'row',
+    flexWrap: 'wrap',
+    gap: 6,
+    marginTop: 4,
+  },
+  wordChip: { // one chip per word: word text above its start time
+    backgroundColor: '#ffffff',
+    borderWidth: 1,
+    borderColor: '#e1e4e8',
+    borderRadius: 4,
+    paddingHorizontal: 6,
+    paddingVertical: 2,
+    alignItems: 'center',
+  },
+  wordText: { // the trimmed word
+    fontSize: 12,
+    color: '#333',
+  },
+  wordTime: { // the word's start time
+    fontSize: 9,
+    color: '#888',
+    marginTop: 1,
+  },
+});
diff --git a/apps/speech/package.json b/apps/speech/package.json
@@ -19,7 +19,7 @@
     "metro-config": "^0.81.0",
     "react": "19.1.0",
     "react-native": "0.81.5",
-    "react-native-audio-api": "0.6.5",
+    "react-native-audio-api": "0.11.3",
     "react-native-device-info": "^14.0.4",
     "react-native-executorch": "workspace:*",
     "react-native-reanimated": "~4.1.1",
diff --git a/.cspell-wordlist.txt b/.cspell-wordlist.txt
@@ -104,3 +104,5 @@ POTTEDPLANT
 TVMONITOR
 sublist
 TTFT
+timestamping
+logprob