Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
89029cb
Draft of changes introducing timestamping
msluszniak Jan 20, 2026
995d81f
Add missing headers
msluszniak Jan 20, 2026
27910a4
Add draft of working version for timestamps only
msluszniak Jan 20, 2026
0dcff40
Working version of both timestamping and regular version
msluszniak Jan 21, 2026
2d119d2
Clear files
msluszniak Jan 21, 2026
30b76cf
Apply suggestions from code review
msluszniak Jan 21, 2026
084cf1e
Apply further clearing
msluszniak Jan 21, 2026
24c4606
Apply suggestion from @msluszniak
msluszniak Jan 21, 2026
8b019fe
Apply autofix lint changes
msluszniak Jan 21, 2026
5eab00d
Fix linter issues
msluszniak Jan 21, 2026
db68c22
Revert changing error messages
msluszniak Jan 21, 2026
2a69753
Revert one more message
msluszniak Jan 21, 2026
11e01e8
Update docs
msluszniak Jan 21, 2026
3a82333
Fix error in demo app
msluszniak Jan 21, 2026
861c085
chore: Add non-completed draft for transcription with timestamping ut…
msluszniak Feb 4, 2026
2ce351d
fix: Add corrections (still not working version)
msluszniak Feb 5, 2026
c4c8fdc
feat: Add working word level timestamping that mimics OpenAI API
msluszniak Feb 5, 2026
78c2b8e
chore: Suppress pre-commit hook warnings
msluszniak Feb 6, 2026
a774155
chore: Make demo app more intuitive
msluszniak Feb 6, 2026
e7dab90
chore: Check if all demo apps using Speech To Text works
msluszniak Feb 6, 2026
e8af8b5
chore: Clean files
msluszniak Feb 6, 2026
3e169d2
chore: Further cleaning
msluszniak Feb 6, 2026
843b819
chore: Further cleaning
msluszniak Feb 6, 2026
cdc4f7b
chore: Cleaning once again
msluszniak Feb 6, 2026
dc9a561
chore: post-rebase cleaning
msluszniak Feb 6, 2026
1d0bff9
docs: Add generated documentation for changed code
msluszniak Feb 6, 2026
99c59b6
chore: Add update types for stt
msluszniak Feb 6, 2026
1835722
chore: fix small details
msluszniak Feb 6, 2026
12b18fe
Apply suggestion from @msluszniak
msluszniak Feb 6, 2026
190c9c2
Apply suggestion from @msluszniak
msluszniak Feb 6, 2026
560c485
Apply suggestion from @msluszniak
msluszniak Feb 6, 2026
8e49949
Apply suggestion from @msluszniak
msluszniak Feb 6, 2026
553efb8
Update documentation for the current implementation
msluszniak Feb 6, 2026
4840e99
test: Adapt test to comply with the current implementation
msluszniak Feb 9, 2026
88f5fec
chore: fix example app and make sure that elements exist in c++
msluszniak Feb 9, 2026
1fb5ac3
fix: update demo app to align with newer version of react-native-audi…
msluszniak Feb 10, 2026
9503be8
chore: fix CI
msluszniak Feb 11, 2026
2363c34
chore: bring back settings
msluszniak Feb 11, 2026
83bc085
chore: name tasks in transcription the same as in OpenAI API
msluszniak Feb 11, 2026
dd89b5c
chore: add suggestions from code review
msluszniak Feb 11, 2026
8478203
Apply suggestions from code review
msluszniak Feb 11, 2026
2c41fc0
docs: update types in transcription results to make them more explicit
msluszniak Feb 11, 2026
52128fc
chore: update naming convention
msluszniak Feb 11, 2026
b204c9e
Apply suggestions from code review
msluszniak Feb 11, 2026
933e830
docs: Update api reference after naming changes
msluszniak Feb 11, 2026
4db9100
Add noSpeechProb
msluszniak Feb 11, 2026
4b38855
Revert "Add noSpeechProb"
msluszniak Feb 11, 2026
2ec725e
chore: Remove noSpeechProb as it is not supported
msluszniak Feb 11, 2026
9680579
chore: Apply suggestions from code review
msluszniak Feb 11, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .cspell-wordlist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,5 @@ POTTEDPLANT
TVMONITOR
sublist
TTFT
timestamping
logprob
36 changes: 28 additions & 8 deletions apps/llm/app/voice_chat/index.tsx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { useContext, useEffect, useRef, useState } from 'react';
import { useContext, useEffect, useState } from 'react';
import {
Keyboard,
KeyboardAvoidingView,
Expand Down Expand Up @@ -35,14 +35,16 @@ export default function VoiceChatScreenWrapper() {

function VoiceChatScreen() {
const [isRecording, setIsRecording] = useState(false);
const [liveTranscription, setLiveTranscription] = useState('');

const [recorder] = useState(
() =>
new AudioRecorder({
sampleRate: 16000,
bufferLengthInSamples: 1600,
})
);
const messageRecorded = useRef<boolean>(false);

const { setGlobalGenerating } = useContext(GeneratingContext);

const llm = useLLM({ model: QWEN3_0_6B_QUANTIZED });
Expand All @@ -67,16 +69,32 @@ function VoiceChatScreen() {
if (isRecording) {
setIsRecording(false);
recorder.stop();
messageRecorded.current = true;
speechToText.streamStop();
} else {
setIsRecording(true);
setLiveTranscription('');

recorder.onAudioReady(({ buffer }) => {
speechToText.streamInsert(buffer.getChannelData(0));
});
recorder.start();
const transcription = await speechToText.stream();
await llm.sendMessage(transcription);

let finalResult = '';

try {
for await (const result of speechToText.stream()) {
const text = result.committed.text + result.nonCommitted.text;
setLiveTranscription(text);
finalResult = text;
}
} catch (e) {
console.error('Streaming error:', e);
} finally {
if (finalResult.trim().length > 0) {
await llm.sendMessage(finalResult);
setLiveTranscription('');
}
}
}
};

Expand All @@ -96,16 +114,17 @@ function VoiceChatScreen() {
<SWMIcon width={45} height={45} />
<Text style={styles.textModelName}>Qwen 3 x Whisper</Text>
</View>
{llm.messageHistory.length || speechToText.committedTranscription ? (

{llm.messageHistory.length > 0 || liveTranscription.length > 0 ? (
<View style={styles.chatContainer}>
<Messages
chatHistory={
speechToText.isGenerating
isRecording && liveTranscription.length > 0
? [
...llm.messageHistory,
{
role: 'user',
content: speechToText.committedTranscription,
content: liveTranscription,
},
]
: llm.messageHistory
Expand All @@ -123,6 +142,7 @@ function VoiceChatScreen() {
</Text>
</View>
)}

<View style={styles.bottomContainer}>
{DeviceInfo.isEmulatorSync() ? (
<View style={styles.emulatorBox}>
Expand Down
30 changes: 28 additions & 2 deletions apps/speech/app.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,44 @@
"bundleIdentifier": "com.anonymous.speech",
"infoPlist": {
"NSMicrophoneUsageDescription": "This app needs access to your microphone to record audio."
},
"entitlements": {
"com.apple.developer.kernel.increased-memory-limit": true
}
},
"android": {
"adaptiveIcon": {
"foregroundImage": "./assets/adaptive-icon.png",
"backgroundColor": "#ffffff"
},
"package": "com.anonymous.speech"
"package": "com.anonymous.speech",
"permissions": [
"android.permission.RECORD_AUDIO",
"android.permission.MODIFY_AUDIO_SETTINGS",
"android.permission.FOREGROUND_SERVICE",
"android.permission.FOREGROUND_SERVICE_MEDIA_PLAYBACK"
]
},
"web": {
"favicon": "./assets/favicon.png"
},
"plugins": ["expo-font"]
"plugins": [
"expo-font",
[
"react-native-audio-api",
{
"iosBackgroundMode": true,
"iosMicrophonePermission": "This app requires access to the microphone to record audio.",
"androidPermissions": [
"android.permission.MODIFY_AUDIO_SETTINGS",
"android.permission.FOREGROUND_SERVICE",
"android.permission.FOREGROUND_SERVICE_MEDIA_PLAYBACK",
"android.permission.RECORD_AUDIO"
],
"androidForegroundService": true,
"androidFSTypes": ["mediaPlayback", "microphone"]
}
]
]
}
}
241 changes: 241 additions & 0 deletions apps/speech/components/VerboseTranscription.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
import React from 'react';
import { View, Text, StyleSheet } from 'react-native';
import { TranscriptionResult } from 'react-native-executorch';

/**
 * Formats a single per-segment statistic for display.
 *
 * @param value - The raw statistic; may be absent on a segment.
 * @param fractionDigits - Number of decimals passed to `toFixed`.
 * @param isAvailable - Whether the statistic should be shown at all.
 * @returns The fixed-point string, or `'N/A'` when the statistic is not
 *   available or the value is missing (previously a missing value rendered
 *   as an empty cell).
 */
const formatStat = (
  value: number | null | undefined,
  fractionDigits: number,
  isAvailable: boolean
): string =>
  isAvailable && typeof value === 'number'
    ? value.toFixed(fractionDigits)
    : 'N/A';

/**
 * Renders a transcription result in detail: the full text, optional
 * language/duration badges, and one card per segment with its time range,
 * text, word start times and per-segment statistics.
 *
 * @param data - The transcription result to display. A falsy value renders
 *   nothing.
 */
export const VerboseTranscription = ({
  data,
}: {
  data: TranscriptionResult;
}) => {
  if (!data) return null;

  // Normalise to an array once so the JSX below needs no optional chaining.
  const segments = Array.isArray(data.segments) ? data.segments : [];
  const hasSegments = segments.length > 0;

  const hasLanguage =
    !!data.language && data.language !== 'N/A' && data.language.trim() !== '';

  // Only a positive numeric duration is shown.
  const duration =
    typeof data.duration === 'number' && data.duration > 0
      ? data.duration
      : undefined;
  const hasDuration = duration !== undefined;

  const hasMetadata = hasLanguage || hasDuration;

  // Per-segment statistics are displayed only for the 'transcribe' task;
  // any other task shows the 'N/A' placeholder.
  const statsAvailable = data.task === 'transcribe';

  return (
    <View style={styles.container}>
      <View style={styles.metaContainer}>
        <Text style={styles.label}>Full Text:</Text>
        <Text style={styles.text}>{data.text || ''}</Text>

        {hasMetadata && (
          <View style={styles.row}>
            {hasLanguage && (
              <Text style={styles.metaItem}>Language: {data.language}</Text>
            )}
            {duration !== undefined && (
              <Text style={styles.metaItem}>
                Duration: {duration.toFixed(2)}s
              </Text>
            )}
          </View>
        )}
      </View>

      {hasSegments && (
        <>
          <Text style={styles.sectionHeader}>
            Segments ({segments.length})
          </Text>

          {segments.map((seg, index) => (
            <View key={index} style={styles.segmentCard}>
              <View style={styles.segmentHeader}>
                <Text style={styles.timeBadge}>
                  {seg.start.toFixed(2)}s - {seg.end.toFixed(2)}s
                </Text>
                <Text style={styles.segmentId}>ID: {index}</Text>
              </View>

              <Text style={styles.segmentText}>"{seg.text}"</Text>

              {seg.words && seg.words.length > 0 && (
                <View style={styles.wordsContainer}>
                  <Text style={styles.statLabel}>Word Timestamps:</Text>
                  <View style={styles.wordsGrid}>
                    {seg.words.map((w, wIdx) => (
                      <View key={wIdx} style={styles.wordChip}>
                        <Text style={styles.wordText}>{w.word.trim()}</Text>
                        <Text style={styles.wordTime}>
                          {w.start.toFixed(2)}s
                        </Text>
                      </View>
                    ))}
                  </View>
                </View>
              )}

              <View style={styles.statsGrid}>
                <View style={styles.statItem}>
                  <Text style={styles.statLabel}>Avg LogProb</Text>
                  <Text style={styles.statValue}>
                    {formatStat(seg.avgLogprob, 4, statsAvailable)}
                  </Text>
                </View>
                <View style={styles.statItem}>
                  <Text style={styles.statLabel}>Temp</Text>
                  <Text style={styles.statValue}>
                    {formatStat(seg.temperature, 2, statsAvailable)}
                  </Text>
                </View>
                <View style={styles.statItem}>
                  {/*eslint-disable-next-line @cspell/spellchecker*/}
                  <Text style={styles.statLabel}>Compr.</Text>
                  <Text style={styles.statValue}>
                    {formatStat(seg.compressionRatio, 2, statsAvailable)}
                  </Text>
                </View>
              </View>
            </View>
          ))}
        </>
      )}
    </View>
  );
};

// Colour literals that recur across several style entries below.
const PALETTE = {
  brand: '#0f186e',
  border: '#e1e4e8',
  bodyText: '#333',
  mutedText: '#888',
} as const;

/** Style sheet for the VerboseTranscription component. */
const styles = StyleSheet.create({
  // --- Root and summary panel ---
  container: { padding: 4 },
  metaContainer: {
    marginBottom: 16,
    padding: 12,
    backgroundColor: '#f0f2f5',
    borderRadius: 8,
  },
  label: {
    fontWeight: 'bold',
    color: PALETTE.brand,
    marginBottom: 4,
  },
  text: {
    fontSize: 16,
    color: PALETTE.bodyText,
    marginBottom: 8,
  },
  row: {
    flexDirection: 'row',
    gap: 10,
    marginTop: 8,
  },
  metaItem: {
    fontSize: 12,
    color: '#666',
    backgroundColor: PALETTE.border,
    paddingHorizontal: 8,
    paddingVertical: 2,
    borderRadius: 4,
    overflow: 'hidden',
  },

  // --- Segment list ---
  sectionHeader: {
    fontSize: 18,
    fontWeight: 'bold',
    color: PALETTE.brand,
    marginBottom: 8,
    marginTop: 8,
  },
  segmentCard: {
    backgroundColor: '#fff',
    borderRadius: 8,
    borderWidth: 1,
    borderColor: PALETTE.border,
    marginBottom: 12,
    padding: 12,
    shadowColor: '#000',
    shadowOffset: { width: 0, height: 1 },
    shadowOpacity: 0.1,
    shadowRadius: 2,
    elevation: 2,
  },
  segmentHeader: {
    flexDirection: 'row',
    justifyContent: 'space-between',
    marginBottom: 8,
  },
  timeBadge: {
    fontSize: 12,
    fontWeight: 'bold',
    color: '#fff',
    backgroundColor: PALETTE.brand,
    paddingHorizontal: 8,
    paddingVertical: 2,
    borderRadius: 12,
    overflow: 'hidden',
  },
  segmentId: { fontSize: 12, color: PALETTE.mutedText },
  segmentText: {
    fontSize: 15,
    fontStyle: 'italic',
    color: PALETTE.bodyText,
    marginBottom: 12,
  },

  // --- Per-segment statistics ---
  statsGrid: {
    flexDirection: 'row',
    flexWrap: 'wrap',
    gap: 8,
    borderTopWidth: 1,
    borderTopColor: '#f0f0f0',
    paddingTop: 8,
  },
  statItem: {
    flex: 1,
    minWidth: '45%',
    flexDirection: 'row',
    justifyContent: 'space-between',
  },
  statLabel: { fontSize: 11, color: PALETTE.mutedText },
  statValue: {
    fontSize: 11,
    fontWeight: '600',
    color: '#444',
  },

  // --- Word-level timestamps ---
  wordsContainer: {
    marginVertical: 8,
    backgroundColor: '#f8f9fa',
    padding: 8,
    borderRadius: 6,
  },
  wordsGrid: {
    flexDirection: 'row',
    flexWrap: 'wrap',
    gap: 6,
    marginTop: 4,
  },
  wordChip: {
    backgroundColor: '#ffffff',
    borderWidth: 1,
    borderColor: PALETTE.border,
    borderRadius: 4,
    paddingHorizontal: 6,
    paddingVertical: 2,
    alignItems: 'center',
  },
  wordText: { fontSize: 12, color: PALETTE.bodyText },
  wordTime: {
    fontSize: 9,
    color: PALETTE.mutedText,
    marginTop: 1,
  },
});
2 changes: 1 addition & 1 deletion apps/speech/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
"metro-config": "^0.81.0",
"react": "19.1.0",
"react-native": "0.81.5",
"react-native-audio-api": "0.6.5",
"react-native-audio-api": "0.11.3",
"react-native-device-info": "^14.0.4",
"react-native-executorch": "workspace:*",
"react-native-reanimated": "~4.1.1",
Expand Down
Loading
Loading