Skip to content

Commit afb2b0c

Browse files
authored
Merge pull request #4 from HumeAI/twitchard/improve-streaming
Improve streaming
2 parents c6622fc + 74e4ded commit afb2b0c

10 files changed

+605
-283
lines changed

README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ $ hume tts <text>
6060
--speed #0 Speaking speed multiplier (0.25-3.0, default is 1.0)
6161
--trailing-silence #0 Seconds of silence to add at the end (0.0-5.0, default is 0.35)
6262
--streaming Use streaming mode for TTS generation (default: true)
63+
--instant-mode Enable ultra-low latency mode for significantly faster generation (requires streaming=true, a voice, and incurs 10% higher cost)
6364
6465
━━━ Details ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
6566
@@ -103,6 +104,12 @@ $ hume tts "I am speaking very slowly" -v narrator --speed 0.75
103104
Adding trailing silence
104105
$ hume tts "Wait for it..." -v narrator --trailing-silence 3.5
105106
107+
Using instant mode for ultra-low latency
108+
$ hume tts "Hello world" -v narrator --instant-mode
109+
110+
Setting instant mode in your config (always enable)
111+
$ hume config set tts.instantMode true
112+
106113
## Voice Management
107114
108115
Save a voice from a previous generation

bun.lock

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
"bun": "^1.2.2",
99
"clipanion": "^4.0.0-rc.4",
1010
"debug": "^4.4.0",
11-
"hume": "^0.10.0",
11+
"hume": "^0.10.3",
1212
"open": "^10.1.0",
1313
"typanion": "^3.14.0",
1414
},
@@ -125,7 +125,7 @@
125125

126126
"hasown": ["hasown@2.0.2", "", { "dependencies": { "function-bind": "^1.1.2" } }, "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ=="],
127127

128-
"hume": ["hume@0.10.0", "", { "dependencies": { "form-data": "^4.0.0", "form-data-encoder": "^4.0.2", "formdata-node": "^6.0.3", "node-fetch": "^2.7.0", "qs": "^6.13.1", "readable-stream": "^4.5.2", "url-join": "4.0.1", "uuid": "9.0.1", "ws": "^8.14.2", "zod": "^3.23.8" } }, "sha512-rjarPoQylEIJ1lqWginPgyj0yCxBhZevyFdAarkQkgedmIBdLwW/jyYMHvFzzipALzt1NL5P0D3qmqXgPPgEmg=="],
128+
"hume": ["hume@0.10.3", "", { "dependencies": { "form-data": "^4.0.0", "form-data-encoder": "^4.0.2", "formdata-node": "^6.0.3", "node-fetch": "^2.7.0", "qs": "^6.13.1", "readable-stream": "^4.5.2", "url-join": "4.0.1", "uuid": "9.0.1", "ws": "^8.14.2", "zod": "^3.23.8" } }, "sha512-CmTv98BXD5ZttNPyUbGarelB7yLoYQkCUJAy9IWSnn2WFoT2AeW6A9MYJ0ZtjiCwN/yVoKrTwyXvnlAva3BfZA=="],
129129

130130
"ieee754": ["ieee754@1.2.1", "", {}, "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA=="],
131131

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
"bun": "^1.2.2",
2727
"clipanion": "^4.0.0-rc.4",
2828
"debug": "^4.4.0",
29-
"hume": "^0.10.0",
29+
"hume": "^0.10.3",
3030
"open": "^10.1.0",
3131
"typanion": "^3.14.0"
3232
},

src/common.ts

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,6 @@ export const getSettings = async (
121121
};
122122

123123
export const getHumeClient = (opts: { apiKey: string; baseUrl?: string }) => {
124-
const environment = opts.baseUrl || 'https://test-api.hume.ai';
125-
debug('Creating HumeClient with environment: %s', environment);
126124
return new HumeClient({
127125
apiKey: opts.apiKey,
128126
environment: opts.baseUrl ?? 'https://api.hume.ai',

src/config.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ export type ConfigData = {
3333
speed?: number;
3434
trailingSilence?: number;
3535
streaming?: boolean;
36+
instantMode?: boolean;
3637
};
3738
json?: boolean;
3839
pretty?: boolean;
@@ -54,6 +55,7 @@ export const configValidators = {
5455
'tts.speed': t.cascade(t.isNumber(), t.isInInclusiveRange(0.25, 3.0)),
5556
'tts.trailingSilence': t.cascade(t.isNumber(), t.isInInclusiveRange(0.0, 5.0)),
5657
'tts.streaming': t.isBoolean(),
58+
'tts.instantMode': t.isBoolean(),
5759
json: t.isBoolean(),
5860
pretty: t.isBoolean(),
5961
apiKey: t.isString(),

src/e2e.test.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -523,6 +523,9 @@ describe('CLI End-to-End Tests', () => {
523523
generation_id: generationId,
524524
audio: Buffer.from(`audio-data-${generationId}-${id}`).toString('base64'),
525525
utterance_index: partial.utteranceIndex ?? 0,
526+
// Add the missing properties required by the RawSnippetAudioChunk type
527+
snippet_id: id,
528+
text: `Sample text for ${id}`,
526529
};
527530
};
528531

src/index.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ const usageDescriptions = {
2424
'tts.speed': 'Speaking speed multiplier (0.25-3.0, default is 1.0)',
2525
'tts.trailingSilence': 'Seconds of silence to add at the end (0.0-5.0, default is 0.35)',
2626
'tts.streaming': 'Use streaming mode for TTS generation (default: true)',
27+
'tts.instantMode':
28+
'Enable ultra-low latency mode for significantly faster generation (requires streaming=true, a voice, and incurs 10% higher cost)',
2729
apiKey: 'Override the default API key',
2830
json: 'Output in JSON format',
2931
pretty: 'Output in human-readable format',
@@ -348,6 +350,8 @@ const ttsExamples: Usage['examples'] = [
348350
],
349351
['Adjusting speech speed', '$0 tts "I am speaking very slowly" -v narrator --speed 0.75'],
350352
['Adding trailing silence', '$0 tts "Wait for it..." -v narrator --trailing-silence 3.5'],
353+
['Using instant mode for ultra-low latency', '$0 tts "Hello world" -v narrator --instant-mode'],
354+
['Setting instant mode in your config', 'hume config set tts.instantMode true'],
351355
];
352356
class TtsCommand extends Command {
353357
static paths = [['tts']];
@@ -446,6 +450,10 @@ class TtsCommand extends Command {
446450
description: usageDescriptions['tts.streaming'],
447451
});
448452

453+
instantMode = Option.Boolean('--instant-mode', {
454+
description: usageDescriptions['tts.instantMode'],
455+
});
456+
449457
async execute() {
450458
const tts = new Tts();
451459
await tts.synthesize(this);

src/play_audio.ts

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
import { debug } from './common';
2+
3+
/**
 * Describes how to invoke one external audio player executable.
 */
type Command = {
  /** Name (or path) of the executable to spawn. */
  cmd: string;
  /** Builds the argument list used to play the audio file located at `path`. */
  argsWithPath: (path: string) => string[];
  /**
   * Argument list used when the audio is piped through the player's stdin,
   * or `null` when no stdin invocation is defined for this player.
   */
  argsWithStdin: string[] | null;
};
8+
9+
// Cached probe result: `undefined` means "not probed yet", `null` means
// "probed, nothing found", so a failed probe is not repeated.
let defaultAudioPlayer: Command | null | undefined = undefined;

/**
 * Returns the default audio player, running the probe on the first call only
 * and serving the cached result (including `null`) afterwards.
 */
const findDefaultAudioPlayer = (): Command | null => {
  if (defaultAudioPlayer !== undefined) {
    return defaultAudioPlayer;
  }
  defaultAudioPlayer = findDefaultAudioPlayer_();
  return defaultAudioPlayer;
};
16+
/**
 * Probes the system for the first installed audio player from a
 * platform-specific preference list.
 *
 * Each candidate is looked up with `where` (Windows) or `which` (elsewhere);
 * a candidate counts as installed only when that lookup exits with code 0.
 *
 * @returns The first installed candidate, or `null` when none is found.
 */
const findDefaultAudioPlayer_ = (): Command | null => {
  const isWindows = process.platform === 'win32';

  // Builds an argument factory that appends the audio path after the given
  // fixed flags. Called with no flags it yields just `[path]`.
  const atEnd =
    (...arr: string[]) =>
    (path: string) => [...arr, path];

  // Ordered by preference
  const commonPlayers: Command[] = isWindows
    ? [
        {
          cmd: 'powershell',
          argsWithPath: (path) => ['-c', `"(New-Object Media.SoundPlayer '${path}').PlaySync()"`],
          argsWithStdin: null,
        },
        {
          cmd: 'ffplay',
          argsWithPath: atEnd('-nodisp', '-autoexit'),
          argsWithStdin: ['-nodisp', '-autoexit', '-i', '-'],
        },
        { cmd: 'mpv', argsWithPath: atEnd('--no-video'), argsWithStdin: ['--no-video', '-'] },
        { cmd: 'mplayer', argsWithPath: atEnd(), argsWithStdin: ['-'] },
      ]
    : [
        {
          cmd: 'ffplay',
          argsWithPath: atEnd('-nodisp', '-autoexit'),
          argsWithStdin: ['-nodisp', '-autoexit', '-i', '-'],
        },
        { cmd: 'afplay', argsWithPath: atEnd(), argsWithStdin: null },
        { cmd: 'mplayer', argsWithPath: atEnd(), argsWithStdin: ['-'] },
        { cmd: 'mpv', argsWithPath: atEnd('--no-video'), argsWithStdin: ['--no-video', '-'] },
        { cmd: 'aplay', argsWithPath: atEnd(), argsWithStdin: ['-'] },
        { cmd: 'play', argsWithPath: atEnd(), argsWithStdin: ['-'] },
      ];

  const checkCmd = isWindows ? 'where' : 'which';
  for (const player of commonPlayers) {
    try {
      // spawnSync does not throw when the lookup tool reports "not found";
      // it only reports a non-zero exit code, so that must be checked.
      const { exitCode } = Bun.spawnSync([checkCmd, player.cmd]);
      if (exitCode === 0) {
        return player; // found!
      }
    } catch {
      // The lookup tool itself could not be spawned; treat this candidate
      // as unavailable and keep going.
    }
  }

  return null;
};
62+
63+
/**
 * Plays an audio file by spawning an external player process.
 *
 * @param path Location of the audio file to play.
 * @param customCommand Player command line to use instead of the detected
 *   default; `null` (or an empty string) selects the default player.
 * @returns The spawned process's `exited` promise.
 * @throws Rejects with an `Error` when no audio player is available.
 */
export const playAudioFile = async (
  path: string,
  customCommand: string | null
): Promise<unknown> => {
  const candidate = customCommand ? parseCustomCommand(customCommand) : findDefaultAudioPlayer();
  const player = ensureAudioPlayer(candidate);

  // On Windows every backslash in the path is doubled before it is handed
  // to the player; other platforms receive the path unchanged.
  const playerPath = process.platform === 'win32' ? path.replace(/\\/g, '\\\\') : path;

  const proc = Bun.spawn([player.cmd, ...player.argsWithPath(playerPath)], {
    stdout: 'ignore',
    stderr: 'ignore',
  });
  return proc.exited;
};
78+
79+
/**
 * Parses a user-supplied player command line into a `Command`.
 *
 * The command is split on runs of whitespace (quoting is not interpreted).
 * Every occurrence of the literal placeholder `$AUDIO_FILE` in an argument is
 * substituted with the audio path; for stdin playback the placeholder is
 * substituted with `-`. When no argument contains the placeholder, the
 * arguments are used as given.
 *
 * @param command Command line such as `mpv --no-video $AUDIO_FILE`.
 * @returns The executable plus argument builders for file and stdin playback.
 * @throws Error when `command` contains no executable name.
 */
export const parseCustomCommand = (command: string): Command => {
  // Trim and split on whitespace runs so repeated or leading spaces do not
  // turn into empty-string arguments.
  const [cmd, ...args] = command.trim().split(/\s+/);
  if (!cmd) {
    throw new Error('Custom play command is empty');
  }

  const placeholder = '$AUDIO_FILE';
  // split/join substitutes every occurrence and inserts the path verbatim;
  // String.prototype.replace would interpret `$&`, `$$`, ... inside the path.
  const argsWithPath = (path: string) => args.map((arg) => arg.split(placeholder).join(path));
  const argsWithStdin = args.some((arg) => arg.includes(placeholder)) ? argsWithPath('-') : args;

  return {
    cmd,
    argsWithPath,
    argsWithStdin,
  };
};
90+
91+
/**
 * Narrows a possibly-missing player to a definite `Command`.
 *
 * @param command Player to validate, or `null` when none was found.
 * @returns The same `command` object when it is present.
 * @throws Error when `command` is `null`.
 */
const ensureAudioPlayer = (command: Command | null): Command => {
  if (command) {
    return command;
  }
  throw new Error(
    'No audio player found. Please install ffplay or specify a custom player with --play-command'
  );
};
99+
100+
/**
 * Verifies that a player defines a stdin invocation.
 *
 * @param command Player to check.
 * @returns A copy of `command` whose `argsWithStdin` is typed as non-null.
 * @throws Error when `command.argsWithStdin` is missing.
 */
const ensureStdinSupport = (command: Command): Command & { argsWithStdin: string[] } => {
  const stdinArgs = command.argsWithStdin;
  if (stdinArgs) {
    return { ...command, argsWithStdin: stdinArgs };
  }
  throw new Error(
    `The audio player does not support playing from stdin. Please specify a custom player with --play-command`
  );
};
109+
110+
/**
 * Spawns an audio player that reads from stdin and lets the caller feed it.
 *
 * @param customCommand Player command line to use instead of the detected
 *   default; `null` (or an empty string) selects the default player.
 * @param f Callback that receives a `writeAudio` function; each buffer passed
 *   to it is written to the player's stdin.
 * @returns Resolves once `f` has completed and the player process has exited.
 * @throws Rejects with an `Error` when no player is available or the chosen
 *   player has no stdin invocation; rejects with whatever `f` rejects with.
 */
export const withStdinAudioPlayer = async (
  customCommand: string | null,
  f: (writeAudio: (audioBuffer: Buffer) => void) => Promise<void>
): Promise<void> => {
  const command = ensureStdinSupport(
    ensureAudioPlayer(customCommand ? parseCustomCommand(customCommand) : findDefaultAudioPlayer())
  );

  debug([command.cmd, command.argsWithStdin]);
  const proc = Bun.spawn([command.cmd, ...command.argsWithStdin], {
    stdout: 'ignore',
    stderr: 'ignore',
    stdin: 'pipe',
  });

  try {
    await f((audioBuffer: Buffer) => {
      proc.stdin.write(audioBuffer);
    });
  } finally {
    // Always close stdin, even when `f` rejects, so the player sees
    // end-of-input and is not left waiting for more data.
    proc.stdin.end();
  }
  await proc.exited;
};

0 commit comments

Comments
 (0)