Skip to content
This repository was archived by the owner on May 6, 2022. It is now read-only.

Commit c233013

Browse files
committed
feat(asr): make spokestackASRService and SpeechPipeline available
1 parent 8380465 commit c233013

File tree

11 files changed

+423
-65
lines changed

11 files changed

+423
-65
lines changed

README.md

Lines changed: 302 additions & 29 deletions
Large diffs are not rendered by default.

examples/with-next/pages/index.tsx

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,10 @@ export default class Index extends PureComponent {
322322
streaming: false
323323
})
324324
})
325-
ws.addEventListener('message', (e) => this.updateTerm(e.data))
325+
ws.addEventListener('message', (e) => {
326+
console.log(e)
327+
this.updateTerm(e.data)
328+
})
326329
} catch (e) {
327330
console.error(e)
328331
this.setState({

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
"scripts": {
88
"build": "npm run clean && rollup --config && npm run minify",
99
"clean": "rm -rf dist/",
10-
"docs": "typedoc --plugin typedoc-plugin-markdown --hideBreadcrumbs --out docs --readme none src/index.ts src/client.ts && node tasks/docs.js",
10+
"docs": "typedoc --excludePrivate --plugin typedoc-plugin-markdown --hideBreadcrumbs --out docs --readme none src/index.ts src/client.ts && node tasks/docs.js",
1111
"format": "eslint . --fix && prettier --write \"**/*.ts\" \"**/*.js\" \"**/*.md\" \"**/*.json\"",
1212
"lint": "concurrently --raw \"eslint .\" \"npm run prettier\" \"npm run typescript\"",
1313
"minify": "concurrently --raw \"npm run minify:tf\" \"npm run minify:worker\"",

src/client/SpeechPipeline.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ const defaultSpeechConfig = {
1212
hopLength: 10
1313
}
1414

15-
interface SpeechPipelineConfig {
15+
export interface SpeechPipelineConfig {
1616
speechConfig: SpeechConfig
1717
stages: Stage[]
1818
workerUrl?: string

src/client/pipeline.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { SpeechConfig, Stage } from './types'
2-
import SpeechPipeline, { PipelineEventHandler } from './SpeechPipeline'
2+
import SpeechPipeline, { PipelineEventHandler, SpeechPipelineConfig } from './SpeechPipeline'
33

44
/**
55
* Preset profiles for use with startPipeline that include both
@@ -202,3 +202,5 @@ export function stopPipeline() {
202202
pipeline = undefined
203203
}
204204
}
205+
206+
export { SpeechPipeline, SpeechPipelineConfig, PipelineEventHandler }

src/index.ts

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
1-
/**
2-
* The main export is the server code
3-
*/
41
export { default as encryptSecret } from './server/encryptSecret'
52
export { default as spokestackMiddleware } from './server/expressMiddleware'
63
export * from './server/socketServer'
74
export * from './server/asr'
5+
export {
6+
default as spokestackASRService,
7+
SpokestackASRConfig,
8+
ASRHypothesis,
9+
ASRFormat,
10+
SpokestackResponse
11+
} from './server/spokestackASRService'

src/server/asr.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,13 +50,15 @@ export function asr(content: string | Uint8Array, sampleRate: number): Promise<s
5050
.filter(Boolean)
5151
.join('\n')
5252
)
53+
} else if (response.status === 'error') {
54+
reject(new Error(response.error))
5355
}
5456
})
5557
.then((spokestackSocket) => {
5658
spokestackSocket.on('error', reject)
5759
spokestackSocket.send(content)
58-
// Send an empty buffer to signal that the transaction is done
59-
spokestackSocket.send(Buffer.from(''))
60+
// Send an empty string to signal that the transaction is done
61+
spokestackSocket.send('')
6062
})
6163
.catch(reject)
6264
})

src/server/spokestackASRService.ts

Lines changed: 43 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,34 +11,66 @@ export interface SpokestackASRConfig {
1111
limit?: number
1212
sampleRate: number
1313
/**
14-
* This timeout is for resetting the speech recognition
15-
* and clearing the transcript.
14+
* Reset speech recognition and clear the transcript after `timeout`
15+
* milliseconds of inactivity.
1616
* When no new data comes in for the given timeout,
1717
* the auth message is sent again to begin a new ASR transaction.
1818
* Set to 0 to disable.
1919
* Default: 3000
2020
*/
2121
timeout?: number
22+
/**
23+
* Set a different location for the Spokestack socket URL.
24+
* This is very rarely needed.
25+
* Default: 'wss:api.spokestack.io/v1/asr/websocket'
26+
*/
27+
spokestackUrl?: string
2228
}
2329

24-
interface SpokestackMessage {
30+
interface SpokestackAuthMessage {
2531
keyId: string
2632
signature: string
2733
body: string
2834
}
2935

30-
interface ASRHypothesis {
31-
confident: number
36+
export interface ASRHypothesis {
37+
/**
38+
* A number between 0 and 1 to indicate the
39+
* TensorFlow confidence level for the given transcript.
40+
*/
41+
confidence: number
3242
transcript: string
3343
}
3444

35-
interface SpokestackResponse {
45+
export interface SpokestackResponse {
3646
status: 'ok' | 'error'
47+
/** When the status is "error", the error message is available here. */
3748
error?: string
49+
/**
50+
* The `final` key is used to indicate that
51+
* the highest confidence transcript for the utterance is sent.
52+
* However, this will only be set to true after
53+
* signaling to Spokestack ASR that no more audio data is incoming.
54+
* Signal this by sending an empty string (e.g. `socket.send('')`).
55+
* See the source for `asr` for an example.
56+
*/
3857
final: boolean
58+
/**
59+
* This is a list of transcripts, each associated with their own
60+
* confidence level from 0 to 1.
61+
* It is an array to allow for the possibility of multiple
62+
* transcripts in the API, but is almost always a list of one.
63+
*/
3964
hypotheses: ASRHypothesis[]
4065
}
4166

67+
/**
68+
* A low-level utility for working with the Spokestack ASR service directly.
69+
* This should not be used most of the time. It is only for
70+
* custom, advanced integrations.
71+
* See `asr` for one-off ASR and `asrSocketServer` for ASR streaming using
72+
* a websocket server that can be added to any node server.
73+
*/
4274
export default function asrService(
4375
config: SpokestackASRConfig,
4476
onData: (response: SpokestackResponse) => void
@@ -59,7 +91,7 @@ export default function asrService(
5991
}
6092

6193
// Open socket
62-
const socket = new WebSocket(`wss:api.spokestack.io/v1/asr/websocket`)
94+
const socket = new WebSocket(config.spokestackUrl || 'wss:api.spokestack.io/v1/asr/websocket')
6395

6496
let prevTranscript: string | null = null
6597
let transcriptTimeout: NodeJS.Timeout
@@ -86,7 +118,7 @@ export default function asrService(
86118
rate
87119
})
88120
const signature = encryptSecret(body)
89-
const message: SpokestackMessage = {
121+
const message: SpokestackAuthMessage = {
90122
keyId: clientId,
91123
signature,
92124
body
@@ -95,7 +127,7 @@ export default function asrService(
95127
}
96128

97129
socket.on('message', (data: string) => {
98-
// console.log('Spokestack ASR socket message', data)
130+
console.log('Spokestack ASR socket message', data)
99131
try {
100132
const json: SpokestackResponse = JSON.parse(data)
101133
if (
@@ -113,6 +145,8 @@ export default function asrService(
113145
prevTranscript = null
114146
}, timeout)
115147
}
148+
} else if (json.status === 'error') {
149+
onData.call(null, json)
116150
}
117151
} catch (e) {
118152
console.error('Data format from Spokestack ASR is unexpected')

tasks/docs.js

Lines changed: 56 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,23 +11,28 @@ function write(filename, data) {
1111
const header = '\n---\n\n## Convenience functions for Node.js servers'
1212
let data = read('../README.md').replace(new RegExp(header + '[^]+'), '') + header
1313

14-
function redoLinks(data) {
15-
return (
16-
data
17-
// Remove links that aren't links to source
18-
.replace(/\[([^:]+)\]\(.*?\)/g, '$1')
19-
.replace(/\bPipelineProfile([^.])/g, '[PipelineProfile](#PipelineProfile)$1')
20-
.replace(/\bStage([^.])/g, '[Stage](#Stage)$1')
21-
.replace(/\bRecordConfig([^.])/g, '[RecordConfig](#RecordConfig)$1')
22-
)
14+
// Remove links that aren't links to source
15+
function removeLinks(data) {
16+
return data.replace(/\[([^:]+)\]\(.*?\)/g, '$1')
17+
}
18+
19+
function addLinks(data) {
20+
return data
21+
.replace(/\bPipelineProfile([^.])/g, '[PipelineProfile](#PipelineProfile)$1')
22+
.replace(/\bStage([^.])/g, '[Stage](#Stage)$1')
23+
.replace(/\bRecordConfig([^.])/g, '[RecordConfig](#RecordConfig)$1')
24+
.replace(/\bSpokestackASRConfig([^.])/g, '[SpokestackASRConfig](#SpokestackASRConfig)$1')
25+
.replace(/\bSpokestackResponse([^.])/g, '[SpokestackResponse](#SpokestackResponse)$1')
26+
.replace(/\bASRHypothesis([^.])/g, '[ASRHypothesis](#ASRHypothesis)$1')
27+
.replace(/\bSpeechPipelineConfig([^.])/g, '[SpeechPipelineConfig](#SpeechPipelineConfig)$1')
2328
}
2429

2530
/**
2631
* @param {string} filename
2732
* @param {Array<string>} functions List of functions to extract from docs
2833
*/
2934
function getModuleFunctions(filename, functions) {
30-
const available = redoLinks(read(`../docs/modules/${filename}`))
35+
const available = addLinks(removeLinks(read(`../docs/modules/${filename}`)))
3136
// Remove everything up to functions
3237
.replace(/[^]+#{2}\s*Functions/, '')
3338
.split(/___/)
@@ -41,8 +46,8 @@ function getModuleFunctions(filename, functions) {
4146
}
4247

4348
function getInterfaceContent(filename) {
44-
return (
45-
redoLinks(read(`../docs/interfaces/${filename}`))
49+
return removeLinks(
50+
read(`../docs/interfaces/${filename}`)
4651
.replace(/# Interface:\s*(.+)[^]+##\s*Properties/, '#### $1')
4752
.replace(/___/g, '')
4853
.replace(/\n### /g, '\n##### ')
@@ -53,14 +58,42 @@ function getInterfaceContent(filename) {
5358
)
5459
}
5560

61+
function getClassContent(filename) {
62+
return removeLinks(
63+
read(`../docs/classes/${filename}`)
64+
.replace(/# Class:\s*(.+)/, '#### $1')
65+
.replace(/\[.+\]\([\.\/a-z]+\)\..+/, '')
66+
.replace(/\n### .+/g, '')
67+
.replace(/## Table of contents[^]+## Constructors/, '')
68+
.replace(/___/g, '')
69+
)
70+
}
71+
72+
function getEnumContent(filename) {
73+
return removeLinks(
74+
read(`../docs/enums/${filename}`)
75+
.replace(/# Enumeration:\s*(.+)/, '#### $1')
76+
.replace(/\[.+\]\([\.\/a-z]+\)\..+/, '')
77+
.replace(/\n### .+/g, '')
78+
.replace(/## Table of contents[^]+## Enumeration members/, '')
79+
.replace(/___/g, '')
80+
)
81+
}
82+
83+
data += getModuleFunctions('index.md', ['spokestackMiddleware', 'asrSocketServer'])
84+
85+
data += getInterfaceContent('index.spokestackasrconfig.md')
86+
5687
data += getModuleFunctions('index.md', [
57-
'spokestackMiddleware',
58-
'asrSocketServer',
5988
'asr',
6089
'googleASRSocketServer',
6190
'googleASR',
62-
'encryptSecret'
91+
'spokestackASRService'
6392
])
93+
data += getInterfaceContent('index.spokestackresponse.md')
94+
data += getInterfaceContent('index.asrhypothesis.md')
95+
data += getEnumContent('index.asrformat.md')
96+
data += getModuleFunctions('index.md', ['encryptSecret'])
6497

6598
data += '\n---\n\n## Convenience functions for the client'
6699
data += '\n\nThese functions are available exports from `spokestack/client`.'
@@ -71,10 +104,15 @@ data += getModuleFunctions('client.md', [
71104
'startStream',
72105
'stopStream',
73106
'convertFloat32ToInt16',
74-
'startPipeline',
75-
'stopPipeline',
76-
'countdown'
107+
'startPipeline'
77108
])
109+
data += getClassContent('client.speechpipeline.md')
110+
data += getInterfaceContent('client.speechpipelineconfig.md')
111+
112+
data += getEnumContent('client.pipelineprofile.md')
113+
data += getEnumContent('client.speecheventtype.md')
114+
data += getEnumContent('client.stage.md')
115+
data += getModuleFunctions('client.md', ['stopPipeline', 'countdown'])
78116

79117
data += '\n---\n\n## Low-level processor functions'
80118
data +=

test/client.spec.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,6 @@ describe('client', () => {
1010
assert.ok(Client.startPipeline, 'Client contains startPipeline')
1111
assert.ok(Client.stopPipeline, 'Client contains stopPipeline')
1212
assert.ok(Client.Stage, 'Client contains Stage')
13+
assert.ok(Client.SpeechPipeline, 'Client contains SpeechPipeline')
1314
})
1415
})

0 commit comments

Comments
 (0)