feat(asr): make spokestackASRService and SpeechPipeline available

timmywil · timmywil · commit c233013b654c · 2021-03-30T14:16:06.000-04:00
diff --git a/README.md b/README.md
diff --git a/examples/with-next/pages/index.tsx b/examples/with-next/pages/index.tsx
@@ -322,7 +322,10 @@ export default class Index extends PureComponent {
             streaming: false
           })
         })
-        ws.addEventListener('message', (e) => this.updateTerm(e.data))
+        ws.addEventListener('message', (e) => {
+          console.log(e)
+          this.updateTerm(e.data)
+        })
       } catch (e) {
         console.error(e)
         this.setState({
diff --git a/package.json b/package.json
@@ -7,7 +7,7 @@
   "scripts": {
     "build": "npm run clean && rollup --config && npm run minify",
     "clean": "rm -rf dist/",
-    "docs": "typedoc --plugin typedoc-plugin-markdown --hideBreadcrumbs --out docs --readme none src/index.ts src/client.ts && node tasks/docs.js",
+    "docs": "typedoc --excludePrivate --plugin typedoc-plugin-markdown --hideBreadcrumbs --out docs --readme none src/index.ts src/client.ts && node tasks/docs.js",
     "format": "eslint . --fix && prettier --write \"**/*.ts\" \"**/*.js\" \"**/*.md\" \"**/*.json\"",
     "lint": "concurrently --raw \"eslint .\" \"npm run prettier\" \"npm run typescript\"",
     "minify": "concurrently --raw \"npm run minify:tf\" \"npm run minify:worker\"",
diff --git a/src/client/SpeechPipeline.ts b/src/client/SpeechPipeline.ts
@@ -12,7 +12,7 @@ const defaultSpeechConfig = {
   hopLength: 10
 }
 
-interface SpeechPipelineConfig {
+export interface SpeechPipelineConfig {
   speechConfig: SpeechConfig
   stages: Stage[]
   workerUrl?: string
diff --git a/src/client/pipeline.ts b/src/client/pipeline.ts
@@ -1,5 +1,5 @@
 import { SpeechConfig, Stage } from './types'
-import SpeechPipeline, { PipelineEventHandler } from './SpeechPipeline'
+import SpeechPipeline, { PipelineEventHandler, SpeechPipelineConfig } from './SpeechPipeline'
 
 /**
  * Preset profiles for use with startPipeline that include both
@@ -202,3 +202,5 @@ export function stopPipeline() {
     pipeline = undefined
   }
 }
+
+export { SpeechPipeline, SpeechPipelineConfig, PipelineEventHandler }
diff --git a/src/index.ts b/src/index.ts
@@ -1,7 +1,11 @@
-/**
- * The main export is the server code
- */
 export { default as encryptSecret } from './server/encryptSecret'
 export { default as spokestackMiddleware } from './server/expressMiddleware'
 export * from './server/socketServer'
 export * from './server/asr'
+export {
+  default as spokestackASRService,
+  SpokestackASRConfig,
+  ASRHypothesis,
+  ASRFormat,
+  SpokestackResponse
+} from './server/spokestackASRService'
diff --git a/src/server/asr.ts b/src/server/asr.ts
@@ -50,13 +50,15 @@ export function asr(content: string | Uint8Array, sampleRate: number): Promise<s
             .filter(Boolean)
             .join('\n')
         )
+      } else if (response.status === 'error') {
+        reject(new Error(response.error))
       }
     })
       .then((spokestackSocket) => {
         spokestackSocket.on('error', reject)
         spokestackSocket.send(content)
-        // Send an empty buffer to signal that the transaction is done
-        spokestackSocket.send(Buffer.from(''))
+        // Send an empty string to signal that the transaction is done
+        spokestackSocket.send('')
       })
       .catch(reject)
   })
diff --git a/src/server/spokestackASRService.ts b/src/server/spokestackASRService.ts
@@ -11,34 +11,66 @@ export interface SpokestackASRConfig {
   limit?: number
   sampleRate: number
   /**
-   * This timeout is for resetting the speech recognition
-   * and clearing the transcript.
+   * Reset speech recognition and clear the transcript every `timeout`
+   * milliseconds.
    * When no new data comes in for the given timeout,
    * the auth message is sent again to begin a new ASR transaction.
    * Set to 0 to disable.
    * Default: 3000
    */
   timeout?: number
+  /**
+   * Set a different location for the Spokestack socket URL.
+   * This is very rarely needed.
+   * Default: 'wss:api.spokestack.io/v1/asr/websocket'
+   */
+  spokestackUrl?: string
 }
 
-interface SpokestackMessage {
+interface SpokestackAuthMessage {
   keyId: string
   signature: string
   body: string
 }
 
-interface ASRHypothesis {
-  confident: number
+export interface ASRHypothesis {
+  /**
+   * A number between 0 and 1 to indicate the
+   * tensorflow confidence level for the given transcript.
+   */
+  confidence: number
   transcript: string
 }
 
-interface SpokestackResponse {
+export interface SpokestackResponse {
   status: 'ok' | 'error'
+  /** When the status is "error", the error message is available here. */
   error?: string
+  /**
+   * The `final` key is used to indicate that
+   * the highest confidence transcript for the utterance is sent.
+   * However, this will only be set to true after
+   * signaling to Spokestack ASR that no more audio data is incoming.
+   * Signal this by sending an empty string (e.g. `socket.send('')`).
+   * See the source for `asr` for an example.
+   */
   final: boolean
+  /**
+   * This is a list of transcripts, each associated with their own
+   * confidence level from 0 to 1.
+   * It is an array to allow for the possibility of multiple
+   * transcripts in the API, but is almost always a list of one.
+   */
   hypotheses: ASRHypothesis[]
 }
 
+/**
+ * A low-level utility for working with the Spokestack ASR service directly.
+ * This should not be used most of the time. It is only for
+ * custom, advanced integrations.
+ * See `asr` for one-off ASR and `asrSocketServer` for ASR streaming using
+ * a websocket server that can be added to any node server.
+ */
 export default function asrService(
   config: SpokestackASRConfig,
   onData: (response: SpokestackResponse) => void
@@ -59,7 +91,7 @@ export default function asrService(
   }
 
   // Open socket
-  const socket = new WebSocket(`wss:api.spokestack.io/v1/asr/websocket`)
+  const socket = new WebSocket(config.spokestackUrl || 'wss:api.spokestack.io/v1/asr/websocket')
 
   let prevTranscript: string | null = null
   let transcriptTimeout: NodeJS.Timeout
@@ -86,7 +118,7 @@ export default function asrService(
       rate
     })
     const signature = encryptSecret(body)
-    const message: SpokestackMessage = {
+    const message: SpokestackAuthMessage = {
       keyId: clientId,
       signature,
       body
@@ -95,7 +127,7 @@ export default function asrService(
   }
 
   socket.on('message', (data: string) => {
-    // console.log('Spokestack ASR socket message', data)
+    console.log('Spokestack ASR socket message', data)
     try {
       const json: SpokestackResponse = JSON.parse(data)
       if (
@@ -113,6 +145,8 @@ export default function asrService(
             prevTranscript = null
           }, timeout)
         }
+      } else if (json.status === 'error') {
+        onData.call(null, json)
       }
     } catch (e) {
       console.error('Data format from Spokestack ASR is unexpected')
diff --git a/tasks/docs.js b/tasks/docs.js
@@ -11,23 +11,28 @@ function write(filename, data) {
 const header = '\n---\n\n## Convenience functions for Node.js servers'
 let data = read('../README.md').replace(new RegExp(header + '[^]+'), '') + header
 
-function redoLinks(data) {
-  return (
-    data
-      // Remove links that aren't links to source
-      .replace(/\[([^:]+)\]\(.*?\)/g, '$1')
-      .replace(/\bPipelineProfile([^.])/g, '[PipelineProfile](#PipelineProfile)$1')
-      .replace(/\bStage([^.])/g, '[Stage](#Stage)$1')
-      .replace(/\bRecordConfig([^.])/g, '[RecordConfig](#RecordConfig)$1')
-  )
+// Remove links that aren't links to source
+function removeLinks(data) {
+  return data.replace(/\[([^:]+)\]\(.*?\)/g, '$1')
+}
+
+function addLinks(data) {
+  return data
+    .replace(/\bPipelineProfile([^.])/g, '[PipelineProfile](#PipelineProfile)$1')
+    .replace(/\bStage([^.])/g, '[Stage](#Stage)$1')
+    .replace(/\bRecordConfig([^.])/g, '[RecordConfig](#RecordConfig)$1')
+    .replace(/\bSpokestackASRConfig([^.])/g, '[SpokestackASRConfig](#SpokestackASRConfig)$1')
+    .replace(/\bSpokestackResponse([^.])/g, '[SpokestackResponse](#SpokestackResponse)$1')
+    .replace(/\bASRHypothesis([^.])/g, '[ASRHypothesis](#ASRHypothesis)$1')
+    .replace(/\bSpeechPipelineConfig([^.])/g, '[SpeechPipelineConfig](#SpeechPipelineConfig)$1')
 }
 
 /**
  * @param {string} filename
  * @param {Array<string>} functions List of functions to extract from docs
  */
 function getModuleFunctions(filename, functions) {
-  const available = redoLinks(read(`../docs/modules/${filename}`))
+  const available = addLinks(removeLinks(read(`../docs/modules/${filename}`)))
     // Remove everything up to functions
     .replace(/[^]+#{2}\s*Functions/, '')
     .split(/___/)
@@ -41,8 +46,8 @@ function getModuleFunctions(filename, functions) {
 }
 
 function getInterfaceContent(filename) {
-  return (
-    redoLinks(read(`../docs/interfaces/${filename}`))
+  return removeLinks(
+    read(`../docs/interfaces/${filename}`)
       .replace(/# Interface:\s*(.+)[^]+##\s*Properties/, '#### $1')
       .replace(/___/g, '')
       .replace(/\n### /g, '\n##### ')
@@ -53,14 +58,42 @@ function getInterfaceContent(filename) {
   )
 }
 
+function getClassContent(filename) {
+  return removeLinks(
+    read(`../docs/classes/${filename}`)
+      .replace(/# Class:\s*(.+)/, '#### $1')
+      .replace(/\[.+\]\([\.\/a-z]+\)\..+/, '')
+      .replace(/\n### .+/g, '')
+      .replace(/## Table of contents[^]+## Constructors/, '')
+      .replace(/___/g, '')
+  )
+}
+
+function getEnumContent(filename) {
+  return removeLinks(
+    read(`../docs/enums/${filename}`)
+      .replace(/# Enumeration:\s*(.+)/, '#### $1')
+      .replace(/\[.+\]\([\.\/a-z]+\)\..+/, '')
+      .replace(/\n### .+/g, '')
+      .replace(/## Table of contents[^]+## Enumeration members/, '')
+      .replace(/___/g, '')
+  )
+}
+
+data += getModuleFunctions('index.md', ['spokestackMiddleware', 'asrSocketServer'])
+
+data += getInterfaceContent('index.spokestackasrconfig.md')
+
 data += getModuleFunctions('index.md', [
-  'spokestackMiddleware',
-  'asrSocketServer',
   'asr',
   'googleASRSocketServer',
   'googleASR',
-  'encryptSecret'
+  'spokestackASRService'
 ])
+data += getInterfaceContent('index.spokestackresponse.md')
+data += getInterfaceContent('index.asrhypothesis.md')
+data += getEnumContent('index.asrformat.md')
+data += getModuleFunctions('index.md', ['encryptSecret'])
 
 data += '\n---\n\n## Convenience functions for the client'
 data += '\n\nThese functions are available exports from `spokestack/client`.'
@@ -71,10 +104,15 @@ data += getModuleFunctions('client.md', [
   'startStream',
   'stopStream',
   'convertFloat32ToInt16',
-  'startPipeline',
-  'stopPipeline',
-  'countdown'
+  'startPipeline'
 ])
+data += getClassContent('client.speechpipeline.md')
+data += getInterfaceContent('client.speechpipelineconfig.md')
+
+data += getEnumContent('client.pipelineprofile.md')
+data += getEnumContent('client.speecheventtype.md')
+data += getEnumContent('client.stage.md')
+data += getModuleFunctions('client.md', ['stopPipeline', 'countdown'])
 
 data += '\n---\n\n## Low-level processor functions'
 data +=
diff --git a/test/client.spec.ts b/test/client.spec.ts
@@ -10,5 +10,6 @@ describe('client', () => {
     assert.ok(Client.startPipeline, 'Client contains startPipeline')
     assert.ok(Client.stopPipeline, 'Client contains stopPipeline')
     assert.ok(Client.Stage, 'Client contains Stage')
+    assert.ok(Client.SpeechPipeline, 'Client contains SpeechPipeline')
   })
 })
diff --git a/test/index.spec.ts b/test/index.spec.ts
@@ -10,5 +10,6 @@ describe('index', () => {
     assert.ok(Index.googleASRSocketServer, 'Index contains googleASRSocketServer')
     assert.ok(Index.asr, 'Index contains asr')
     assert.ok(Index.googleASR, 'Index contains googleASR')
+    assert.ok(Index.spokestackASRService, 'Index contains spokestackASRService')
   })
 })

Original file line number	Diff line number	Diff line change
`@@ -12,7 +12,7 @@ const defaultSpeechConfig = {`
`12`	`12`	`hopLength: 10`
`13`	`13`	`}`
`14`	`14`
`15`		`-interface SpeechPipelineConfig {`
	`15`	`+export interface SpeechPipelineConfig {`
`16`	`16`	`speechConfig: SpeechConfig`
`17`	`17`	`stages: Stage[]`
`18`	`18`	`workerUrl?: string`
Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,5 @@`
`1`	`1`	`import { SpeechConfig, Stage } from './types'`
`2`		`-import SpeechPipeline, { PipelineEventHandler } from './SpeechPipeline'`
	`2`	`+import SpeechPipeline, { PipelineEventHandler, SpeechPipelineConfig } from './SpeechPipeline'`
`3`	`3`
`4`	`4`	`/**`
`5`	`5`	`* Preset profiles for use with startPipeline that include both`
`@@ -202,3 +202,5 @@ export function stopPipeline() {`
`202`	`202`	`pipeline = undefined`
`203`	`203`	`}`
`204`	`204`	`}`
	`205`	`+`
	`206`	`+export { SpeechPipeline, SpeechPipelineConfig, PipelineEventHandler }`