feat(worker): bundle tensorflow with the web worker

timmywil · timmywil · commit 10bf00cea9b1 · 2021-03-30T15:10:41.000-04:00
- rather than adding an option to customize tensorflow's position
diff --git a/README.md b/README.md
@@ -51,11 +51,21 @@ app.use(
   '/spokestack-web-worker.js',
   express.static(`./node_modules/spokestack/dist/web-worker.min.js`)
 )
-app.use('/tensorflow.js', express.static(`./node_modules/spokestack/dist/tensorflow.min.js`))
 ```
 
 With these made available to your front-end, the speech pipeline can be started.
 
+Another option is to copy the file from node_modules to your static/public folder during your build process.
+
+```jsonc
+// In package.json
+"scripts": {
+  // ...
+  "copy:spokestack": "cp node_modules/spokestack/dist/web-worker.min.js public/spokestack-web-worker.js",
+  "build": "npm run copy:spokestack && next build"
+}
+```
+
 ## Setup
 
 Go to [spokestack.io](https://spokestack.io) and create an account. Create a token at [spokestack.io/account/settings#api](https://spokestack.io/account/settings#api). Note that you'll only be able to see the token secret once. If you accidentally leave the page, create another token. Once you have a token, set the following environment variables in your `.bash_profile` or `.zshenv`:
diff --git a/examples/with-next/server/index.ts b/examples/with-next/server/index.ts
@@ -27,10 +27,6 @@ app.prepare().then(() => {
     '/spokestack-web-worker.js',
     express.static(`./node_modules/spokestack/dist/web-worker${dev ? '' : '.min'}.js`)
   )
-  expressApp.use(
-    '/tensorflow.js',
-    express.static(`./node_modules/spokestack/dist/tensorflow${dev ? '' : '.min'}.js`)
-  )
 
   expressApp.use('/graphql', bodyParser.json(), (req, res) => {
     const accept = req.headers.accept || ''
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -5,14 +5,12 @@
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
   "scripts": {
-    "build": "npm run clean && rollup --config && npm run minify",
+    "build": "npm run clean && rollup --config && npm run minify:worker",
     "clean": "rm -rf dist/",
     "docs": "typedoc --excludePrivate --plugin typedoc-plugin-markdown --hideBreadcrumbs --out docs --readme none src/index.ts src/client.ts && node tasks/docs.js",
     "format": "eslint . --fix && prettier --write \"**/*.ts\" \"**/*.js\" \"**/*.md\" \"**/*.json\"",
     "lint": "concurrently --raw \"eslint .\" \"npm run prettier\" \"npm run typescript\"",
-    "minify": "concurrently --raw \"npm run minify:tf\" \"npm run minify:worker\"",
-    "minify:tf": "uglifyjs --compress --comments /license/ --output dist/tensorflow.min.js -- dist/tensorflow.js",
-    "minify:worker": "uglifyjs --compress --mangle --comments /license/ --output dist/web-worker.min.js -- dist/web-worker.js",
+    "minify:worker": "uglifyjs --compress --mangle --output dist/web-worker.min.js -- dist/web-worker.js",
     "prepare": "husky install && npm run build",
     "prettier": "prettier --check \"**/*.md\" \"**/*.json\"",
     "release": "release-it",
@@ -58,6 +56,7 @@
     "@commitlint/cli": "^12.0.1",
     "@commitlint/config-conventional": "^12.0.1",
     "@release-it/conventional-changelog": "^2.0.1",
+    "@rollup/plugin-alias": "^3.1.2",
     "@rollup/plugin-commonjs": "^18.0.0",
     "@rollup/plugin-node-resolve": "^11.2.1",
     "@tensorflow/tfjs": "^3.3.0",
diff --git a/rollup.config.js b/rollup.config.js
@@ -1,3 +1,4 @@
+import alias from '@rollup/plugin-alias'
 import commonjs from '@rollup/plugin-commonjs'
 import { nodeResolve } from '@rollup/plugin-node-resolve'
 import typescript from 'rollup-plugin-typescript2'
@@ -65,30 +66,20 @@ const client = {
   }
 }
 
-const tfjs = {
-  input: './custom_tfjs/custom_tfjs.js',
-  plugins: [
-    commonjs(),
-    nodeResolve({
-      browser: true
-    })
-  ],
-  output: {
-    compact: true,
-    format: 'iife',
-    name: 'tf',
-    file: 'dist/tensorflow.js'
-  },
-  watch: {
-    include: ['custom_tfjs/**']
-  }
-}
-
 const worker = {
   input: './src/worker/index.ts',
   plugins: [
+    alias({
+      entries: {
+        '@tensorflow/tfjs': './custom_tfjs/custom_tfjs.js'
+      }
+    }),
     typescript({
       tsconfig: 'src/worker/tsconfig.json'
+    }),
+    commonjs(),
+    nodeResolve({
+      browser: true
     })
   ],
   output: {
@@ -100,4 +91,4 @@ const worker = {
   }
 }
 
-export default [server, client, tfjs, worker]
+export default [server, client, worker]
diff --git a/src/client/pipeline.ts b/src/client/pipeline.ts
@@ -131,10 +131,6 @@ let pipeline: SpeechPipeline | undefined
  *   '/spokestack-web-worker.js',
  *   express.static(`./node_modules/spokestack/dist/web-worker.min.js`)
  * )
- * app.use(
- *   '/tensorflow.js',
- *   express.static(`./node_modules/spokestack/dist/tensorflow.min.js`)
- * )
  * ```
  *
  * ```ts
diff --git a/src/worker/index.ts b/src/worker/index.ts
@@ -4,9 +4,6 @@
  * Licensed under the MIT license.
  * https://github.com/spokestack/node-spokestack/blob/develop/MIT-License.txt
  */
-importScripts('/tensorflow.js')
-
-import type * as tf from '@tensorflow/tfjs'
 
 import { SpeechConfig, SpeechEvent, SpeechEventType, Stage } from '../client/types'
 import { SpeechContext, SpeechProcessor } from './types'
@@ -15,12 +12,6 @@ import KeywordRecognizer from './processors/keyword'
 import VadTrigger from './processors/vad'
 import WakewordTrigger from './processors/wakeword'
 
-declare global {
-  interface WorkerGlobalScope {
-    tf: typeof tf
-  }
-}
-
 interface Frame {
   vad: boolean
   frame: number[]
diff --git a/src/worker/processors/keyword.ts b/src/worker/processors/keyword.ts
@@ -1,8 +1,9 @@
+import * as tf from '@tensorflow/tfjs'
+
 import { CommandModels, SpeechContext, SpeechProcessor } from '../types'
 import { SpeechConfig, SpeechEvent, SpeechEventType } from '../../client/types'
 
 import RingBuffer from '../RingBuffer'
-import type { Tensor } from '@tensorflow/tfjs'
 
 const defaultConfig = {
   melLength: 110,
@@ -79,9 +80,9 @@ export default class KeywordRecognizer implements SpeechProcessor {
   private models: CommandModels
   private hopSamples: number
   private sampleWindow = new RingBuffer<number>(0)
-  private encodeWindow = new RingBuffer<Tensor>(0)
-  private encodeState = self.tf.zeros([1])
-  private frameWindow = new RingBuffer<Tensor>(0)
+  private encodeWindow = new RingBuffer<tf.Tensor>(0)
+  private encodeState = tf.zeros([1])
+  private frameWindow = new RingBuffer<tf.Tensor>(0)
   private vadActive = false
 
   static async create(config: SpeechConfig) {
@@ -100,7 +101,6 @@ export default class KeywordRecognizer implements SpeechProcessor {
   }
 
   constructor(models: CommandModels, options: KeywordRecognizerConfig) {
-    const tf = self.tf
     this.models = models
     const config = (this.config = { ...defaultConfig, ...options })
 
@@ -109,7 +109,7 @@ export default class KeywordRecognizer implements SpeechProcessor {
     this.sampleWindow = new RingBuffer<number>(config.fftWidth)
 
     const melSamples = (config.melLength * config.sampleRate) / 1000 / this.hopSamples
-    this.frameWindow = new RingBuffer<Tensor>(melSamples)
+    this.frameWindow = new RingBuffer<tf.Tensor>(melSamples)
     const frameFill = tf.zeros([config.melWidth])
     this.frameWindow.fill(frameFill)
 
@@ -118,7 +118,7 @@ export default class KeywordRecognizer implements SpeechProcessor {
       const encodeLength = detectIn[1]
       const encodeWidth = detectIn[detectIn.length - 1]
 
-      this.encodeWindow = new RingBuffer<Tensor>(encodeLength)
+      this.encodeWindow = new RingBuffer<tf.Tensor>(encodeLength)
       const encodeFill = tf.fill([encodeWidth], -1.0)
       this.encodeWindow.fill(encodeFill)
     } else {
@@ -134,7 +134,6 @@ export default class KeywordRecognizer implements SpeechProcessor {
   }
 
   static async loadModels(baseUrl: string, sampleRate: number): Promise<CommandModels> {
-    const tf = self.tf
     return Promise.all([
       tf.loadGraphModel(`${baseUrl}/filter_${sampleRate}/model.json`),
       tf.loadGraphModel(`${baseUrl}/encode/model.json`),
@@ -166,32 +165,31 @@ export default class KeywordRecognizer implements SpeechProcessor {
     }
   }
 
-  async filter() {
-    const tf = self.tf
+  filter() {
     const frame = this.sampleWindow.toArray()
-    const filtered = this.models.filter.execute([tf.stack(frame)]) as Tensor
+    const result = this.models.filter.execute(tf.stack(frame))
+    const filtered = Array.isArray(result) ? result[0] : result
     this.frameWindow.rewind().seek(1)
     this.frameWindow.write(filtered)
-    await this.encode()
+    return this.encode()
   }
 
   async encode() {
-    const tf = self.tf
     const filtered = this.frameWindow.toArray()
     const stacked = tf.stack(filtered)
     const input = [tf.expandDims(stacked), this.encodeState]
-    const result = (await this.models.encode.executeAsync(input)) as Tensor[]
+    const result = (await this.models.encode.executeAsync(input)) as tf.Tensor[]
     this.encodeWindow.rewind().seek(1)
     this.encodeWindow.write(tf.squeeze(result[0]))
     this.encodeState = tf.squeeze(result[1], [0])
   }
 
   async classify(context: SpeechContext) {
-    const tf = self.tf
     const encoded = this.encodeWindow.toArray()
     const stacked = tf.stack(encoded)
     const input = tf.expandDims(stacked)
-    const detected = this.models.detect.execute([input]) as Tensor
+    const result = this.models.detect.execute(input)
+    const detected = Array.isArray(result) ? result[0] : result
     // look up class
     const clazz = tf.argMax(detected, 1).dataSync()[0]
     const keyword = this.config.keywordClasses[clazz]
@@ -216,7 +214,6 @@ export default class KeywordRecognizer implements SpeechProcessor {
   }
 
   reset() {
-    const tf = self.tf
     this.sampleWindow.reset()
 
     const frameFill = tf.zeros([this.config.melWidth])
diff --git a/src/worker/processors/wakeword.ts b/src/worker/processors/wakeword.ts
@@ -1,8 +1,9 @@
+import * as tf from '@tensorflow/tfjs'
+
 import { CommandModels, SpeechContext, SpeechProcessor } from '../types'
 import { SpeechConfig, SpeechEvent, SpeechEventType } from '../../client/types'
 
 import RingBuffer from '../RingBuffer'
-import type { Tensor } from '@tensorflow/tfjs'
 
 const defaultConfig = {
   melLength: 10,
@@ -74,9 +75,9 @@ export default class WakewordTrigger implements SpeechProcessor {
   private models: CommandModels
   private hopSamples: number
   private sampleWindow = new RingBuffer<number>(0)
-  private encodeWindow = new RingBuffer<Tensor>(0)
-  private encodeState = self.tf.zeros([1])
-  private frameWindow = new RingBuffer<Tensor>(0)
+  private encodeWindow = new RingBuffer<tf.Tensor>(0)
+  private encodeState = tf.zeros([1])
+  private frameWindow = new RingBuffer<tf.Tensor>(0)
   private vadActive = false
 
   static async create(config: SpeechConfig) {
@@ -88,7 +89,6 @@ export default class WakewordTrigger implements SpeechProcessor {
   }
 
   constructor(models: CommandModels, options: WakewordTriggerConfig) {
-    const tf = self.tf
     const config = (this.config = { ...defaultConfig, ...options })
     this.models = models
 
@@ -101,7 +101,7 @@ export default class WakewordTrigger implements SpeechProcessor {
     this.hopSamples = config.hopLength * (config.sampleRate / 1000)
     this.sampleWindow = new RingBuffer<number>(config.fftWidth)
     const melSamples = (config.melLength * config.sampleRate) / 1000 / this.hopSamples
-    this.frameWindow = new RingBuffer<Tensor>(melSamples)
+    this.frameWindow = new RingBuffer<tf.Tensor>(melSamples)
     const frameFill = tf.zeros([config.melWidth])
     this.frameWindow.fill(frameFill)
 
@@ -110,7 +110,7 @@ export default class WakewordTrigger implements SpeechProcessor {
       const encodeLength = detectIn[1]
       const encodeWidth = detectIn[detectIn.length - 1]
 
-      this.encodeWindow = new RingBuffer<Tensor>(encodeLength)
+      this.encodeWindow = new RingBuffer<tf.Tensor>(encodeLength)
       const encodeFill = tf.fill([encodeWidth], -1.0)
       this.encodeWindow.fill(encodeFill)
     } else {
@@ -126,7 +126,6 @@ export default class WakewordTrigger implements SpeechProcessor {
   }
 
   static async loadModels(baseUrl: string): Promise<CommandModels> {
-    const tf = self.tf
     return Promise.all([
       tf.loadGraphModel(`${baseUrl}/filter/model.json`),
       tf.loadGraphModel(`${baseUrl}/encode/model.json`),
@@ -161,32 +160,32 @@ export default class WakewordTrigger implements SpeechProcessor {
     }
   }
 
-  async filter(context: SpeechContext) {
+  filter(context: SpeechContext) {
     const frame = this.sampleWindow.toArray()
-    const filtered = this.models.filter.execute([self.tf.stack(frame)]) as Tensor
+    const filtered = this.models.filter.execute(tf.stack(frame))
     this.frameWindow.rewind().seek(1)
-    this.frameWindow.write(filtered)
-    await this.encode(context)
+    this.frameWindow.write(Array.isArray(filtered) ? filtered[0] : filtered)
+    return this.encode(context)
   }
 
   async encode(context: SpeechContext) {
-    const tf = self.tf
     const filtered = this.frameWindow.toArray()
     const stacked = tf.stack(filtered)
     const input = [tf.expandDims(stacked), this.encodeState]
-    const result = (await this.models.encode.executeAsync(input)) as Tensor[]
+    const result = (await this.models.encode.executeAsync(input)) as tf.Tensor[]
+    // result[0] is written to the encode window; result[1] is kept as the next encodeState
     this.encodeWindow.rewind().seek(1)
     this.encodeWindow.write(tf.squeeze(result[0]))
     this.encodeState = result[1]
-    await this.detect(context)
+    return this.detect(context)
   }
 
-  async detect(context: SpeechContext) {
-    const tf = self.tf
+  detect(context: SpeechContext) {
     const encoded = this.encodeWindow.toArray()
     const stacked = tf.stack(encoded)
     const input = tf.expandDims(stacked)
-    const detected = this.models.detect.execute([input]) as Tensor
+    const result = this.models.detect.execute(input)
+    const detected = Array.isArray(result) ? result[0] : result
     const confidence = tf.max(detected).dataSync()[0]
 
     // console.log(`wakeword: ${confidence.toFixed(6)}`)
@@ -205,7 +204,6 @@ export default class WakewordTrigger implements SpeechProcessor {
   }
 
   reset() {
-    const tf = self.tf
     this.sampleWindow.reset()
 
     const frameFill = tf.zeros([this.config.melWidth])