Skip to content
This repository was archived by the owner on May 6, 2022. It is now read-only.

Commit 10bf00c

Browse files
committed
feat(worker): bundle tensorflow with the web worker
- rather than adding an option to customize tensorflow's position
1 parent c233013 commit 10bf00c

File tree

9 files changed

+65
-78
lines changed

9 files changed

+65
-78
lines changed

README.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,21 @@ app.use(
5151
'/spokestack-web-worker.js',
5252
express.static(`./node_modules/spokestack/dist/web-worker.min.js`)
5353
)
54-
app.use('/tensorflow.js', express.static(`./node_modules/spokestack/dist/tensorflow.min.js`))
5554
```
5655

5756
With these made available to your front-end, the speech pipeline can be started.
5857

58+
Another option is to copy the web worker file from `node_modules` to your static/public folder as part of your build process.
59+
60+
```jsonc
61+
// In package.json
62+
"scripts": {
63+
// ...
64+
"copy:spokestack": "cp node_modules/spokestack/dist/web-worker.min.js public/spokestack-web-worker.js",
65+
"build": "npm run copy:spokestack && next build"
66+
}
67+
```
68+
5969
## Setup
6070

6171
Go to [spokestack.io](https://spokestack.io) and create an account. Create a token at [spokestack.io/account/settings#api](https://spokestack.io/account/settings#api). Note that you'll only be able to see the token secret once. If you accidentally leave the page, create another token. Once you have a token, set the following environment variables in your `.bash_profile` or `.zshenv`:

examples/with-next/server/index.ts

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,6 @@ app.prepare().then(() => {
2727
'/spokestack-web-worker.js',
2828
express.static(`./node_modules/spokestack/dist/web-worker${dev ? '' : '.min'}.js`)
2929
)
30-
expressApp.use(
31-
'/tensorflow.js',
32-
express.static(`./node_modules/spokestack/dist/tensorflow${dev ? '' : '.min'}.js`)
33-
)
3430

3531
expressApp.use('/graphql', bodyParser.json(), (req, res) => {
3632
const accept = req.headers.accept || ''

package-lock.json

Lines changed: 9 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,12 @@
55
"main": "dist/index.js",
66
"types": "dist/index.d.ts",
77
"scripts": {
8-
"build": "npm run clean && rollup --config && npm run minify",
8+
"build": "npm run clean && rollup --config && npm run minify:worker",
99
"clean": "rm -rf dist/",
1010
"docs": "typedoc --excludePrivate --plugin typedoc-plugin-markdown --hideBreadcrumbs --out docs --readme none src/index.ts src/client.ts && node tasks/docs.js",
1111
"format": "eslint . --fix && prettier --write \"**/*.ts\" \"**/*.js\" \"**/*.md\" \"**/*.json\"",
1212
"lint": "concurrently --raw \"eslint .\" \"npm run prettier\" \"npm run typescript\"",
13-
"minify": "concurrently --raw \"npm run minify:tf\" \"npm run minify:worker\"",
14-
"minify:tf": "uglifyjs --compress --comments /license/ --output dist/tensorflow.min.js -- dist/tensorflow.js",
15-
"minify:worker": "uglifyjs --compress --mangle --comments /license/ --output dist/web-worker.min.js -- dist/web-worker.js",
13+
"minify:worker": "uglifyjs --compress --mangle --output dist/web-worker.min.js -- dist/web-worker.js",
1614
"prepare": "husky install && npm run build",
1715
"prettier": "prettier --check \"**/*.md\" \"**/*.json\"",
1816
"release": "release-it",
@@ -58,6 +56,7 @@
5856
"@commitlint/cli": "^12.0.1",
5957
"@commitlint/config-conventional": "^12.0.1",
6058
"@release-it/conventional-changelog": "^2.0.1",
59+
"@rollup/plugin-alias": "^3.1.2",
6160
"@rollup/plugin-commonjs": "^18.0.0",
6261
"@rollup/plugin-node-resolve": "^11.2.1",
6362
"@tensorflow/tfjs": "^3.3.0",

rollup.config.js

Lines changed: 11 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import alias from '@rollup/plugin-alias'
12
import commonjs from '@rollup/plugin-commonjs'
23
import { nodeResolve } from '@rollup/plugin-node-resolve'
34
import typescript from 'rollup-plugin-typescript2'
@@ -65,30 +66,20 @@ const client = {
6566
}
6667
}
6768

68-
const tfjs = {
69-
input: './custom_tfjs/custom_tfjs.js',
70-
plugins: [
71-
commonjs(),
72-
nodeResolve({
73-
browser: true
74-
})
75-
],
76-
output: {
77-
compact: true,
78-
format: 'iife',
79-
name: 'tf',
80-
file: 'dist/tensorflow.js'
81-
},
82-
watch: {
83-
include: ['custom_tfjs/**']
84-
}
85-
}
86-
8769
const worker = {
8870
input: './src/worker/index.ts',
8971
plugins: [
72+
alias({
73+
entries: {
74+
'@tensorflow/tfjs': './custom_tfjs/custom_tfjs.js'
75+
}
76+
}),
9077
typescript({
9178
tsconfig: 'src/worker/tsconfig.json'
79+
}),
80+
commonjs(),
81+
nodeResolve({
82+
browser: true
9283
})
9384
],
9485
output: {
@@ -100,4 +91,4 @@ const worker = {
10091
}
10192
}
10293

103-
export default [server, client, tfjs, worker]
94+
export default [server, client, worker]

src/client/pipeline.ts

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -131,10 +131,6 @@ let pipeline: SpeechPipeline | undefined
131131
* '/spokestack-web-worker.js',
132132
* express.static(`./node_modules/spokestack/dist/web-worker.min.js`)
133133
* )
134-
* app.use(
135-
* '/tensorflow.js',
136-
* express.static(`./node_modules/spokestack/dist/tensorflow.min.js`)
137-
* )
138134
* ```
139135
*
140136
* ```ts

src/worker/index.ts

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@
44
* Licensed under the MIT license.
55
* https://github.com/spokestack/node-spokestack/blob/develop/MIT-License.txt
66
*/
7-
importScripts('/tensorflow.js')
8-
9-
import type * as tf from '@tensorflow/tfjs'
107

118
import { SpeechConfig, SpeechEvent, SpeechEventType, Stage } from '../client/types'
129
import { SpeechContext, SpeechProcessor } from './types'
@@ -15,12 +12,6 @@ import KeywordRecognizer from './processors/keyword'
1512
import VadTrigger from './processors/vad'
1613
import WakewordTrigger from './processors/wakeword'
1714

18-
declare global {
19-
interface WorkerGlobalScope {
20-
tf: typeof tf
21-
}
22-
}
23-
2415
interface Frame {
2516
vad: boolean
2617
frame: number[]

src/worker/processors/keyword.ts

Lines changed: 14 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1+
import * as tf from '@tensorflow/tfjs'
2+
13
import { CommandModels, SpeechContext, SpeechProcessor } from '../types'
24
import { SpeechConfig, SpeechEvent, SpeechEventType } from '../../client/types'
35

46
import RingBuffer from '../RingBuffer'
5-
import type { Tensor } from '@tensorflow/tfjs'
67

78
const defaultConfig = {
89
melLength: 110,
@@ -79,9 +80,9 @@ export default class KeywordRecognizer implements SpeechProcessor {
7980
private models: CommandModels
8081
private hopSamples: number
8182
private sampleWindow = new RingBuffer<number>(0)
82-
private encodeWindow = new RingBuffer<Tensor>(0)
83-
private encodeState = self.tf.zeros([1])
84-
private frameWindow = new RingBuffer<Tensor>(0)
83+
private encodeWindow = new RingBuffer<tf.Tensor>(0)
84+
private encodeState = tf.zeros([1])
85+
private frameWindow = new RingBuffer<tf.Tensor>(0)
8586
private vadActive = false
8687

8788
static async create(config: SpeechConfig) {
@@ -100,7 +101,6 @@ export default class KeywordRecognizer implements SpeechProcessor {
100101
}
101102

102103
constructor(models: CommandModels, options: KeywordRecognizerConfig) {
103-
const tf = self.tf
104104
this.models = models
105105
const config = (this.config = { ...defaultConfig, ...options })
106106

@@ -109,7 +109,7 @@ export default class KeywordRecognizer implements SpeechProcessor {
109109
this.sampleWindow = new RingBuffer<number>(config.fftWidth)
110110

111111
const melSamples = (config.melLength * config.sampleRate) / 1000 / this.hopSamples
112-
this.frameWindow = new RingBuffer<Tensor>(melSamples)
112+
this.frameWindow = new RingBuffer<tf.Tensor>(melSamples)
113113
const frameFill = tf.zeros([config.melWidth])
114114
this.frameWindow.fill(frameFill)
115115

@@ -118,7 +118,7 @@ export default class KeywordRecognizer implements SpeechProcessor {
118118
const encodeLength = detectIn[1]
119119
const encodeWidth = detectIn[detectIn.length - 1]
120120

121-
this.encodeWindow = new RingBuffer<Tensor>(encodeLength)
121+
this.encodeWindow = new RingBuffer<tf.Tensor>(encodeLength)
122122
const encodeFill = tf.fill([encodeWidth], -1.0)
123123
this.encodeWindow.fill(encodeFill)
124124
} else {
@@ -134,7 +134,6 @@ export default class KeywordRecognizer implements SpeechProcessor {
134134
}
135135

136136
static async loadModels(baseUrl: string, sampleRate: number): Promise<CommandModels> {
137-
const tf = self.tf
138137
return Promise.all([
139138
tf.loadGraphModel(`${baseUrl}/filter_${sampleRate}/model.json`),
140139
tf.loadGraphModel(`${baseUrl}/encode/model.json`),
@@ -166,32 +165,31 @@ export default class KeywordRecognizer implements SpeechProcessor {
166165
}
167166
}
168167

169-
async filter() {
170-
const tf = self.tf
168+
filter() {
171169
const frame = this.sampleWindow.toArray()
172-
const filtered = this.models.filter.execute([tf.stack(frame)]) as Tensor
170+
const result = this.models.filter.execute(tf.stack(frame))
171+
const filtered = Array.isArray(result) ? result[0] : result
173172
this.frameWindow.rewind().seek(1)
174173
this.frameWindow.write(filtered)
175-
await this.encode()
174+
return this.encode()
176175
}
177176

178177
async encode() {
179-
const tf = self.tf
180178
const filtered = this.frameWindow.toArray()
181179
const stacked = tf.stack(filtered)
182180
const input = [tf.expandDims(stacked), this.encodeState]
183-
const result = (await this.models.encode.executeAsync(input)) as Tensor[]
181+
const result = (await this.models.encode.executeAsync(input)) as tf.Tensor[]
184182
this.encodeWindow.rewind().seek(1)
185183
this.encodeWindow.write(tf.squeeze(result[0]))
186184
this.encodeState = tf.squeeze(result[1], [0])
187185
}
188186

189187
async classify(context: SpeechContext) {
190-
const tf = self.tf
191188
const encoded = this.encodeWindow.toArray()
192189
const stacked = tf.stack(encoded)
193190
const input = tf.expandDims(stacked)
194-
const detected = this.models.detect.execute([input]) as Tensor
191+
const result = this.models.detect.execute(input)
192+
const detected = Array.isArray(result) ? result[0] : result
195193
// look up class
196194
const clazz = tf.argMax(detected, 1).dataSync()[0]
197195
const keyword = this.config.keywordClasses[clazz]
@@ -216,7 +214,6 @@ export default class KeywordRecognizer implements SpeechProcessor {
216214
}
217215

218216
reset() {
219-
const tf = self.tf
220217
this.sampleWindow.reset()
221218

222219
const frameFill = tf.zeros([this.config.melWidth])

src/worker/processors/wakeword.ts

Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1+
import * as tf from '@tensorflow/tfjs'
2+
13
import { CommandModels, SpeechContext, SpeechProcessor } from '../types'
24
import { SpeechConfig, SpeechEvent, SpeechEventType } from '../../client/types'
35

46
import RingBuffer from '../RingBuffer'
5-
import type { Tensor } from '@tensorflow/tfjs'
67

78
const defaultConfig = {
89
melLength: 10,
@@ -74,9 +75,9 @@ export default class WakewordTrigger implements SpeechProcessor {
7475
private models: CommandModels
7576
private hopSamples: number
7677
private sampleWindow = new RingBuffer<number>(0)
77-
private encodeWindow = new RingBuffer<Tensor>(0)
78-
private encodeState = self.tf.zeros([1])
79-
private frameWindow = new RingBuffer<Tensor>(0)
78+
private encodeWindow = new RingBuffer<tf.Tensor>(0)
79+
private encodeState = tf.zeros([1])
80+
private frameWindow = new RingBuffer<tf.Tensor>(0)
8081
private vadActive = false
8182

8283
static async create(config: SpeechConfig) {
@@ -88,7 +89,6 @@ export default class WakewordTrigger implements SpeechProcessor {
8889
}
8990

9091
constructor(models: CommandModels, options: WakewordTriggerConfig) {
91-
const tf = self.tf
9292
const config = (this.config = { ...defaultConfig, ...options })
9393
this.models = models
9494

@@ -101,7 +101,7 @@ export default class WakewordTrigger implements SpeechProcessor {
101101
this.hopSamples = config.hopLength * (config.sampleRate / 1000)
102102
this.sampleWindow = new RingBuffer<number>(config.fftWidth)
103103
const melSamples = (config.melLength * config.sampleRate) / 1000 / this.hopSamples
104-
this.frameWindow = new RingBuffer<Tensor>(melSamples)
104+
this.frameWindow = new RingBuffer<tf.Tensor>(melSamples)
105105
const frameFill = tf.zeros([config.melWidth])
106106
this.frameWindow.fill(frameFill)
107107

@@ -110,7 +110,7 @@ export default class WakewordTrigger implements SpeechProcessor {
110110
const encodeLength = detectIn[1]
111111
const encodeWidth = detectIn[detectIn.length - 1]
112112

113-
this.encodeWindow = new RingBuffer<Tensor>(encodeLength)
113+
this.encodeWindow = new RingBuffer<tf.Tensor>(encodeLength)
114114
const encodeFill = tf.fill([encodeWidth], -1.0)
115115
this.encodeWindow.fill(encodeFill)
116116
} else {
@@ -126,7 +126,6 @@ export default class WakewordTrigger implements SpeechProcessor {
126126
}
127127

128128
static async loadModels(baseUrl: string): Promise<CommandModels> {
129-
const tf = self.tf
130129
return Promise.all([
131130
tf.loadGraphModel(`${baseUrl}/filter/model.json`),
132131
tf.loadGraphModel(`${baseUrl}/encode/model.json`),
@@ -161,32 +160,32 @@ export default class WakewordTrigger implements SpeechProcessor {
161160
}
162161
}
163162

164-
async filter(context: SpeechContext) {
163+
filter(context: SpeechContext) {
165164
const frame = this.sampleWindow.toArray()
166-
const filtered = this.models.filter.execute([self.tf.stack(frame)]) as Tensor
165+
const filtered = this.models.filter.execute(tf.stack(frame))
167166
this.frameWindow.rewind().seek(1)
168-
this.frameWindow.write(filtered)
169-
await this.encode(context)
167+
this.frameWindow.write(Array.isArray(filtered) ? filtered[0] : filtered)
168+
return this.encode(context)
170169
}
171170

172171
async encode(context: SpeechContext) {
173-
const tf = self.tf
174172
const filtered = this.frameWindow.toArray()
175173
const stacked = tf.stack(filtered)
176174
const input = [tf.expandDims(stacked), this.encodeState]
177-
const result = (await this.models.encode.executeAsync(input)) as Tensor[]
175+
const result = (await this.models.encode.executeAsync(input)) as tf.Tensor[]
176+
console.log(JSON.stringify(result))
178177
this.encodeWindow.rewind().seek(1)
179178
this.encodeWindow.write(tf.squeeze(result[0]))
180179
this.encodeState = result[1]
181-
await this.detect(context)
180+
return this.detect(context)
182181
}
183182

184-
async detect(context: SpeechContext) {
185-
const tf = self.tf
183+
detect(context: SpeechContext) {
186184
const encoded = this.encodeWindow.toArray()
187185
const stacked = tf.stack(encoded)
188186
const input = tf.expandDims(stacked)
189-
const detected = this.models.detect.execute([input]) as Tensor
187+
const result = this.models.detect.execute(input)
188+
const detected = Array.isArray(result) ? result[0] : result
190189
const confidence = tf.max(detected).dataSync()[0]
191190

192191
// console.log(`wakeword: ${confidence.toFixed(6)}`)
@@ -205,7 +204,6 @@ export default class WakewordTrigger implements SpeechProcessor {
205204
}
206205

207206
reset() {
208-
const tf = self.tf
209207
this.sampleWindow.reset()
210208

211209
const frameFill = tf.zeros([this.config.melWidth])

0 commit comments

Comments
 (0)