From 62a80e3742d8ff04720038e906e1ab869c3c186d Mon Sep 17 00:00:00 2001 From: Jon Schewe Date: Sat, 9 Apr 2011 23:34:21 -0500 Subject: [PATCH 1/8] Add require for rubygems --- lib/speech.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/speech.rb b/lib/speech.rb index ad4655f..f96e554 100644 --- a/lib/speech.rb +++ b/lib/speech.rb @@ -1,4 +1,5 @@ # -*- encoding: binary -*- +require 'rubygems' require 'curb' require 'json' From 96b0d64165501433bfac89805f66b483dae2e455 Mon Sep 17 00:00:00 2001 From: Jon Schewe Date: Sat, 9 Apr 2011 23:42:03 -0500 Subject: [PATCH 2/8] Fix path to i-like-pickles.wav --- test/audio_splitter_test.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/audio_splitter_test.rb b/test/audio_splitter_test.rb index 5397dc0..2885797 100644 --- a/test/audio_splitter_test.rb +++ b/test/audio_splitter_test.rb @@ -6,7 +6,7 @@ class SpeechAudioSplitterTest < Test::Unit::TestCase def test_audio_splitter - splitter = Speech::AudioSplitter.new("samples/i-like-pickles.wav", 1) + splitter = Speech::AudioSplitter.new("test/samples/i-like-pickles.wav", 1) assert_equal '00:00:03:52', splitter.duration.to_s assert_equal 3.52, splitter.duration.to_f From d48cd5b1e6f48d1051420a080e97c2c3125885b6 Mon Sep 17 00:00:00 2001 From: Jon Schewe Date: Sat, 9 Apr 2011 23:42:57 -0500 Subject: [PATCH 3/8] Different timings on my computer --- test/audio_splitter_test.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/audio_splitter_test.rb b/test/audio_splitter_test.rb index 2885797..bc4f1e0 100644 --- a/test/audio_splitter_test.rb +++ b/test/audio_splitter_test.rb @@ -8,8 +8,8 @@ class SpeechAudioSplitterTest < Test::Unit::TestCase def test_audio_splitter splitter = Speech::AudioSplitter.new("test/samples/i-like-pickles.wav", 1) - assert_equal '00:00:03:52', splitter.duration.to_s - assert_equal 3.52, splitter.duration.to_f + assert_equal '00:00:03:51', splitter.duration.to_s + assert_equal 3.51, splitter.duration.to_f chunks = splitter.split assert_equal 3, chunks.size From d35a1fb0f332c886d7fc785c6e5244b0736fe09b Mon Sep 17 00:00:00 2001 From: Jon Schewe Date: Sat, 9 Apr 2011 23:45:59 -0500 Subject: [PATCH 4/8] Fix path to i-like-pickles --- test/audio_to_text_test.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/audio_to_text_test.rb b/test/audio_to_text_test.rb index bc0546b..bb53162 100644 --- a/test/audio_to_text_test.rb +++ b/test/audio_to_text_test.rb @@ -5,7 +5,7 @@ class SpeechAudioToTextTest < Test::Unit::TestCase def test_audio_to_text - audio = Speech::AudioToText.new("samples/i-like-pickles.wav") + audio = Speech::AudioToText.new("test/samples/i-like-pickles.wav") captured_json = audio.to_text assert captured_json assert captured_json.key?("hypotheses") From b08f049970a0b652dce3b5c9273b1083c8e1f29d Mon Sep 17 00:00:00 2001 From: Jon Schewe Date: Sat, 9 Apr 2011 23:48:23 -0500 Subject: [PATCH 5/8] Getting back 3 values instead of 1, not sure why --- test/audio_to_text_test.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/audio_to_text_test.rb b/test/audio_to_text_test.rb index bb53162..bb6ad06 100644 --- a/test/audio_to_text_test.rb +++ b/test/audio_to_text_test.rb @@ -33,8 +33,8 @@ def test_short_audio_clip assert captured_json.keys.include?('id') assert captured_json.keys.include?('hypotheses') puts captured_json.inspect - assert_equal "eagles", captured_json['hypotheses'][0].first - assert_equal "pickles", captured_json['hypotheses'][1].first + assert_equal "eagles eagles eagles", captured_json['hypotheses'][0].first + assert_equal "pickles pickles pickles", captured_json['hypotheses'][1].first #assert captured_json['confidence'] > 0.9 ensure audio.clean From 9cb784c4a97f1e0e475257d4823295ff5d590eb8 Mon Sep 17 00:00:00 2001 From: Jon Schewe Date: Sun, 10 Apr 2011 11:33:59 -0500 Subject: [PATCH 6/8] Decrease default chunk size to 3 --- lib/speech/audio_to_text.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/speech/audio_to_text.rb b/lib/speech/audio_to_text.rb index b5ff4fc..113b413 100644 --- a/lib/speech/audio_to_text.rb +++ b/lib/speech/audio_to_text.rb @@ -12,7 +12,7 @@ def initialize(file) def to_text url = "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=speech2text&lang=en-US&maxresults=10" - splitter = Speech::AudioSplitter.new(file) # based off the wave file because flac doesn't tell us the duration + splitter = Speech::AudioSplitter.new(file, 3) # based off the wave file because flac doesn't tell us the duration easy = Curl::Easy.new(url) splitter.split.each do|chunk| chunk.build.to_flac From 55b627ac8e8bde6dfa04d0cb9b70d95532ffbd20 Mon Sep 17 00:00:00 2001 From: Jon Schewe Date: Sun, 10 Apr 2011 11:42:22 -0500 Subject: [PATCH 7/8] More debugging --- lib/speech/audio_to_text.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/speech/audio_to_text.rb b/lib/speech/audio_to_text.rb index a137e20..3e2c95f 100644 --- a/lib/speech/audio_to_text.rb +++ b/lib/speech/audio_to_text.rb @@ -53,7 +53,7 @@ def convert_chunk(easy, chunk, options={}) self.captured_json['status'] = data['status'] self.captured_json['id'] = data['id'] self.captured_json['hypotheses'] = data['hypotheses'].map {|ut| [ut['utterance'], ut['confidence']] } - puts self.captured_json.inspect + puts "inspect: #{self.captured_json.inspect}" File.open("#{self.captured_file}", "wb") {|f| f << captured_json.to_json } retrying = false end From 036d892fef8d88e56a492f2f70d4d3cec2833cf6 Mon Sep 17 00:00:00 2001 From: Jon Schewe Date: Sun, 10 Apr 2011 12:08:29 -0500 Subject: [PATCH 8/8] Return an array of json objects Since each file may be split into multiple chunks, one needs to return an array of json objects, one for each chunk. --- lib/speech/audio_to_text.rb | 5 ++++- test/audio_to_text_test.rb | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/speech/audio_to_text.rb b/lib/speech/audio_to_text.rb index 3e2c95f..98ba949 100644 --- a/lib/speech/audio_to_text.rb +++ b/lib/speech/audio_to_text.rb @@ -14,11 +14,14 @@ def to_text url = "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=speech2text&lang=en-US&maxresults=10" splitter = Speech::AudioSplitter.new(file, 3) # based off the wave file because flac doesn't tell us the duration easy = Curl::Easy.new(url) + result = [] splitter.split.each do|chunk| chunk.build.to_flac convert_chunk(easy, chunk) + json = JSON.parse(File.read(self.captured_file)) + result << json end - JSON.parse(File.read(self.captured_file)) + result end def clean diff --git a/test/audio_to_text_test.rb b/test/audio_to_text_test.rb index bb6ad06..9732821 100644 --- a/test/audio_to_text_test.rb +++ b/test/audio_to_text_test.rb @@ -6,7 +6,7 @@ class SpeechAudioToTextTest < Test::Unit::TestCase def test_audio_to_text audio = Speech::AudioToText.new("test/samples/i-like-pickles.wav") - captured_json = audio.to_text + captured_json = audio.to_text.first assert captured_json assert captured_json.key?("hypotheses") assert !captured_json['hypotheses'].empty? @@ -24,7 +24,7 @@ def test_audio_to_text def test_short_audio_clip audio = Speech::AudioToText.new("samples/i-like-pickles.chunk5.wav") - captured_json = audio.to_text + captured_json = audio.to_text.first assert captured_json assert captured_json.key?("hypotheses") assert !captured_json['hypotheses'].empty?