diff --git a/Runtime/NeocortexAudioReceiver.cs b/Runtime/NeocortexAudioReceiver.cs index 63f8439..c8dc614 100644 --- a/Runtime/NeocortexAudioReceiver.cs +++ b/Runtime/NeocortexAudioReceiver.cs @@ -6,18 +6,28 @@ namespace Neocortex public class NeocortexAudioReceiver : AudioReceiver { private const int FREQUENCY = 22050; - private const int AUDIO_SAMPLE_WINDOW = 64; + private const int AUDIO_SAMPLE_WINDOW = 256; private const int AMPLITUDE_MULTIPLIER = 10; + private const float ZCR_TRESHOLD = 0.2f; + private const float MIN_AMPLITUDE_RISE = 0.04f; + private AudioClip audioClip; private bool initialized; - + public string SelectedMicrophone { get; set; } public bool IsUserSpeaking { get; private set; } - - [SerializeField, Range(0, 1)] private float amplitudeThreshold = 0.1f; + + [SerializeField, Range(0, 1)] private float amplitudeThreshold = 0.05f; + [SerializeField] private float minSpeechDuration = 0.25f; [SerializeField] private float maxWaitTime = 1f; + private float zcrValue = 0; + private float previousAmplitude = 0; + private float rollingAverage = 0; + private const float smoothingFactor = 0.95f; + private float speechStartTime = -1f; + public override void StartMicrophone() { try @@ -31,7 +41,7 @@ public override void StartMicrophone() OnRecordingFailed?.Invoke(e.Message); } } - + public override void StopMicrophone() { NeocortexMicrophone.End(SelectedMicrophone); @@ -39,27 +49,37 @@ public override void StopMicrophone() IsUserSpeaking = false; AudioRecorded(); } - + private void Update() { if (!initialized) return; - UpdateAmplitude(); + float ampDelta = Amplitude - previousAmplitude; + if (UsePushToTalk) return; - - if(!IsUserSpeaking && Amplitude > amplitudeThreshold) + + if (!IsUserSpeaking && + Amplitude > amplitudeThreshold && + ampDelta > MIN_AMPLITUDE_RISE && + zcrValue < ZCR_TRESHOLD) { - IsUserSpeaking = true; + if (speechStartTime < 0) speechStartTime = Time.time; + else if (Time.time - speechStartTime >= minSpeechDuration) + IsUserSpeaking = true; } - + else + { + speechStartTime = -1f; + } + if (IsUserSpeaking) { if (Amplitude < amplitudeThreshold) { ElapsedWaitTime += Time.deltaTime; - - if(ElapsedWaitTime >= maxWaitTime) + + if (ElapsedWaitTime >= maxWaitTime) { ElapsedWaitTime = 0; StopMicrophone(); @@ -84,21 +104,40 @@ private void AudioRecorded() OnAudioRecorded?.Invoke(trimmed); } } - + private void UpdateAmplitude() { int clipPosition = NeocortexMicrophone.GetPosition(SelectedMicrophone); int startPosition = Mathf.Max(0, clipPosition - AUDIO_SAMPLE_WINDOW); float[] audioSamples = new float[AUDIO_SAMPLE_WINDOW]; audioClip.GetData(audioSamples, startPosition); - + float sum = 0; for (int i = 0; i < AUDIO_SAMPLE_WINDOW; i++) { sum += Mathf.Abs(audioSamples[i]); } - - Amplitude = Mathf.Clamp01(sum / AUDIO_SAMPLE_WINDOW * AMPLITUDE_MULTIPLIER); + + float currentAmplitude = sum / AUDIO_SAMPLE_WINDOW; + + rollingAverage = (rollingAverage * smoothingFactor) + (currentAmplitude * (1 - smoothingFactor)); + Amplitude = Mathf.Clamp01(rollingAverage * AMPLITUDE_MULTIPLIER); + zcrValue = CalculateZCR(audioSamples); + } + + private float CalculateZCR(float[] samples) + { + int zeroCrossings = 0; + for (int i = 1; i < samples.Length; i++) + { + if ((samples[i - 1] > 0 && samples[i] < 0) || + (samples[i - 1] < 0 && samples[i] > 0)) + { + zeroCrossings++; + } + } + + return (float)zeroCrossings / samples.Length; } private void OnDestroy()